From d4dc210f69bcb0b4bef5a83b1c323817be89bad1 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 21 Apr 2011 20:54:46 +0200
Subject: block: don't block events on excl write for non-optical devices

Disk event code automatically blocks events on excl write.  This is
primarily to avoid issuing polling commands while burning is in
progress.  This behavior doesn't fit other types of devices with
removeable media where polling commands don't have adverse side
effects and door locking usually doesn't exist.

This patch introduces new genhd flag which controls the auto-blocking
behavior and uses it to enable auto-blocking only on optical devices.

Note for stable: 2.6.38 and later only

Cc: stable@kernel.org
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 include/linux/genhd.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index d764a426e9fd..300d7582006e 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -127,6 +127,7 @@ struct hd_struct {
 #define GENHD_FL_SUPPRESS_PARTITION_INFO	32
 #define GENHD_FL_EXT_DEVT			64 /* allow extended devt */
 #define GENHD_FL_NATIVE_CAPACITY		128
+#define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE	256
 
 enum {
 	DISK_EVENT_MEDIA_CHANGE			= 1 << 0, /* media changed */
-- 
cgit v1.2.3


From f3876930952390a31c3a7fd68dd621464a36eb80 Mon Sep 17 00:00:00 2001
From: "shaohua.li@intel.com" <shaohua.li@intel.com>
Date: Fri, 6 May 2011 11:34:32 -0600
Subject: block: add a non-queueable flush flag

flush request isn't queueable in some drives. Add a flag to let driver
notify block layer about this. We can optimize flush performance with the
knowledge.

Stable: 2.6.39 only

Cc: stable@kernel.org
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-settings.c   | 6 ++++++
 include/linux/blkdev.h | 7 +++++++
 2 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 1fa769293597..cd3c428e194f 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -790,6 +790,12 @@ void blk_queue_flush(struct request_queue *q, unsigned int flush)
 }
 EXPORT_SYMBOL_GPL(blk_queue_flush);
 
+void blk_queue_flush_queueable(struct request_queue *q, bool queueable)
+{
+	q->flush_not_queueable = !queueable;
+}
+EXPORT_SYMBOL_GPL(blk_queue_flush_queueable);
+
 static int __init blk_settings_init(void)
 {
 	blk_max_low_pfn = max_low_pfn - 1;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index cbbfd98ad4a3..8bd2a271b2d8 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -364,6 +364,7 @@ struct request_queue
 	 * for flush operations
 	 */
 	unsigned int		flush_flags;
+	unsigned int		flush_not_queueable:1;
 	unsigned int		flush_pending_idx:1;
 	unsigned int		flush_running_idx:1;
 	unsigned long		flush_pending_since;
@@ -843,6 +844,7 @@ extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
 extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 extern void blk_queue_flush(struct request_queue *q, unsigned int flush);
+extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
 
 extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
@@ -1111,6 +1113,11 @@ static inline unsigned int block_size(struct block_device *bdev)
 	return bdev->bd_block_size;
 }
 
+static inline bool queue_flush_queueable(struct request_queue *q)
+{
+	return !q->flush_not_queueable;
+}
+
 typedef struct {struct page *v;} Sector;
 
 unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *);
-- 
cgit v1.2.3


From 3ac0cc4508709d42ec9aa351086c7d38bfc0660c Mon Sep 17 00:00:00 2001
From: "shaohua.li@intel.com" <shaohua.li@intel.com>
Date: Fri, 6 May 2011 11:34:41 -0600
Subject: block: hold queue if flush is running for non-queueable flush drive

In some drives, flush requests are non-queueable. When flush request is
running, normal read/write requests can't run. If block layer dispatches
such request, driver can't handle it and requeue it.  Tejun suggested we
can hold the queue when flush is running. This can avoid unnecessary
requeue.  Also this can improve performance. For example, we have
request flush1, write1, flush 2. flush1 is dispatched, then queue is
hold, write1 isn't inserted to queue. After flush1 is finished, flush2
will be dispatched. Since disk cache is already clean, flush2 will be
finished very soon, so looks like flush2 is folded to flush1.

In my test, the queue holding completely solves a regression introduced by
commit 53d63e6b0dfb95882ec0219ba6bbd50cde423794:

    block: make the flush insertion use the tail of the dispatch list

    It's not a preempt type request, in fact we have to insert it
    behind requests that do specify INSERT_FRONT.

which causes about 20% regression running a sysbench fileio
workload.

Stable: 2.6.39 only

Cc: stable@kernel.org
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-flush.c      | 16 +++++++++++-----
 block/blk.h            | 21 ++++++++++++++++++++-
 include/linux/blkdev.h |  1 +
 3 files changed, 32 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-flush.c b/block/blk-flush.c
index 6c9b5e189e62..bb21e4c36f70 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -212,13 +212,19 @@ static void flush_end_io(struct request *flush_rq, int error)
 	}
 
 	/*
-	 * Moving a request silently to empty queue_head may stall the
-	 * queue.  Kick the queue in those cases.  This function is called
-	 * from request completion path and calling directly into
-	 * request_fn may confuse the driver.  Always use kblockd.
+	 * Kick the queue to avoid stall for two cases:
+	 * 1. Moving a request silently to empty queue_head may stall the
+	 * queue.
+	 * 2. When flush request is running in non-queueable queue, the
+	 * queue is hold. Restart the queue after flush request is finished
+	 * to avoid stall.
+	 * This function is called from request completion path and calling
+	 * directly into request_fn may confuse the driver.  Always use
+	 * kblockd.
 	 */
-	if (queued)
+	if (queued || q->flush_queue_delayed)
 		blk_run_queue_async(q);
+	q->flush_queue_delayed = 0;
 }
 
 /**
diff --git a/block/blk.h b/block/blk.h
index c9df8fc3c999..83e4bff36201 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -62,7 +62,26 @@ static inline struct request *__elv_next_request(struct request_queue *q)
 			rq = list_entry_rq(q->queue_head.next);
 			return rq;
 		}
-
+		/*
+		 * Flush request is running and flush request isn't queueable
+		 * in the drive, we can hold the queue till flush request is
+		 * finished. Even we don't do this, driver can't dispatch next
+		 * requests and will requeue them. And this can improve
+		 * throughput too. For example, we have request flush1, write1,
+		 * flush 2. flush1 is dispatched, then queue is hold, write1
+		 * isn't inserted to queue. After flush1 is finished, flush2
+		 * will be dispatched. Since disk cache is already clean,
+		 * flush2 will be finished very soon, so looks like flush2 is
+		 * folded to flush1.
+		 * Since the queue is hold, a flag is set to indicate the queue
+		 * should be restarted later. Please see flush_end_io() for
+		 * details.
+		 */
+		if (q->flush_pending_idx != q->flush_running_idx &&
+				!queue_flush_queueable(q)) {
+			q->flush_queue_delayed = 1;
+			return NULL;
+		}
 		if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
 			return NULL;
 	}
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8bd2a271b2d8..9f921bf4bf8c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -365,6 +365,7 @@ struct request_queue
 	 */
 	unsigned int		flush_flags;
 	unsigned int		flush_not_queueable:1;
+	unsigned int		flush_queue_delayed:1;
 	unsigned int		flush_pending_idx:1;
 	unsigned int		flush_running_idx:1;
 	unsigned long		flush_pending_since;
-- 
cgit v1.2.3


From 23ceb5b7719e9276d4fa72a3ecf94dd396755276 Mon Sep 17 00:00:00 2001
From: Tao Ma <boyu.mt@taobao.com>
Date: Fri, 6 May 2011 19:30:02 -0600
Subject: block: Remove extra discard_alignment from hd_struct.

Currently, hd_struct.discard_alignment is only used when we
show /sys/block/sdx/sdx/discard_alignment. So remove it and
calculate when it is asked to show.

Signed-off-by: Tao Ma <boyu.mt@taobao.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/partitions/check.c | 9 ++++++---
 include/linux/genhd.h | 1 -
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index d545e97d99c3..b7e16bccd5e5 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -255,7 +255,12 @@ ssize_t part_discard_alignment_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
 	struct hd_struct *p = dev_to_part(dev);
-	return sprintf(buf, "%u\n", p->discard_alignment);
+	struct gendisk *disk = dev_to_disk(dev);
+
+	return sprintf(buf, "%u\n",
+		       (unsigned long long)queue_limit_discard_alignment(
+							&disk->queue->limits,
+							p->start_sect));
 }
 
 ssize_t part_stat_show(struct device *dev,
@@ -449,8 +454,6 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
 	p->start_sect = start;
 	p->alignment_offset =
 		queue_limit_alignment_offset(&disk->queue->limits, start);
-	p->discard_alignment =
-		queue_limit_discard_alignment(&disk->queue->limits, start);
 	p->nr_sects = len;
 	p->partno = partno;
 	p->policy = get_disk_ro(disk);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 300d7582006e..b78956b3c2e7 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -100,7 +100,6 @@ struct hd_struct {
 	sector_t start_sect;
 	sector_t nr_sects;
 	sector_t alignment_offset;
-	unsigned int discard_alignment;
 	struct device __dev;
 	struct kobject *holder_dir;
 	int policy, partno;
-- 
cgit v1.2.3


From 6b4e306aa3dc94a0545eb9279475b1ab6209a31f Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 7 Mar 2010 16:41:34 -0800
Subject: ns: proc files for namespace naming policy.

Create files under /proc/<pid>/ns/ to allow controlling the
namespaces of a process.

This addresses three specific problems that can make namespaces hard to
work with.
- Namespaces require a dedicated process to pin them in memory.
- It is not possible to use a namespace unless you are the child
  of the original creator.
- Namespaces don't have names that userspace can use to talk about
  them.

The namespace files under /proc/<pid>/ns/ can be opened and the
file descriptor can be used to talk about a specific namespace, and
to keep the specified namespace alive.

A namespace can be kept alive by either holding the file descriptor
open or bind mounting the file someplace else.  aka:
mount --bind /proc/self/ns/net /some/filesystem/path
mount --bind /proc/self/fd/<N> /some/filesystem/path

This allows namespaces to be named with userspace policy.

It requires additional support to make use of these filedescriptors
and that will be comming in the following patches.

Acked-by: Daniel Lezcano <daniel.lezcano@free.fr>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/proc/Makefile        |   1 +
 fs/proc/base.c          |  20 +++---
 fs/proc/inode.c         |   7 ++
 fs/proc/internal.h      |  18 +++++
 fs/proc/namespaces.c    | 188 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/proc_fs.h |  18 +++++
 6 files changed, 241 insertions(+), 11 deletions(-)
 create mode 100644 fs/proc/namespaces.c

(limited to 'include/linux')

diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index df434c5f28fb..c1c729335924 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -20,6 +20,7 @@ proc-y	+= stat.o
 proc-y	+= uptime.o
 proc-y	+= version.o
 proc-y	+= softirqs.o
+proc-y	+= namespaces.o
 proc-$(CONFIG_PROC_SYSCTL)	+= proc_sysctl.o
 proc-$(CONFIG_NET)		+= proc_net.o
 proc-$(CONFIG_PROC_KCORE)	+= kcore.o
diff --git a/fs/proc/base.c b/fs/proc/base.c
index dfa532730e55..dc8bca72b002 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -600,7 +600,7 @@ static int proc_fd_access_allowed(struct inode *inode)
 	return allowed;
 }
 
-static int proc_setattr(struct dentry *dentry, struct iattr *attr)
+int proc_setattr(struct dentry *dentry, struct iattr *attr)
 {
 	int error;
 	struct inode *inode = dentry->d_inode;
@@ -1736,8 +1736,7 @@ static int task_dumpable(struct task_struct *task)
 	return 0;
 }
 
-
-static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
+struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
 {
 	struct inode * inode;
 	struct proc_inode *ei;
@@ -1779,7 +1778,7 @@ out_unlock:
 	return NULL;
 }
 
-static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 {
 	struct inode *inode = dentry->d_inode;
 	struct task_struct *task;
@@ -1820,7 +1819,7 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat
  * made this apply to all per process world readable and executable
  * directories.
  */
-static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
+int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
 	struct inode *inode;
 	struct task_struct *task;
@@ -1862,7 +1861,7 @@ static int pid_delete_dentry(const struct dentry * dentry)
 	return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
 }
 
-static const struct dentry_operations pid_dentry_operations =
+const struct dentry_operations pid_dentry_operations =
 {
 	.d_revalidate	= pid_revalidate,
 	.d_delete	= pid_delete_dentry,
@@ -1870,9 +1869,6 @@ static const struct dentry_operations pid_dentry_operations =
 
 /* Lookups */
 
-typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
-				struct task_struct *, const void *);
-
 /*
  * Fill a directory entry.
  *
@@ -1885,8 +1881,8 @@ typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
  * reported by readdir in sync with the inode numbers reported
  * by stat.
  */
-static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
-	char *name, int len,
+int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
+	const char *name, int len,
 	instantiate_t instantiate, struct task_struct *task, const void *ptr)
 {
 	struct dentry *child, *dir = filp->f_path.dentry;
@@ -2820,6 +2816,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 	DIR("task",       S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
 	DIR("fd",         S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
 	DIR("fdinfo",     S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
+	DIR("ns",	  S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
 #ifdef CONFIG_NET
 	DIR("net",        S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
 #endif
@@ -3168,6 +3165,7 @@ out_no_task:
 static const struct pid_entry tid_base_stuff[] = {
 	DIR("fd",        S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
 	DIR("fdinfo",    S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
+	DIR("ns",	 S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
 	REG("environ",   S_IRUSR, proc_environ_operations),
 	INF("auxv",      S_IRUSR, proc_pid_auxv),
 	ONE("status",    S_IRUGO, proc_pid_status),
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index d15aa1b1cc8f..74b48cfa1bb2 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -28,6 +28,7 @@ static void proc_evict_inode(struct inode *inode)
 {
 	struct proc_dir_entry *de;
 	struct ctl_table_header *head;
+	const struct proc_ns_operations *ns_ops;
 
 	truncate_inode_pages(&inode->i_data, 0);
 	end_writeback(inode);
@@ -44,6 +45,10 @@ static void proc_evict_inode(struct inode *inode)
 		rcu_assign_pointer(PROC_I(inode)->sysctl, NULL);
 		sysctl_head_put(head);
 	}
+	/* Release any associated namespace */
+	ns_ops = PROC_I(inode)->ns_ops;
+	if (ns_ops && ns_ops->put)
+		ns_ops->put(PROC_I(inode)->ns);
 }
 
 static struct kmem_cache * proc_inode_cachep;
@@ -62,6 +67,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
 	ei->pde = NULL;
 	ei->sysctl = NULL;
 	ei->sysctl_entry = NULL;
+	ei->ns = NULL;
+	ei->ns_ops = NULL;
 	inode = &ei->vfs_inode;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
 	return inode;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index c03e8d3a3a5b..96245a1b1a7c 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -119,3 +119,21 @@ struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
  */
 int proc_readdir(struct file *, void *, filldir_t);
 struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *);
+
+
+
+/* Lookups */
+typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
+				struct task_struct *, const void *);
+int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
+	const char *name, int len,
+	instantiate_t instantiate, struct task_struct *task, const void *ptr);
+int pid_revalidate(struct dentry *dentry, struct nameidata *nd);
+struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task);
+extern const struct dentry_operations pid_dentry_operations;
+int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
+int proc_setattr(struct dentry *dentry, struct iattr *attr);
+
+extern const struct inode_operations proc_ns_dir_inode_operations;
+extern const struct file_operations proc_ns_dir_operations;
+
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
new file mode 100644
index 000000000000..6ae9f07d59ee
--- /dev/null
+++ b/fs/proc/namespaces.c
@@ -0,0 +1,188 @@
+#include <linux/proc_fs.h>
+#include <linux/nsproxy.h>
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <linux/fs_struct.h>
+#include <linux/mount.h>
+#include <linux/path.h>
+#include <linux/namei.h>
+#include <linux/file.h>
+#include <linux/utsname.h>
+#include <net/net_namespace.h>
+#include <linux/mnt_namespace.h>
+#include <linux/ipc_namespace.h>
+#include <linux/pid_namespace.h>
+#include "internal.h"
+
+
+static const struct proc_ns_operations *ns_entries[] = {
+};
+
+static const struct file_operations ns_file_operations = {
+	.llseek		= no_llseek,
+};
+
+static struct dentry *proc_ns_instantiate(struct inode *dir,
+	struct dentry *dentry, struct task_struct *task, const void *ptr)
+{
+	const struct proc_ns_operations *ns_ops = ptr;
+	struct inode *inode;
+	struct proc_inode *ei;
+	struct dentry *error = ERR_PTR(-ENOENT);
+
+	inode = proc_pid_make_inode(dir->i_sb, task);
+	if (!inode)
+		goto out;
+
+	ei = PROC_I(inode);
+	inode->i_mode = S_IFREG|S_IRUSR;
+	inode->i_fop  = &ns_file_operations;
+	ei->ns_ops    = ns_ops;
+	ei->ns	      = ns_ops->get(task);
+	if (!ei->ns)
+		goto out_iput;
+
+	dentry->d_op = &pid_dentry_operations;
+	d_add(dentry, inode);
+	/* Close the race of the process dying before we return the dentry */
+	if (pid_revalidate(dentry, NULL))
+		error = NULL;
+out:
+	return error;
+out_iput:
+	iput(inode);
+	goto out;
+}
+
+static int proc_ns_fill_cache(struct file *filp, void *dirent,
+	filldir_t filldir, struct task_struct *task,
+	const struct proc_ns_operations *ops)
+{
+	return proc_fill_cache(filp, dirent, filldir,
+				ops->name, strlen(ops->name),
+				proc_ns_instantiate, task, ops);
+}
+
+static int proc_ns_dir_readdir(struct file *filp, void *dirent,
+				filldir_t filldir)
+{
+	int i;
+	struct dentry *dentry = filp->f_path.dentry;
+	struct inode *inode = dentry->d_inode;
+	struct task_struct *task = get_proc_task(inode);
+	const struct proc_ns_operations **entry, **last;
+	ino_t ino;
+	int ret;
+
+	ret = -ENOENT;
+	if (!task)
+		goto out_no_task;
+
+	ret = -EPERM;
+	if (!ptrace_may_access(task, PTRACE_MODE_READ))
+		goto out;
+
+	ret = 0;
+	i = filp->f_pos;
+	switch (i) {
+	case 0:
+		ino = inode->i_ino;
+		if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
+			goto out;
+		i++;
+		filp->f_pos++;
+		/* fall through */
+	case 1:
+		ino = parent_ino(dentry);
+		if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
+			goto out;
+		i++;
+		filp->f_pos++;
+		/* fall through */
+	default:
+		i -= 2;
+		if (i >= ARRAY_SIZE(ns_entries)) {
+			ret = 1;
+			goto out;
+		}
+		entry = ns_entries + i;
+		last = &ns_entries[ARRAY_SIZE(ns_entries) - 1];
+		while (entry <= last) {
+			if (proc_ns_fill_cache(filp, dirent, filldir,
+						task, *entry) < 0)
+				goto out;
+			filp->f_pos++;
+			entry++;
+		}
+	}
+
+	ret = 1;
+out:
+	put_task_struct(task);
+out_no_task:
+	return ret;
+}
+
+const struct file_operations proc_ns_dir_operations = {
+	.read		= generic_read_dir,
+	.readdir	= proc_ns_dir_readdir,
+};
+
+static struct dentry *proc_ns_dir_lookup(struct inode *dir,
+				struct dentry *dentry, struct nameidata *nd)
+{
+	struct dentry *error;
+	struct task_struct *task = get_proc_task(dir);
+	const struct proc_ns_operations **entry, **last;
+	unsigned int len = dentry->d_name.len;
+
+	error = ERR_PTR(-ENOENT);
+
+	if (!task)
+		goto out_no_task;
+
+	error = ERR_PTR(-EPERM);
+	if (!ptrace_may_access(task, PTRACE_MODE_READ))
+		goto out;
+
+	last = &ns_entries[ARRAY_SIZE(ns_entries) - 1];
+	for (entry = ns_entries; entry <= last; entry++) {
+		if (strlen((*entry)->name) != len)
+			continue;
+		if (!memcmp(dentry->d_name.name, (*entry)->name, len))
+			break;
+	}
+	if (entry > last)
+		goto out;
+
+	error = proc_ns_instantiate(dir, dentry, task, *entry);
+out:
+	put_task_struct(task);
+out_no_task:
+	return error;
+}
+
+const struct inode_operations proc_ns_dir_inode_operations = {
+	.lookup		= proc_ns_dir_lookup,
+	.getattr	= pid_getattr,
+	.setattr	= proc_setattr,
+};
+
+struct file *proc_ns_fget(int fd)
+{
+	struct file *file;
+
+	file = fget(fd);
+	if (!file)
+		return ERR_PTR(-EBADF);
+
+	if (file->f_op != &ns_file_operations)
+		goto out_invalid;
+
+	return file;
+
+out_invalid:
+	fput(file);
+	return ERR_PTR(-EINVAL);
+}
+
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 838c1149251a..a6d2c6da5e5a 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -179,6 +179,8 @@ extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
 extern struct file *get_mm_exe_file(struct mm_struct *mm);
 extern void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm);
 
+extern struct file *proc_ns_fget(int fd);
+
 #else
 
 #define proc_net_fops_create(net, name, mode, fops)  ({ (void)(mode), NULL; })
@@ -239,6 +241,11 @@ static inline void dup_mm_exe_file(struct mm_struct *oldmm,
 	       			   struct mm_struct *newmm)
 {}
 
+static inline struct file *proc_ns_fget(int fd)
+{
+	return ERR_PTR(-EINVAL);
+}
+
 #endif /* CONFIG_PROC_FS */
 
 #if !defined(CONFIG_PROC_KCORE)
@@ -250,6 +257,15 @@ kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
 extern void kclist_add(struct kcore_list *, void *, size_t, int type);
 #endif
 
+struct nsproxy;
+struct proc_ns_operations {
+	const char *name;
+	int type;
+	void *(*get)(struct task_struct *task);
+	void (*put)(void *ns);
+	int (*install)(struct nsproxy *nsproxy, void *ns);
+};
+
 union proc_op {
 	int (*proc_get_link)(struct inode *, struct path *);
 	int (*proc_read)(struct task_struct *task, char *page);
@@ -268,6 +284,8 @@ struct proc_inode {
 	struct proc_dir_entry *pde;
 	struct ctl_table_header *sysctl;
 	struct ctl_table *sysctl_entry;
+	void *ns;
+	const struct proc_ns_operations *ns_ops;
 	struct inode vfs_inode;
 };
 
-- 
cgit v1.2.3


From 13b6f57623bc485e116344fe91fbcb29f149242b Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 7 Mar 2010 18:14:23 -0800
Subject: ns proc: Add support for the network namespace.

Implementing file descriptors for the network namespace
is simple and straight forward.

Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Daniel Lezcano <daniel.lezcano@free.fr>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/proc/namespaces.c     |  3 +++
 include/linux/proc_fs.h  |  1 +
 net/core/net_namespace.c | 31 +++++++++++++++++++++++++++++++
 3 files changed, 35 insertions(+)

(limited to 'include/linux')

diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 6ae9f07d59ee..dcbd483e9915 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -16,6 +16,9 @@
 
 
 static const struct proc_ns_operations *ns_entries[] = {
+#ifdef CONFIG_NET_NS
+	&netns_operations,
+#endif
 };
 
 static const struct file_operations ns_file_operations = {
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index a6d2c6da5e5a..62126ec6ede9 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -265,6 +265,7 @@ struct proc_ns_operations {
 	void (*put)(void *ns);
 	int (*install)(struct nsproxy *nsproxy, void *ns);
 };
+extern const struct proc_ns_operations netns_operations;
 
 union proc_op {
 	int (*proc_get_link)(struct inode *, struct path *);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 3f860261c5ee..bf7707e09a80 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -573,3 +573,34 @@ void unregister_pernet_device(struct pernet_operations *ops)
 	mutex_unlock(&net_mutex);
 }
 EXPORT_SYMBOL_GPL(unregister_pernet_device);
+
+#ifdef CONFIG_NET_NS
+static void *netns_get(struct task_struct *task)
+{
+	struct net *net;
+	rcu_read_lock();
+	net = get_net(task->nsproxy->net_ns);
+	rcu_read_unlock();
+	return net;
+}
+
+static void netns_put(void *ns)
+{
+	put_net(ns);
+}
+
+static int netns_install(struct nsproxy *nsproxy, void *ns)
+{
+	put_net(nsproxy->net_ns);
+	nsproxy->net_ns = get_net(ns);
+	return 0;
+}
+
+const struct proc_ns_operations netns_operations = {
+	.name		= "net",
+	.type		= CLONE_NEWNET,
+	.get		= netns_get,
+	.put		= netns_put,
+	.install	= netns_install,
+};
+#endif
-- 
cgit v1.2.3


From 34482e89a5218f0f9317abf1cfba3bb38b5c29dd Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 7 Mar 2010 18:43:27 -0800
Subject: ns proc: Add support for the uts namespace

Acked-by: Daniel Lezcano <daniel.lezcano@free.fr>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/proc/namespaces.c    |  3 +++
 include/linux/proc_fs.h |  1 +
 kernel/utsname.c        | 39 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 43 insertions(+)

(limited to 'include/linux')

diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index dcbd483e9915..b017181f1273 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -19,6 +19,9 @@ static const struct proc_ns_operations *ns_entries[] = {
 #ifdef CONFIG_NET_NS
 	&netns_operations,
 #endif
+#ifdef CONFIG_UTS_NS
+	&utsns_operations,
+#endif
 };
 
 static const struct file_operations ns_file_operations = {
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 62126ec6ede9..52aa89df8a6d 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -266,6 +266,7 @@ struct proc_ns_operations {
 	int (*install)(struct nsproxy *nsproxy, void *ns);
 };
 extern const struct proc_ns_operations netns_operations;
+extern const struct proc_ns_operations utsns_operations;
 
 union proc_op {
 	int (*proc_get_link)(struct inode *, struct path *);
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 44646179eaba..bff131b9510a 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -15,6 +15,7 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/user_namespace.h>
+#include <linux/proc_fs.h>
 
 static struct uts_namespace *create_uts_ns(void)
 {
@@ -79,3 +80,41 @@ void free_uts_ns(struct kref *kref)
 	put_user_ns(ns->user_ns);
 	kfree(ns);
 }
+
+static void *utsns_get(struct task_struct *task)
+{
+	struct uts_namespace *ns = NULL;
+	struct nsproxy *nsproxy;
+
+	rcu_read_lock();
+	nsproxy = task_nsproxy(task);
+	if (nsproxy) {
+		ns = nsproxy->uts_ns;
+		get_uts_ns(ns);
+	}
+	rcu_read_unlock();
+
+	return ns;
+}
+
+static void utsns_put(void *ns)
+{
+	put_uts_ns(ns);
+}
+
+static int utsns_install(struct nsproxy *nsproxy, void *ns)
+{
+	get_uts_ns(ns);
+	put_uts_ns(nsproxy->uts_ns);
+	nsproxy->uts_ns = ns;
+	return 0;
+}
+
+const struct proc_ns_operations utsns_operations = {
+	.name		= "uts",
+	.type		= CLONE_NEWUTS,
+	.get		= utsns_get,
+	.put		= utsns_put,
+	.install	= utsns_install,
+};
+
-- 
cgit v1.2.3


From a00eaf11a223c63fbb212369d6db69ce4c55a2d1 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 7 Mar 2010 18:48:39 -0800
Subject: ns proc: Add support for the ipc namespace

Acked-by: Daniel Lezcano <daniel.lezcano@free.fr>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/proc/namespaces.c    |  3 +++
 include/linux/proc_fs.h |  1 +
 ipc/namespace.c         | 37 +++++++++++++++++++++++++++++++++++++
 3 files changed, 41 insertions(+)

(limited to 'include/linux')

diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index b017181f1273..f18d6d58bf79 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -22,6 +22,9 @@ static const struct proc_ns_operations *ns_entries[] = {
 #ifdef CONFIG_UTS_NS
 	&utsns_operations,
 #endif
+#ifdef CONFIG_IPC_NS
+	&ipcns_operations,
+#endif
 };
 
 static const struct file_operations ns_file_operations = {
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 52aa89df8a6d..a23f0b72a023 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -267,6 +267,7 @@ struct proc_ns_operations {
 };
 extern const struct proc_ns_operations netns_operations;
 extern const struct proc_ns_operations utsns_operations;
+extern const struct proc_ns_operations ipcns_operations;
 
 union proc_op {
 	int (*proc_get_link)(struct inode *, struct path *);
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 8054c8e5faf1..ce0a647869b1 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -12,6 +12,7 @@
 #include <linux/fs.h>
 #include <linux/mount.h>
 #include <linux/user_namespace.h>
+#include <linux/proc_fs.h>
 
 #include "util.h"
 
@@ -140,3 +141,39 @@ void put_ipc_ns(struct ipc_namespace *ns)
 		free_ipc_ns(ns);
 	}
 }
+
+static void *ipcns_get(struct task_struct *task)
+{
+	struct ipc_namespace *ns = NULL;
+	struct nsproxy *nsproxy;
+
+	rcu_read_lock();
+	nsproxy = task_nsproxy(task);
+	if (nsproxy)
+		ns = get_ipc_ns(nsproxy->ipc_ns);
+	rcu_read_unlock();
+
+	return ns;
+}
+
+static void ipcns_put(void *ns)
+{
+	return put_ipc_ns(ns);
+}
+
+static int ipcns_install(struct nsproxy *nsproxy, void *ns)
+{
+	/* Ditch state from the old ipc namespace */
+	exit_sem(current);
+	put_ipc_ns(nsproxy->ipc_ns);
+	nsproxy->ipc_ns = get_ipc_ns(ns);
+	return 0;
+}
+
+const struct proc_ns_operations ipcns_operations = {
+	.name		= "ipc",
+	.type		= CLONE_NEWIPC,
+	.get		= ipcns_get,
+	.put		= ipcns_put,
+	.install	= ipcns_install,
+};
-- 
cgit v1.2.3


From f063052947f770845a6252f7fa24f6f624592a24 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 4 May 2011 17:51:50 -0700
Subject: net: Allow setting the network namespace by fd

Take advantage of the new abstraction and allow network devices
to be placed in any network namespace that we have a fd to talk
about.

Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Daniel Lezcano <daniel.lezcano@free.fr>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/if_link.h     |  1 +
 include/net/net_namespace.h |  1 +
 net/core/net_namespace.c    | 33 +++++++++++++++++++++++++++++++--
 net/core/rtnetlink.c        |  5 ++++-
 4 files changed, 37 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index f4a2e6b1b864..0ee969a5593d 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -136,6 +136,7 @@ enum {
 	IFLA_PORT_SELF,
 	IFLA_AF_SPEC,
 	IFLA_GROUP,		/* Group the device belongs to */
+	IFLA_NET_NS_FD,
 	__IFLA_MAX
 };
 
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 3ae491932bc8..dcc8f5749d3f 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -119,6 +119,7 @@ static inline struct net *copy_net_ns(unsigned long flags, struct net *net_ns)
 extern struct list_head net_namespace_list;
 
 extern struct net *get_net_ns_by_pid(pid_t pid);
+extern struct net *get_net_ns_by_fd(int pid);
 
 #ifdef CONFIG_NET_NS
 extern void __put_net(struct net *net);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index bf7707e09a80..b7403ff4d6c6 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -8,6 +8,8 @@
 #include <linux/idr.h>
 #include <linux/rculist.h>
 #include <linux/nsproxy.h>
+#include <linux/proc_fs.h>
+#include <linux/file.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 
@@ -343,6 +345,28 @@ struct net *get_net_ns_by_pid(pid_t pid)
 }
 EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
 
+struct net *get_net_ns_by_fd(int fd)
+{
+	struct proc_inode *ei;
+	struct file *file;
+	struct net *net;
+
+	net = ERR_PTR(-EINVAL);
+	file = proc_ns_fget(fd);
+	if (!file)
+		goto out;
+
+	ei = PROC_I(file->f_dentry->d_inode);
+	if (ei->ns_ops != &netns_operations)
+		goto out;
+
+	net = get_net(ei->ns);
+out:
+	if (file)
+		fput(file);
+	return net;
+}
+
 static int __init net_ns_init(void)
 {
 	struct net_generic *ng;
@@ -577,10 +601,15 @@ EXPORT_SYMBOL_GPL(unregister_pernet_device);
 #ifdef CONFIG_NET_NS
 static void *netns_get(struct task_struct *task)
 {
-	struct net *net;
+	struct net *net = NULL;
+	struct nsproxy *nsproxy;
+
 	rcu_read_lock();
-	net = get_net(task->nsproxy->net_ns);
+	nsproxy = task_nsproxy(task);
+	if (nsproxy)
+		net = get_net(nsproxy->net_ns);
 	rcu_read_unlock();
+
 	return net;
 }
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d7c4bb4b1820..dca9602c62e4 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1043,6 +1043,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_LINKMODE]		= { .type = NLA_U8 },
 	[IFLA_LINKINFO]		= { .type = NLA_NESTED },
 	[IFLA_NET_NS_PID]	= { .type = NLA_U32 },
+	[IFLA_NET_NS_FD]	= { .type = NLA_U32 },
 	[IFLA_IFALIAS]	        = { .type = NLA_STRING, .len = IFALIASZ-1 },
 	[IFLA_VFINFO_LIST]	= {. type = NLA_NESTED },
 	[IFLA_VF_PORTS]		= { .type = NLA_NESTED },
@@ -1091,6 +1092,8 @@ struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
 	 */
 	if (tb[IFLA_NET_NS_PID])
 		net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID]));
+	else if (tb[IFLA_NET_NS_FD])
+		net = get_net_ns_by_fd(nla_get_u32(tb[IFLA_NET_NS_FD]));
 	else
 		net = get_net(src_net);
 	return net;
@@ -1221,7 +1224,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 	int send_addr_notify = 0;
 	int err;
 
-	if (tb[IFLA_NET_NS_PID]) {
+	if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) {
 		struct net *net = rtnl_link_get_net(dev_net(dev), tb);
 		if (IS_ERR(net)) {
 			err = PTR_ERR(net);
-- 
cgit v1.2.3


From 81f8115305f821335cf9e16110bf0806f7b93283 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Thu, 28 Apr 2011 13:25:36 +0900
Subject: i2c: i2c-sh_mobile bus speed platform data V2

Add support to the i2c-sh_mobile driver for setting
the I2C bus speed using platform data.

Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>
---
 drivers/i2c/busses/i2c-sh_mobile.c | 13 +++++++++++--
 include/linux/i2c/i2c-sh_mobile.h  | 10 ++++++++++
 2 files changed, 21 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/i2c/i2c-sh_mobile.h

(limited to 'include/linux')

diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c
index 5a1d37d0b62f..d917dd1cc091 100644
--- a/drivers/i2c/busses/i2c-sh_mobile.c
+++ b/drivers/i2c/busses/i2c-sh_mobile.c
@@ -32,6 +32,7 @@
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/slab.h>
+#include <linux/i2c/i2c-sh_mobile.h>
 
 /* Transmit operation:                                                      */
 /*                                                                          */
@@ -117,7 +118,7 @@ struct sh_mobile_i2c_data {
 	struct device *dev;
 	void __iomem *reg;
 	struct i2c_adapter adap;
-
+	unsigned long bus_speed;
 	struct clk *clk;
 	u_int8_t icic;
 	u_int8_t iccl;
@@ -205,7 +206,7 @@ static void activate_ch(struct sh_mobile_i2c_data *pd)
 	 * We also round off the result.
 	 */
 	num = i2c_clk * 5;
-	denom = NORMAL_SPEED * 9;
+	denom = pd->bus_speed * 9;
 	tmp = num * 10 / denom;
 	if (tmp % 10 >= 5)
 		pd->iccl = (u_int8_t)((num/denom) + 1);
@@ -574,6 +575,7 @@ static int sh_mobile_i2c_hook_irqs(struct platform_device *dev, int hook)
 
 static int sh_mobile_i2c_probe(struct platform_device *dev)
 {
+	struct i2c_sh_mobile_platform_data *pdata = dev->dev.platform_data;
 	struct sh_mobile_i2c_data *pd;
 	struct i2c_adapter *adap;
 	struct resource *res;
@@ -618,6 +620,11 @@ static int sh_mobile_i2c_probe(struct platform_device *dev)
 		goto err_irq;
 	}
 
+	/* Use platformd data bus speed or NORMAL_SPEED */
+	pd->bus_speed = NORMAL_SPEED;
+	if (pdata && pdata->bus_speed)
+		pd->bus_speed = pdata->bus_speed;
+
 	/* The IIC blocks on SH-Mobile ARM processors
 	 * come with two new bits in ICIC.
 	 */
@@ -658,6 +665,8 @@ static int sh_mobile_i2c_probe(struct platform_device *dev)
 		goto err_all;
 	}
 
+	dev_info(&dev->dev, "I2C adapter %d with bus speed %lu Hz\n",
+		 adap->nr, pd->bus_speed);
 	return 0;
 
  err_all:
diff --git a/include/linux/i2c/i2c-sh_mobile.h b/include/linux/i2c/i2c-sh_mobile.h
new file mode 100644
index 000000000000..beda7081aead
--- /dev/null
+++ b/include/linux/i2c/i2c-sh_mobile.h
@@ -0,0 +1,10 @@
+#ifndef __I2C_SH_MOBILE_H__
+#define __I2C_SH_MOBILE_H__
+
+#include <linux/platform_device.h>
+
+struct i2c_sh_mobile_platform_data {
+	unsigned long bus_speed;
+};
+
+#endif /* __I2C_SH_MOBILE_H__ */
-- 
cgit v1.2.3


From be84cb43833ee40a42e08f5425d20310f16229c7 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Mon, 9 May 2011 13:12:30 -0400
Subject: compat: fixes to allow working with tile arch

The existing <asm-generic/unistd.h> mechanism doesn't really provide
enough to create the 64-bit "compat" ABI properly in a generic way,
since the compat ABI is a mix of things were you can re-use the 64-bit
versions of syscalls and things where you need a compat wrapper.

To provide this in the most direct way possible, I added two new macros
to go along with the existing __SYSCALL and __SC_3264 macros: __SC_COMP
and SC_COMP_3264.  These macros take an additional argument, typically a
"compat_sys_xxx" function, which is passed to __SYSCALL if you define
__SYSCALL_COMPAT when including the header, resulting in a pointer to
the compat function being placed in the generated syscall table.

The change also adds some missing definitions to <linux/compat.h> so that
it actually has declarations for all the compat syscalls, since the
"[nr] = ##call" approach requires proper C declarations for all the
functions included in the syscall table.

Finally, compat.c defines compat_sys_sigpending() and
compat_sys_sigprocmask() even if the underlying architecture doesn't
request it, which tries to pull in undefined compat_old_sigset_t defines.
We need to guard those compat syscall definitions with appropriate
__ARCH_WANT_SYS_xxx ifdefs.

Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/kernel/compat.c    |   2 +-
 include/asm-generic/unistd.h | 221 ++++++++++++++++++++++++-------------------
 include/linux/compat.h       | 187 ++++++++++++++++++++++++++++++++++++
 kernel/compat.c              |   8 ++
 4 files changed, 320 insertions(+), 98 deletions(-)

(limited to 'include/linux')

diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c
index dbc213adf5e1..e35a3975ca3b 100644
--- a/arch/tile/kernel/compat.c
+++ b/arch/tile/kernel/compat.c
@@ -135,7 +135,7 @@ long tile_compat_sys_msgrcv(int msqid,
 
 /* Provide the compat syscall number to call mapping. */
 #undef __SYSCALL
-#define __SYSCALL(nr, call) [nr] = (compat_##call),
+#define __SYSCALL(nr, call) [nr] = (call),
 
 /* The generic versions of these don't work for Tile. */
 #define compat_sys_msgrcv tile_compat_sys_msgrcv
diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index 07c40d5149de..33d524704883 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -24,16 +24,24 @@
 #define __SC_3264(_nr, _32, _64) __SYSCALL(_nr, _64)
 #endif
 
+#ifdef __SYSCALL_COMPAT
+#define __SC_COMP(_nr, _sys, _comp) __SYSCALL(_nr, _comp)
+#define __SC_COMP_3264(_nr, _32, _64, _comp) __SYSCALL(_nr, _comp)
+#else
+#define __SC_COMP(_nr, _sys, _comp) __SYSCALL(_nr, _sys)
+#define __SC_COMP_3264(_nr, _32, _64, _comp) __SC_3264(_nr, _32, _64)
+#endif
+
 #define __NR_io_setup 0
-__SYSCALL(__NR_io_setup, sys_io_setup)
+__SC_COMP(__NR_io_setup, sys_io_setup, compat_sys_io_setup)
 #define __NR_io_destroy 1
 __SYSCALL(__NR_io_destroy, sys_io_destroy)
 #define __NR_io_submit 2
-__SYSCALL(__NR_io_submit, sys_io_submit)
+__SC_COMP(__NR_io_submit, sys_io_submit, compat_sys_io_submit)
 #define __NR_io_cancel 3
 __SYSCALL(__NR_io_cancel, sys_io_cancel)
 #define __NR_io_getevents 4
-__SYSCALL(__NR_io_getevents, sys_io_getevents)
+__SC_COMP(__NR_io_getevents, sys_io_getevents, compat_sys_io_getevents)
 
 /* fs/xattr.c */
 #define __NR_setxattr 5
@@ -67,7 +75,7 @@ __SYSCALL(__NR_getcwd, sys_getcwd)
 
 /* fs/cookies.c */
 #define __NR_lookup_dcookie 18
-__SYSCALL(__NR_lookup_dcookie, sys_lookup_dcookie)
+__SC_COMP(__NR_lookup_dcookie, sys_lookup_dcookie, compat_sys_lookup_dcookie)
 
 /* fs/eventfd.c */
 #define __NR_eventfd2 19
@@ -79,7 +87,7 @@ __SYSCALL(__NR_epoll_create1, sys_epoll_create1)
 #define __NR_epoll_ctl 21
 __SYSCALL(__NR_epoll_ctl, sys_epoll_ctl)
 #define __NR_epoll_pwait 22
-__SYSCALL(__NR_epoll_pwait, sys_epoll_pwait)
+__SC_COMP(__NR_epoll_pwait, sys_epoll_pwait, compat_sys_epoll_pwait)
 
 /* fs/fcntl.c */
 #define __NR_dup 23
@@ -87,7 +95,7 @@ __SYSCALL(__NR_dup, sys_dup)
 #define __NR_dup3 24
 __SYSCALL(__NR_dup3, sys_dup3)
 #define __NR3264_fcntl 25
-__SC_3264(__NR3264_fcntl, sys_fcntl64, sys_fcntl)
+__SC_COMP_3264(__NR3264_fcntl, sys_fcntl64, sys_fcntl, compat_sys_fcntl64)
 
 /* fs/inotify_user.c */
 #define __NR_inotify_init1 26
@@ -99,7 +107,7 @@ __SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch)
 
 /* fs/ioctl.c */
 #define __NR_ioctl 29
-__SYSCALL(__NR_ioctl, sys_ioctl)
+__SC_COMP(__NR_ioctl, sys_ioctl, compat_sys_ioctl)
 
 /* fs/ioprio.c */
 #define __NR_ioprio_set 30
@@ -129,26 +137,30 @@ __SYSCALL(__NR_renameat, sys_renameat)
 #define __NR_umount2 39
 __SYSCALL(__NR_umount2, sys_umount)
 #define __NR_mount 40
-__SYSCALL(__NR_mount, sys_mount)
+__SC_COMP(__NR_mount, sys_mount, compat_sys_mount)
 #define __NR_pivot_root 41
 __SYSCALL(__NR_pivot_root, sys_pivot_root)
 
 /* fs/nfsctl.c */
 #define __NR_nfsservctl 42
-__SYSCALL(__NR_nfsservctl, sys_nfsservctl)
+__SC_COMP(__NR_nfsservctl, sys_nfsservctl, compat_sys_nfsservctl)
 
 /* fs/open.c */
 #define __NR3264_statfs 43
-__SC_3264(__NR3264_statfs, sys_statfs64, sys_statfs)
+__SC_COMP_3264(__NR3264_statfs, sys_statfs64, sys_statfs, \
+	       compat_sys_statfs64)
 #define __NR3264_fstatfs 44
-__SC_3264(__NR3264_fstatfs, sys_fstatfs64, sys_fstatfs)
+__SC_COMP_3264(__NR3264_fstatfs, sys_fstatfs64, sys_fstatfs, \
+	       compat_sys_fstatfs64)
 #define __NR3264_truncate 45
-__SC_3264(__NR3264_truncate, sys_truncate64, sys_truncate)
+__SC_COMP_3264(__NR3264_truncate, sys_truncate64, sys_truncate, \
+	       compat_sys_truncate64)
 #define __NR3264_ftruncate 46
-__SC_3264(__NR3264_ftruncate, sys_ftruncate64, sys_ftruncate)
+__SC_COMP_3264(__NR3264_ftruncate, sys_ftruncate64, sys_ftruncate, \
+	       compat_sys_ftruncate64)
 
 #define __NR_fallocate 47
-__SYSCALL(__NR_fallocate, sys_fallocate)
+__SC_COMP(__NR_fallocate, sys_fallocate, compat_sys_fallocate)
 #define __NR_faccessat 48
 __SYSCALL(__NR_faccessat, sys_faccessat)
 #define __NR_chdir 49
@@ -166,7 +178,7 @@ __SYSCALL(__NR_fchownat, sys_fchownat)
 #define __NR_fchown 55
 __SYSCALL(__NR_fchown, sys_fchown)
 #define __NR_openat 56
-__SYSCALL(__NR_openat, sys_openat)
+__SC_COMP(__NR_openat, sys_openat, compat_sys_openat)
 #define __NR_close 57
 __SYSCALL(__NR_close, sys_close)
 #define __NR_vhangup 58
@@ -182,7 +194,7 @@ __SYSCALL(__NR_quotactl, sys_quotactl)
 
 /* fs/readdir.c */
 #define __NR_getdents64 61
-__SYSCALL(__NR_getdents64, sys_getdents64)
+__SC_COMP(__NR_getdents64, sys_getdents64, compat_sys_getdents64)
 
 /* fs/read_write.c */
 #define __NR3264_lseek 62
@@ -192,17 +204,17 @@ __SYSCALL(__NR_read, sys_read)
 #define __NR_write 64
 __SYSCALL(__NR_write, sys_write)
 #define __NR_readv 65
-__SYSCALL(__NR_readv, sys_readv)
+__SC_COMP(__NR_readv, sys_readv, compat_sys_readv)
 #define __NR_writev 66
-__SYSCALL(__NR_writev, sys_writev)
+__SC_COMP(__NR_writev, sys_writev, compat_sys_writev)
 #define __NR_pread64 67
-__SYSCALL(__NR_pread64, sys_pread64)
+__SC_COMP(__NR_pread64, sys_pread64, compat_sys_pread64)
 #define __NR_pwrite64 68
-__SYSCALL(__NR_pwrite64, sys_pwrite64)
+__SC_COMP(__NR_pwrite64, sys_pwrite64, compat_sys_pwrite64)
 #define __NR_preadv 69
-__SYSCALL(__NR_preadv, sys_preadv)
+__SC_COMP(__NR_preadv, sys_preadv, compat_sys_preadv)
 #define __NR_pwritev 70
-__SYSCALL(__NR_pwritev, sys_pwritev)
+__SC_COMP(__NR_pwritev, sys_pwritev, compat_sys_pwritev)
 
 /* fs/sendfile.c */
 #define __NR3264_sendfile 71
@@ -210,17 +222,17 @@ __SC_3264(__NR3264_sendfile, sys_sendfile64, sys_sendfile)
 
 /* fs/select.c */
 #define __NR_pselect6 72
-__SYSCALL(__NR_pselect6, sys_pselect6)
+__SC_COMP(__NR_pselect6, sys_pselect6, compat_sys_pselect6)
 #define __NR_ppoll 73
-__SYSCALL(__NR_ppoll, sys_ppoll)
+__SC_COMP(__NR_ppoll, sys_ppoll, compat_sys_ppoll)
 
 /* fs/signalfd.c */
 #define __NR_signalfd4 74
-__SYSCALL(__NR_signalfd4, sys_signalfd4)
+__SC_COMP(__NR_signalfd4, sys_signalfd4, compat_sys_signalfd4)
 
 /* fs/splice.c */
 #define __NR_vmsplice 75
-__SYSCALL(__NR_vmsplice, sys_vmsplice)
+__SC_COMP(__NR_vmsplice, sys_vmsplice, compat_sys_vmsplice)
 #define __NR_splice 76
 __SYSCALL(__NR_splice, sys_splice)
 #define __NR_tee 77
@@ -243,23 +255,27 @@ __SYSCALL(__NR_fsync, sys_fsync)
 __SYSCALL(__NR_fdatasync, sys_fdatasync)
 #ifdef __ARCH_WANT_SYNC_FILE_RANGE2
 #define __NR_sync_file_range2 84
-__SYSCALL(__NR_sync_file_range2, sys_sync_file_range2)
+__SC_COMP(__NR_sync_file_range2, sys_sync_file_range2, \
+	  compat_sys_sync_file_range2)
 #else
 #define __NR_sync_file_range 84
-__SYSCALL(__NR_sync_file_range, sys_sync_file_range)
+__SC_COMP(__NR_sync_file_range, sys_sync_file_range, \
+	  compat_sys_sync_file_range)
 #endif
 
 /* fs/timerfd.c */
 #define __NR_timerfd_create 85
 __SYSCALL(__NR_timerfd_create, sys_timerfd_create)
 #define __NR_timerfd_settime 86
-__SYSCALL(__NR_timerfd_settime, sys_timerfd_settime)
+__SC_COMP(__NR_timerfd_settime, sys_timerfd_settime, \
+	  compat_sys_timerfd_settime)
 #define __NR_timerfd_gettime 87
-__SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime)
+__SC_COMP(__NR_timerfd_gettime, sys_timerfd_gettime, \
+	  compat_sys_timerfd_gettime)
 
 /* fs/utimes.c */
 #define __NR_utimensat 88
-__SYSCALL(__NR_utimensat, sys_utimensat)
+__SC_COMP(__NR_utimensat, sys_utimensat, compat_sys_utimensat)
 
 /* kernel/acct.c */
 #define __NR_acct 89
@@ -281,7 +297,7 @@ __SYSCALL(__NR_exit, sys_exit)
 #define __NR_exit_group 94
 __SYSCALL(__NR_exit_group, sys_exit_group)
 #define __NR_waitid 95
-__SYSCALL(__NR_waitid, sys_waitid)
+__SC_COMP(__NR_waitid, sys_waitid, compat_sys_waitid)
 
 /* kernel/fork.c */
 #define __NR_set_tid_address 96
@@ -291,25 +307,27 @@ __SYSCALL(__NR_unshare, sys_unshare)
 
 /* kernel/futex.c */
 #define __NR_futex 98
-__SYSCALL(__NR_futex, sys_futex)
+__SC_COMP(__NR_futex, sys_futex, compat_sys_futex)
 #define __NR_set_robust_list 99
-__SYSCALL(__NR_set_robust_list, sys_set_robust_list)
+__SC_COMP(__NR_set_robust_list, sys_set_robust_list, \
+	  compat_sys_set_robust_list)
 #define __NR_get_robust_list 100
-__SYSCALL(__NR_get_robust_list, sys_get_robust_list)
+__SC_COMP(__NR_get_robust_list, sys_get_robust_list, \
+	  compat_sys_get_robust_list)
 
 /* kernel/hrtimer.c */
 #define __NR_nanosleep 101
-__SYSCALL(__NR_nanosleep, sys_nanosleep)
+__SC_COMP(__NR_nanosleep, sys_nanosleep, compat_sys_nanosleep)
 
 /* kernel/itimer.c */
 #define __NR_getitimer 102
-__SYSCALL(__NR_getitimer, sys_getitimer)
+__SC_COMP(__NR_getitimer, sys_getitimer, compat_sys_getitimer)
 #define __NR_setitimer 103
-__SYSCALL(__NR_setitimer, sys_setitimer)
+__SC_COMP(__NR_setitimer, sys_setitimer, compat_sys_setitimer)
 
 /* kernel/kexec.c */
 #define __NR_kexec_load 104
-__SYSCALL(__NR_kexec_load, sys_kexec_load)
+__SC_COMP(__NR_kexec_load, sys_kexec_load, compat_sys_kexec_load)
 
 /* kernel/module.c */
 #define __NR_init_module 105
@@ -319,23 +337,24 @@ __SYSCALL(__NR_delete_module, sys_delete_module)
 
 /* kernel/posix-timers.c */
 #define __NR_timer_create 107
-__SYSCALL(__NR_timer_create, sys_timer_create)
+__SC_COMP(__NR_timer_create, sys_timer_create, compat_sys_timer_create)
 #define __NR_timer_gettime 108
-__SYSCALL(__NR_timer_gettime, sys_timer_gettime)
+__SC_COMP(__NR_timer_gettime, sys_timer_gettime, compat_sys_timer_gettime)
 #define __NR_timer_getoverrun 109
 __SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun)
 #define __NR_timer_settime 110
-__SYSCALL(__NR_timer_settime, sys_timer_settime)
+__SC_COMP(__NR_timer_settime, sys_timer_settime, compat_sys_timer_settime)
 #define __NR_timer_delete 111
 __SYSCALL(__NR_timer_delete, sys_timer_delete)
 #define __NR_clock_settime 112
-__SYSCALL(__NR_clock_settime, sys_clock_settime)
+__SC_COMP(__NR_clock_settime, sys_clock_settime, compat_sys_clock_settime)
 #define __NR_clock_gettime 113
-__SYSCALL(__NR_clock_gettime, sys_clock_gettime)
+__SC_COMP(__NR_clock_gettime, sys_clock_gettime, compat_sys_clock_gettime)
 #define __NR_clock_getres 114
-__SYSCALL(__NR_clock_getres, sys_clock_getres)
+__SC_COMP(__NR_clock_getres, sys_clock_getres, compat_sys_clock_getres)
 #define __NR_clock_nanosleep 115
-__SYSCALL(__NR_clock_nanosleep, sys_clock_nanosleep)
+__SC_COMP(__NR_clock_nanosleep, sys_clock_nanosleep, \
+	  compat_sys_clock_nanosleep)
 
 /* kernel/printk.c */
 #define __NR_syslog 116
@@ -355,9 +374,11 @@ __SYSCALL(__NR_sched_getscheduler, sys_sched_getscheduler)
 #define __NR_sched_getparam 121
 __SYSCALL(__NR_sched_getparam, sys_sched_getparam)
 #define __NR_sched_setaffinity 122
-__SYSCALL(__NR_sched_setaffinity, sys_sched_setaffinity)
+__SC_COMP(__NR_sched_setaffinity, sys_sched_setaffinity, \
+	  compat_sys_sched_setaffinity)
 #define __NR_sched_getaffinity 123
-__SYSCALL(__NR_sched_getaffinity, sys_sched_getaffinity)
+__SC_COMP(__NR_sched_getaffinity, sys_sched_getaffinity, \
+	  compat_sys_sched_getaffinity)
 #define __NR_sched_yield 124
 __SYSCALL(__NR_sched_yield, sys_sched_yield)
 #define __NR_sched_get_priority_max 125
@@ -365,7 +386,8 @@ __SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max)
 #define __NR_sched_get_priority_min 126
 __SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min)
 #define __NR_sched_rr_get_interval 127
-__SYSCALL(__NR_sched_rr_get_interval, sys_sched_rr_get_interval)
+__SC_COMP(__NR_sched_rr_get_interval, sys_sched_rr_get_interval, \
+	  compat_sys_sched_rr_get_interval)
 
 /* kernel/signal.c */
 #define __NR_restart_syscall 128
@@ -377,21 +399,23 @@ __SYSCALL(__NR_tkill, sys_tkill)
 #define __NR_tgkill 131
 __SYSCALL(__NR_tgkill, sys_tgkill)
 #define __NR_sigaltstack 132
-__SYSCALL(__NR_sigaltstack, sys_sigaltstack)
+__SC_COMP(__NR_sigaltstack, sys_sigaltstack, compat_sys_sigaltstack)
 #define __NR_rt_sigsuspend 133
-__SYSCALL(__NR_rt_sigsuspend, sys_rt_sigsuspend) /* __ARCH_WANT_SYS_RT_SIGSUSPEND */
+__SC_COMP(__NR_rt_sigsuspend, sys_rt_sigsuspend, compat_sys_rt_sigsuspend)
 #define __NR_rt_sigaction 134
-__SYSCALL(__NR_rt_sigaction, sys_rt_sigaction) /* __ARCH_WANT_SYS_RT_SIGACTION */
+__SC_COMP(__NR_rt_sigaction, sys_rt_sigaction, compat_sys_rt_sigaction)
 #define __NR_rt_sigprocmask 135
 __SYSCALL(__NR_rt_sigprocmask, sys_rt_sigprocmask)
 #define __NR_rt_sigpending 136
 __SYSCALL(__NR_rt_sigpending, sys_rt_sigpending)
 #define __NR_rt_sigtimedwait 137
-__SYSCALL(__NR_rt_sigtimedwait, sys_rt_sigtimedwait)
+__SC_COMP(__NR_rt_sigtimedwait, sys_rt_sigtimedwait, \
+	  compat_sys_rt_sigtimedwait)
 #define __NR_rt_sigqueueinfo 138
-__SYSCALL(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo)
+__SC_COMP(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo, \
+	  compat_sys_rt_sigqueueinfo)
 #define __NR_rt_sigreturn 139
-__SYSCALL(__NR_rt_sigreturn, sys_rt_sigreturn) /* sys_rt_sigreturn_wrapper, */
+__SC_COMP(__NR_rt_sigreturn, sys_rt_sigreturn, compat_sys_rt_sigreturn)
 
 /* kernel/sys.c */
 #define __NR_setpriority 140
@@ -421,7 +445,7 @@ __SYSCALL(__NR_setfsuid, sys_setfsuid)
 #define __NR_setfsgid 152
 __SYSCALL(__NR_setfsgid, sys_setfsgid)
 #define __NR_times 153
-__SYSCALL(__NR_times, sys_times)
+__SC_COMP(__NR_times, sys_times, compat_sys_times)
 #define __NR_setpgid 154
 __SYSCALL(__NR_setpgid, sys_setpgid)
 #define __NR_getpgid 155
@@ -441,11 +465,11 @@ __SYSCALL(__NR_sethostname, sys_sethostname)
 #define __NR_setdomainname 162
 __SYSCALL(__NR_setdomainname, sys_setdomainname)
 #define __NR_getrlimit 163
-__SYSCALL(__NR_getrlimit, sys_getrlimit)
+__SC_COMP(__NR_getrlimit, sys_getrlimit, compat_sys_getrlimit)
 #define __NR_setrlimit 164
-__SYSCALL(__NR_setrlimit, sys_setrlimit)
+__SC_COMP(__NR_setrlimit, sys_setrlimit, compat_sys_setrlimit)
 #define __NR_getrusage 165
-__SYSCALL(__NR_getrusage, sys_getrusage)
+__SC_COMP(__NR_getrusage, sys_getrusage, compat_sys_getrusage)
 #define __NR_umask 166
 __SYSCALL(__NR_umask, sys_umask)
 #define __NR_prctl 167
@@ -455,11 +479,11 @@ __SYSCALL(__NR_getcpu, sys_getcpu)
 
 /* kernel/time.c */
 #define __NR_gettimeofday 169
-__SYSCALL(__NR_gettimeofday, sys_gettimeofday)
+__SC_COMP(__NR_gettimeofday, sys_gettimeofday, compat_sys_gettimeofday)
 #define __NR_settimeofday 170
-__SYSCALL(__NR_settimeofday, sys_settimeofday)
+__SC_COMP(__NR_settimeofday, sys_settimeofday, compat_sys_settimeofday)
 #define __NR_adjtimex 171
-__SYSCALL(__NR_adjtimex, sys_adjtimex)
+__SC_COMP(__NR_adjtimex, sys_adjtimex, compat_sys_adjtimex)
 
 /* kernel/timer.c */
 #define __NR_getpid 172
@@ -477,39 +501,40 @@ __SYSCALL(__NR_getegid, sys_getegid)
 #define __NR_gettid 178
 __SYSCALL(__NR_gettid, sys_gettid)
 #define __NR_sysinfo 179
-__SYSCALL(__NR_sysinfo, sys_sysinfo)
+__SC_COMP(__NR_sysinfo, sys_sysinfo, compat_sys_sysinfo)
 
 /* ipc/mqueue.c */
 #define __NR_mq_open 180
-__SYSCALL(__NR_mq_open, sys_mq_open)
+__SC_COMP(__NR_mq_open, sys_mq_open, compat_sys_mq_open)
 #define __NR_mq_unlink 181
 __SYSCALL(__NR_mq_unlink, sys_mq_unlink)
 #define __NR_mq_timedsend 182
-__SYSCALL(__NR_mq_timedsend, sys_mq_timedsend)
+__SC_COMP(__NR_mq_timedsend, sys_mq_timedsend, compat_sys_mq_timedsend)
 #define __NR_mq_timedreceive 183
-__SYSCALL(__NR_mq_timedreceive, sys_mq_timedreceive)
+__SC_COMP(__NR_mq_timedreceive, sys_mq_timedreceive, \
+	  compat_sys_mq_timedreceive)
 #define __NR_mq_notify 184
-__SYSCALL(__NR_mq_notify, sys_mq_notify)
+__SC_COMP(__NR_mq_notify, sys_mq_notify, compat_sys_mq_notify)
 #define __NR_mq_getsetattr 185
-__SYSCALL(__NR_mq_getsetattr, sys_mq_getsetattr)
+__SC_COMP(__NR_mq_getsetattr, sys_mq_getsetattr, compat_sys_mq_getsetattr)
 
 /* ipc/msg.c */
 #define __NR_msgget 186
 __SYSCALL(__NR_msgget, sys_msgget)
 #define __NR_msgctl 187
-__SYSCALL(__NR_msgctl, sys_msgctl)
+__SC_COMP(__NR_msgctl, sys_msgctl, compat_sys_msgctl)
 #define __NR_msgrcv 188
-__SYSCALL(__NR_msgrcv, sys_msgrcv)
+__SC_COMP(__NR_msgrcv, sys_msgrcv, compat_sys_msgrcv)
 #define __NR_msgsnd 189
-__SYSCALL(__NR_msgsnd, sys_msgsnd)
+__SC_COMP(__NR_msgsnd, sys_msgsnd, compat_sys_msgsnd)
 
 /* ipc/sem.c */
 #define __NR_semget 190
 __SYSCALL(__NR_semget, sys_semget)
 #define __NR_semctl 191
-__SYSCALL(__NR_semctl, sys_semctl)
+__SC_COMP(__NR_semctl, sys_semctl, compat_sys_semctl)
 #define __NR_semtimedop 192
-__SYSCALL(__NR_semtimedop, sys_semtimedop)
+__SC_COMP(__NR_semtimedop, sys_semtimedop, compat_sys_semtimedop)
 #define __NR_semop 193
 __SYSCALL(__NR_semop, sys_semop)
 
@@ -517,9 +542,9 @@ __SYSCALL(__NR_semop, sys_semop)
 #define __NR_shmget 194
 __SYSCALL(__NR_shmget, sys_shmget)
 #define __NR_shmctl 195
-__SYSCALL(__NR_shmctl, sys_shmctl)
+__SC_COMP(__NR_shmctl, sys_shmctl, compat_sys_shmctl)
 #define __NR_shmat 196
-__SYSCALL(__NR_shmat, sys_shmat)
+__SC_COMP(__NR_shmat, sys_shmat, compat_sys_shmat)
 #define __NR_shmdt 197
 __SYSCALL(__NR_shmdt, sys_shmdt)
 
@@ -543,21 +568,21 @@ __SYSCALL(__NR_getpeername, sys_getpeername)
 #define __NR_sendto 206
 __SYSCALL(__NR_sendto, sys_sendto)
 #define __NR_recvfrom 207
-__SYSCALL(__NR_recvfrom, sys_recvfrom)
+__SC_COMP(__NR_recvfrom, sys_recvfrom, compat_sys_recvfrom)
 #define __NR_setsockopt 208
-__SYSCALL(__NR_setsockopt, sys_setsockopt)
+__SC_COMP(__NR_setsockopt, sys_setsockopt, compat_sys_setsockopt)
 #define __NR_getsockopt 209
-__SYSCALL(__NR_getsockopt, sys_getsockopt)
+__SC_COMP(__NR_getsockopt, sys_getsockopt, compat_sys_getsockopt)
 #define __NR_shutdown 210
 __SYSCALL(__NR_shutdown, sys_shutdown)
 #define __NR_sendmsg 211
-__SYSCALL(__NR_sendmsg, sys_sendmsg)
+__SC_COMP(__NR_sendmsg, sys_sendmsg, compat_sys_sendmsg)
 #define __NR_recvmsg 212
-__SYSCALL(__NR_recvmsg, sys_recvmsg)
+__SC_COMP(__NR_recvmsg, sys_recvmsg, compat_sys_recvmsg)
 
 /* mm/filemap.c */
 #define __NR_readahead 213
-__SYSCALL(__NR_readahead, sys_readahead)
+__SC_COMP(__NR_readahead, sys_readahead, compat_sys_readahead)
 
 /* mm/nommu.c, also with MMU */
 #define __NR_brk 214
@@ -573,19 +598,19 @@ __SYSCALL(__NR_add_key, sys_add_key)
 #define __NR_request_key 218
 __SYSCALL(__NR_request_key, sys_request_key)
 #define __NR_keyctl 219
-__SYSCALL(__NR_keyctl, sys_keyctl)
+__SC_COMP(__NR_keyctl, sys_keyctl, compat_sys_keyctl)
 
 /* arch/example/kernel/sys_example.c */
 #define __NR_clone 220
-__SYSCALL(__NR_clone, sys_clone)	/* .long sys_clone_wrapper */
+__SYSCALL(__NR_clone, sys_clone)
 #define __NR_execve 221
-__SYSCALL(__NR_execve, sys_execve)	/* .long sys_execve_wrapper */
+__SC_COMP(__NR_execve, sys_execve, compat_sys_execve)
 
 #define __NR3264_mmap 222
 __SC_3264(__NR3264_mmap, sys_mmap2, sys_mmap)
 /* mm/fadvise.c */
 #define __NR3264_fadvise64 223
-__SYSCALL(__NR3264_fadvise64, sys_fadvise64_64)
+__SC_COMP(__NR3264_fadvise64, sys_fadvise64_64, compat_sys_fadvise64_64)
 
 /* mm/, CONFIG_MMU only */
 #ifndef __ARCH_NOMMU
@@ -612,25 +637,26 @@ __SYSCALL(__NR_madvise, sys_madvise)
 #define __NR_remap_file_pages 234
 __SYSCALL(__NR_remap_file_pages, sys_remap_file_pages)
 #define __NR_mbind 235
-__SYSCALL(__NR_mbind, sys_mbind)
+__SC_COMP(__NR_mbind, sys_mbind, compat_sys_mbind)
 #define __NR_get_mempolicy 236
-__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy)
+__SC_COMP(__NR_get_mempolicy, sys_get_mempolicy, compat_sys_get_mempolicy)
 #define __NR_set_mempolicy 237
-__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy)
+__SC_COMP(__NR_set_mempolicy, sys_set_mempolicy, compat_sys_set_mempolicy)
 #define __NR_migrate_pages 238
-__SYSCALL(__NR_migrate_pages, sys_migrate_pages)
+__SC_COMP(__NR_migrate_pages, sys_migrate_pages, compat_sys_migrate_pages)
 #define __NR_move_pages 239
-__SYSCALL(__NR_move_pages, sys_move_pages)
+__SC_COMP(__NR_move_pages, sys_move_pages, compat_sys_move_pages)
 #endif
 
 #define __NR_rt_tgsigqueueinfo 240
-__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
+__SC_COMP(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo, \
+	  compat_sys_rt_tgsigqueueinfo)
 #define __NR_perf_event_open 241
 __SYSCALL(__NR_perf_event_open, sys_perf_event_open)
 #define __NR_accept4 242
 __SYSCALL(__NR_accept4, sys_accept4)
 #define __NR_recvmmsg 243
-__SYSCALL(__NR_recvmmsg, sys_recvmmsg)
+__SC_COMP(__NR_recvmmsg, sys_recvmmsg, compat_sys_recvmmsg)
 
 /*
  * Architectures may provide up to 16 syscalls of their own
@@ -639,19 +665,20 @@ __SYSCALL(__NR_recvmmsg, sys_recvmmsg)
 #define __NR_arch_specific_syscall 244
 
 #define __NR_wait4 260
-__SYSCALL(__NR_wait4, sys_wait4)
+__SC_COMP(__NR_wait4, sys_wait4, compat_sys_wait4)
 #define __NR_prlimit64 261
 __SYSCALL(__NR_prlimit64, sys_prlimit64)
 #define __NR_fanotify_init 262
 __SYSCALL(__NR_fanotify_init, sys_fanotify_init)
 #define __NR_fanotify_mark 263
 __SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
-#define __NR_name_to_handle_at		264
+#define __NR_name_to_handle_at         264
 __SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
-#define __NR_open_by_handle_at		265
-__SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
+#define __NR_open_by_handle_at         265
+__SC_COMP(__NR_open_by_handle_at, sys_open_by_handle_at, \
+	  compat_sys_open_by_handle_at)
 #define __NR_clock_adjtime 266
-__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime)
+__SC_COMP(__NR_clock_adjtime, sys_clock_adjtime, compat_sys_clock_adjtime)
 #define __NR_syncfs 267
 __SYSCALL(__NR_syncfs, sys_syncfs)
 
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 5778b559d59c..e94184834b71 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -12,6 +12,7 @@
 #include <linux/sem.h>
 #include <linux/socket.h>
 #include <linux/if.h>
+#include <linux/fs.h>
 
 #include <asm/compat.h>
 #include <asm/siginfo.h>
@@ -209,6 +210,18 @@ struct compat_robust_list_head {
 	compat_uptr_t			list_op_pending;
 };
 
+struct compat_statfs;
+struct compat_statfs64;
+struct compat_old_linux_dirent;
+struct compat_linux_dirent;
+struct linux_dirent64;
+struct compat_msghdr;
+struct compat_mmsghdr;
+struct compat_sysinfo;
+struct compat_sysctl_args;
+struct compat_kexec_segment;
+struct compat_mq_attr;
+
 extern void compat_exit_robust_list(struct task_struct *curr);
 
 asmlinkage long
@@ -331,9 +344,13 @@ asmlinkage long compat_sys_epoll_pwait(int epfd,
 			const compat_sigset_t __user *sigmask,
 			compat_size_t sigsetsize);
 
+asmlinkage long compat_sys_utime(const char __user *filename,
+				 struct compat_utimbuf __user *t);
 asmlinkage long compat_sys_utimensat(unsigned int dfd, const char __user *filename,
 				struct compat_timespec __user *t, int flags);
 
+asmlinkage long compat_sys_time(compat_time_t __user *tloc);
+asmlinkage long compat_sys_stime(compat_time_t __user *tptr);
 asmlinkage long compat_sys_signalfd(int ufd,
 				const compat_sigset_t __user *sigmask,
                                 compat_size_t sigsetsize);
@@ -350,11 +367,181 @@ asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_page,
 				      int flags);
 asmlinkage long compat_sys_futimesat(unsigned int dfd, const char __user *filename,
 				     struct compat_timeval __user *t);
+asmlinkage long compat_sys_utimes(const char __user *filename,
+				  struct compat_timeval __user *t);
+asmlinkage long compat_sys_newstat(const char __user * filename,
+				   struct compat_stat __user *statbuf);
+asmlinkage long compat_sys_newlstat(const char __user * filename,
+				    struct compat_stat __user *statbuf);
 asmlinkage long compat_sys_newfstatat(unsigned int dfd, const char __user * filename,
 				      struct compat_stat __user *statbuf,
 				      int flag);
+asmlinkage long compat_sys_newfstat(unsigned int fd,
+				    struct compat_stat __user * statbuf);
+asmlinkage long compat_sys_statfs(const char __user *pathname,
+				  struct compat_statfs __user *buf);
+asmlinkage long compat_sys_fstatfs(unsigned int fd,
+				   struct compat_statfs __user *buf);
+asmlinkage long compat_sys_statfs64(const char __user *pathname,
+				    compat_size_t sz,
+				    struct compat_statfs64 __user *buf);
+asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz,
+				     struct compat_statfs64 __user *buf);
+asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd,
+				   unsigned long arg);
+asmlinkage long compat_sys_fcntl(unsigned int fd, unsigned int cmd,
+				 unsigned long arg);
+asmlinkage long compat_sys_io_setup(unsigned nr_reqs, u32 __user *ctx32p);
+asmlinkage long compat_sys_io_getevents(aio_context_t ctx_id,
+					unsigned long min_nr,
+					unsigned long nr,
+					struct io_event __user *events,
+					struct compat_timespec __user *timeout);
+asmlinkage long compat_sys_io_submit(aio_context_t ctx_id, int nr,
+				     u32 __user *iocb);
+asmlinkage long compat_sys_mount(const char __user * dev_name,
+				 const char __user * dir_name,
+				 const char __user * type, unsigned long flags,
+				 const void __user * data);
+asmlinkage long compat_sys_old_readdir(unsigned int fd,
+				       struct compat_old_linux_dirent __user *,
+				       unsigned int count);
+asmlinkage long compat_sys_getdents(unsigned int fd,
+				    struct compat_linux_dirent __user *dirent,
+				    unsigned int count);
+asmlinkage long compat_sys_getdents64(unsigned int fd,
+				      struct linux_dirent64 __user * dirent,
+				      unsigned int count);
+asmlinkage long compat_sys_vmsplice(int fd, const struct compat_iovec __user *,
+				    unsigned int nr_segs, unsigned int flags);
+asmlinkage long compat_sys_open(const char __user *filename, int flags,
+				int mode);
 asmlinkage long compat_sys_openat(unsigned int dfd, const char __user *filename,
 				  int flags, int mode);
+asmlinkage long compat_sys_open_by_handle_at(int mountdirfd,
+					     struct file_handle __user *handle,
+					     int flags);
+asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp,
+				    compat_ulong_t __user *outp,
+				    compat_ulong_t __user *exp,
+				    struct compat_timespec __user *tsp,
+				    void __user *sig);
+asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
+				 unsigned int nfds,
+				 struct compat_timespec __user *tsp,
+				 const compat_sigset_t __user *sigmask,
+				 compat_size_t sigsetsize);
+#if (defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)) && !defined(CONFIG_NFSD_DEPRECATED)
+union compat_nfsctl_res;
+struct compat_nfsctl_arg;
+asmlinkage long compat_sys_nfsservctl(int cmd,
+				      struct compat_nfsctl_arg __user *arg,
+				      union compat_nfsctl_res __user *res);
+#else
+long asmlinkage compat_sys_nfsservctl(int cmd, void *notused, void *notused2);
+#endif
+asmlinkage long compat_sys_signalfd4(int ufd,
+				     const compat_sigset_t __user *sigmask,
+				     compat_size_t sigsetsize, int flags);
+asmlinkage long compat_sys_get_mempolicy(int __user *policy,
+					 compat_ulong_t __user *nmask,
+					 compat_ulong_t maxnode,
+					 compat_ulong_t addr,
+					 compat_ulong_t flags);
+asmlinkage long compat_sys_set_mempolicy(int mode, compat_ulong_t __user *nmask,
+					 compat_ulong_t maxnode);
+asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len,
+				 compat_ulong_t mode,
+				 compat_ulong_t __user *nmask,
+				 compat_ulong_t maxnode, compat_ulong_t flags);
+
+asmlinkage long compat_sys_setsockopt(int fd, int level, int optname,
+				      char __user *optval, unsigned int optlen);
+asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg,
+				   unsigned flags);
+asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg,
+				   unsigned int flags);
+asmlinkage long compat_sys_recv(int fd, void __user *buf, size_t len,
+				unsigned flags);
+asmlinkage long compat_sys_recvfrom(int fd, void __user *buf, size_t len,
+			    unsigned flags, struct sockaddr __user *addr,
+			    int __user *addrlen);
+asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg,
+				    unsigned vlen, unsigned int flags,
+				    struct compat_timespec __user *timeout);
+asmlinkage long compat_sys_nanosleep(struct compat_timespec __user *rqtp,
+				     struct compat_timespec __user *rmtp);
+asmlinkage long compat_sys_getitimer(int which,
+				     struct compat_itimerval __user *it);
+asmlinkage long compat_sys_setitimer(int which,
+				     struct compat_itimerval __user *in,
+				     struct compat_itimerval __user *out);
+asmlinkage long compat_sys_times(struct compat_tms __user *tbuf);
+asmlinkage long compat_sys_setrlimit(unsigned int resource,
+				     struct compat_rlimit __user *rlim);
+asmlinkage long compat_sys_getrlimit (unsigned int resource,
+				      struct compat_rlimit __user *rlim);
+asmlinkage long compat_sys_getrusage(int who, struct compat_rusage __user *ru);
+asmlinkage long compat_sys_sched_setaffinity(compat_pid_t pid,
+				     unsigned int len,
+				     compat_ulong_t __user *user_mask_ptr);
+asmlinkage long compat_sys_sched_getaffinity(compat_pid_t pid,
+				     unsigned int len,
+				     compat_ulong_t __user *user_mask_ptr);
+asmlinkage long compat_sys_timer_create(clockid_t which_clock,
+			struct compat_sigevent __user *timer_event_spec,
+			timer_t __user *created_timer_id);
+asmlinkage long compat_sys_timer_settime(timer_t timer_id, int flags,
+					 struct compat_itimerspec __user *new,
+					 struct compat_itimerspec __user *old);
+asmlinkage long compat_sys_timer_gettime(timer_t timer_id,
+				 struct compat_itimerspec __user *setting);
+asmlinkage long compat_sys_clock_settime(clockid_t which_clock,
+					 struct compat_timespec __user *tp);
+asmlinkage long compat_sys_clock_gettime(clockid_t which_clock,
+					 struct compat_timespec __user *tp);
+asmlinkage long compat_sys_clock_adjtime(clockid_t which_clock,
+					 struct compat_timex __user *tp);
+asmlinkage long compat_sys_clock_getres(clockid_t which_clock,
+					struct compat_timespec __user *tp);
+asmlinkage long compat_sys_clock_nanosleep(clockid_t which_clock, int flags,
+					   struct compat_timespec __user *rqtp,
+					   struct compat_timespec __user *rmtp);
+asmlinkage long compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese,
+		struct compat_siginfo __user *uinfo,
+		struct compat_timespec __user *uts, compat_size_t sigsetsize);
+asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset,
+					 compat_size_t sigsetsize);
+asmlinkage long compat_sys_sysinfo(struct compat_sysinfo __user *info);
+asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
+				 unsigned long arg);
+asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
+		struct compat_timespec __user *utime, u32 __user *uaddr2,
+		u32 val3);
+asmlinkage long compat_sys_getsockopt(int fd, int level, int optname,
+				      char __user *optval, int __user *optlen);
+asmlinkage long compat_sys_kexec_load(unsigned long entry,
+				      unsigned long nr_segments,
+				      struct compat_kexec_segment __user *,
+				      unsigned long flags);
+asmlinkage long compat_sys_mq_getsetattr(mqd_t mqdes,
+			const struct compat_mq_attr __user *u_mqstat,
+			struct compat_mq_attr __user *u_omqstat);
+asmlinkage long compat_sys_mq_notify(mqd_t mqdes,
+			const struct compat_sigevent __user *u_notification);
+asmlinkage long compat_sys_mq_open(const char __user *u_name,
+			int oflag, compat_mode_t mode,
+			struct compat_mq_attr __user *u_attr);
+asmlinkage long compat_sys_mq_timedsend(mqd_t mqdes,
+			const char __user *u_msg_ptr,
+			size_t msg_len, unsigned int msg_prio,
+			const struct compat_timespec __user *u_abs_timeout);
+asmlinkage ssize_t compat_sys_mq_timedreceive(mqd_t mqdes,
+			char __user *u_msg_ptr,
+			size_t msg_len, unsigned int __user *u_msg_prio,
+			const struct compat_timespec __user *u_abs_timeout);
+asmlinkage long compat_sys_socketcall(int call, u32 __user *args);
+asmlinkage long compat_sys_sysctl(struct compat_sysctl_args __user *args);
 
 extern ssize_t compat_rw_copy_check_uvector(int type,
 		const struct compat_iovec __user *uvector, unsigned long nr_segs,
diff --git a/kernel/compat.c b/kernel/compat.c
index 38b1d2c1cbe8..80c28562f57b 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -293,6 +293,8 @@ asmlinkage long compat_sys_times(struct compat_tms __user *tbuf)
 	return compat_jiffies_to_clock_t(jiffies);
 }
 
+#ifdef __ARCH_WANT_SYS_SIGPENDING
+
 /*
  * Assumption: old_sigset_t and compat_old_sigset_t are both
  * types that can be passed to put_user()/get_user().
@@ -312,6 +314,10 @@ asmlinkage long compat_sys_sigpending(compat_old_sigset_t __user *set)
 	return ret;
 }
 
+#endif
+
+#ifdef __ARCH_WANT_SYS_SIGPROCMASK
+
 asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *set,
 		compat_old_sigset_t __user *oset)
 {
@@ -333,6 +339,8 @@ asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *set,
 	return ret;
 }
 
+#endif
+
 asmlinkage long compat_sys_setrlimit(unsigned int resource,
 		struct compat_rlimit __user *rlim)
 {
-- 
cgit v1.2.3


From a934a00a69e940b126b9bdbf83e630ef5fe43523 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Wed, 18 May 2011 10:37:35 +0200
Subject: block: Fix discard topology stacking and reporting

In some cases we would end up stacking discard_zeroes_data incorrectly.
Fix this by enabling the feature by default for stacking drivers and
clearing it for low-level drivers. Incorporating a device that does not
support dzd will then cause the feature to be disabled in the stacking
driver.

Also ensure that the maximum discard value does not overflow when
exported in sysfs and return 0 in the alignment and dzd fields for
devices that don't support discard.

Reported-by: Lukas Czerner <lczerner@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@kernel.org
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-settings.c   | 3 ++-
 block/blk-sysfs.c      | 3 ++-
 include/linux/blkdev.h | 7 +++++--
 3 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index cd3c428e194f..fa1eb0449a05 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -120,7 +120,7 @@ void blk_set_default_limits(struct queue_limits *lim)
 	lim->discard_granularity = 0;
 	lim->discard_alignment = 0;
 	lim->discard_misaligned = 0;
-	lim->discard_zeroes_data = -1;
+	lim->discard_zeroes_data = 1;
 	lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
 	lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT);
 	lim->alignment_offset = 0;
@@ -166,6 +166,7 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
 
 	blk_set_default_limits(&q->limits);
 	blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS);
+	q->limits.discard_zeroes_data = 0;
 
 	/*
 	 * by default assume old behaviour and bounce for any highmem page
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 6d735122bc59..53bd0c77bfda 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -152,7 +152,8 @@ static ssize_t queue_discard_granularity_show(struct request_queue *q, char *pag
 
 static ssize_t queue_discard_max_show(struct request_queue *q, char *page)
 {
-	return queue_var_show(q->limits.max_discard_sectors << 9, page);
+	return sprintf(page, "%llu\n",
+		       (unsigned long long)q->limits.max_discard_sectors << 9);
 }
 
 static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9f921bf4bf8c..520d8618ed76 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -257,7 +257,7 @@ struct queue_limits {
 	unsigned char		misaligned;
 	unsigned char		discard_misaligned;
 	unsigned char		cluster;
-	signed char		discard_zeroes_data;
+	unsigned char		discard_zeroes_data;
 };
 
 struct request_queue
@@ -1069,13 +1069,16 @@ static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector
 {
 	unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1);
 
+	if (!lim->max_discard_sectors)
+		return 0;
+
 	return (lim->discard_granularity + lim->discard_alignment - alignment)
 		& (lim->discard_granularity - 1);
 }
 
 static inline unsigned int queue_discard_zeroes_data(struct request_queue *q)
 {
-	if (q->limits.discard_zeroes_data == 1)
+	if (q->limits.max_discard_sectors && q->limits.discard_zeroes_data == 1)
 		return 1;
 
 	return 0;
-- 
cgit v1.2.3


From 7176ba23f8b589b1df3229574ff46fb904ce9ec5 Mon Sep 17 00:00:00 2001
From: Rhyland Klein <rklein@nvidia.com>
Date: Mon, 16 May 2011 14:41:48 -0700
Subject: net: rfkill: add generic gpio rfkill driver

This adds a new generic gpio rfkill driver to support rfkill switches
which are controlled by gpios. The driver also supports passing in
data about the clock for the radio, so that when rfkill is blocking,
it can disable the clock.

This driver assumes platform data is passed from the board files to
configure it for specific devices.

Original-patch-by: Anantha Idapalapati <aidapalapati@nvidia.com>
Signed-off-by: Rhyland Klein <rklein@nvidia.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/rfkill-gpio.h |  43 +++++++++
 net/rfkill/Kconfig          |   9 ++
 net/rfkill/Makefile         |   1 +
 net/rfkill/rfkill-gpio.c    | 227 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 280 insertions(+)
 create mode 100644 include/linux/rfkill-gpio.h
 create mode 100644 net/rfkill/rfkill-gpio.c

(limited to 'include/linux')

diff --git a/include/linux/rfkill-gpio.h b/include/linux/rfkill-gpio.h
new file mode 100644
index 000000000000..a175d0598033
--- /dev/null
+++ b/include/linux/rfkill-gpio.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2011, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+
+#ifndef __RFKILL_GPIO_H
+#define __RFKILL_GPIO_H
+
+#include <linux/types.h>
+#include <linux/rfkill.h>
+
+/**
+ * struct rfkill_gpio_platform_data - platform data for rfkill gpio device.
+ * for unused gpio's, the expected value is -1.
+ * @name:		name for the gpio rf kill instance
+ * @reset_gpio:		GPIO which is used for reseting rfkill switch
+ * @shutdown_gpio:	GPIO which is used for shutdown of rfkill switch
+ * @power_clk_name:	[optional] name of clk to turn off while blocked
+ */
+
+struct rfkill_gpio_platform_data {
+	char			*name;
+	int			reset_gpio;
+	int			shutdown_gpio;
+	const char		*power_clk_name;
+	enum rfkill_type	type;
+};
+
+#endif /* __RFKILL_GPIO_H */
diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig
index 48464ca13b24..78efe895b663 100644
--- a/net/rfkill/Kconfig
+++ b/net/rfkill/Kconfig
@@ -33,3 +33,12 @@ config RFKILL_REGULATOR
 
           To compile this driver as a module, choose M here: the module will
           be called rfkill-regulator.
+
+config RFKILL_GPIO
+	tristate "GPIO RFKILL driver"
+	depends on RFKILL && GPIOLIB && HAVE_CLK
+	default n
+	help
+	  If you say yes here you get support of a generic gpio RFKILL
+	  driver. The platform should fill in the appropriate fields in the
+	  rfkill_gpio_platform_data structure and pass that to the driver.
diff --git a/net/rfkill/Makefile b/net/rfkill/Makefile
index d9a5a58ffd8c..311768783f4a 100644
--- a/net/rfkill/Makefile
+++ b/net/rfkill/Makefile
@@ -6,3 +6,4 @@ rfkill-y			+= core.o
 rfkill-$(CONFIG_RFKILL_INPUT)	+= input.o
 obj-$(CONFIG_RFKILL)		+= rfkill.o
 obj-$(CONFIG_RFKILL_REGULATOR)	+= rfkill-regulator.o
+obj-$(CONFIG_RFKILL_GPIO)	+= rfkill-gpio.o
diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
new file mode 100644
index 000000000000..256c5ddd2d72
--- /dev/null
+++ b/net/rfkill/rfkill-gpio.c
@@ -0,0 +1,227 @@
+/*
+ * Copyright (c) 2011, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/gpio.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/rfkill.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/slab.h>
+
+#include <linux/rfkill-gpio.h>
+
+enum rfkill_gpio_clk_state {
+	UNSPECIFIED = 0,
+	PWR_ENABLED,
+	PWR_DISABLED
+};
+
+#define PWR_CLK_SET(_RF, _EN) \
+	((_RF)->pwr_clk_enabled = (!(_EN) ? PWR_ENABLED : PWR_DISABLED))
+#define PWR_CLK_ENABLED(_RF) ((_RF)->pwr_clk_enabled == PWR_ENABLED)
+#define PWR_CLK_DISABLED(_RF) ((_RF)->pwr_clk_enabled != PWR_ENABLED)
+
+struct rfkill_gpio_data {
+	struct rfkill_gpio_platform_data	*pdata;
+	struct rfkill				*rfkill_dev;
+	char					*reset_name;
+	char					*shutdown_name;
+	enum rfkill_gpio_clk_state		pwr_clk_enabled;
+	struct clk				*pwr_clk;
+};
+
+static int rfkill_gpio_set_power(void *data, bool blocked)
+{
+	struct rfkill_gpio_data *rfkill = data;
+
+	if (blocked) {
+		if (gpio_is_valid(rfkill->pdata->shutdown_gpio))
+			gpio_direction_output(rfkill->pdata->shutdown_gpio, 0);
+		if (gpio_is_valid(rfkill->pdata->reset_gpio))
+			gpio_direction_output(rfkill->pdata->reset_gpio, 0);
+		if (rfkill->pwr_clk && PWR_CLK_ENABLED(rfkill))
+			clk_disable(rfkill->pwr_clk);
+	} else {
+		if (rfkill->pwr_clk && PWR_CLK_DISABLED(rfkill))
+			clk_enable(rfkill->pwr_clk);
+		if (gpio_is_valid(rfkill->pdata->reset_gpio))
+			gpio_direction_output(rfkill->pdata->reset_gpio, 1);
+		if (gpio_is_valid(rfkill->pdata->shutdown_gpio))
+			gpio_direction_output(rfkill->pdata->shutdown_gpio, 1);
+	}
+
+	if (rfkill->pwr_clk)
+		PWR_CLK_SET(rfkill, blocked);
+
+	return 0;
+}
+
+static const struct rfkill_ops rfkill_gpio_ops = {
+	.set_block = rfkill_gpio_set_power,
+};
+
+static int rfkill_gpio_probe(struct platform_device *pdev)
+{
+	struct rfkill_gpio_data *rfkill;
+	struct rfkill_gpio_platform_data *pdata = pdev->dev.platform_data;
+	int ret = 0;
+	int len = 0;
+
+	if (!pdata) {
+		pr_warn("%s: No platform data specified\n", __func__);
+		return -EINVAL;
+	}
+
+	/* make sure at-least one of the GPIO is defined and that
+	 * a name is specified for this instance */
+	if (!pdata->name || (!gpio_is_valid(pdata->reset_gpio) &&
+		!gpio_is_valid(pdata->shutdown_gpio))) {
+		pr_warn("%s: invalid platform data\n", __func__);
+		return -EINVAL;
+	}
+
+	rfkill = kzalloc(sizeof(*rfkill), GFP_KERNEL);
+	if (!rfkill)
+		return -ENOMEM;
+
+	rfkill->pdata = pdata;
+
+	len = strlen(pdata->name);
+	rfkill->reset_name = kzalloc(len + 7, GFP_KERNEL);
+	if (!rfkill->reset_name) {
+		ret = -ENOMEM;
+		goto fail_alloc;
+	}
+
+	rfkill->shutdown_name = kzalloc(len + 10, GFP_KERNEL);
+	if (!rfkill->shutdown_name) {
+		ret = -ENOMEM;
+		goto fail_reset_name;
+	}
+
+	snprintf(rfkill->reset_name, len + 6 , "%s_reset", pdata->name);
+	snprintf(rfkill->shutdown_name, len + 9, "%s_shutdown", pdata->name);
+
+	if (pdata->power_clk_name) {
+		rfkill->pwr_clk = clk_get(&pdev->dev, pdata->power_clk_name);
+		if (IS_ERR(rfkill->pwr_clk)) {
+			pr_warn("%s: can't find pwr_clk.\n", __func__);
+			goto fail_shutdown_name;
+		}
+	}
+
+	if (gpio_is_valid(pdata->reset_gpio)) {
+		ret = gpio_request(pdata->reset_gpio, rfkill->reset_name);
+		if (ret) {
+			pr_warn("%s: failed to get reset gpio.\n", __func__);
+			goto fail_clock;
+		}
+	}
+
+	if (gpio_is_valid(pdata->shutdown_gpio)) {
+		ret = gpio_request(pdata->shutdown_gpio, rfkill->shutdown_name);
+		if (ret) {
+			pr_warn("%s: failed to get shutdown gpio.\n", __func__);
+			goto fail_reset;
+		}
+	}
+
+	rfkill->rfkill_dev = rfkill_alloc(pdata->name, &pdev->dev, pdata->type,
+				&rfkill_gpio_ops, rfkill);
+	if (!rfkill->rfkill_dev)
+		goto fail_shutdown;
+
+	ret = rfkill_register(rfkill->rfkill_dev);
+	if (ret < 0)
+		goto fail_rfkill;
+
+	platform_set_drvdata(pdev, rfkill);
+
+	dev_info(&pdev->dev, "%s device registered.\n", pdata->name);
+
+	return 0;
+
+fail_rfkill:
+	rfkill_destroy(rfkill->rfkill_dev);
+fail_shutdown:
+	if (gpio_is_valid(pdata->shutdown_gpio))
+		gpio_free(pdata->shutdown_gpio);
+fail_reset:
+	if (gpio_is_valid(pdata->reset_gpio))
+		gpio_free(pdata->reset_gpio);
+fail_clock:
+	if (rfkill->pwr_clk)
+		clk_put(rfkill->pwr_clk);
+fail_shutdown_name:
+	kfree(rfkill->shutdown_name);
+fail_reset_name:
+	kfree(rfkill->reset_name);
+fail_alloc:
+	kfree(rfkill);
+
+	return ret;
+}
+
+static int rfkill_gpio_remove(struct platform_device *pdev)
+{
+	struct rfkill_gpio_data *rfkill = platform_get_drvdata(pdev);
+
+	rfkill_unregister(rfkill->rfkill_dev);
+	rfkill_destroy(rfkill->rfkill_dev);
+	if (gpio_is_valid(rfkill->pdata->shutdown_gpio))
+		gpio_free(rfkill->pdata->shutdown_gpio);
+	if (gpio_is_valid(rfkill->pdata->reset_gpio))
+		gpio_free(rfkill->pdata->reset_gpio);
+	if (rfkill->pwr_clk && PWR_CLK_ENABLED(rfkill))
+		clk_disable(rfkill->pwr_clk);
+	if (rfkill->pwr_clk)
+		clk_put(rfkill->pwr_clk);
+	kfree(rfkill->shutdown_name);
+	kfree(rfkill->reset_name);
+	kfree(rfkill);
+
+	return 0;
+}
+
+static struct platform_driver rfkill_gpio_driver = {
+	.probe = rfkill_gpio_probe,
+	.remove = __devexit_p(rfkill_gpio_remove),
+	.driver = {
+		   .name = "rfkill_gpio",
+		   .owner = THIS_MODULE,
+	},
+};
+
+static int __init rfkill_gpio_init(void)
+{
+	return platform_driver_register(&rfkill_gpio_driver);
+}
+
+static void __exit rfkill_gpio_exit(void)
+{
+	platform_driver_unregister(&rfkill_gpio_driver);
+}
+
+module_init(rfkill_gpio_init);
+module_exit(rfkill_gpio_exit);
+
+MODULE_DESCRIPTION("gpio rfkill");
+MODULE_AUTHOR("NVIDIA");
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From 4800a5bb13c09a572f7c74662a77c9eca229eba1 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Tue, 17 May 2011 14:41:06 -0400
Subject: include/linux/compat.h: coding-style fixes

I touched this file when adding support for the "tilegx" sub-architecture,
and Andrew Morton observed "The file's a mismash of old-style, wrong-style
and right-style.  There's no point in doing mishmash preservation!
May as well fix things up when we touch them."

Accordingly, this change makes <linux/compat.h> as checkpatch-clean
as possible.  It makes no semantic changes whatsoever.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/compat.h | 74 ++++++++++++++++++++++++++++----------------------
 1 file changed, 41 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compat.h b/include/linux/compat.h
index e94184834b71..644e1a4692b1 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -27,7 +27,7 @@ typedef __compat_gid32_t	compat_gid_t;
 struct compat_sel_arg_struct;
 struct rusage;
 
-struct compat_itimerspec { 
+struct compat_itimerspec {
 	struct compat_timespec it_interval;
 	struct compat_timespec it_value;
 };
@@ -71,9 +71,9 @@ struct compat_timex {
 	compat_long_t stbcnt;
 	compat_int_t tai;
 
-	compat_int_t :32; compat_int_t :32; compat_int_t :32; compat_int_t :32;
-	compat_int_t :32; compat_int_t :32; compat_int_t :32; compat_int_t :32;
-	compat_int_t :32; compat_int_t :32; compat_int_t :32;
+	compat_int_t:32; compat_int_t:32; compat_int_t:32; compat_int_t:32;
+	compat_int_t:32; compat_int_t:32; compat_int_t:32; compat_int_t:32;
+	compat_int_t:32; compat_int_t:32; compat_int_t:32;
 };
 
 #define _COMPAT_NSIG_WORDS	(_COMPAT_NSIG / _COMPAT_NSIG_BPW)
@@ -82,8 +82,10 @@ typedef struct {
 	compat_sigset_word	sig[_COMPAT_NSIG_WORDS];
 } compat_sigset_t;
 
-extern int get_compat_timespec(struct timespec *, const struct compat_timespec __user *);
-extern int put_compat_timespec(const struct timespec *, struct compat_timespec __user *);
+extern int get_compat_timespec(struct timespec *,
+			       const struct compat_timespec __user *);
+extern int put_compat_timespec(const struct timespec *,
+			       struct compat_timespec __user *);
 
 struct compat_iovec {
 	compat_uptr_t	iov_base;
@@ -114,7 +116,8 @@ struct compat_rusage {
 	compat_long_t	ru_nivcsw;
 };
 
-extern int put_compat_rusage(const struct rusage *, struct compat_rusage __user *);
+extern int put_compat_rusage(const struct rusage *,
+			     struct compat_rusage __user *);
 
 struct compat_siginfo;
 
@@ -167,8 +170,7 @@ struct compat_ifmap {
 	unsigned char port;
 };
 
-struct compat_if_settings
-{
+struct compat_if_settings {
 	unsigned int type;	/* Type of physical device or protocol */
 	unsigned int size;	/* Size of the data allocated by the caller */
 	compat_uptr_t ifs_ifsu;	/* union of pointers */
@@ -196,8 +198,8 @@ struct compat_ifreq {
 };
 
 struct compat_ifconf {
-        compat_int_t	ifc_len;                        /* size of buffer       */
-        compat_caddr_t  ifcbuf;
+	compat_int_t	ifc_len;                /* size of buffer */
+	compat_caddr_t  ifcbuf;
 };
 
 struct compat_robust_list {
@@ -256,8 +258,8 @@ asmlinkage ssize_t compat_sys_pwritev(unsigned long fd,
 		const struct compat_iovec __user *vec,
 		unsigned long vlen, u32 pos_low, u32 pos_high);
 
-int compat_do_execve(char * filename, compat_uptr_t __user *argv,
-	        compat_uptr_t __user *envp, struct pt_regs * regs);
+int compat_do_execve(char *filename, compat_uptr_t __user *argv,
+		     compat_uptr_t __user *envp, struct pt_regs *regs);
 
 asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
 		compat_ulong_t __user *outp, compat_ulong_t __user *exp,
@@ -346,14 +348,16 @@ asmlinkage long compat_sys_epoll_pwait(int epfd,
 
 asmlinkage long compat_sys_utime(const char __user *filename,
 				 struct compat_utimbuf __user *t);
-asmlinkage long compat_sys_utimensat(unsigned int dfd, const char __user *filename,
-				struct compat_timespec __user *t, int flags);
+asmlinkage long compat_sys_utimensat(unsigned int dfd,
+				     const char __user *filename,
+				     struct compat_timespec __user *t,
+				     int flags);
 
 asmlinkage long compat_sys_time(compat_time_t __user *tloc);
 asmlinkage long compat_sys_stime(compat_time_t __user *tptr);
 asmlinkage long compat_sys_signalfd(int ufd,
-				const compat_sigset_t __user *sigmask,
-                                compat_size_t sigsetsize);
+				    const compat_sigset_t __user *sigmask,
+				    compat_size_t sigsetsize);
 asmlinkage long compat_sys_timerfd_settime(int ufd, int flags,
 				   const struct compat_itimerspec __user *utmr,
 				   struct compat_itimerspec __user *otmr);
@@ -365,19 +369,21 @@ asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_page,
 				      const int __user *nodes,
 				      int __user *status,
 				      int flags);
-asmlinkage long compat_sys_futimesat(unsigned int dfd, const char __user *filename,
+asmlinkage long compat_sys_futimesat(unsigned int dfd,
+				     const char __user *filename,
 				     struct compat_timeval __user *t);
 asmlinkage long compat_sys_utimes(const char __user *filename,
 				  struct compat_timeval __user *t);
-asmlinkage long compat_sys_newstat(const char __user * filename,
+asmlinkage long compat_sys_newstat(const char __user *filename,
 				   struct compat_stat __user *statbuf);
-asmlinkage long compat_sys_newlstat(const char __user * filename,
+asmlinkage long compat_sys_newlstat(const char __user *filename,
 				    struct compat_stat __user *statbuf);
-asmlinkage long compat_sys_newfstatat(unsigned int dfd, const char __user * filename,
+asmlinkage long compat_sys_newfstatat(unsigned int dfd,
+				      const char __user *filename,
 				      struct compat_stat __user *statbuf,
 				      int flag);
 asmlinkage long compat_sys_newfstat(unsigned int fd,
-				    struct compat_stat __user * statbuf);
+				    struct compat_stat __user *statbuf);
 asmlinkage long compat_sys_statfs(const char __user *pathname,
 				  struct compat_statfs __user *buf);
 asmlinkage long compat_sys_fstatfs(unsigned int fd,
@@ -399,10 +405,10 @@ asmlinkage long compat_sys_io_getevents(aio_context_t ctx_id,
 					struct compat_timespec __user *timeout);
 asmlinkage long compat_sys_io_submit(aio_context_t ctx_id, int nr,
 				     u32 __user *iocb);
-asmlinkage long compat_sys_mount(const char __user * dev_name,
-				 const char __user * dir_name,
-				 const char __user * type, unsigned long flags,
-				 const void __user * data);
+asmlinkage long compat_sys_mount(const char __user *dev_name,
+				 const char __user *dir_name,
+				 const char __user *type, unsigned long flags,
+				 const void __user *data);
 asmlinkage long compat_sys_old_readdir(unsigned int fd,
 				       struct compat_old_linux_dirent __user *,
 				       unsigned int count);
@@ -410,7 +416,7 @@ asmlinkage long compat_sys_getdents(unsigned int fd,
 				    struct compat_linux_dirent __user *dirent,
 				    unsigned int count);
 asmlinkage long compat_sys_getdents64(unsigned int fd,
-				      struct linux_dirent64 __user * dirent,
+				      struct linux_dirent64 __user *dirent,
 				      unsigned int count);
 asmlinkage long compat_sys_vmsplice(int fd, const struct compat_iovec __user *,
 				    unsigned int nr_segs, unsigned int flags);
@@ -431,14 +437,15 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
 				 struct compat_timespec __user *tsp,
 				 const compat_sigset_t __user *sigmask,
 				 compat_size_t sigsetsize);
-#if (defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)) && !defined(CONFIG_NFSD_DEPRECATED)
+#if (defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)) && \
+	!defined(CONFIG_NFSD_DEPRECATED)
 union compat_nfsctl_res;
 struct compat_nfsctl_arg;
 asmlinkage long compat_sys_nfsservctl(int cmd,
 				      struct compat_nfsctl_arg __user *arg,
 				      union compat_nfsctl_res __user *res);
 #else
-long asmlinkage compat_sys_nfsservctl(int cmd, void *notused, void *notused2);
+asmlinkage long compat_sys_nfsservctl(int cmd, void *notused, void *notused2);
 #endif
 asmlinkage long compat_sys_signalfd4(int ufd,
 				     const compat_sigset_t __user *sigmask,
@@ -479,8 +486,8 @@ asmlinkage long compat_sys_setitimer(int which,
 asmlinkage long compat_sys_times(struct compat_tms __user *tbuf);
 asmlinkage long compat_sys_setrlimit(unsigned int resource,
 				     struct compat_rlimit __user *rlim);
-asmlinkage long compat_sys_getrlimit (unsigned int resource,
-				      struct compat_rlimit __user *rlim);
+asmlinkage long compat_sys_getrlimit(unsigned int resource,
+				     struct compat_rlimit __user *rlim);
 asmlinkage long compat_sys_getrusage(int who, struct compat_rusage __user *ru);
 asmlinkage long compat_sys_sched_setaffinity(compat_pid_t pid,
 				     unsigned int len,
@@ -507,7 +514,7 @@ asmlinkage long compat_sys_clock_getres(clockid_t which_clock,
 asmlinkage long compat_sys_clock_nanosleep(clockid_t which_clock, int flags,
 					   struct compat_timespec __user *rqtp,
 					   struct compat_timespec __user *rmtp);
-asmlinkage long compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese,
+asmlinkage long compat_sys_rt_sigtimedwait(compat_sigset_t __user *uthese,
 		struct compat_siginfo __user *uinfo,
 		struct compat_timespec __user *uts, compat_size_t sigsetsize);
 asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset,
@@ -544,7 +551,8 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args);
 asmlinkage long compat_sys_sysctl(struct compat_sysctl_args __user *args);
 
 extern ssize_t compat_rw_copy_check_uvector(int type,
-		const struct compat_iovec __user *uvector, unsigned long nr_segs,
+		const struct compat_iovec __user *uvector,
+		unsigned long nr_segs,
 		unsigned long fast_segs, struct iovec *fast_pointer,
 		struct iovec **ret_pointer);
 
-- 
cgit v1.2.3


From 771949d03b4f5295f648f09141325fd478f6c7ce Mon Sep 17 00:00:00 2001
From: Jens Axboe <jaxboe@fusionio.com>
Date: Fri, 20 May 2011 20:52:16 +0200
Subject: block: get rid of on-stack plugging debug checks

We don't need them anymore, so kill:

- REQ_ON_PLUG checks in various places
- !rq_mergeable() check in plug merging

Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-core.c          | 27 ---------------------------
 block/elevator.c          |  4 ----
 include/linux/blk_types.h |  2 --
 3 files changed, 33 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 9e8e297374b9..7369eeeafe23 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -569,8 +569,6 @@ int blk_get_queue(struct request_queue *q)
 
 static inline void blk_free_request(struct request_queue *q, struct request *rq)
 {
-	BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
-
 	if (rq->cmd_flags & REQ_ELVPRIV)
 		elv_put_request(q, rq);
 	mempool_free(rq, q->rq.rq_pool);
@@ -1110,14 +1108,6 @@ static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
 {
 	const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
 
-	/*
-	 * Debug stuff, kill later
-	 */
-	if (!rq_mergeable(req)) {
-		blk_dump_rq_flags(req, "back");
-		return false;
-	}
-
 	if (!ll_back_merge_fn(q, req, bio))
 		return false;
 
@@ -1141,14 +1131,6 @@ static bool bio_attempt_front_merge(struct request_queue *q,
 	const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
 	sector_t sector;
 
-	/*
-	 * Debug stuff, kill later
-	 */
-	if (!rq_mergeable(req)) {
-		blk_dump_rq_flags(req, "front");
-		return false;
-	}
-
 	if (!ll_front_merge_fn(q, req, bio))
 		return false;
 
@@ -1258,14 +1240,12 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 
 	el_ret = elv_merge(q, &req, bio);
 	if (el_ret == ELEVATOR_BACK_MERGE) {
-		BUG_ON(req->cmd_flags & REQ_ON_PLUG);
 		if (bio_attempt_back_merge(q, req, bio)) {
 			if (!attempt_back_merge(q, req))
 				elv_merged_request(q, req, el_ret);
 			goto out_unlock;
 		}
 	} else if (el_ret == ELEVATOR_FRONT_MERGE) {
-		BUG_ON(req->cmd_flags & REQ_ON_PLUG);
 		if (bio_attempt_front_merge(q, req, bio)) {
 			if (!attempt_front_merge(q, req))
 				elv_merged_request(q, req, el_ret);
@@ -1320,10 +1300,6 @@ get_rq:
 			if (__rq->q != q)
 				plug->should_sort = 1;
 		}
-		/*
-		 * Debug flag, kill later
-		 */
-		req->cmd_flags |= REQ_ON_PLUG;
 		list_add_tail(&req->queuelist, &plug->list);
 		drive_stat_acct(req, 1);
 	} else {
@@ -2749,7 +2725,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 	while (!list_empty(&list)) {
 		rq = list_entry_rq(list.next);
 		list_del_init(&rq->queuelist);
-		BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG));
 		BUG_ON(!rq->q);
 		if (rq->q != q) {
 			/*
@@ -2761,8 +2736,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 			depth = 0;
 			spin_lock(q->queue_lock);
 		}
-		rq->cmd_flags &= ~REQ_ON_PLUG;
-
 		/*
 		 * rq is already accounted, so use raw insert
 		 */
diff --git a/block/elevator.c b/block/elevator.c
index 2a0b653c90fd..b0b38ce0dcb6 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -416,8 +416,6 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq)
 	struct list_head *entry;
 	int stop_flags;
 
-	BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
-
 	if (q->last_merge == rq)
 		q->last_merge = NULL;
 
@@ -656,8 +654,6 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
 
 	rq->q = q;
 
-	BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
-
 	if (rq->cmd_flags & REQ_SOFTBARRIER) {
 		/* barriers are scheduling boundary, update end_sector */
 		if (rq->cmd_type == REQ_TYPE_FS ||
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index be50d9e70a7d..2a7cea53ca0d 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -151,7 +151,6 @@ enum rq_flag_bits {
 	__REQ_IO_STAT,		/* account I/O stat */
 	__REQ_MIXED_MERGE,	/* merge of different types, fail separately */
 	__REQ_SECURE,		/* secure discard (used with __REQ_DISCARD) */
-	__REQ_ON_PLUG,		/* on plug list */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -192,6 +191,5 @@ enum rq_flag_bits {
 #define REQ_IO_STAT		(1 << __REQ_IO_STAT)
 #define REQ_MIXED_MERGE		(1 << __REQ_MIXED_MERGE)
 #define REQ_SECURE		(1 << __REQ_SECURE)
-#define REQ_ON_PLUG		(1 << __REQ_ON_PLUG)
 
 #endif /* __LINUX_BLK_TYPES_H */
-- 
cgit v1.2.3


From d94ba80ebbea17f036cecb104398fbcd788aa742 Mon Sep 17 00:00:00 2001
From: Richard Cochran <richardcochran@gmail.com>
Date: Fri, 22 Apr 2011 12:03:08 +0200
Subject: ptp: Added a brand new class driver for ptp clocks.

This patch adds an infrastructure for hardware clocks that implement
IEEE 1588, the Precision Time Protocol (PTP). A class driver offers a
registration method to particular hardware clock drivers. Each clock is
presented as a standard POSIX clock.

The ancillary clock features are exposed in two different ways, via
the sysfs and by a character device.

Signed-off-by: Richard Cochran <richard.cochran@omicron.at>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 Documentation/ABI/testing/sysfs-ptp |  98 ++++++++++
 Documentation/ptp/ptp.txt           |  89 +++++++++
 Documentation/ptp/testptp.c         | 381 ++++++++++++++++++++++++++++++++++++
 Documentation/ptp/testptp.mk        |  33 ++++
 drivers/Kconfig                     |   2 +
 drivers/Makefile                    |   1 +
 drivers/ptp/Kconfig                 |  30 +++
 drivers/ptp/Makefile                |   6 +
 drivers/ptp/ptp_chardev.c           | 159 +++++++++++++++
 drivers/ptp/ptp_clock.c             | 343 ++++++++++++++++++++++++++++++++
 drivers/ptp/ptp_private.h           |  92 +++++++++
 drivers/ptp/ptp_sysfs.c             | 230 ++++++++++++++++++++++
 include/linux/Kbuild                |   1 +
 include/linux/ptp_classify.h        |   7 +
 include/linux/ptp_clock.h           |  84 ++++++++
 include/linux/ptp_clock_kernel.h    | 139 +++++++++++++
 16 files changed, 1695 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-ptp
 create mode 100644 Documentation/ptp/ptp.txt
 create mode 100644 Documentation/ptp/testptp.c
 create mode 100644 Documentation/ptp/testptp.mk
 create mode 100644 drivers/ptp/Kconfig
 create mode 100644 drivers/ptp/Makefile
 create mode 100644 drivers/ptp/ptp_chardev.c
 create mode 100644 drivers/ptp/ptp_clock.c
 create mode 100644 drivers/ptp/ptp_private.h
 create mode 100644 drivers/ptp/ptp_sysfs.c
 create mode 100644 include/linux/ptp_clock.h
 create mode 100644 include/linux/ptp_clock_kernel.h

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-ptp b/Documentation/ABI/testing/sysfs-ptp
new file mode 100644
index 000000000000..d40d2b550502
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-ptp
@@ -0,0 +1,98 @@
+What:		/sys/class/ptp/
+Date:		September 2010
+Contact:	Richard Cochran <richardcochran@gmail.com>
+Description:
+		This directory contains files and directories
+		providing a standardized interface to the ancillary
+		features of PTP hardware clocks.
+
+What:		/sys/class/ptp/ptpN/
+Date:		September 2010
+Contact:	Richard Cochran <richardcochran@gmail.com>
+Description:
+		This directory contains the attributes of the Nth PTP
+		hardware clock registered into the PTP class driver
+		subsystem.
+
+What:		/sys/class/ptp/ptpN/clock_name
+Date:		September 2010
+Contact:	Richard Cochran <richardcochran@gmail.com>
+Description:
+		This file contains the name of the PTP hardware clock
+		as a human readable string.
+
+What:		/sys/class/ptp/ptpN/max_adjustment
+Date:		September 2010
+Contact:	Richard Cochran <richardcochran@gmail.com>
+Description:
+		This file contains the PTP hardware clock's maximum
+		frequency adjustment value (a positive integer) in
+		parts per billion.
+
+What:		/sys/class/ptp/ptpN/n_alarms
+Date:		September 2010
+Contact:	Richard Cochran <richardcochran@gmail.com>
+Description:
+		This file contains the number of periodic or one shot
+		alarms offer by the PTP hardware clock.
+
+What:		/sys/class/ptp/ptpN/n_external_timestamps
+Date:		September 2010
+Contact:	Richard Cochran <richardcochran@gmail.com>
+Description:
+		This file contains the number of external timestamp
+		channels offered by the PTP hardware clock.
+
+What:		/sys/class/ptp/ptpN/n_periodic_outputs
+Date:		September 2010
+Contact:	Richard Cochran <richardcochran@gmail.com>
+Description:
+		This file contains the number of programmable periodic
+		output channels offered by the PTP hardware clock.
+
+What:		/sys/class/ptp/ptpN/pps_avaiable
+Date:		September 2010
+Contact:	Richard Cochran <richardcochran@gmail.com>
+Description:
+		This file indicates whether the PTP hardware clock
+		supports a Pulse Per Second to the host CPU. Reading
+		"1" means that the PPS is supported, while "0" means
+		not supported.
+
+What:		/sys/class/ptp/ptpN/extts_enable
+Date:		September 2010
+Contact:	Richard Cochran <richardcochran@gmail.com>
+Description:
+		This write-only file enables or disables external
+		timestamps. To enable external timestamps, write the
+		channel index followed by a "1" into the file.
+		To disable external timestamps, write the channel
+		index followed by a "0" into the file.
+
+What:		/sys/class/ptp/ptpN/fifo
+Date:		September 2010
+Contact:	Richard Cochran <richardcochran@gmail.com>
+Description:
+		This file provides timestamps on external events, in
+		the form of three integers: channel index, seconds,
+		and nanoseconds.
+
+What:		/sys/class/ptp/ptpN/period
+Date:		September 2010
+Contact:	Richard Cochran <richardcochran@gmail.com>
+Description:
+		This write-only file enables or disables periodic
+		outputs. To enable a periodic output, write five
+		integers into the file: channel index, start time
+		seconds, start time nanoseconds, period seconds, and
+		period nanoseconds. To disable a periodic output, set
+		all the seconds and nanoseconds values to zero.
+
+What:		/sys/class/ptp/ptpN/pps_enable
+Date:		September 2010
+Contact:	Richard Cochran <richardcochran@gmail.com>
+Description:
+		This write-only file enables or disables delivery of
+		PPS events to the Linux PPS subsystem. To enable PPS
+		events, write a "1" into the file. To disable events,
+		write a "0" into the file.
diff --git a/Documentation/ptp/ptp.txt b/Documentation/ptp/ptp.txt
new file mode 100644
index 000000000000..ae8fef86b832
--- /dev/null
+++ b/Documentation/ptp/ptp.txt
@@ -0,0 +1,89 @@
+
+* PTP hardware clock infrastructure for Linux
+
+  This patch set introduces support for IEEE 1588 PTP clocks in
+  Linux. Together with the SO_TIMESTAMPING socket options, this
+  presents a standardized method for developing PTP user space
+  programs, synchronizing Linux with external clocks, and using the
+  ancillary features of PTP hardware clocks.
+
+  A new class driver exports a kernel interface for specific clock
+  drivers and a user space interface. The infrastructure supports a
+  complete set of PTP hardware clock functionality.
+
+  + Basic clock operations
+    - Set time
+    - Get time
+    - Shift the clock by a given offset atomically
+    - Adjust clock frequency
+
+  + Ancillary clock features
+    - One short or periodic alarms, with signal delivery to user program
+    - Time stamp external events
+    - Period output signals configurable from user space
+    - Synchronization of the Linux system time via the PPS subsystem
+
+** PTP hardware clock kernel API
+
+   A PTP clock driver registers itself with the class driver. The
+   class driver handles all of the dealings with user space. The
+   author of a clock driver need only implement the details of
+   programming the clock hardware. The clock driver notifies the class
+   driver of asynchronous events (alarms and external time stamps) via
+   a simple message passing interface.
+
+   The class driver supports multiple PTP clock drivers. In normal use
+   cases, only one PTP clock is needed. However, for testing and
+   development, it can be useful to have more than one clock in a
+   single system, in order to allow performance comparisons.
+
+** PTP hardware clock user space API
+
+   The class driver also creates a character device for each
+   registered clock. User space can use an open file descriptor from
+   the character device as a POSIX clock id and may call
+   clock_gettime, clock_settime, and clock_adjtime.  These calls
+   implement the basic clock operations.
+
+   User space programs may control the clock using standardized
+   ioctls. A program may query, enable, configure, and disable the
+   ancillary clock features. User space can receive time stamped
+   events via blocking read() and poll(). One shot and periodic
+   signals may be configured via the POSIX timer_settime() system
+   call.
+
+** Writing clock drivers
+
+   Clock drivers include include/linux/ptp_clock_kernel.h and register
+   themselves by presenting a 'struct ptp_clock_info' to the
+   registration method. Clock drivers must implement all of the
+   functions in the interface. If a clock does not offer a particular
+   ancillary feature, then the driver should just return -EOPNOTSUPP
+   from those functions.
+
+   Drivers must ensure that all of the methods in interface are
+   reentrant. Since most hardware implementations treat the time value
+   as a 64 bit integer accessed as two 32 bit registers, drivers
+   should use spin_lock_irqsave/spin_unlock_irqrestore to protect
+   against concurrent access. This locking cannot be accomplished in
+   class driver, since the lock may also be needed by the clock
+   driver's interrupt service routine.
+
+** Supported hardware
+
+   + Freescale eTSEC gianfar
+     - 2 Time stamp external triggers, programmable polarity (opt. interrupt)
+     - 2 Alarm registers (optional interrupt)
+     - 3 Periodic signals (optional interrupt)
+
+   + National DP83640
+     - 6 GPIOs programmable as inputs or outputs
+     - 6 GPIOs with dedicated functions (LED/JTAG/clock) can also be
+       used as general inputs or outputs
+     - GPIO inputs can time stamp external triggers
+     - GPIO outputs can produce periodic signals
+     - 1 interrupt pin
+
+   + Intel IXP465
+     - Auxiliary Slave/Master Mode Snapshot (optional interrupt)
+     - Target Time (optional interrupt)
diff --git a/Documentation/ptp/testptp.c b/Documentation/ptp/testptp.c
new file mode 100644
index 000000000000..f59ded066108
--- /dev/null
+++ b/Documentation/ptp/testptp.c
@@ -0,0 +1,381 @@
+/*
+ * PTP 1588 clock support - User space test program
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <errno.h>
+#include <fcntl.h>
+#include <math.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <linux/ptp_clock.h>
+
+#define DEVICE "/dev/ptp0"
+
+#ifndef ADJ_SETOFFSET
+#define ADJ_SETOFFSET 0x0100
+#endif
+
+#ifndef CLOCK_INVALID
+#define CLOCK_INVALID -1
+#endif
+
+/* When glibc offers the syscall, this will go away. */
+#include <sys/syscall.h>
+static int clock_adjtime(clockid_t id, struct timex *tx)
+{
+	return syscall(__NR_clock_adjtime, id, tx);
+}
+
+static clockid_t get_clockid(int fd)
+{
+#define CLOCKFD 3
+#define FD_TO_CLOCKID(fd)	((~(clockid_t) (fd) << 3) | CLOCKFD)
+
+	return FD_TO_CLOCKID(fd);
+}
+
+static void handle_alarm(int s)
+{
+	printf("received signal %d\n", s);
+}
+
+static int install_handler(int signum, void (*handler)(int))
+{
+	struct sigaction action;
+	sigset_t mask;
+
+	/* Unblock the signal. */
+	sigemptyset(&mask);
+	sigaddset(&mask, signum);
+	sigprocmask(SIG_UNBLOCK, &mask, NULL);
+
+	/* Install the signal handler. */
+	action.sa_handler = handler;
+	action.sa_flags = 0;
+	sigemptyset(&action.sa_mask);
+	sigaction(signum, &action, NULL);
+
+	return 0;
+}
+
+static long ppb_to_scaled_ppm(int ppb)
+{
+	/*
+	 * The 'freq' field in the 'struct timex' is in parts per
+	 * million, but with a 16 bit binary fractional field.
+	 * Instead of calculating either one of
+	 *
+	 *    scaled_ppm = (ppb / 1000) << 16  [1]
+	 *    scaled_ppm = (ppb << 16) / 1000  [2]
+	 *
+	 * we simply use double precision math, in order to avoid the
+	 * truncation in [1] and the possible overflow in [2].
+	 */
+	return (long) (ppb * 65.536);
+}
+
+static void usage(char *progname)
+{
+	fprintf(stderr,
+		"usage: %s [options]\n"
+		" -a val     request a one-shot alarm after 'val' seconds\n"
+		" -A val     request a periodic alarm every 'val' seconds\n"
+		" -c         query the ptp clock's capabilities\n"
+		" -d name    device to open\n"
+		" -e val     read 'val' external time stamp events\n"
+		" -f val     adjust the ptp clock frequency by 'val' ppb\n"
+		" -g         get the ptp clock time\n"
+		" -h         prints this message\n"
+		" -p val     enable output with a period of 'val' nanoseconds\n"
+		" -P val     enable or disable (val=1|0) the system clock PPS\n"
+		" -s         set the ptp clock time from the system time\n"
+		" -S         set the system time from the ptp clock time\n"
+		" -t val     shift the ptp clock time by 'val' seconds\n",
+		progname);
+}
+
+int main(int argc, char *argv[])
+{
+	struct ptp_clock_caps caps;
+	struct ptp_extts_event event;
+	struct ptp_extts_request extts_request;
+	struct ptp_perout_request perout_request;
+	struct timespec ts;
+	struct timex tx;
+
+	static timer_t timerid;
+	struct itimerspec timeout;
+	struct sigevent sigevent;
+
+	char *progname;
+	int c, cnt, fd;
+
+	char *device = DEVICE;
+	clockid_t clkid;
+	int adjfreq = 0x7fffffff;
+	int adjtime = 0;
+	int capabilities = 0;
+	int extts = 0;
+	int gettime = 0;
+	int oneshot = 0;
+	int periodic = 0;
+	int perout = -1;
+	int pps = -1;
+	int settime = 0;
+
+	progname = strrchr(argv[0], '/');
+	progname = progname ? 1+progname : argv[0];
+	while (EOF != (c = getopt(argc, argv, "a:A:cd:e:f:ghp:P:sSt:v"))) {
+		switch (c) {
+		case 'a':
+			oneshot = atoi(optarg);
+			break;
+		case 'A':
+			periodic = atoi(optarg);
+			break;
+		case 'c':
+			capabilities = 1;
+			break;
+		case 'd':
+			device = optarg;
+			break;
+		case 'e':
+			extts = atoi(optarg);
+			break;
+		case 'f':
+			adjfreq = atoi(optarg);
+			break;
+		case 'g':
+			gettime = 1;
+			break;
+		case 'p':
+			perout = atoi(optarg);
+			break;
+		case 'P':
+			pps = atoi(optarg);
+			break;
+		case 's':
+			settime = 1;
+			break;
+		case 'S':
+			settime = 2;
+			break;
+		case 't':
+			adjtime = atoi(optarg);
+			break;
+		case 'h':
+			usage(progname);
+			return 0;
+		case '?':
+		default:
+			usage(progname);
+			return -1;
+		}
+	}
+
+	fd = open(device, O_RDWR);
+	if (fd < 0) {
+		fprintf(stderr, "opening %s: %s\n", device, strerror(errno));
+		return -1;
+	}
+
+	clkid = get_clockid(fd);
+	if (CLOCK_INVALID == clkid) {
+		fprintf(stderr, "failed to read clock id\n");
+		return -1;
+	}
+
+	if (capabilities) {
+		if (ioctl(fd, PTP_CLOCK_GETCAPS, &caps)) {
+			perror("PTP_CLOCK_GETCAPS");
+		} else {
+			printf("capabilities:\n"
+			       "  %d maximum frequency adjustment (ppb)\n"
+			       "  %d programmable alarms\n"
+			       "  %d external time stamp channels\n"
+			       "  %d programmable periodic signals\n"
+			       "  %d pulse per second\n",
+			       caps.max_adj,
+			       caps.n_alarm,
+			       caps.n_ext_ts,
+			       caps.n_per_out,
+			       caps.pps);
+		}
+	}
+
+	if (0x7fffffff != adjfreq) {
+		memset(&tx, 0, sizeof(tx));
+		tx.modes = ADJ_FREQUENCY;
+		tx.freq = ppb_to_scaled_ppm(adjfreq);
+		if (clock_adjtime(clkid, &tx)) {
+			perror("clock_adjtime");
+		} else {
+			puts("frequency adjustment okay");
+		}
+	}
+
+	if (adjtime) {
+		memset(&tx, 0, sizeof(tx));
+		tx.modes = ADJ_SETOFFSET;
+		tx.time.tv_sec = adjtime;
+		tx.time.tv_usec = 0;
+		if (clock_adjtime(clkid, &tx) < 0) {
+			perror("clock_adjtime");
+		} else {
+			puts("time shift okay");
+		}
+	}
+
+	if (gettime) {
+		if (clock_gettime(clkid, &ts)) {
+			perror("clock_gettime");
+		} else {
+			printf("clock time: %ld.%09ld or %s",
+			       ts.tv_sec, ts.tv_nsec, ctime(&ts.tv_sec));
+		}
+	}
+
+	if (settime == 1) {
+		clock_gettime(CLOCK_REALTIME, &ts);
+		if (clock_settime(clkid, &ts)) {
+			perror("clock_settime");
+		} else {
+			puts("set time okay");
+		}
+	}
+
+	if (settime == 2) {
+		clock_gettime(clkid, &ts);
+		if (clock_settime(CLOCK_REALTIME, &ts)) {
+			perror("clock_settime");
+		} else {
+			puts("set time okay");
+		}
+	}
+
+	if (extts) {
+		memset(&extts_request, 0, sizeof(extts_request));
+		extts_request.index = 0;
+		extts_request.flags = PTP_ENABLE_FEATURE;
+		if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) {
+			perror("PTP_EXTTS_REQUEST");
+			extts = 0;
+		} else {
+			puts("external time stamp request okay");
+		}
+		for (; extts; extts--) {
+			cnt = read(fd, &event, sizeof(event));
+			if (cnt != sizeof(event)) {
+				perror("read");
+				break;
+			}
+			printf("event index %u at %lld.%09u\n", event.index,
+			       event.t.sec, event.t.nsec);
+			fflush(stdout);
+		}
+		/* Disable the feature again. */
+		extts_request.flags = 0;
+		if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) {
+			perror("PTP_EXTTS_REQUEST");
+		}
+	}
+
+	if (oneshot) {
+		install_handler(SIGALRM, handle_alarm);
+		/* Create a timer. */
+		sigevent.sigev_notify = SIGEV_SIGNAL;
+		sigevent.sigev_signo = SIGALRM;
+		if (timer_create(clkid, &sigevent, &timerid)) {
+			perror("timer_create");
+			return -1;
+		}
+		/* Start the timer. */
+		memset(&timeout, 0, sizeof(timeout));
+		timeout.it_value.tv_sec = oneshot;
+		if (timer_settime(timerid, 0, &timeout, NULL)) {
+			perror("timer_settime");
+			return -1;
+		}
+		pause();
+		timer_delete(timerid);
+	}
+
+	if (periodic) {
+		install_handler(SIGALRM, handle_alarm);
+		/* Create a timer. */
+		sigevent.sigev_notify = SIGEV_SIGNAL;
+		sigevent.sigev_signo = SIGALRM;
+		if (timer_create(clkid, &sigevent, &timerid)) {
+			perror("timer_create");
+			return -1;
+		}
+		/* Start the timer. */
+		memset(&timeout, 0, sizeof(timeout));
+		timeout.it_interval.tv_sec = periodic;
+		timeout.it_value.tv_sec = periodic;
+		if (timer_settime(timerid, 0, &timeout, NULL)) {
+			perror("timer_settime");
+			return -1;
+		}
+		while (1) {
+			pause();
+		}
+		timer_delete(timerid);
+	}
+
+	if (perout >= 0) {
+		if (clock_gettime(clkid, &ts)) {
+			perror("clock_gettime");
+			return -1;
+		}
+		memset(&perout_request, 0, sizeof(perout_request));
+		perout_request.index = 0;
+		perout_request.start.sec = ts.tv_sec + 2;
+		perout_request.start.nsec = 0;
+		perout_request.period.sec = 0;
+		perout_request.period.nsec = perout;
+		if (ioctl(fd, PTP_PEROUT_REQUEST, &perout_request)) {
+			perror("PTP_PEROUT_REQUEST");
+		} else {
+			puts("periodic output request okay");
+		}
+	}
+
+	if (pps != -1) {
+		int enable = pps ? 1 : 0;
+		if (ioctl(fd, PTP_ENABLE_PPS, enable)) {
+			perror("PTP_ENABLE_PPS");
+		} else {
+			puts("pps for system time request okay");
+		}
+	}
+
+	close(fd);
+	return 0;
+}
diff --git a/Documentation/ptp/testptp.mk b/Documentation/ptp/testptp.mk
new file mode 100644
index 000000000000..4ef2d9755421
--- /dev/null
+++ b/Documentation/ptp/testptp.mk
@@ -0,0 +1,33 @@
+# PTP 1588 clock support - User space test program
+#
+# Copyright (C) 2010 OMICRON electronics GmbH
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software
+#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+CC        = $(CROSS_COMPILE)gcc
+INC       = -I$(KBUILD_OUTPUT)/usr/include
+CFLAGS    = -Wall $(INC)
+LDLIBS    = -lrt
+PROGS     = testptp
+
+all: $(PROGS)
+
+testptp: testptp.o
+
+clean:
+	rm -f testptp.o
+
+distclean: clean
+	rm -f $(PROGS)
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 61631edfecc2..3bb154d8c8cc 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -54,6 +54,8 @@ source "drivers/spi/Kconfig"
 
 source "drivers/pps/Kconfig"
 
+source "drivers/ptp/Kconfig"
+
 source "drivers/gpio/Kconfig"
 
 source "drivers/w1/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index a29527f4ded6..3c2960128a7f 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -76,6 +76,7 @@ obj-$(CONFIG_I2O)		+= message/
 obj-$(CONFIG_RTC_LIB)		+= rtc/
 obj-y				+= i2c/ media/
 obj-$(CONFIG_PPS)		+= pps/
+obj-$(CONFIG_PTP_1588_CLOCK)	+= ptp/
 obj-$(CONFIG_W1)		+= w1/
 obj-$(CONFIG_POWER_SUPPLY)	+= power/
 obj-$(CONFIG_HWMON)		+= hwmon/
diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig
new file mode 100644
index 000000000000..70d4bb1cbdab
--- /dev/null
+++ b/drivers/ptp/Kconfig
@@ -0,0 +1,30 @@
+#
+# PTP clock support configuration
+#
+
+menu "PTP clock support"
+
+comment "Enable Device Drivers -> PPS to see the PTP clock options."
+	depends on PPS=n
+
+config PTP_1588_CLOCK
+	tristate "PTP clock support"
+	depends on EXPERIMENTAL
+	depends on PPS
+	help
+	  The IEEE 1588 standard defines a method to precisely
+	  synchronize distributed clocks over Ethernet networks. The
+	  standard defines a Precision Time Protocol (PTP), which can
+	  be used to achieve synchronization within a few dozen
+	  microseconds. In addition, with the help of special hardware
+	  time stamping units, it can be possible to achieve
+	  synchronization to within a few hundred nanoseconds.
+
+	  This driver adds support for PTP clocks as character
+	  devices. If you want to use a PTP clock, then you should
+	  also enable at least one clock driver as well.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called ptp.
+
+endmenu
diff --git a/drivers/ptp/Makefile b/drivers/ptp/Makefile
new file mode 100644
index 000000000000..480e2afdc999
--- /dev/null
+++ b/drivers/ptp/Makefile
@@ -0,0 +1,6 @@
+#
+# Makefile for PTP 1588 clock support.
+#
+
+ptp-y					:= ptp_clock.o ptp_chardev.o ptp_sysfs.o
+obj-$(CONFIG_PTP_1588_CLOCK)		+= ptp.o
diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
new file mode 100644
index 000000000000..a8d03aeb4051
--- /dev/null
+++ b/drivers/ptp/ptp_chardev.c
@@ -0,0 +1,159 @@
+/*
+ * PTP 1588 clock support - character device implementation.
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/module.h>
+#include <linux/posix-clock.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+
+#include "ptp_private.h"
+
+int ptp_open(struct posix_clock *pc, fmode_t fmode)
+{
+	return 0;
+}
+
+long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
+{
+	struct ptp_clock_caps caps;
+	struct ptp_clock_request req;
+	struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
+	struct ptp_clock_info *ops = ptp->info;
+	int enable, err = 0;
+
+	switch (cmd) {
+
+	case PTP_CLOCK_GETCAPS:
+		memset(&caps, 0, sizeof(caps));
+		caps.max_adj = ptp->info->max_adj;
+		caps.n_alarm = ptp->info->n_alarm;
+		caps.n_ext_ts = ptp->info->n_ext_ts;
+		caps.n_per_out = ptp->info->n_per_out;
+		caps.pps = ptp->info->pps;
+		err = copy_to_user((void __user *)arg, &caps, sizeof(caps));
+		break;
+
+	case PTP_EXTTS_REQUEST:
+		if (copy_from_user(&req.extts, (void __user *)arg,
+				   sizeof(req.extts))) {
+			err = -EFAULT;
+			break;
+		}
+		if (req.extts.index >= ops->n_ext_ts) {
+			err = -EINVAL;
+			break;
+		}
+		req.type = PTP_CLK_REQ_EXTTS;
+		enable = req.extts.flags & PTP_ENABLE_FEATURE ? 1 : 0;
+		err = ops->enable(ops, &req, enable);
+		break;
+
+	case PTP_PEROUT_REQUEST:
+		if (copy_from_user(&req.perout, (void __user *)arg,
+				   sizeof(req.perout))) {
+			err = -EFAULT;
+			break;
+		}
+		if (req.perout.index >= ops->n_per_out) {
+			err = -EINVAL;
+			break;
+		}
+		req.type = PTP_CLK_REQ_PEROUT;
+		enable = req.perout.period.sec || req.perout.period.nsec;
+		err = ops->enable(ops, &req, enable);
+		break;
+
+	case PTP_ENABLE_PPS:
+		if (!capable(CAP_SYS_TIME))
+			return -EPERM;
+		req.type = PTP_CLK_REQ_PPS;
+		enable = arg ? 1 : 0;
+		err = ops->enable(ops, &req, enable);
+		break;
+
+	default:
+		err = -ENOTTY;
+		break;
+	}
+	return err;
+}
+
+unsigned int ptp_poll(struct posix_clock *pc, struct file *fp, poll_table *wait)
+{
+	struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
+
+	poll_wait(fp, &ptp->tsev_wq, wait);
+
+	return queue_cnt(&ptp->tsevq) ? POLLIN : 0;
+}
+
+ssize_t ptp_read(struct posix_clock *pc,
+		 uint rdflags, char __user *buf, size_t cnt)
+{
+	struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
+	struct timestamp_event_queue *queue = &ptp->tsevq;
+	struct ptp_extts_event event[PTP_BUF_TIMESTAMPS];
+	unsigned long flags;
+	size_t qcnt, i;
+
+	if (cnt % sizeof(struct ptp_extts_event) != 0)
+		return -EINVAL;
+
+	if (cnt > sizeof(event))
+		cnt = sizeof(event);
+
+	cnt = cnt / sizeof(struct ptp_extts_event);
+
+	if (mutex_lock_interruptible(&ptp->tsevq_mux))
+		return -ERESTARTSYS;
+
+	if (wait_event_interruptible(ptp->tsev_wq,
+				     ptp->defunct || queue_cnt(queue))) {
+		mutex_unlock(&ptp->tsevq_mux);
+		return -ERESTARTSYS;
+	}
+
+	if (ptp->defunct)
+		return -ENODEV;
+
+	spin_lock_irqsave(&queue->lock, flags);
+
+	qcnt = queue_cnt(queue);
+
+	if (cnt > qcnt)
+		cnt = qcnt;
+
+	for (i = 0; i < cnt; i++) {
+		event[i] = queue->buf[queue->head];
+		queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS;
+	}
+
+	spin_unlock_irqrestore(&queue->lock, flags);
+
+	cnt = cnt * sizeof(struct ptp_extts_event);
+
+	mutex_unlock(&ptp->tsevq_mux);
+
+	if (copy_to_user(buf, event, cnt)) {
+		mutex_unlock(&ptp->tsevq_mux);
+		return -EFAULT;
+	}
+
+	return cnt;
+}
diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
new file mode 100644
index 000000000000..cf3f9997546d
--- /dev/null
+++ b/drivers/ptp/ptp_clock.c
@@ -0,0 +1,343 @@
+/*
+ * PTP 1588 clock support
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/posix-clock.h>
+#include <linux/pps_kernel.h>
+#include <linux/slab.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+
+#include "ptp_private.h"
+
+#define PTP_MAX_ALARMS 4
+#define PTP_MAX_CLOCKS 8
+#define PTP_PPS_DEFAULTS (PPS_CAPTUREASSERT | PPS_OFFSETASSERT)
+#define PTP_PPS_EVENT PPS_CAPTUREASSERT
+#define PTP_PPS_MODE (PTP_PPS_DEFAULTS | PPS_CANWAIT | PPS_TSFMT_TSPEC)
+
+/* private globals */
+
+static dev_t ptp_devt;
+static struct class *ptp_class;
+
+static DECLARE_BITMAP(ptp_clocks_map, PTP_MAX_CLOCKS);
+static DEFINE_MUTEX(ptp_clocks_mutex); /* protects 'ptp_clocks_map' */
+
+/* time stamp event queue operations */
+
+static inline int queue_free(struct timestamp_event_queue *q)
+{
+	return PTP_MAX_TIMESTAMPS - queue_cnt(q) - 1;
+}
+
+static void enqueue_external_timestamp(struct timestamp_event_queue *queue,
+				       struct ptp_clock_event *src)
+{
+	struct ptp_extts_event *dst;
+	unsigned long flags;
+	s64 seconds;
+	u32 remainder;
+
+	seconds = div_u64_rem(src->timestamp, 1000000000, &remainder);
+
+	spin_lock_irqsave(&queue->lock, flags);
+
+	dst = &queue->buf[queue->tail];
+	dst->index = src->index;
+	dst->t.sec = seconds;
+	dst->t.nsec = remainder;
+
+	if (!queue_free(queue))
+		queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS;
+
+	queue->tail = (queue->tail + 1) % PTP_MAX_TIMESTAMPS;
+
+	spin_unlock_irqrestore(&queue->lock, flags);
+}
+
+static s32 scaled_ppm_to_ppb(long ppm)
+{
+	/*
+	 * The 'freq' field in the 'struct timex' is in parts per
+	 * million, but with a 16 bit binary fractional field.
+	 *
+	 * We want to calculate
+	 *
+	 *    ppb = scaled_ppm * 1000 / 2^16
+	 *
+	 * which simplifies to
+	 *
+	 *    ppb = scaled_ppm * 125 / 2^13
+	 */
+	s64 ppb = 1 + ppm;
+	ppb *= 125;
+	ppb >>= 13;
+	return (s32) ppb;
+}
+
+/* posix clock implementation */
+
+static int ptp_clock_getres(struct posix_clock *pc, struct timespec *tp)
+{
+	return 1; /* always round timer functions to one nanosecond */
+}
+
+static int ptp_clock_settime(struct posix_clock *pc, const struct timespec *tp)
+{
+	struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
+	return ptp->info->settime(ptp->info, tp);
+}
+
+static int ptp_clock_gettime(struct posix_clock *pc, struct timespec *tp)
+{
+	struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
+	return ptp->info->gettime(ptp->info, tp);
+}
+
+static int ptp_clock_adjtime(struct posix_clock *pc, struct timex *tx)
+{
+	struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
+	struct ptp_clock_info *ops;
+	int err = -EOPNOTSUPP;
+
+	ops = ptp->info;
+
+	if (tx->modes & ADJ_SETOFFSET) {
+		struct timespec ts;
+		ktime_t kt;
+		s64 delta;
+
+		ts.tv_sec  = tx->time.tv_sec;
+		ts.tv_nsec = tx->time.tv_usec;
+
+		if (!(tx->modes & ADJ_NANO))
+			ts.tv_nsec *= 1000;
+
+		if ((unsigned long) ts.tv_nsec >= NSEC_PER_SEC)
+			return -EINVAL;
+
+		kt = timespec_to_ktime(ts);
+		delta = ktime_to_ns(kt);
+		err = ops->adjtime(ops, delta);
+
+	} else if (tx->modes & ADJ_FREQUENCY) {
+
+		err = ops->adjfreq(ops, scaled_ppm_to_ppb(tx->freq));
+	}
+
+	return err;
+}
+
+static struct posix_clock_operations ptp_clock_ops = {
+	.owner		= THIS_MODULE,
+	.clock_adjtime	= ptp_clock_adjtime,
+	.clock_gettime	= ptp_clock_gettime,
+	.clock_getres	= ptp_clock_getres,
+	.clock_settime	= ptp_clock_settime,
+	.ioctl		= ptp_ioctl,
+	.open		= ptp_open,
+	.poll		= ptp_poll,
+	.read		= ptp_read,
+};
+
+static void delete_ptp_clock(struct posix_clock *pc)
+{
+	struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
+
+	mutex_destroy(&ptp->tsevq_mux);
+
+	/* Remove the clock from the bit map. */
+	mutex_lock(&ptp_clocks_mutex);
+	clear_bit(ptp->index, ptp_clocks_map);
+	mutex_unlock(&ptp_clocks_mutex);
+
+	kfree(ptp);
+}
+
+/* public interface */
+
+struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info)
+{
+	struct ptp_clock *ptp;
+	int err = 0, index, major = MAJOR(ptp_devt);
+
+	if (info->n_alarm > PTP_MAX_ALARMS)
+		return ERR_PTR(-EINVAL);
+
+	/* Find a free clock slot and reserve it. */
+	err = -EBUSY;
+	mutex_lock(&ptp_clocks_mutex);
+	index = find_first_zero_bit(ptp_clocks_map, PTP_MAX_CLOCKS);
+	if (index < PTP_MAX_CLOCKS)
+		set_bit(index, ptp_clocks_map);
+	else
+		goto no_slot;
+
+	/* Initialize a clock structure. */
+	err = -ENOMEM;
+	ptp = kzalloc(sizeof(struct ptp_clock), GFP_KERNEL);
+	if (ptp == NULL)
+		goto no_memory;
+
+	ptp->clock.ops = ptp_clock_ops;
+	ptp->clock.release = delete_ptp_clock;
+	ptp->info = info;
+	ptp->devid = MKDEV(major, index);
+	ptp->index = index;
+	spin_lock_init(&ptp->tsevq.lock);
+	mutex_init(&ptp->tsevq_mux);
+	init_waitqueue_head(&ptp->tsev_wq);
+
+	/* Create a new device in our class. */
+	ptp->dev = device_create(ptp_class, NULL, ptp->devid, ptp,
+				 "ptp%d", ptp->index);
+	if (IS_ERR(ptp->dev))
+		goto no_device;
+
+	dev_set_drvdata(ptp->dev, ptp);
+
+	err = ptp_populate_sysfs(ptp);
+	if (err)
+		goto no_sysfs;
+
+	/* Register a new PPS source. */
+	if (info->pps) {
+		struct pps_source_info pps;
+		memset(&pps, 0, sizeof(pps));
+		snprintf(pps.name, PPS_MAX_NAME_LEN, "ptp%d", index);
+		pps.mode = PTP_PPS_MODE;
+		pps.owner = info->owner;
+		ptp->pps_source = pps_register_source(&pps, PTP_PPS_DEFAULTS);
+		if (!ptp->pps_source) {
+			pr_err("failed to register pps source\n");
+			goto no_pps;
+		}
+	}
+
+	/* Create a posix clock. */
+	err = posix_clock_register(&ptp->clock, ptp->devid);
+	if (err) {
+		pr_err("failed to create posix clock\n");
+		goto no_clock;
+	}
+
+	mutex_unlock(&ptp_clocks_mutex);
+	return ptp;
+
+no_clock:
+	if (ptp->pps_source)
+		pps_unregister_source(ptp->pps_source);
+no_pps:
+	ptp_cleanup_sysfs(ptp);
+no_sysfs:
+	device_destroy(ptp_class, ptp->devid);
+no_device:
+	mutex_destroy(&ptp->tsevq_mux);
+	kfree(ptp);
+no_memory:
+	clear_bit(index, ptp_clocks_map);
+no_slot:
+	mutex_unlock(&ptp_clocks_mutex);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL(ptp_clock_register);
+
+int ptp_clock_unregister(struct ptp_clock *ptp)
+{
+	ptp->defunct = 1;
+	wake_up_interruptible(&ptp->tsev_wq);
+
+	/* Release the clock's resources. */
+	if (ptp->pps_source)
+		pps_unregister_source(ptp->pps_source);
+	ptp_cleanup_sysfs(ptp);
+	device_destroy(ptp_class, ptp->devid);
+
+	posix_clock_unregister(&ptp->clock);
+	return 0;
+}
+EXPORT_SYMBOL(ptp_clock_unregister);
+
+void ptp_clock_event(struct ptp_clock *ptp, struct ptp_clock_event *event)
+{
+	struct pps_event_time evt;
+
+	switch (event->type) {
+
+	case PTP_CLOCK_ALARM:
+		break;
+
+	case PTP_CLOCK_EXTTS:
+		enqueue_external_timestamp(&ptp->tsevq, event);
+		wake_up_interruptible(&ptp->tsev_wq);
+		break;
+
+	case PTP_CLOCK_PPS:
+		pps_get_ts(&evt);
+		pps_event(ptp->pps_source, &evt, PTP_PPS_EVENT, NULL);
+		break;
+	}
+}
+EXPORT_SYMBOL(ptp_clock_event);
+
+/* module operations */
+
+static void __exit ptp_exit(void)
+{
+	class_destroy(ptp_class);
+	unregister_chrdev_region(ptp_devt, PTP_MAX_CLOCKS);
+}
+
+static int __init ptp_init(void)
+{
+	int err;
+
+	ptp_class = class_create(THIS_MODULE, "ptp");
+	if (IS_ERR(ptp_class)) {
+		pr_err("ptp: failed to allocate class\n");
+		return PTR_ERR(ptp_class);
+	}
+
+	err = alloc_chrdev_region(&ptp_devt, 0, PTP_MAX_CLOCKS, "ptp");
+	if (err < 0) {
+		pr_err("ptp: failed to allocate device region\n");
+		goto no_region;
+	}
+
+	ptp_class->dev_attrs = ptp_dev_attrs;
+	pr_info("PTP clock support registered\n");
+	return 0;
+
+no_region:
+	class_destroy(ptp_class);
+	return err;
+}
+
+subsys_initcall(ptp_init);
+module_exit(ptp_exit);
+
+MODULE_AUTHOR("Richard Cochran <richard.cochran@omicron.at>");
+MODULE_DESCRIPTION("PTP clocks support");
+MODULE_LICENSE("GPL");
diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h
new file mode 100644
index 000000000000..4d5b5082c3b1
--- /dev/null
+++ b/drivers/ptp/ptp_private.h
@@ -0,0 +1,92 @@
+/*
+ * PTP 1588 clock support - private declarations for the core module.
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef _PTP_PRIVATE_H_
+#define _PTP_PRIVATE_H_
+
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/mutex.h>
+#include <linux/posix-clock.h>
+#include <linux/ptp_clock.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/time.h>
+
+#define PTP_MAX_TIMESTAMPS 128
+#define PTP_BUF_TIMESTAMPS 30
+
+struct timestamp_event_queue {
+	struct ptp_extts_event buf[PTP_MAX_TIMESTAMPS];
+	int head;
+	int tail;
+	spinlock_t lock;
+};
+
+struct ptp_clock {
+	struct posix_clock clock;
+	struct device *dev;
+	struct ptp_clock_info *info;
+	dev_t devid;
+	int index; /* index into clocks.map */
+	struct pps_device *pps_source;
+	struct timestamp_event_queue tsevq; /* simple fifo for time stamps */
+	struct mutex tsevq_mux; /* one process at a time reading the fifo */
+	wait_queue_head_t tsev_wq;
+	int defunct; /* tells readers to go away when clock is being removed */
+};
+
+/*
+ * The function queue_cnt() is safe for readers to call without
+ * holding q->lock. Readers use this function to verify that the queue
+ * is nonempty before proceeding with a dequeue operation. The fact
+ * that a writer might concurrently increment the tail does not
+ * matter, since the queue remains nonempty nonetheless.
+ */
+static inline int queue_cnt(struct timestamp_event_queue *q)
+{
+	int cnt = q->tail - q->head;
+	return cnt < 0 ? PTP_MAX_TIMESTAMPS + cnt : cnt;
+}
+
+/*
+ * see ptp_chardev.c
+ */
+
+long ptp_ioctl(struct posix_clock *pc,
+	       unsigned int cmd, unsigned long arg);
+
+int ptp_open(struct posix_clock *pc, fmode_t fmode);
+
+ssize_t ptp_read(struct posix_clock *pc,
+		 uint flags, char __user *buf, size_t cnt);
+
+uint ptp_poll(struct posix_clock *pc,
+	      struct file *fp, poll_table *wait);
+
+/*
+ * see ptp_sysfs.c
+ */
+
+extern struct device_attribute ptp_dev_attrs[];
+
+int ptp_cleanup_sysfs(struct ptp_clock *ptp);
+
+int ptp_populate_sysfs(struct ptp_clock *ptp);
+
+#endif
diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c
new file mode 100644
index 000000000000..2f93926ac976
--- /dev/null
+++ b/drivers/ptp/ptp_sysfs.c
@@ -0,0 +1,230 @@
+/*
+ * PTP 1588 clock support - sysfs interface.
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/capability.h>
+
+#include "ptp_private.h"
+
+static ssize_t clock_name_show(struct device *dev,
+			       struct device_attribute *attr, char *page)
+{
+	struct ptp_clock *ptp = dev_get_drvdata(dev);
+	return snprintf(page, PAGE_SIZE-1, "%s\n", ptp->info->name);
+}
+
+#define PTP_SHOW_INT(name)						\
+static ssize_t name##_show(struct device *dev,				\
+			   struct device_attribute *attr, char *page)	\
+{									\
+	struct ptp_clock *ptp = dev_get_drvdata(dev);			\
+	return snprintf(page, PAGE_SIZE-1, "%d\n", ptp->info->name);	\
+}
+
+PTP_SHOW_INT(max_adj);
+PTP_SHOW_INT(n_alarm);
+PTP_SHOW_INT(n_ext_ts);
+PTP_SHOW_INT(n_per_out);
+PTP_SHOW_INT(pps);
+
+#define PTP_RO_ATTR(_var, _name) {				\
+	.attr	= { .name = __stringify(_name), .mode = 0444 },	\
+	.show	= _var##_show,					\
+}
+
+struct device_attribute ptp_dev_attrs[] = {
+	PTP_RO_ATTR(clock_name,	clock_name),
+	PTP_RO_ATTR(max_adj,	max_adjustment),
+	PTP_RO_ATTR(n_alarm,	n_alarms),
+	PTP_RO_ATTR(n_ext_ts,	n_external_timestamps),
+	PTP_RO_ATTR(n_per_out,	n_periodic_outputs),
+	PTP_RO_ATTR(pps,	pps_available),
+	__ATTR_NULL,
+};
+
+static ssize_t extts_enable_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct ptp_clock *ptp = dev_get_drvdata(dev);
+	struct ptp_clock_info *ops = ptp->info;
+	struct ptp_clock_request req = { .type = PTP_CLK_REQ_EXTTS };
+	int cnt, enable;
+	int err = -EINVAL;
+
+	cnt = sscanf(buf, "%u %d", &req.extts.index, &enable);
+	if (cnt != 2)
+		goto out;
+	if (req.extts.index >= ops->n_ext_ts)
+		goto out;
+
+	err = ops->enable(ops, &req, enable ? 1 : 0);
+	if (err)
+		goto out;
+
+	return count;
+out:
+	return err;
+}
+
+static ssize_t extts_fifo_show(struct device *dev,
+			       struct device_attribute *attr, char *page)
+{
+	struct ptp_clock *ptp = dev_get_drvdata(dev);
+	struct timestamp_event_queue *queue = &ptp->tsevq;
+	struct ptp_extts_event event;
+	unsigned long flags;
+	size_t qcnt;
+	int cnt = 0;
+
+	memset(&event, 0, sizeof(event));
+
+	if (mutex_lock_interruptible(&ptp->tsevq_mux))
+		return -ERESTARTSYS;
+
+	spin_lock_irqsave(&queue->lock, flags);
+	qcnt = queue_cnt(queue);
+	if (qcnt) {
+		event = queue->buf[queue->head];
+		queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS;
+	}
+	spin_unlock_irqrestore(&queue->lock, flags);
+
+	if (!qcnt)
+		goto out;
+
+	cnt = snprintf(page, PAGE_SIZE, "%u %lld %u\n",
+		       event.index, event.t.sec, event.t.nsec);
+out:
+	mutex_unlock(&ptp->tsevq_mux);
+	return cnt;
+}
+
+static ssize_t period_store(struct device *dev,
+			    struct device_attribute *attr,
+			    const char *buf, size_t count)
+{
+	struct ptp_clock *ptp = dev_get_drvdata(dev);
+	struct ptp_clock_info *ops = ptp->info;
+	struct ptp_clock_request req = { .type = PTP_CLK_REQ_PEROUT };
+	int cnt, enable, err = -EINVAL;
+
+	cnt = sscanf(buf, "%u %lld %u %lld %u", &req.perout.index,
+		     &req.perout.start.sec, &req.perout.start.nsec,
+		     &req.perout.period.sec, &req.perout.period.nsec);
+	if (cnt != 5)
+		goto out;
+	if (req.perout.index >= ops->n_per_out)
+		goto out;
+
+	enable = req.perout.period.sec || req.perout.period.nsec;
+	err = ops->enable(ops, &req, enable);
+	if (err)
+		goto out;
+
+	return count;
+out:
+	return err;
+}
+
+static ssize_t pps_enable_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct ptp_clock *ptp = dev_get_drvdata(dev);
+	struct ptp_clock_info *ops = ptp->info;
+	struct ptp_clock_request req = { .type = PTP_CLK_REQ_PPS };
+	int cnt, enable;
+	int err = -EINVAL;
+
+	if (!capable(CAP_SYS_TIME))
+		return -EPERM;
+
+	cnt = sscanf(buf, "%d", &enable);
+	if (cnt != 1)
+		goto out;
+
+	err = ops->enable(ops, &req, enable ? 1 : 0);
+	if (err)
+		goto out;
+
+	return count;
+out:
+	return err;
+}
+
+static DEVICE_ATTR(extts_enable, 0220, NULL, extts_enable_store);
+static DEVICE_ATTR(fifo,         0444, extts_fifo_show, NULL);
+static DEVICE_ATTR(period,       0220, NULL, period_store);
+static DEVICE_ATTR(pps_enable,   0220, NULL, pps_enable_store);
+
+int ptp_cleanup_sysfs(struct ptp_clock *ptp)
+{
+	struct device *dev = ptp->dev;
+	struct ptp_clock_info *info = ptp->info;
+
+	if (info->n_ext_ts) {
+		device_remove_file(dev, &dev_attr_extts_enable);
+		device_remove_file(dev, &dev_attr_fifo);
+	}
+	if (info->n_per_out)
+		device_remove_file(dev, &dev_attr_period);
+
+	if (info->pps)
+		device_remove_file(dev, &dev_attr_pps_enable);
+
+	return 0;
+}
+
+int ptp_populate_sysfs(struct ptp_clock *ptp)
+{
+	struct device *dev = ptp->dev;
+	struct ptp_clock_info *info = ptp->info;
+	int err;
+
+	if (info->n_ext_ts) {
+		err = device_create_file(dev, &dev_attr_extts_enable);
+		if (err)
+			goto out1;
+		err = device_create_file(dev, &dev_attr_fifo);
+		if (err)
+			goto out2;
+	}
+	if (info->n_per_out) {
+		err = device_create_file(dev, &dev_attr_period);
+		if (err)
+			goto out3;
+	}
+	if (info->pps) {
+		err = device_create_file(dev, &dev_attr_pps_enable);
+		if (err)
+			goto out4;
+	}
+	return 0;
+out4:
+	if (info->n_per_out)
+		device_remove_file(dev, &dev_attr_period);
+out3:
+	if (info->n_ext_ts)
+		device_remove_file(dev, &dev_attr_fifo);
+out2:
+	if (info->n_ext_ts)
+		device_remove_file(dev, &dev_attr_extts_enable);
+out1:
+	return err;
+}
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 75cf611641e6..4585836ba664 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -302,6 +302,7 @@ header-y += ppp-comp.h
 header-y += ppp_defs.h
 header-y += pps.h
 header-y += prctl.h
+header-y += ptp_clock.h
 header-y += ptrace.h
 header-y += qnx4_fs.h
 header-y += qnxtypes.h
diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h
index 943a85ab0020..e07e2742a865 100644
--- a/include/linux/ptp_classify.h
+++ b/include/linux/ptp_classify.h
@@ -25,6 +25,7 @@
 
 #include <linux/if_ether.h>
 #include <linux/if_vlan.h>
+#include <linux/ip.h>
 #include <linux/filter.h>
 #ifdef __KERNEL__
 #include <linux/in.h>
@@ -58,6 +59,12 @@
 #define OFF_NEXT	6
 #define OFF_UDP_DST	2
 
+#define OFF_PTP_SOURCE_UUID	22 /* PTPv1 only */
+#define OFF_PTP_SEQUENCE_ID	30
+#define OFF_PTP_CONTROL		32 /* PTPv1 only */
+
+#define IPV4_HLEN(data) (((struct iphdr *)(data + OFF_IHL))->ihl << 2)
+
 #define IP6_HLEN	40
 #define UDP_HLEN	8
 
diff --git a/include/linux/ptp_clock.h b/include/linux/ptp_clock.h
new file mode 100644
index 000000000000..94e981f810a2
--- /dev/null
+++ b/include/linux/ptp_clock.h
@@ -0,0 +1,84 @@
+/*
+ * PTP 1588 clock support - user space interface
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _PTP_CLOCK_H_
+#define _PTP_CLOCK_H_
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+/* PTP_xxx bits, for the flags field within the request structures. */
+#define PTP_ENABLE_FEATURE (1<<0)
+#define PTP_RISING_EDGE    (1<<1)
+#define PTP_FALLING_EDGE   (1<<2)
+
+/*
+ * struct ptp_clock_time - represents a time value
+ *
+ * The sign of the seconds field applies to the whole value. The
+ * nanoseconds field is always unsigned. The reserved field is
+ * included for sub-nanosecond resolution, should the demand for
+ * this ever appear.
+ *
+ */
+struct ptp_clock_time {
+	__s64 sec;  /* seconds */
+	__u32 nsec; /* nanoseconds */
+	__u32 reserved;
+};
+
+struct ptp_clock_caps {
+	int max_adj;   /* Maximum frequency adjustment in parts per billon. */
+	int n_alarm;   /* Number of programmable alarms. */
+	int n_ext_ts;  /* Number of external time stamp channels. */
+	int n_per_out; /* Number of programmable periodic signals. */
+	int pps;       /* Whether the clock supports a PPS callback. */
+	int rsv[15];   /* Reserved for future use. */
+};
+
+struct ptp_extts_request {
+	unsigned int index;  /* Which channel to configure. */
+	unsigned int flags;  /* Bit field for PTP_xxx flags. */
+	unsigned int rsv[2]; /* Reserved for future use. */
+};
+
+struct ptp_perout_request {
+	struct ptp_clock_time start;  /* Absolute start time. */
+	struct ptp_clock_time period; /* Desired period, zero means disable. */
+	unsigned int index;           /* Which channel to configure. */
+	unsigned int flags;           /* Reserved for future use. */
+	unsigned int rsv[4];          /* Reserved for future use. */
+};
+
+#define PTP_CLK_MAGIC '='
+
+#define PTP_CLOCK_GETCAPS  _IOR(PTP_CLK_MAGIC, 1, struct ptp_clock_caps)
+#define PTP_EXTTS_REQUEST  _IOW(PTP_CLK_MAGIC, 2, struct ptp_extts_request)
+#define PTP_PEROUT_REQUEST _IOW(PTP_CLK_MAGIC, 3, struct ptp_perout_request)
+#define PTP_ENABLE_PPS     _IOW(PTP_CLK_MAGIC, 4, int)
+
+struct ptp_extts_event {
+	struct ptp_clock_time t; /* Time event occured. */
+	unsigned int index;      /* Which channel produced the event. */
+	unsigned int flags;      /* Reserved for future use. */
+	unsigned int rsv[2];     /* Reserved for future use. */
+};
+
+#endif
diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
new file mode 100644
index 000000000000..dd2e44fba63e
--- /dev/null
+++ b/include/linux/ptp_clock_kernel.h
@@ -0,0 +1,139 @@
+/*
+ * PTP 1588 clock support
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _PTP_CLOCK_KERNEL_H_
+#define _PTP_CLOCK_KERNEL_H_
+
+#include <linux/ptp_clock.h>
+
+
+struct ptp_clock_request {
+	enum {
+		PTP_CLK_REQ_EXTTS,
+		PTP_CLK_REQ_PEROUT,
+		PTP_CLK_REQ_PPS,
+	} type;
+	union {
+		struct ptp_extts_request extts;
+		struct ptp_perout_request perout;
+	};
+};
+
+/**
+ * struct ptp_clock_info - decribes a PTP hardware clock
+ *
+ * @owner:     The clock driver should set to THIS_MODULE.
+ * @name:      A short name to identify the clock.
+ * @max_adj:   The maximum possible frequency adjustment, in parts per billon.
+ * @n_alarm:   The number of programmable alarms.
+ * @n_ext_ts:  The number of external time stamp channels.
+ * @n_per_out: The number of programmable periodic signals.
+ * @pps:       Indicates whether the clock supports a PPS callback.
+ *
+ * clock operations
+ *
+ * @adjfreq:  Adjusts the frequency of the hardware clock.
+ *            parameter delta: Desired period change in parts per billion.
+ *
+ * @adjtime:  Shifts the time of the hardware clock.
+ *            parameter delta: Desired change in nanoseconds.
+ *
+ * @gettime:  Reads the current time from the hardware clock.
+ *            parameter ts: Holds the result.
+ *
+ * @settime:  Set the current time on the hardware clock.
+ *            parameter ts: Time value to set.
+ *
+ * @enable:   Request driver to enable or disable an ancillary feature.
+ *            parameter request: Desired resource to enable or disable.
+ *            parameter on: Caller passes one to enable or zero to disable.
+ *
+ * Drivers should embed their ptp_clock_info within a private
+ * structure, obtaining a reference to it using container_of().
+ *
+ * The callbacks must all return zero on success, non-zero otherwise.
+ */
+
+struct ptp_clock_info {
+	struct module *owner;
+	char name[16];
+	s32 max_adj;
+	int n_alarm;
+	int n_ext_ts;
+	int n_per_out;
+	int pps;
+	int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta);
+	int (*adjtime)(struct ptp_clock_info *ptp, s64 delta);
+	int (*gettime)(struct ptp_clock_info *ptp, struct timespec *ts);
+	int (*settime)(struct ptp_clock_info *ptp, const struct timespec *ts);
+	int (*enable)(struct ptp_clock_info *ptp,
+		      struct ptp_clock_request *request, int on);
+};
+
+struct ptp_clock;
+
+/**
+ * ptp_clock_register() - register a PTP hardware clock driver
+ *
+ * @info:  Structure describing the new clock.
+ */
+
+extern struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info);
+
+/**
+ * ptp_clock_unregister() - unregister a PTP hardware clock driver
+ *
+ * @ptp:  The clock to remove from service.
+ */
+
+extern int ptp_clock_unregister(struct ptp_clock *ptp);
+
+
+enum ptp_clock_events {
+	PTP_CLOCK_ALARM,
+	PTP_CLOCK_EXTTS,
+	PTP_CLOCK_PPS,
+};
+
+/**
+ * struct ptp_clock_event - decribes a PTP hardware clock event
+ *
+ * @type:  One of the ptp_clock_events enumeration values.
+ * @index: Identifies the source of the event.
+ * @timestamp: When the event occured.
+ */
+
+struct ptp_clock_event {
+	int type;
+	int index;
+	u64 timestamp;
+};
+
+/**
+ * ptp_clock_event() - notify the PTP layer about an event
+ *
+ * @ptp:    The clock obtained from ptp_clock_register().
+ * @event:  Message structure describing the event.
+ */
+
+extern void ptp_clock_event(struct ptp_clock *ptp,
+			    struct ptp_clock_event *event);
+
+#endif
-- 
cgit v1.2.3


From 600942e0fdb7ed1565d056d7305c46c7c0544a3e Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Thu, 27 Jan 2011 15:24:58 +0100
Subject: lru_cache.h: fix comments referring to ts_ instead of lc_

For some time we contemplated calling the "struct lru_cache"
a "struct tracked_set", and some comments kept the ts_ prefix.

Fix those to match the member field names.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/lru_cache.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h
index 6a4fab7c6e09..7a71ffad037c 100644
--- a/include/linux/lru_cache.h
+++ b/include/linux/lru_cache.h
@@ -139,9 +139,9 @@ write intent log information, three of which are mentioned here.
  * .list is on one of three lists:
  *  in_use: currently in use (refcnt > 0, lc_number != LC_FREE)
  *     lru: unused but ready to be reused or recycled
- *          (ts_refcnt == 0, lc_number != LC_FREE),
+ *          (lc_refcnt == 0, lc_number != LC_FREE),
  *    free: unused but ready to be recycled
- *          (ts_refcnt == 0, lc_number == LC_FREE),
+ *          (lc_refcnt == 0, lc_number == LC_FREE),
  *
  * an element is said to be "in the active set",
  * if either on "in_use" or "lru", i.e. lc_number != LC_FREE.
@@ -160,8 +160,8 @@ struct lc_element {
 	struct hlist_node colision;
 	struct list_head list;		 /* LRU list or free list */
 	unsigned refcnt;
-	/* back "pointer" into ts_cache->element[index],
-	 * for paranoia, and for "ts_element_to_index" */
+	/* back "pointer" into lc_cache->element[index],
+	 * for paranoia, and for "lc_element_to_index" */
 	unsigned lc_index;
 	/* if we want to track a larger set of objects,
 	 * it needs to become arch independend u64 */
@@ -190,8 +190,8 @@ struct lru_cache {
 	/* Arbitrary limit on maximum tracked objects. Practical limit is much
 	 * lower due to allocation failures, probably. For typical use cases,
 	 * nr_elements should be a few thousand at most.
-	 * This also limits the maximum value of ts_element.ts_index, allowing the
-	 * 8 high bits of .ts_index to be overloaded with flags in the future. */
+	 * This also limits the maximum value of lc_element.lc_index, allowing the
+	 * 8 high bits of .lc_index to be overloaded with flags in the future. */
 #define LC_MAX_ACTIVE	(1<<24)
 
 	/* statistics */
-- 
cgit v1.2.3


From 9a0d9d0389ef769e4b01abf50fcc11407706270b Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Mon, 2 May 2011 11:51:31 +0200
Subject: drbd: fix schedule in atomic

An administrative detach used to request a state change directly to D_DISKLESS,
first suspending IO to avoid the last put_ldev() occuring from an endio handler,
potentially in irq context.

This is not enough on the receiving side (typically secondary), we may miss
some peer_req on the way to local disk, which then may do the last put_ldev()
from their drbd_peer_request_endio().

This patch makes the detach always go through the intermediate D_FAILED state.
We may consider to rename it D_DETACHING.

Alternative approach would be to create yet an other work item to be scheduled
on the worker, do the destructor work from there, and get the timing right.

manually picked commit 564040f from the drbd 8.4 branch.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h |  4 ++++
 drivers/block/drbd/drbd_nl.c  | 14 +++++++++++---
 include/linux/drbd.h          |  2 +-
 3 files changed, 16 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 8aa10391115b..a74d3ee04ba8 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -2157,6 +2157,10 @@ static inline int get_net_conf(struct drbd_conf *mdev)
 static inline void put_ldev(struct drbd_conf *mdev)
 {
 	int i = atomic_dec_return(&mdev->local_cnt);
+
+	/* This may be called from some endio handler,
+	 * so we must not sleep here. */
+
 	__release(local);
 	D_ASSERT(i >= 0);
 	if (i == 0) {
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 7c64ec042124..13569635b922 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1334,11 +1334,19 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
 			  struct drbd_nl_cfg_reply *reply)
 {
+	enum drbd_ret_code retcode;
+	int ret;
 	drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */
-	reply->ret_code = drbd_request_state(mdev, NS(disk, D_DISKLESS));
-	if (mdev->state.disk == D_DISKLESS)
-		wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
+	retcode = drbd_request_state(mdev, NS(disk, D_FAILED));
+	/* D_FAILED will transition to DISKLESS. */
+	ret = wait_event_interruptible(mdev->misc_wait,
+			mdev->state.disk != D_FAILED);
 	drbd_resume_io(mdev);
+	if (retcode == SS_IS_DISKLESS)
+		retcode = SS_NOTHING_TO_DO;
+	if (ret)
+		retcode = ERR_INTR;
+	reply->ret_code = retcode;
 	return 0;
 }
 
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index cec467f5d676..c86283b4fbab 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -53,7 +53,7 @@
 
 
 extern const char *drbd_buildtag(void);
-#define REL_VERSION "8.3.10"
+#define REL_VERSION "8.3.11"
 #define API_VERSION 88
 #define PRO_VERSION_MIN 86
 #define PRO_VERSION_MAX 96
-- 
cgit v1.2.3


From 24c4830c8ec3cbc904d84c213126a35f41a4e455 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Sat, 21 May 2011 18:32:29 +0200
Subject: drbd: Fix spelling

Found these with the help of ispell -l.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
---
 drivers/block/drbd/drbd_actlog.c   |  2 +-
 drivers/block/drbd/drbd_bitmap.c   |  6 +++---
 drivers/block/drbd/drbd_int.h      |  6 +++---
 drivers/block/drbd/drbd_main.c     |  4 ++--
 drivers/block/drbd/drbd_nl.c       |  6 +++---
 drivers/block/drbd/drbd_receiver.c | 30 +++++++++++++++---------------
 drivers/block/drbd/drbd_req.c      | 18 +++++++++---------
 drivers/block/drbd/drbd_req.h      |  4 ++--
 drivers/block/drbd/drbd_worker.c   |  4 ++--
 include/linux/drbd.h               |  8 ++++----
 include/linux/drbd_tag_magic.h     |  2 +-
 11 files changed, 45 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index c6828b68d77b..09ef9a878ef0 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -28,7 +28,7 @@
 #include "drbd_int.h"
 #include "drbd_wrappers.h"
 
-/* We maintain a trivial check sum in our on disk activity log.
+/* We maintain a trivial checksum in our on disk activity log.
  * With that we can ensure correct operation even when the storage
  * device might do a partial (last) sector write while losing power.
  */
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 76210ba401ac..f440a02dfdb1 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -74,7 +74,7 @@
  *	as we are "attached" to a local disk, which at 32 GiB for 1PiB storage
  *	seems excessive.
  *
- *	We plan to reduce the amount of in-core bitmap pages by pageing them in
+ *	We plan to reduce the amount of in-core bitmap pages by paging them in
  *	and out against their on-disk location as necessary, but need to make
  *	sure we don't cause too much meta data IO, and must not deadlock in
  *	tight memory situations. This needs some more work.
@@ -200,7 +200,7 @@ void drbd_bm_unlock(struct drbd_conf *mdev)
  * we if bits have been cleared since last IO. */
 #define BM_PAGE_LAZY_WRITEOUT	28
 
-/* store_page_idx uses non-atomic assingment. It is only used directly after
+/* store_page_idx uses non-atomic assignment. It is only used directly after
  * allocating the page.  All other bm_set_page_* and bm_clear_page_* need to
  * use atomic bit manipulation, as set_out_of_sync (and therefore bitmap
  * changes) may happen from various contexts, and wait_on_bit/wake_up_bit
@@ -318,7 +318,7 @@ static void bm_unmap(unsigned long *p_addr)
 /* word offset from start of bitmap to word number _in_page_
  * modulo longs per page
 #define MLPP(X) ((X) % (PAGE_SIZE/sizeof(long))
- hm, well, Philipp thinks gcc might not optimze the % into & (... - 1)
+ hm, well, Philipp thinks gcc might not optimize the % into & (... - 1)
  so do it explicitly:
  */
 #define MLPP(X) ((X) & ((PAGE_SIZE/sizeof(long))-1))
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index a74d3ee04ba8..7952eb90d17f 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -699,7 +699,7 @@ struct drbd_request {
 	 * see drbd_endio_pri(). */
 	struct bio *private_bio;
 
-	struct hlist_node colision;
+	struct hlist_node collision;
 	sector_t sector;
 	unsigned int size;
 	unsigned int epoch; /* barrier_nr */
@@ -765,7 +765,7 @@ struct digest_info {
 
 struct drbd_epoch_entry {
 	struct drbd_work w;
-	struct hlist_node colision;
+	struct hlist_node collision;
 	struct drbd_epoch *epoch; /* for writes */
 	struct drbd_conf *mdev;
 	struct page *pages;
@@ -1520,7 +1520,7 @@ extern void drbd_resume_io(struct drbd_conf *mdev);
 extern char *ppsize(char *buf, unsigned long long size);
 extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, int);
 enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 };
-extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local);
+extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local);
 extern void resync_after_online_grow(struct drbd_conf *);
 extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev);
 extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev,
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index cfeb13b5a216..0358e55356c8 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2732,7 +2732,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
 
 		/* double check digest, sometimes buffers have been modified in flight. */
 		if (dgs > 0 && dgs <= 64) {
-			/* 64 byte, 512 bit, is the larges digest size
+			/* 64 byte, 512 bit, is the largest digest size
 			 * currently supported in kernel crypto. */
 			unsigned char digest[64];
 			drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, digest);
@@ -3287,7 +3287,7 @@ static void drbd_delete_device(unsigned int minor)
 
 	drbd_release_ee_lists(mdev);
 
-	/* should be free'd on disconnect? */
+	/* should be freed on disconnect? */
 	kfree(mdev->ee_hash);
 	/*
 	mdev->ee_hash_s = 0;
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 13569635b922..259ca0b20961 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -596,7 +596,7 @@ void drbd_resume_io(struct drbd_conf *mdev)
  * Returns 0 on success, negative return values indicate errors.
  * You should call drbd_md_sync() after calling this function.
  */
-enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local)
+enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local)
 {
 	sector_t prev_first_sect, prev_size; /* previous meta location */
 	sector_t la_size;
@@ -1205,7 +1205,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 	    !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND))
 		set_bit(USE_DEGR_WFC_T, &mdev->flags);
 
-	dd = drbd_determin_dev_size(mdev, 0);
+	dd = drbd_determine_dev_size(mdev, 0);
 	if (dd == dev_size_error) {
 		retcode = ERR_NOMEM_BITMAP;
 		goto force_diskless_dec;
@@ -1719,7 +1719,7 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
 
 	mdev->ldev->dc.disk_size = (sector_t)rs.resize_size;
 	ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
-	dd = drbd_determin_dev_size(mdev, ddsf);
+	dd = drbd_determine_dev_size(mdev, ddsf);
 	drbd_md_sync(mdev);
 	put_ldev(mdev);
 	if (dd == dev_size_error) {
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 6ea0a4b51ece..d9f2109fd9e6 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -333,7 +333,7 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
 	if (!page)
 		goto fail;
 
-	INIT_HLIST_NODE(&e->colision);
+	INIT_HLIST_NODE(&e->collision);
 	e->epoch = NULL;
 	e->mdev = mdev;
 	e->pages = page;
@@ -356,7 +356,7 @@ void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, int i
 		kfree(e->digest);
 	drbd_pp_free(mdev, e->pages, is_net);
 	D_ASSERT(atomic_read(&e->pending_bios) == 0);
-	D_ASSERT(hlist_unhashed(&e->colision));
+	D_ASSERT(hlist_unhashed(&e->collision));
 	mempool_free(e, drbd_ee_mempool);
 }
 
@@ -1413,7 +1413,7 @@ static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int u
 	sector_t sector = e->sector;
 	int ok;
 
-	D_ASSERT(hlist_unhashed(&e->colision));
+	D_ASSERT(hlist_unhashed(&e->collision));
 
 	if (likely((e->flags & EE_WAS_ERROR) == 0)) {
 		drbd_set_in_sync(mdev, sector, e->size);
@@ -1482,7 +1482,7 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsi
 		return false;
 	}
 
-	/* hlist_del(&req->colision) is done in _req_may_be_done, to avoid
+	/* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
 	 * special casing it there for the various failure cases.
 	 * still no race with drbd_fail_pending_reads */
 	ok = recv_dless_read(mdev, req, sector, data_size);
@@ -1553,11 +1553,11 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right.  */
 	if (mdev->net_conf->two_primaries) {
 		spin_lock_irq(&mdev->req_lock);
-		D_ASSERT(!hlist_unhashed(&e->colision));
-		hlist_del_init(&e->colision);
+		D_ASSERT(!hlist_unhashed(&e->collision));
+		hlist_del_init(&e->collision);
 		spin_unlock_irq(&mdev->req_lock);
 	} else {
-		D_ASSERT(hlist_unhashed(&e->colision));
+		D_ASSERT(hlist_unhashed(&e->collision));
 	}
 
 	drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
@@ -1574,8 +1574,8 @@ static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int u
 	ok = drbd_send_ack(mdev, P_DISCARD_ACK, e);
 
 	spin_lock_irq(&mdev->req_lock);
-	D_ASSERT(!hlist_unhashed(&e->colision));
-	hlist_del_init(&e->colision);
+	D_ASSERT(!hlist_unhashed(&e->collision));
+	hlist_del_init(&e->collision);
 	spin_unlock_irq(&mdev->req_lock);
 
 	dec_unacked(mdev);
@@ -1750,7 +1750,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
 
 		spin_lock_irq(&mdev->req_lock);
 
-		hlist_add_head(&e->colision, ee_hash_slot(mdev, sector));
+		hlist_add_head(&e->collision, ee_hash_slot(mdev, sector));
 
 #define OVERLAPS overlaps(i->sector, i->size, sector, size)
 		slot = tl_hash_slot(mdev, sector);
@@ -1760,7 +1760,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
 			int have_conflict = 0;
 			prepare_to_wait(&mdev->misc_wait, &wait,
 				TASK_INTERRUPTIBLE);
-			hlist_for_each_entry(i, n, slot, colision) {
+			hlist_for_each_entry(i, n, slot, collision) {
 				if (OVERLAPS) {
 					/* only ALERT on first iteration,
 					 * we may be woken up early... */
@@ -1799,7 +1799,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
 			}
 
 			if (signal_pending(current)) {
-				hlist_del_init(&e->colision);
+				hlist_del_init(&e->collision);
 
 				spin_unlock_irq(&mdev->req_lock);
 
@@ -1857,7 +1857,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
 	dev_err(DEV, "submit failed, triggering re-connect\n");
 	spin_lock_irq(&mdev->req_lock);
 	list_del(&e->w.list);
-	hlist_del_init(&e->colision);
+	hlist_del_init(&e->collision);
 	spin_unlock_irq(&mdev->req_lock);
 	if (e->flags & EE_CALL_AL_COMPLETE_IO)
 		drbd_al_complete_io(mdev, e->sector);
@@ -2988,7 +2988,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
 
 	ddsf = be16_to_cpu(p->dds_flags);
 	if (get_ldev(mdev)) {
-		dd = drbd_determin_dev_size(mdev, ddsf);
+		dd = drbd_determine_dev_size(mdev, ddsf);
 		put_ldev(mdev);
 		if (dd == dev_size_error)
 			return false;
@@ -4261,7 +4261,7 @@ static struct drbd_request *_ack_id_to_req(struct drbd_conf *mdev,
 	struct hlist_node *n;
 	struct drbd_request *req;
 
-	hlist_for_each_entry(req, n, slot, colision) {
+	hlist_for_each_entry(req, n, slot, collision) {
 		if ((unsigned long)req == (unsigned long)id) {
 			if (req->sector != sector) {
 				dev_err(DEV, "_ack_id_to_req: found req %p but it has "
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 14645bd40092..3424d675b769 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -163,7 +163,7 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev,
 		 * they must have been failed on the spot */
 #define OVERLAPS overlaps(sector, size, i->sector, i->size)
 		slot = tl_hash_slot(mdev, sector);
-		hlist_for_each_entry(i, n, slot, colision) {
+		hlist_for_each_entry(i, n, slot, collision) {
 			if (OVERLAPS) {
 				dev_alert(DEV, "LOGIC BUG: completed: %p %llus +%u; "
 				      "other: %p %llus +%u\n",
@@ -187,7 +187,7 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev,
 #undef OVERLAPS
 #define OVERLAPS overlaps(sector, size, e->sector, e->size)
 		slot = ee_hash_slot(mdev, req->sector);
-		hlist_for_each_entry(e, n, slot, colision) {
+		hlist_for_each_entry(e, n, slot, collision) {
 			if (OVERLAPS) {
 				wake_up(&mdev->misc_wait);
 				break;
@@ -260,8 +260,8 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
 
 		/* remove the request from the conflict detection
 		 * respective block_id verification hash */
-		if (!hlist_unhashed(&req->colision))
-			hlist_del(&req->colision);
+		if (!hlist_unhashed(&req->collision))
+			hlist_del(&req->collision);
 		else
 			D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0);
 
@@ -329,7 +329,7 @@ static int _req_conflicts(struct drbd_request *req)
 	struct hlist_node *n;
 	struct hlist_head *slot;
 
-	D_ASSERT(hlist_unhashed(&req->colision));
+	D_ASSERT(hlist_unhashed(&req->collision));
 
 	if (!get_net_conf(mdev))
 		return 0;
@@ -341,7 +341,7 @@ static int _req_conflicts(struct drbd_request *req)
 
 #define OVERLAPS overlaps(i->sector, i->size, sector, size)
 	slot = tl_hash_slot(mdev, sector);
-	hlist_for_each_entry(i, n, slot, colision) {
+	hlist_for_each_entry(i, n, slot, collision) {
 		if (OVERLAPS) {
 			dev_alert(DEV, "%s[%u] Concurrent local write detected! "
 			      "[DISCARD L] new: %llus +%u; "
@@ -359,7 +359,7 @@ static int _req_conflicts(struct drbd_request *req)
 #undef OVERLAPS
 #define OVERLAPS overlaps(e->sector, e->size, sector, size)
 		slot = ee_hash_slot(mdev, sector);
-		hlist_for_each_entry(e, n, slot, colision) {
+		hlist_for_each_entry(e, n, slot, collision) {
 			if (OVERLAPS) {
 				dev_alert(DEV, "%s[%u] Concurrent remote write detected!"
 				      " [DISCARD L] new: %llus +%u; "
@@ -491,7 +491,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 
 		/* so we can verify the handle in the answer packet
 		 * corresponding hlist_del is in _req_may_be_done() */
-		hlist_add_head(&req->colision, ar_hash_slot(mdev, req->sector));
+		hlist_add_head(&req->collision, ar_hash_slot(mdev, req->sector));
 
 		set_bit(UNPLUG_REMOTE, &mdev->flags);
 
@@ -507,7 +507,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		/* assert something? */
 		/* from drbd_make_request_common only */
 
-		hlist_add_head(&req->colision, tl_hash_slot(mdev, req->sector));
+		hlist_add_head(&req->collision, tl_hash_slot(mdev, req->sector));
 		/* corresponding hlist_del is in _req_may_be_done() */
 
 		/* NOTE
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 32e2c3e6a813..281342dca2c8 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -256,7 +256,7 @@ static inline struct drbd_request *_ar_id_to_req(struct drbd_conf *mdev,
 	struct hlist_node *n;
 	struct drbd_request *req;
 
-	hlist_for_each_entry(req, n, slot, colision) {
+	hlist_for_each_entry(req, n, slot, collision) {
 		if ((unsigned long)req == (unsigned long)id) {
 			D_ASSERT(req->sector == sector);
 			return req;
@@ -291,7 +291,7 @@ static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev,
 		req->epoch       = 0;
 		req->sector      = bio_src->bi_sector;
 		req->size        = bio_src->bi_size;
-		INIT_HLIST_NODE(&req->colision);
+		INIT_HLIST_NODE(&req->collision);
 		INIT_LIST_HEAD(&req->tl_requests);
 		INIT_LIST_HEAD(&req->w.list);
 	}
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index b5e53695fd7e..4d76b06b6b20 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -126,7 +126,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo
 	list_del(&e->w.list); /* has been on active_ee or sync_ee */
 	list_add_tail(&e->w.list, &mdev->done_ee);
 
-	/* No hlist_del_init(&e->colision) here, we did not send the Ack yet,
+	/* No hlist_del_init(&e->collision) here, we did not send the Ack yet,
 	 * neither did we wake possibly waiting conflicting requests.
 	 * done from "drbd_process_done_ee" within the appropriate w.cb
 	 * (e_end_block/e_end_resync_block) or from _drbd_clear_done_ee */
@@ -840,7 +840,7 @@ int drbd_resync_finished(struct drbd_conf *mdev)
 			const int ratio =
 				(t == 0)     ? 0 :
 			(t < 100000) ? ((s*100)/t) : (s/(t/100));
-			dev_info(DEV, "%u %% had equal check sums, eliminated: %luK; "
+			dev_info(DEV, "%u %% had equal checksums, eliminated: %luK; "
 			     "transferred %luK total %luK\n",
 			     ratio,
 			     Bit2KB(mdev->rs_same_csum),
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index c86283b4fbab..9e5f5607eba3 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -38,7 +38,7 @@
 
 /* Although the Linux source code makes a difference between
    generic endianness and the bitfields' endianness, there is no
-   architecture as of Linux-2.6.24-rc4 where the bitfileds' endianness
+   architecture as of Linux-2.6.24-rc4 where the bitfields' endianness
    does not match the generic endianness. */
 
 #if __BYTE_ORDER == __LITTLE_ENDIAN
@@ -195,7 +195,7 @@ enum drbd_conns {
 	C_WF_REPORT_PARAMS, /* we have a socket */
 	C_CONNECTED,      /* we have introduced each other */
 	C_STARTING_SYNC_S,  /* starting full sync by admin request. */
-	C_STARTING_SYNC_T,  /* stariing full sync by admin request. */
+	C_STARTING_SYNC_T,  /* starting full sync by admin request. */
 	C_WF_BITMAP_S,
 	C_WF_BITMAP_T,
 	C_WF_SYNC_UUID,
@@ -236,7 +236,7 @@ union drbd_state {
  * pointed out by Maxim Uvarov q<muvarov@ru.mvista.com>
  * even though we transmit as "cpu_to_be32(state)",
  * the offsets of the bitfields still need to be swapped
- * on different endianess.
+ * on different endianness.
  */
 	struct {
 #if defined(__LITTLE_ENDIAN_BITFIELD)
@@ -266,7 +266,7 @@ union drbd_state {
 		unsigned peer:2 ;   /* 3/4	 primary/secondary/unknown */
 		unsigned role:2 ;   /* 3/4	 primary/secondary/unknown */
 #else
-# error "this endianess is not supported"
+# error "this endianness is not supported"
 #endif
 	};
 	unsigned int i;
diff --git a/include/linux/drbd_tag_magic.h b/include/linux/drbd_tag_magic.h
index f14a165e82dc..069543190516 100644
--- a/include/linux/drbd_tag_magic.h
+++ b/include/linux/drbd_tag_magic.h
@@ -30,7 +30,7 @@ enum packet_types {
 	int tag_and_len ## member;
 #include "linux/drbd_nl.h"
 
-/* declate tag-list-sizes */
+/* declare tag-list-sizes */
 static const int tag_list_sizes[] = {
 #define NL_PACKET(name, number, fields) 2 fields ,
 #define NL_INTEGER(pn, pr, member)      + 4 + 4
-- 
cgit v1.2.3


From 8af088710d1eb3c980e0ef3779c8d47f3f217b48 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 24 May 2011 11:12:58 +0200
Subject: posix-timers: RCU conversion

Ben Nagy reported a scalability problem with KVM/QEMU that hit very hard
a single spinlock (idr_lock) in posix-timers code, on its 48 core
machine.

Even on a 16 cpu machine (2x4x2), a single test can show 98% of cpu time
used in ticket_spin_lock, from lock_timer

Ref: http://www.spinics.net/lists/kvm/msg51526.html

Switching to RCU is quite easy, IDR being already RCU ready. idr_lock
should be locked only for an insert/delete, not a lookup.

Benchmark on a 2x4x2 machine, 16 processes calling timer_gettime().

Before :

real    1m18.669s
user    0m1.346s
sys     1m17.180s

After :

real    0m3.296s
user    0m1.366s
sys     0m1.926s

Reported-by: Ben Nagy <ben@iagu.net>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Tested-by: Ben Nagy <ben@iagu.net>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: John Stultz <johnstul@us.ibm.com>
Cc: Richard Cochran <richard.cochran@omicron.at>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/posix-timers.h |  1 +
 kernel/posix-timers.c        | 25 ++++++++++++++-----------
 2 files changed, 15 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 808227d40a64..959c14132f46 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -82,6 +82,7 @@ struct k_itimer {
 			unsigned long expires;
 		} mmtimer;
 		struct alarm alarmtimer;
+		struct rcu_head rcu;
 	} it;
 };
 
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index a1b5edf1bf92..4556182527f3 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -491,6 +491,13 @@ static struct k_itimer * alloc_posix_timer(void)
 	return tmr;
 }
 
+static void k_itimer_rcu_free(struct rcu_head *head)
+{
+	struct k_itimer *tmr = container_of(head, struct k_itimer, it.rcu);
+
+	kmem_cache_free(posix_timers_cache, tmr);
+}
+
 #define IT_ID_SET	1
 #define IT_ID_NOT_SET	0
 static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
@@ -503,7 +510,7 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
 	}
 	put_pid(tmr->it_pid);
 	sigqueue_free(tmr->sigq);
-	kmem_cache_free(posix_timers_cache, tmr);
+	call_rcu(&tmr->it.rcu, k_itimer_rcu_free);
 }
 
 static struct k_clock *clockid_to_kclock(const clockid_t id)
@@ -631,22 +638,18 @@ out:
 static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags)
 {
 	struct k_itimer *timr;
-	/*
-	 * Watch out here.  We do a irqsave on the idr_lock and pass the
-	 * flags part over to the timer lock.  Must not let interrupts in
-	 * while we are moving the lock.
-	 */
-	spin_lock_irqsave(&idr_lock, *flags);
+
+	rcu_read_lock();
 	timr = idr_find(&posix_timers_id, (int)timer_id);
 	if (timr) {
-		spin_lock(&timr->it_lock);
+		spin_lock_irqsave(&timr->it_lock, *flags);
 		if (timr->it_signal == current->signal) {
-			spin_unlock(&idr_lock);
+			rcu_read_unlock();
 			return timr;
 		}
-		spin_unlock(&timr->it_lock);
+		spin_unlock_irqrestore(&timr->it_lock, *flags);
 	}
-	spin_unlock_irqrestore(&idr_lock, *flags);
+	rcu_read_unlock();
 
 	return NULL;
 }
-- 
cgit v1.2.3


From 45e9683e87b69328175df3a9e42039b9892ca47e Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Tue, 24 May 2011 13:28:54 +1000
Subject: compat: include aio_abi.h for aio_context_t

fixes this build error on sparc64 (at least):

In file included from arch/sparc/include/asm/siginfo.h:19,
                 from include/linux/signal.h:5,
                 from include/linux/sched.h:73,
                 from arch/sparc/kernel/asm-offsets.c:13:
include/linux/compat.h:401: error: expected ')' before 'ctx_id'
include/linux/compat.h:406: error: expected ')' before 'ctx_id'

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 include/linux/compat.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/compat.h b/include/linux/compat.h
index 644e1a4692b1..ddcb7db38e67 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -13,6 +13,7 @@
 #include <linux/socket.h>
 #include <linux/if.h>
 #include <linux/fs.h>
+#include <linux/aio_abi.h>	/* for aio_context_t */
 
 #include <asm/compat.h>
 #include <asm/siginfo.h>
-- 
cgit v1.2.3


From 3c454cf21645bc96668e286f6352ac2c4c895fa2 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Wed, 6 Apr 2011 09:31:40 -0700
Subject: ceph: use LOOKUPINO to make unconnected nfs fh more reliable

If we are unable to locate an inode by ino, ask the MDS using the new
LOOKUPINO command.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/export.c             | 19 +++++++++++++++++--
 include/linux/ceph/ceph_fs.h |  1 +
 2 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index e41056174bf8..f1828af09912 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -86,6 +86,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len,
 static struct dentry *__fh_to_dentry(struct super_block *sb,
 				     struct ceph_nfs_fh *fh)
 {
+	struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
 	struct inode *inode;
 	struct dentry *dentry;
 	struct ceph_vino vino;
@@ -95,8 +96,22 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
 	vino.ino = fh->ino;
 	vino.snap = CEPH_NOSNAP;
 	inode = ceph_find_inode(sb, vino);
-	if (!inode)
-		return ERR_PTR(-ESTALE);
+	if (!inode) {
+		struct ceph_mds_request *req;
+
+		req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
+					       USE_ANY_MDS);
+		if (IS_ERR(req))
+			return ERR_CAST(req);
+
+		req->r_ino1 = vino;
+		req->r_num_caps = 1;
+		err = ceph_mdsc_do_request(mdsc, NULL, req);
+		ceph_mdsc_put_request(req);
+		inode = ceph_find_inode(sb, vino);
+		if (!inode)
+			return ERR_PTR(-ESTALE);
+	}
 
 	dentry = d_obtain_alias(inode);
 	if (IS_ERR(dentry)) {
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index b8e995fbd867..b8c60694b2b0 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -313,6 +313,7 @@ enum {
 	CEPH_MDS_OP_GETATTR    = 0x00101,
 	CEPH_MDS_OP_LOOKUPHASH = 0x00102,
 	CEPH_MDS_OP_LOOKUPPARENT = 0x00103,
+	CEPH_MDS_OP_LOOKUPINO  = 0x00104,
 
 	CEPH_MDS_OP_SETXATTR   = 0x01105,
 	CEPH_MDS_OP_RMXATTR    = 0x01106,
-- 
cgit v1.2.3


From 650c2a2145981696c414be1d540a32447d0e353e Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sun, 15 May 2011 22:53:56 +0200
Subject: mach-ux500: move the DB8500 PRCMU driver to MFD

We have decided that this function arbiter fits better in the MFD
subsystem. Since we need to concatenate the split header files we move
it basically like this:

mv mach-ux500/prcmu-db8500.c drivers/mfd/db8500-prcmu.c
mv mach-ux500/include/mach/prcmu-defs.h include/linux/mfd/db8500-prcmu.h
mv mach-ux500/include/mach/prcmu-regs.h drivers/mfd/db8500-prcmu-regs.h
mach-ux500/include/mach/prcmu.h >> include/linux/mfd/db8500-prcmu.h
rm arch/arm/mach-ux500/include/mach/prcmu.h

Then we update different #include statements and Makefile orders etc
to make the PRCMU driver compile, link and boot in the new place
without really changing any code.

Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/mach-ux500/Kconfig                   |   1 +
 arch/arm/mach-ux500/Makefile                  |   2 +-
 arch/arm/mach-ux500/cpu.c                     |   2 +-
 arch/arm/mach-ux500/cpufreq.c                 |   3 +-
 arch/arm/mach-ux500/include/mach/prcmu-defs.h |  30 --
 arch/arm/mach-ux500/include/mach/prcmu-regs.h |  94 ------
 arch/arm/mach-ux500/include/mach/prcmu.h      |  28 --
 arch/arm/mach-ux500/prcmu-db8500.c            | 394 -------------------------
 drivers/mfd/Kconfig                           |  12 +-
 drivers/mfd/Makefile                          |   4 +-
 drivers/mfd/ab8500-i2c.c                      |   3 +-
 drivers/mfd/db8500-prcmu-regs.h               |  94 ++++++
 drivers/mfd/db8500-prcmu.c                    | 395 ++++++++++++++++++++++++++
 include/linux/mfd/db8500-prcmu.h              |  58 ++++
 14 files changed, 566 insertions(+), 554 deletions(-)
 delete mode 100644 arch/arm/mach-ux500/include/mach/prcmu-defs.h
 delete mode 100644 arch/arm/mach-ux500/include/mach/prcmu-regs.h
 delete mode 100644 arch/arm/mach-ux500/include/mach/prcmu.h
 delete mode 100644 arch/arm/mach-ux500/prcmu-db8500.c
 create mode 100644 drivers/mfd/db8500-prcmu-regs.h
 create mode 100644 drivers/mfd/db8500-prcmu.c
 create mode 100644 include/linux/mfd/db8500-prcmu.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-ux500/Kconfig b/arch/arm/mach-ux500/Kconfig
index 58626013aa32..8071d2746f70 100644
--- a/arch/arm/mach-ux500/Kconfig
+++ b/arch/arm/mach-ux500/Kconfig
@@ -15,6 +15,7 @@ config UX500_SOC_DB5500
 
 config UX500_SOC_DB8500
 	bool "DB8500"
+	select MFD_DB8500_PRCMU
 
 endmenu
 
diff --git a/arch/arm/mach-ux500/Makefile b/arch/arm/mach-ux500/Makefile
index 2a08a10e09da..7a1d43e04f97 100644
--- a/arch/arm/mach-ux500/Makefile
+++ b/arch/arm/mach-ux500/Makefile
@@ -5,7 +5,7 @@
 obj-y				:= clock.o cpu.o devices.o devices-common.o \
 				   id.o usb.o
 obj-$(CONFIG_UX500_SOC_DB5500)	+= cpu-db5500.o dma-db5500.o
-obj-$(CONFIG_UX500_SOC_DB8500)	+= cpu-db8500.o devices-db8500.o prcmu-db8500.o
+obj-$(CONFIG_UX500_SOC_DB8500)	+= cpu-db8500.o devices-db8500.o
 obj-$(CONFIG_MACH_U8500)	+= board-mop500.o board-mop500-sdi.o \
 				board-mop500-regulators.o \
 				board-mop500-uib.o board-mop500-stuib.o \
diff --git a/arch/arm/mach-ux500/cpu.c b/arch/arm/mach-ux500/cpu.c
index 0190e0e68b4d..11360f734cec 100644
--- a/arch/arm/mach-ux500/cpu.c
+++ b/arch/arm/mach-ux500/cpu.c
@@ -8,6 +8,7 @@
 #include <linux/platform_device.h>
 #include <linux/io.h>
 #include <linux/clk.h>
+#include <linux/mfd/db8500-prcmu.h>
 
 #include <asm/cacheflush.h>
 #include <asm/hardware/cache-l2x0.h>
@@ -19,7 +20,6 @@
 #include <mach/hardware.h>
 #include <mach/setup.h>
 #include <mach/devices.h>
-#include <mach/prcmu.h>
 
 #include "clock.h"
 
diff --git a/arch/arm/mach-ux500/cpufreq.c b/arch/arm/mach-ux500/cpufreq.c
index 5c5b747f134d..d196939fcdb9 100644
--- a/arch/arm/mach-ux500/cpufreq.c
+++ b/arch/arm/mach-ux500/cpufreq.c
@@ -17,10 +17,9 @@
 #include <linux/kernel.h>
 #include <linux/cpufreq.h>
 #include <linux/delay.h>
+#include <linux/mfd/db8500-prcmu.h>
 
 #include <mach/hardware.h>
-#include <mach/prcmu.h>
-#include <mach/prcmu-defs.h>
 
 #define DRIVER_NAME "cpufreq-u8500"
 #define CPUFREQ_NAME "u8500"
diff --git a/arch/arm/mach-ux500/include/mach/prcmu-defs.h b/arch/arm/mach-ux500/include/mach/prcmu-defs.h
deleted file mode 100644
index 848ba64b561f..000000000000
--- a/arch/arm/mach-ux500/include/mach/prcmu-defs.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (C) STMicroelectronics 2009
- * Copyright (C) ST-Ericsson SA 2010
- *
- * Author: Sundar Iyer <sundar.iyer@stericsson.com>
- * Author: Martin Persson <martin.persson@stericsson.com>
- *
- * License Terms: GNU General Public License v2
- *
- * PRCM Unit definitions
- */
-
-#ifndef __MACH_PRCMU_DEFS_H
-#define __MACH_PRCMU_DEFS_H
-
-enum prcmu_cpu_opp {
-	CPU_OPP_INIT	  = 0x00,
-	CPU_OPP_NO_CHANGE = 0x01,
-	CPU_OPP_100	  = 0x02,
-	CPU_OPP_50	  = 0x03,
-	CPU_OPP_MAX	  = 0x04,
-	CPU_OPP_EXT_CLK	  = 0x07
-};
-enum prcmu_ape_opp {
-	APE_OPP_NO_CHANGE = 0x00,
-	APE_OPP_100	  = 0x02,
-	APE_OPP_50	  = 0x03,
-};
-
-#endif /* __MACH_PRCMU_DEFS_H */
diff --git a/arch/arm/mach-ux500/include/mach/prcmu-regs.h b/arch/arm/mach-ux500/include/mach/prcmu-regs.h
deleted file mode 100644
index c1226da19bfb..000000000000
--- a/arch/arm/mach-ux500/include/mach/prcmu-regs.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (C) STMicroelectronics 2009
- * Copyright (C) ST-Ericsson SA 2010
- *
- * Author: Kumar Sanghvi <kumar.sanghvi@stericsson.com>
- * Author: Sundar Iyer <sundar.iyer@stericsson.com>
- *
- * License Terms: GNU General Public License v2
- *
- * PRCM Unit registers
- */
-
-#ifndef __MACH_PRCMU_REGS_H
-#define __MACH_PRCMU_REGS_H
-
-#include <mach/hardware.h>
-
-#define PRCM_ARM_PLLDIVPS	(_PRCMU_BASE + 0x118)
-#define PRCM_ARM_CHGCLKREQ	(_PRCMU_BASE + 0x114)
-#define PRCM_PLLARM_ENABLE	(_PRCMU_BASE + 0x98)
-#define PRCM_ARMCLKFIX_MGT	(_PRCMU_BASE + 0x0)
-#define PRCM_A9_RESETN_CLR	(_PRCMU_BASE + 0x1f4)
-#define PRCM_A9_RESETN_SET	(_PRCMU_BASE + 0x1f0)
-#define PRCM_ARM_LS_CLAMP	(_PRCMU_BASE + 0x30c)
-#define PRCM_SRAM_A9		(_PRCMU_BASE + 0x308)
-
-/* ARM WFI Standby signal register */
-#define PRCM_ARM_WFI_STANDBY    (_PRCMU_BASE + 0x130)
-#define PRCMU_IOCR              (_PRCMU_BASE + 0x310)
-
-/* CPU mailbox registers */
-#define PRCM_MBOX_CPU_VAL	(_PRCMU_BASE + 0x0fc)
-#define PRCM_MBOX_CPU_SET	(_PRCMU_BASE + 0x100)
-#define PRCM_MBOX_CPU_CLR	(_PRCMU_BASE + 0x104)
-
-/* Dual A9 core interrupt management unit registers */
-#define PRCM_A9_MASK_REQ	(_PRCMU_BASE + 0x328)
-#define PRCM_A9_MASK_ACK	(_PRCMU_BASE + 0x32c)
-#define PRCM_ARMITMSK31TO0	(_PRCMU_BASE + 0x11c)
-#define PRCM_ARMITMSK63TO32	(_PRCMU_BASE + 0x120)
-#define PRCM_ARMITMSK95TO64	(_PRCMU_BASE + 0x124)
-#define PRCM_ARMITMSK127TO96	(_PRCMU_BASE + 0x128)
-#define PRCM_POWER_STATE_VAL	(_PRCMU_BASE + 0x25C)
-#define PRCM_ARMITVAL31TO0	(_PRCMU_BASE + 0x260)
-#define PRCM_ARMITVAL63TO32	(_PRCMU_BASE + 0x264)
-#define PRCM_ARMITVAL95TO64	(_PRCMU_BASE + 0x268)
-#define PRCM_ARMITVAL127TO96	(_PRCMU_BASE + 0x26C)
-
-#define PRCM_HOSTACCESS_REQ	(_PRCMU_BASE + 0x334)
-#define ARM_WAKEUP_MODEM	0x1
-
-#define PRCM_ARM_IT1_CLEAR	(_PRCMU_BASE + 0x48C)
-#define PRCM_ARM_IT1_VAL	(_PRCMU_BASE + 0x494)
-#define PRCM_HOLD_EVT		(_PRCMU_BASE + 0x174)
-
-#define PRCM_ITSTATUS0		(_PRCMU_BASE + 0x148)
-#define PRCM_ITSTATUS1		(_PRCMU_BASE + 0x150)
-#define PRCM_ITSTATUS2		(_PRCMU_BASE + 0x158)
-#define PRCM_ITSTATUS3		(_PRCMU_BASE + 0x160)
-#define PRCM_ITSTATUS4		(_PRCMU_BASE + 0x168)
-#define PRCM_ITSTATUS5		(_PRCMU_BASE + 0x484)
-#define PRCM_ITCLEAR5		(_PRCMU_BASE + 0x488)
-#define PRCM_ARMIT_MASKXP70_IT	(_PRCMU_BASE + 0x1018)
-
-/* System reset register */
-#define PRCM_APE_SOFTRST	(_PRCMU_BASE + 0x228)
-
-/* Level shifter and clamp control registers */
-#define PRCM_MMIP_LS_CLAMP_SET     (_PRCMU_BASE + 0x420)
-#define PRCM_MMIP_LS_CLAMP_CLR     (_PRCMU_BASE + 0x424)
-
-/* PRCMU clock/PLL/reset registers */
-#define PRCM_PLLDSI_FREQ           (_PRCMU_BASE + 0x500)
-#define PRCM_PLLDSI_ENABLE         (_PRCMU_BASE + 0x504)
-#define PRCM_LCDCLK_MGT            (_PRCMU_BASE + 0x044)
-#define PRCM_MCDECLK_MGT           (_PRCMU_BASE + 0x064)
-#define PRCM_HDMICLK_MGT           (_PRCMU_BASE + 0x058)
-#define PRCM_TVCLK_MGT             (_PRCMU_BASE + 0x07c)
-#define PRCM_DSI_PLLOUT_SEL        (_PRCMU_BASE + 0x530)
-#define PRCM_DSITVCLK_DIV          (_PRCMU_BASE + 0x52C)
-#define PRCM_APE_RESETN_SET        (_PRCMU_BASE + 0x1E4)
-#define PRCM_APE_RESETN_CLR        (_PRCMU_BASE + 0x1E8)
-
-/* ePOD and memory power signal control registers */
-#define PRCM_EPOD_C_SET            (_PRCMU_BASE + 0x410)
-#define PRCM_SRAM_LS_SLEEP         (_PRCMU_BASE + 0x304)
-
-/* Debug power control unit registers */
-#define PRCM_POWER_STATE_SET       (_PRCMU_BASE + 0x254)
-
-/* Miscellaneous unit registers */
-#define PRCM_DSI_SW_RESET          (_PRCMU_BASE + 0x324)
-
-#endif /* __MACH_PRCMU_REGS_H */
diff --git a/arch/arm/mach-ux500/include/mach/prcmu.h b/arch/arm/mach-ux500/include/mach/prcmu.h
deleted file mode 100644
index c49e456162ef..000000000000
--- a/arch/arm/mach-ux500/include/mach/prcmu.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (C) STMicroelectronics 2009
- * Copyright (C) ST-Ericsson SA 2010
- *
- * Author: Kumar Sanghvi <kumar.sanghvi@stericsson.com>
- * Author: Sundar Iyer <sundar.iyer@stericsson.com>
- * Author: Mattias Nilsson <mattias.i.nilsson@stericsson.com>
- *
- * License Terms: GNU General Public License v2
- *
- * PRCM Unit f/w API
- */
-#ifndef __MACH_PRCMU_H
-#define __MACH_PRCMU_H
-#include <mach/prcmu-defs.h>
-
-void __init prcmu_early_init(void);
-int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size);
-int prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size);
-int prcmu_set_ape_opp(enum prcmu_ape_opp opp);
-int prcmu_set_cpu_opp(enum prcmu_cpu_opp opp);
-int prcmu_set_ape_cpu_opps(enum prcmu_ape_opp ape_opp,
-			   enum prcmu_cpu_opp cpu_opp);
-int prcmu_get_ape_opp(void);
-int prcmu_get_cpu_opp(void);
-bool prcmu_has_arm_maxopp(void);
-
-#endif /* __MACH_PRCMU_H */
diff --git a/arch/arm/mach-ux500/prcmu-db8500.c b/arch/arm/mach-ux500/prcmu-db8500.c
deleted file mode 100644
index c522d26ef348..000000000000
--- a/arch/arm/mach-ux500/prcmu-db8500.c
+++ /dev/null
@@ -1,394 +0,0 @@
-/*
- * Copyright (C) STMicroelectronics 2009
- * Copyright (C) ST-Ericsson SA 2010
- *
- * License Terms: GNU General Public License v2
- * Author: Kumar Sanghvi <kumar.sanghvi@stericsson.com>
- * Author: Sundar Iyer <sundar.iyer@stericsson.com>
- * Author: Mattias Nilsson <mattias.i.nilsson@stericsson.com>
- *
- * U8500 PRCM Unit interface driver
- *
- */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/err.h>
-#include <linux/io.h>
-#include <linux/mutex.h>
-#include <linux/completion.h>
-#include <linux/jiffies.h>
-#include <linux/bitops.h>
-#include <linux/interrupt.h>
-
-#include <mach/hardware.h>
-#include <mach/prcmu-regs.h>
-#include <mach/prcmu-defs.h>
-
-/* Global var to runtime determine TCDM base for v2 or v1 */
-static __iomem void *tcdm_base;
-
-#define _MBOX_HEADER		(tcdm_base + 0xFE8)
-#define MBOX_HEADER_REQ_MB0	(_MBOX_HEADER + 0x0)
-
-#define REQ_MB1 (tcdm_base + 0xFD0)
-#define REQ_MB5 (tcdm_base + 0xE44)
-
-#define REQ_MB1_ARMOPP		(REQ_MB1 + 0x0)
-#define REQ_MB1_APEOPP		(REQ_MB1 + 0x1)
-#define REQ_MB1_BOOSTOPP	(REQ_MB1 + 0x2)
-
-#define ACK_MB1 (tcdm_base + 0xE04)
-#define ACK_MB5 (tcdm_base + 0xDF4)
-
-#define ACK_MB1_CURR_ARMOPP		(ACK_MB1 + 0x0)
-#define ACK_MB1_CURR_APEOPP		(ACK_MB1 + 0x1)
-
-#define REQ_MB5_I2C_SLAVE_OP (REQ_MB5)
-#define REQ_MB5_I2C_HW_BITS (REQ_MB5 + 1)
-#define REQ_MB5_I2C_REG (REQ_MB5 + 2)
-#define REQ_MB5_I2C_VAL (REQ_MB5 + 3)
-
-#define ACK_MB5_I2C_STATUS (ACK_MB5 + 1)
-#define ACK_MB5_I2C_VAL (ACK_MB5 + 3)
-
-#define PRCM_AVS_VARM_MAX_OPP		(tcdm_base + 0x2E4)
-#define PRCM_AVS_ISMODEENABLE		7
-#define PRCM_AVS_ISMODEENABLE_MASK	(1 << PRCM_AVS_ISMODEENABLE)
-
-#define I2C_WRITE(slave) \
-	(((slave) << 1) | (cpu_is_u8500v2() ? BIT(6) : 0))
-#define I2C_READ(slave) \
-	(((slave) << 1) | (cpu_is_u8500v2() ? BIT(6) : 0) | BIT(0))
-#define I2C_STOP_EN BIT(3)
-
-enum mb1_h {
-	MB1H_ARM_OPP = 1,
-	MB1H_APE_OPP,
-	MB1H_ARM_APE_OPP,
-};
-
-static struct {
-	struct mutex lock;
-	struct completion work;
-	struct {
-		u8 arm_opp;
-		u8 ape_opp;
-		u8 arm_status;
-		u8 ape_status;
-	} ack;
-} mb1_transfer;
-
-enum ack_mb5_status {
-	I2C_WR_OK = 0x01,
-	I2C_RD_OK = 0x02,
-};
-
-#define MBOX_BIT BIT
-#define NUM_MBOX 8
-
-static struct {
-	struct mutex lock;
-	struct completion work;
-	bool failed;
-	struct {
-		u8 status;
-		u8 value;
-	} ack;
-} mb5_transfer;
-
-/**
- * prcmu_abb_read() - Read register value(s) from the ABB.
- * @slave:	The I2C slave address.
- * @reg:	The (start) register address.
- * @value:	The read out value(s).
- * @size:	The number of registers to read.
- *
- * Reads register value(s) from the ABB.
- * @size has to be 1 for the current firmware version.
- */
-int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size)
-{
-	int r;
-
-	if (size != 1)
-		return -EINVAL;
-
-	r = mutex_lock_interruptible(&mb5_transfer.lock);
-	if (r)
-		return r;
-
-	while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(5))
-		cpu_relax();
-
-	writeb(I2C_READ(slave), REQ_MB5_I2C_SLAVE_OP);
-	writeb(I2C_STOP_EN, REQ_MB5_I2C_HW_BITS);
-	writeb(reg, REQ_MB5_I2C_REG);
-
-	writel(MBOX_BIT(5), PRCM_MBOX_CPU_SET);
-	if (!wait_for_completion_timeout(&mb5_transfer.work,
-			msecs_to_jiffies(500))) {
-		pr_err("prcmu: prcmu_abb_read timed out.\n");
-		r = -EIO;
-		goto unlock_and_return;
-	}
-	r = ((mb5_transfer.ack.status == I2C_RD_OK) ? 0 : -EIO);
-	if (!r)
-		*value = mb5_transfer.ack.value;
-
-unlock_and_return:
-	mutex_unlock(&mb5_transfer.lock);
-	return r;
-}
-EXPORT_SYMBOL(prcmu_abb_read);
-
-/**
- * prcmu_abb_write() - Write register value(s) to the ABB.
- * @slave:	The I2C slave address.
- * @reg:	The (start) register address.
- * @value:	The value(s) to write.
- * @size:	The number of registers to write.
- *
- * Reads register value(s) from the ABB.
- * @size has to be 1 for the current firmware version.
- */
-int prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size)
-{
-	int r;
-
-	if (size != 1)
-		return -EINVAL;
-
-	r = mutex_lock_interruptible(&mb5_transfer.lock);
-	if (r)
-		return r;
-
-
-	while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(5))
-		cpu_relax();
-
-	writeb(I2C_WRITE(slave), REQ_MB5_I2C_SLAVE_OP);
-	writeb(I2C_STOP_EN, REQ_MB5_I2C_HW_BITS);
-	writeb(reg, REQ_MB5_I2C_REG);
-	writeb(*value, REQ_MB5_I2C_VAL);
-
-	writel(MBOX_BIT(5), PRCM_MBOX_CPU_SET);
-	if (!wait_for_completion_timeout(&mb5_transfer.work,
-			msecs_to_jiffies(500))) {
-		pr_err("prcmu: prcmu_abb_write timed out.\n");
-		r = -EIO;
-		goto unlock_and_return;
-	}
-	r = ((mb5_transfer.ack.status == I2C_WR_OK) ? 0 : -EIO);
-
-unlock_and_return:
-	mutex_unlock(&mb5_transfer.lock);
-	return r;
-}
-EXPORT_SYMBOL(prcmu_abb_write);
-
-static int set_ape_cpu_opps(u8 header, enum prcmu_ape_opp ape_opp,
-			    enum prcmu_cpu_opp cpu_opp)
-{
-	bool do_ape;
-	bool do_arm;
-	int err = 0;
-
-	do_ape = ((header == MB1H_APE_OPP) || (header == MB1H_ARM_APE_OPP));
-	do_arm = ((header == MB1H_ARM_OPP) || (header == MB1H_ARM_APE_OPP));
-
-	mutex_lock(&mb1_transfer.lock);
-
-	while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(1))
-		cpu_relax();
-
-	writeb(0, MBOX_HEADER_REQ_MB0);
-	writeb(cpu_opp, REQ_MB1_ARMOPP);
-	writeb(ape_opp, REQ_MB1_APEOPP);
-	writeb(0, REQ_MB1_BOOSTOPP);
-	writel(MBOX_BIT(1), PRCM_MBOX_CPU_SET);
-	wait_for_completion(&mb1_transfer.work);
-	if ((do_ape) && (mb1_transfer.ack.ape_status != 0))
-		err = -EIO;
-	if ((do_arm) && (mb1_transfer.ack.arm_status != 0))
-		err = -EIO;
-
-	mutex_unlock(&mb1_transfer.lock);
-
-	return err;
-}
-
-/**
- * prcmu_set_ape_opp() - Set the OPP of the APE.
- * @opp:	The OPP to set.
- *
- * This function sets the OPP of the APE.
- */
-int prcmu_set_ape_opp(enum prcmu_ape_opp opp)
-{
-	return set_ape_cpu_opps(MB1H_APE_OPP, opp, APE_OPP_NO_CHANGE);
-}
-EXPORT_SYMBOL(prcmu_set_ape_opp);
-
-/**
- * prcmu_set_cpu_opp() - Set the OPP of the CPU.
- * @opp:	The OPP to set.
- *
- * This function sets the OPP of the CPU.
- */
-int prcmu_set_cpu_opp(enum prcmu_cpu_opp opp)
-{
-	return set_ape_cpu_opps(MB1H_ARM_OPP, CPU_OPP_NO_CHANGE, opp);
-}
-EXPORT_SYMBOL(prcmu_set_cpu_opp);
-
-/**
- * prcmu_set_ape_cpu_opps() - Set the OPPs of the APE and the CPU.
- * @ape_opp:	The APE OPP to set.
- * @cpu_opp:	The CPU OPP to set.
- *
- * This function sets the OPPs of the APE and the CPU.
- */
-int prcmu_set_ape_cpu_opps(enum prcmu_ape_opp ape_opp,
-			   enum prcmu_cpu_opp cpu_opp)
-{
-	return set_ape_cpu_opps(MB1H_ARM_APE_OPP, ape_opp, cpu_opp);
-}
-EXPORT_SYMBOL(prcmu_set_ape_cpu_opps);
-
-/**
- * prcmu_get_ape_opp() - Get the OPP of the APE.
- *
- * This function gets the OPP of the APE.
- */
-enum prcmu_ape_opp prcmu_get_ape_opp(void)
-{
-	return readb(ACK_MB1_CURR_APEOPP);
-}
-EXPORT_SYMBOL(prcmu_get_ape_opp);
-
-/**
- * prcmu_get_cpu_opp() - Get the OPP of the CPU.
- *
- * This function gets the OPP of the CPU. The OPP is specified in %%.
- * PRCMU_OPP_EXT is a special OPP value, not specified in %%.
- */
-int prcmu_get_cpu_opp(void)
-{
-	return readb(ACK_MB1_CURR_ARMOPP);
-}
-EXPORT_SYMBOL(prcmu_get_cpu_opp);
-
-bool prcmu_has_arm_maxopp(void)
-{
-	return (readb(PRCM_AVS_VARM_MAX_OPP) & PRCM_AVS_ISMODEENABLE_MASK)
-		== PRCM_AVS_ISMODEENABLE_MASK;
-}
-
-static void read_mailbox_0(void)
-{
-	writel(MBOX_BIT(0), PRCM_ARM_IT1_CLEAR);
-}
-
-static void read_mailbox_1(void)
-{
-	mb1_transfer.ack.arm_opp = readb(ACK_MB1_CURR_ARMOPP);
-	mb1_transfer.ack.ape_opp = readb(ACK_MB1_CURR_APEOPP);
-	complete(&mb1_transfer.work);
-	writel(MBOX_BIT(1), PRCM_ARM_IT1_CLEAR);
-}
-
-static void read_mailbox_2(void)
-{
-	writel(MBOX_BIT(2), PRCM_ARM_IT1_CLEAR);
-}
-
-static void read_mailbox_3(void)
-{
-	writel(MBOX_BIT(3), PRCM_ARM_IT1_CLEAR);
-}
-
-static void read_mailbox_4(void)
-{
-	writel(MBOX_BIT(4), PRCM_ARM_IT1_CLEAR);
-}
-
-static void read_mailbox_5(void)
-{
-	mb5_transfer.ack.status = readb(ACK_MB5_I2C_STATUS);
-	mb5_transfer.ack.value = readb(ACK_MB5_I2C_VAL);
-	complete(&mb5_transfer.work);
-	writel(MBOX_BIT(5), PRCM_ARM_IT1_CLEAR);
-}
-
-static void read_mailbox_6(void)
-{
-	writel(MBOX_BIT(6), PRCM_ARM_IT1_CLEAR);
-}
-
-static void read_mailbox_7(void)
-{
-	writel(MBOX_BIT(7), PRCM_ARM_IT1_CLEAR);
-}
-
-static void (* const read_mailbox[NUM_MBOX])(void) = {
-	read_mailbox_0,
-	read_mailbox_1,
-	read_mailbox_2,
-	read_mailbox_3,
-	read_mailbox_4,
-	read_mailbox_5,
-	read_mailbox_6,
-	read_mailbox_7
-};
-
-static irqreturn_t prcmu_irq_handler(int irq, void *data)
-{
-	u32 bits;
-	u8 n;
-
-	bits = (readl(PRCM_ARM_IT1_VAL) & (MBOX_BIT(NUM_MBOX) - 1));
-	if (unlikely(!bits))
-		return IRQ_NONE;
-
-	for (n = 0; bits; n++) {
-		if (bits & MBOX_BIT(n)) {
-			bits -= MBOX_BIT(n);
-			read_mailbox[n]();
-		}
-	}
-	return IRQ_HANDLED;
-}
-
-void __init prcmu_early_init(void)
-{
-	if (cpu_is_u8500v11() || cpu_is_u8500ed()) {
-		tcdm_base = __io_address(U8500_PRCMU_TCDM_BASE_V1);
-	} else if (cpu_is_u8500v2()) {
-		tcdm_base = __io_address(U8500_PRCMU_TCDM_BASE);
-	} else {
-		pr_err("prcmu: Unsupported chip version\n");
-		BUG();
-	}
-}
-
-static int __init prcmu_init(void)
-{
-	if (cpu_is_u8500ed()) {
-		pr_err("prcmu: Unsupported chip version\n");
-		return 0;
-	}
-
-	mutex_init(&mb1_transfer.lock);
-	init_completion(&mb1_transfer.work);
-	mutex_init(&mb5_transfer.lock);
-	init_completion(&mb5_transfer.work);
-
-	/* Clean up the mailbox interrupts after pre-kernel code. */
-	writel((MBOX_BIT(NUM_MBOX) - 1), PRCM_ARM_IT1_CLEAR);
-
-	return request_irq(IRQ_DB8500_PRCMU1, prcmu_irq_handler, 0,
-			   "prcmu", NULL);
-}
-
-arch_initcall(prcmu_init);
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 3ed3ff06be5d..7eaeb9750793 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -538,7 +538,7 @@ config AB8500_CORE
 
 config AB8500_I2C_CORE
 	bool "AB8500 register access via PRCMU I2C"
-	depends on AB8500_CORE && UX500_SOC_DB8500
+	depends on AB8500_CORE && MFD_DB8500_PRCMU
 	default y
 	help
 	  This enables register access to the AB8500 chip via PRCMU I2C.
@@ -575,6 +575,16 @@ config AB3550_CORE
 	  LEDs, vibrator, system power and temperature, power management
 	  and ALSA sound.
 
+config MFD_DB8500_PRCMU
+	bool "ST-Ericsson DB8500 Power Reset Control Management Unit"
+	depends on UX500_SOC_DB8500
+	select MFD_CORE
+	help
+	  Select this option to enable support for the DB8500 Power Reset
+	  and Control Management Unit. This is basically an autonomous
+	  system controller running an XP70 microprocessor, which is accessed
+	  through a register map.
+
 config MFD_CS5535
 	tristate "Support for CS5535 and CS5536 southbridge core functions"
 	select MFD_CORE
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 419caa9d7dcf..814c57a692a9 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -74,9 +74,11 @@ obj-$(CONFIG_AB3100_CORE)	+= ab3100-core.o
 obj-$(CONFIG_AB3100_OTP)	+= ab3100-otp.o
 obj-$(CONFIG_AB3550_CORE)	+= ab3550-core.o
 obj-$(CONFIG_AB8500_CORE)	+= ab8500-core.o ab8500-sysctrl.o
-obj-$(CONFIG_AB8500_I2C_CORE)	+= ab8500-i2c.o
 obj-$(CONFIG_AB8500_DEBUG)	+= ab8500-debugfs.o
 obj-$(CONFIG_AB8500_GPADC)	+= ab8500-gpadc.o
+obj-$(CONFIG_MFD_DB8500_PRCMU)	+= db8500-prcmu.o
+# ab8500-i2c need to come after db8500-prcmu (which provides the channel)
+obj-$(CONFIG_AB8500_I2C_CORE)	+= ab8500-i2c.o
 obj-$(CONFIG_MFD_TIMBERDALE)    += timberdale.o
 obj-$(CONFIG_PMIC_ADP5520)	+= adp5520.o
 obj-$(CONFIG_LPC_SCH)		+= lpc_sch.o
diff --git a/drivers/mfd/ab8500-i2c.c b/drivers/mfd/ab8500-i2c.c
index 821e6b86afd2..9be541c6b004 100644
--- a/drivers/mfd/ab8500-i2c.c
+++ b/drivers/mfd/ab8500-i2c.c
@@ -11,8 +11,7 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/mfd/ab8500.h>
-
-#include <mach/prcmu.h>
+#include <linux/mfd/db8500-prcmu.h>
 
 static int ab8500_i2c_write(struct ab8500 *ab8500, u16 addr, u8 data)
 {
diff --git a/drivers/mfd/db8500-prcmu-regs.h b/drivers/mfd/db8500-prcmu-regs.h
new file mode 100644
index 000000000000..c1226da19bfb
--- /dev/null
+++ b/drivers/mfd/db8500-prcmu-regs.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) STMicroelectronics 2009
+ * Copyright (C) ST-Ericsson SA 2010
+ *
+ * Author: Kumar Sanghvi <kumar.sanghvi@stericsson.com>
+ * Author: Sundar Iyer <sundar.iyer@stericsson.com>
+ *
+ * License Terms: GNU General Public License v2
+ *
+ * PRCM Unit registers
+ */
+
+#ifndef __MACH_PRCMU_REGS_H
+#define __MACH_PRCMU_REGS_H
+
+#include <mach/hardware.h>
+
+#define PRCM_ARM_PLLDIVPS	(_PRCMU_BASE + 0x118)
+#define PRCM_ARM_CHGCLKREQ	(_PRCMU_BASE + 0x114)
+#define PRCM_PLLARM_ENABLE	(_PRCMU_BASE + 0x98)
+#define PRCM_ARMCLKFIX_MGT	(_PRCMU_BASE + 0x0)
+#define PRCM_A9_RESETN_CLR	(_PRCMU_BASE + 0x1f4)
+#define PRCM_A9_RESETN_SET	(_PRCMU_BASE + 0x1f0)
+#define PRCM_ARM_LS_CLAMP	(_PRCMU_BASE + 0x30c)
+#define PRCM_SRAM_A9		(_PRCMU_BASE + 0x308)
+
+/* ARM WFI Standby signal register */
+#define PRCM_ARM_WFI_STANDBY    (_PRCMU_BASE + 0x130)
+#define PRCMU_IOCR              (_PRCMU_BASE + 0x310)
+
+/* CPU mailbox registers */
+#define PRCM_MBOX_CPU_VAL	(_PRCMU_BASE + 0x0fc)
+#define PRCM_MBOX_CPU_SET	(_PRCMU_BASE + 0x100)
+#define PRCM_MBOX_CPU_CLR	(_PRCMU_BASE + 0x104)
+
+/* Dual A9 core interrupt management unit registers */
+#define PRCM_A9_MASK_REQ	(_PRCMU_BASE + 0x328)
+#define PRCM_A9_MASK_ACK	(_PRCMU_BASE + 0x32c)
+#define PRCM_ARMITMSK31TO0	(_PRCMU_BASE + 0x11c)
+#define PRCM_ARMITMSK63TO32	(_PRCMU_BASE + 0x120)
+#define PRCM_ARMITMSK95TO64	(_PRCMU_BASE + 0x124)
+#define PRCM_ARMITMSK127TO96	(_PRCMU_BASE + 0x128)
+#define PRCM_POWER_STATE_VAL	(_PRCMU_BASE + 0x25C)
+#define PRCM_ARMITVAL31TO0	(_PRCMU_BASE + 0x260)
+#define PRCM_ARMITVAL63TO32	(_PRCMU_BASE + 0x264)
+#define PRCM_ARMITVAL95TO64	(_PRCMU_BASE + 0x268)
+#define PRCM_ARMITVAL127TO96	(_PRCMU_BASE + 0x26C)
+
+#define PRCM_HOSTACCESS_REQ	(_PRCMU_BASE + 0x334)
+#define ARM_WAKEUP_MODEM	0x1
+
+#define PRCM_ARM_IT1_CLEAR	(_PRCMU_BASE + 0x48C)
+#define PRCM_ARM_IT1_VAL	(_PRCMU_BASE + 0x494)
+#define PRCM_HOLD_EVT		(_PRCMU_BASE + 0x174)
+
+#define PRCM_ITSTATUS0		(_PRCMU_BASE + 0x148)
+#define PRCM_ITSTATUS1		(_PRCMU_BASE + 0x150)
+#define PRCM_ITSTATUS2		(_PRCMU_BASE + 0x158)
+#define PRCM_ITSTATUS3		(_PRCMU_BASE + 0x160)
+#define PRCM_ITSTATUS4		(_PRCMU_BASE + 0x168)
+#define PRCM_ITSTATUS5		(_PRCMU_BASE + 0x484)
+#define PRCM_ITCLEAR5		(_PRCMU_BASE + 0x488)
+#define PRCM_ARMIT_MASKXP70_IT	(_PRCMU_BASE + 0x1018)
+
+/* System reset register */
+#define PRCM_APE_SOFTRST	(_PRCMU_BASE + 0x228)
+
+/* Level shifter and clamp control registers */
+#define PRCM_MMIP_LS_CLAMP_SET     (_PRCMU_BASE + 0x420)
+#define PRCM_MMIP_LS_CLAMP_CLR     (_PRCMU_BASE + 0x424)
+
+/* PRCMU clock/PLL/reset registers */
+#define PRCM_PLLDSI_FREQ           (_PRCMU_BASE + 0x500)
+#define PRCM_PLLDSI_ENABLE         (_PRCMU_BASE + 0x504)
+#define PRCM_LCDCLK_MGT            (_PRCMU_BASE + 0x044)
+#define PRCM_MCDECLK_MGT           (_PRCMU_BASE + 0x064)
+#define PRCM_HDMICLK_MGT           (_PRCMU_BASE + 0x058)
+#define PRCM_TVCLK_MGT             (_PRCMU_BASE + 0x07c)
+#define PRCM_DSI_PLLOUT_SEL        (_PRCMU_BASE + 0x530)
+#define PRCM_DSITVCLK_DIV          (_PRCMU_BASE + 0x52C)
+#define PRCM_APE_RESETN_SET        (_PRCMU_BASE + 0x1E4)
+#define PRCM_APE_RESETN_CLR        (_PRCMU_BASE + 0x1E8)
+
+/* ePOD and memory power signal control registers */
+#define PRCM_EPOD_C_SET            (_PRCMU_BASE + 0x410)
+#define PRCM_SRAM_LS_SLEEP         (_PRCMU_BASE + 0x304)
+
+/* Debug power control unit registers */
+#define PRCM_POWER_STATE_SET       (_PRCMU_BASE + 0x254)
+
+/* Miscellaneous unit registers */
+#define PRCM_DSI_SW_RESET          (_PRCMU_BASE + 0x324)
+
+#endif /* __MACH_PRCMU_REGS_H */
diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c
new file mode 100644
index 000000000000..31f18c8c6bf8
--- /dev/null
+++ b/drivers/mfd/db8500-prcmu.c
@@ -0,0 +1,395 @@
+/*
+ * Copyright (C) STMicroelectronics 2009
+ * Copyright (C) ST-Ericsson SA 2010
+ *
+ * License Terms: GNU General Public License v2
+ * Author: Kumar Sanghvi <kumar.sanghvi@stericsson.com>
+ * Author: Sundar Iyer <sundar.iyer@stericsson.com>
+ * Author: Mattias Nilsson <mattias.i.nilsson@stericsson.com>
+ *
+ * U8500 PRCM Unit interface driver
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/mutex.h>
+#include <linux/completion.h>
+#include <linux/jiffies.h>
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/mfd/db8500-prcmu.h>
+
+#include <mach/hardware.h>
+
+#include "db8500-prcmu-regs.h"
+
+/* Global var to runtime determine TCDM base for v2 or v1 */
+static __iomem void *tcdm_base;
+
+#define _MBOX_HEADER		(tcdm_base + 0xFE8)
+#define MBOX_HEADER_REQ_MB0	(_MBOX_HEADER + 0x0)
+
+#define REQ_MB1 (tcdm_base + 0xFD0)
+#define REQ_MB5 (tcdm_base + 0xE44)
+
+#define REQ_MB1_ARMOPP		(REQ_MB1 + 0x0)
+#define REQ_MB1_APEOPP		(REQ_MB1 + 0x1)
+#define REQ_MB1_BOOSTOPP	(REQ_MB1 + 0x2)
+
+#define ACK_MB1 (tcdm_base + 0xE04)
+#define ACK_MB5 (tcdm_base + 0xDF4)
+
+#define ACK_MB1_CURR_ARMOPP		(ACK_MB1 + 0x0)
+#define ACK_MB1_CURR_APEOPP		(ACK_MB1 + 0x1)
+
+#define REQ_MB5_I2C_SLAVE_OP (REQ_MB5)
+#define REQ_MB5_I2C_HW_BITS (REQ_MB5 + 1)
+#define REQ_MB5_I2C_REG (REQ_MB5 + 2)
+#define REQ_MB5_I2C_VAL (REQ_MB5 + 3)
+
+#define ACK_MB5_I2C_STATUS (ACK_MB5 + 1)
+#define ACK_MB5_I2C_VAL (ACK_MB5 + 3)
+
+#define PRCM_AVS_VARM_MAX_OPP		(tcdm_base + 0x2E4)
+#define PRCM_AVS_ISMODEENABLE		7
+#define PRCM_AVS_ISMODEENABLE_MASK	(1 << PRCM_AVS_ISMODEENABLE)
+
+#define I2C_WRITE(slave) \
+	(((slave) << 1) | (cpu_is_u8500v2() ? BIT(6) : 0))
+#define I2C_READ(slave) \
+	(((slave) << 1) | (cpu_is_u8500v2() ? BIT(6) : 0) | BIT(0))
+#define I2C_STOP_EN BIT(3)
+
+enum mb1_h {
+	MB1H_ARM_OPP = 1,
+	MB1H_APE_OPP,
+	MB1H_ARM_APE_OPP,
+};
+
+static struct {
+	struct mutex lock;
+	struct completion work;
+	struct {
+		u8 arm_opp;
+		u8 ape_opp;
+		u8 arm_status;
+		u8 ape_status;
+	} ack;
+} mb1_transfer;
+
+enum ack_mb5_status {
+	I2C_WR_OK = 0x01,
+	I2C_RD_OK = 0x02,
+};
+
+#define MBOX_BIT BIT
+#define NUM_MBOX 8
+
+static struct {
+	struct mutex lock;
+	struct completion work;
+	bool failed;
+	struct {
+		u8 status;
+		u8 value;
+	} ack;
+} mb5_transfer;
+
+/**
+ * prcmu_abb_read() - Read register value(s) from the ABB.
+ * @slave:	The I2C slave address.
+ * @reg:	The (start) register address.
+ * @value:	The read out value(s).
+ * @size:	The number of registers to read.
+ *
+ * Reads register value(s) from the ABB.
+ * @size has to be 1 for the current firmware version.
+ */
+int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size)
+{
+	int r;
+
+	if (size != 1)
+		return -EINVAL;
+
+	r = mutex_lock_interruptible(&mb5_transfer.lock);
+	if (r)
+		return r;
+
+	while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(5))
+		cpu_relax();
+
+	writeb(I2C_READ(slave), REQ_MB5_I2C_SLAVE_OP);
+	writeb(I2C_STOP_EN, REQ_MB5_I2C_HW_BITS);
+	writeb(reg, REQ_MB5_I2C_REG);
+
+	writel(MBOX_BIT(5), PRCM_MBOX_CPU_SET);
+	if (!wait_for_completion_timeout(&mb5_transfer.work,
+			msecs_to_jiffies(500))) {
+		pr_err("prcmu: prcmu_abb_read timed out.\n");
+		r = -EIO;
+		goto unlock_and_return;
+	}
+	r = ((mb5_transfer.ack.status == I2C_RD_OK) ? 0 : -EIO);
+	if (!r)
+		*value = mb5_transfer.ack.value;
+
+unlock_and_return:
+	mutex_unlock(&mb5_transfer.lock);
+	return r;
+}
+EXPORT_SYMBOL(prcmu_abb_read);
+
+/**
+ * prcmu_abb_write() - Write register value(s) to the ABB.
+ * @slave:	The I2C slave address.
+ * @reg:	The (start) register address.
+ * @value:	The value(s) to write.
+ * @size:	The number of registers to write.
+ *
+ * Reads register value(s) from the ABB.
+ * @size has to be 1 for the current firmware version.
+ */
+int prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size)
+{
+	int r;
+
+	if (size != 1)
+		return -EINVAL;
+
+	r = mutex_lock_interruptible(&mb5_transfer.lock);
+	if (r)
+		return r;
+
+
+	while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(5))
+		cpu_relax();
+
+	writeb(I2C_WRITE(slave), REQ_MB5_I2C_SLAVE_OP);
+	writeb(I2C_STOP_EN, REQ_MB5_I2C_HW_BITS);
+	writeb(reg, REQ_MB5_I2C_REG);
+	writeb(*value, REQ_MB5_I2C_VAL);
+
+	writel(MBOX_BIT(5), PRCM_MBOX_CPU_SET);
+	if (!wait_for_completion_timeout(&mb5_transfer.work,
+			msecs_to_jiffies(500))) {
+		pr_err("prcmu: prcmu_abb_write timed out.\n");
+		r = -EIO;
+		goto unlock_and_return;
+	}
+	r = ((mb5_transfer.ack.status == I2C_WR_OK) ? 0 : -EIO);
+
+unlock_and_return:
+	mutex_unlock(&mb5_transfer.lock);
+	return r;
+}
+EXPORT_SYMBOL(prcmu_abb_write);
+
+static int set_ape_cpu_opps(u8 header, enum prcmu_ape_opp ape_opp,
+			    enum prcmu_cpu_opp cpu_opp)
+{
+	bool do_ape;
+	bool do_arm;
+	int err = 0;
+
+	do_ape = ((header == MB1H_APE_OPP) || (header == MB1H_ARM_APE_OPP));
+	do_arm = ((header == MB1H_ARM_OPP) || (header == MB1H_ARM_APE_OPP));
+
+	mutex_lock(&mb1_transfer.lock);
+
+	while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(1))
+		cpu_relax();
+
+	writeb(0, MBOX_HEADER_REQ_MB0);
+	writeb(cpu_opp, REQ_MB1_ARMOPP);
+	writeb(ape_opp, REQ_MB1_APEOPP);
+	writeb(0, REQ_MB1_BOOSTOPP);
+	writel(MBOX_BIT(1), PRCM_MBOX_CPU_SET);
+	wait_for_completion(&mb1_transfer.work);
+	if ((do_ape) && (mb1_transfer.ack.ape_status != 0))
+		err = -EIO;
+	if ((do_arm) && (mb1_transfer.ack.arm_status != 0))
+		err = -EIO;
+
+	mutex_unlock(&mb1_transfer.lock);
+
+	return err;
+}
+
+/**
+ * prcmu_set_ape_opp() - Set the OPP of the APE.
+ * @opp:	The OPP to set.
+ *
+ * This function sets the OPP of the APE.
+ */
+int prcmu_set_ape_opp(enum prcmu_ape_opp opp)
+{
+	return set_ape_cpu_opps(MB1H_APE_OPP, opp, APE_OPP_NO_CHANGE);
+}
+EXPORT_SYMBOL(prcmu_set_ape_opp);
+
+/**
+ * prcmu_set_cpu_opp() - Set the OPP of the CPU.
+ * @opp:	The OPP to set.
+ *
+ * This function sets the OPP of the CPU.
+ */
+int prcmu_set_cpu_opp(enum prcmu_cpu_opp opp)
+{
+	return set_ape_cpu_opps(MB1H_ARM_OPP, CPU_OPP_NO_CHANGE, opp);
+}
+EXPORT_SYMBOL(prcmu_set_cpu_opp);
+
+/**
+ * prcmu_set_ape_cpu_opps() - Set the OPPs of the APE and the CPU.
+ * @ape_opp:	The APE OPP to set.
+ * @cpu_opp:	The CPU OPP to set.
+ *
+ * This function sets the OPPs of the APE and the CPU.
+ */
+int prcmu_set_ape_cpu_opps(enum prcmu_ape_opp ape_opp,
+			   enum prcmu_cpu_opp cpu_opp)
+{
+	return set_ape_cpu_opps(MB1H_ARM_APE_OPP, ape_opp, cpu_opp);
+}
+EXPORT_SYMBOL(prcmu_set_ape_cpu_opps);
+
+/**
+ * prcmu_get_ape_opp() - Get the OPP of the APE.
+ *
+ * This function gets the OPP of the APE.
+ */
+enum prcmu_ape_opp prcmu_get_ape_opp(void)
+{
+	return readb(ACK_MB1_CURR_APEOPP);
+}
+EXPORT_SYMBOL(prcmu_get_ape_opp);
+
+/**
+ * prcmu_get_cpu_opp() - Get the OPP of the CPU.
+ *
+ * This function gets the OPP of the CPU. The OPP is specified in %%.
+ * PRCMU_OPP_EXT is a special OPP value, not specified in %%.
+ */
+int prcmu_get_cpu_opp(void)
+{
+	return readb(ACK_MB1_CURR_ARMOPP);
+}
+EXPORT_SYMBOL(prcmu_get_cpu_opp);
+
+bool prcmu_has_arm_maxopp(void)
+{
+	return (readb(PRCM_AVS_VARM_MAX_OPP) & PRCM_AVS_ISMODEENABLE_MASK)
+		== PRCM_AVS_ISMODEENABLE_MASK;
+}
+
+static void read_mailbox_0(void)
+{
+	writel(MBOX_BIT(0), PRCM_ARM_IT1_CLEAR);
+}
+
+static void read_mailbox_1(void)
+{
+	mb1_transfer.ack.arm_opp = readb(ACK_MB1_CURR_ARMOPP);
+	mb1_transfer.ack.ape_opp = readb(ACK_MB1_CURR_APEOPP);
+	complete(&mb1_transfer.work);
+	writel(MBOX_BIT(1), PRCM_ARM_IT1_CLEAR);
+}
+
+static void read_mailbox_2(void)
+{
+	writel(MBOX_BIT(2), PRCM_ARM_IT1_CLEAR);
+}
+
+static void read_mailbox_3(void)
+{
+	writel(MBOX_BIT(3), PRCM_ARM_IT1_CLEAR);
+}
+
+static void read_mailbox_4(void)
+{
+	writel(MBOX_BIT(4), PRCM_ARM_IT1_CLEAR);
+}
+
+static void read_mailbox_5(void)
+{
+	mb5_transfer.ack.status = readb(ACK_MB5_I2C_STATUS);
+	mb5_transfer.ack.value = readb(ACK_MB5_I2C_VAL);
+	complete(&mb5_transfer.work);
+	writel(MBOX_BIT(5), PRCM_ARM_IT1_CLEAR);
+}
+
+static void read_mailbox_6(void)
+{
+	writel(MBOX_BIT(6), PRCM_ARM_IT1_CLEAR);
+}
+
+static void read_mailbox_7(void)
+{
+	writel(MBOX_BIT(7), PRCM_ARM_IT1_CLEAR);
+}
+
+static void (* const read_mailbox[NUM_MBOX])(void) = {
+	read_mailbox_0,
+	read_mailbox_1,
+	read_mailbox_2,
+	read_mailbox_3,
+	read_mailbox_4,
+	read_mailbox_5,
+	read_mailbox_6,
+	read_mailbox_7
+};
+
+static irqreturn_t prcmu_irq_handler(int irq, void *data)
+{
+	u32 bits;
+	u8 n;
+
+	bits = (readl(PRCM_ARM_IT1_VAL) & (MBOX_BIT(NUM_MBOX) - 1));
+	if (unlikely(!bits))
+		return IRQ_NONE;
+
+	for (n = 0; bits; n++) {
+		if (bits & MBOX_BIT(n)) {
+			bits -= MBOX_BIT(n);
+			read_mailbox[n]();
+		}
+	}
+	return IRQ_HANDLED;
+}
+
+void __init prcmu_early_init(void)
+{
+	if (cpu_is_u8500v11() || cpu_is_u8500ed()) {
+		tcdm_base = __io_address(U8500_PRCMU_TCDM_BASE_V1);
+	} else if (cpu_is_u8500v2()) {
+		tcdm_base = __io_address(U8500_PRCMU_TCDM_BASE);
+	} else {
+		pr_err("prcmu: Unsupported chip version\n");
+		BUG();
+	}
+}
+
+static int __init prcmu_init(void)
+{
+	if (cpu_is_u8500ed()) {
+		pr_err("prcmu: Unsupported chip version\n");
+		return 0;
+	}
+
+	mutex_init(&mb1_transfer.lock);
+	init_completion(&mb1_transfer.work);
+	mutex_init(&mb5_transfer.lock);
+	init_completion(&mb5_transfer.work);
+
+	/* Clean up the mailbox interrupts after pre-kernel code. */
+	writel((MBOX_BIT(NUM_MBOX) - 1), PRCM_ARM_IT1_CLEAR);
+
+	return request_irq(IRQ_DB8500_PRCMU1, prcmu_irq_handler, 0,
+			   "prcmu", NULL);
+}
+
+arch_initcall(prcmu_init);
diff --git a/include/linux/mfd/db8500-prcmu.h b/include/linux/mfd/db8500-prcmu.h
new file mode 100644
index 000000000000..d591d79aa6f0
--- /dev/null
+++ b/include/linux/mfd/db8500-prcmu.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) STMicroelectronics 2009
+ * Copyright (C) ST-Ericsson SA 2010
+ *
+ * Author: Sundar Iyer <sundar.iyer@stericsson.com>
+ * Author: Martin Persson <martin.persson@stericsson.com>
+ *
+ * License Terms: GNU General Public License v2
+ *
+ * PRCM Unit definitions
+ */
+
+#ifndef __MACH_PRCMU_DEFS_H
+#define __MACH_PRCMU_DEFS_H
+
+enum prcmu_cpu_opp {
+	CPU_OPP_INIT	  = 0x00,
+	CPU_OPP_NO_CHANGE = 0x01,
+	CPU_OPP_100	  = 0x02,
+	CPU_OPP_50	  = 0x03,
+	CPU_OPP_MAX	  = 0x04,
+	CPU_OPP_EXT_CLK	  = 0x07
+};
+enum prcmu_ape_opp {
+	APE_OPP_NO_CHANGE = 0x00,
+	APE_OPP_100	  = 0x02,
+	APE_OPP_50	  = 0x03,
+};
+
+#endif /* __MACH_PRCMU_DEFS_H */
+
+/*
+ * Copyright (C) STMicroelectronics 2009
+ * Copyright (C) ST-Ericsson SA 2010
+ *
+ * Author: Kumar Sanghvi <kumar.sanghvi@stericsson.com>
+ * Author: Sundar Iyer <sundar.iyer@stericsson.com>
+ * Author: Mattias Nilsson <mattias.i.nilsson@stericsson.com>
+ *
+ * License Terms: GNU General Public License v2
+ *
+ * PRCM Unit f/w API
+ */
+#ifndef __MACH_PRCMU_H
+#define __MACH_PRCMU_H
+
+void __init prcmu_early_init(void);
+int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size);
+int prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size);
+int prcmu_set_ape_opp(enum prcmu_ape_opp opp);
+int prcmu_set_cpu_opp(enum prcmu_cpu_opp opp);
+int prcmu_set_ape_cpu_opps(enum prcmu_ape_opp ape_opp,
+			   enum prcmu_cpu_opp cpu_opp);
+enum prcmu_ape_opp prcmu_get_ape_opp(void);
+int prcmu_get_cpu_opp(void);
+bool prcmu_has_arm_maxopp(void);
+
+#endif /* __MACH_PRCMU_H */
-- 
cgit v1.2.3


From 3df57bcf5a6ba74572218a811bd0e311414f2aff Mon Sep 17 00:00:00 2001
From: Mattias Nilsson <mattias.i.nilsson@stericsson.com>
Date: Mon, 16 May 2011 00:15:05 +0200
Subject: mfd: update DB8500 PRCMU driver

This updates the DB8500 PRCMU driver to the latest version
available internally. Nominally we would update the dependent
CPUfreq driver at the same time but since that is being moved
around in this patch set we postpone that by simply deactivating
it for the time being.

This is a snapshot of the current PRCMU firmware API as it looks
right now. The PRCMU firmware is still subject to change. This
also updates the CPUfreq driver to a newer version that will
utilize the new API.

Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Mattias Nilsson <mattias.i.nilsson@stericsson.com>
Signed-off-by: Martin Persson <martin.persson@stericsson.com>
Signed-off-by: Per Fransson <per.xx.fransson@stericsson.com>
Signed-off-by: Jonas Aaberg <jonas.aberg@stericsson.com>
Signed-off-by: Sebastien Rault <sebastien.rault@stericsson.com>
Signed-off-by: Bengt Jonsson <bengt.g.jonsson@stericsson.com>
Signed-off-by: Rickard Andersson <rickard.andersson@stericsson.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/mach-ux500/Makefile     |    2 +-
 arch/arm/mach-ux500/cpu-db8500.c |    5 +
 drivers/mfd/db8500-prcmu-regs.h  |  194 ++--
 drivers/mfd/db8500-prcmu.c       | 1879 ++++++++++++++++++++++++++++++++++----
 include/linux/mfd/db8500-prcmu.h |  992 +++++++++++++++++++-
 5 files changed, 2782 insertions(+), 290 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-ux500/Makefile b/arch/arm/mach-ux500/Makefile
index 7a1d43e04f97..1694916e6822 100644
--- a/arch/arm/mach-ux500/Makefile
+++ b/arch/arm/mach-ux500/Makefile
@@ -17,4 +17,4 @@ obj-$(CONFIG_HOTPLUG_CPU)	+= hotplug.o
 obj-$(CONFIG_LOCAL_TIMERS)	+= localtimer.o
 obj-$(CONFIG_U5500_MODEM_IRQ)	+= modem-irq-db5500.o
 obj-$(CONFIG_U5500_MBOX)	+= mbox-db5500.o
-obj-$(CONFIG_CPU_FREQ)		+= cpufreq.o
+
diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c
index 3d419fef0470..c3c417656bd9 100644
--- a/arch/arm/mach-ux500/cpu-db8500.c
+++ b/arch/arm/mach-ux500/cpu-db8500.c
@@ -131,9 +131,14 @@ static struct platform_device db8500_pmu_device = {
 	.dev.platform_data	= &db8500_pmu_platdata,
 };
 
+static struct platform_device db8500_prcmu_device = {
+	.name			= "db8500-prcmu",
+};
+
 static struct platform_device *platform_devs[] __initdata = {
 	&u8500_dma40_device,
 	&db8500_pmu_device,
+	&db8500_prcmu_device,
 };
 
 static resource_size_t __initdata db8500_gpio_base[] = {
diff --git a/drivers/mfd/db8500-prcmu-regs.h b/drivers/mfd/db8500-prcmu-regs.h
index c1226da19bfb..3bbf04d58043 100644
--- a/drivers/mfd/db8500-prcmu-regs.h
+++ b/drivers/mfd/db8500-prcmu-regs.h
@@ -9,86 +9,158 @@
  *
  * PRCM Unit registers
  */
+#ifndef __DB8500_PRCMU_REGS_H
+#define __DB8500_PRCMU_REGS_H
 
-#ifndef __MACH_PRCMU_REGS_H
-#define __MACH_PRCMU_REGS_H
-
+#include <linux/bitops.h>
 #include <mach/hardware.h>
 
-#define PRCM_ARM_PLLDIVPS	(_PRCMU_BASE + 0x118)
-#define PRCM_ARM_CHGCLKREQ	(_PRCMU_BASE + 0x114)
-#define PRCM_PLLARM_ENABLE	(_PRCMU_BASE + 0x98)
-#define PRCM_ARMCLKFIX_MGT	(_PRCMU_BASE + 0x0)
-#define PRCM_A9_RESETN_CLR	(_PRCMU_BASE + 0x1f4)
-#define PRCM_A9_RESETN_SET	(_PRCMU_BASE + 0x1f0)
-#define PRCM_ARM_LS_CLAMP	(_PRCMU_BASE + 0x30c)
-#define PRCM_SRAM_A9		(_PRCMU_BASE + 0x308)
+#define BITS(_start, _end) ((BIT(_end) - BIT(_start)) + BIT(_end))
+
+#define PRCM_ARM_PLLDIVPS 0x118
+#define PRCM_ARM_PLLDIVPS_ARM_BRM_RATE	BITS(0, 5)
+#define PRCM_ARM_PLLDIVPS_MAX_MASK	0xF
+
+#define PRCM_PLLARM_LOCKP 0x0A8
+#define PRCM_PLLARM_LOCKP_PRCM_PLLARM_LOCKP3 BIT(1)
+
+#define PRCM_ARM_CHGCLKREQ 0x114
+#define PRCM_ARM_CHGCLKREQ_PRCM_ARM_CHGCLKREQ BIT(0)
+
+#define PRCM_PLLARM_ENABLE 0x98
+#define PRCM_PLLARM_ENABLE_PRCM_PLLARM_ENABLE	BIT(0)
+#define PRCM_PLLARM_ENABLE_PRCM_PLLARM_COUNTON	BIT(8)
+
+#define PRCM_ARMCLKFIX_MGT	0x0
+#define PRCM_A9_RESETN_CLR	0x1f4
+#define PRCM_A9_RESETN_SET	0x1f0
+#define PRCM_ARM_LS_CLAMP	0x30C
+#define PRCM_SRAM_A9		0x308
 
 /* ARM WFI Standby signal register */
-#define PRCM_ARM_WFI_STANDBY    (_PRCMU_BASE + 0x130)
-#define PRCMU_IOCR              (_PRCMU_BASE + 0x310)
+#define PRCM_ARM_WFI_STANDBY	0x130
+#define PRCM_IOCR		0x310
+#define PRCM_IOCR_IOFORCE BIT(0)
 
 /* CPU mailbox registers */
-#define PRCM_MBOX_CPU_VAL	(_PRCMU_BASE + 0x0fc)
-#define PRCM_MBOX_CPU_SET	(_PRCMU_BASE + 0x100)
-#define PRCM_MBOX_CPU_CLR	(_PRCMU_BASE + 0x104)
+#define PRCM_MBOX_CPU_VAL 0x0FC
+#define PRCM_MBOX_CPU_SET 0x100
 
 /* Dual A9 core interrupt management unit registers */
-#define PRCM_A9_MASK_REQ	(_PRCMU_BASE + 0x328)
-#define PRCM_A9_MASK_ACK	(_PRCMU_BASE + 0x32c)
-#define PRCM_ARMITMSK31TO0	(_PRCMU_BASE + 0x11c)
-#define PRCM_ARMITMSK63TO32	(_PRCMU_BASE + 0x120)
-#define PRCM_ARMITMSK95TO64	(_PRCMU_BASE + 0x124)
-#define PRCM_ARMITMSK127TO96	(_PRCMU_BASE + 0x128)
-#define PRCM_POWER_STATE_VAL	(_PRCMU_BASE + 0x25C)
-#define PRCM_ARMITVAL31TO0	(_PRCMU_BASE + 0x260)
-#define PRCM_ARMITVAL63TO32	(_PRCMU_BASE + 0x264)
-#define PRCM_ARMITVAL95TO64	(_PRCMU_BASE + 0x268)
-#define PRCM_ARMITVAL127TO96	(_PRCMU_BASE + 0x26C)
-
-#define PRCM_HOSTACCESS_REQ	(_PRCMU_BASE + 0x334)
-#define ARM_WAKEUP_MODEM	0x1
-
-#define PRCM_ARM_IT1_CLEAR	(_PRCMU_BASE + 0x48C)
-#define PRCM_ARM_IT1_VAL	(_PRCMU_BASE + 0x494)
-#define PRCM_HOLD_EVT		(_PRCMU_BASE + 0x174)
-
-#define PRCM_ITSTATUS0		(_PRCMU_BASE + 0x148)
-#define PRCM_ITSTATUS1		(_PRCMU_BASE + 0x150)
-#define PRCM_ITSTATUS2		(_PRCMU_BASE + 0x158)
-#define PRCM_ITSTATUS3		(_PRCMU_BASE + 0x160)
-#define PRCM_ITSTATUS4		(_PRCMU_BASE + 0x168)
-#define PRCM_ITSTATUS5		(_PRCMU_BASE + 0x484)
-#define PRCM_ITCLEAR5		(_PRCMU_BASE + 0x488)
-#define PRCM_ARMIT_MASKXP70_IT	(_PRCMU_BASE + 0x1018)
+#define PRCM_A9_MASK_REQ 0x328
+#define PRCM_A9_MASK_REQ_PRCM_A9_MASK_REQ BIT(0)
+
+#define PRCM_A9_MASK_ACK	0x32C
+#define PRCM_ARMITMSK31TO0	0x11C
+#define PRCM_ARMITMSK63TO32	0x120
+#define PRCM_ARMITMSK95TO64	0x124
+#define PRCM_ARMITMSK127TO96	0x128
+#define PRCM_POWER_STATE_VAL	0x25C
+#define PRCM_ARMITVAL31TO0	0x260
+#define PRCM_ARMITVAL63TO32	0x264
+#define PRCM_ARMITVAL95TO64	0x268
+#define PRCM_ARMITVAL127TO96	0x26C
+
+#define PRCM_HOSTACCESS_REQ 0x334
+#define PRCM_HOSTACCESS_REQ_HOSTACCESS_REQ BIT(0)
+
+#define PRCM_ARM_IT1_CLR 0x48C
+#define PRCM_ARM_IT1_VAL 0x494
+
+#define PRCM_ITSTATUS0		0x148
+#define PRCM_ITSTATUS1		0x150
+#define PRCM_ITSTATUS2		0x158
+#define PRCM_ITSTATUS3		0x160
+#define PRCM_ITSTATUS4		0x168
+#define PRCM_ITSTATUS5		0x484
+#define PRCM_ITCLEAR5		0x488
+#define PRCM_ARMIT_MASKXP70_IT	0x1018
 
 /* System reset register */
-#define PRCM_APE_SOFTRST	(_PRCMU_BASE + 0x228)
+#define PRCM_APE_SOFTRST 0x228
 
 /* Level shifter and clamp control registers */
-#define PRCM_MMIP_LS_CLAMP_SET     (_PRCMU_BASE + 0x420)
-#define PRCM_MMIP_LS_CLAMP_CLR     (_PRCMU_BASE + 0x424)
+#define PRCM_MMIP_LS_CLAMP_SET 0x420
+#define PRCM_MMIP_LS_CLAMP_CLR 0x424
+
+/* PRCMU HW semaphore */
+#define PRCM_SEM 0x400
+#define PRCM_SEM_PRCM_SEM BIT(0)
 
 /* PRCMU clock/PLL/reset registers */
-#define PRCM_PLLDSI_FREQ           (_PRCMU_BASE + 0x500)
-#define PRCM_PLLDSI_ENABLE         (_PRCMU_BASE + 0x504)
-#define PRCM_LCDCLK_MGT            (_PRCMU_BASE + 0x044)
-#define PRCM_MCDECLK_MGT           (_PRCMU_BASE + 0x064)
-#define PRCM_HDMICLK_MGT           (_PRCMU_BASE + 0x058)
-#define PRCM_TVCLK_MGT             (_PRCMU_BASE + 0x07c)
-#define PRCM_DSI_PLLOUT_SEL        (_PRCMU_BASE + 0x530)
-#define PRCM_DSITVCLK_DIV          (_PRCMU_BASE + 0x52C)
-#define PRCM_APE_RESETN_SET        (_PRCMU_BASE + 0x1E4)
-#define PRCM_APE_RESETN_CLR        (_PRCMU_BASE + 0x1E8)
+#define PRCM_PLLDSI_FREQ	0x500
+#define PRCM_PLLDSI_ENABLE	0x504
+#define PRCM_PLLDSI_LOCKP	0x508
+#define PRCM_DSI_PLLOUT_SEL	0x530
+#define PRCM_DSITVCLK_DIV	0x52C
+#define PRCM_APE_RESETN_SET	0x1E4
+#define PRCM_APE_RESETN_CLR	0x1E8
+
+#define PRCM_TCR		0x1C8
+#define PRCM_TCR_TENSEL_MASK	BITS(0, 7)
+#define PRCM_TCR_STOP_TIMERS	BIT(16)
+#define PRCM_TCR_DOZE_MODE	BIT(17)
+
+#define PRCM_CLKOCR			0x1CC
+#define PRCM_CLKOCR_CLKODIV0_SHIFT	0
+#define PRCM_CLKOCR_CLKODIV0_MASK	BITS(0, 5)
+#define PRCM_CLKOCR_CLKOSEL0_SHIFT	6
+#define PRCM_CLKOCR_CLKOSEL0_MASK	BITS(6, 8)
+#define PRCM_CLKOCR_CLKODIV1_SHIFT	16
+#define PRCM_CLKOCR_CLKODIV1_MASK	BITS(16, 21)
+#define PRCM_CLKOCR_CLKOSEL1_SHIFT	22
+#define PRCM_CLKOCR_CLKOSEL1_MASK	BITS(22, 24)
+#define PRCM_CLKOCR_CLK1TYPE		BIT(28)
+
+#define PRCM_SGACLK_MGT		0x014
+#define PRCM_UARTCLK_MGT	0x018
+#define PRCM_MSP02CLK_MGT	0x01C
+#define PRCM_MSP1CLK_MGT	0x288
+#define PRCM_I2CCLK_MGT		0x020
+#define PRCM_SDMMCCLK_MGT	0x024
+#define PRCM_SLIMCLK_MGT	0x028
+#define PRCM_PER1CLK_MGT	0x02C
+#define PRCM_PER2CLK_MGT	0x030
+#define PRCM_PER3CLK_MGT	0x034
+#define PRCM_PER5CLK_MGT	0x038
+#define PRCM_PER6CLK_MGT	0x03C
+#define PRCM_PER7CLK_MGT	0x040
+#define PRCM_LCDCLK_MGT		0x044
+#define PRCM_BMLCLK_MGT		0x04C
+#define PRCM_HSITXCLK_MGT	0x050
+#define PRCM_HSIRXCLK_MGT	0x054
+#define PRCM_HDMICLK_MGT	0x058
+#define PRCM_APEATCLK_MGT	0x05C
+#define PRCM_APETRACECLK_MGT	0x060
+#define PRCM_MCDECLK_MGT	0x064
+#define PRCM_IPI2CCLK_MGT	0x068
+#define PRCM_DSIALTCLK_MGT	0x06C
+#define PRCM_DMACLK_MGT		0x074
+#define PRCM_B2R2CLK_MGT	0x078
+#define PRCM_TVCLK_MGT		0x07C
+#define PRCM_UNIPROCLK_MGT	0x278
+#define PRCM_SSPCLK_MGT		0x280
+#define PRCM_RNGCLK_MGT		0x284
+#define PRCM_UICCCLK_MGT	0x27C
+
+#define PRCM_CLK_MGT_CLKPLLDIV_MASK	BITS(0, 4)
+#define PRCM_CLK_MGT_CLKPLLSW_MASK	BITS(5, 7)
+#define PRCM_CLK_MGT_CLKEN		BIT(8)
 
 /* ePOD and memory power signal control registers */
-#define PRCM_EPOD_C_SET            (_PRCMU_BASE + 0x410)
-#define PRCM_SRAM_LS_SLEEP         (_PRCMU_BASE + 0x304)
+#define PRCM_EPOD_C_SET		0x410
+#define PRCM_SRAM_LS_SLEEP	0x304
 
 /* Debug power control unit registers */
-#define PRCM_POWER_STATE_SET       (_PRCMU_BASE + 0x254)
+#define PRCM_POWER_STATE_SET 0x254
 
 /* Miscellaneous unit registers */
-#define PRCM_DSI_SW_RESET          (_PRCMU_BASE + 0x324)
+#define PRCM_DSI_SW_RESET 0x324
+#define PRCM_GPIOCR		0x138
+
+/* GPIOCR register */
+#define PRCM_GPIOCR_SPI2_SELECT BIT(23)
+
+#define PRCM_DDR_SUBSYS_APE_MINBW  0x438
 
-#endif /* __MACH_PRCMU_REGS_H */
+#endif /* __DB8500_PRCMU_REGS_H */
diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c
index 31f18c8c6bf8..c44725bd8b9a 100644
--- a/drivers/mfd/db8500-prcmu.c
+++ b/drivers/mfd/db8500-prcmu.c
@@ -10,93 +10,1354 @@
  * U8500 PRCM Unit interface driver
  *
  */
-#include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/err.h>
+#include <linux/spinlock.h>
 #include <linux/io.h>
+#include <linux/slab.h>
 #include <linux/mutex.h>
 #include <linux/completion.h>
+#include <linux/irq.h>
 #include <linux/jiffies.h>
 #include <linux/bitops.h>
-#include <linux/interrupt.h>
+#include <linux/fs.h>
+#include <linux/platform_device.h>
+#include <linux/uaccess.h>
+#include <linux/mfd/core.h>
 #include <linux/mfd/db8500-prcmu.h>
+#include <mach/hardware.h>
+#include <mach/irqs.h>
+#include <mach/db8500-regs.h>
+#include <mach/id.h>
+#include "db8500-prcmu-regs.h"
+
+/* Offset for the firmware version within the TCPM */
+#define PRCMU_FW_VERSION_OFFSET 0xA4
+
+/* PRCMU project numbers, defined by PRCMU FW */
+#define PRCMU_PROJECT_ID_8500V1_0 1
+#define PRCMU_PROJECT_ID_8500V2_0 2
+#define PRCMU_PROJECT_ID_8400V2_0 3
+
+/* Index of different voltages to be used when accessing AVSData */
+#define PRCM_AVS_BASE		0x2FC
+#define PRCM_AVS_VBB_RET	(PRCM_AVS_BASE + 0x0)
+#define PRCM_AVS_VBB_MAX_OPP	(PRCM_AVS_BASE + 0x1)
+#define PRCM_AVS_VBB_100_OPP	(PRCM_AVS_BASE + 0x2)
+#define PRCM_AVS_VBB_50_OPP	(PRCM_AVS_BASE + 0x3)
+#define PRCM_AVS_VARM_MAX_OPP	(PRCM_AVS_BASE + 0x4)
+#define PRCM_AVS_VARM_100_OPP	(PRCM_AVS_BASE + 0x5)
+#define PRCM_AVS_VARM_50_OPP	(PRCM_AVS_BASE + 0x6)
+#define PRCM_AVS_VARM_RET	(PRCM_AVS_BASE + 0x7)
+#define PRCM_AVS_VAPE_100_OPP	(PRCM_AVS_BASE + 0x8)
+#define PRCM_AVS_VAPE_50_OPP	(PRCM_AVS_BASE + 0x9)
+#define PRCM_AVS_VMOD_100_OPP	(PRCM_AVS_BASE + 0xA)
+#define PRCM_AVS_VMOD_50_OPP	(PRCM_AVS_BASE + 0xB)
+#define PRCM_AVS_VSAFE		(PRCM_AVS_BASE + 0xC)
+
+#define PRCM_AVS_VOLTAGE		0
+#define PRCM_AVS_VOLTAGE_MASK		0x3f
+#define PRCM_AVS_ISSLOWSTARTUP		6
+#define PRCM_AVS_ISSLOWSTARTUP_MASK	(1 << PRCM_AVS_ISSLOWSTARTUP)
+#define PRCM_AVS_ISMODEENABLE		7
+#define PRCM_AVS_ISMODEENABLE_MASK	(1 << PRCM_AVS_ISMODEENABLE)
+
+#define PRCM_BOOT_STATUS	0xFFF
+#define PRCM_ROMCODE_A2P	0xFFE
+#define PRCM_ROMCODE_P2A	0xFFD
+#define PRCM_XP70_CUR_PWR_STATE 0xFFC      /* 4 BYTES */
+
+#define PRCM_SW_RST_REASON 0xFF8 /* 2 bytes */
+
+#define _PRCM_MBOX_HEADER		0xFE8 /* 16 bytes */
+#define PRCM_MBOX_HEADER_REQ_MB0	(_PRCM_MBOX_HEADER + 0x0)
+#define PRCM_MBOX_HEADER_REQ_MB1	(_PRCM_MBOX_HEADER + 0x1)
+#define PRCM_MBOX_HEADER_REQ_MB2	(_PRCM_MBOX_HEADER + 0x2)
+#define PRCM_MBOX_HEADER_REQ_MB3	(_PRCM_MBOX_HEADER + 0x3)
+#define PRCM_MBOX_HEADER_REQ_MB4	(_PRCM_MBOX_HEADER + 0x4)
+#define PRCM_MBOX_HEADER_REQ_MB5	(_PRCM_MBOX_HEADER + 0x5)
+#define PRCM_MBOX_HEADER_ACK_MB0	(_PRCM_MBOX_HEADER + 0x8)
+
+/* Req Mailboxes */
+#define PRCM_REQ_MB0 0xFDC /* 12 bytes  */
+#define PRCM_REQ_MB1 0xFD0 /* 12 bytes  */
+#define PRCM_REQ_MB2 0xFC0 /* 16 bytes  */
+#define PRCM_REQ_MB3 0xE4C /* 372 bytes  */
+#define PRCM_REQ_MB4 0xE48 /* 4 bytes  */
+#define PRCM_REQ_MB5 0xE44 /* 4 bytes  */
+
+/* Ack Mailboxes */
+#define PRCM_ACK_MB0 0xE08 /* 52 bytes  */
+#define PRCM_ACK_MB1 0xE04 /* 4 bytes */
+#define PRCM_ACK_MB2 0xE00 /* 4 bytes */
+#define PRCM_ACK_MB3 0xDFC /* 4 bytes */
+#define PRCM_ACK_MB4 0xDF8 /* 4 bytes */
+#define PRCM_ACK_MB5 0xDF4 /* 4 bytes */
+
+/* Mailbox 0 headers */
+#define MB0H_POWER_STATE_TRANS		0
+#define MB0H_CONFIG_WAKEUPS_EXE		1
+#define MB0H_READ_WAKEUP_ACK		3
+#define MB0H_CONFIG_WAKEUPS_SLEEP	4
+
+#define MB0H_WAKEUP_EXE 2
+#define MB0H_WAKEUP_SLEEP 5
+
+/* Mailbox 0 REQs */
+#define PRCM_REQ_MB0_AP_POWER_STATE	(PRCM_REQ_MB0 + 0x0)
+#define PRCM_REQ_MB0_AP_PLL_STATE	(PRCM_REQ_MB0 + 0x1)
+#define PRCM_REQ_MB0_ULP_CLOCK_STATE	(PRCM_REQ_MB0 + 0x2)
+#define PRCM_REQ_MB0_DO_NOT_WFI		(PRCM_REQ_MB0 + 0x3)
+#define PRCM_REQ_MB0_WAKEUP_8500	(PRCM_REQ_MB0 + 0x4)
+#define PRCM_REQ_MB0_WAKEUP_4500	(PRCM_REQ_MB0 + 0x8)
+
+/* Mailbox 0 ACKs */
+#define PRCM_ACK_MB0_AP_PWRSTTR_STATUS	(PRCM_ACK_MB0 + 0x0)
+#define PRCM_ACK_MB0_READ_POINTER	(PRCM_ACK_MB0 + 0x1)
+#define PRCM_ACK_MB0_WAKEUP_0_8500	(PRCM_ACK_MB0 + 0x4)
+#define PRCM_ACK_MB0_WAKEUP_0_4500	(PRCM_ACK_MB0 + 0x8)
+#define PRCM_ACK_MB0_WAKEUP_1_8500	(PRCM_ACK_MB0 + 0x1C)
+#define PRCM_ACK_MB0_WAKEUP_1_4500	(PRCM_ACK_MB0 + 0x20)
+#define PRCM_ACK_MB0_EVENT_4500_NUMBERS	20
+
+/* Mailbox 1 headers */
+#define MB1H_ARM_APE_OPP 0x0
+#define MB1H_RESET_MODEM 0x2
+#define MB1H_REQUEST_APE_OPP_100_VOLT 0x3
+#define MB1H_RELEASE_APE_OPP_100_VOLT 0x4
+#define MB1H_RELEASE_USB_WAKEUP 0x5
+
+/* Mailbox 1 Requests */
+#define PRCM_REQ_MB1_ARM_OPP			(PRCM_REQ_MB1 + 0x0)
+#define PRCM_REQ_MB1_APE_OPP			(PRCM_REQ_MB1 + 0x1)
+#define PRCM_REQ_MB1_APE_OPP_100_RESTORE	(PRCM_REQ_MB1 + 0x4)
+#define PRCM_REQ_MB1_ARM_OPP_100_RESTORE	(PRCM_REQ_MB1 + 0x8)
+
+/* Mailbox 1 ACKs */
+#define PRCM_ACK_MB1_CURRENT_ARM_OPP	(PRCM_ACK_MB1 + 0x0)
+#define PRCM_ACK_MB1_CURRENT_APE_OPP	(PRCM_ACK_MB1 + 0x1)
+#define PRCM_ACK_MB1_APE_VOLTAGE_STATUS	(PRCM_ACK_MB1 + 0x2)
+#define PRCM_ACK_MB1_DVFS_STATUS	(PRCM_ACK_MB1 + 0x3)
+
+/* Mailbox 2 headers */
+#define MB2H_DPS	0x0
+#define MB2H_AUTO_PWR	0x1
+
+/* Mailbox 2 REQs */
+#define PRCM_REQ_MB2_SVA_MMDSP		(PRCM_REQ_MB2 + 0x0)
+#define PRCM_REQ_MB2_SVA_PIPE		(PRCM_REQ_MB2 + 0x1)
+#define PRCM_REQ_MB2_SIA_MMDSP		(PRCM_REQ_MB2 + 0x2)
+#define PRCM_REQ_MB2_SIA_PIPE		(PRCM_REQ_MB2 + 0x3)
+#define PRCM_REQ_MB2_SGA		(PRCM_REQ_MB2 + 0x4)
+#define PRCM_REQ_MB2_B2R2_MCDE		(PRCM_REQ_MB2 + 0x5)
+#define PRCM_REQ_MB2_ESRAM12		(PRCM_REQ_MB2 + 0x6)
+#define PRCM_REQ_MB2_ESRAM34		(PRCM_REQ_MB2 + 0x7)
+#define PRCM_REQ_MB2_AUTO_PM_SLEEP	(PRCM_REQ_MB2 + 0x8)
+#define PRCM_REQ_MB2_AUTO_PM_IDLE	(PRCM_REQ_MB2 + 0xC)
+
+/* Mailbox 2 ACKs */
+#define PRCM_ACK_MB2_DPS_STATUS (PRCM_ACK_MB2 + 0x0)
+#define HWACC_PWR_ST_OK 0xFE
+
+/* Mailbox 3 headers */
+#define MB3H_ANC	0x0
+#define MB3H_SIDETONE	0x1
+#define MB3H_SYSCLK	0xE
+
+/* Mailbox 3 Requests */
+#define PRCM_REQ_MB3_ANC_FIR_COEFF	(PRCM_REQ_MB3 + 0x0)
+#define PRCM_REQ_MB3_ANC_IIR_COEFF	(PRCM_REQ_MB3 + 0x20)
+#define PRCM_REQ_MB3_ANC_SHIFTER	(PRCM_REQ_MB3 + 0x60)
+#define PRCM_REQ_MB3_ANC_WARP		(PRCM_REQ_MB3 + 0x64)
+#define PRCM_REQ_MB3_SIDETONE_FIR_GAIN	(PRCM_REQ_MB3 + 0x68)
+#define PRCM_REQ_MB3_SIDETONE_FIR_COEFF	(PRCM_REQ_MB3 + 0x6C)
+#define PRCM_REQ_MB3_SYSCLK_MGT		(PRCM_REQ_MB3 + 0x16C)
+
+/* Mailbox 4 headers */
+#define MB4H_DDR_INIT	0x0
+#define MB4H_MEM_ST	0x1
+#define MB4H_HOTDOG	0x12
+#define MB4H_HOTMON	0x13
+#define MB4H_HOT_PERIOD	0x14
+
+/* Mailbox 4 Requests */
+#define PRCM_REQ_MB4_DDR_ST_AP_SLEEP_IDLE	(PRCM_REQ_MB4 + 0x0)
+#define PRCM_REQ_MB4_DDR_ST_AP_DEEP_IDLE	(PRCM_REQ_MB4 + 0x1)
+#define PRCM_REQ_MB4_ESRAM0_ST			(PRCM_REQ_MB4 + 0x3)
+#define PRCM_REQ_MB4_HOTDOG_THRESHOLD		(PRCM_REQ_MB4 + 0x0)
+#define PRCM_REQ_MB4_HOTMON_LOW			(PRCM_REQ_MB4 + 0x0)
+#define PRCM_REQ_MB4_HOTMON_HIGH		(PRCM_REQ_MB4 + 0x1)
+#define PRCM_REQ_MB4_HOTMON_CONFIG		(PRCM_REQ_MB4 + 0x2)
+#define PRCM_REQ_MB4_HOT_PERIOD			(PRCM_REQ_MB4 + 0x0)
+#define HOTMON_CONFIG_LOW			BIT(0)
+#define HOTMON_CONFIG_HIGH			BIT(1)
+
+/* Mailbox 5 Requests */
+#define PRCM_REQ_MB5_I2C_SLAVE_OP	(PRCM_REQ_MB5 + 0x0)
+#define PRCM_REQ_MB5_I2C_HW_BITS	(PRCM_REQ_MB5 + 0x1)
+#define PRCM_REQ_MB5_I2C_REG		(PRCM_REQ_MB5 + 0x2)
+#define PRCM_REQ_MB5_I2C_VAL		(PRCM_REQ_MB5 + 0x3)
+#define PRCMU_I2C_WRITE(slave) \
+	(((slave) << 1) | (cpu_is_u8500v2() ? BIT(6) : 0))
+#define PRCMU_I2C_READ(slave) \
+	(((slave) << 1) | BIT(0) | (cpu_is_u8500v2() ? BIT(6) : 0))
+#define PRCMU_I2C_STOP_EN		BIT(3)
+
+/* Mailbox 5 ACKs */
+#define PRCM_ACK_MB5_I2C_STATUS	(PRCM_ACK_MB5 + 0x1)
+#define PRCM_ACK_MB5_I2C_VAL	(PRCM_ACK_MB5 + 0x3)
+#define I2C_WR_OK 0x1
+#define I2C_RD_OK 0x2
+
+#define NUM_MB 8
+#define MBOX_BIT BIT
+#define ALL_MBOX_BITS (MBOX_BIT(NUM_MB) - 1)
+
+/*
+ * Wakeups/IRQs
+ */
+
+#define WAKEUP_BIT_RTC BIT(0)
+#define WAKEUP_BIT_RTT0 BIT(1)
+#define WAKEUP_BIT_RTT1 BIT(2)
+#define WAKEUP_BIT_HSI0 BIT(3)
+#define WAKEUP_BIT_HSI1 BIT(4)
+#define WAKEUP_BIT_CA_WAKE BIT(5)
+#define WAKEUP_BIT_USB BIT(6)
+#define WAKEUP_BIT_ABB BIT(7)
+#define WAKEUP_BIT_ABB_FIFO BIT(8)
+#define WAKEUP_BIT_SYSCLK_OK BIT(9)
+#define WAKEUP_BIT_CA_SLEEP BIT(10)
+#define WAKEUP_BIT_AC_WAKE_ACK BIT(11)
+#define WAKEUP_BIT_SIDE_TONE_OK BIT(12)
+#define WAKEUP_BIT_ANC_OK BIT(13)
+#define WAKEUP_BIT_SW_ERROR BIT(14)
+#define WAKEUP_BIT_AC_SLEEP_ACK BIT(15)
+#define WAKEUP_BIT_ARM BIT(17)
+#define WAKEUP_BIT_HOTMON_LOW BIT(18)
+#define WAKEUP_BIT_HOTMON_HIGH BIT(19)
+#define WAKEUP_BIT_MODEM_SW_RESET_REQ BIT(20)
+#define WAKEUP_BIT_GPIO0 BIT(23)
+#define WAKEUP_BIT_GPIO1 BIT(24)
+#define WAKEUP_BIT_GPIO2 BIT(25)
+#define WAKEUP_BIT_GPIO3 BIT(26)
+#define WAKEUP_BIT_GPIO4 BIT(27)
+#define WAKEUP_BIT_GPIO5 BIT(28)
+#define WAKEUP_BIT_GPIO6 BIT(29)
+#define WAKEUP_BIT_GPIO7 BIT(30)
+#define WAKEUP_BIT_GPIO8 BIT(31)
+
+/*
+ * This vector maps irq numbers to the bits in the bit field used in
+ * communication with the PRCMU firmware.
+ *
+ * The reason for having this is to keep the irq numbers contiguous even though
+ * the bits in the bit field are not. (The bits also have a tendency to move
+ * around, to further complicate matters.)
+ */
+#define IRQ_INDEX(_name) ((IRQ_PRCMU_##_name) - IRQ_PRCMU_BASE)
+#define IRQ_ENTRY(_name)[IRQ_INDEX(_name)] = (WAKEUP_BIT_##_name)
+static u32 prcmu_irq_bit[NUM_PRCMU_WAKEUPS] = {
+	IRQ_ENTRY(RTC),
+	IRQ_ENTRY(RTT0),
+	IRQ_ENTRY(RTT1),
+	IRQ_ENTRY(HSI0),
+	IRQ_ENTRY(HSI1),
+	IRQ_ENTRY(CA_WAKE),
+	IRQ_ENTRY(USB),
+	IRQ_ENTRY(ABB),
+	IRQ_ENTRY(ABB_FIFO),
+	IRQ_ENTRY(CA_SLEEP),
+	IRQ_ENTRY(ARM),
+	IRQ_ENTRY(HOTMON_LOW),
+	IRQ_ENTRY(HOTMON_HIGH),
+	IRQ_ENTRY(MODEM_SW_RESET_REQ),
+	IRQ_ENTRY(GPIO0),
+	IRQ_ENTRY(GPIO1),
+	IRQ_ENTRY(GPIO2),
+	IRQ_ENTRY(GPIO3),
+	IRQ_ENTRY(GPIO4),
+	IRQ_ENTRY(GPIO5),
+	IRQ_ENTRY(GPIO6),
+	IRQ_ENTRY(GPIO7),
+	IRQ_ENTRY(GPIO8)
+};
+
+#define VALID_WAKEUPS (BIT(NUM_PRCMU_WAKEUP_INDICES) - 1)
+#define WAKEUP_ENTRY(_name)[PRCMU_WAKEUP_INDEX_##_name] = (WAKEUP_BIT_##_name)
+static u32 prcmu_wakeup_bit[NUM_PRCMU_WAKEUP_INDICES] = {
+	WAKEUP_ENTRY(RTC),
+	WAKEUP_ENTRY(RTT0),
+	WAKEUP_ENTRY(RTT1),
+	WAKEUP_ENTRY(HSI0),
+	WAKEUP_ENTRY(HSI1),
+	WAKEUP_ENTRY(USB),
+	WAKEUP_ENTRY(ABB),
+	WAKEUP_ENTRY(ABB_FIFO),
+	WAKEUP_ENTRY(ARM)
+};
+
+/*
+ * mb0_transfer - state needed for mailbox 0 communication.
+ * @lock:		The transaction lock.
+ * @dbb_events_lock:	A lock used to handle concurrent access to (parts of)
+ *			the request data.
+ * @mask_work:		Work structure used for (un)masking wakeup interrupts.
+ * @req:		Request data that need to persist between requests.
+ */
+static struct {
+	spinlock_t lock;
+	spinlock_t dbb_irqs_lock;
+	struct work_struct mask_work;
+	struct mutex ac_wake_lock;
+	struct completion ac_wake_work;
+	struct {
+		u32 dbb_irqs;
+		u32 dbb_wakeups;
+		u32 abb_events;
+	} req;
+} mb0_transfer;
+
+/*
+ * mb1_transfer - state needed for mailbox 1 communication.
+ * @lock:	The transaction lock.
+ * @work:	The transaction completion structure.
+ * @ack:	Reply ("acknowledge") data.
+ */
+static struct {
+	struct mutex lock;
+	struct completion work;
+	struct {
+		u8 header;
+		u8 arm_opp;
+		u8 ape_opp;
+		u8 ape_voltage_status;
+	} ack;
+} mb1_transfer;
+
+/*
+ * mb2_transfer - state needed for mailbox 2 communication.
+ * @lock:            The transaction lock.
+ * @work:            The transaction completion structure.
+ * @auto_pm_lock:    The autonomous power management configuration lock.
+ * @auto_pm_enabled: A flag indicating whether autonomous PM is enabled.
+ * @req:             Request data that need to persist between requests.
+ * @ack:             Reply ("acknowledge") data.
+ */
+static struct {
+	struct mutex lock;
+	struct completion work;
+	spinlock_t auto_pm_lock;
+	bool auto_pm_enabled;
+	struct {
+		u8 status;
+	} ack;
+} mb2_transfer;
+
+/*
+ * mb3_transfer - state needed for mailbox 3 communication.
+ * @lock:		The request lock.
+ * @sysclk_lock:	A lock used to handle concurrent sysclk requests.
+ * @sysclk_work:	Work structure used for sysclk requests.
+ */
+static struct {
+	spinlock_t lock;
+	struct mutex sysclk_lock;
+	struct completion sysclk_work;
+} mb3_transfer;
+
+/*
+ * mb4_transfer - state needed for mailbox 4 communication.
+ * @lock:	The transaction lock.
+ * @work:	The transaction completion structure.
+ */
+static struct {
+	struct mutex lock;
+	struct completion work;
+} mb4_transfer;
+
+/*
+ * mb5_transfer - state needed for mailbox 5 communication.
+ * @lock:	The transaction lock.
+ * @work:	The transaction completion structure.
+ * @ack:	Reply ("acknowledge") data.
+ */
+static struct {
+	struct mutex lock;
+	struct completion work;
+	struct {
+		u8 status;
+		u8 value;
+	} ack;
+} mb5_transfer;
+
+static atomic_t ac_wake_req_state = ATOMIC_INIT(0);
+
+/* Spinlocks */
+static DEFINE_SPINLOCK(clkout_lock);
+static DEFINE_SPINLOCK(gpiocr_lock);
+
+/* Global var to runtime determine TCDM base for v2 or v1 */
+static __iomem void *tcdm_base;
+
+struct clk_mgt {
+	unsigned int offset;
+	u32 pllsw;
+};
+
+static DEFINE_SPINLOCK(clk_mgt_lock);
+
+#define CLK_MGT_ENTRY(_name)[PRCMU_##_name] = { (PRCM_##_name##_MGT), 0 }
+struct clk_mgt clk_mgt[PRCMU_NUM_REG_CLOCKS] = {
+	CLK_MGT_ENTRY(SGACLK),
+	CLK_MGT_ENTRY(UARTCLK),
+	CLK_MGT_ENTRY(MSP02CLK),
+	CLK_MGT_ENTRY(MSP1CLK),
+	CLK_MGT_ENTRY(I2CCLK),
+	CLK_MGT_ENTRY(SDMMCCLK),
+	CLK_MGT_ENTRY(SLIMCLK),
+	CLK_MGT_ENTRY(PER1CLK),
+	CLK_MGT_ENTRY(PER2CLK),
+	CLK_MGT_ENTRY(PER3CLK),
+	CLK_MGT_ENTRY(PER5CLK),
+	CLK_MGT_ENTRY(PER6CLK),
+	CLK_MGT_ENTRY(PER7CLK),
+	CLK_MGT_ENTRY(LCDCLK),
+	CLK_MGT_ENTRY(BMLCLK),
+	CLK_MGT_ENTRY(HSITXCLK),
+	CLK_MGT_ENTRY(HSIRXCLK),
+	CLK_MGT_ENTRY(HDMICLK),
+	CLK_MGT_ENTRY(APEATCLK),
+	CLK_MGT_ENTRY(APETRACECLK),
+	CLK_MGT_ENTRY(MCDECLK),
+	CLK_MGT_ENTRY(IPI2CCLK),
+	CLK_MGT_ENTRY(DSIALTCLK),
+	CLK_MGT_ENTRY(DMACLK),
+	CLK_MGT_ENTRY(B2R2CLK),
+	CLK_MGT_ENTRY(TVCLK),
+	CLK_MGT_ENTRY(SSPCLK),
+	CLK_MGT_ENTRY(RNGCLK),
+	CLK_MGT_ENTRY(UICCCLK),
+};
+
+/*
+* Used by MCDE to setup all necessary PRCMU registers
+*/
+#define PRCMU_RESET_DSIPLL		0x00004000
+#define PRCMU_UNCLAMP_DSIPLL		0x00400800
+
+#define PRCMU_CLK_PLL_DIV_SHIFT		0
+#define PRCMU_CLK_PLL_SW_SHIFT		5
+#define PRCMU_CLK_38			(1 << 9)
+#define PRCMU_CLK_38_SRC		(1 << 10)
+#define PRCMU_CLK_38_DIV		(1 << 11)
+
+/* PLLDIV=12, PLLSW=4 (PLLDDR) */
+#define PRCMU_DSI_CLOCK_SETTING		0x0000008C
+
+/* PLLDIV=8, PLLSW=4 (PLLDDR) */
+#define PRCMU_DSI_CLOCK_SETTING_U8400	0x00000088
+
+/* DPI 50000000 Hz */
+#define PRCMU_DPI_CLOCK_SETTING		((1 << PRCMU_CLK_PLL_SW_SHIFT) | \
+					  (16 << PRCMU_CLK_PLL_DIV_SHIFT))
+#define PRCMU_DSI_LP_CLOCK_SETTING	0x00000E00
+
+/* D=101, N=1, R=4, SELDIV2=0 */
+#define PRCMU_PLLDSI_FREQ_SETTING	0x00040165
+
+/* D=70, N=1, R=3, SELDIV2=0 */
+#define PRCMU_PLLDSI_FREQ_SETTING_U8400	0x00030146
+
+#define PRCMU_ENABLE_PLLDSI		0x00000001
+#define PRCMU_DISABLE_PLLDSI		0x00000000
+#define PRCMU_RELEASE_RESET_DSS		0x0000400C
+#define PRCMU_DSI_PLLOUT_SEL_SETTING	0x00000202
+/* ESC clk, div0=1, div1=1, div2=3 */
+#define PRCMU_ENABLE_ESCAPE_CLOCK_DIV	0x07030101
+#define PRCMU_DISABLE_ESCAPE_CLOCK_DIV	0x00030101
+#define PRCMU_DSI_RESET_SW		0x00000007
+
+#define PRCMU_PLLDSI_LOCKP_LOCKED	0x3
+
+static struct {
+	u8 project_number;
+	u8 api_version;
+	u8 func_version;
+	u8 errata;
+} prcmu_version;
+
+
+int prcmu_enable_dsipll(void)
+{
+	int i;
+	unsigned int plldsifreq;
+
+	/* Clear DSIPLL_RESETN */
+	writel(PRCMU_RESET_DSIPLL, (_PRCMU_BASE + PRCM_APE_RESETN_CLR));
+	/* Unclamp DSIPLL in/out */
+	writel(PRCMU_UNCLAMP_DSIPLL, (_PRCMU_BASE + PRCM_MMIP_LS_CLAMP_CLR));
+
+	if (prcmu_is_u8400())
+		plldsifreq = PRCMU_PLLDSI_FREQ_SETTING_U8400;
+	else
+		plldsifreq = PRCMU_PLLDSI_FREQ_SETTING;
+	/* Set DSI PLL FREQ */
+	writel(plldsifreq, (_PRCMU_BASE + PRCM_PLLDSI_FREQ));
+	writel(PRCMU_DSI_PLLOUT_SEL_SETTING,
+		(_PRCMU_BASE + PRCM_DSI_PLLOUT_SEL));
+	/* Enable Escape clocks */
+	writel(PRCMU_ENABLE_ESCAPE_CLOCK_DIV,
+					(_PRCMU_BASE + PRCM_DSITVCLK_DIV));
+
+	/* Start DSI PLL */
+	writel(PRCMU_ENABLE_PLLDSI, (_PRCMU_BASE + PRCM_PLLDSI_ENABLE));
+	/* Reset DSI PLL */
+	writel(PRCMU_DSI_RESET_SW, (_PRCMU_BASE + PRCM_DSI_SW_RESET));
+	for (i = 0; i < 10; i++) {
+		if ((readl(_PRCMU_BASE + PRCM_PLLDSI_LOCKP) &
+			PRCMU_PLLDSI_LOCKP_LOCKED)
+					== PRCMU_PLLDSI_LOCKP_LOCKED)
+			break;
+		udelay(100);
+	}
+	/* Set DSIPLL_RESETN */
+	writel(PRCMU_RESET_DSIPLL, (_PRCMU_BASE + PRCM_APE_RESETN_SET));
+	return 0;
+}
+
+int prcmu_disable_dsipll(void)
+{
+	/* Disable dsi pll */
+	writel(PRCMU_DISABLE_PLLDSI, (_PRCMU_BASE + PRCM_PLLDSI_ENABLE));
+	/* Disable  escapeclock */
+	writel(PRCMU_DISABLE_ESCAPE_CLOCK_DIV,
+					(_PRCMU_BASE + PRCM_DSITVCLK_DIV));
+	return 0;
+}
+
+int prcmu_set_display_clocks(void)
+{
+	unsigned long flags;
+	unsigned int dsiclk;
+
+	if (prcmu_is_u8400())
+		dsiclk = PRCMU_DSI_CLOCK_SETTING_U8400;
+	else
+		dsiclk = PRCMU_DSI_CLOCK_SETTING;
+
+	spin_lock_irqsave(&clk_mgt_lock, flags);
+
+	/* Grab the HW semaphore. */
+	while ((readl(_PRCMU_BASE + PRCM_SEM) & PRCM_SEM_PRCM_SEM) != 0)
+		cpu_relax();
+
+	writel(dsiclk, (_PRCMU_BASE + PRCM_HDMICLK_MGT));
+	writel(PRCMU_DSI_LP_CLOCK_SETTING, (_PRCMU_BASE + PRCM_TVCLK_MGT));
+	writel(PRCMU_DPI_CLOCK_SETTING, (_PRCMU_BASE + PRCM_LCDCLK_MGT));
+
+	/* Release the HW semaphore. */
+	writel(0, (_PRCMU_BASE + PRCM_SEM));
+
+	spin_unlock_irqrestore(&clk_mgt_lock, flags);
+
+	return 0;
+}
+
+/**
+ * prcmu_enable_spi2 - Enables pin muxing for SPI2 on OtherAlternateC1.
+ */
+void prcmu_enable_spi2(void)
+{
+	u32 reg;
+	unsigned long flags;
+
+	spin_lock_irqsave(&gpiocr_lock, flags);
+	reg = readl(_PRCMU_BASE + PRCM_GPIOCR);
+	writel(reg | PRCM_GPIOCR_SPI2_SELECT, _PRCMU_BASE + PRCM_GPIOCR);
+	spin_unlock_irqrestore(&gpiocr_lock, flags);
+}
+
+/**
+ * prcmu_disable_spi2 - Disables pin muxing for SPI2 on OtherAlternateC1.
+ */
+void prcmu_disable_spi2(void)
+{
+	u32 reg;
+	unsigned long flags;
+
+	spin_lock_irqsave(&gpiocr_lock, flags);
+	reg = readl(_PRCMU_BASE + PRCM_GPIOCR);
+	writel(reg & ~PRCM_GPIOCR_SPI2_SELECT, _PRCMU_BASE + PRCM_GPIOCR);
+	spin_unlock_irqrestore(&gpiocr_lock, flags);
+}
+
+bool prcmu_has_arm_maxopp(void)
+{
+	return (readb(tcdm_base + PRCM_AVS_VARM_MAX_OPP) &
+		PRCM_AVS_ISMODEENABLE_MASK) == PRCM_AVS_ISMODEENABLE_MASK;
+}
+
+bool prcmu_is_u8400(void)
+{
+	return prcmu_version.project_number == PRCMU_PROJECT_ID_8400V2_0;
+}
+
+/**
+ * prcmu_get_boot_status - PRCMU boot status checking
+ * Returns: the current PRCMU boot status
+ */
+int prcmu_get_boot_status(void)
+{
+	return readb(tcdm_base + PRCM_BOOT_STATUS);
+}
+
+/**
+ * prcmu_set_rc_a2p - This function is used to run few power state sequences
+ * @val: Value to be set, i.e. transition requested
+ * Returns: 0 on success, -EINVAL on invalid argument
+ *
+ * This function is used to run the following power state sequences -
+ * any state to ApReset,  ApDeepSleep to ApExecute, ApExecute to ApDeepSleep
+ */
+int prcmu_set_rc_a2p(enum romcode_write val)
+{
+	if (val < RDY_2_DS || val > RDY_2_XP70_RST)
+		return -EINVAL;
+	writeb(val, (tcdm_base + PRCM_ROMCODE_A2P));
+	return 0;
+}
+
+/**
+ * prcmu_get_rc_p2a - This function is used to get power state sequences
+ * Returns: the power transition that has last happened
+ *
+ * This function can return the following transitions-
+ * any state to ApReset,  ApDeepSleep to ApExecute, ApExecute to ApDeepSleep
+ */
+enum romcode_read prcmu_get_rc_p2a(void)
+{
+	return readb(tcdm_base + PRCM_ROMCODE_P2A);
+}
+
+/**
+ * prcmu_get_current_mode - Return the current XP70 power mode
+ * Returns: Returns the current AP(ARM) power mode: init,
+ * apBoot, apExecute, apDeepSleep, apSleep, apIdle, apReset
+ */
+enum ap_pwrst prcmu_get_xp70_current_state(void)
+{
+	return readb(tcdm_base + PRCM_XP70_CUR_PWR_STATE);
+}
+
+/**
+ * prcmu_config_clkout - Configure one of the programmable clock outputs.
+ * @clkout:	The CLKOUT number (0 or 1).
+ * @source:	The clock to be used (one of the PRCMU_CLKSRC_*).
+ * @div:	The divider to be applied.
+ *
+ * Configures one of the programmable clock outputs (CLKOUTs).
+ * @div should be in the range [1,63] to request a configuration, or 0 to
+ * inform that the configuration is no longer requested.
+ */
+int prcmu_config_clkout(u8 clkout, u8 source, u8 div)
+{
+	static int requests[2];
+	int r = 0;
+	unsigned long flags;
+	u32 val;
+	u32 bits;
+	u32 mask;
+	u32 div_mask;
+
+	BUG_ON(clkout > 1);
+	BUG_ON(div > 63);
+	BUG_ON((clkout == 0) && (source > PRCMU_CLKSRC_CLK009));
+
+	if (!div && !requests[clkout])
+		return -EINVAL;
+
+	switch (clkout) {
+	case 0:
+		div_mask = PRCM_CLKOCR_CLKODIV0_MASK;
+		mask = (PRCM_CLKOCR_CLKODIV0_MASK | PRCM_CLKOCR_CLKOSEL0_MASK);
+		bits = ((source << PRCM_CLKOCR_CLKOSEL0_SHIFT) |
+			(div << PRCM_CLKOCR_CLKODIV0_SHIFT));
+		break;
+	case 1:
+		div_mask = PRCM_CLKOCR_CLKODIV1_MASK;
+		mask = (PRCM_CLKOCR_CLKODIV1_MASK | PRCM_CLKOCR_CLKOSEL1_MASK |
+			PRCM_CLKOCR_CLK1TYPE);
+		bits = ((source << PRCM_CLKOCR_CLKOSEL1_SHIFT) |
+			(div << PRCM_CLKOCR_CLKODIV1_SHIFT));
+		break;
+	}
+	bits &= mask;
+
+	spin_lock_irqsave(&clkout_lock, flags);
+
+	val = readl(_PRCMU_BASE + PRCM_CLKOCR);
+	if (val & div_mask) {
+		if (div) {
+			if ((val & mask) != bits) {
+				r = -EBUSY;
+				goto unlock_and_return;
+			}
+		} else {
+			if ((val & mask & ~div_mask) != bits) {
+				r = -EINVAL;
+				goto unlock_and_return;
+			}
+		}
+	}
+	writel((bits | (val & ~mask)), (_PRCMU_BASE + PRCM_CLKOCR));
+	requests[clkout] += (div ? 1 : -1);
+
+unlock_and_return:
+	spin_unlock_irqrestore(&clkout_lock, flags);
+
+	return r;
+}
+
+int prcmu_set_power_state(u8 state, bool keep_ulp_clk, bool keep_ap_pll)
+{
+	unsigned long flags;
+
+	BUG_ON((state < PRCMU_AP_SLEEP) || (PRCMU_AP_DEEP_IDLE < state));
+
+	spin_lock_irqsave(&mb0_transfer.lock, flags);
+
+	while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(0))
+		cpu_relax();
+
+	writeb(MB0H_POWER_STATE_TRANS, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB0));
+	writeb(state, (tcdm_base + PRCM_REQ_MB0_AP_POWER_STATE));
+	writeb((keep_ap_pll ? 1 : 0), (tcdm_base + PRCM_REQ_MB0_AP_PLL_STATE));
+	writeb((keep_ulp_clk ? 1 : 0),
+		(tcdm_base + PRCM_REQ_MB0_ULP_CLOCK_STATE));
+	writeb(0, (tcdm_base + PRCM_REQ_MB0_DO_NOT_WFI));
+	writel(MBOX_BIT(0), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
+
+	spin_unlock_irqrestore(&mb0_transfer.lock, flags);
+
+	return 0;
+}
+
+/* This function should only be called while mb0_transfer.lock is held. */
+static void config_wakeups(void)
+{
+	const u8 header[2] = {
+		MB0H_CONFIG_WAKEUPS_EXE,
+		MB0H_CONFIG_WAKEUPS_SLEEP
+	};
+	static u32 last_dbb_events;
+	static u32 last_abb_events;
+	u32 dbb_events;
+	u32 abb_events;
+	unsigned int i;
+
+	dbb_events = mb0_transfer.req.dbb_irqs | mb0_transfer.req.dbb_wakeups;
+	dbb_events |= (WAKEUP_BIT_AC_WAKE_ACK | WAKEUP_BIT_AC_SLEEP_ACK);
+
+	abb_events = mb0_transfer.req.abb_events;
+
+	if ((dbb_events == last_dbb_events) && (abb_events == last_abb_events))
+		return;
+
+	for (i = 0; i < 2; i++) {
+		while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(0))
+			cpu_relax();
+		writel(dbb_events, (tcdm_base + PRCM_REQ_MB0_WAKEUP_8500));
+		writel(abb_events, (tcdm_base + PRCM_REQ_MB0_WAKEUP_4500));
+		writeb(header[i], (tcdm_base + PRCM_MBOX_HEADER_REQ_MB0));
+		writel(MBOX_BIT(0), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
+	}
+	last_dbb_events = dbb_events;
+	last_abb_events = abb_events;
+}
+
+void prcmu_enable_wakeups(u32 wakeups)
+{
+	unsigned long flags;
+	u32 bits;
+	int i;
+
+	BUG_ON(wakeups != (wakeups & VALID_WAKEUPS));
+
+	for (i = 0, bits = 0; i < NUM_PRCMU_WAKEUP_INDICES; i++) {
+		if (wakeups & BIT(i))
+			bits |= prcmu_wakeup_bit[i];
+	}
+
+	spin_lock_irqsave(&mb0_transfer.lock, flags);
+
+	mb0_transfer.req.dbb_wakeups = bits;
+	config_wakeups();
+
+	spin_unlock_irqrestore(&mb0_transfer.lock, flags);
+}
+
+void prcmu_config_abb_event_readout(u32 abb_events)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&mb0_transfer.lock, flags);
+
+	mb0_transfer.req.abb_events = abb_events;
+	config_wakeups();
+
+	spin_unlock_irqrestore(&mb0_transfer.lock, flags);
+}
+
+void prcmu_get_abb_event_buffer(void __iomem **buf)
+{
+	if (readb(tcdm_base + PRCM_ACK_MB0_READ_POINTER) & 1)
+		*buf = (tcdm_base + PRCM_ACK_MB0_WAKEUP_1_4500);
+	else
+		*buf = (tcdm_base + PRCM_ACK_MB0_WAKEUP_0_4500);
+}
+
+/**
+ * prcmu_set_arm_opp - set the appropriate ARM OPP
+ * @opp: The new ARM operating point to which transition is to be made
+ * Returns: 0 on success, non-zero on failure
+ *
+ * This function sets the the operating point of the ARM.
+ */
+int prcmu_set_arm_opp(u8 opp)
+{
+	int r;
+
+	if (opp < ARM_NO_CHANGE || opp > ARM_EXTCLK)
+		return -EINVAL;
+
+	r = 0;
+
+	mutex_lock(&mb1_transfer.lock);
+
+	while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(1))
+		cpu_relax();
+
+	writeb(MB1H_ARM_APE_OPP, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB1));
+	writeb(opp, (tcdm_base + PRCM_REQ_MB1_ARM_OPP));
+	writeb(APE_NO_CHANGE, (tcdm_base + PRCM_REQ_MB1_APE_OPP));
+
+	writel(MBOX_BIT(1), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
+	wait_for_completion(&mb1_transfer.work);
+
+	if ((mb1_transfer.ack.header != MB1H_ARM_APE_OPP) ||
+		(mb1_transfer.ack.arm_opp != opp))
+		r = -EIO;
+
+	mutex_unlock(&mb1_transfer.lock);
+
+	return r;
+}
+
+/**
+ * prcmu_get_arm_opp - get the current ARM OPP
+ *
+ * Returns: the current ARM OPP
+ */
+int prcmu_get_arm_opp(void)
+{
+	return readb(tcdm_base + PRCM_ACK_MB1_CURRENT_ARM_OPP);
+}
+
+/**
+ * prcmu_get_ddr_opp - get the current DDR OPP
+ *
+ * Returns: the current DDR OPP
+ */
+int prcmu_get_ddr_opp(void)
+{
+	return readb(_PRCMU_BASE + PRCM_DDR_SUBSYS_APE_MINBW);
+}
+
+/**
+ * set_ddr_opp - set the appropriate DDR OPP
+ * @opp: The new DDR operating point to which transition is to be made
+ * Returns: 0 on success, non-zero on failure
+ *
+ * This function sets the operating point of the DDR.
+ */
+int prcmu_set_ddr_opp(u8 opp)
+{
+	if (opp < DDR_100_OPP || opp > DDR_25_OPP)
+		return -EINVAL;
+	/* Changing the DDR OPP can hang the hardware pre-v21 */
+	if (cpu_is_u8500v20_or_later() && !cpu_is_u8500v20())
+		writeb(opp, (_PRCMU_BASE + PRCM_DDR_SUBSYS_APE_MINBW));
+
+	return 0;
+}
+/**
+ * set_ape_opp - set the appropriate APE OPP
+ * @opp: The new APE operating point to which transition is to be made
+ * Returns: 0 on success, non-zero on failure
+ *
+ * This function sets the operating point of the APE.
+ */
+int prcmu_set_ape_opp(u8 opp)
+{
+	int r = 0;
+
+	mutex_lock(&mb1_transfer.lock);
+
+	while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(1))
+		cpu_relax();
+
+	writeb(MB1H_ARM_APE_OPP, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB1));
+	writeb(ARM_NO_CHANGE, (tcdm_base + PRCM_REQ_MB1_ARM_OPP));
+	writeb(opp, (tcdm_base + PRCM_REQ_MB1_APE_OPP));
+
+	writel(MBOX_BIT(1), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
+	wait_for_completion(&mb1_transfer.work);
+
+	if ((mb1_transfer.ack.header != MB1H_ARM_APE_OPP) ||
+		(mb1_transfer.ack.ape_opp != opp))
+		r = -EIO;
+
+	mutex_unlock(&mb1_transfer.lock);
+
+	return r;
+}
+
+/**
+ * prcmu_get_ape_opp - get the current APE OPP
+ *
+ * Returns: the current APE OPP
+ */
+int prcmu_get_ape_opp(void)
+{
+	return readb(tcdm_base + PRCM_ACK_MB1_CURRENT_APE_OPP);
+}
+
+/**
+ * prcmu_request_ape_opp_100_voltage - Request APE OPP 100% voltage
+ * @enable: true to request the higher voltage, false to drop a request.
+ *
+ * Calls to this function to enable and disable requests must be balanced.
+ */
+int prcmu_request_ape_opp_100_voltage(bool enable)
+{
+	int r = 0;
+	u8 header;
+	static unsigned int requests;
+
+	mutex_lock(&mb1_transfer.lock);
+
+	if (enable) {
+		if (0 != requests++)
+			goto unlock_and_return;
+		header = MB1H_REQUEST_APE_OPP_100_VOLT;
+	} else {
+		if (requests == 0) {
+			r = -EIO;
+			goto unlock_and_return;
+		} else if (1 != requests--) {
+			goto unlock_and_return;
+		}
+		header = MB1H_RELEASE_APE_OPP_100_VOLT;
+	}
+
+	while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(1))
+		cpu_relax();
+
+	writeb(header, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB1));
+
+	writel(MBOX_BIT(1), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
+	wait_for_completion(&mb1_transfer.work);
+
+	if ((mb1_transfer.ack.header != header) ||
+		((mb1_transfer.ack.ape_voltage_status & BIT(0)) != 0))
+		r = -EIO;
+
+unlock_and_return:
+	mutex_unlock(&mb1_transfer.lock);
+
+	return r;
+}
+
+/**
+ * prcmu_release_usb_wakeup_state - release the state required by a USB wakeup
+ *
+ * This function releases the power state requirements of a USB wakeup.
+ */
+int prcmu_release_usb_wakeup_state(void)
+{
+	int r = 0;
+
+	mutex_lock(&mb1_transfer.lock);
+
+	while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(1))
+		cpu_relax();
+
+	writeb(MB1H_RELEASE_USB_WAKEUP,
+		(tcdm_base + PRCM_MBOX_HEADER_REQ_MB1));
+
+	writel(MBOX_BIT(1), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
+	wait_for_completion(&mb1_transfer.work);
+
+	if ((mb1_transfer.ack.header != MB1H_RELEASE_USB_WAKEUP) ||
+		((mb1_transfer.ack.ape_voltage_status & BIT(0)) != 0))
+		r = -EIO;
+
+	mutex_unlock(&mb1_transfer.lock);
+
+	return r;
+}
+
+/**
+ * prcmu_set_epod - set the state of a EPOD (power domain)
+ * @epod_id: The EPOD to set
+ * @epod_state: The new EPOD state
+ *
+ * This function sets the state of a EPOD (power domain). It may not be called
+ * from interrupt context.
+ */
+int prcmu_set_epod(u16 epod_id, u8 epod_state)
+{
+	int r = 0;
+	bool ram_retention = false;
+	int i;
+
+	/* check argument */
+	BUG_ON(epod_id >= NUM_EPOD_ID);
+
+	/* set flag if retention is possible */
+	switch (epod_id) {
+	case EPOD_ID_SVAMMDSP:
+	case EPOD_ID_SIAMMDSP:
+	case EPOD_ID_ESRAM12:
+	case EPOD_ID_ESRAM34:
+		ram_retention = true;
+		break;
+	}
+
+	/* check argument */
+	BUG_ON(epod_state > EPOD_STATE_ON);
+	BUG_ON(epod_state == EPOD_STATE_RAMRET && !ram_retention);
+
+	/* get lock */
+	mutex_lock(&mb2_transfer.lock);
+
+	/* wait for mailbox */
+	while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(2))
+		cpu_relax();
+
+	/* fill in mailbox */
+	for (i = 0; i < NUM_EPOD_ID; i++)
+		writeb(EPOD_STATE_NO_CHANGE, (tcdm_base + PRCM_REQ_MB2 + i));
+	writeb(epod_state, (tcdm_base + PRCM_REQ_MB2 + epod_id));
+
+	writeb(MB2H_DPS, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB2));
+
+	writel(MBOX_BIT(2), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
+
+	/*
+	 * The current firmware version does not handle errors correctly,
+	 * and we cannot recover if there is an error.
+	 * This is expected to change when the firmware is updated.
+	 */
+	if (!wait_for_completion_timeout(&mb2_transfer.work,
+			msecs_to_jiffies(20000))) {
+		pr_err("prcmu: %s timed out (20 s) waiting for a reply.\n",
+			__func__);
+		r = -EIO;
+		goto unlock_and_return;
+	}
+
+	if (mb2_transfer.ack.status != HWACC_PWR_ST_OK)
+		r = -EIO;
+
+unlock_and_return:
+	mutex_unlock(&mb2_transfer.lock);
+	return r;
+}
+
+/**
+ * prcmu_configure_auto_pm - Configure autonomous power management.
+ * @sleep: Configuration for ApSleep.
+ * @idle:  Configuration for ApIdle.
+ */
+void prcmu_configure_auto_pm(struct prcmu_auto_pm_config *sleep,
+	struct prcmu_auto_pm_config *idle)
+{
+	u32 sleep_cfg;
+	u32 idle_cfg;
+	unsigned long flags;
 
-#include <mach/hardware.h>
+	BUG_ON((sleep == NULL) || (idle == NULL));
 
-#include "db8500-prcmu-regs.h"
+	sleep_cfg = (sleep->sva_auto_pm_enable & 0xF);
+	sleep_cfg = ((sleep_cfg << 4) | (sleep->sia_auto_pm_enable & 0xF));
+	sleep_cfg = ((sleep_cfg << 8) | (sleep->sva_power_on & 0xFF));
+	sleep_cfg = ((sleep_cfg << 8) | (sleep->sia_power_on & 0xFF));
+	sleep_cfg = ((sleep_cfg << 4) | (sleep->sva_policy & 0xF));
+	sleep_cfg = ((sleep_cfg << 4) | (sleep->sia_policy & 0xF));
 
-/* Global var to runtime determine TCDM base for v2 or v1 */
-static __iomem void *tcdm_base;
+	idle_cfg = (idle->sva_auto_pm_enable & 0xF);
+	idle_cfg = ((idle_cfg << 4) | (idle->sia_auto_pm_enable & 0xF));
+	idle_cfg = ((idle_cfg << 8) | (idle->sva_power_on & 0xFF));
+	idle_cfg = ((idle_cfg << 8) | (idle->sia_power_on & 0xFF));
+	idle_cfg = ((idle_cfg << 4) | (idle->sva_policy & 0xF));
+	idle_cfg = ((idle_cfg << 4) | (idle->sia_policy & 0xF));
 
-#define _MBOX_HEADER		(tcdm_base + 0xFE8)
-#define MBOX_HEADER_REQ_MB0	(_MBOX_HEADER + 0x0)
+	spin_lock_irqsave(&mb2_transfer.auto_pm_lock, flags);
 
-#define REQ_MB1 (tcdm_base + 0xFD0)
-#define REQ_MB5 (tcdm_base + 0xE44)
+	/*
+	 * The autonomous power management configuration is done through
+	 * fields in mailbox 2, but these fields are only used as shared
+	 * variables - i.e. there is no need to send a message.
+	 */
+	writel(sleep_cfg, (tcdm_base + PRCM_REQ_MB2_AUTO_PM_SLEEP));
+	writel(idle_cfg, (tcdm_base + PRCM_REQ_MB2_AUTO_PM_IDLE));
 
-#define REQ_MB1_ARMOPP		(REQ_MB1 + 0x0)
-#define REQ_MB1_APEOPP		(REQ_MB1 + 0x1)
-#define REQ_MB1_BOOSTOPP	(REQ_MB1 + 0x2)
+	mb2_transfer.auto_pm_enabled =
+		((sleep->sva_auto_pm_enable == PRCMU_AUTO_PM_ON) ||
+		 (sleep->sia_auto_pm_enable == PRCMU_AUTO_PM_ON) ||
+		 (idle->sva_auto_pm_enable == PRCMU_AUTO_PM_ON) ||
+		 (idle->sia_auto_pm_enable == PRCMU_AUTO_PM_ON));
 
-#define ACK_MB1 (tcdm_base + 0xE04)
-#define ACK_MB5 (tcdm_base + 0xDF4)
+	spin_unlock_irqrestore(&mb2_transfer.auto_pm_lock, flags);
+}
+EXPORT_SYMBOL(prcmu_configure_auto_pm);
 
-#define ACK_MB1_CURR_ARMOPP		(ACK_MB1 + 0x0)
-#define ACK_MB1_CURR_APEOPP		(ACK_MB1 + 0x1)
+bool prcmu_is_auto_pm_enabled(void)
+{
+	return mb2_transfer.auto_pm_enabled;
+}
 
-#define REQ_MB5_I2C_SLAVE_OP (REQ_MB5)
-#define REQ_MB5_I2C_HW_BITS (REQ_MB5 + 1)
-#define REQ_MB5_I2C_REG (REQ_MB5 + 2)
-#define REQ_MB5_I2C_VAL (REQ_MB5 + 3)
+static int request_sysclk(bool enable)
+{
+	int r;
+	unsigned long flags;
 
-#define ACK_MB5_I2C_STATUS (ACK_MB5 + 1)
-#define ACK_MB5_I2C_VAL (ACK_MB5 + 3)
+	r = 0;
 
-#define PRCM_AVS_VARM_MAX_OPP		(tcdm_base + 0x2E4)
-#define PRCM_AVS_ISMODEENABLE		7
-#define PRCM_AVS_ISMODEENABLE_MASK	(1 << PRCM_AVS_ISMODEENABLE)
+	mutex_lock(&mb3_transfer.sysclk_lock);
 
-#define I2C_WRITE(slave) \
-	(((slave) << 1) | (cpu_is_u8500v2() ? BIT(6) : 0))
-#define I2C_READ(slave) \
-	(((slave) << 1) | (cpu_is_u8500v2() ? BIT(6) : 0) | BIT(0))
-#define I2C_STOP_EN BIT(3)
-
-enum mb1_h {
-	MB1H_ARM_OPP = 1,
-	MB1H_APE_OPP,
-	MB1H_ARM_APE_OPP,
-};
+	spin_lock_irqsave(&mb3_transfer.lock, flags);
 
-static struct {
-	struct mutex lock;
-	struct completion work;
-	struct {
-		u8 arm_opp;
-		u8 ape_opp;
-		u8 arm_status;
-		u8 ape_status;
-	} ack;
-} mb1_transfer;
+	while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(3))
+		cpu_relax();
 
-enum ack_mb5_status {
-	I2C_WR_OK = 0x01,
-	I2C_RD_OK = 0x02,
-};
+	writeb((enable ? ON : OFF), (tcdm_base + PRCM_REQ_MB3_SYSCLK_MGT));
 
-#define MBOX_BIT BIT
-#define NUM_MBOX 8
+	writeb(MB3H_SYSCLK, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB3));
+	writel(MBOX_BIT(3), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
 
-static struct {
-	struct mutex lock;
-	struct completion work;
-	bool failed;
-	struct {
-		u8 status;
-		u8 value;
-	} ack;
-} mb5_transfer;
+	spin_unlock_irqrestore(&mb3_transfer.lock, flags);
+
+	/*
+	 * The firmware only sends an ACK if we want to enable the
+	 * SysClk, and it succeeds.
+	 */
+	if (enable && !wait_for_completion_timeout(&mb3_transfer.sysclk_work,
+			msecs_to_jiffies(20000))) {
+		pr_err("prcmu: %s timed out (20 s) waiting for a reply.\n",
+			__func__);
+		r = -EIO;
+	}
+
+	mutex_unlock(&mb3_transfer.sysclk_lock);
+
+	return r;
+}
+
+static int request_timclk(bool enable)
+{
+	u32 val = (PRCM_TCR_DOZE_MODE | PRCM_TCR_TENSEL_MASK);
+
+	if (!enable)
+		val |= PRCM_TCR_STOP_TIMERS;
+	writel(val, (_PRCMU_BASE + PRCM_TCR));
+
+	return 0;
+}
+
+static int request_reg_clock(u8 clock, bool enable)
+{
+	u32 val;
+	unsigned long flags;
+
+	spin_lock_irqsave(&clk_mgt_lock, flags);
+
+	/* Grab the HW semaphore. */
+	while ((readl(_PRCMU_BASE + PRCM_SEM) & PRCM_SEM_PRCM_SEM) != 0)
+		cpu_relax();
+
+	val = readl(_PRCMU_BASE + clk_mgt[clock].offset);
+	if (enable) {
+		val |= (PRCM_CLK_MGT_CLKEN | clk_mgt[clock].pllsw);
+	} else {
+		clk_mgt[clock].pllsw = (val & PRCM_CLK_MGT_CLKPLLSW_MASK);
+		val &= ~(PRCM_CLK_MGT_CLKEN | PRCM_CLK_MGT_CLKPLLSW_MASK);
+	}
+	writel(val, (_PRCMU_BASE + clk_mgt[clock].offset));
+
+	/* Release the HW semaphore. */
+	writel(0, (_PRCMU_BASE + PRCM_SEM));
+
+	spin_unlock_irqrestore(&clk_mgt_lock, flags);
+
+	return 0;
+}
+
+/**
+ * prcmu_request_clock() - Request for a clock to be enabled or disabled.
+ * @clock:      The clock for which the request is made.
+ * @enable:     Whether the clock should be enabled (true) or disabled (false).
+ *
+ * This function should only be used by the clock implementation.
+ * Do not use it from any other place!
+ */
+int prcmu_request_clock(u8 clock, bool enable)
+{
+	if (clock < PRCMU_NUM_REG_CLOCKS)
+		return request_reg_clock(clock, enable);
+	else if (clock == PRCMU_TIMCLK)
+		return request_timclk(enable);
+	else if (clock == PRCMU_SYSCLK)
+		return request_sysclk(enable);
+	else
+		return -EINVAL;
+}
+
+int prcmu_config_esram0_deep_sleep(u8 state)
+{
+	if ((state > ESRAM0_DEEP_SLEEP_STATE_RET) ||
+	    (state < ESRAM0_DEEP_SLEEP_STATE_OFF))
+		return -EINVAL;
+
+	mutex_lock(&mb4_transfer.lock);
+
+	while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(4))
+		cpu_relax();
+
+	writeb(MB4H_MEM_ST, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB4));
+	writeb(((DDR_PWR_STATE_OFFHIGHLAT << 4) | DDR_PWR_STATE_ON),
+	       (tcdm_base + PRCM_REQ_MB4_DDR_ST_AP_SLEEP_IDLE));
+	writeb(DDR_PWR_STATE_ON,
+	       (tcdm_base + PRCM_REQ_MB4_DDR_ST_AP_DEEP_IDLE));
+	writeb(state, (tcdm_base + PRCM_REQ_MB4_ESRAM0_ST));
+
+	writel(MBOX_BIT(4), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
+	wait_for_completion(&mb4_transfer.work);
+
+	mutex_unlock(&mb4_transfer.lock);
+
+	return 0;
+}
+
+int prcmu_config_hotdog(u8 threshold)
+{
+	mutex_lock(&mb4_transfer.lock);
+
+	while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(4))
+		cpu_relax();
+
+	writeb(threshold, (tcdm_base + PRCM_REQ_MB4_HOTDOG_THRESHOLD));
+	writeb(MB4H_HOTDOG, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB4));
+
+	writel(MBOX_BIT(4), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
+	wait_for_completion(&mb4_transfer.work);
+
+	mutex_unlock(&mb4_transfer.lock);
+
+	return 0;
+}
+
+int prcmu_config_hotmon(u8 low, u8 high)
+{
+	mutex_lock(&mb4_transfer.lock);
+
+	while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(4))
+		cpu_relax();
+
+	writeb(low, (tcdm_base + PRCM_REQ_MB4_HOTMON_LOW));
+	writeb(high, (tcdm_base + PRCM_REQ_MB4_HOTMON_HIGH));
+	writeb((HOTMON_CONFIG_LOW | HOTMON_CONFIG_HIGH),
+		(tcdm_base + PRCM_REQ_MB4_HOTMON_CONFIG));
+	writeb(MB4H_HOTMON, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB4));
+
+	writel(MBOX_BIT(4), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
+	wait_for_completion(&mb4_transfer.work);
+
+	mutex_unlock(&mb4_transfer.lock);
+
+	return 0;
+}
+
+static int config_hot_period(u16 val)
+{
+	mutex_lock(&mb4_transfer.lock);
+
+	while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(4))
+		cpu_relax();
+
+	writew(val, (tcdm_base + PRCM_REQ_MB4_HOT_PERIOD));
+	writeb(MB4H_HOT_PERIOD, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB4));
+
+	writel(MBOX_BIT(4), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
+	wait_for_completion(&mb4_transfer.work);
+
+	mutex_unlock(&mb4_transfer.lock);
+
+	return 0;
+}
+
+int prcmu_start_temp_sense(u16 cycles32k)
+{
+	if (cycles32k == 0xFFFF)
+		return -EINVAL;
+
+	return config_hot_period(cycles32k);
+}
+
+int prcmu_stop_temp_sense(void)
+{
+	return config_hot_period(0xFFFF);
+}
+
+/**
+ * prcmu_set_clock_divider() - Configure the clock divider.
+ * @clock:	The clock for which the request is made.
+ * @divider:	The clock divider. (< 32)
+ *
+ * This function should only be used by the clock implementation.
+ * Do not use it from any other place!
+ */
+int prcmu_set_clock_divider(u8 clock, u8 divider)
+{
+	u32 val;
+	unsigned long flags;
+
+	if ((clock >= PRCMU_NUM_REG_CLOCKS) || (divider < 1) || (31 < divider))
+		return -EINVAL;
+
+	spin_lock_irqsave(&clk_mgt_lock, flags);
+
+	/* Grab the HW semaphore. */
+	while ((readl(_PRCMU_BASE + PRCM_SEM) & PRCM_SEM_PRCM_SEM) != 0)
+		cpu_relax();
+
+	val = readl(_PRCMU_BASE + clk_mgt[clock].offset);
+	val &= ~(PRCM_CLK_MGT_CLKPLLDIV_MASK);
+	val |= (u32)divider;
+	writel(val, (_PRCMU_BASE + clk_mgt[clock].offset));
+
+	/* Release the HW semaphore. */
+	writel(0, (_PRCMU_BASE + PRCM_SEM));
+
+	spin_unlock_irqrestore(&clk_mgt_lock, flags);
+
+	return 0;
+}
 
 /**
  * prcmu_abb_read() - Read register value(s) from the ABB.
@@ -115,33 +1376,34 @@ int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size)
 	if (size != 1)
 		return -EINVAL;
 
-	r = mutex_lock_interruptible(&mb5_transfer.lock);
-	if (r)
-		return r;
+	mutex_lock(&mb5_transfer.lock);
 
-	while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(5))
+	while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(5))
 		cpu_relax();
 
-	writeb(I2C_READ(slave), REQ_MB5_I2C_SLAVE_OP);
-	writeb(I2C_STOP_EN, REQ_MB5_I2C_HW_BITS);
-	writeb(reg, REQ_MB5_I2C_REG);
+	writeb(PRCMU_I2C_READ(slave), (tcdm_base + PRCM_REQ_MB5_I2C_SLAVE_OP));
+	writeb(PRCMU_I2C_STOP_EN, (tcdm_base + PRCM_REQ_MB5_I2C_HW_BITS));
+	writeb(reg, (tcdm_base + PRCM_REQ_MB5_I2C_REG));
+	writeb(0, (tcdm_base + PRCM_REQ_MB5_I2C_VAL));
+
+	writel(MBOX_BIT(5), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
 
-	writel(MBOX_BIT(5), PRCM_MBOX_CPU_SET);
 	if (!wait_for_completion_timeout(&mb5_transfer.work,
-			msecs_to_jiffies(500))) {
-		pr_err("prcmu: prcmu_abb_read timed out.\n");
+				msecs_to_jiffies(20000))) {
+		pr_err("prcmu: %s timed out (20 s) waiting for a reply.\n",
+			__func__);
 		r = -EIO;
-		goto unlock_and_return;
+	} else {
+		r = ((mb5_transfer.ack.status == I2C_RD_OK) ? 0 : -EIO);
 	}
-	r = ((mb5_transfer.ack.status == I2C_RD_OK) ? 0 : -EIO);
+
 	if (!r)
 		*value = mb5_transfer.ack.value;
 
-unlock_and_return:
 	mutex_unlock(&mb5_transfer.lock);
+
 	return r;
 }
-EXPORT_SYMBOL(prcmu_abb_read);
 
 /**
  * prcmu_abb_write() - Write register value(s) to the ABB.
@@ -160,179 +1422,262 @@ int prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size)
 	if (size != 1)
 		return -EINVAL;
 
-	r = mutex_lock_interruptible(&mb5_transfer.lock);
-	if (r)
-		return r;
+	mutex_lock(&mb5_transfer.lock);
 
-
-	while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(5))
+	while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(5))
 		cpu_relax();
 
-	writeb(I2C_WRITE(slave), REQ_MB5_I2C_SLAVE_OP);
-	writeb(I2C_STOP_EN, REQ_MB5_I2C_HW_BITS);
-	writeb(reg, REQ_MB5_I2C_REG);
-	writeb(*value, REQ_MB5_I2C_VAL);
+	writeb(PRCMU_I2C_WRITE(slave), (tcdm_base + PRCM_REQ_MB5_I2C_SLAVE_OP));
+	writeb(PRCMU_I2C_STOP_EN, (tcdm_base + PRCM_REQ_MB5_I2C_HW_BITS));
+	writeb(reg, (tcdm_base + PRCM_REQ_MB5_I2C_REG));
+	writeb(*value, (tcdm_base + PRCM_REQ_MB5_I2C_VAL));
+
+	writel(MBOX_BIT(5), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
 
-	writel(MBOX_BIT(5), PRCM_MBOX_CPU_SET);
 	if (!wait_for_completion_timeout(&mb5_transfer.work,
-			msecs_to_jiffies(500))) {
-		pr_err("prcmu: prcmu_abb_write timed out.\n");
+				msecs_to_jiffies(20000))) {
+		pr_err("prcmu: %s timed out (20 s) waiting for a reply.\n",
+			__func__);
 		r = -EIO;
-		goto unlock_and_return;
+	} else {
+		r = ((mb5_transfer.ack.status == I2C_WR_OK) ? 0 : -EIO);
 	}
-	r = ((mb5_transfer.ack.status == I2C_WR_OK) ? 0 : -EIO);
 
-unlock_and_return:
 	mutex_unlock(&mb5_transfer.lock);
+
 	return r;
 }
-EXPORT_SYMBOL(prcmu_abb_write);
 
-static int set_ape_cpu_opps(u8 header, enum prcmu_ape_opp ape_opp,
-			    enum prcmu_cpu_opp cpu_opp)
+/**
+ * prcmu_ac_wake_req - should be called whenever ARM wants to wakeup Modem
+ */
+void prcmu_ac_wake_req(void)
 {
-	bool do_ape;
-	bool do_arm;
-	int err = 0;
+	u32 val;
 
-	do_ape = ((header == MB1H_APE_OPP) || (header == MB1H_ARM_APE_OPP));
-	do_arm = ((header == MB1H_ARM_OPP) || (header == MB1H_ARM_APE_OPP));
+	mutex_lock(&mb0_transfer.ac_wake_lock);
 
-	mutex_lock(&mb1_transfer.lock);
+	val = readl(_PRCMU_BASE + PRCM_HOSTACCESS_REQ);
+	if (val & PRCM_HOSTACCESS_REQ_HOSTACCESS_REQ)
+		goto unlock_and_return;
 
-	while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(1))
-		cpu_relax();
+	atomic_set(&ac_wake_req_state, 1);
 
-	writeb(0, MBOX_HEADER_REQ_MB0);
-	writeb(cpu_opp, REQ_MB1_ARMOPP);
-	writeb(ape_opp, REQ_MB1_APEOPP);
-	writeb(0, REQ_MB1_BOOSTOPP);
-	writel(MBOX_BIT(1), PRCM_MBOX_CPU_SET);
-	wait_for_completion(&mb1_transfer.work);
-	if ((do_ape) && (mb1_transfer.ack.ape_status != 0))
-		err = -EIO;
-	if ((do_arm) && (mb1_transfer.ack.arm_status != 0))
-		err = -EIO;
+	writel((val | PRCM_HOSTACCESS_REQ_HOSTACCESS_REQ),
+		(_PRCMU_BASE + PRCM_HOSTACCESS_REQ));
 
-	mutex_unlock(&mb1_transfer.lock);
+	if (!wait_for_completion_timeout(&mb0_transfer.ac_wake_work,
+			msecs_to_jiffies(20000))) {
+		pr_err("prcmu: %s timed out (20 s) waiting for a reply.\n",
+			__func__);
+	}
 
-	return err;
+unlock_and_return:
+	mutex_unlock(&mb0_transfer.ac_wake_lock);
 }
 
 /**
- * prcmu_set_ape_opp() - Set the OPP of the APE.
- * @opp:	The OPP to set.
- *
- * This function sets the OPP of the APE.
+ * prcmu_ac_sleep_req - called when ARM no longer needs to talk to modem
  */
-int prcmu_set_ape_opp(enum prcmu_ape_opp opp)
+void prcmu_ac_sleep_req()
 {
-	return set_ape_cpu_opps(MB1H_APE_OPP, opp, APE_OPP_NO_CHANGE);
+	u32 val;
+
+	mutex_lock(&mb0_transfer.ac_wake_lock);
+
+	val = readl(_PRCMU_BASE + PRCM_HOSTACCESS_REQ);
+	if (!(val & PRCM_HOSTACCESS_REQ_HOSTACCESS_REQ))
+		goto unlock_and_return;
+
+	writel((val & ~PRCM_HOSTACCESS_REQ_HOSTACCESS_REQ),
+		(_PRCMU_BASE + PRCM_HOSTACCESS_REQ));
+
+	if (!wait_for_completion_timeout(&mb0_transfer.ac_wake_work,
+			msecs_to_jiffies(20000))) {
+		pr_err("prcmu: %s timed out (20 s) waiting for a reply.\n",
+			__func__);
+	}
+
+	atomic_set(&ac_wake_req_state, 0);
+
+unlock_and_return:
+	mutex_unlock(&mb0_transfer.ac_wake_lock);
 }
-EXPORT_SYMBOL(prcmu_set_ape_opp);
 
-/**
- * prcmu_set_cpu_opp() - Set the OPP of the CPU.
- * @opp:	The OPP to set.
- *
- * This function sets the OPP of the CPU.
- */
-int prcmu_set_cpu_opp(enum prcmu_cpu_opp opp)
+bool prcmu_is_ac_wake_requested(void)
 {
-	return set_ape_cpu_opps(MB1H_ARM_OPP, CPU_OPP_NO_CHANGE, opp);
+	return (atomic_read(&ac_wake_req_state) != 0);
 }
-EXPORT_SYMBOL(prcmu_set_cpu_opp);
 
 /**
- * prcmu_set_ape_cpu_opps() - Set the OPPs of the APE and the CPU.
- * @ape_opp:	The APE OPP to set.
- * @cpu_opp:	The CPU OPP to set.
+ * prcmu_system_reset - System reset
  *
- * This function sets the OPPs of the APE and the CPU.
+ * Saves the reset reason code and then sets the APE_SOFRST register which
+ * fires interrupt to fw
  */
-int prcmu_set_ape_cpu_opps(enum prcmu_ape_opp ape_opp,
-			   enum prcmu_cpu_opp cpu_opp)
+void prcmu_system_reset(u16 reset_code)
 {
-	return set_ape_cpu_opps(MB1H_ARM_APE_OPP, ape_opp, cpu_opp);
+	writew(reset_code, (tcdm_base + PRCM_SW_RST_REASON));
+	writel(1, (_PRCMU_BASE + PRCM_APE_SOFTRST));
 }
-EXPORT_SYMBOL(prcmu_set_ape_cpu_opps);
 
 /**
- * prcmu_get_ape_opp() - Get the OPP of the APE.
- *
- * This function gets the OPP of the APE.
+ * prcmu_reset_modem - ask the PRCMU to reset modem
  */
-enum prcmu_ape_opp prcmu_get_ape_opp(void)
+void prcmu_modem_reset(void)
 {
-	return readb(ACK_MB1_CURR_APEOPP);
+	mutex_lock(&mb1_transfer.lock);
+
+	while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(1))
+		cpu_relax();
+
+	writeb(MB1H_RESET_MODEM, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB1));
+	writel(MBOX_BIT(1), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
+	wait_for_completion(&mb1_transfer.work);
+
+	/*
+	 * No need to check return from PRCMU as modem should go in reset state
+	 * This state is already managed by upper layer
+	 */
+
+	mutex_unlock(&mb1_transfer.lock);
 }
-EXPORT_SYMBOL(prcmu_get_ape_opp);
 
-/**
- * prcmu_get_cpu_opp() - Get the OPP of the CPU.
- *
- * This function gets the OPP of the CPU. The OPP is specified in %%.
- * PRCMU_OPP_EXT is a special OPP value, not specified in %%.
- */
-int prcmu_get_cpu_opp(void)
+static void ack_dbb_wakeup(void)
 {
-	return readb(ACK_MB1_CURR_ARMOPP);
+	unsigned long flags;
+
+	spin_lock_irqsave(&mb0_transfer.lock, flags);
+
+	while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(0))
+		cpu_relax();
+
+	writeb(MB0H_READ_WAKEUP_ACK, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB0));
+	writel(MBOX_BIT(0), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
+
+	spin_unlock_irqrestore(&mb0_transfer.lock, flags);
 }
-EXPORT_SYMBOL(prcmu_get_cpu_opp);
 
-bool prcmu_has_arm_maxopp(void)
+static inline void print_unknown_header_warning(u8 n, u8 header)
 {
-	return (readb(PRCM_AVS_VARM_MAX_OPP) & PRCM_AVS_ISMODEENABLE_MASK)
-		== PRCM_AVS_ISMODEENABLE_MASK;
+	pr_warning("prcmu: Unknown message header (%d) in mailbox %d.\n",
+		header, n);
 }
 
-static void read_mailbox_0(void)
+static bool read_mailbox_0(void)
 {
-	writel(MBOX_BIT(0), PRCM_ARM_IT1_CLEAR);
+	bool r;
+	u32 ev;
+	unsigned int n;
+	u8 header;
+
+	header = readb(tcdm_base + PRCM_MBOX_HEADER_ACK_MB0);
+	switch (header) {
+	case MB0H_WAKEUP_EXE:
+	case MB0H_WAKEUP_SLEEP:
+		if (readb(tcdm_base + PRCM_ACK_MB0_READ_POINTER) & 1)
+			ev = readl(tcdm_base + PRCM_ACK_MB0_WAKEUP_1_8500);
+		else
+			ev = readl(tcdm_base + PRCM_ACK_MB0_WAKEUP_0_8500);
+
+		if (ev & (WAKEUP_BIT_AC_WAKE_ACK | WAKEUP_BIT_AC_SLEEP_ACK))
+			complete(&mb0_transfer.ac_wake_work);
+		if (ev & WAKEUP_BIT_SYSCLK_OK)
+			complete(&mb3_transfer.sysclk_work);
+
+		ev &= mb0_transfer.req.dbb_irqs;
+
+		for (n = 0; n < NUM_PRCMU_WAKEUPS; n++) {
+			if (ev & prcmu_irq_bit[n])
+				generic_handle_irq(IRQ_PRCMU_BASE + n);
+		}
+		r = true;
+		break;
+	default:
+		print_unknown_header_warning(0, header);
+		r = false;
+		break;
+	}
+	writel(MBOX_BIT(0), (_PRCMU_BASE + PRCM_ARM_IT1_CLR));
+	return r;
 }
 
-static void read_mailbox_1(void)
+static bool read_mailbox_1(void)
 {
-	mb1_transfer.ack.arm_opp = readb(ACK_MB1_CURR_ARMOPP);
-	mb1_transfer.ack.ape_opp = readb(ACK_MB1_CURR_APEOPP);
+	mb1_transfer.ack.header = readb(tcdm_base + PRCM_MBOX_HEADER_REQ_MB1);
+	mb1_transfer.ack.arm_opp = readb(tcdm_base +
+		PRCM_ACK_MB1_CURRENT_ARM_OPP);
+	mb1_transfer.ack.ape_opp = readb(tcdm_base +
+		PRCM_ACK_MB1_CURRENT_APE_OPP);
+	mb1_transfer.ack.ape_voltage_status = readb(tcdm_base +
+		PRCM_ACK_MB1_APE_VOLTAGE_STATUS);
+	writel(MBOX_BIT(1), (_PRCMU_BASE + PRCM_ARM_IT1_CLR));
 	complete(&mb1_transfer.work);
-	writel(MBOX_BIT(1), PRCM_ARM_IT1_CLEAR);
+	return false;
 }
 
-static void read_mailbox_2(void)
+static bool read_mailbox_2(void)
 {
-	writel(MBOX_BIT(2), PRCM_ARM_IT1_CLEAR);
+	mb2_transfer.ack.status = readb(tcdm_base + PRCM_ACK_MB2_DPS_STATUS);
+	writel(MBOX_BIT(2), (_PRCMU_BASE + PRCM_ARM_IT1_CLR));
+	complete(&mb2_transfer.work);
+	return false;
 }
 
-static void read_mailbox_3(void)
+static bool read_mailbox_3(void)
 {
-	writel(MBOX_BIT(3), PRCM_ARM_IT1_CLEAR);
+	writel(MBOX_BIT(3), (_PRCMU_BASE + PRCM_ARM_IT1_CLR));
+	return false;
 }
 
-static void read_mailbox_4(void)
+static bool read_mailbox_4(void)
 {
-	writel(MBOX_BIT(4), PRCM_ARM_IT1_CLEAR);
+	u8 header;
+	bool do_complete = true;
+
+	header = readb(tcdm_base + PRCM_MBOX_HEADER_REQ_MB4);
+	switch (header) {
+	case MB4H_MEM_ST:
+	case MB4H_HOTDOG:
+	case MB4H_HOTMON:
+	case MB4H_HOT_PERIOD:
+		break;
+	default:
+		print_unknown_header_warning(4, header);
+		do_complete = false;
+		break;
+	}
+
+	writel(MBOX_BIT(4), (_PRCMU_BASE + PRCM_ARM_IT1_CLR));
+
+	if (do_complete)
+		complete(&mb4_transfer.work);
+
+	return false;
 }
 
-static void read_mailbox_5(void)
+static bool read_mailbox_5(void)
 {
-	mb5_transfer.ack.status = readb(ACK_MB5_I2C_STATUS);
-	mb5_transfer.ack.value = readb(ACK_MB5_I2C_VAL);
+	mb5_transfer.ack.status = readb(tcdm_base + PRCM_ACK_MB5_I2C_STATUS);
+	mb5_transfer.ack.value = readb(tcdm_base + PRCM_ACK_MB5_I2C_VAL);
+	writel(MBOX_BIT(5), (_PRCMU_BASE + PRCM_ARM_IT1_CLR));
 	complete(&mb5_transfer.work);
-	writel(MBOX_BIT(5), PRCM_ARM_IT1_CLEAR);
+	return false;
 }
 
-static void read_mailbox_6(void)
+static bool read_mailbox_6(void)
 {
-	writel(MBOX_BIT(6), PRCM_ARM_IT1_CLEAR);
+	writel(MBOX_BIT(6), (_PRCMU_BASE + PRCM_ARM_IT1_CLR));
+	return false;
 }
 
-static void read_mailbox_7(void)
+static bool read_mailbox_7(void)
 {
-	writel(MBOX_BIT(7), PRCM_ARM_IT1_CLEAR);
+	writel(MBOX_BIT(7), (_PRCMU_BASE + PRCM_ARM_IT1_CLR));
+	return false;
 }
 
-static void (* const read_mailbox[NUM_MBOX])(void) = {
+static bool (* const read_mailbox[NUM_MB])(void) = {
 	read_mailbox_0,
 	read_mailbox_1,
 	read_mailbox_2,
@@ -347,49 +1692,199 @@ static irqreturn_t prcmu_irq_handler(int irq, void *data)
 {
 	u32 bits;
 	u8 n;
+	irqreturn_t r;
 
-	bits = (readl(PRCM_ARM_IT1_VAL) & (MBOX_BIT(NUM_MBOX) - 1));
+	bits = (readl(_PRCMU_BASE + PRCM_ARM_IT1_VAL) & ALL_MBOX_BITS);
 	if (unlikely(!bits))
 		return IRQ_NONE;
 
+	r = IRQ_HANDLED;
 	for (n = 0; bits; n++) {
 		if (bits & MBOX_BIT(n)) {
 			bits -= MBOX_BIT(n);
-			read_mailbox[n]();
+			if (read_mailbox[n]())
+				r = IRQ_WAKE_THREAD;
 		}
 	}
+	return r;
+}
+
+static irqreturn_t prcmu_irq_thread_fn(int irq, void *data)
+{
+	ack_dbb_wakeup();
 	return IRQ_HANDLED;
 }
 
+static void prcmu_mask_work(struct work_struct *work)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&mb0_transfer.lock, flags);
+
+	config_wakeups();
+
+	spin_unlock_irqrestore(&mb0_transfer.lock, flags);
+}
+
+static void prcmu_irq_mask(struct irq_data *d)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&mb0_transfer.dbb_irqs_lock, flags);
+
+	mb0_transfer.req.dbb_irqs &= ~prcmu_irq_bit[d->irq - IRQ_PRCMU_BASE];
+
+	spin_unlock_irqrestore(&mb0_transfer.dbb_irqs_lock, flags);
+
+	if (d->irq != IRQ_PRCMU_CA_SLEEP)
+		schedule_work(&mb0_transfer.mask_work);
+}
+
+static void prcmu_irq_unmask(struct irq_data *d)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&mb0_transfer.dbb_irqs_lock, flags);
+
+	mb0_transfer.req.dbb_irqs |= prcmu_irq_bit[d->irq - IRQ_PRCMU_BASE];
+
+	spin_unlock_irqrestore(&mb0_transfer.dbb_irqs_lock, flags);
+
+	if (d->irq != IRQ_PRCMU_CA_SLEEP)
+		schedule_work(&mb0_transfer.mask_work);
+}
+
+static void noop(struct irq_data *d)
+{
+}
+
+static struct irq_chip prcmu_irq_chip = {
+	.name		= "prcmu",
+	.irq_disable	= prcmu_irq_mask,
+	.irq_ack	= noop,
+	.irq_mask	= prcmu_irq_mask,
+	.irq_unmask	= prcmu_irq_unmask,
+};
+
 void __init prcmu_early_init(void)
 {
-	if (cpu_is_u8500v11() || cpu_is_u8500ed()) {
+	unsigned int i;
+
+	if (cpu_is_u8500v1()) {
 		tcdm_base = __io_address(U8500_PRCMU_TCDM_BASE_V1);
 	} else if (cpu_is_u8500v2()) {
+		void *tcpm_base = ioremap_nocache(U8500_PRCMU_TCPM_BASE, SZ_4K);
+
+		if (tcpm_base != NULL) {
+			int version;
+			version = readl(tcpm_base + PRCMU_FW_VERSION_OFFSET);
+			prcmu_version.project_number = version & 0xFF;
+			prcmu_version.api_version = (version >> 8) & 0xFF;
+			prcmu_version.func_version = (version >> 16) & 0xFF;
+			prcmu_version.errata = (version >> 24) & 0xFF;
+			pr_info("PRCMU firmware version %d.%d.%d\n",
+				(version >> 8) & 0xFF, (version >> 16) & 0xFF,
+				(version >> 24) & 0xFF);
+			iounmap(tcpm_base);
+		}
+
 		tcdm_base = __io_address(U8500_PRCMU_TCDM_BASE);
 	} else {
 		pr_err("prcmu: Unsupported chip version\n");
 		BUG();
 	}
-}
-
-static int __init prcmu_init(void)
-{
-	if (cpu_is_u8500ed()) {
-		pr_err("prcmu: Unsupported chip version\n");
-		return 0;
-	}
 
+	spin_lock_init(&mb0_transfer.lock);
+	spin_lock_init(&mb0_transfer.dbb_irqs_lock);
+	mutex_init(&mb0_transfer.ac_wake_lock);
+	init_completion(&mb0_transfer.ac_wake_work);
 	mutex_init(&mb1_transfer.lock);
 	init_completion(&mb1_transfer.work);
+	mutex_init(&mb2_transfer.lock);
+	init_completion(&mb2_transfer.work);
+	spin_lock_init(&mb2_transfer.auto_pm_lock);
+	spin_lock_init(&mb3_transfer.lock);
+	mutex_init(&mb3_transfer.sysclk_lock);
+	init_completion(&mb3_transfer.sysclk_work);
+	mutex_init(&mb4_transfer.lock);
+	init_completion(&mb4_transfer.work);
 	mutex_init(&mb5_transfer.lock);
 	init_completion(&mb5_transfer.work);
 
+	INIT_WORK(&mb0_transfer.mask_work, prcmu_mask_work);
+
+	/* Initalize irqs. */
+	for (i = 0; i < NUM_PRCMU_WAKEUPS; i++) {
+		unsigned int irq;
+
+		irq = IRQ_PRCMU_BASE + i;
+		irq_set_chip_and_handler(irq, &prcmu_irq_chip,
+					 handle_simple_irq);
+		set_irq_flags(irq, IRQF_VALID);
+	}
+}
+
+static struct mfd_cell db8500_prcmu_devs[] = {
+	{
+		.name = "db8500-prcmu-regulators",
+	},
+	{
+		.name = "cpufreq-u8500",
+	},
+};
+
+/**
+ * prcmu_fw_init - arch init call for the Linux PRCMU fw init logic
+ *
+ */
+static int __init db8500_prcmu_probe(struct platform_device *pdev)
+{
+	int err = 0;
+
+	if (ux500_is_svp())
+		return -ENODEV;
+
 	/* Clean up the mailbox interrupts after pre-kernel code. */
-	writel((MBOX_BIT(NUM_MBOX) - 1), PRCM_ARM_IT1_CLEAR);
+	writel(ALL_MBOX_BITS, (_PRCMU_BASE + PRCM_ARM_IT1_CLR));
+
+	err = request_threaded_irq(IRQ_DB8500_PRCMU1, prcmu_irq_handler,
+		prcmu_irq_thread_fn, IRQF_NO_SUSPEND, "prcmu", NULL);
+	if (err < 0) {
+		pr_err("prcmu: Failed to allocate IRQ_DB8500_PRCMU1.\n");
+		err = -EBUSY;
+		goto no_irq_return;
+	}
+
+	if (cpu_is_u8500v20_or_later())
+		prcmu_config_esram0_deep_sleep(ESRAM0_DEEP_SLEEP_STATE_RET);
+
+	err = mfd_add_devices(&pdev->dev, 0, db8500_prcmu_devs,
+			      ARRAY_SIZE(db8500_prcmu_devs), NULL,
+			      0);
 
-	return request_irq(IRQ_DB8500_PRCMU1, prcmu_irq_handler, 0,
-			   "prcmu", NULL);
+	if (err)
+		pr_err("prcmu: Failed to add subdevices\n");
+	else
+		pr_info("DB8500 PRCMU initialized\n");
+
+no_irq_return:
+	return err;
+}
+
+static struct platform_driver db8500_prcmu_driver = {
+	.driver = {
+		.name = "db8500-prcmu",
+		.owner = THIS_MODULE,
+	},
+};
+
+static int __init db8500_prcmu_init(void)
+{
+	return platform_driver_probe(&db8500_prcmu_driver, db8500_prcmu_probe);
 }
 
-arch_initcall(prcmu_init);
+arch_initcall(db8500_prcmu_init);
+
+MODULE_AUTHOR("Mattias Nilsson <mattias.i.nilsson@stericsson.com>");
+MODULE_DESCRIPTION("DB8500 PRCM Unit driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/mfd/db8500-prcmu.h b/include/linux/mfd/db8500-prcmu.h
index d591d79aa6f0..917dbcab701c 100644
--- a/include/linux/mfd/db8500-prcmu.h
+++ b/include/linux/mfd/db8500-prcmu.h
@@ -2,57 +2,977 @@
  * Copyright (C) STMicroelectronics 2009
  * Copyright (C) ST-Ericsson SA 2010
  *
- * Author: Sundar Iyer <sundar.iyer@stericsson.com>
- * Author: Martin Persson <martin.persson@stericsson.com>
- *
  * License Terms: GNU General Public License v2
+ * Author: Kumar Sanghvi <kumar.sanghvi@stericsson.com>
  *
- * PRCM Unit definitions
+ * PRCMU f/w APIs
  */
+#ifndef __MFD_DB8500_PRCMU_H
+#define __MFD_DB8500_PRCMU_H
+
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
 
-#ifndef __MACH_PRCMU_DEFS_H
-#define __MACH_PRCMU_DEFS_H
+/* This portion previously known as <mach/prcmu-fw-defs_v1.h> */
 
-enum prcmu_cpu_opp {
-	CPU_OPP_INIT	  = 0x00,
-	CPU_OPP_NO_CHANGE = 0x01,
-	CPU_OPP_100	  = 0x02,
-	CPU_OPP_50	  = 0x03,
-	CPU_OPP_MAX	  = 0x04,
-	CPU_OPP_EXT_CLK	  = 0x07
+/**
+ * enum state - ON/OFF state definition
+ * @OFF: State is ON
+ * @ON: State is OFF
+ *
+ */
+enum state {
+	OFF = 0x0,
+	ON  = 0x1,
 };
-enum prcmu_ape_opp {
-	APE_OPP_NO_CHANGE = 0x00,
-	APE_OPP_100	  = 0x02,
-	APE_OPP_50	  = 0x03,
+
+/**
+ * enum ret_state - general purpose On/Off/Retention states
+ *
+ */
+enum ret_state {
+	OFFST = 0,
+	ONST  = 1,
+	RETST = 2
 };
 
-#endif /* __MACH_PRCMU_DEFS_H */
+/**
+ * enum clk_arm - ARM Cortex A9 clock schemes
+ * @A9_OFF:
+ * @A9_BOOT:
+ * @A9_OPPT1:
+ * @A9_OPPT2:
+ * @A9_EXTCLK:
+ */
+enum clk_arm {
+	A9_OFF,
+	A9_BOOT,
+	A9_OPPT1,
+	A9_OPPT2,
+	A9_EXTCLK
+};
 
-/*
- * Copyright (C) STMicroelectronics 2009
- * Copyright (C) ST-Ericsson SA 2010
+/**
+ * enum clk_gen - GEN#0/GEN#1 clock schemes
+ * @GEN_OFF:
+ * @GEN_BOOT:
+ * @GEN_OPPT1:
+ */
+enum clk_gen {
+	GEN_OFF,
+	GEN_BOOT,
+	GEN_OPPT1,
+};
+
+/* some information between arm and xp70 */
+
+/**
+ * enum romcode_write - Romcode message written by A9 AND read by XP70
+ * @RDY_2_DS: Value set when ApDeepSleep state can be executed by XP70
+ * @RDY_2_XP70_RST: Value set when 0x0F has been successfully polled by the
+ *                 romcode. The xp70 will go into self-reset
+ */
+enum romcode_write {
+	RDY_2_DS = 0x09,
+	RDY_2_XP70_RST = 0x10
+};
+
+/**
+ * enum romcode_read - Romcode message written by XP70 and read by A9
+ * @INIT: Init value when romcode field is not used
+ * @FS_2_DS: Value set when power state is going from ApExecute to
+ *          ApDeepSleep
+ * @END_DS: Value set when ApDeepSleep power state is reached coming from
+ *         ApExecute state
+ * @DS_TO_FS: Value set when power state is going from ApDeepSleep to
+ *           ApExecute
+ * @END_FS: Value set when ApExecute power state is reached coming from
+ *         ApDeepSleep state
+ * @SWR: Value set when power state is going to ApReset
+ * @END_SWR: Value set when the xp70 finished executing ApReset actions and
+ *          waits for romcode acknowledgment to go to self-reset
+ */
+enum romcode_read {
+	INIT = 0x00,
+	FS_2_DS = 0x0A,
+	END_DS = 0x0B,
+	DS_TO_FS = 0x0C,
+	END_FS = 0x0D,
+	SWR = 0x0E,
+	END_SWR = 0x0F
+};
+
+/**
+ * enum ap_pwrst - current power states defined in PRCMU firmware
+ * @NO_PWRST: Current power state init
+ * @AP_BOOT: Current power state is apBoot
+ * @AP_EXECUTE: Current power state is apExecute
+ * @AP_DEEP_SLEEP: Current power state is apDeepSleep
+ * @AP_SLEEP: Current power state is apSleep
+ * @AP_IDLE: Current power state is apIdle
+ * @AP_RESET: Current power state is apReset
+ */
+enum ap_pwrst {
+	NO_PWRST = 0x00,
+	AP_BOOT = 0x01,
+	AP_EXECUTE = 0x02,
+	AP_DEEP_SLEEP = 0x03,
+	AP_SLEEP = 0x04,
+	AP_IDLE = 0x05,
+	AP_RESET = 0x06
+};
+
+/**
+ * enum ap_pwrst_trans - Transition states defined in PRCMU firmware
+ * @NO_TRANSITION: No power state transition
+ * @APEXECUTE_TO_APSLEEP: Power state transition from ApExecute to ApSleep
+ * @APIDLE_TO_APSLEEP: Power state transition from ApIdle to ApSleep
+ * @APBOOT_TO_APEXECUTE: Power state transition from ApBoot to ApExecute
+ * @APEXECUTE_TO_APDEEPSLEEP: Power state transition from ApExecute to
+ *                          ApDeepSleep
+ * @APEXECUTE_TO_APIDLE: Power state transition from ApExecute to ApIdle
+ */
+enum ap_pwrst_trans {
+	NO_TRANSITION			= 0x00,
+	APEXECUTE_TO_APSLEEP		= 0x01,
+	APIDLE_TO_APSLEEP		= 0x02, /* To be removed */
+	PRCMU_AP_SLEEP			= 0x01,
+	APBOOT_TO_APEXECUTE		= 0x03,
+	APEXECUTE_TO_APDEEPSLEEP	= 0x04, /* To be removed */
+	PRCMU_AP_DEEP_SLEEP		= 0x04,
+	APEXECUTE_TO_APIDLE		= 0x05, /* To be removed */
+	PRCMU_AP_IDLE			= 0x05,
+	PRCMU_AP_DEEP_IDLE		= 0x07,
+};
+
+/**
+ * enum ddr_pwrst - DDR power states definition
+ * @DDR_PWR_STATE_UNCHANGED: SDRAM and DDR controller state is unchanged
+ * @DDR_PWR_STATE_ON:
+ * @DDR_PWR_STATE_OFFLOWLAT:
+ * @DDR_PWR_STATE_OFFHIGHLAT:
+ */
+enum ddr_pwrst {
+	DDR_PWR_STATE_UNCHANGED     = 0x00,
+	DDR_PWR_STATE_ON            = 0x01,
+	DDR_PWR_STATE_OFFLOWLAT     = 0x02,
+	DDR_PWR_STATE_OFFHIGHLAT    = 0x03
+};
+
+/**
+ * enum arm_opp - ARM OPP states definition
+ * @ARM_OPP_INIT:
+ * @ARM_NO_CHANGE: The ARM operating point is unchanged
+ * @ARM_100_OPP: The new ARM operating point is arm100opp
+ * @ARM_50_OPP: The new ARM operating point is arm50opp
+ * @ARM_MAX_OPP: Operating point is "max" (more than 100)
+ * @ARM_MAX_FREQ100OPP: Set max opp if available, else 100
+ * @ARM_EXTCLK: The new ARM operating point is armExtClk
+ */
+enum arm_opp {
+	ARM_OPP_INIT = 0x00,
+	ARM_NO_CHANGE = 0x01,
+	ARM_100_OPP = 0x02,
+	ARM_50_OPP = 0x03,
+	ARM_MAX_OPP = 0x04,
+	ARM_MAX_FREQ100OPP = 0x05,
+	ARM_EXTCLK = 0x07
+};
+
+/**
+ * enum ape_opp - APE OPP states definition
+ * @APE_OPP_INIT:
+ * @APE_NO_CHANGE: The APE operating point is unchanged
+ * @APE_100_OPP: The new APE operating point is ape100opp
+ * @APE_50_OPP: 50%
+ */
+enum ape_opp {
+	APE_OPP_INIT = 0x00,
+	APE_NO_CHANGE = 0x01,
+	APE_100_OPP = 0x02,
+	APE_50_OPP = 0x03
+};
+
+/**
+ * enum hw_acc_state - State definition for hardware accelerator
+ * @HW_NO_CHANGE: The hardware accelerator state must remain unchanged
+ * @HW_OFF: The hardware accelerator must be switched off
+ * @HW_OFF_RAMRET: The hardware accelerator must be switched off with its
+ *               internal RAM in retention
+ * @HW_ON: The hwa hardware accelerator hwa must be switched on
  *
- * Author: Kumar Sanghvi <kumar.sanghvi@stericsson.com>
- * Author: Sundar Iyer <sundar.iyer@stericsson.com>
- * Author: Mattias Nilsson <mattias.i.nilsson@stericsson.com>
+ * NOTE! Deprecated, to be removed when all users switched over to use the
+ * regulator API.
+ */
+enum hw_acc_state {
+	HW_NO_CHANGE = 0x00,
+	HW_OFF = 0x01,
+	HW_OFF_RAMRET = 0x02,
+	HW_ON = 0x04
+};
+
+/**
+ * enum  mbox_2_arm_stat - Status messages definition for mbox_arm
+ * @BOOT_TO_EXECUTEOK: The apBoot to apExecute state transition has been
+ *                    completed
+ * @DEEPSLEEPOK: The apExecute to apDeepSleep state transition has been
+ *              completed
+ * @SLEEPOK: The apExecute to apSleep state transition has been completed
+ * @IDLEOK: The apExecute to apIdle state transition has been completed
+ * @SOFTRESETOK: The A9 watchdog/ SoftReset state has been completed
+ * @SOFTRESETGO : The A9 watchdog/SoftReset state is on going
+ * @BOOT_TO_EXECUTE: The apBoot to apExecute state transition is on going
+ * @EXECUTE_TO_DEEPSLEEP: The apExecute to apDeepSleep state transition is on
+ *                       going
+ * @DEEPSLEEP_TO_EXECUTE: The apDeepSleep to apExecute state transition is on
+ *                       going
+ * @DEEPSLEEP_TO_EXECUTEOK: The apDeepSleep to apExecute state transition has
+ *                         been completed
+ * @EXECUTE_TO_SLEEP: The apExecute to apSleep state transition is on going
+ * @SLEEP_TO_EXECUTE: The apSleep to apExecute state transition is on going
+ * @SLEEP_TO_EXECUTEOK: The apSleep to apExecute state transition has been
+ *                     completed
+ * @EXECUTE_TO_IDLE: The apExecute to apIdle state transition is on going
+ * @IDLE_TO_EXECUTE: The apIdle to apExecute state transition is on going
+ * @IDLE_TO_EXECUTEOK: The apIdle to apExecute state transition has been
+ *                    completed
+ * @INIT_STATUS: Status init
+ */
+enum ap_pwrsttr_status {
+	BOOT_TO_EXECUTEOK = 0xFF,
+	DEEPSLEEPOK = 0xFE,
+	SLEEPOK = 0xFD,
+	IDLEOK = 0xFC,
+	SOFTRESETOK = 0xFB,
+	SOFTRESETGO = 0xFA,
+	BOOT_TO_EXECUTE = 0xF9,
+	EXECUTE_TO_DEEPSLEEP = 0xF8,
+	DEEPSLEEP_TO_EXECUTE = 0xF7,
+	DEEPSLEEP_TO_EXECUTEOK = 0xF6,
+	EXECUTE_TO_SLEEP = 0xF5,
+	SLEEP_TO_EXECUTE = 0xF4,
+	SLEEP_TO_EXECUTEOK = 0xF3,
+	EXECUTE_TO_IDLE = 0xF2,
+	IDLE_TO_EXECUTE = 0xF1,
+	IDLE_TO_EXECUTEOK = 0xF0,
+	RDYTODS_RETURNTOEXE    = 0xEF,
+	NORDYTODS_RETURNTOEXE  = 0xEE,
+	EXETOSLEEP_RETURNTOEXE = 0xED,
+	EXETOIDLE_RETURNTOEXE  = 0xEC,
+	INIT_STATUS = 0xEB,
+
+	/*error messages */
+	INITERROR                     = 0x00,
+	PLLARMLOCKP_ER                = 0x01,
+	PLLDDRLOCKP_ER                = 0x02,
+	PLLSOCLOCKP_ER                = 0x03,
+	PLLSOCK1LOCKP_ER              = 0x04,
+	ARMWFI_ER                     = 0x05,
+	SYSCLKOK_ER                   = 0x06,
+	I2C_NACK_DATA_ER              = 0x07,
+	BOOT_ER                       = 0x08,
+	I2C_STATUS_ALWAYS_1           = 0x0A,
+	I2C_NACK_REG_ADDR_ER          = 0x0B,
+	I2C_NACK_DATA0123_ER          = 0x1B,
+	I2C_NACK_ADDR_ER              = 0x1F,
+	CURAPPWRSTISNOT_BOOT          = 0x20,
+	CURAPPWRSTISNOT_EXECUTE       = 0x21,
+	CURAPPWRSTISNOT_SLEEPMODE     = 0x22,
+	CURAPPWRSTISNOT_CORRECTFORIT10 = 0x23,
+	FIFO4500WUISNOT_WUPEVENT      = 0x24,
+	PLL32KLOCKP_ER                = 0x29,
+	DDRDEEPSLEEPOK_ER             = 0x2A,
+	ROMCODEREADY_ER               = 0x50,
+	WUPBEFOREDS                   = 0x51,
+	DDRCONFIG_ER                  = 0x52,
+	WUPBEFORESLEEP                = 0x53,
+	WUPBEFOREIDLE                 = 0x54
+};  /* earlier called as  mbox_2_arm_stat */
+
+/**
+ * enum dvfs_stat - DVFS status messages definition
+ * @DVFS_GO: A state transition DVFS is on going
+ * @DVFS_ARM100OPPOK: The state transition DVFS has been completed for 100OPP
+ * @DVFS_ARM50OPPOK: The state transition DVFS has been completed for 50OPP
+ * @DVFS_ARMEXTCLKOK: The state transition DVFS has been completed for EXTCLK
+ * @DVFS_NOCHGTCLKOK: The state transition DVFS has been completed for
+ *                   NOCHGCLK
+ * @DVFS_INITSTATUS: Value init
+ */
+enum dvfs_stat {
+	DVFS_GO = 0xFF,
+	DVFS_ARM100OPPOK = 0xFE,
+	DVFS_ARM50OPPOK = 0xFD,
+	DVFS_ARMEXTCLKOK = 0xFC,
+	DVFS_NOCHGTCLKOK = 0xFB,
+	DVFS_INITSTATUS = 0x00
+};
+
+/**
+ * enum sva_mmdsp_stat - SVA MMDSP status messages
+ * @SVA_MMDSP_GO: SVAMMDSP interrupt has happened
+ * @SVA_MMDSP_INIT: Status init
+ */
+enum sva_mmdsp_stat {
+	SVA_MMDSP_GO = 0xFF,
+	SVA_MMDSP_INIT = 0x00
+};
+
+/**
+ * enum sia_mmdsp_stat - SIA MMDSP status messages
+ * @SIA_MMDSP_GO: SIAMMDSP interrupt has happened
+ * @SIA_MMDSP_INIT: Status init
+ */
+enum sia_mmdsp_stat {
+	SIA_MMDSP_GO = 0xFF,
+	SIA_MMDSP_INIT = 0x00
+};
+
+/**
+ * enum  mbox_to_arm_err - Error messages definition
+ * @INIT_ERR: Init value
+ * @PLLARMLOCKP_ERR: PLLARM has not been correctly locked in given time
+ * @PLLDDRLOCKP_ERR: PLLDDR has not been correctly locked in the given time
+ * @PLLSOC0LOCKP_ERR: PLLSOC0 has not been correctly locked in the given time
+ * @PLLSOC1LOCKP_ERR: PLLSOC1 has not been correctly locked in the given time
+ * @ARMWFI_ERR: The ARM WFI has not been correctly executed in the given time
+ * @SYSCLKOK_ERR: The SYSCLK is not available in the given time
+ * @BOOT_ERR: Romcode has not validated the XP70 self reset in the given time
+ * @ROMCODESAVECONTEXT: The Romcode didn.t correctly save it secure context
+ * @VARMHIGHSPEEDVALTO_ERR: The ARM high speed supply value transfered
+ *          through I2C has not been correctly executed in the given time
+ * @VARMHIGHSPEEDACCESS_ERR: The command value of VarmHighSpeedVal transfered
+ *             through I2C has not been correctly executed in the given time
+ * @VARMLOWSPEEDVALTO_ERR:The ARM low speed supply value transfered through
+ *                     I2C has not been correctly executed in the given time
+ * @VARMLOWSPEEDACCESS_ERR: The command value of VarmLowSpeedVal transfered
+ *             through I2C has not been correctly executed in the given time
+ * @VARMRETENTIONVALTO_ERR: The ARM retention supply value transfered through
+ *                     I2C has not been correctly executed in the given time
+ * @VARMRETENTIONACCESS_ERR: The command value of VarmRetentionVal transfered
+ *             through I2C has not been correctly executed in the given time
+ * @VAPEHIGHSPEEDVALTO_ERR: The APE highspeed supply value transfered through
+ *                     I2C has not been correctly executed in the given time
+ * @VSAFEHPVALTO_ERR: The SAFE high power supply value transfered through I2C
+ *                         has not been correctly executed in the given time
+ * @VMODSEL1VALTO_ERR: The MODEM sel1 supply value transfered through I2C has
+ *                             not been correctly executed in the given time
+ * @VMODSEL2VALTO_ERR: The MODEM sel2 supply value transfered through I2C has
+ *                             not been correctly executed in the given time
+ * @VARMOFFACCESS_ERR: The command value of Varm ON/OFF transfered through
+ *                     I2C has not been correctly executed in the given time
+ * @VAPEOFFACCESS_ERR: The command value of Vape ON/OFF transfered through
+ *                     I2C has not been correctly executed in the given time
+ * @VARMRETACCES_ERR: The command value of Varm retention ON/OFF transfered
+ *             through I2C has not been correctly executed in the given time
+ * @CURAPPWRSTISNOTBOOT:Generated when Arm want to do power state transition
+ *             ApBoot to ApExecute but the power current state is not Apboot
+ * @CURAPPWRSTISNOTEXECUTE: Generated when Arm want to do power state
+ *              transition from ApExecute to others power state but the
+ *              power current state is not ApExecute
+ * @CURAPPWRSTISNOTSLEEPMODE: Generated when wake up events are transmitted
+ *             but the power current state is not ApDeepSleep/ApSleep/ApIdle
+ * @CURAPPWRSTISNOTCORRECTDBG:  Generated when wake up events are transmitted
+ *              but the power current state is not correct
+ * @ARMREGU1VALTO_ERR:The ArmRegu1 value transferred through I2C has not
+ *                    been correctly executed in the given time
+ * @ARMREGU2VALTO_ERR: The ArmRegu2 value transferred through I2C has not
+ *                    been correctly executed in the given time
+ * @VAPEREGUVALTO_ERR: The VApeRegu value transfered through I2C has not
+ *                    been correctly executed in the given time
+ * @VSMPS3REGUVALTO_ERR: The VSmps3Regu value transfered through I2C has not
+ *                      been correctly executed in the given time
+ * @VMODREGUVALTO_ERR: The VModemRegu value transfered through I2C has not
+ *                    been correctly executed in the given time
+ */
+enum mbox_to_arm_err {
+	INIT_ERR = 0x00,
+	PLLARMLOCKP_ERR = 0x01,
+	PLLDDRLOCKP_ERR = 0x02,
+	PLLSOC0LOCKP_ERR = 0x03,
+	PLLSOC1LOCKP_ERR = 0x04,
+	ARMWFI_ERR = 0x05,
+	SYSCLKOK_ERR = 0x06,
+	BOOT_ERR = 0x07,
+	ROMCODESAVECONTEXT = 0x08,
+	VARMHIGHSPEEDVALTO_ERR = 0x10,
+	VARMHIGHSPEEDACCESS_ERR = 0x11,
+	VARMLOWSPEEDVALTO_ERR = 0x12,
+	VARMLOWSPEEDACCESS_ERR = 0x13,
+	VARMRETENTIONVALTO_ERR = 0x14,
+	VARMRETENTIONACCESS_ERR = 0x15,
+	VAPEHIGHSPEEDVALTO_ERR = 0x16,
+	VSAFEHPVALTO_ERR = 0x17,
+	VMODSEL1VALTO_ERR = 0x18,
+	VMODSEL2VALTO_ERR = 0x19,
+	VARMOFFACCESS_ERR = 0x1A,
+	VAPEOFFACCESS_ERR = 0x1B,
+	VARMRETACCES_ERR = 0x1C,
+	CURAPPWRSTISNOTBOOT = 0x20,
+	CURAPPWRSTISNOTEXECUTE = 0x21,
+	CURAPPWRSTISNOTSLEEPMODE = 0x22,
+	CURAPPWRSTISNOTCORRECTDBG = 0x23,
+	ARMREGU1VALTO_ERR = 0x24,
+	ARMREGU2VALTO_ERR = 0x25,
+	VAPEREGUVALTO_ERR = 0x26,
+	VSMPS3REGUVALTO_ERR = 0x27,
+	VMODREGUVALTO_ERR = 0x28
+};
+
+enum hw_acc {
+	SVAMMDSP = 0,
+	SVAPIPE = 1,
+	SIAMMDSP = 2,
+	SIAPIPE = 3,
+	SGA = 4,
+	B2R2MCDE = 5,
+	ESRAM12 = 6,
+	ESRAM34 = 7,
+};
+
+enum cs_pwrmgt {
+	PWRDNCS0  = 0,
+	WKUPCS0   = 1,
+	PWRDNCS1  = 2,
+	WKUPCS1   = 3
+};
+
+/* Defs related to autonomous power management */
+
+/**
+ * enum sia_sva_pwr_policy - Power policy
+ * @NO_CHGT:	No change
+ * @DSPOFF_HWPOFF:
+ * @DSPOFFRAMRET_HWPOFF:
+ * @DSPCLKOFF_HWPOFF:
+ * @DSPCLKOFF_HWPCLKOFF:
  *
- * License Terms: GNU General Public License v2
+ */
+enum sia_sva_pwr_policy {
+	NO_CHGT			= 0x0,
+	DSPOFF_HWPOFF		= 0x1,
+	DSPOFFRAMRET_HWPOFF	= 0x2,
+	DSPCLKOFF_HWPOFF	= 0x3,
+	DSPCLKOFF_HWPCLKOFF	= 0x4,
+};
+
+/**
+ * enum auto_enable - Auto Power enable
+ * @AUTO_OFF:
+ * @AUTO_ON:
+ *
+ */
+enum auto_enable {
+	AUTO_OFF	= 0x0,
+	AUTO_ON		= 0x1,
+};
+
+/* End of file previously known as prcmu-fw-defs_v1.h */
+
+/* PRCMU Wakeup defines */
+enum prcmu_wakeup_index {
+	PRCMU_WAKEUP_INDEX_RTC,
+	PRCMU_WAKEUP_INDEX_RTT0,
+	PRCMU_WAKEUP_INDEX_RTT1,
+	PRCMU_WAKEUP_INDEX_HSI0,
+	PRCMU_WAKEUP_INDEX_HSI1,
+	PRCMU_WAKEUP_INDEX_USB,
+	PRCMU_WAKEUP_INDEX_ABB,
+	PRCMU_WAKEUP_INDEX_ABB_FIFO,
+	PRCMU_WAKEUP_INDEX_ARM,
+	NUM_PRCMU_WAKEUP_INDICES
+};
+#define PRCMU_WAKEUP(_name) (BIT(PRCMU_WAKEUP_INDEX_##_name))
+
+/* PRCMU QoS APE OPP class */
+#define PRCMU_QOS_APE_OPP 1
+#define PRCMU_QOS_DDR_OPP 2
+#define PRCMU_QOS_DEFAULT_VALUE -1
+
+/**
+ * enum hw_acc_dev - enum for hw accelerators
+ * @HW_ACC_SVAMMDSP: for SVAMMDSP
+ * @HW_ACC_SVAPIPE:  for SVAPIPE
+ * @HW_ACC_SIAMMDSP: for SIAMMDSP
+ * @HW_ACC_SIAPIPE: for SIAPIPE
+ * @HW_ACC_SGA: for SGA
+ * @HW_ACC_B2R2: for B2R2
+ * @HW_ACC_MCDE: for MCDE
+ * @HW_ACC_ESRAM1: for ESRAM1
+ * @HW_ACC_ESRAM2: for ESRAM2
+ * @HW_ACC_ESRAM3: for ESRAM3
+ * @HW_ACC_ESRAM4: for ESRAM4
+ * @NUM_HW_ACC: number of hardware accelerators
+ *
+ * Different hw accelerators which can be turned ON/
+ * OFF or put into retention (MMDSPs and ESRAMs).
+ * Used with EPOD API.
  *
- * PRCM Unit f/w API
+ * NOTE! Deprecated, to be removed when all users switched over to use the
+ * regulator API.
+ */
+enum hw_acc_dev {
+	HW_ACC_SVAMMDSP,
+	HW_ACC_SVAPIPE,
+	HW_ACC_SIAMMDSP,
+	HW_ACC_SIAPIPE,
+	HW_ACC_SGA,
+	HW_ACC_B2R2,
+	HW_ACC_MCDE,
+	HW_ACC_ESRAM1,
+	HW_ACC_ESRAM2,
+	HW_ACC_ESRAM3,
+	HW_ACC_ESRAM4,
+	NUM_HW_ACC
+};
+
+/*
+ * Ids for all EPODs (power domains)
+ * - EPOD_ID_SVAMMDSP: power domain for SVA MMDSP
+ * - EPOD_ID_SVAPIPE: power domain for SVA pipe
+ * - EPOD_ID_SIAMMDSP: power domain for SIA MMDSP
+ * - EPOD_ID_SIAPIPE: power domain for SIA pipe
+ * - EPOD_ID_SGA: power domain for SGA
+ * - EPOD_ID_B2R2_MCDE: power domain for B2R2 and MCDE
+ * - EPOD_ID_ESRAM12: power domain for ESRAM 1 and 2
+ * - EPOD_ID_ESRAM34: power domain for ESRAM 3 and 4
+ * - NUM_EPOD_ID: number of power domains
+ */
+#define EPOD_ID_SVAMMDSP	0
+#define EPOD_ID_SVAPIPE		1
+#define EPOD_ID_SIAMMDSP	2
+#define EPOD_ID_SIAPIPE		3
+#define EPOD_ID_SGA		4
+#define EPOD_ID_B2R2_MCDE	5
+#define EPOD_ID_ESRAM12		6
+#define EPOD_ID_ESRAM34		7
+#define NUM_EPOD_ID		8
+
+/*
+ * state definition for EPOD (power domain)
+ * - EPOD_STATE_NO_CHANGE: The EPOD should remain unchanged
+ * - EPOD_STATE_OFF: The EPOD is switched off
+ * - EPOD_STATE_RAMRET: The EPOD is switched off with its internal RAM in
+ *                         retention
+ * - EPOD_STATE_ON_CLK_OFF: The EPOD is switched on, clock is still off
+ * - EPOD_STATE_ON: Same as above, but with clock enabled
  */
-#ifndef __MACH_PRCMU_H
-#define __MACH_PRCMU_H
+#define EPOD_STATE_NO_CHANGE	0x00
+#define EPOD_STATE_OFF		0x01
+#define EPOD_STATE_RAMRET	0x02
+#define EPOD_STATE_ON_CLK_OFF	0x03
+#define EPOD_STATE_ON		0x04
 
+/*
+ * CLKOUT sources
+ */
+#define PRCMU_CLKSRC_CLK38M		0x00
+#define PRCMU_CLKSRC_ACLK		0x01
+#define PRCMU_CLKSRC_SYSCLK		0x02
+#define PRCMU_CLKSRC_LCDCLK		0x03
+#define PRCMU_CLKSRC_SDMMCCLK		0x04
+#define PRCMU_CLKSRC_TVCLK		0x05
+#define PRCMU_CLKSRC_TIMCLK		0x06
+#define PRCMU_CLKSRC_CLK009		0x07
+/* These are only valid for CLKOUT1: */
+#define PRCMU_CLKSRC_SIAMMDSPCLK	0x40
+#define PRCMU_CLKSRC_I2CCLK		0x41
+#define PRCMU_CLKSRC_MSP02CLK		0x42
+#define PRCMU_CLKSRC_ARMPLL_OBSCLK	0x43
+#define PRCMU_CLKSRC_HSIRXCLK		0x44
+#define PRCMU_CLKSRC_HSITXCLK		0x45
+#define PRCMU_CLKSRC_ARMCLKFIX		0x46
+#define PRCMU_CLKSRC_HDMICLK		0x47
+
+/*
+ * Definitions for autonomous power management configuration.
+ */
+
+#define PRCMU_AUTO_PM_OFF 0
+#define PRCMU_AUTO_PM_ON 1
+
+#define PRCMU_AUTO_PM_POWER_ON_HSEM BIT(0)
+#define PRCMU_AUTO_PM_POWER_ON_ABB_FIFO_IT BIT(1)
+
+enum prcmu_auto_pm_policy {
+	PRCMU_AUTO_PM_POLICY_NO_CHANGE,
+	PRCMU_AUTO_PM_POLICY_DSP_OFF_HWP_OFF,
+	PRCMU_AUTO_PM_POLICY_DSP_OFF_RAMRET_HWP_OFF,
+	PRCMU_AUTO_PM_POLICY_DSP_CLK_OFF_HWP_OFF,
+	PRCMU_AUTO_PM_POLICY_DSP_CLK_OFF_HWP_CLK_OFF,
+};
+
+/**
+ * struct prcmu_auto_pm_config - Autonomous power management configuration.
+ * @sia_auto_pm_enable: SIA autonomous pm enable. (PRCMU_AUTO_PM_{OFF,ON})
+ * @sia_power_on:       SIA power ON enable. (PRCMU_AUTO_PM_POWER_ON_* bitmask)
+ * @sia_policy:         SIA power policy. (enum prcmu_auto_pm_policy)
+ * @sva_auto_pm_enable: SVA autonomous pm enable. (PRCMU_AUTO_PM_{OFF,ON})
+ * @sva_power_on:       SVA power ON enable. (PRCMU_AUTO_PM_POWER_ON_* bitmask)
+ * @sva_policy:         SVA power policy. (enum prcmu_auto_pm_policy)
+ */
+struct prcmu_auto_pm_config {
+	u8 sia_auto_pm_enable;
+	u8 sia_power_on;
+	u8 sia_policy;
+	u8 sva_auto_pm_enable;
+	u8 sva_power_on;
+	u8 sva_policy;
+};
+
+/**
+ * enum ddr_opp - DDR OPP states definition
+ * @DDR_100_OPP: The new DDR operating point is ddr100opp
+ * @DDR_50_OPP: The new DDR operating point is ddr50opp
+ * @DDR_25_OPP: The new DDR operating point is ddr25opp
+ */
+enum ddr_opp {
+	DDR_100_OPP = 0x00,
+	DDR_50_OPP = 0x01,
+	DDR_25_OPP = 0x02,
+};
+
+/*
+ * Clock identifiers.
+ */
+enum prcmu_clock {
+	PRCMU_SGACLK,
+	PRCMU_UARTCLK,
+	PRCMU_MSP02CLK,
+	PRCMU_MSP1CLK,
+	PRCMU_I2CCLK,
+	PRCMU_SDMMCCLK,
+	PRCMU_SLIMCLK,
+	PRCMU_PER1CLK,
+	PRCMU_PER2CLK,
+	PRCMU_PER3CLK,
+	PRCMU_PER5CLK,
+	PRCMU_PER6CLK,
+	PRCMU_PER7CLK,
+	PRCMU_LCDCLK,
+	PRCMU_BMLCLK,
+	PRCMU_HSITXCLK,
+	PRCMU_HSIRXCLK,
+	PRCMU_HDMICLK,
+	PRCMU_APEATCLK,
+	PRCMU_APETRACECLK,
+	PRCMU_MCDECLK,
+	PRCMU_IPI2CCLK,
+	PRCMU_DSIALTCLK,
+	PRCMU_DMACLK,
+	PRCMU_B2R2CLK,
+	PRCMU_TVCLK,
+	PRCMU_SSPCLK,
+	PRCMU_RNGCLK,
+	PRCMU_UICCCLK,
+	PRCMU_NUM_REG_CLOCKS,
+	PRCMU_SYSCLK = PRCMU_NUM_REG_CLOCKS,
+	PRCMU_TIMCLK,
+};
+
+/*
+ * Definitions for controlling ESRAM0 in deep sleep.
+ */
+#define ESRAM0_DEEP_SLEEP_STATE_OFF 1
+#define ESRAM0_DEEP_SLEEP_STATE_RET 2
+
+#ifdef CONFIG_MFD_DB8500_PRCMU
 void __init prcmu_early_init(void);
+int prcmu_set_display_clocks(void);
+int prcmu_disable_dsipll(void);
+int prcmu_enable_dsipll(void);
+#else
+static inline void __init prcmu_early_init(void) {}
+#endif
+
+#ifdef CONFIG_MFD_DB8500_PRCMU
+
+int prcmu_set_rc_a2p(enum romcode_write);
+enum romcode_read prcmu_get_rc_p2a(void);
+enum ap_pwrst prcmu_get_xp70_current_state(void);
+int prcmu_set_power_state(u8 state, bool keep_ulp_clk, bool keep_ap_pll);
+
+void prcmu_enable_wakeups(u32 wakeups);
+static inline void prcmu_disable_wakeups(void)
+{
+	prcmu_enable_wakeups(0);
+}
+
+void prcmu_config_abb_event_readout(u32 abb_events);
+void prcmu_get_abb_event_buffer(void __iomem **buf);
+int prcmu_set_arm_opp(u8 opp);
+int prcmu_get_arm_opp(void);
+bool prcmu_has_arm_maxopp(void);
+bool prcmu_is_u8400(void);
+int prcmu_set_ape_opp(u8 opp);
+int prcmu_get_ape_opp(void);
+int prcmu_request_ape_opp_100_voltage(bool enable);
+int prcmu_release_usb_wakeup_state(void);
+int prcmu_set_ddr_opp(u8 opp);
+int prcmu_get_ddr_opp(void);
+unsigned long prcmu_qos_get_cpufreq_opp_delay(void);
+void prcmu_qos_set_cpufreq_opp_delay(unsigned long);
+/* NOTE! Use regulator framework instead */
+int prcmu_set_hwacc(u16 hw_acc_dev, u8 state);
+int prcmu_set_epod(u16 epod_id, u8 epod_state);
+void prcmu_configure_auto_pm(struct prcmu_auto_pm_config *sleep,
+	struct prcmu_auto_pm_config *idle);
+bool prcmu_is_auto_pm_enabled(void);
+
+int prcmu_config_clkout(u8 clkout, u8 source, u8 div);
+int prcmu_request_clock(u8 clock, bool enable);
+int prcmu_set_clock_divider(u8 clock, u8 divider);
+int prcmu_config_esram0_deep_sleep(u8 state);
+int prcmu_config_hotdog(u8 threshold);
+int prcmu_config_hotmon(u8 low, u8 high);
+int prcmu_start_temp_sense(u16 cycles32k);
+int prcmu_stop_temp_sense(void);
 int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size);
 int prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size);
-int prcmu_set_ape_opp(enum prcmu_ape_opp opp);
-int prcmu_set_cpu_opp(enum prcmu_cpu_opp opp);
-int prcmu_set_ape_cpu_opps(enum prcmu_ape_opp ape_opp,
-			   enum prcmu_cpu_opp cpu_opp);
-enum prcmu_ape_opp prcmu_get_ape_opp(void);
-int prcmu_get_cpu_opp(void);
-bool prcmu_has_arm_maxopp(void);
 
-#endif /* __MACH_PRCMU_H */
+void prcmu_ac_wake_req(void);
+void prcmu_ac_sleep_req(void);
+void prcmu_system_reset(u16 reset_code);
+void prcmu_modem_reset(void);
+bool prcmu_is_ac_wake_requested(void);
+void prcmu_enable_spi2(void);
+void prcmu_disable_spi2(void);
+
+#else /* !CONFIG_MFD_DB8500_PRCMU */
+
+static inline int prcmu_set_rc_a2p(enum romcode_write code)
+{
+	return 0;
+}
+
+static inline enum romcode_read prcmu_get_rc_p2a(void)
+{
+	return INIT;
+}
+
+static inline enum ap_pwrst prcmu_get_xp70_current_state(void)
+{
+	return AP_EXECUTE;
+}
+
+static inline int prcmu_set_power_state(u8 state, bool keep_ulp_clk,
+	bool keep_ap_pll)
+{
+	return 0;
+}
+
+static inline void prcmu_enable_wakeups(u32 wakeups) {}
+
+static inline void prcmu_disable_wakeups(void) {}
+
+static inline void prcmu_config_abb_event_readout(u32 abb_events) {}
+
+static inline int prcmu_set_arm_opp(u8 opp)
+{
+	return 0;
+}
+
+static inline int prcmu_get_arm_opp(void)
+{
+	return ARM_100_OPP;
+}
+
+static bool prcmu_has_arm_maxopp(void)
+{
+	return false;
+}
+
+static bool prcmu_is_u8400(void)
+{
+	return false;
+}
+
+static inline int prcmu_set_ape_opp(u8 opp)
+{
+	return 0;
+}
+
+static inline int prcmu_get_ape_opp(void)
+{
+	return APE_100_OPP;
+}
+
+static inline int prcmu_request_ape_opp_100_voltage(bool enable)
+{
+	return 0;
+}
+
+static inline int prcmu_release_usb_wakeup_state(void)
+{
+	return 0;
+}
+
+static inline int prcmu_set_ddr_opp(u8 opp)
+{
+	return 0;
+}
+
+static inline int prcmu_get_ddr_opp(void)
+{
+	return DDR_100_OPP;
+}
+
+static inline unsigned long prcmu_qos_get_cpufreq_opp_delay(void)
+{
+	return 0;
+}
+
+static inline void prcmu_qos_set_cpufreq_opp_delay(unsigned long n) {}
+
+static inline int prcmu_set_hwacc(u16 hw_acc_dev, u8 state)
+{
+	return 0;
+}
+
+static inline void prcmu_configure_auto_pm(struct prcmu_auto_pm_config *sleep,
+	struct prcmu_auto_pm_config *idle)
+{
+}
+
+static inline bool prcmu_is_auto_pm_enabled(void)
+{
+	return false;
+}
+
+static inline int prcmu_config_clkout(u8 clkout, u8 source, u8 div)
+{
+	return 0;
+}
+
+static inline int prcmu_request_clock(u8 clock, bool enable)
+{
+	return 0;
+}
+
+static inline int prcmu_set_clock_divider(u8 clock, u8 divider)
+{
+	return 0;
+}
+
+int prcmu_config_esram0_deep_sleep(u8 state)
+{
+	return 0;
+}
+
+static inline int prcmu_config_hotdog(u8 threshold)
+{
+	return 0;
+}
+
+static inline int prcmu_config_hotmon(u8 low, u8 high)
+{
+	return 0;
+}
+
+static inline int prcmu_start_temp_sense(u16 cycles32k)
+{
+	return 0;
+}
+
+static inline int prcmu_stop_temp_sense(void)
+{
+	return 0;
+}
+
+static inline int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size)
+{
+	return -ENOSYS;
+}
+
+static inline int prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size)
+{
+	return -ENOSYS;
+}
+
+static inline void prcmu_ac_wake_req(void) {}
+
+static inline void prcmu_ac_sleep_req(void) {}
+
+static inline void prcmu_system_reset(u16 reset_code) {}
+
+static inline void prcmu_modem_reset(void) {}
+
+static inline bool prcmu_is_ac_wake_requested(void)
+{
+	return false;
+}
+
+#ifndef CONFIG_UX500_SOC_DB5500
+static inline int prcmu_set_display_clocks(void)
+{
+	return 0;
+}
+
+static inline int prcmu_disable_dsipll(void)
+{
+	return 0;
+}
+
+static inline int prcmu_enable_dsipll(void)
+{
+	return 0;
+}
+#endif
+
+static inline int prcmu_enable_spi2(void)
+{
+	return 0;
+}
+
+static inline int prcmu_disable_spi2(void)
+{
+	return 0;
+}
+
+#endif /* !CONFIG_MFD_DB8500_PRCMU */
+
+#ifdef CONFIG_UX500_PRCMU_QOS_POWER
+int prcmu_qos_requirement(int pm_qos_class);
+int prcmu_qos_add_requirement(int pm_qos_class, char *name, s32 value);
+int prcmu_qos_update_requirement(int pm_qos_class, char *name, s32 new_value);
+void prcmu_qos_remove_requirement(int pm_qos_class, char *name);
+int prcmu_qos_add_notifier(int prcmu_qos_class,
+			   struct notifier_block *notifier);
+int prcmu_qos_remove_notifier(int prcmu_qos_class,
+			      struct notifier_block *notifier);
+#else
+static inline int prcmu_qos_requirement(int prcmu_qos_class)
+{
+	return 0;
+}
+
+static inline int prcmu_qos_add_requirement(int prcmu_qos_class,
+					    char *name, s32 value)
+{
+	return 0;
+}
+
+static inline int prcmu_qos_update_requirement(int prcmu_qos_class,
+					       char *name, s32 new_value)
+{
+	return 0;
+}
+
+static inline void prcmu_qos_remove_requirement(int prcmu_qos_class, char *name)
+{
+}
+
+static inline int prcmu_qos_add_notifier(int prcmu_qos_class,
+					 struct notifier_block *notifier)
+{
+	return 0;
+}
+static inline int prcmu_qos_remove_notifier(int prcmu_qos_class,
+					    struct notifier_block *notifier)
+{
+	return 0;
+}
+
+#endif
+
+#endif /* __MFD_DB8500_PRCMU_H */
-- 
cgit v1.2.3


From 8317797ca657081ed81312ea3501f3a3d59d52e9 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 3 May 2011 18:14:48 +0200
Subject: mfd: add DB5500 PRCMU driver

This adds the DB5500 PRCMU driver. Right now this one is pretty
restricted in functionality, exposing a simple interface to send
I2C messages.

Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/mach-ux500/Kconfig      |   1 +
 arch/arm/mach-ux500/cpu.c        |   3 +
 drivers/mfd/Kconfig              |  10 +
 drivers/mfd/Makefile             |   1 +
 drivers/mfd/db5500-prcmu-regs.h  | 115 ++++++++++
 drivers/mfd/db5500-prcmu.c       | 448 +++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/db5500-prcmu.h |  45 ++++
 7 files changed, 623 insertions(+)
 create mode 100644 drivers/mfd/db5500-prcmu-regs.h
 create mode 100644 drivers/mfd/db5500-prcmu.c
 create mode 100644 include/linux/mfd/db5500-prcmu.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-ux500/Kconfig b/arch/arm/mach-ux500/Kconfig
index 8071d2746f70..365c4c35faa7 100644
--- a/arch/arm/mach-ux500/Kconfig
+++ b/arch/arm/mach-ux500/Kconfig
@@ -12,6 +12,7 @@ menu "Ux500 SoC"
 
 config UX500_SOC_DB5500
 	bool "DB5500"
+	select MFD_DB5500_PRCMU
 
 config UX500_SOC_DB8500
 	bool "DB8500"
diff --git a/arch/arm/mach-ux500/cpu.c b/arch/arm/mach-ux500/cpu.c
index 11360f734cec..1da23bb87c16 100644
--- a/arch/arm/mach-ux500/cpu.c
+++ b/arch/arm/mach-ux500/cpu.c
@@ -9,6 +9,7 @@
 #include <linux/io.h>
 #include <linux/clk.h>
 #include <linux/mfd/db8500-prcmu.h>
+#include <linux/mfd/db5500-prcmu.h>
 
 #include <asm/cacheflush.h>
 #include <asm/hardware/cache-l2x0.h>
@@ -49,6 +50,8 @@ void __init ux500_init_irq(void)
 	 * Init clocks here so that they are available for system timer
 	 * initialization.
 	 */
+	if (cpu_is_u5500())
+		db5500_prcmu_early_init();
 	if (cpu_is_u8500())
 		prcmu_early_init();
 	clk_init();
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 7eaeb9750793..481770ab2716 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -585,6 +585,16 @@ config MFD_DB8500_PRCMU
 	  system controller running an XP70 microprocessor, which is accessed
 	  through a register map.
 
+config MFD_DB5500_PRCMU
+	bool "ST-Ericsson DB5500 Power Reset Control Management Unit"
+	depends on UX500_SOC_DB5500
+	select MFD_CORE
+	help
+	  Select this option to enable support for the DB5500 Power Reset
+	  and Control Management Unit. This is basically an autonomous
+	  system controller running an XP70 microprocessor, which is accessed
+	  through a register map.
+
 config MFD_CS5535
 	tristate "Support for CS5535 and CS5536 southbridge core functions"
 	select MFD_CORE
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 814c57a692a9..24aa44448daf 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -79,6 +79,7 @@ obj-$(CONFIG_AB8500_GPADC)	+= ab8500-gpadc.o
 obj-$(CONFIG_MFD_DB8500_PRCMU)	+= db8500-prcmu.o
 # ab8500-i2c need to come after db8500-prcmu (which provides the channel)
 obj-$(CONFIG_AB8500_I2C_CORE)	+= ab8500-i2c.o
+obj-$(CONFIG_MFD_DB5500_PRCMU)	+= db5500-prcmu.o
 obj-$(CONFIG_MFD_TIMBERDALE)    += timberdale.o
 obj-$(CONFIG_PMIC_ADP5520)	+= adp5520.o
 obj-$(CONFIG_LPC_SCH)		+= lpc_sch.o
diff --git a/drivers/mfd/db5500-prcmu-regs.h b/drivers/mfd/db5500-prcmu-regs.h
new file mode 100644
index 000000000000..9a8e9e4ddd33
--- /dev/null
+++ b/drivers/mfd/db5500-prcmu-regs.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) STMicroelectronics 2009
+ * Copyright (C) ST-Ericsson SA 2010
+ *
+ * Author: Kumar Sanghvi <kumar.sanghvi@stericsson.com>
+ * Author: Sundar Iyer <sundar.iyer@stericsson.com>
+ *
+ * License Terms: GNU General Public License v2
+ *
+ * PRCM Unit registers
+ */
+
+#ifndef __MACH_PRCMU_REGS_H
+#define __MACH_PRCMU_REGS_H
+
+#include <mach/hardware.h>
+
+#define PRCM_ARM_PLLDIVPS	(_PRCMU_BASE + 0x118)
+#define PRCM_ARM_PLLDIVPS_ARM_BRM_RATE		0x3f
+#define PRCM_ARM_PLLDIVPS_MAX_MASK		0xf
+
+#define PRCM_PLLARM_LOCKP       (_PRCMU_BASE + 0x0a8)
+#define PRCM_PLLARM_LOCKP_PRCM_PLLARM_LOCKP3	0x2
+
+#define PRCM_ARM_CHGCLKREQ	(_PRCMU_BASE + 0x114)
+#define PRCM_ARM_CHGCLKREQ_PRCM_ARM_CHGCLKREQ	0x1
+
+#define PRCM_PLLARM_ENABLE	(_PRCMU_BASE + 0x98)
+#define PRCM_PLLARM_ENABLE_PRCM_PLLARM_ENABLE	0x1
+#define PRCM_PLLARM_ENABLE_PRCM_PLLARM_COUNTON	0x100
+
+#define PRCM_ARMCLKFIX_MGT	(_PRCMU_BASE + 0x0)
+#define PRCM_A9_RESETN_CLR	(_PRCMU_BASE + 0x1f4)
+#define PRCM_A9_RESETN_SET	(_PRCMU_BASE + 0x1f0)
+#define PRCM_ARM_LS_CLAMP	(_PRCMU_BASE + 0x30c)
+#define PRCM_SRAM_A9		(_PRCMU_BASE + 0x308)
+
+/* ARM WFI Standby signal register */
+#define PRCM_ARM_WFI_STANDBY    (_PRCMU_BASE + 0x130)
+#define PRCM_IOCR		(_PRCMU_BASE + 0x310)
+#define PRCM_IOCR_IOFORCE			0x1
+
+/* CPU mailbox registers */
+#define PRCM_MBOX_CPU_VAL	(_PRCMU_BASE + 0x0fc)
+#define PRCM_MBOX_CPU_SET	(_PRCMU_BASE + 0x100)
+#define PRCM_MBOX_CPU_CLR	(_PRCMU_BASE + 0x104)
+
+/* Dual A9 core interrupt management unit registers */
+#define PRCM_A9_MASK_REQ	(_PRCMU_BASE + 0x328)
+#define PRCM_A9_MASK_REQ_PRCM_A9_MASK_REQ	0x1
+
+#define PRCM_A9_MASK_ACK	(_PRCMU_BASE + 0x32c)
+#define PRCM_ARMITMSK31TO0	(_PRCMU_BASE + 0x11c)
+#define PRCM_ARMITMSK63TO32	(_PRCMU_BASE + 0x120)
+#define PRCM_ARMITMSK95TO64	(_PRCMU_BASE + 0x124)
+#define PRCM_ARMITMSK127TO96	(_PRCMU_BASE + 0x128)
+#define PRCM_POWER_STATE_VAL	(_PRCMU_BASE + 0x25C)
+#define PRCM_ARMITVAL31TO0	(_PRCMU_BASE + 0x260)
+#define PRCM_ARMITVAL63TO32	(_PRCMU_BASE + 0x264)
+#define PRCM_ARMITVAL95TO64	(_PRCMU_BASE + 0x268)
+#define PRCM_ARMITVAL127TO96	(_PRCMU_BASE + 0x26C)
+
+#define PRCM_HOSTACCESS_REQ	(_PRCMU_BASE + 0x334)
+#define ARM_WAKEUP_MODEM	0x1
+
+#define PRCM_ARM_IT1_CLEAR	(_PRCMU_BASE + 0x48C)
+#define PRCM_ARM_IT1_VAL	(_PRCMU_BASE + 0x494)
+#define PRCM_HOLD_EVT		(_PRCMU_BASE + 0x174)
+
+#define PRCM_ITSTATUS0		(_PRCMU_BASE + 0x148)
+#define PRCM_ITSTATUS1		(_PRCMU_BASE + 0x150)
+#define PRCM_ITSTATUS2		(_PRCMU_BASE + 0x158)
+#define PRCM_ITSTATUS3		(_PRCMU_BASE + 0x160)
+#define PRCM_ITSTATUS4		(_PRCMU_BASE + 0x168)
+#define PRCM_ITSTATUS5		(_PRCMU_BASE + 0x484)
+#define PRCM_ITCLEAR5		(_PRCMU_BASE + 0x488)
+#define PRCM_ARMIT_MASKXP70_IT	(_PRCMU_BASE + 0x1018)
+
+/* System reset register */
+#define PRCM_APE_SOFTRST	(_PRCMU_BASE + 0x228)
+
+/* Level shifter and clamp control registers */
+#define PRCM_MMIP_LS_CLAMP_SET     (_PRCMU_BASE + 0x420)
+#define PRCM_MMIP_LS_CLAMP_CLR     (_PRCMU_BASE + 0x424)
+
+/* PRCMU clock/PLL/reset registers */
+#define PRCM_PLLDSI_FREQ           (_PRCMU_BASE + 0x500)
+#define PRCM_PLLDSI_ENABLE         (_PRCMU_BASE + 0x504)
+#define PRCM_PLLDSI_LOCKP          (_PRCMU_BASE + 0x508)
+#define PRCM_LCDCLK_MGT            (_PRCMU_BASE + 0x044)
+#define PRCM_MCDECLK_MGT           (_PRCMU_BASE + 0x064)
+#define PRCM_HDMICLK_MGT           (_PRCMU_BASE + 0x058)
+#define PRCM_TVCLK_MGT             (_PRCMU_BASE + 0x07c)
+#define PRCM_DSI_PLLOUT_SEL        (_PRCMU_BASE + 0x530)
+#define PRCM_DSITVCLK_DIV          (_PRCMU_BASE + 0x52C)
+#define PRCM_PLLDSI_LOCKP          (_PRCMU_BASE + 0x508)
+#define PRCM_APE_RESETN_SET        (_PRCMU_BASE + 0x1E4)
+#define PRCM_APE_RESETN_CLR        (_PRCMU_BASE + 0x1E8)
+#define PRCM_CLKOCR		   (_PRCMU_BASE + 0x1CC)
+
+/* ePOD and memory power signal control registers */
+#define PRCM_EPOD_C_SET            (_PRCMU_BASE + 0x410)
+#define PRCM_SRAM_LS_SLEEP         (_PRCMU_BASE + 0x304)
+
+/* Debug power control unit registers */
+#define PRCM_POWER_STATE_SET       (_PRCMU_BASE + 0x254)
+
+/* Miscellaneous unit registers */
+#define PRCM_DSI_SW_RESET          (_PRCMU_BASE + 0x324)
+#define PRCM_GPIOCR                (_PRCMU_BASE + 0x138)
+#define PRCM_GPIOCR_DBG_STM_MOD_CMD1            0x800
+#define PRCM_GPIOCR_DBG_UARTMOD_CMD0            0x1
+
+
+#endif /* __MACH_PRCMU__REGS_H */
diff --git a/drivers/mfd/db5500-prcmu.c b/drivers/mfd/db5500-prcmu.c
new file mode 100644
index 000000000000..9dbb3cab4a6f
--- /dev/null
+++ b/drivers/mfd/db5500-prcmu.c
@@ -0,0 +1,448 @@
+/*
+ * Copyright (C) ST-Ericsson SA 2010
+ *
+ * License Terms: GNU General Public License v2
+ * Author: Mattias Nilsson <mattias.i.nilsson@stericsson.com>
+ *
+ * U5500 PRCM Unit interface driver
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/spinlock.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/completion.h>
+#include <linux/irq.h>
+#include <linux/jiffies.h>
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/mfd/db5500-prcmu.h>
+#include <mach/hardware.h>
+#include <mach/irqs.h>
+#include <mach/db5500-regs.h>
+#include "db5500-prcmu-regs.h"
+
+#define _PRCM_MB_HEADER (tcdm_base + 0xFE8)
+#define PRCM_REQ_MB0_HEADER (_PRCM_MB_HEADER + 0x0)
+#define PRCM_REQ_MB1_HEADER (_PRCM_MB_HEADER + 0x1)
+#define PRCM_REQ_MB2_HEADER (_PRCM_MB_HEADER + 0x2)
+#define PRCM_REQ_MB3_HEADER (_PRCM_MB_HEADER + 0x3)
+#define PRCM_REQ_MB4_HEADER (_PRCM_MB_HEADER + 0x4)
+#define PRCM_REQ_MB5_HEADER (_PRCM_MB_HEADER + 0x5)
+#define PRCM_REQ_MB6_HEADER (_PRCM_MB_HEADER + 0x6)
+#define PRCM_REQ_MB7_HEADER (_PRCM_MB_HEADER + 0x7)
+#define PRCM_ACK_MB0_HEADER (_PRCM_MB_HEADER + 0x8)
+#define PRCM_ACK_MB1_HEADER (_PRCM_MB_HEADER + 0x9)
+#define PRCM_ACK_MB2_HEADER (_PRCM_MB_HEADER + 0xa)
+#define PRCM_ACK_MB3_HEADER (_PRCM_MB_HEADER + 0xb)
+#define PRCM_ACK_MB4_HEADER (_PRCM_MB_HEADER + 0xc)
+#define PRCM_ACK_MB5_HEADER (_PRCM_MB_HEADER + 0xd)
+#define PRCM_ACK_MB6_HEADER (_PRCM_MB_HEADER + 0xe)
+#define PRCM_ACK_MB7_HEADER (_PRCM_MB_HEADER + 0xf)
+
+/* Req Mailboxes */
+#define PRCM_REQ_MB0 (tcdm_base + 0xFD8)
+#define PRCM_REQ_MB1 (tcdm_base + 0xFCC)
+#define PRCM_REQ_MB2 (tcdm_base + 0xFC4)
+#define PRCM_REQ_MB3 (tcdm_base + 0xFC0)
+#define PRCM_REQ_MB4 (tcdm_base + 0xF98)
+#define PRCM_REQ_MB5 (tcdm_base + 0xF90)
+#define PRCM_REQ_MB6 (tcdm_base + 0xF8C)
+#define PRCM_REQ_MB7 (tcdm_base + 0xF84)
+
+/* Ack Mailboxes */
+#define PRCM_ACK_MB0 (tcdm_base + 0xF38)
+#define PRCM_ACK_MB1 (tcdm_base + 0xF30)
+#define PRCM_ACK_MB2 (tcdm_base + 0xF24)
+#define PRCM_ACK_MB3 (tcdm_base + 0xF20)
+#define PRCM_ACK_MB4 (tcdm_base + 0xF1C)
+#define PRCM_ACK_MB5 (tcdm_base + 0xF14)
+#define PRCM_ACK_MB6 (tcdm_base + 0xF0C)
+#define PRCM_ACK_MB7 (tcdm_base + 0xF08)
+
+enum mb_return_code {
+	RC_SUCCESS,
+	RC_FAIL,
+};
+
+/* Mailbox 0 headers. */
+enum mb0_header {
+	/* request */
+	RMB0H_PWR_STATE_TRANS = 1,
+	RMB0H_WAKE_UP_CFG,
+	RMB0H_RD_WAKE_UP_ACK,
+	/* acknowledge */
+	AMB0H_WAKE_UP = 1,
+};
+
+/* Mailbox 5 headers. */
+enum mb5_header {
+	MB5H_I2C_WRITE = 1,
+	MB5H_I2C_READ,
+};
+
+/* Request mailbox 5 fields. */
+#define PRCM_REQ_MB5_I2C_SLAVE (PRCM_REQ_MB5 + 0)
+#define PRCM_REQ_MB5_I2C_REG (PRCM_REQ_MB5 + 1)
+#define PRCM_REQ_MB5_I2C_SIZE (PRCM_REQ_MB5 + 2)
+#define PRCM_REQ_MB5_I2C_DATA (PRCM_REQ_MB5 + 4)
+
+/* Acknowledge mailbox 5 fields. */
+#define PRCM_ACK_MB5_RETURN_CODE (PRCM_ACK_MB5 + 0)
+#define PRCM_ACK_MB5_I2C_DATA (PRCM_ACK_MB5 + 4)
+
+#define NUM_MB 8
+#define MBOX_BIT BIT
+#define ALL_MBOX_BITS (MBOX_BIT(NUM_MB) - 1)
+
+/*
+* Used by MCDE to setup all necessary PRCMU registers
+*/
+#define PRCMU_RESET_DSIPLL			0x00004000
+#define PRCMU_UNCLAMP_DSIPLL			0x00400800
+
+/* HDMI CLK MGT PLLSW=001 (PLLSOC0), PLLDIV=0x8, = 50 Mhz*/
+#define PRCMU_DSI_CLOCK_SETTING			0x00000128
+/* TVCLK_MGT PLLSW=001 (PLLSOC0) PLLDIV=0x13, = 19.05 MHZ */
+#define PRCMU_DSI_LP_CLOCK_SETTING		0x00000135
+#define PRCMU_PLLDSI_FREQ_SETTING		0x0004013C
+#define PRCMU_DSI_PLLOUT_SEL_SETTING		0x00000002
+#define PRCMU_ENABLE_ESCAPE_CLOCK_DIV		0x03000101
+#define PRCMU_DISABLE_ESCAPE_CLOCK_DIV		0x00000101
+
+#define PRCMU_ENABLE_PLLDSI			0x00000001
+#define PRCMU_DISABLE_PLLDSI			0x00000000
+
+#define PRCMU_DSI_RESET_SW			0x00000003
+
+#define PRCMU_PLLDSI_LOCKP_LOCKED		0x3
+
+/*
+ * mb0_transfer - state needed for mailbox 0 communication.
+ * @lock:		The transaction lock.
+ */
+static struct {
+	spinlock_t lock;
+} mb0_transfer;
+
+/*
+ * mb5_transfer - state needed for mailbox 5 communication.
+ * @lock:	The transaction lock.
+ * @work:	The transaction completion structure.
+ * @ack:	Reply ("acknowledge") data.
+ */
+static struct {
+	struct mutex lock;
+	struct completion work;
+	struct {
+		u8 header;
+		u8 status;
+		u8 value[4];
+	} ack;
+} mb5_transfer;
+
+/* PRCMU TCDM base IO address. */
+static __iomem void *tcdm_base;
+
+/**
+ * db5500_prcmu_abb_read() - Read register value(s) from the ABB.
+ * @slave:	The I2C slave address.
+ * @reg:	The (start) register address.
+ * @value:	The read out value(s).
+ * @size:	The number of registers to read.
+ *
+ * Reads register value(s) from the ABB.
+ * @size has to be <= 4.
+ */
+int db5500_prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size)
+{
+	int r;
+
+	if ((size < 1) || (4 < size))
+		return -EINVAL;
+
+	mutex_lock(&mb5_transfer.lock);
+
+	while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(5))
+		cpu_relax();
+	writeb(slave, PRCM_REQ_MB5_I2C_SLAVE);
+	writeb(reg, PRCM_REQ_MB5_I2C_REG);
+	writeb(size, PRCM_REQ_MB5_I2C_SIZE);
+	writeb(MB5H_I2C_READ, PRCM_REQ_MB5_HEADER);
+
+	writel(MBOX_BIT(5), PRCM_MBOX_CPU_SET);
+	wait_for_completion(&mb5_transfer.work);
+
+	r = 0;
+	if ((mb5_transfer.ack.header == MB5H_I2C_READ) &&
+		(mb5_transfer.ack.status == RC_SUCCESS))
+		memcpy(value, mb5_transfer.ack.value, (size_t)size);
+	else
+		r = -EIO;
+
+	mutex_unlock(&mb5_transfer.lock);
+
+	return r;
+}
+
+/**
+ * db5500_prcmu_abb_write() - Write register value(s) to the ABB.
+ * @slave:	The I2C slave address.
+ * @reg:	The (start) register address.
+ * @value:	The value(s) to write.
+ * @size:	The number of registers to write.
+ *
+ * Writes register value(s) to the ABB.
+ * @size has to be <= 4.
+ */
+int db5500_prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size)
+{
+	int r;
+
+	if ((size < 1) || (4 < size))
+		return -EINVAL;
+
+	mutex_lock(&mb5_transfer.lock);
+
+	while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(5))
+		cpu_relax();
+	writeb(slave, PRCM_REQ_MB5_I2C_SLAVE);
+	writeb(reg, PRCM_REQ_MB5_I2C_REG);
+	writeb(size, PRCM_REQ_MB5_I2C_SIZE);
+	memcpy_toio(PRCM_REQ_MB5_I2C_DATA, value, size);
+	writeb(MB5H_I2C_WRITE, PRCM_REQ_MB5_HEADER);
+
+	writel(MBOX_BIT(5), PRCM_MBOX_CPU_SET);
+	wait_for_completion(&mb5_transfer.work);
+
+	if ((mb5_transfer.ack.header == MB5H_I2C_WRITE) &&
+		(mb5_transfer.ack.status == RC_SUCCESS))
+		r = 0;
+	else
+		r = -EIO;
+
+	mutex_unlock(&mb5_transfer.lock);
+
+	return r;
+}
+
+int db5500_prcmu_enable_dsipll(void)
+{
+	int i;
+
+	/* Enable DSIPLL_RESETN resets */
+	writel(PRCMU_RESET_DSIPLL, PRCM_APE_RESETN_CLR);
+	/* Unclamp DSIPLL in/out */
+	writel(PRCMU_UNCLAMP_DSIPLL, PRCM_MMIP_LS_CLAMP_CLR);
+	/* Set DSI PLL FREQ */
+	writel(PRCMU_PLLDSI_FREQ_SETTING, PRCM_PLLDSI_FREQ);
+	writel(PRCMU_DSI_PLLOUT_SEL_SETTING,
+		PRCM_DSI_PLLOUT_SEL);
+	/* Enable Escape clocks */
+	writel(PRCMU_ENABLE_ESCAPE_CLOCK_DIV, PRCM_DSITVCLK_DIV);
+
+	/* Start DSI PLL */
+	writel(PRCMU_ENABLE_PLLDSI, PRCM_PLLDSI_ENABLE);
+	/* Reset DSI PLL */
+	writel(PRCMU_DSI_RESET_SW, PRCM_DSI_SW_RESET);
+	for (i = 0; i < 10; i++) {
+		if ((readl(PRCM_PLLDSI_LOCKP) &
+			PRCMU_PLLDSI_LOCKP_LOCKED) == PRCMU_PLLDSI_LOCKP_LOCKED)
+			break;
+		udelay(100);
+	}
+	/* Release DSIPLL_RESETN */
+	writel(PRCMU_RESET_DSIPLL, PRCM_APE_RESETN_SET);
+	return 0;
+}
+
+int db5500_prcmu_disable_dsipll(void)
+{
+	/* Disable dsi pll */
+	writel(PRCMU_DISABLE_PLLDSI, PRCM_PLLDSI_ENABLE);
+	/* Disable  escapeclock */
+	writel(PRCMU_DISABLE_ESCAPE_CLOCK_DIV, PRCM_DSITVCLK_DIV);
+	return 0;
+}
+
+int db5500_prcmu_set_display_clocks(void)
+{
+	/* HDMI and TVCLK Should be handled somewhere else */
+	/* PLLDIV=8, PLLSW=2, CLKEN=1 */
+	writel(PRCMU_DSI_CLOCK_SETTING, PRCM_HDMICLK_MGT);
+	/* PLLDIV=14, PLLSW=2, CLKEN=1 */
+	writel(PRCMU_DSI_LP_CLOCK_SETTING, PRCM_TVCLK_MGT);
+	return 0;
+}
+
+static void ack_dbb_wakeup(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&mb0_transfer.lock, flags);
+
+	while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(0))
+		cpu_relax();
+
+	writeb(RMB0H_RD_WAKE_UP_ACK, PRCM_REQ_MB0_HEADER);
+	writel(MBOX_BIT(0), PRCM_MBOX_CPU_SET);
+
+	spin_unlock_irqrestore(&mb0_transfer.lock, flags);
+}
+
+static inline void print_unknown_header_warning(u8 n, u8 header)
+{
+	pr_warning("prcmu: Unknown message header (%d) in mailbox %d.\n",
+		header, n);
+}
+
+static bool read_mailbox_0(void)
+{
+	bool r;
+	u8 header;
+
+	header = readb(PRCM_ACK_MB0_HEADER);
+	switch (header) {
+	case AMB0H_WAKE_UP:
+		r = true;
+		break;
+	default:
+		print_unknown_header_warning(0, header);
+		r = false;
+		break;
+	}
+	writel(MBOX_BIT(0), PRCM_ARM_IT1_CLEAR);
+	return r;
+}
+
+static bool read_mailbox_1(void)
+{
+	writel(MBOX_BIT(1), PRCM_ARM_IT1_CLEAR);
+	return false;
+}
+
+static bool read_mailbox_2(void)
+{
+	writel(MBOX_BIT(2), PRCM_ARM_IT1_CLEAR);
+	return false;
+}
+
+static bool read_mailbox_3(void)
+{
+	writel(MBOX_BIT(3), PRCM_ARM_IT1_CLEAR);
+	return false;
+}
+
+static bool read_mailbox_4(void)
+{
+	writel(MBOX_BIT(4), PRCM_ARM_IT1_CLEAR);
+	return false;
+}
+
+static bool read_mailbox_5(void)
+{
+	u8 header;
+
+	header = readb(PRCM_ACK_MB5_HEADER);
+	switch (header) {
+	case MB5H_I2C_READ:
+		memcpy_fromio(mb5_transfer.ack.value, PRCM_ACK_MB5_I2C_DATA, 4);
+	case MB5H_I2C_WRITE:
+		mb5_transfer.ack.header = header;
+		mb5_transfer.ack.status = readb(PRCM_ACK_MB5_RETURN_CODE);
+		complete(&mb5_transfer.work);
+		break;
+	default:
+		print_unknown_header_warning(5, header);
+		break;
+	}
+	writel(MBOX_BIT(5), PRCM_ARM_IT1_CLEAR);
+	return false;
+}
+
+static bool read_mailbox_6(void)
+{
+	writel(MBOX_BIT(6), PRCM_ARM_IT1_CLEAR);
+	return false;
+}
+
+static bool read_mailbox_7(void)
+{
+	writel(MBOX_BIT(7), PRCM_ARM_IT1_CLEAR);
+	return false;
+}
+
+static bool (* const read_mailbox[NUM_MB])(void) = {
+	read_mailbox_0,
+	read_mailbox_1,
+	read_mailbox_2,
+	read_mailbox_3,
+	read_mailbox_4,
+	read_mailbox_5,
+	read_mailbox_6,
+	read_mailbox_7
+};
+
+static irqreturn_t prcmu_irq_handler(int irq, void *data)
+{
+	u32 bits;
+	u8 n;
+	irqreturn_t r;
+
+	bits = (readl(PRCM_ARM_IT1_VAL) & ALL_MBOX_BITS);
+	if (unlikely(!bits))
+		return IRQ_NONE;
+
+	r = IRQ_HANDLED;
+	for (n = 0; bits; n++) {
+		if (bits & MBOX_BIT(n)) {
+			bits -= MBOX_BIT(n);
+			if (read_mailbox[n]())
+				r = IRQ_WAKE_THREAD;
+		}
+	}
+	return r;
+}
+
+static irqreturn_t prcmu_irq_thread_fn(int irq, void *data)
+{
+	ack_dbb_wakeup();
+	return IRQ_HANDLED;
+}
+
+void __init db5500_prcmu_early_init(void)
+{
+	tcdm_base = __io_address(U5500_PRCMU_TCDM_BASE);
+	spin_lock_init(&mb0_transfer.lock);
+	mutex_init(&mb5_transfer.lock);
+	init_completion(&mb5_transfer.work);
+}
+
+/**
+ * prcmu_fw_init - arch init call for the Linux PRCMU fw init logic
+ *
+ */
+int __init db5500_prcmu_init(void)
+{
+	int r = 0;
+
+	if (ux500_is_svp() || !cpu_is_u5500())
+		return -ENODEV;
+
+	/* Clean up the mailbox interrupts after pre-kernel code. */
+	writel(ALL_MBOX_BITS, PRCM_ARM_IT1_CLEAR);
+
+	r = request_threaded_irq(IRQ_DB5500_PRCMU1, prcmu_irq_handler,
+		prcmu_irq_thread_fn, 0, "prcmu", NULL);
+	if (r < 0) {
+		pr_err("prcmu: Failed to allocate IRQ_DB5500_PRCMU1.\n");
+		return -EBUSY;
+	}
+	return 0;
+}
+
+arch_initcall(db5500_prcmu_init);
diff --git a/include/linux/mfd/db5500-prcmu.h b/include/linux/mfd/db5500-prcmu.h
new file mode 100644
index 000000000000..f0977986402c
--- /dev/null
+++ b/include/linux/mfd/db5500-prcmu.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) ST-Ericsson SA 2010
+ *
+ * License Terms: GNU General Public License v2
+ *
+ * U5500 PRCMU API.
+ */
+#ifndef __MACH_PRCMU_U5500_H
+#define __MACH_PRCMU_U5500_H
+
+#ifdef CONFIG_UX500_SOC_DB5500
+
+void db5500_prcmu_early_init(void);
+
+int db5500_prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size);
+int db5500_prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size);
+
+#else /* !CONFIG_UX500_SOC_DB5500 */
+
+static inline void db5500_prcmu_early_init(void)
+{
+}
+
+static inline int db5500_prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size)
+{
+	return -ENOSYS;
+}
+
+static inline int db5500_prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size)
+{
+	return -ENOSYS;
+}
+
+#endif /* CONFIG_UX500_SOC_DB5500 */
+
+static inline int db5500_prcmu_config_abb_event_readout(u32 abb_events)
+{
+#ifdef CONFIG_MACH_U5500_SIMULATOR
+	return 0;
+#else
+	return -1;
+#endif
+}
+
+#endif /* __MACH_PRCMU_U5500_H */
-- 
cgit v1.2.3


From 1032fbfd792f2b384ac16a63993b8fae5eea9083 Mon Sep 17 00:00:00 2001
From: Bengt Jonsson <bengt.g.jonsson@stericsson.com>
Date: Fri, 1 Apr 2011 14:43:33 +0200
Subject: mach-ux500: voltage domain regulators for DB8500

The DB8500 has ePOD:s (electronic power domains) which are possible
to switch on/off to deactivate silicon blocks on the DB8500 SoC
by cutting their power without retention. We model these as simple
regulators with one bit on/off settings.

Acked-by: Liam Girdwood <lrg@slimlogic.co.uk>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Bengt Jonsson <bengt.g.jonsson@stericsson.com>
Signed-off-by: Sundar Iyer <sundar.iyer@stericsson.com>
Signed-off-by: Jonas Aberg <jonas.aberg@stericsson.com>
Signed-off-by: Virupax Sadashivpetimath <virupax.sadashivpetimath@stericsson.com>
Signed-off-by: Martin Persson <martin.persson@stericsson.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/mach-ux500/Kconfig            |   1 +
 drivers/mfd/db8500-prcmu.c             | 179 +++++++++++
 drivers/regulator/Kconfig              |   7 +
 drivers/regulator/Makefile             |   1 +
 drivers/regulator/db8500-prcmu.c       | 558 +++++++++++++++++++++++++++++++++
 include/linux/regulator/db8500-prcmu.h |  45 +++
 6 files changed, 791 insertions(+)
 create mode 100644 drivers/regulator/db8500-prcmu.c
 create mode 100644 include/linux/regulator/db8500-prcmu.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-ux500/Kconfig b/arch/arm/mach-ux500/Kconfig
index 365c4c35faa7..54429d015954 100644
--- a/arch/arm/mach-ux500/Kconfig
+++ b/arch/arm/mach-ux500/Kconfig
@@ -17,6 +17,7 @@ config UX500_SOC_DB5500
 config UX500_SOC_DB8500
 	bool "DB8500"
 	select MFD_DB8500_PRCMU
+	select REGULATOR_DB8500_PRCMU
 
 endmenu
 
diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c
index c44725bd8b9a..e63782107e2f 100644
--- a/drivers/mfd/db8500-prcmu.c
+++ b/drivers/mfd/db8500-prcmu.c
@@ -28,6 +28,8 @@
 #include <linux/uaccess.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/db8500-prcmu.h>
+#include <linux/regulator/db8500-prcmu.h>
+#include <linux/regulator/machine.h>
 #include <mach/hardware.h>
 #include <mach/irqs.h>
 #include <mach/db8500-regs.h>
@@ -1824,9 +1826,186 @@ void __init prcmu_early_init(void)
 	}
 }
 
+/*
+ * Power domain switches (ePODs) modeled as regulators for the DB8500 SoC
+ */
+static struct regulator_consumer_supply db8500_vape_consumers[] = {
+	REGULATOR_SUPPLY("v-ape", NULL),
+	REGULATOR_SUPPLY("v-i2c", "nmk-i2c.0"),
+	REGULATOR_SUPPLY("v-i2c", "nmk-i2c.1"),
+	REGULATOR_SUPPLY("v-i2c", "nmk-i2c.2"),
+	REGULATOR_SUPPLY("v-i2c", "nmk-i2c.3"),
+	/* "v-mmc" changed to "vcore" in the mainline kernel */
+	REGULATOR_SUPPLY("vcore", "sdi0"),
+	REGULATOR_SUPPLY("vcore", "sdi1"),
+	REGULATOR_SUPPLY("vcore", "sdi2"),
+	REGULATOR_SUPPLY("vcore", "sdi3"),
+	REGULATOR_SUPPLY("vcore", "sdi4"),
+	REGULATOR_SUPPLY("v-dma", "dma40.0"),
+	REGULATOR_SUPPLY("v-ape", "ab8500-usb.0"),
+	/* "v-uart" changed to "vcore" in the mainline kernel */
+	REGULATOR_SUPPLY("vcore", "uart0"),
+	REGULATOR_SUPPLY("vcore", "uart1"),
+	REGULATOR_SUPPLY("vcore", "uart2"),
+	REGULATOR_SUPPLY("v-ape", "nmk-ske-keypad.0"),
+};
+
+static struct regulator_consumer_supply db8500_vsmps2_consumers[] = {
+	/* CG2900 and CW1200 power to off-chip peripherals */
+	REGULATOR_SUPPLY("gbf_1v8", "cg2900-uart.0"),
+	REGULATOR_SUPPLY("wlan_1v8", "cw1200.0"),
+	REGULATOR_SUPPLY("musb_1v8", "ab8500-usb.0"),
+	/* AV8100 regulator */
+	REGULATOR_SUPPLY("hdmi_1v8", "0-0070"),
+};
+
+static struct regulator_consumer_supply db8500_b2r2_mcde_consumers[] = {
+	REGULATOR_SUPPLY("vsupply", "b2r2.0"),
+	REGULATOR_SUPPLY("vsupply", "mcde.0"),
+};
+
+static struct regulator_init_data db8500_regulators[DB8500_NUM_REGULATORS] = {
+	[DB8500_REGULATOR_VAPE] = {
+		.constraints = {
+			.name = "db8500-vape",
+			.valid_ops_mask = REGULATOR_CHANGE_STATUS,
+		},
+		.consumer_supplies = db8500_vape_consumers,
+		.num_consumer_supplies = ARRAY_SIZE(db8500_vape_consumers),
+	},
+	[DB8500_REGULATOR_VARM] = {
+		.constraints = {
+			.name = "db8500-varm",
+			.valid_ops_mask = REGULATOR_CHANGE_STATUS,
+		},
+	},
+	[DB8500_REGULATOR_VMODEM] = {
+		.constraints = {
+			.name = "db8500-vmodem",
+			.valid_ops_mask = REGULATOR_CHANGE_STATUS,
+		},
+	},
+	[DB8500_REGULATOR_VPLL] = {
+		.constraints = {
+			.name = "db8500-vpll",
+			.valid_ops_mask = REGULATOR_CHANGE_STATUS,
+		},
+	},
+	[DB8500_REGULATOR_VSMPS1] = {
+		.constraints = {
+			.name = "db8500-vsmps1",
+			.valid_ops_mask = REGULATOR_CHANGE_STATUS,
+		},
+	},
+	[DB8500_REGULATOR_VSMPS2] = {
+		.constraints = {
+			.name = "db8500-vsmps2",
+			.valid_ops_mask = REGULATOR_CHANGE_STATUS,
+		},
+		.consumer_supplies = db8500_vsmps2_consumers,
+		.num_consumer_supplies = ARRAY_SIZE(db8500_vsmps2_consumers),
+	},
+	[DB8500_REGULATOR_VSMPS3] = {
+		.constraints = {
+			.name = "db8500-vsmps3",
+			.valid_ops_mask = REGULATOR_CHANGE_STATUS,
+		},
+	},
+	[DB8500_REGULATOR_VRF1] = {
+		.constraints = {
+			.name = "db8500-vrf1",
+			.valid_ops_mask = REGULATOR_CHANGE_STATUS,
+		},
+	},
+	[DB8500_REGULATOR_SWITCH_SVAMMDSP] = {
+		.supply_regulator = "db8500-vape",
+		.constraints = {
+			.name = "db8500-sva-mmdsp",
+			.valid_ops_mask = REGULATOR_CHANGE_STATUS,
+		},
+	},
+	[DB8500_REGULATOR_SWITCH_SVAMMDSPRET] = {
+		.constraints = {
+			/* "ret" means "retention" */
+			.name = "db8500-sva-mmdsp-ret",
+			.valid_ops_mask = REGULATOR_CHANGE_STATUS,
+		},
+	},
+	[DB8500_REGULATOR_SWITCH_SVAPIPE] = {
+		.supply_regulator = "db8500-vape",
+		.constraints = {
+			.name = "db8500-sva-pipe",
+			.valid_ops_mask = REGULATOR_CHANGE_STATUS,
+		},
+	},
+	[DB8500_REGULATOR_SWITCH_SIAMMDSP] = {
+		.supply_regulator = "db8500-vape",
+		.constraints = {
+			.name = "db8500-sia-mmdsp",
+			.valid_ops_mask = REGULATOR_CHANGE_STATUS,
+		},
+	},
+	[DB8500_REGULATOR_SWITCH_SIAMMDSPRET] = {
+		.constraints = {
+			.name = "db8500-sia-mmdsp-ret",
+			.valid_ops_mask = REGULATOR_CHANGE_STATUS,
+		},
+	},
+	[DB8500_REGULATOR_SWITCH_SIAPIPE] = {
+		.supply_regulator = "db8500-vape",
+		.constraints = {
+			.name = "db8500-sia-pipe",
+			.valid_ops_mask = REGULATOR_CHANGE_STATUS,
+		},
+	},
+	[DB8500_REGULATOR_SWITCH_SGA] = {
+		.supply_regulator = "db8500-vape",
+		.constraints = {
+			.name = "db8500-sga",
+			.valid_ops_mask = REGULATOR_CHANGE_STATUS,
+		},
+	},
+	[DB8500_REGULATOR_SWITCH_B2R2_MCDE] = {
+		.supply_regulator = "db8500-vape",
+		.constraints = {
+			.name = "db8500-b2r2-mcde",
+			.valid_ops_mask = REGULATOR_CHANGE_STATUS,
+		},
+		.consumer_supplies = db8500_b2r2_mcde_consumers,
+		.num_consumer_supplies = ARRAY_SIZE(db8500_b2r2_mcde_consumers),
+	},
+	[DB8500_REGULATOR_SWITCH_ESRAM12] = {
+		.supply_regulator = "db8500-vape",
+		.constraints = {
+			.name = "db8500-esram12",
+			.valid_ops_mask = REGULATOR_CHANGE_STATUS,
+		},
+	},
+	[DB8500_REGULATOR_SWITCH_ESRAM12RET] = {
+		.constraints = {
+			.name = "db8500-esram12-ret",
+			.valid_ops_mask = REGULATOR_CHANGE_STATUS,
+		},
+	},
+	[DB8500_REGULATOR_SWITCH_ESRAM34] = {
+		.supply_regulator = "db8500-vape",
+		.constraints = {
+			.name = "db8500-esram34",
+			.valid_ops_mask = REGULATOR_CHANGE_STATUS,
+		},
+	},
+	[DB8500_REGULATOR_SWITCH_ESRAM34RET] = {
+		.constraints = {
+			.name = "db8500-esram34-ret",
+			.valid_ops_mask = REGULATOR_CHANGE_STATUS,
+		},
+	},
+};
+
 static struct mfd_cell db8500_prcmu_devs[] = {
 	{
 		.name = "db8500-prcmu-regulators",
+		.mfd_data = &db8500_regulators,
 	},
 	{
 		.name = "cpufreq-u8500",
diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index b9f29e0d4295..f0b13a0d1851 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -274,6 +274,13 @@ config REGULATOR_AB8500
 	  This driver supports the regulators found on the ST-Ericsson mixed
 	  signal AB8500 PMIC
 
+config REGULATOR_DB8500_PRCMU
+	bool "ST-Ericsson DB8500 Voltage Domain Regulators"
+	depends on MFD_DB8500_PRCMU
+	help
+	  This driver supports the voltage domain regulators controlled by the
+	  DB8500 PRCMU
+
 config REGULATOR_TPS6586X
 	tristate "TI TPS6586X Power regulators"
 	depends on MFD_TPS6586X
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index d72a42756778..165ff5371e9e 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -41,5 +41,6 @@ obj-$(CONFIG_REGULATOR_TPS6524X) += tps6524x-regulator.o
 obj-$(CONFIG_REGULATOR_88PM8607) += 88pm8607.o
 obj-$(CONFIG_REGULATOR_ISL6271A) += isl6271a-regulator.o
 obj-$(CONFIG_REGULATOR_AB8500)	+= ab8500.o
+obj-$(CONFIG_REGULATOR_DB8500_PRCMU) += db8500-prcmu.o
 
 ccflags-$(CONFIG_REGULATOR_DEBUG) += -DDEBUG
diff --git a/drivers/regulator/db8500-prcmu.c b/drivers/regulator/db8500-prcmu.c
new file mode 100644
index 000000000000..1089a961616e
--- /dev/null
+++ b/drivers/regulator/db8500-prcmu.c
@@ -0,0 +1,558 @@
+/*
+ * Copyright (C) ST-Ericsson SA 2010
+ *
+ * License Terms: GNU General Public License v2
+ * Authors: Sundar Iyer <sundar.iyer@stericsson.com> for ST-Ericsson
+ *          Bengt Jonsson <bengt.g.jonsson@stericsson.com> for ST-Ericsson
+ *
+ * Power domain regulators on DB8500
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/spinlock.h>
+#include <linux/platform_device.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/db8500-prcmu.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+#include <linux/regulator/db8500-prcmu.h>
+
+/*
+ * power state reference count
+ */
+static int power_state_active_cnt; /* will initialize to zero */
+static DEFINE_SPINLOCK(power_state_active_lock);
+
+static void power_state_active_enable(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&power_state_active_lock, flags);
+	power_state_active_cnt++;
+	spin_unlock_irqrestore(&power_state_active_lock, flags);
+}
+
+static int power_state_active_disable(void)
+{
+	int ret = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&power_state_active_lock, flags);
+	if (power_state_active_cnt <= 0) {
+		pr_err("power state: unbalanced enable/disable calls\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	power_state_active_cnt--;
+out:
+	spin_unlock_irqrestore(&power_state_active_lock, flags);
+	return ret;
+}
+
+/*
+ * Exported interface for CPUIdle only. This function is called when interrupts
+ * are turned off. Hence, no locking.
+ */
+int power_state_active_is_enabled(void)
+{
+	return (power_state_active_cnt > 0);
+}
+
+/**
+ * struct db8500_regulator_info - db8500 regulator information
+ * @dev: device pointer
+ * @desc: regulator description
+ * @rdev: regulator device pointer
+ * @is_enabled: status of the regulator
+ * @epod_id: id for EPOD (power domain)
+ * @is_ramret: RAM retention switch for EPOD (power domain)
+ * @operating_point: operating point (only for vape, to be removed)
+ *
+ */
+struct db8500_regulator_info {
+	struct device *dev;
+	struct regulator_desc desc;
+	struct regulator_dev *rdev;
+	bool is_enabled;
+	u16 epod_id;
+	bool is_ramret;
+	bool exclude_from_power_state;
+	unsigned int operating_point;
+};
+
+static int db8500_regulator_enable(struct regulator_dev *rdev)
+{
+	struct db8500_regulator_info *info = rdev_get_drvdata(rdev);
+
+	if (info == NULL)
+		return -EINVAL;
+
+	dev_vdbg(rdev_get_dev(rdev), "regulator-%s-enable\n",
+		info->desc.name);
+
+	info->is_enabled = true;
+	if (!info->exclude_from_power_state)
+		power_state_active_enable();
+
+	return 0;
+}
+
+static int db8500_regulator_disable(struct regulator_dev *rdev)
+{
+	struct db8500_regulator_info *info = rdev_get_drvdata(rdev);
+	int ret = 0;
+
+	if (info == NULL)
+		return -EINVAL;
+
+	dev_vdbg(rdev_get_dev(rdev), "regulator-%s-disable\n",
+		info->desc.name);
+
+	info->is_enabled = false;
+	if (!info->exclude_from_power_state)
+		ret = power_state_active_disable();
+
+	return ret;
+}
+
+static int db8500_regulator_is_enabled(struct regulator_dev *rdev)
+{
+	struct db8500_regulator_info *info = rdev_get_drvdata(rdev);
+
+	if (info == NULL)
+		return -EINVAL;
+
+	dev_vdbg(rdev_get_dev(rdev), "regulator-%s-is_enabled (is_enabled):"
+		" %i\n", info->desc.name, info->is_enabled);
+
+	return info->is_enabled;
+}
+
+/* db8500 regulator operations */
+static struct regulator_ops db8500_regulator_ops = {
+	.enable			= db8500_regulator_enable,
+	.disable		= db8500_regulator_disable,
+	.is_enabled		= db8500_regulator_is_enabled,
+};
+
+/*
+ * EPOD control
+ */
+static bool epod_on[NUM_EPOD_ID];
+static bool epod_ramret[NUM_EPOD_ID];
+
+static int enable_epod(u16 epod_id, bool ramret)
+{
+	int ret;
+
+	if (ramret) {
+		if (!epod_on[epod_id]) {
+			ret = prcmu_set_epod(epod_id, EPOD_STATE_RAMRET);
+			if (ret < 0)
+				return ret;
+		}
+		epod_ramret[epod_id] = true;
+	} else {
+		ret = prcmu_set_epod(epod_id, EPOD_STATE_ON);
+		if (ret < 0)
+			return ret;
+		epod_on[epod_id] = true;
+	}
+
+	return 0;
+}
+
+static int disable_epod(u16 epod_id, bool ramret)
+{
+	int ret;
+
+	if (ramret) {
+		if (!epod_on[epod_id]) {
+			ret = prcmu_set_epod(epod_id, EPOD_STATE_OFF);
+			if (ret < 0)
+				return ret;
+		}
+		epod_ramret[epod_id] = false;
+	} else {
+		if (epod_ramret[epod_id]) {
+			ret = prcmu_set_epod(epod_id, EPOD_STATE_RAMRET);
+			if (ret < 0)
+				return ret;
+		} else {
+			ret = prcmu_set_epod(epod_id, EPOD_STATE_OFF);
+			if (ret < 0)
+				return ret;
+		}
+		epod_on[epod_id] = false;
+	}
+
+	return 0;
+}
+
+/*
+ * Regulator switch
+ */
+static int db8500_regulator_switch_enable(struct regulator_dev *rdev)
+{
+	struct db8500_regulator_info *info = rdev_get_drvdata(rdev);
+	int ret;
+
+	if (info == NULL)
+		return -EINVAL;
+
+	dev_vdbg(rdev_get_dev(rdev), "regulator-switch-%s-enable\n",
+		info->desc.name);
+
+	ret = enable_epod(info->epod_id, info->is_ramret);
+	if (ret < 0) {
+		dev_err(rdev_get_dev(rdev),
+			"regulator-switch-%s-enable: prcmu call failed\n",
+			info->desc.name);
+		goto out;
+	}
+
+	info->is_enabled = true;
+out:
+	return ret;
+}
+
+static int db8500_regulator_switch_disable(struct regulator_dev *rdev)
+{
+	struct db8500_regulator_info *info = rdev_get_drvdata(rdev);
+	int ret;
+
+	if (info == NULL)
+		return -EINVAL;
+
+	dev_vdbg(rdev_get_dev(rdev), "regulator-switch-%s-disable\n",
+		info->desc.name);
+
+	ret = disable_epod(info->epod_id, info->is_ramret);
+	if (ret < 0) {
+		dev_err(rdev_get_dev(rdev),
+			"regulator_switch-%s-disable: prcmu call failed\n",
+			info->desc.name);
+		goto out;
+	}
+
+	info->is_enabled = 0;
+out:
+	return ret;
+}
+
+static int db8500_regulator_switch_is_enabled(struct regulator_dev *rdev)
+{
+	struct db8500_regulator_info *info = rdev_get_drvdata(rdev);
+
+	if (info == NULL)
+		return -EINVAL;
+
+	dev_vdbg(rdev_get_dev(rdev),
+		"regulator-switch-%s-is_enabled (is_enabled): %i\n",
+		info->desc.name, info->is_enabled);
+
+	return info->is_enabled;
+}
+
+static struct regulator_ops db8500_regulator_switch_ops = {
+	.enable			= db8500_regulator_switch_enable,
+	.disable		= db8500_regulator_switch_disable,
+	.is_enabled		= db8500_regulator_switch_is_enabled,
+};
+
+/*
+ * Regulator information
+ */
+static struct db8500_regulator_info
+		db8500_regulator_info[DB8500_NUM_REGULATORS] = {
+	[DB8500_REGULATOR_VAPE] = {
+		.desc = {
+			.name	= "db8500-vape",
+			.id	= DB8500_REGULATOR_VAPE,
+			.ops	= &db8500_regulator_ops,
+			.type	= REGULATOR_VOLTAGE,
+			.owner	= THIS_MODULE,
+		},
+	},
+	[DB8500_REGULATOR_VARM] = {
+		.desc = {
+			.name	= "db8500-varm",
+			.id	= DB8500_REGULATOR_VARM,
+			.ops	= &db8500_regulator_ops,
+			.type	= REGULATOR_VOLTAGE,
+			.owner	= THIS_MODULE,
+		},
+	},
+	[DB8500_REGULATOR_VMODEM] = {
+		.desc = {
+			.name	= "db8500-vmodem",
+			.id	= DB8500_REGULATOR_VMODEM,
+			.ops	= &db8500_regulator_ops,
+			.type	= REGULATOR_VOLTAGE,
+			.owner	= THIS_MODULE,
+		},
+	},
+	[DB8500_REGULATOR_VPLL] = {
+		.desc = {
+			.name	= "db8500-vpll",
+			.id	= DB8500_REGULATOR_VPLL,
+			.ops	= &db8500_regulator_ops,
+			.type	= REGULATOR_VOLTAGE,
+			.owner	= THIS_MODULE,
+		},
+	},
+	[DB8500_REGULATOR_VSMPS1] = {
+		.desc = {
+			.name	= "db8500-vsmps1",
+			.id	= DB8500_REGULATOR_VSMPS1,
+			.ops	= &db8500_regulator_ops,
+			.type	= REGULATOR_VOLTAGE,
+			.owner	= THIS_MODULE,
+		},
+	},
+	[DB8500_REGULATOR_VSMPS2] = {
+		.desc = {
+			.name	= "db8500-vsmps2",
+			.id	= DB8500_REGULATOR_VSMPS2,
+			.ops	= &db8500_regulator_ops,
+			.type	= REGULATOR_VOLTAGE,
+			.owner	= THIS_MODULE,
+		},
+		.exclude_from_power_state = true,
+	},
+	[DB8500_REGULATOR_VSMPS3] = {
+		.desc = {
+			.name	= "db8500-vsmps3",
+			.id	= DB8500_REGULATOR_VSMPS3,
+			.ops	= &db8500_regulator_ops,
+			.type	= REGULATOR_VOLTAGE,
+			.owner	= THIS_MODULE,
+		},
+	},
+	[DB8500_REGULATOR_VRF1] = {
+		.desc = {
+			.name	= "db8500-vrf1",
+			.id	= DB8500_REGULATOR_VRF1,
+			.ops	= &db8500_regulator_ops,
+			.type	= REGULATOR_VOLTAGE,
+			.owner	= THIS_MODULE,
+		},
+	},
+	[DB8500_REGULATOR_SWITCH_SVAMMDSP] = {
+		.desc = {
+			.name	= "db8500-sva-mmdsp",
+			.id	= DB8500_REGULATOR_SWITCH_SVAMMDSP,
+			.ops	= &db8500_regulator_switch_ops,
+			.type	= REGULATOR_VOLTAGE,
+			.owner	= THIS_MODULE,
+		},
+		.epod_id = EPOD_ID_SVAMMDSP,
+	},
+	[DB8500_REGULATOR_SWITCH_SVAMMDSPRET] = {
+		.desc = {
+			.name	= "db8500-sva-mmdsp-ret",
+			.id	= DB8500_REGULATOR_SWITCH_SVAMMDSPRET,
+			.ops	= &db8500_regulator_switch_ops,
+			.type	= REGULATOR_VOLTAGE,
+			.owner	= THIS_MODULE,
+		},
+		.epod_id = EPOD_ID_SVAMMDSP,
+		.is_ramret = true,
+	},
+	[DB8500_REGULATOR_SWITCH_SVAPIPE] = {
+		.desc = {
+			.name	= "db8500-sva-pipe",
+			.id	= DB8500_REGULATOR_SWITCH_SVAPIPE,
+			.ops	= &db8500_regulator_switch_ops,
+			.type	= REGULATOR_VOLTAGE,
+			.owner	= THIS_MODULE,
+		},
+		.epod_id = EPOD_ID_SVAPIPE,
+	},
+	[DB8500_REGULATOR_SWITCH_SIAMMDSP] = {
+		.desc = {
+			.name	= "db8500-sia-mmdsp",
+			.id	= DB8500_REGULATOR_SWITCH_SIAMMDSP,
+			.ops	= &db8500_regulator_switch_ops,
+			.type	= REGULATOR_VOLTAGE,
+			.owner	= THIS_MODULE,
+		},
+		.epod_id = EPOD_ID_SIAMMDSP,
+	},
+	[DB8500_REGULATOR_SWITCH_SIAMMDSPRET] = {
+		.desc = {
+			.name	= "db8500-sia-mmdsp-ret",
+			.id	= DB8500_REGULATOR_SWITCH_SIAMMDSPRET,
+			.ops	= &db8500_regulator_switch_ops,
+			.type	= REGULATOR_VOLTAGE,
+			.owner	= THIS_MODULE,
+		},
+		.epod_id = EPOD_ID_SIAMMDSP,
+		.is_ramret = true,
+	},
+	[DB8500_REGULATOR_SWITCH_SIAPIPE] = {
+		.desc = {
+			.name	= "db8500-sia-pipe",
+			.id	= DB8500_REGULATOR_SWITCH_SIAPIPE,
+			.ops	= &db8500_regulator_switch_ops,
+			.type	= REGULATOR_VOLTAGE,
+			.owner	= THIS_MODULE,
+		},
+		.epod_id = EPOD_ID_SIAPIPE,
+	},
+	[DB8500_REGULATOR_SWITCH_SGA] = {
+		.desc = {
+			.name	= "db8500-sga",
+			.id	= DB8500_REGULATOR_SWITCH_SGA,
+			.ops	= &db8500_regulator_switch_ops,
+			.type	= REGULATOR_VOLTAGE,
+			.owner	= THIS_MODULE,
+		},
+		.epod_id = EPOD_ID_SGA,
+	},
+	[DB8500_REGULATOR_SWITCH_B2R2_MCDE] = {
+		.desc = {
+			.name	= "db8500-b2r2-mcde",
+			.id	= DB8500_REGULATOR_SWITCH_B2R2_MCDE,
+			.ops	= &db8500_regulator_switch_ops,
+			.type	= REGULATOR_VOLTAGE,
+			.owner	= THIS_MODULE,
+		},
+		.epod_id = EPOD_ID_B2R2_MCDE,
+	},
+	[DB8500_REGULATOR_SWITCH_ESRAM12] = {
+		.desc = {
+			.name	= "db8500-esram12",
+			.id	= DB8500_REGULATOR_SWITCH_ESRAM12,
+			.ops	= &db8500_regulator_switch_ops,
+			.type	= REGULATOR_VOLTAGE,
+			.owner	= THIS_MODULE,
+		},
+		.epod_id	= EPOD_ID_ESRAM12,
+		.is_enabled	= true,
+	},
+	[DB8500_REGULATOR_SWITCH_ESRAM12RET] = {
+		.desc = {
+			.name	= "db8500-esram12-ret",
+			.id	= DB8500_REGULATOR_SWITCH_ESRAM12RET,
+			.ops	= &db8500_regulator_switch_ops,
+			.type	= REGULATOR_VOLTAGE,
+			.owner	= THIS_MODULE,
+		},
+		.epod_id = EPOD_ID_ESRAM12,
+		.is_ramret = true,
+	},
+	[DB8500_REGULATOR_SWITCH_ESRAM34] = {
+		.desc = {
+			.name	= "db8500-esram34",
+			.id	= DB8500_REGULATOR_SWITCH_ESRAM34,
+			.ops	= &db8500_regulator_switch_ops,
+			.type	= REGULATOR_VOLTAGE,
+			.owner	= THIS_MODULE,
+		},
+		.epod_id	= EPOD_ID_ESRAM34,
+		.is_enabled	= true,
+	},
+	[DB8500_REGULATOR_SWITCH_ESRAM34RET] = {
+		.desc = {
+			.name	= "db8500-esram34-ret",
+			.id	= DB8500_REGULATOR_SWITCH_ESRAM34RET,
+			.ops	= &db8500_regulator_switch_ops,
+			.type	= REGULATOR_VOLTAGE,
+			.owner	= THIS_MODULE,
+		},
+		.epod_id = EPOD_ID_ESRAM34,
+		.is_ramret = true,
+	},
+};
+
+static int __devinit db8500_regulator_probe(struct platform_device *pdev)
+{
+	struct regulator_init_data *db8500_init_data = mfd_get_data(pdev);
+	int i, err;
+
+	/* register all regulators */
+	for (i = 0; i < ARRAY_SIZE(db8500_regulator_info); i++) {
+		struct db8500_regulator_info *info;
+		struct regulator_init_data *init_data = &db8500_init_data[i];
+
+		/* assign per-regulator data */
+		info = &db8500_regulator_info[i];
+		info->dev = &pdev->dev;
+
+		/* register with the regulator framework */
+		info->rdev = regulator_register(&info->desc, &pdev->dev,
+				init_data, info);
+		if (IS_ERR(info->rdev)) {
+			err = PTR_ERR(info->rdev);
+			dev_err(&pdev->dev, "failed to register %s: err %i\n",
+				info->desc.name, err);
+
+			/* if failing, unregister all earlier regulators */
+			i--;
+			while (i >= 0) {
+				info = &db8500_regulator_info[i];
+				regulator_unregister(info->rdev);
+				i--;
+			}
+			return err;
+		}
+
+		dev_dbg(rdev_get_dev(info->rdev),
+			"regulator-%s-probed\n", info->desc.name);
+	}
+
+	return 0;
+}
+
+static int __exit db8500_regulator_remove(struct platform_device *pdev)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(db8500_regulator_info); i++) {
+		struct db8500_regulator_info *info;
+		info = &db8500_regulator_info[i];
+
+		dev_vdbg(rdev_get_dev(info->rdev),
+			"regulator-%s-remove\n", info->desc.name);
+
+		regulator_unregister(info->rdev);
+	}
+
+	return 0;
+}
+
+static struct platform_driver db8500_regulator_driver = {
+	.driver = {
+		.name = "db8500-prcmu-regulators",
+		.owner = THIS_MODULE,
+	},
+	.probe = db8500_regulator_probe,
+	.remove = __exit_p(db8500_regulator_remove),
+};
+
+static int __init db8500_regulator_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&db8500_regulator_driver);
+	if (ret < 0)
+		return -ENODEV;
+
+	return 0;
+}
+
+static void __exit db8500_regulator_exit(void)
+{
+	platform_driver_unregister(&db8500_regulator_driver);
+}
+
+arch_initcall(db8500_regulator_init);
+module_exit(db8500_regulator_exit);
+
+MODULE_AUTHOR("STMicroelectronics/ST-Ericsson");
+MODULE_DESCRIPTION("DB8500 regulator driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/regulator/db8500-prcmu.h b/include/linux/regulator/db8500-prcmu.h
new file mode 100644
index 000000000000..612062313b68
--- /dev/null
+++ b/include/linux/regulator/db8500-prcmu.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) ST-Ericsson SA 2010
+ *
+ * License Terms: GNU General Public License v2
+ *
+ * Author: Bengt Jonsson <bengt.g.jonsson@stericsson.com> for ST-Ericsson
+ *
+ * Interface to power domain regulators on DB8500
+ */
+
+#ifndef __REGULATOR_H__
+#define __REGULATOR_H__
+
+/* Number of DB8500 regulators and regulator enumeration */
+enum db8500_regulator_id {
+	DB8500_REGULATOR_VAPE,
+	DB8500_REGULATOR_VARM,
+	DB8500_REGULATOR_VMODEM,
+	DB8500_REGULATOR_VPLL,
+	DB8500_REGULATOR_VSMPS1,
+	DB8500_REGULATOR_VSMPS2,
+	DB8500_REGULATOR_VSMPS3,
+	DB8500_REGULATOR_VRF1,
+	DB8500_REGULATOR_SWITCH_SVAMMDSP,
+	DB8500_REGULATOR_SWITCH_SVAMMDSPRET,
+	DB8500_REGULATOR_SWITCH_SVAPIPE,
+	DB8500_REGULATOR_SWITCH_SIAMMDSP,
+	DB8500_REGULATOR_SWITCH_SIAMMDSPRET,
+	DB8500_REGULATOR_SWITCH_SIAPIPE,
+	DB8500_REGULATOR_SWITCH_SGA,
+	DB8500_REGULATOR_SWITCH_B2R2_MCDE,
+	DB8500_REGULATOR_SWITCH_ESRAM12,
+	DB8500_REGULATOR_SWITCH_ESRAM12RET,
+	DB8500_REGULATOR_SWITCH_ESRAM34,
+	DB8500_REGULATOR_SWITCH_ESRAM34RET,
+	DB8500_NUM_REGULATORS
+};
+
+/*
+ * Exported interface for CPUIdle only. This function is called with all
+ * interrupts turned off.
+ */
+int power_state_active_is_enabled(void);
+
+#endif
-- 
cgit v1.2.3


From 618e724b8d79c6232daac49cb39b0723bf5c4b08 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 11 May 2011 14:06:58 -0700
Subject: ns: Declare sys_setns in syscalls.h

Ooops I overlooked this one, and missing it causes compile
errors of the powerpc syscall.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/syscalls.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 83ecc1749ef6..b8b380443d05 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -844,4 +844,5 @@ asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name,
 asmlinkage long sys_open_by_handle_at(int mountdirfd,
 				      struct file_handle __user *handle,
 				      int flags);
+asmlinkage long sys_setns(int fd, int nstype);
 #endif
-- 
cgit v1.2.3


From 41e2a4893566ced3c46af15df5b727326881e47d Mon Sep 17 00:00:00 2001
From: Philip Rakity <prakity@marvell.com>
Date: Sat, 19 Mar 2011 14:10:33 -0400
Subject: mmc: Ensure linux starts in eMMC user partition

uBoot sometimes leaves eMMC pointing to the private boot partition.
Ensure we always start looking at the user partition.

Signed-off-by: Philip Rakity <prakity@marvell.com>
Signed-off-by: Bruce Clemens <bpclemens@marvell.com>
Signed-off-by: Mark F. Brown <markb@marvell.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/mmc.c   | 10 ++++++++++
 include/linux/mmc/card.h |  1 +
 include/linux/mmc/mmc.h  |  1 +
 3 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 772d0d0a541b..5c611a6e0080 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -288,6 +288,7 @@ static int mmc_read_ext_csd(struct mmc_card *card)
 
 	if (card->ext_csd.rev >= 3) {
 		u8 sa_shift = ext_csd[EXT_CSD_S_A_TIMEOUT];
+		card->ext_csd.bootconfig = ext_csd[EXT_CSD_BOOT_CONFIG];
 
 		/* Sleep / awake timeout in 100ns units */
 		if (sa_shift > 0 && sa_shift <= 0x17)
@@ -567,6 +568,15 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 		}
 	}
 
+	/*
+	 * Ensure eMMC user default partition is enabled
+	 */
+	if (card->ext_csd.bootconfig & 0x7) {
+		card->ext_csd.bootconfig &= ~0x7;
+		mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_BOOT_CONFIG,
+			   card->ext_csd.bootconfig);
+	}
+
 	/*
 	 * Activate high speed (if supported)
 	 */
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index adb4888248be..557b73263390 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -45,6 +45,7 @@ struct mmc_ext_csd {
 	u8			rev;
 	u8			erase_group_def;
 	u8			sec_feature_support;
+	u8			bootconfig;
 	unsigned int		sa_timeout;		/* Units: 100ns */
 	unsigned int		hs_max_dtr;
 	unsigned int		sectors;
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 264ba5451e3b..b5ec88fd1352 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -256,6 +256,7 @@ struct _mmc_csd {
 #define EXT_CSD_PARTITION_ATTRIBUTE	156	/* R/W */
 #define EXT_CSD_PARTITION_SUPPORT	160	/* RO */
 #define EXT_CSD_ERASE_GROUP_DEF		175	/* R/W */
+#define EXT_CSD_BOOT_CONFIG		179	/* R/W */
 #define EXT_CSD_ERASED_MEM_CONT		181	/* RO */
 #define EXT_CSD_BUS_WIDTH		183	/* R/W */
 #define EXT_CSD_HS_TIMING		185	/* R/W */
-- 
cgit v1.2.3


From f4c5522b0a8827f39f83f928961d87e081bfe71c Mon Sep 17 00:00:00 2001
From: Andrei Warkentin <andreiw@motorola.com>
Date: Thu, 31 Mar 2011 18:40:00 -0500
Subject: mmc: Reliable write support.

Allows reliable writes to be used for MMC writes. Reliable writes are used
to service write REQ_FUA/REQ_META requests. Handles both the legacy and
the enhanced reliable write support in MMC cards.

Signed-off-by: Andrei Warkentin <andreiw@motorola.com>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/card/block.c | 81 +++++++++++++++++++++++++++++++++++++++++++++---
 drivers/mmc/core/mmc.c   |  5 +++
 include/linux/mmc/card.h |  2 ++
 include/linux/mmc/mmc.h  |  4 +++
 4 files changed, 88 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 61d233a7c118..91a676773608 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -48,6 +48,10 @@ MODULE_ALIAS("mmc:block");
 #endif
 #define MODULE_PARAM_PREFIX "mmcblk."
 
+#define REL_WRITES_SUPPORTED(card) (mmc_card_mmc((card)) &&	\
+    (((card)->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN) ||	\
+     ((card)->ext_csd.rel_sectors)))
+
 static DEFINE_MUTEX(block_mutex);
 
 /*
@@ -331,6 +335,57 @@ out:
 	return err ? 0 : 1;
 }
 
+static int mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req)
+{
+	struct mmc_blk_data *md = mq->data;
+
+	/*
+	 * No-op, only service this because we need REQ_FUA for reliable
+	 * writes.
+	 */
+	spin_lock_irq(&md->lock);
+	__blk_end_request_all(req, 0);
+	spin_unlock_irq(&md->lock);
+
+	return 1;
+}
+
+/*
+ * Reformat current write as a reliable write, supporting
+ * both legacy and the enhanced reliable write MMC cards.
+ * In each transfer we'll handle only as much as a single
+ * reliable write can handle, thus finish the request in
+ * partial completions.
+ */
+static inline int mmc_apply_rel_rw(struct mmc_blk_request *brq,
+				   struct mmc_card *card,
+				   struct request *req)
+{
+	int err;
+	struct mmc_command set_count;
+
+	if (!(card->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN)) {
+		/* Legacy mode imposes restrictions on transfers. */
+		if (!IS_ALIGNED(brq->cmd.arg, card->ext_csd.rel_sectors))
+			brq->data.blocks = 1;
+
+		if (brq->data.blocks > card->ext_csd.rel_sectors)
+			brq->data.blocks = card->ext_csd.rel_sectors;
+		else if (brq->data.blocks < card->ext_csd.rel_sectors)
+			brq->data.blocks = 1;
+	}
+
+	memset(&set_count, 0, sizeof(struct mmc_command));
+	set_count.opcode = MMC_SET_BLOCK_COUNT;
+	set_count.arg = brq->data.blocks | (1 << 31);
+	set_count.flags = MMC_RSP_R1 | MMC_CMD_AC;
+	err = mmc_wait_for_cmd(card->host, &set_count, 0);
+	if (err)
+		printk(KERN_ERR "%s: error %d SET_BLOCK_COUNT\n",
+		       req->rq_disk->disk_name, err);
+	return err;
+}
+
 static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req)
 {
 	struct mmc_blk_data *md = mq->data;
@@ -338,6 +393,15 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req)
 	struct mmc_blk_request brq;
 	int ret = 1, disable_multi = 0;
 
+	/*
+	 * Reliable writes are used to implement Forced Unit Access and
+	 * REQ_META accesses, and are supported only on MMCs.
+	 */
+	bool do_rel_wr = ((req->cmd_flags & REQ_FUA) ||
+			  (req->cmd_flags & REQ_META)) &&
+		(rq_data_dir(req) == WRITE) &&
+		REL_WRITES_SUPPORTED(card);
+
 	mmc_claim_host(card->host);
 
 	do {
@@ -374,12 +438,14 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req)
 		if (disable_multi && brq.data.blocks > 1)
 			brq.data.blocks = 1;
 
-		if (brq.data.blocks > 1) {
+		if (brq.data.blocks > 1 || do_rel_wr) {
 			/* SPI multiblock writes terminate using a special
-			 * token, not a STOP_TRANSMISSION request.
+			 * token, not a STOP_TRANSMISSION request. Reliable
+			 * writes use SET_BLOCK_COUNT and do not use a
+			 * STOP_TRANSMISSION request either.
 			 */
-			if (!mmc_host_is_spi(card->host)
-					|| rq_data_dir(req) == READ)
+			if ((!mmc_host_is_spi(card->host) && !do_rel_wr) ||
+			    rq_data_dir(req) == READ)
 				brq.mrq.stop = &brq.stop;
 			readcmd = MMC_READ_MULTIPLE_BLOCK;
 			writecmd = MMC_WRITE_MULTIPLE_BLOCK;
@@ -396,6 +462,9 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req)
 			brq.data.flags |= MMC_DATA_WRITE;
 		}
 
+		if (do_rel_wr && mmc_apply_rel_rw(&brq, card, req))
+			goto cmd_err;
+
 		mmc_set_data_timeout(&brq.data, card);
 
 		brq.data.sg = mq->sg;
@@ -565,6 +634,8 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 			return mmc_blk_issue_secdiscard_rq(mq, req);
 		else
 			return mmc_blk_issue_discard_rq(mq, req);
+	} else if (req->cmd_flags & REQ_FLUSH) {
+		return mmc_blk_issue_flush(mq, req);
 	} else {
 		return mmc_blk_issue_rw_rq(mq, req);
 	}
@@ -622,6 +693,8 @@ static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card)
 	md->disk->queue = md->queue.queue;
 	md->disk->driverfs_dev = &card->dev;
 	set_disk_ro(md->disk, md->read_only);
+	if (REL_WRITES_SUPPORTED(card))
+		blk_queue_flush(md->queue.queue, REQ_FLUSH | REQ_FUA);
 
 	/*
 	 * As discussed on lkml, GENHD_FL_REMOVABLE should:
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 5c611a6e0080..ae6b8fd38800 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -300,6 +300,8 @@ static int mmc_read_ext_csd(struct mmc_card *card)
 			ext_csd[EXT_CSD_ERASE_TIMEOUT_MULT];
 		card->ext_csd.hc_erase_size =
 			ext_csd[EXT_CSD_HC_ERASE_GRP_SIZE] << 10;
+
+		card->ext_csd.rel_sectors = ext_csd[EXT_CSD_REL_WR_SEC_C];
 	}
 
 	if (card->ext_csd.rev >= 4) {
@@ -351,6 +353,9 @@ static int mmc_read_ext_csd(struct mmc_card *card)
 			ext_csd[EXT_CSD_TRIM_MULT];
 	}
 
+	if (card->ext_csd.rev >= 5)
+		card->ext_csd.rel_param = ext_csd[EXT_CSD_WR_REL_PARAM];
+
 	if (ext_csd[EXT_CSD_ERASED_MEM_CONT])
 		card->erased_byte = 0xFF;
 	else
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 557b73263390..c4e96fa5fb2b 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -45,6 +45,8 @@ struct mmc_ext_csd {
 	u8			rev;
 	u8			erase_group_def;
 	u8			sec_feature_support;
+	u8			rel_sectors;
+	u8			rel_param;
 	u8			bootconfig;
 	unsigned int		sa_timeout;		/* Units: 100ns */
 	unsigned int		hs_max_dtr;
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index b5ec88fd1352..390aa6eef676 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -255,6 +255,7 @@ struct _mmc_csd {
 
 #define EXT_CSD_PARTITION_ATTRIBUTE	156	/* R/W */
 #define EXT_CSD_PARTITION_SUPPORT	160	/* RO */
+#define EXT_CSD_WR_REL_PARAM		166	/* RO */
 #define EXT_CSD_ERASE_GROUP_DEF		175	/* R/W */
 #define EXT_CSD_BOOT_CONFIG		179	/* R/W */
 #define EXT_CSD_ERASED_MEM_CONT		181	/* RO */
@@ -265,6 +266,7 @@ struct _mmc_csd {
 #define EXT_CSD_CARD_TYPE		196	/* RO */
 #define EXT_CSD_SEC_CNT			212	/* RO, 4 bytes */
 #define EXT_CSD_S_A_TIMEOUT		217	/* RO */
+#define EXT_CSD_REL_WR_SEC_C		222	/* RO */
 #define EXT_CSD_HC_WP_GRP_SIZE		221	/* RO */
 #define EXT_CSD_ERASE_TIMEOUT_MULT	223	/* RO */
 #define EXT_CSD_HC_ERASE_GRP_SIZE	224	/* RO */
@@ -277,6 +279,8 @@ struct _mmc_csd {
  * EXT_CSD field definitions
  */
 
+#define EXT_CSD_WR_REL_PARAM_EN		(1<<2)
+
 #define EXT_CSD_CMD_SET_NORMAL		(1<<0)
 #define EXT_CSD_CMD_SET_SECURE		(1<<1)
 #define EXT_CSD_CMD_SET_CPSECURE	(1<<2)
-- 
cgit v1.2.3


From a5e9425d2010978c5f85986cc70a9fa0c0d5b912 Mon Sep 17 00:00:00 2001
From: Ohad Ben-Cohen <ohad@wizery.com>
Date: Tue, 5 Apr 2011 17:43:20 +0300
Subject: mmc: mmc_card_keep_power cleanups

mmc_card_is_powered_resumed is a mouthful; instead, simply use
mmc_card_keep_power, which also better explains the purpose of
the macro.

Employ mmc_card_keep_power() where possible.

Signed-off-by: Ohad Ben-Cohen <ohad@wizery.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/core.c  |  4 ++--
 drivers/mmc/core/sdio.c  | 10 +++++-----
 include/linux/mmc/host.h |  2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 1f453acc8682..c2350e474159 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1746,7 +1746,7 @@ int mmc_suspend_host(struct mmc_host *host)
 	}
 	mmc_bus_put(host);
 
-	if (!err && !(host->pm_flags & MMC_PM_KEEP_POWER))
+	if (!err && !mmc_card_keep_power(host))
 		mmc_power_off(host);
 
 	return err;
@@ -1764,7 +1764,7 @@ int mmc_resume_host(struct mmc_host *host)
 
 	mmc_bus_get(host);
 	if (host->bus_ops && !host->bus_dead) {
-		if (!(host->pm_flags & MMC_PM_KEEP_POWER)) {
+		if (!mmc_card_keep_power(host)) {
 			mmc_power_up(host);
 			mmc_select_voltage(host, host->ocr);
 			/*
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index db0f0b44d684..0f7d4362d213 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -625,7 +625,7 @@ static int mmc_sdio_suspend(struct mmc_host *host)
 		}
 	}
 
-	if (!err && host->pm_flags & MMC_PM_KEEP_POWER) {
+	if (!err && mmc_card_keep_power(host)) {
 		mmc_claim_host(host);
 		sdio_disable_wide(host->card);
 		mmc_release_host(host);
@@ -645,10 +645,10 @@ static int mmc_sdio_resume(struct mmc_host *host)
 	mmc_claim_host(host);
 
 	/* No need to reinitialize powered-resumed nonremovable cards */
-	if (mmc_card_is_removable(host) || !mmc_card_is_powered_resumed(host))
+	if (mmc_card_is_removable(host) || !mmc_card_keep_power(host))
 		err = mmc_sdio_init_card(host, host->ocr, host->card,
-				 (host->pm_flags & MMC_PM_KEEP_POWER));
-	else if (mmc_card_is_powered_resumed(host)) {
+					mmc_card_keep_power(host));
+	else if (mmc_card_keep_power(host)) {
 		/* We may have switched to 1-bit mode during suspend */
 		err = sdio_enable_4bit_bus(host->card);
 		if (err > 0) {
@@ -691,7 +691,7 @@ static int mmc_sdio_power_restore(struct mmc_host *host)
 
 	mmc_claim_host(host);
 	ret = mmc_sdio_init_card(host, host->ocr, host->card,
-			(host->pm_flags & MMC_PM_KEEP_POWER));
+				mmc_card_keep_power(host));
 	if (!ret && host->sdio_irqs)
 		mmc_signal_sdio_irq(host);
 	mmc_release_host(host);
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index eb792cb6d745..8ad3a9c6f495 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -320,7 +320,7 @@ static inline int mmc_card_is_removable(struct mmc_host *host)
 	return !(host->caps & MMC_CAP_NONREMOVABLE) && mmc_assume_removable;
 }
 
-static inline int mmc_card_is_powered_resumed(struct mmc_host *host)
+static inline int mmc_card_keep_power(struct mmc_host *host)
 {
 	return host->pm_flags & MMC_PM_KEEP_POWER;
 }
-- 
cgit v1.2.3


From 6b93d01fe5971951911a070f51f412d50e9536dc Mon Sep 17 00:00:00 2001
From: Ohad Ben-Cohen <ohad@wizery.com>
Date: Tue, 5 Apr 2011 17:43:21 +0300
Subject: mmc: do not switch to 1-bit mode if not required

6b5eda36 followed SDIO spec part E1 section 8, which states that
in case SDIO interrupts are being used to wake up a suspended host,
then it is required to switch to 1-bit mode before stopping the clock.

Before switching to 1-bit mode (or back to 4-bit mode on resume),
make sure that SDIO interrupts are really being used to wake the host.

This is helpful for devices which have an external irq line (e.g.
wl1271), and do not use SDIO interrupts to wake up the host.

In this case, switching to 1-bit mode (and back to 4-bit mode on resume)
is not necessary.

Reported-by: Eliad Peller <eliad@wizery.com>
Signed-off-by: Ohad Ben-Cohen <ohad@wizery.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/sdio.c  | 4 ++--
 include/linux/mmc/host.h | 4 ++++
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index 0f7d4362d213..4221670bf82e 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -625,7 +625,7 @@ static int mmc_sdio_suspend(struct mmc_host *host)
 		}
 	}
 
-	if (!err && mmc_card_keep_power(host)) {
+	if (!err && mmc_card_keep_power(host) && mmc_card_wake_sdio_irq(host)) {
 		mmc_claim_host(host);
 		sdio_disable_wide(host->card);
 		mmc_release_host(host);
@@ -648,7 +648,7 @@ static int mmc_sdio_resume(struct mmc_host *host)
 	if (mmc_card_is_removable(host) || !mmc_card_keep_power(host))
 		err = mmc_sdio_init_card(host, host->ocr, host->card,
 					mmc_card_keep_power(host));
-	else if (mmc_card_keep_power(host)) {
+	else if (mmc_card_keep_power(host) && mmc_card_wake_sdio_irq(host)) {
 		/* We may have switched to 1-bit mode during suspend */
 		err = sdio_enable_4bit_bus(host->card);
 		if (err > 0) {
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 8ad3a9c6f495..0fffa5cdc183 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -325,5 +325,9 @@ static inline int mmc_card_keep_power(struct mmc_host *host)
 	return host->pm_flags & MMC_PM_KEEP_POWER;
 }
 
+static inline int mmc_card_wake_sdio_irq(struct mmc_host *host)
+{
+	return host->pm_flags & MMC_PM_WAKE_SDIO_IRQ;
+}
 #endif
 
-- 
cgit v1.2.3


From eab4068795d670b065164096805cbf15a19e9690 Mon Sep 17 00:00:00 2001
From: Ohad Ben-Cohen <ohad@wizery.com>
Date: Tue, 5 Apr 2011 17:50:14 +0300
Subject: mmc: add MMC_QUIRK_NONSTD_FUNC_IF

Introduce MMC_QUIRK_NONSTD_FUNC_IF to ignore the "SDIO Standard Function
interface code" as indicated by the card's FBR, and instead treat all
functions as non-standard interfaces.

This is required to prevent standard drivers from facing
errors when trying to communicate with SDIO cards that erroneously
indicate standard function interface codes.

Signed-off-by: Ohad Ben-Cohen <ohad@wizery.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/sdio.c  | 6 ++++++
 include/linux/mmc/card.h | 6 ++++++
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index 4221670bf82e..c3c2bd0874e8 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -16,6 +16,7 @@
 #include <linux/mmc/card.h>
 #include <linux/mmc/sdio.h>
 #include <linux/mmc/sdio_func.h>
+#include <linux/mmc/sdio_ids.h>
 
 #include "core.h"
 #include "bus.h"
@@ -31,6 +32,11 @@ static int sdio_read_fbr(struct sdio_func *func)
 	int ret;
 	unsigned char data;
 
+	if (mmc_card_nonstd_func_interface(func->card)) {
+		func->class = SDIO_CLASS_NONE;
+		return 0;
+	}
+
 	ret = mmc_io_rw_direct(func->card, 0, 0,
 		SDIO_FBR_BASE(func->num) + SDIO_FBR_STD_IF, 0, &data);
 	if (ret)
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index c4e96fa5fb2b..317a0029e7d7 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -128,6 +128,7 @@ struct mmc_card {
 #define MMC_QUIRK_NONSTD_SDIO	(1<<2)		/* non-standard SDIO card attached */
 						/* (missing CIA registers) */
 #define MMC_QUIRK_BROKEN_CLK_GATING (1<<3)	/* clock gating the sdio bus will make card fail */
+#define MMC_QUIRK_NONSTD_FUNC_IF (1<<4)		/* SDIO card has nonstd function interfaces */
 
 	unsigned int		erase_size;	/* erase size in sectors */
  	unsigned int		erase_shift;	/* if erase unit is power 2 */
@@ -183,6 +184,11 @@ static inline int mmc_blksz_for_byte_mode(const struct mmc_card *c)
 	return c->quirks & MMC_QUIRK_BLKSZ_FOR_BYTE_MODE;
 }
 
+static inline int mmc_card_nonstd_func_interface(const struct mmc_card *c)
+{
+	return c->quirks & MMC_QUIRK_NONSTD_FUNC_IF;
+}
+
 #define mmc_card_name(c)	((c)->cid.prod_name)
 #define mmc_card_id(c)		(dev_name(&(c)->dev))
 
-- 
cgit v1.2.3


From 2059a02dcb84236f9db9197fa9b00418d7b8465b Mon Sep 17 00:00:00 2001
From: Ohad Ben-Cohen <ohad@wizery.com>
Date: Tue, 5 Apr 2011 18:02:25 +0300
Subject: mmc: add MMC_QUIRK_DISABLE_CD

006ebd5d introduced sdio_disable_cd(), which disconnects the pull-up
resistor on CD/DAT[3] (pin 1) of the card.

Make it possible to start using sdio_disable_cd() by introducing
MMC_QUIRK_DISABLE_CD.

Signed-off-by: Ohad Ben-Cohen <ohad@wizery.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/sdio.c  | 2 +-
 include/linux/mmc/card.h | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index c3c2bd0874e8..a5840c0de2e4 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -187,7 +187,7 @@ static int sdio_disable_cd(struct mmc_card *card)
 	int ret;
 	u8 ctrl;
 
-	if (!card->cccr.disable_cd)
+	if (!mmc_card_disable_cd(card))
 		return 0;
 
 	ret = mmc_io_rw_direct(card, 0, 0, SDIO_CCCR_IF, 0, &ctrl);
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 317a0029e7d7..2a7e54970c93 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -129,6 +129,7 @@ struct mmc_card {
 						/* (missing CIA registers) */
 #define MMC_QUIRK_BROKEN_CLK_GATING (1<<3)	/* clock gating the sdio bus will make card fail */
 #define MMC_QUIRK_NONSTD_FUNC_IF (1<<4)		/* SDIO card has nonstd function interfaces */
+#define MMC_QUIRK_DISABLE_CD	(1<<5)		/* disconnect CD/DAT[3] resistor */
 
 	unsigned int		erase_size;	/* erase size in sectors */
  	unsigned int		erase_shift;	/* if erase unit is power 2 */
@@ -184,6 +185,11 @@ static inline int mmc_blksz_for_byte_mode(const struct mmc_card *c)
 	return c->quirks & MMC_QUIRK_BLKSZ_FOR_BYTE_MODE;
 }
 
+static inline int mmc_card_disable_cd(const struct mmc_card *c)
+{
+	return c->quirks & MMC_QUIRK_DISABLE_CD;
+}
+
 static inline int mmc_card_nonstd_func_interface(const struct mmc_card *c)
 {
 	return c->quirks & MMC_QUIRK_NONSTD_FUNC_IF;
-- 
cgit v1.2.3


From 32780cd1350e651e68bdf33b7f5b009d21d5b794 Mon Sep 17 00:00:00 2001
From: Andrei Warkentin <andreiw@motorola.com>
Date: Mon, 11 Apr 2011 17:02:15 -0500
Subject: mmc: quirks: Extends card quirks with MMC/SD quirks matching the CID.

The current mechanism is SDIO-only. This allows us to create
function-specific quirks, without creating messy Kconfig dependencies,
or polluting core/ with function-specific code.

Signed-off-by: Andrei Warkentin <andreiw@motorola.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/core.h   |  2 -
 drivers/mmc/core/quirks.c | 93 +++++++++++++++++++++--------------------------
 drivers/mmc/core/sdio.c   |  2 +-
 include/linux/mmc/card.h  | 91 +++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 133 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h
index 20b1c0831eac..ca1fdde29df6 100644
--- a/drivers/mmc/core/core.h
+++ b/drivers/mmc/core/core.h
@@ -61,8 +61,6 @@ int mmc_attach_mmc(struct mmc_host *host);
 int mmc_attach_sd(struct mmc_host *host);
 int mmc_attach_sdio(struct mmc_host *host);
 
-void mmc_fixup_device(struct mmc_card *card);
-
 /* Module parameters */
 extern int use_spi_crc;
 
diff --git a/drivers/mmc/core/quirks.c b/drivers/mmc/core/quirks.c
index a4c42edc6cb3..3a596217029e 100644
--- a/drivers/mmc/core/quirks.c
+++ b/drivers/mmc/core/quirks.c
@@ -1,7 +1,8 @@
 /*
- *  This file contains work-arounds for many known sdio hardware
- *  bugs.
+ *  This file contains work-arounds for many known SD/MMC
+ *  and SDIO hardware bugs.
  *
+ *  Copyright (c) 2011 Andrei Warkentin <andreiw@motorola.com>
  *  Copyright (c) 2011 Pierre Tardy <tardyp@gmail.com>
  *  Inspired from pci fixup code:
  *  Copyright (c) 1999 Martin Mares <mj@ucw.cz>
@@ -11,34 +12,14 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/mmc/card.h>
-#include <linux/mod_devicetable.h>
 
-/*
- *  The world is not perfect and supplies us with broken mmc/sdio devices.
- *  For at least a part of these bugs we need a work-around
- */
-
-struct mmc_fixup {
-	u16 vendor, device;	/* You can use SDIO_ANY_ID here of course */
-	void (*vendor_fixup)(struct mmc_card *card, int data);
-	int data;
-};
-
-/*
- * This hook just adds a quirk unconditionnally
- */
-static void __maybe_unused add_quirk(struct mmc_card *card, int data)
-{
-	card->quirks |= data;
-}
+#ifndef SDIO_VENDOR_ID_TI
+#define SDIO_VENDOR_ID_TI		0x0097
+#endif
 
-/*
- * This hook just removes a quirk unconditionnally
- */
-static void __maybe_unused remove_quirk(struct mmc_card *card, int data)
-{
-	card->quirks &= ~data;
-}
+#ifndef SDIO_DEVICE_ID_TI_WL1271
+#define SDIO_DEVICE_ID_TI_WL1271	0x4076
+#endif
 
 /*
  * This hook just adds a quirk for all sdio devices
@@ -49,37 +30,47 @@ static void add_quirk_for_sdio_devices(struct mmc_card *card, int data)
 		card->quirks |= data;
 }
 
-#ifndef SDIO_VENDOR_ID_TI
-#define SDIO_VENDOR_ID_TI		0x0097
-#endif
-
-#ifndef SDIO_DEVICE_ID_TI_WL1271
-#define SDIO_DEVICE_ID_TI_WL1271	0x4076
-#endif
-
 static const struct mmc_fixup mmc_fixup_methods[] = {
 	/* by default sdio devices are considered CLK_GATING broken */
 	/* good cards will be whitelisted as they are tested */
-	{ SDIO_ANY_ID, SDIO_ANY_ID,
-		add_quirk_for_sdio_devices, MMC_QUIRK_BROKEN_CLK_GATING },
-	{ SDIO_VENDOR_ID_TI, SDIO_DEVICE_ID_TI_WL1271,
-		remove_quirk, MMC_QUIRK_BROKEN_CLK_GATING },
-	{ SDIO_VENDOR_ID_TI, SDIO_DEVICE_ID_TI_WL1271,
-		add_quirk, MMC_QUIRK_NONSTD_FUNC_IF },
-	{ SDIO_VENDOR_ID_TI, SDIO_DEVICE_ID_TI_WL1271,
-		add_quirk, MMC_QUIRK_DISABLE_CD },
-	{ 0 }
+	SDIO_FIXUP(SDIO_ANY_ID, SDIO_ANY_ID,
+		   add_quirk_for_sdio_devices,
+		   MMC_QUIRK_BROKEN_CLK_GATING),
+
+	SDIO_FIXUP(SDIO_VENDOR_ID_TI, SDIO_DEVICE_ID_TI_WL1271,
+		   remove_quirk, MMC_QUIRK_BROKEN_CLK_GATING),
+
+	SDIO_FIXUP(SDIO_VENDOR_ID_TI, SDIO_DEVICE_ID_TI_WL1271,
+		   add_quirk, MMC_QUIRK_NONSTD_FUNC_IF),
+
+	SDIO_FIXUP(SDIO_VENDOR_ID_TI, SDIO_DEVICE_ID_TI_WL1271,
+		   add_quirk, MMC_QUIRK_DISABLE_CD),
+
+	END_FIXUP
 };
 
-void mmc_fixup_device(struct mmc_card *card)
+void mmc_fixup_device(struct mmc_card *card, const struct mmc_fixup *table)
 {
 	const struct mmc_fixup *f;
+	u64 rev = cid_rev_card(card);
+
+	/* Non-core specific workarounds. */
+	if (!table)
+		table = mmc_fixup_methods;
 
-	for (f = mmc_fixup_methods; f->vendor_fixup; f++) {
-		if ((f->vendor == card->cis.vendor
-		     || f->vendor == (u16) SDIO_ANY_ID) &&
-		    (f->device == card->cis.device
-		     || f->device == (u16) SDIO_ANY_ID)) {
+	for (f = table; f->vendor_fixup; f++) {
+		if ((f->manfid == CID_MANFID_ANY ||
+		     f->manfid == card->cid.manfid) &&
+		    (f->oemid == CID_OEMID_ANY ||
+		     f->oemid == card->cid.oemid) &&
+		    (f->name == CID_NAME_ANY ||
+		     !strncmp(f->name, card->cid.prod_name,
+			      sizeof(card->cid.prod_name))) &&
+		    (f->cis_vendor == card->cis.vendor ||
+		     f->cis_vendor == (u16) SDIO_ANY_ID) &&
+		    (f->cis_device == card->cis.device ||
+		     f->cis_device == (u16) SDIO_ANY_ID) &&
+		    rev >= f->rev_start && rev <= f->rev_end) {
 			dev_dbg(&card->dev, "calling %pF\n", f->vendor_fixup);
 			f->vendor_fixup(card, f->data);
 		}
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index a5840c0de2e4..1e6095961500 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -472,7 +472,7 @@ static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr,
 
 		card = oldcard;
 	}
-	mmc_fixup_device(card);
+	mmc_fixup_device(card, NULL);
 
 	if (card->type == MMC_TYPE_SD_COMBO) {
 		err = mmc_sd_setup_card(host, card, oldcard != NULL);
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 2a7e54970c93..c6513175f7f1 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -11,6 +11,7 @@
 #define LINUX_MMC_CARD_H
 
 #include <linux/mmc/core.h>
+#include <linux/mod_devicetable.h>
 
 struct mmc_cid {
 	unsigned int		manfid;
@@ -157,7 +158,92 @@ struct mmc_card {
 	struct dentry		*debugfs_root;
 };
 
-void mmc_fixup_device(struct mmc_card *dev);
+/*
+ *  The world is not perfect and supplies us with broken mmc/sdio devices.
+ *  For at least some of these bugs we need a work-around.
+ */
+
+struct mmc_fixup {
+	/* CID-specific fields. */
+	const char *name;
+
+	/* Valid revision range */
+	u64 rev_start, rev_end;
+
+	unsigned int manfid;
+	unsigned short oemid;
+
+	/* SDIO-specfic fields. You can use SDIO_ANY_ID here of course */
+	u16 cis_vendor, cis_device;
+
+	void (*vendor_fixup)(struct mmc_card *card, int data);
+	int data;
+};
+
+#define CID_MANFID_ANY (-1ul)
+#define CID_OEMID_ANY ((unsigned short) -1)
+#define CID_NAME_ANY (NULL)
+
+#define END_FIXUP { 0 }
+
+#define _FIXUP_EXT(_name, _manfid, _oemid, _rev_start, _rev_end,	\
+		   _cis_vendor, _cis_device,				\
+		   _fixup, _data)					\
+	{						   \
+		.name = (_name),			   \
+		.manfid = (_manfid),			   \
+		.oemid = (_oemid),			   \
+		.rev_start = (_rev_start),		   \
+		.rev_end = (_rev_end),			   \
+		.cis_vendor = (_cis_vendor),		   \
+		.cis_device = (_cis_device),		   \
+		.vendor_fixup = (_fixup),		   \
+		.data = (_data),			   \
+	 }
+
+#define MMC_FIXUP_REV(_name, _manfid, _oemid, _rev_start, _rev_end,	\
+		      _fixup, _data)					\
+	_FIXUP_EXT(_name, _manfid,					\
+		   _oemid, _rev_start, _rev_end,			\
+		   SDIO_ANY_ID, SDIO_ANY_ID,				\
+		   _fixup, _data)					\
+
+#define MMC_FIXUP(_name, _manfid, _oemid, _fixup, _data) \
+	MMC_FIXUP_REV(_name, _manfid, _oemid, 0, -1ull, _fixup, _data)
+
+#define SDIO_FIXUP(_vendor, _device, _fixup, _data)			\
+	_FIXUP_EXT(CID_NAME_ANY, CID_MANFID_ANY,			\
+		    CID_OEMID_ANY, 0, -1ull,				\
+		   _vendor, _device,					\
+		   _fixup, _data)					\
+
+#define cid_rev(hwrev, fwrev, year, month)	\
+	(((u64) hwrev) << 40 |                  \
+	 ((u64) fwrev) << 32 |                  \
+	 ((u64) year) << 16 |                   \
+	 ((u64) month))
+
+#define cid_rev_card(card)		  \
+	cid_rev(card->cid.hwrev,	  \
+		    card->cid.fwrev,      \
+		    card->cid.year,	  \
+		    card->cid.month)
+
+/*
+ * This hook just adds a quirk unconditionally.
+ */
+static inline void __maybe_unused add_quirk(struct mmc_card *card, int data)
+{
+	card->quirks |= data;
+}
+
+/*
+ * This hook just removes a quirk unconditionally.
+ */
+static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
+{
+	card->quirks &= ~data;
+}
 
 #define mmc_card_mmc(c)		((c)->type == MMC_TYPE_MMC)
 #define mmc_card_sd(c)		((c)->type == MMC_TYPE_SD)
@@ -218,4 +304,7 @@ struct mmc_driver {
 extern int mmc_register_driver(struct mmc_driver *);
 extern void mmc_unregister_driver(struct mmc_driver *);
 
+extern void mmc_fixup_device(struct mmc_card *card,
+			     const struct mmc_fixup *table);
+
 #endif
-- 
cgit v1.2.3


From 853c6cac0dc0d9d330deb5b48c19eebafaed1841 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Tue, 12 Apr 2011 12:59:09 -0400
Subject: mmc: quirks: fix truncation warnings

Fix data truncation warnings: .manfid is not unsigned long:

drivers/mmc/core/quirks.c:36: warning: large integer implicitly truncated to unsigned type
drivers/mmc/core/quirks.c:40: warning: large integer implicitly truncated to unsigned type
drivers/mmc/core/quirks.c:43: warning: large integer implicitly truncated to unsigned type
drivers/mmc/core/quirks.c:46: warning: large integer implicitly truncated to unsigned type

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 include/linux/mmc/card.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index c6513175f7f1..937f852cf01e 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -180,7 +180,7 @@ struct mmc_fixup {
 	int data;
 };
 
-#define CID_MANFID_ANY (-1ul)
+#define CID_MANFID_ANY (-1u)
 #define CID_OEMID_ANY ((unsigned short) -1)
 #define CID_NAME_ANY (NULL)
 
-- 
cgit v1.2.3


From eaa02f751ff4f8abfc2e55a15c20a5a274244418 Mon Sep 17 00:00:00 2001
From: Andrei Warkentin <andreiw@motorola.com>
Date: Mon, 11 Apr 2011 16:13:41 -0500
Subject: mmc: core: Rename erase_timeout to cmd_timeout_ms.

Renames erase_timeout to cmd_timeout_ms inside struct mmc_command.
First step to making host honor timeouts for non-data-transfer
commands. Cleans up erase timeout code.

Signed-off-by: Andrei Warkentin <andreiw@motorola.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/core.c  | 39 +++++++++++++++++++++------------------
 include/linux/mmc/core.h |  2 +-
 2 files changed, 22 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index c2350e474159..5178d5daa5f4 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1187,9 +1187,8 @@ void mmc_init_erase(struct mmc_card *card)
 	}
 }
 
-static void mmc_set_mmc_erase_timeout(struct mmc_card *card,
-				      struct mmc_command *cmd,
-				      unsigned int arg, unsigned int qty)
+static unsigned int mmc_mmc_erase_timeout(struct mmc_card *card,
+				          unsigned int arg, unsigned int qty)
 {
 	unsigned int erase_timeout;
 
@@ -1246,38 +1245,42 @@ static void mmc_set_mmc_erase_timeout(struct mmc_card *card,
 	if (mmc_host_is_spi(card->host) && erase_timeout < 1000)
 		erase_timeout = 1000;
 
-	cmd->erase_timeout = erase_timeout;
+	return erase_timeout;
 }
 
-static void mmc_set_sd_erase_timeout(struct mmc_card *card,
-				     struct mmc_command *cmd, unsigned int arg,
-				     unsigned int qty)
+static unsigned int mmc_sd_erase_timeout(struct mmc_card *card,
+					 unsigned int arg,
+					 unsigned int qty)
 {
+	unsigned int erase_timeout;
+
 	if (card->ssr.erase_timeout) {
 		/* Erase timeout specified in SD Status Register (SSR) */
-		cmd->erase_timeout = card->ssr.erase_timeout * qty +
-				     card->ssr.erase_offset;
+		erase_timeout = card->ssr.erase_timeout * qty +
+				card->ssr.erase_offset;
 	} else {
 		/*
 		 * Erase timeout not specified in SD Status Register (SSR) so
 		 * use 250ms per write block.
 		 */
-		cmd->erase_timeout = 250 * qty;
+		erase_timeout = 250 * qty;
 	}
 
 	/* Must not be less than 1 second */
-	if (cmd->erase_timeout < 1000)
-		cmd->erase_timeout = 1000;
+	if (erase_timeout < 1000)
+		erase_timeout = 1000;
+
+	return erase_timeout;
 }
 
-static void mmc_set_erase_timeout(struct mmc_card *card,
-				  struct mmc_command *cmd, unsigned int arg,
-				  unsigned int qty)
+static unsigned int mmc_erase_timeout(struct mmc_card *card,
+				      unsigned int arg,
+				      unsigned int qty)
 {
 	if (mmc_card_sd(card))
-		mmc_set_sd_erase_timeout(card, cmd, arg, qty);
+		return mmc_sd_erase_timeout(card, arg, qty);
 	else
-		mmc_set_mmc_erase_timeout(card, cmd, arg, qty);
+		return mmc_mmc_erase_timeout(card, arg, qty);
 }
 
 static int mmc_do_erase(struct mmc_card *card, unsigned int from,
@@ -1351,7 +1354,7 @@ static int mmc_do_erase(struct mmc_card *card, unsigned int from,
 	cmd.opcode = MMC_ERASE;
 	cmd.arg = arg;
 	cmd.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
-	mmc_set_erase_timeout(card, &cmd, arg, qty);
+	cmd.cmd_timeout_ms = mmc_erase_timeout(card, arg, qty);
 	err = mmc_wait_for_cmd(card->host, &cmd, 0);
 	if (err) {
 		printk(KERN_ERR "mmc_erase: erase error %d, status %#x\n",
diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index 07f27af4dba5..811e96e8d1fe 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -92,7 +92,7 @@ struct mmc_command {
  *              actively failing requests
  */
 
-	unsigned int		erase_timeout;	/* in milliseconds */
+	unsigned int		cmd_timeout_ms;	/* in milliseconds */
 
 	struct mmc_data		*data;		/* data segment associated with cmd */
 	struct mmc_request	*mrq;		/* associated request */
-- 
cgit v1.2.3


From d3a8d95dcbb726b9cf0bbc166b2473bdd236c88c Mon Sep 17 00:00:00 2001
From: Andrei Warkentin <andreiw@motorola.com>
Date: Mon, 11 Apr 2011 16:13:43 -0500
Subject: mmc: core: Allow setting CMD timeout for CMD6 (SWITCH).

CMD6 is an R1B-type command, where DAT is used as busy. Depending
on register written using CMD6, timeout value can be different
as per spec.

Signed-off-by: Andrei Warkentin <andreiw@motorola.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/mmc.c     | 14 ++++++++------
 drivers/mmc/core/mmc_ops.c | 16 +++++++++++++++-
 drivers/mmc/core/mmc_ops.h |  1 -
 include/linux/mmc/core.h   |  1 +
 4 files changed, 24 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index ae6b8fd38800..396cb23625d2 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -548,7 +548,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 	 */
 	if (card->ext_csd.enhanced_area_en) {
 		err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-				EXT_CSD_ERASE_GROUP_DEF, 1);
+				 EXT_CSD_ERASE_GROUP_DEF, 1, 0);
 
 		if (err && err != -EBADMSG)
 			goto free_card;
@@ -579,7 +579,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 	if (card->ext_csd.bootconfig & 0x7) {
 		card->ext_csd.bootconfig &= ~0x7;
 		mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_BOOT_CONFIG,
-			   card->ext_csd.bootconfig);
+			   card->ext_csd.bootconfig, 0);
 	}
 
 	/*
@@ -588,7 +588,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 	if ((card->ext_csd.hs_max_dtr != 0) &&
 		(host->caps & MMC_CAP_MMC_HIGHSPEED)) {
 		err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-			EXT_CSD_HS_TIMING, 1);
+				 EXT_CSD_HS_TIMING, 1, 0);
 		if (err && err != -EBADMSG)
 			goto free_card;
 
@@ -655,7 +655,8 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 				ddr = 0; /* no DDR for 1-bit width */
 			err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
 					 EXT_CSD_BUS_WIDTH,
-					 ext_csd_bits[idx][0]);
+					 ext_csd_bits[idx][0],
+					 0);
 			if (!err) {
 				mmc_set_bus_width_ddr(card->host,
 						      bus_width, MMC_SDR_MODE);
@@ -674,8 +675,9 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 
 		if (!err && ddr) {
 			err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-					EXT_CSD_BUS_WIDTH,
-					ext_csd_bits[idx][1]);
+					 EXT_CSD_BUS_WIDTH,
+					 ext_csd_bits[idx][1],
+					 0);
 		}
 		if (err) {
 			printk(KERN_WARNING "%s: switch to bus width %d ddr %d "
diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index f3b22bf89cc9..a2bae625332a 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -387,7 +387,19 @@ int mmc_spi_set_crc(struct mmc_host *host, int use_crc)
 	return err;
 }
 
-int mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value)
+/**
+ *	mmc_switch - modify EXT_CSD register
+ *	@card: the MMC card associated with the data transfer
+ *	@set: cmd set values
+ *	@index: EXT_CSD register index
+ *	@value: value to program into EXT_CSD register
+ *	@timeout_ms: timeout (ms) for operation performed by register write,
+ *                   timeout of zero implies maximum possible timeout
+ *
+ *	Modifies the EXT_CSD register for selected card.
+ */
+int mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value,
+	       unsigned int timeout_ms)
 {
 	int err;
 	struct mmc_command cmd;
@@ -404,6 +416,7 @@ int mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value)
 		  (value << 8) |
 		  set;
 	cmd.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
+	cmd.cmd_timeout_ms = timeout_ms;
 
 	err = mmc_wait_for_cmd(card->host, &cmd, MMC_CMD_RETRIES);
 	if (err)
@@ -433,6 +446,7 @@ int mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(mmc_switch);
 
 int mmc_send_status(struct mmc_card *card, u32 *status)
 {
diff --git a/drivers/mmc/core/mmc_ops.h b/drivers/mmc/core/mmc_ops.h
index e6d44b8a18db..9276946fa5b7 100644
--- a/drivers/mmc/core/mmc_ops.h
+++ b/drivers/mmc/core/mmc_ops.h
@@ -20,7 +20,6 @@ int mmc_all_send_cid(struct mmc_host *host, u32 *cid);
 int mmc_set_relative_addr(struct mmc_card *card);
 int mmc_send_csd(struct mmc_card *card, u32 *csd);
 int mmc_send_ext_csd(struct mmc_card *card, u8 *ext_csd);
-int mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value);
 int mmc_send_status(struct mmc_card *card, u32 *status);
 int mmc_send_cid(struct mmc_host *host, u32 *cid);
 int mmc_spi_read_ocr(struct mmc_host *host, int highcap, u32 *ocrp);
diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index 811e96e8d1fe..f8e4bcbd2846 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -135,6 +135,7 @@ extern void mmc_wait_for_req(struct mmc_host *, struct mmc_request *);
 extern int mmc_wait_for_cmd(struct mmc_host *, struct mmc_command *, int);
 extern int mmc_wait_for_app_cmd(struct mmc_host *, struct mmc_card *,
 	struct mmc_command *, int);
+extern int mmc_switch(struct mmc_card *, u8, u8, u8, unsigned int);
 
 #define MMC_ERASE_ARG		0x00000000
 #define MMC_SECURE_ERASE_ARG	0x80000000
-- 
cgit v1.2.3


From 371a689f64b0da140c3bcd3f55305ffa1c3a58ef Mon Sep 17 00:00:00 2001
From: Andrei Warkentin <andreiw@motorola.com>
Date: Mon, 11 Apr 2011 18:10:25 -0500
Subject: mmc: MMC boot partitions support.

Allows device MMC boot partitions to be accessed. MMC partitions are
treated effectively as separate block devices on the same MMC card.

Signed-off-by: Andrei Warkentin <andreiw@motorola.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 Documentation/mmc/00-INDEX          |   2 +
 Documentation/mmc/mmc-dev-attrs.txt |  10 ++
 Documentation/mmc/mmc-dev-parts.txt |  27 ++++
 drivers/mmc/card/block.c            | 274 +++++++++++++++++++++++++++++++-----
 drivers/mmc/core/mmc.c              |  22 ++-
 include/linux/mmc/card.h            |   4 +-
 include/linux/mmc/mmc.h             |   8 +-
 7 files changed, 305 insertions(+), 42 deletions(-)
 create mode 100644 Documentation/mmc/mmc-dev-parts.txt

(limited to 'include/linux')

diff --git a/Documentation/mmc/00-INDEX b/Documentation/mmc/00-INDEX
index fca586f5b853..93dd7a714075 100644
--- a/Documentation/mmc/00-INDEX
+++ b/Documentation/mmc/00-INDEX
@@ -2,3 +2,5 @@
         - this file
 mmc-dev-attrs.txt
         - info on SD and MMC device attributes
+mmc-dev-parts.txt
+        - info on SD and MMC device partitions
diff --git a/Documentation/mmc/mmc-dev-attrs.txt b/Documentation/mmc/mmc-dev-attrs.txt
index ff2bd685bced..8898a95b41e5 100644
--- a/Documentation/mmc/mmc-dev-attrs.txt
+++ b/Documentation/mmc/mmc-dev-attrs.txt
@@ -1,3 +1,13 @@
+SD and MMC Block Device Attributes
+==================================
+
+These attributes are defined for the block devices associated with the
+SD or MMC device.
+
+The following attributes are read/write.
+
+	force_ro		Enforce read-only access even if write protect switch is off.
+
 SD and MMC Device Attributes
 ============================
 
diff --git a/Documentation/mmc/mmc-dev-parts.txt b/Documentation/mmc/mmc-dev-parts.txt
new file mode 100644
index 000000000000..2db28b8e662f
--- /dev/null
+++ b/Documentation/mmc/mmc-dev-parts.txt
@@ -0,0 +1,27 @@
+SD and MMC Device Partitions
+============================
+
+Device partitions are additional logical block devices present on the
+SD/MMC device.
+
+As of this writing, MMC boot partitions as supported and exposed as
+/dev/mmcblkXboot0 and /dev/mmcblkXboot1, where X is the index of the
+parent /dev/mmcblkX.
+
+MMC Boot Partitions
+===================
+
+Read and write access is provided to the two MMC boot partitions. Due to
+the sensitive nature of the boot partition contents, which often store
+a bootloader or bootloader configuration tables crucial to booting the
+platform, write access is disabled by default to reduce the chance of
+accidental bricking.
+
+To enable write access to /dev/mmcblkXbootY, disable the forced read-only
+access with:
+
+echo 0 > /sys/block/mmcblkXbootY/force_ro
+
+To re-enable read-only access:
+
+echo 1 > /sys/block/mmcblkXbootY/force_ro
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 3e9082b729ff..1e6bd910e79e 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -76,9 +76,19 @@ struct mmc_blk_data {
 	spinlock_t	lock;
 	struct gendisk	*disk;
 	struct mmc_queue queue;
+	struct list_head part;
 
 	unsigned int	usage;
 	unsigned int	read_only;
+	unsigned int	part_type;
+
+	/*
+	 * Only set in main mmc_blk_data associated
+	 * with mmc_card with mmc_set_drvdata, and keeps
+	 * track of the current selected device partition.
+	 */
+	unsigned int	part_curr;
+	struct device_attribute force_ro;
 };
 
 static DEFINE_MUTEX(open_lock);
@@ -101,17 +111,22 @@ static struct mmc_blk_data *mmc_blk_get(struct gendisk *disk)
 	return md;
 }
 
+static inline int mmc_get_devidx(struct gendisk *disk)
+{
+	int devmaj = MAJOR(disk_devt(disk));
+	int devidx = MINOR(disk_devt(disk)) / perdev_minors;
+
+	if (!devmaj)
+		devidx = disk->first_minor / perdev_minors;
+	return devidx;
+}
+
 static void mmc_blk_put(struct mmc_blk_data *md)
 {
 	mutex_lock(&open_lock);
 	md->usage--;
 	if (md->usage == 0) {
-		int devmaj = MAJOR(disk_devt(md->disk));
-		int devidx = MINOR(disk_devt(md->disk)) / perdev_minors;
-
-		if (!devmaj)
-			devidx = md->disk->first_minor / perdev_minors;
-
+		int devidx = mmc_get_devidx(md->disk);
 		blk_cleanup_queue(md->queue.queue);
 
 		__clear_bit(devidx, dev_use);
@@ -122,6 +137,38 @@ static void mmc_blk_put(struct mmc_blk_data *md)
 	mutex_unlock(&open_lock);
 }
 
+static ssize_t force_ro_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	int ret;
+	struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev));
+
+	ret = snprintf(buf, PAGE_SIZE, "%d",
+		       get_disk_ro(dev_to_disk(dev)) ^
+		       md->read_only);
+	mmc_blk_put(md);
+	return ret;
+}
+
+static ssize_t force_ro_store(struct device *dev, struct device_attribute *attr,
+			      const char *buf, size_t count)
+{
+	int ret;
+	char *end;
+	struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev));
+	unsigned long set = simple_strtoul(buf, &end, 0);
+	if (end == buf) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	set_disk_ro(dev_to_disk(dev), set || md->read_only);
+	ret = count;
+out:
+	mmc_blk_put(md);
+	return ret;
+}
+
 static int mmc_blk_open(struct block_device *bdev, fmode_t mode)
 {
 	struct mmc_blk_data *md = mmc_blk_get(bdev->bd_disk);
@@ -176,6 +223,29 @@ struct mmc_blk_request {
 	struct mmc_data		data;
 };
 
+static inline int mmc_blk_part_switch(struct mmc_card *card,
+				      struct mmc_blk_data *md)
+{
+	int ret;
+	struct mmc_blk_data *main_md = mmc_get_drvdata(card);
+	if (main_md->part_curr == md->part_type)
+		return 0;
+
+	if (mmc_card_mmc(card)) {
+		card->ext_csd.part_config &= ~EXT_CSD_PART_CONFIG_ACC_MASK;
+		card->ext_csd.part_config |= md->part_type;
+
+		ret = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+				 EXT_CSD_PART_CONFIG, card->ext_csd.part_config,
+				 card->ext_csd.part_time);
+		if (ret)
+			return ret;
+}
+
+	main_md->part_curr = md->part_type;
+	return 0;
+}
+
 static u32 mmc_sd_num_wr_blocks(struct mmc_card *card)
 {
 	int err;
@@ -620,6 +690,11 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 	struct mmc_card *card = md->queue.card;
 
 	mmc_claim_host(card->host);
+	ret = mmc_blk_part_switch(card, md);
+	if (ret) {
+		ret = 0;
+		goto out;
+	}
 
 	if (req->cmd_flags & REQ_DISCARD) {
 		if (req->cmd_flags & REQ_SECURE)
@@ -632,6 +707,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 		ret = mmc_blk_issue_rw_rq(mq, req);
 	}
 
+out:
 	mmc_release_host(card->host);
 	return ret;
 }
@@ -642,7 +718,11 @@ static inline int mmc_blk_readonly(struct mmc_card *card)
 	       !(card->csd.cmdclass & CCC_BLOCK_WRITE);
 }
 
-static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card)
+static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
+					      struct device *parent,
+					      sector_t size,
+					      bool default_ro,
+					      const char *subname)
 {
 	struct mmc_blk_data *md;
 	int devidx, ret;
@@ -658,7 +738,6 @@ static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card)
 		goto out;
 	}
 
-
 	/*
 	 * Set the read-only status based on the supported commands
 	 * and the write protect switch.
@@ -672,6 +751,7 @@ static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card)
 	}
 
 	spin_lock_init(&md->lock);
+	INIT_LIST_HEAD(&md->part);
 	md->usage = 1;
 
 	ret = mmc_init_queue(&md->queue, card, &md->lock);
@@ -686,8 +766,8 @@ static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card)
 	md->disk->fops = &mmc_bdops;
 	md->disk->private_data = md;
 	md->disk->queue = md->queue.queue;
-	md->disk->driverfs_dev = &card->dev;
-	set_disk_ro(md->disk, md->read_only);
+	md->disk->driverfs_dev = parent;
+	set_disk_ro(md->disk, md->read_only || default_ro);
 	if (REL_WRITES_SUPPORTED(card))
 		blk_queue_flush(md->queue.queue, REQ_FLUSH | REQ_FUA);
 
@@ -703,33 +783,97 @@ static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card)
 	 * messages to tell when the card is present.
 	 */
 
-	snprintf(md->disk->disk_name, sizeof(md->disk->disk_name),
-		"mmcblk%d", devidx);
+	if (subname)
+		snprintf(md->disk->disk_name, sizeof(md->disk->disk_name),
+			 "mmcblk%d%s",
+			 mmc_get_devidx(dev_to_disk(parent)), subname);
+	else
+		snprintf(md->disk->disk_name, sizeof(md->disk->disk_name),
+			 "mmcblk%d", devidx);
 
 	blk_queue_logical_block_size(md->queue.queue, 512);
+	set_capacity(md->disk, size);
+	return md;
+
+ err_putdisk:
+	put_disk(md->disk);
+ err_kfree:
+	kfree(md);
+ out:
+	return ERR_PTR(ret);
+}
+
+static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card)
+{
+	sector_t size;
+	struct mmc_blk_data *md;
 
 	if (!mmc_card_sd(card) && mmc_card_blockaddr(card)) {
 		/*
 		 * The EXT_CSD sector count is in number or 512 byte
 		 * sectors.
 		 */
-		set_capacity(md->disk, card->ext_csd.sectors);
+		size = card->ext_csd.sectors;
 	} else {
 		/*
 		 * The CSD capacity field is in units of read_blkbits.
 		 * set_capacity takes units of 512 bytes.
 		 */
-		set_capacity(md->disk,
-			card->csd.capacity << (card->csd.read_blkbits - 9));
+		size = card->csd.capacity << (card->csd.read_blkbits - 9);
 	}
+
+	md = mmc_blk_alloc_req(card, &card->dev, size, false, NULL);
 	return md;
+}
 
- err_putdisk:
-	put_disk(md->disk);
- err_kfree:
-	kfree(md);
- out:
-	return ERR_PTR(ret);
+static int mmc_blk_alloc_part(struct mmc_card *card,
+			      struct mmc_blk_data *md,
+			      unsigned int part_type,
+			      sector_t size,
+			      bool default_ro,
+			      const char *subname)
+{
+	char cap_str[10];
+	struct mmc_blk_data *part_md;
+
+	part_md = mmc_blk_alloc_req(card, disk_to_dev(md->disk), size, default_ro,
+				    subname);
+	if (IS_ERR(part_md))
+		return PTR_ERR(part_md);
+	part_md->part_type = part_type;
+	list_add(&part_md->part, &md->part);
+
+	string_get_size((u64)get_capacity(part_md->disk) << 9, STRING_UNITS_2,
+			cap_str, sizeof(cap_str));
+	printk(KERN_INFO "%s: %s %s partition %u %s\n",
+	       part_md->disk->disk_name, mmc_card_id(card),
+	       mmc_card_name(card), part_md->part_type, cap_str);
+	return 0;
+}
+
+static int mmc_blk_alloc_parts(struct mmc_card *card, struct mmc_blk_data *md)
+{
+	int ret = 0;
+
+	if (!mmc_card_mmc(card))
+		return 0;
+
+	if (card->ext_csd.boot_size) {
+		ret = mmc_blk_alloc_part(card, md, EXT_CSD_PART_CONFIG_ACC_BOOT0,
+					 card->ext_csd.boot_size >> 9,
+					 true,
+					 "boot0");
+		if (ret)
+			return ret;
+		ret = mmc_blk_alloc_part(card, md, EXT_CSD_PART_CONFIG_ACC_BOOT1,
+					 card->ext_csd.boot_size >> 9,
+					 true,
+					 "boot1");
+		if (ret)
+			return ret;
+	}
+
+	return ret;
 }
 
 static int
@@ -750,9 +894,54 @@ mmc_blk_set_blksize(struct mmc_blk_data *md, struct mmc_card *card)
 	return 0;
 }
 
+static void mmc_blk_remove_req(struct mmc_blk_data *md)
+{
+	if (md) {
+		if (md->disk->flags & GENHD_FL_UP) {
+			device_remove_file(disk_to_dev(md->disk), &md->force_ro);
+
+			/* Stop new requests from getting into the queue */
+			del_gendisk(md->disk);
+		}
+
+		/* Then flush out any already in there */
+		mmc_cleanup_queue(&md->queue);
+		mmc_blk_put(md);
+	}
+}
+
+static void mmc_blk_remove_parts(struct mmc_card *card,
+				 struct mmc_blk_data *md)
+{
+	struct list_head *pos, *q;
+	struct mmc_blk_data *part_md;
+
+	list_for_each_safe(pos, q, &md->part) {
+		part_md = list_entry(pos, struct mmc_blk_data, part);
+		list_del(pos);
+		mmc_blk_remove_req(part_md);
+	}
+}
+
+static int mmc_add_disk(struct mmc_blk_data *md)
+{
+	int ret;
+
+	add_disk(md->disk);
+	md->force_ro.show = force_ro_show;
+	md->force_ro.store = force_ro_store;
+	md->force_ro.attr.name = "force_ro";
+	md->force_ro.attr.mode = S_IRUGO | S_IWUSR;
+	ret = device_create_file(disk_to_dev(md->disk), &md->force_ro);
+	if (ret)
+		del_gendisk(md->disk);
+
+	return ret;
+}
+
 static int mmc_blk_probe(struct mmc_card *card)
 {
-	struct mmc_blk_data *md;
+	struct mmc_blk_data *md, *part_md;
 	int err;
 	char cap_str[10];
 
@@ -776,14 +965,22 @@ static int mmc_blk_probe(struct mmc_card *card)
 		md->disk->disk_name, mmc_card_id(card), mmc_card_name(card),
 		cap_str, md->read_only ? "(ro)" : "");
 
+	if (mmc_blk_alloc_parts(card, md))
+		goto out;
+
 	mmc_set_drvdata(card, md);
-	add_disk(md->disk);
+	if (mmc_add_disk(md))
+		goto out;
+
+	list_for_each_entry(part_md, &md->part, part) {
+		if (mmc_add_disk(part_md))
+			goto out;
+	}
 	return 0;
 
  out:
-	mmc_cleanup_queue(&md->queue);
-	mmc_blk_put(md);
-
+	mmc_blk_remove_parts(card, md);
+	mmc_blk_remove_req(md);
 	return err;
 }
 
@@ -791,36 +988,43 @@ static void mmc_blk_remove(struct mmc_card *card)
 {
 	struct mmc_blk_data *md = mmc_get_drvdata(card);
 
-	if (md) {
-		/* Stop new requests from getting into the queue */
-		del_gendisk(md->disk);
-
-		/* Then flush out any already in there */
-		mmc_cleanup_queue(&md->queue);
-
-		mmc_blk_put(md);
-	}
+	mmc_blk_remove_parts(card, md);
+	mmc_blk_remove_req(md);
 	mmc_set_drvdata(card, NULL);
 }
 
 #ifdef CONFIG_PM
 static int mmc_blk_suspend(struct mmc_card *card, pm_message_t state)
 {
+	struct mmc_blk_data *part_md;
 	struct mmc_blk_data *md = mmc_get_drvdata(card);
 
 	if (md) {
 		mmc_queue_suspend(&md->queue);
+		list_for_each_entry(part_md, &md->part, part) {
+			mmc_queue_suspend(&part_md->queue);
+		}
 	}
 	return 0;
 }
 
 static int mmc_blk_resume(struct mmc_card *card)
 {
+	struct mmc_blk_data *part_md;
 	struct mmc_blk_data *md = mmc_get_drvdata(card);
 
 	if (md) {
 		mmc_blk_set_blksize(md, card);
+
+		/*
+		 * Resume involves the card going into idle state,
+		 * so current partition is always the main one.
+		 */
+		md->part_curr = md->part_type;
 		mmc_queue_resume(&md->queue);
+		list_for_each_entry(part_md, &md->part, part) {
+			mmc_queue_resume(&part_md->queue);
+		}
 	}
 	return 0;
 }
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 396cb23625d2..a2c795e8f9dd 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -288,7 +288,10 @@ static int mmc_read_ext_csd(struct mmc_card *card)
 
 	if (card->ext_csd.rev >= 3) {
 		u8 sa_shift = ext_csd[EXT_CSD_S_A_TIMEOUT];
-		card->ext_csd.bootconfig = ext_csd[EXT_CSD_BOOT_CONFIG];
+		card->ext_csd.part_config = ext_csd[EXT_CSD_PART_CONFIG];
+
+		/* EXT_CSD value is in units of 10ms, but we store in ms */
+		card->ext_csd.part_time = 10 * ext_csd[EXT_CSD_PART_SWITCH_TIME];
 
 		/* Sleep / awake timeout in 100ns units */
 		if (sa_shift > 0 && sa_shift <= 0x17)
@@ -302,6 +305,12 @@ static int mmc_read_ext_csd(struct mmc_card *card)
 			ext_csd[EXT_CSD_HC_ERASE_GRP_SIZE] << 10;
 
 		card->ext_csd.rel_sectors = ext_csd[EXT_CSD_REL_WR_SEC_C];
+
+		/*
+		 * There are two boot regions of equal size, defined in
+		 * multiples of 128K.
+		 */
+		card->ext_csd.boot_size = ext_csd[EXT_CSD_BOOT_MULT] << 17;
 	}
 
 	if (card->ext_csd.rev >= 4) {
@@ -576,10 +585,13 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 	/*
 	 * Ensure eMMC user default partition is enabled
 	 */
-	if (card->ext_csd.bootconfig & 0x7) {
-		card->ext_csd.bootconfig &= ~0x7;
-		mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_BOOT_CONFIG,
-			   card->ext_csd.bootconfig, 0);
+	if (card->ext_csd.part_config & EXT_CSD_PART_CONFIG_ACC_MASK) {
+		card->ext_csd.part_config &= ~EXT_CSD_PART_CONFIG_ACC_MASK;
+		err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_PART_CONFIG,
+				 card->ext_csd.part_config,
+				 card->ext_csd.part_time);
+		if (err && err != -EBADMSG)
+			goto free_card;
 	}
 
 	/*
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 937f852cf01e..0c7a58b14343 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -48,7 +48,8 @@ struct mmc_ext_csd {
 	u8			sec_feature_support;
 	u8			rel_sectors;
 	u8			rel_param;
-	u8			bootconfig;
+	u8			part_config;
+	unsigned int		part_time;		/* Units: ms */
 	unsigned int		sa_timeout;		/* Units: 100ns */
 	unsigned int		hs_max_dtr;
 	unsigned int		sectors;
@@ -61,6 +62,7 @@ struct mmc_ext_csd {
 	bool			enhanced_area_en;	/* enable bit */
 	unsigned long long	enhanced_area_offset;	/* Units: Byte */
 	unsigned int		enhanced_area_size;	/* Units: KB */
+	unsigned int		boot_size;		/* in bytes */
 };
 
 struct sd_scr {
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 390aa6eef676..373b2bf5e5b5 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -257,19 +257,21 @@ struct _mmc_csd {
 #define EXT_CSD_PARTITION_SUPPORT	160	/* RO */
 #define EXT_CSD_WR_REL_PARAM		166	/* RO */
 #define EXT_CSD_ERASE_GROUP_DEF		175	/* R/W */
-#define EXT_CSD_BOOT_CONFIG		179	/* R/W */
+#define EXT_CSD_PART_CONFIG		179	/* R/W */
 #define EXT_CSD_ERASED_MEM_CONT		181	/* RO */
 #define EXT_CSD_BUS_WIDTH		183	/* R/W */
 #define EXT_CSD_HS_TIMING		185	/* R/W */
 #define EXT_CSD_REV			192	/* RO */
 #define EXT_CSD_STRUCTURE		194	/* RO */
 #define EXT_CSD_CARD_TYPE		196	/* RO */
+#define EXT_CSD_PART_SWITCH_TIME        199     /* RO */
 #define EXT_CSD_SEC_CNT			212	/* RO, 4 bytes */
 #define EXT_CSD_S_A_TIMEOUT		217	/* RO */
 #define EXT_CSD_REL_WR_SEC_C		222	/* RO */
 #define EXT_CSD_HC_WP_GRP_SIZE		221	/* RO */
 #define EXT_CSD_ERASE_TIMEOUT_MULT	223	/* RO */
 #define EXT_CSD_HC_ERASE_GRP_SIZE	224	/* RO */
+#define EXT_CSD_BOOT_MULT		226	/* RO */
 #define EXT_CSD_SEC_TRIM_MULT		229	/* RO */
 #define EXT_CSD_SEC_ERASE_MULT		230	/* RO */
 #define EXT_CSD_SEC_FEATURE_SUPPORT	231	/* RO */
@@ -281,6 +283,10 @@ struct _mmc_csd {
 
 #define EXT_CSD_WR_REL_PARAM_EN		(1<<2)
 
+#define EXT_CSD_PART_CONFIG_ACC_MASK	(0x7)
+#define EXT_CSD_PART_CONFIG_ACC_BOOT0	(0x1)
+#define EXT_CSD_PART_CONFIG_ACC_BOOT1	(0x2)
+
 #define EXT_CSD_CMD_SET_NORMAL		(1<<0)
 #define EXT_CSD_CMD_SET_SECURE		(1<<1)
 #define EXT_CSD_CMD_SET_CPSECURE	(1<<2)
-- 
cgit v1.2.3


From 6a7a6b45f454686a1549729bfbae31f0b3b595d6 Mon Sep 17 00:00:00 2001
From: Andrei Warkentin <andreiw@motorola.com>
Date: Tue, 12 Apr 2011 15:06:53 -0500
Subject: mmc: quirks: Fix erase/trim for certain SanDisk cards.

CMD38 argument is passed through EXT_CSD[113].

Signed-off-by: Andrei Warkentin <andreiw@motorola.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/card/block.c | 43 ++++++++++++++++++++++++++++++++++++++++++-
 include/linux/mmc/card.h |  1 +
 2 files changed, 43 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 288d27394ef9..c20995323348 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -48,6 +48,13 @@ MODULE_ALIAS("mmc:block");
 #endif
 #define MODULE_PARAM_PREFIX "mmcblk."
 
+#define INAND_CMD38_ARG_EXT_CSD  113
+#define INAND_CMD38_ARG_ERASE    0x00
+#define INAND_CMD38_ARG_TRIM     0x01
+#define INAND_CMD38_ARG_SECERASE 0x80
+#define INAND_CMD38_ARG_SECTRIM1 0x81
+#define INAND_CMD38_ARG_SECTRIM2 0x88
+
 #define REL_WRITES_SUPPORTED(card) (mmc_card_mmc((card)) &&	\
     (((card)->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN) ||	\
      ((card)->ext_csd.rel_sectors)))
@@ -356,6 +363,16 @@ static int mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
 	else
 		arg = MMC_ERASE_ARG;
 
+	if (card->quirks & MMC_QUIRK_INAND_CMD38) {
+		err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+				 INAND_CMD38_ARG_EXT_CSD,
+				 arg == MMC_TRIM_ARG ?
+				 INAND_CMD38_ARG_TRIM :
+				 INAND_CMD38_ARG_ERASE,
+				 0);
+		if (err)
+			goto out;
+	}
 	err = mmc_erase(card, from, nr, arg);
 out:
 	spin_lock_irq(&md->lock);
@@ -386,9 +403,28 @@ static int mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq,
 	else
 		arg = MMC_SECURE_ERASE_ARG;
 
+	if (card->quirks & MMC_QUIRK_INAND_CMD38) {
+		err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+				 INAND_CMD38_ARG_EXT_CSD,
+				 arg == MMC_SECURE_TRIM1_ARG ?
+				 INAND_CMD38_ARG_SECTRIM1 :
+				 INAND_CMD38_ARG_SECERASE,
+				 0);
+		if (err)
+			goto out;
+	}
 	err = mmc_erase(card, from, nr, arg);
-	if (!err && arg == MMC_SECURE_TRIM1_ARG)
+	if (!err && arg == MMC_SECURE_TRIM1_ARG) {
+		if (card->quirks & MMC_QUIRK_INAND_CMD38) {
+			err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+					 INAND_CMD38_ARG_EXT_CSD,
+					 INAND_CMD38_ARG_SECTRIM2,
+					 0);
+			if (err)
+				goto out;
+		}
 		err = mmc_erase(card, from, nr, MMC_SECURE_TRIM2_ARG);
+	}
 out:
 	spin_lock_irq(&md->lock);
 	__blk_end_request(req, err, blk_rq_bytes(req));
@@ -941,6 +977,11 @@ static int mmc_add_disk(struct mmc_blk_data *md)
 
 static const struct mmc_fixup blk_fixups[] =
 {
+	MMC_FIXUP("SEM02G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38),
+	MMC_FIXUP("SEM04G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38),
+	MMC_FIXUP("SEM08G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38),
+	MMC_FIXUP("SEM16G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38),
+	MMC_FIXUP("SEM32G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38),
 	END_FIXUP
 };
 
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 0c7a58b14343..72a98681ef47 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -133,6 +133,7 @@ struct mmc_card {
 #define MMC_QUIRK_BROKEN_CLK_GATING (1<<3)	/* clock gating the sdio bus will make card fail */
 #define MMC_QUIRK_NONSTD_FUNC_IF (1<<4)		/* SDIO card has nonstd function interfaces */
 #define MMC_QUIRK_DISABLE_CD	(1<<5)		/* disconnect CD/DAT[3] resistor */
+#define MMC_QUIRK_INAND_CMD38	(1<<6)		/* iNAND devices have broken CMD38 */
 
 	unsigned int		erase_size;	/* erase size in sectors */
  	unsigned int		erase_shift;	/* if erase unit is power 2 */
-- 
cgit v1.2.3


From 82b0e23a295cc58d1290017ee97a40956ad68d94 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 21 Apr 2011 20:26:38 +0200
Subject: mmc: sdhci: Fix read-only detection with JMicron 388 chip

On HP laptops with JMicron 388 chip, the write-locked SD card isn't
detected correctly as read-only in many cases.  This is because the
PRESENT_STATE register becomes unsable just after plugging, and it
returns the WRITE_PROTECT bit wrongly at the first read.

This patch fixes the read-only detection by adding a new sdhci quirk
indicating to check the register more intensively with a relatively
long delay.

The patch is tested with 2.6.39-rc4 kernel.

Cc: Aries Lee <arieslee@jmicron.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/sdhci-pci.c |  5 +++++
 drivers/mmc/host/sdhci.c     | 28 ++++++++++++++++++++++++----
 include/linux/mmc/sdhci.h    |  2 ++
 3 files changed, 31 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index 82c5dc412679..936bbca19c0a 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c
@@ -327,6 +327,11 @@ static int jmicron_probe(struct sdhci_pci_chip *chip)
 		return ret;
 	}
 
+	/* quirk for unsable RO-detection on JM388 chips */
+	if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB388_SD ||
+	    chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB388_ESD)
+		chip->quirks |= SDHCI_QUIRK_UNSTABLE_RO_DETECT;
+
 	return 0;
 }
 
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index a70a3d1ef35a..e5cfe70dc23b 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1256,14 +1256,11 @@ out:
 	spin_unlock_irqrestore(&host->lock, flags);
 }
 
-static int sdhci_get_ro(struct mmc_host *mmc)
+static int check_ro(struct sdhci_host *host)
 {
-	struct sdhci_host *host;
 	unsigned long flags;
 	int is_readonly;
 
-	host = mmc_priv(mmc);
-
 	spin_lock_irqsave(&host->lock, flags);
 
 	if (host->flags & SDHCI_DEVICE_DEAD)
@@ -1281,6 +1278,29 @@ static int sdhci_get_ro(struct mmc_host *mmc)
 		!is_readonly : is_readonly;
 }
 
+#define SAMPLE_COUNT	5
+
+static int sdhci_get_ro(struct mmc_host *mmc)
+{
+	struct sdhci_host *host;
+	int i, ro_count;
+
+	host = mmc_priv(mmc);
+
+	if (!(host->quirks & SDHCI_QUIRK_UNSTABLE_RO_DETECT))
+		return check_ro(host);
+
+	ro_count = 0;
+	for (i = 0; i < SAMPLE_COUNT; i++) {
+		if (check_ro(host)) {
+			if (++ro_count > SAMPLE_COUNT / 2)
+				return 1;
+		}
+		msleep(30);
+	}
+	return 0;
+}
+
 static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable)
 {
 	struct sdhci_host *host;
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index 83bd9f76709a..92e1c9ad126c 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -85,6 +85,8 @@ struct sdhci_host {
 #define SDHCI_QUIRK_NO_HISPD_BIT			(1<<29)
 /* Controller treats ADMA descriptors with length 0000h incorrectly */
 #define SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC		(1<<30)
+/* The read-only detection via SDHCI_PRESENT_STATE register is unstable */
+#define SDHCI_QUIRK_UNSTABLE_RO_DETECT			(1<<31)
 
 	int irq;		/* Device IRQ */
 	void __iomem *ioaddr;	/* Mapped address */
-- 
cgit v1.2.3


From cb87ea28ed9e75a41eb456bfcb547b4e6f10e750 Mon Sep 17 00:00:00 2001
From: John Calixto <john.calixto@modsystems.com>
Date: Tue, 26 Apr 2011 18:56:29 -0400
Subject: mmc: core: Add mmc CMD+ACMD passthrough ioctl

Allows appropriately-privileged applications to send CMD (normal) and ACMD
(application-specific; preceded with CMD55) commands to cards/devices on
the mmc bus.  This is primarily useful for enabling the security
functionality built in to every SD card.

It can also be used as a generic passthrough (e.g. to enable virtual
machines to control mmc bus devices directly).  However, this use case has
not been tested rigorously.  Generic passthrough testing was only conducted
for a few non-security opcodes to prove the feasibility of the passthrough.

Since any opcode can be sent using this passthrough, it is very possible to
render the card/device unusable.  Applications that use this ioctl must
have CAP_SYS_RAWIO.

Security commands tested on TI PCIxx12 (SDHCI), Sigma Designs SMP8652 SoC,
TI OMAP3621/OMAP3630 SoC, Samsung S5PC110 SoC, Qualcomm MSM7200A SoC.

Signed-off-by: John Calixto <john.calixto@modsystems.com>
Reviewed-by: Andrei Warkentin <andreiw@motorola.com>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 Documentation/ioctl/ioctl-number.txt |   1 +
 drivers/mmc/card/block.c             | 201 +++++++++++++++++++++++++++++++++++
 drivers/mmc/core/sd_ops.c            |   3 +-
 include/linux/Kbuild                 |   1 +
 include/linux/mmc/Kbuild             |   1 +
 include/linux/mmc/core.h             |   1 +
 include/linux/mmc/ioctl.h            |  54 ++++++++++
 7 files changed, 261 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/mmc/Kbuild
 create mode 100644 include/linux/mmc/ioctl.h

(limited to 'include/linux')

diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index a0a5d82b6b0b..2a34d822e6d6 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -304,6 +304,7 @@ Code  Seq#(hex)	Include File		Comments
 0xB0	all	RATIO devices		in development:
 					<mailto:vgo@ratio.de>
 0xB1	00-1F	PPPoX			<mailto:mostrows@styx.uwaterloo.ca>
+0xB3	00	linux/mmc/ioctl.h
 0xC0	00-0F	linux/usb/iowarrior.h
 0xCB	00-1F	CBM serial IEC bus	in development:
 					<mailto:michael.klein@puffin.lb.shuttle.de>
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index e7efd6faeaf9..407836d55712 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -31,7 +31,11 @@
 #include <linux/mutex.h>
 #include <linux/scatterlist.h>
 #include <linux/string_helpers.h>
+#include <linux/delay.h>
+#include <linux/capability.h>
+#include <linux/compat.h>
 
+#include <linux/mmc/ioctl.h>
 #include <linux/mmc/card.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/mmc.h>
@@ -218,11 +222,208 @@ mmc_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 	return 0;
 }
 
+struct mmc_blk_ioc_data {
+	struct mmc_ioc_cmd ic;
+	unsigned char *buf;
+	u64 buf_bytes;
+};
+
+static struct mmc_blk_ioc_data *mmc_blk_ioctl_copy_from_user(
+	struct mmc_ioc_cmd __user *user)
+{
+	struct mmc_blk_ioc_data *idata;
+	int err;
+
+	idata = kzalloc(sizeof(*idata), GFP_KERNEL);
+	if (!idata) {
+		err = -ENOMEM;
+		goto copy_err;
+	}
+
+	if (copy_from_user(&idata->ic, user, sizeof(idata->ic))) {
+		err = -EFAULT;
+		goto copy_err;
+	}
+
+	idata->buf_bytes = (u64) idata->ic.blksz * idata->ic.blocks;
+	if (idata->buf_bytes > MMC_IOC_MAX_BYTES) {
+		err = -EOVERFLOW;
+		goto copy_err;
+	}
+
+	idata->buf = kzalloc(idata->buf_bytes, GFP_KERNEL);
+	if (!idata->buf) {
+		err = -ENOMEM;
+		goto copy_err;
+	}
+
+	if (copy_from_user(idata->buf, (void __user *)(unsigned long)
+					idata->ic.data_ptr, idata->buf_bytes)) {
+		err = -EFAULT;
+		goto copy_err;
+	}
+
+	return idata;
+
+copy_err:
+	kfree(idata->buf);
+	kfree(idata);
+	return ERR_PTR(err);
+
+}
+
+static int mmc_blk_ioctl_cmd(struct block_device *bdev,
+	struct mmc_ioc_cmd __user *ic_ptr)
+{
+	struct mmc_blk_ioc_data *idata;
+	struct mmc_blk_data *md;
+	struct mmc_card *card;
+	struct mmc_command cmd = {0};
+	struct mmc_data data = {0};
+	struct mmc_request mrq = {0};
+	struct scatterlist sg;
+	int err;
+
+	/*
+	 * The caller must have CAP_SYS_RAWIO, and must be calling this on the
+	 * whole block device, not on a partition.  This prevents overspray
+	 * between sibling partitions.
+	 */
+	if ((!capable(CAP_SYS_RAWIO)) || (bdev != bdev->bd_contains))
+		return -EPERM;
+
+	idata = mmc_blk_ioctl_copy_from_user(ic_ptr);
+	if (IS_ERR(idata))
+		return PTR_ERR(idata);
+
+	cmd.opcode = idata->ic.opcode;
+	cmd.arg = idata->ic.arg;
+	cmd.flags = idata->ic.flags;
+
+	data.sg = &sg;
+	data.sg_len = 1;
+	data.blksz = idata->ic.blksz;
+	data.blocks = idata->ic.blocks;
+
+	sg_init_one(data.sg, idata->buf, idata->buf_bytes);
+
+	if (idata->ic.write_flag)
+		data.flags = MMC_DATA_WRITE;
+	else
+		data.flags = MMC_DATA_READ;
+
+	mrq.cmd = &cmd;
+	mrq.data = &data;
+
+	md = mmc_blk_get(bdev->bd_disk);
+	if (!md) {
+		err = -EINVAL;
+		goto cmd_done;
+	}
+
+	card = md->queue.card;
+	if (IS_ERR(card)) {
+		err = PTR_ERR(card);
+		goto cmd_done;
+	}
+
+	mmc_claim_host(card->host);
+
+	if (idata->ic.is_acmd) {
+		err = mmc_app_cmd(card->host, card);
+		if (err)
+			goto cmd_rel_host;
+	}
+
+	/* data.flags must already be set before doing this. */
+	mmc_set_data_timeout(&data, card);
+	/* Allow overriding the timeout_ns for empirical tuning. */
+	if (idata->ic.data_timeout_ns)
+		data.timeout_ns = idata->ic.data_timeout_ns;
+
+	if ((cmd.flags & MMC_RSP_R1B) == MMC_RSP_R1B) {
+		/*
+		 * Pretend this is a data transfer and rely on the host driver
+		 * to compute timeout.  When all host drivers support
+		 * cmd.cmd_timeout for R1B, this can be changed to:
+		 *
+		 *     mrq.data = NULL;
+		 *     cmd.cmd_timeout = idata->ic.cmd_timeout_ms;
+		 */
+		data.timeout_ns = idata->ic.cmd_timeout_ms * 1000000;
+	}
+
+	mmc_wait_for_req(card->host, &mrq);
+
+	if (cmd.error) {
+		dev_err(mmc_dev(card->host), "%s: cmd error %d\n",
+						__func__, cmd.error);
+		err = cmd.error;
+		goto cmd_rel_host;
+	}
+	if (data.error) {
+		dev_err(mmc_dev(card->host), "%s: data error %d\n",
+						__func__, data.error);
+		err = data.error;
+		goto cmd_rel_host;
+	}
+
+	/*
+	 * According to the SD specs, some commands require a delay after
+	 * issuing the command.
+	 */
+	if (idata->ic.postsleep_min_us)
+		usleep_range(idata->ic.postsleep_min_us, idata->ic.postsleep_max_us);
+
+	if (copy_to_user(&(ic_ptr->response), cmd.resp, sizeof(cmd.resp))) {
+		err = -EFAULT;
+		goto cmd_rel_host;
+	}
+
+	if (!idata->ic.write_flag) {
+		if (copy_to_user((void __user *)(unsigned long) idata->ic.data_ptr,
+						idata->buf, idata->buf_bytes)) {
+			err = -EFAULT;
+			goto cmd_rel_host;
+		}
+	}
+
+cmd_rel_host:
+	mmc_release_host(card->host);
+
+cmd_done:
+	mmc_blk_put(md);
+	kfree(idata->buf);
+	kfree(idata);
+	return err;
+}
+
+static int mmc_blk_ioctl(struct block_device *bdev, fmode_t mode,
+	unsigned int cmd, unsigned long arg)
+{
+	int ret = -EINVAL;
+	if (cmd == MMC_IOC_CMD)
+		ret = mmc_blk_ioctl_cmd(bdev, (struct mmc_ioc_cmd __user *)arg);
+	return ret;
+}
+
+#ifdef CONFIG_COMPAT
+static int mmc_blk_compat_ioctl(struct block_device *bdev, fmode_t mode,
+	unsigned int cmd, unsigned long arg)
+{
+	return mmc_blk_ioctl(bdev, mode, cmd, (unsigned long) compat_ptr(arg));
+}
+#endif
+
 static const struct block_device_operations mmc_bdops = {
 	.open			= mmc_blk_open,
 	.release		= mmc_blk_release,
 	.getgeo			= mmc_blk_getgeo,
 	.owner			= THIS_MODULE,
+	.ioctl			= mmc_blk_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl		= mmc_blk_compat_ioctl,
+#endif
 };
 
 struct mmc_blk_request {
diff --git a/drivers/mmc/core/sd_ops.c b/drivers/mmc/core/sd_ops.c
index a206aea5360d..021fed153804 100644
--- a/drivers/mmc/core/sd_ops.c
+++ b/drivers/mmc/core/sd_ops.c
@@ -21,7 +21,7 @@
 #include "core.h"
 #include "sd_ops.h"
 
-static int mmc_app_cmd(struct mmc_host *host, struct mmc_card *card)
+int mmc_app_cmd(struct mmc_host *host, struct mmc_card *card)
 {
 	int err;
 	struct mmc_command cmd = {0};
@@ -49,6 +49,7 @@ static int mmc_app_cmd(struct mmc_host *host, struct mmc_card *card)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(mmc_app_cmd);
 
 /**
  *	mmc_wait_for_app_cmd - start an application command and wait for
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 75cf611641e6..ed38527599ee 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -4,6 +4,7 @@ header-y += caif/
 header-y += dvb/
 header-y += hdlc/
 header-y += isdn/
+header-y += mmc/
 header-y += nfsd/
 header-y += raid/
 header-y += spi/
diff --git a/include/linux/mmc/Kbuild b/include/linux/mmc/Kbuild
new file mode 100644
index 000000000000..1fb26448faa9
--- /dev/null
+++ b/include/linux/mmc/Kbuild
@@ -0,0 +1 @@
+header-y += ioctl.h
diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index f8e4bcbd2846..cbe8d55f64c4 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -133,6 +133,7 @@ struct mmc_card;
 
 extern void mmc_wait_for_req(struct mmc_host *, struct mmc_request *);
 extern int mmc_wait_for_cmd(struct mmc_host *, struct mmc_command *, int);
+extern int mmc_app_cmd(struct mmc_host *, struct mmc_card *);
 extern int mmc_wait_for_app_cmd(struct mmc_host *, struct mmc_card *,
 	struct mmc_command *, int);
 extern int mmc_switch(struct mmc_card *, u8, u8, u8, unsigned int);
diff --git a/include/linux/mmc/ioctl.h b/include/linux/mmc/ioctl.h
new file mode 100644
index 000000000000..5baf2983a12f
--- /dev/null
+++ b/include/linux/mmc/ioctl.h
@@ -0,0 +1,54 @@
+#ifndef LINUX_MMC_IOCTL_H
+#define LINUX_MMC_IOCTL_H
+struct mmc_ioc_cmd {
+	/* Implies direction of data.  true = write, false = read */
+	int write_flag;
+
+	/* Application-specific command.  true = precede with CMD55 */
+	int is_acmd;
+
+	__u32 opcode;
+	__u32 arg;
+	__u32 response[4];  /* CMD response */
+	unsigned int flags;
+	unsigned int blksz;
+	unsigned int blocks;
+
+	/*
+	 * Sleep at least postsleep_min_us useconds, and at most
+	 * postsleep_max_us useconds *after* issuing command.  Needed for
+	 * some read commands for which cards have no other way of indicating
+	 * they're ready for the next command (i.e. there is no equivalent of
+	 * a "busy" indicator for read operations).
+	 */
+	unsigned int postsleep_min_us;
+	unsigned int postsleep_max_us;
+
+	/*
+	 * Override driver-computed timeouts.  Note the difference in units!
+	 */
+	unsigned int data_timeout_ns;
+	unsigned int cmd_timeout_ms;
+
+	/*
+	 * For 64-bit machines, the next member, ``__u64 data_ptr``, wants to
+	 * be 8-byte aligned.  Make sure this struct is the same size when
+	 * built for 32-bit.
+	 */
+	__u32 __pad;
+
+	/* DAT buffer */
+	__u64 data_ptr;
+};
+#define mmc_ioc_cmd_set_data(ic, ptr) ic.data_ptr = (__u64)(unsigned long) ptr
+
+#define MMC_IOC_CMD _IOWR(MMC_BLOCK_MAJOR, 0, struct mmc_ioc_cmd)
+
+/*
+ * Since this ioctl is only meant to enhance (and not replace) normal access
+ * to the mmc bus device, an upper data transfer limit of MMC_IOC_MAX_BYTES
+ * is enforced per ioctl call.  For larger data transfers, use the normal
+ * block device operations.
+ */
+#define MMC_IOC_MAX_BYTES  (512L * 256)
+#endif  /* LINUX_MMC_IOCTL_H */
-- 
cgit v1.2.3


From f2119df6b764609af4baceb68caf1e848c1c8aa7 Mon Sep 17 00:00:00 2001
From: Arindam Nath <arindam.nath@amd.com>
Date: Thu, 5 May 2011 12:18:57 +0530
Subject: mmc: sd: add support for signal voltage switch procedure

Host Controller v3.00 adds another Capabilities register. Apart
from other things, this new register indicates whether the Host
Controller supports SDR50, SDR104, and DDR50 UHS-I modes. The spec
doesn't mention about explicit support for SDR12 and SDR25 UHS-I
modes, so the Host Controller v3.00 should support them by default.
Also if the controller supports SDR104 mode, it will also support
SDR50 mode as well. So depending on the host support, we set the
corresponding MMC_CAP_* flags. One more new register. Host Control2
is added in v3.00, which is used during Signal Voltage Switch
procedure described below.

Since as per v3.00 spec, UHS-I supported hosts should set S18R
to 1, we set S18R (bit 24) of OCR before sending ACMD41. We also
need to set XPC (bit 28) of OCR in case the host can supply >150mA.
This support is indicated by the Maximum Current Capabilities
register of the Host Controller.

If the response of ACMD41 has both CCS and S18A set, we start the
signal voltage switch procedure, which if successfull, will switch
the card from 3.3V signalling to 1.8V signalling. Signal voltage
switch procedure adds support for a new command CMD11 in the
Physical Layer Spec v3.01. As part of this procedure, we need to
set 1.8V Signalling Enable (bit 3) of Host Control2 register, which
if remains set after 5ms, means the switch to 1.8V signalling is
successfull. Otherwise, we clear bit 24 of OCR and retry the
initialization sequence. When we remove the card, and insert the
same or another card, we need to make sure that we start with 3.3V
signalling voltage. So we call mmc_set_signal_voltage() with
MMC_SIGNAL_VOLTAGE_330 set so that we are back to 3.3V signalling
voltage before we actually start initializing the card.

Tested by Zhangfei Gao with a Toshiba uhs card and general hs card,
on mmp2 in SDMA mode.

Signed-off-by: Arindam Nath <arindam.nath@amd.com>
Reviewed-by: Philip Rakity <prakity@marvell.com>
Tested-by: Philip Rakity <prakity@marvell.com>
Acked-by: Zhangfei Gao <zhangfei.gao@marvell.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/core.c  |  32 ++++++++
 drivers/mmc/core/core.h  |   1 +
 drivers/mmc/core/sd.c    |  36 ++++++++-
 drivers/mmc/host/sdhci.c | 192 +++++++++++++++++++++++++++++++++++++++++++----
 drivers/mmc/host/sdhci.h |  18 ++++-
 include/linux/mmc/host.h |  15 ++++
 include/linux/mmc/sd.h   |   7 ++
 7 files changed, 284 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 1dbc18576219..5005a6323165 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -942,6 +942,38 @@ u32 mmc_select_voltage(struct mmc_host *host, u32 ocr)
 	return ocr;
 }
 
+int mmc_set_signal_voltage(struct mmc_host *host, int signal_voltage)
+{
+	struct mmc_command cmd = {0};
+	int err = 0;
+
+	BUG_ON(!host);
+
+	/*
+	 * Send CMD11 only if the request is to switch the card to
+	 * 1.8V signalling.
+	 */
+	if (signal_voltage == MMC_SIGNAL_VOLTAGE_180) {
+		cmd.opcode = SD_SWITCH_VOLTAGE;
+		cmd.arg = 0;
+		cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
+
+		err = mmc_wait_for_cmd(host, &cmd, 0);
+		if (err)
+			return err;
+
+		if (!mmc_host_is_spi(host) && (cmd.resp[0] & R1_ERROR))
+			return -EIO;
+	}
+
+	host->ios.signal_voltage = signal_voltage;
+
+	if (host->ops->start_signal_voltage_switch)
+		err = host->ops->start_signal_voltage_switch(host, &host->ios);
+
+	return err;
+}
+
 /*
  * Select timing parameters for host.
  */
diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h
index ca1fdde29df6..7745dea430b8 100644
--- a/drivers/mmc/core/core.h
+++ b/drivers/mmc/core/core.h
@@ -41,6 +41,7 @@ void mmc_set_bus_width(struct mmc_host *host, unsigned int width);
 void mmc_set_bus_width_ddr(struct mmc_host *host, unsigned int width,
 			   unsigned int ddr);
 u32 mmc_select_voltage(struct mmc_host *host, u32 ocr);
+int mmc_set_signal_voltage(struct mmc_host *host, int signal_voltage);
 void mmc_set_timing(struct mmc_host *host, unsigned int timing);
 
 static inline void mmc_delay(unsigned int ms)
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 6dac89fe0535..b0cd285d272a 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -403,6 +403,7 @@ struct device_type sd_type = {
 int mmc_sd_get_cid(struct mmc_host *host, u32 ocr, u32 *cid)
 {
 	int err;
+	u32 rocr;
 
 	/*
 	 * Since we're changing the OCR value, we seem to
@@ -420,12 +421,38 @@ int mmc_sd_get_cid(struct mmc_host *host, u32 ocr, u32 *cid)
 	 */
 	err = mmc_send_if_cond(host, ocr);
 	if (!err)
-		ocr |= 1 << 30;
+		ocr |= SD_OCR_CCS;
 
-	err = mmc_send_app_op_cond(host, ocr, NULL);
+	/*
+	 * If the host supports one of UHS-I modes, request the card
+	 * to switch to 1.8V signaling level.
+	 */
+	if (host->caps & (MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 |
+	    MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 | MMC_CAP_UHS_DDR50))
+		ocr |= SD_OCR_S18R;
+
+	/* If the host can supply more than 150mA, XPC should be set to 1. */
+	if (host->caps & (MMC_CAP_SET_XPC_330 | MMC_CAP_SET_XPC_300 |
+	    MMC_CAP_SET_XPC_180))
+		ocr |= SD_OCR_XPC;
+
+try_again:
+	err = mmc_send_app_op_cond(host, ocr, &rocr);
 	if (err)
 		return err;
 
+	/*
+	 * In case CCS and S18A in the response is set, start Signal Voltage
+	 * Switch procedure. SPI mode doesn't support CMD11.
+	 */
+	if (!mmc_host_is_spi(host) && ((rocr & 0x41000000) == 0x41000000)) {
+		err = mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180);
+		if (err) {
+			ocr &= ~SD_OCR_S18R;
+			goto try_again;
+		}
+	}
+
 	if (mmc_host_is_spi(host))
 		err = mmc_send_cid(host, cid);
 	else
@@ -773,6 +800,11 @@ int mmc_attach_sd(struct mmc_host *host)
 	BUG_ON(!host);
 	WARN_ON(!host->claimed);
 
+	/* Make sure we are at 3.3V signalling voltage */
+	err = mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_330);
+	if (err)
+		return err;
+
 	err = mmc_send_app_op_cond(host, 0, &ocr);
 	if (err)
 		return err;
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index e5cfe70dc23b..4e2031449a23 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -83,6 +83,8 @@ static void sdhci_dumpregs(struct sdhci_host *host)
 	printk(KERN_DEBUG DRIVER_NAME ": Cmd:      0x%08x | Max curr: 0x%08x\n",
 		sdhci_readw(host, SDHCI_COMMAND),
 		sdhci_readl(host, SDHCI_MAX_CURRENT));
+	printk(KERN_DEBUG DRIVER_NAME ": Host ctl2: 0x%08x\n",
+		sdhci_readw(host, SDHCI_HOST_CONTROL2));
 
 	if (host->flags & SDHCI_USE_ADMA)
 		printk(KERN_DEBUG DRIVER_NAME ": ADMA Err: 0x%08x | ADMA Ptr: 0x%08x\n",
@@ -1323,11 +1325,114 @@ out:
 	spin_unlock_irqrestore(&host->lock, flags);
 }
 
+static int sdhci_start_signal_voltage_switch(struct mmc_host *mmc,
+	struct mmc_ios *ios)
+{
+	struct sdhci_host *host;
+	u8 pwr;
+	u16 clk, ctrl;
+	u32 present_state;
+
+	host = mmc_priv(mmc);
+
+	/*
+	 * Signal Voltage Switching is only applicable for Host Controllers
+	 * v3.00 and above.
+	 */
+	if (host->version < SDHCI_SPEC_300)
+		return 0;
+
+	/*
+	 * We first check whether the request is to set signalling voltage
+	 * to 3.3V. If so, we change the voltage to 3.3V and return quickly.
+	 */
+	ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+	if (ios->signal_voltage == MMC_SIGNAL_VOLTAGE_330) {
+		/* Set 1.8V Signal Enable in the Host Control2 register to 0 */
+		ctrl &= ~SDHCI_CTRL_VDD_180;
+		sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
+
+		/* Wait for 5ms */
+		usleep_range(5000, 5500);
+
+		/* 3.3V regulator output should be stable within 5 ms */
+		ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+		if (!(ctrl & SDHCI_CTRL_VDD_180))
+			return 0;
+		else {
+			printk(KERN_INFO DRIVER_NAME ": Switching to 3.3V "
+				"signalling voltage failed\n");
+			return -EIO;
+		}
+	} else if (!(ctrl & SDHCI_CTRL_VDD_180) &&
+		  (ios->signal_voltage == MMC_SIGNAL_VOLTAGE_180)) {
+		/* Stop SDCLK */
+		clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
+		clk &= ~SDHCI_CLOCK_CARD_EN;
+		sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
+
+		/* Check whether DAT[3:0] is 0000 */
+		present_state = sdhci_readl(host, SDHCI_PRESENT_STATE);
+		if (!((present_state & SDHCI_DATA_LVL_MASK) >>
+		       SDHCI_DATA_LVL_SHIFT)) {
+			/*
+			 * Enable 1.8V Signal Enable in the Host Control2
+			 * register
+			 */
+			ctrl |= SDHCI_CTRL_VDD_180;
+			sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
+
+			/* Wait for 5ms */
+			usleep_range(5000, 5500);
+
+			ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+			if (ctrl & SDHCI_CTRL_VDD_180) {
+				/* Provide SDCLK again and wait for 1ms*/
+				clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
+				clk |= SDHCI_CLOCK_CARD_EN;
+				sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
+				usleep_range(1000, 1500);
+
+				/*
+				 * If DAT[3:0] level is 1111b, then the card
+				 * was successfully switched to 1.8V signaling.
+				 */
+				present_state = sdhci_readl(host,
+							SDHCI_PRESENT_STATE);
+				if ((present_state & SDHCI_DATA_LVL_MASK) ==
+				     SDHCI_DATA_LVL_MASK)
+					return 0;
+			}
+		}
+
+		/*
+		 * If we are here, that means the switch to 1.8V signaling
+		 * failed. We power cycle the card, and retry initialization
+		 * sequence by setting S18R to 0.
+		 */
+		pwr = sdhci_readb(host, SDHCI_POWER_CONTROL);
+		pwr &= ~SDHCI_POWER_ON;
+		sdhci_writeb(host, pwr, SDHCI_POWER_CONTROL);
+
+		/* Wait for 1ms as per the spec */
+		usleep_range(1000, 1500);
+		pwr |= SDHCI_POWER_ON;
+		sdhci_writeb(host, pwr, SDHCI_POWER_CONTROL);
+
+		printk(KERN_INFO DRIVER_NAME ": Switching to 1.8V signalling "
+			"voltage failed, retrying with S18R set to 0\n");
+		return -EAGAIN;
+	} else
+		/* No signal voltage switch required */
+		return 0;
+}
+
 static const struct mmc_host_ops sdhci_ops = {
 	.request	= sdhci_request,
 	.set_ios	= sdhci_set_ios,
 	.get_ro		= sdhci_get_ro,
 	.enable_sdio_irq = sdhci_enable_sdio_irq,
+	.start_signal_voltage_switch	= sdhci_start_signal_voltage_switch,
 };
 
 /*****************************************************************************\
@@ -1808,7 +1913,9 @@ EXPORT_SYMBOL_GPL(sdhci_alloc_host);
 int sdhci_add_host(struct sdhci_host *host)
 {
 	struct mmc_host *mmc;
-	unsigned int caps, ocr_avail;
+	u32 caps[2];
+	u32 max_current_caps;
+	unsigned int ocr_avail;
 	int ret;
 
 	WARN_ON(host == NULL);
@@ -1831,12 +1938,15 @@ int sdhci_add_host(struct sdhci_host *host)
 			host->version);
 	}
 
-	caps = (host->quirks & SDHCI_QUIRK_MISSING_CAPS) ? host->caps :
+	caps[0] = (host->quirks & SDHCI_QUIRK_MISSING_CAPS) ? host->caps :
 		sdhci_readl(host, SDHCI_CAPABILITIES);
 
+	caps[1] = (host->version >= SDHCI_SPEC_300) ?
+		sdhci_readl(host, SDHCI_CAPABILITIES_1) : 0;
+
 	if (host->quirks & SDHCI_QUIRK_FORCE_DMA)
 		host->flags |= SDHCI_USE_SDMA;
-	else if (!(caps & SDHCI_CAN_DO_SDMA))
+	else if (!(caps[0] & SDHCI_CAN_DO_SDMA))
 		DBG("Controller doesn't have SDMA capability\n");
 	else
 		host->flags |= SDHCI_USE_SDMA;
@@ -1847,7 +1957,8 @@ int sdhci_add_host(struct sdhci_host *host)
 		host->flags &= ~SDHCI_USE_SDMA;
 	}
 
-	if ((host->version >= SDHCI_SPEC_200) && (caps & SDHCI_CAN_DO_ADMA2))
+	if ((host->version >= SDHCI_SPEC_200) &&
+		(caps[0] & SDHCI_CAN_DO_ADMA2))
 		host->flags |= SDHCI_USE_ADMA;
 
 	if ((host->quirks & SDHCI_QUIRK_BROKEN_ADMA) &&
@@ -1897,10 +2008,10 @@ int sdhci_add_host(struct sdhci_host *host)
 	}
 
 	if (host->version >= SDHCI_SPEC_300)
-		host->max_clk = (caps & SDHCI_CLOCK_V3_BASE_MASK)
+		host->max_clk = (caps[0] & SDHCI_CLOCK_V3_BASE_MASK)
 			>> SDHCI_CLOCK_BASE_SHIFT;
 	else
-		host->max_clk = (caps & SDHCI_CLOCK_BASE_MASK)
+		host->max_clk = (caps[0] & SDHCI_CLOCK_BASE_MASK)
 			>> SDHCI_CLOCK_BASE_SHIFT;
 
 	host->max_clk *= 1000000;
@@ -1916,7 +2027,7 @@ int sdhci_add_host(struct sdhci_host *host)
 	}
 
 	host->timeout_clk =
-		(caps & SDHCI_TIMEOUT_CLK_MASK) >> SDHCI_TIMEOUT_CLK_SHIFT;
+		(caps[0] & SDHCI_TIMEOUT_CLK_MASK) >> SDHCI_TIMEOUT_CLK_SHIFT;
 	if (host->timeout_clk == 0) {
 		if (host->ops->get_timeout_clock) {
 			host->timeout_clk = host->ops->get_timeout_clock(host);
@@ -1928,7 +2039,7 @@ int sdhci_add_host(struct sdhci_host *host)
 			return -ENODEV;
 		}
 	}
-	if (caps & SDHCI_TIMEOUT_CLK_UNIT)
+	if (caps[0] & SDHCI_TIMEOUT_CLK_UNIT)
 		host->timeout_clk *= 1000;
 
 	/*
@@ -1955,21 +2066,76 @@ int sdhci_add_host(struct sdhci_host *host)
 	if (!(host->quirks & SDHCI_QUIRK_FORCE_1_BIT_DATA))
 		mmc->caps |= MMC_CAP_4_BIT_DATA;
 
-	if (caps & SDHCI_CAN_DO_HISPD)
+	if (caps[0] & SDHCI_CAN_DO_HISPD)
 		mmc->caps |= MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED;
 
 	if ((host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION) &&
 	    mmc_card_is_removable(mmc))
 		mmc->caps |= MMC_CAP_NEEDS_POLL;
 
+	/* UHS-I mode(s) supported by the host controller. */
+	if (host->version >= SDHCI_SPEC_300)
+		mmc->caps |= MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25;
+
+	/* SDR104 supports also implies SDR50 support */
+	if (caps[1] & SDHCI_SUPPORT_SDR104)
+		mmc->caps |= MMC_CAP_UHS_SDR104 | MMC_CAP_UHS_SDR50;
+	else if (caps[1] & SDHCI_SUPPORT_SDR50)
+		mmc->caps |= MMC_CAP_UHS_SDR50;
+
+	if (caps[1] & SDHCI_SUPPORT_DDR50)
+		mmc->caps |= MMC_CAP_UHS_DDR50;
+
 	ocr_avail = 0;
-	if (caps & SDHCI_CAN_VDD_330)
+	/*
+	 * According to SD Host Controller spec v3.00, if the Host System
+	 * can afford more than 150mA, Host Driver should set XPC to 1. Also
+	 * the value is meaningful only if Voltage Support in the Capabilities
+	 * register is set. The actual current value is 4 times the register
+	 * value.
+	 */
+	max_current_caps = sdhci_readl(host, SDHCI_MAX_CURRENT);
+
+	if (caps[0] & SDHCI_CAN_VDD_330) {
+		int max_current_330;
+
 		ocr_avail |= MMC_VDD_32_33 | MMC_VDD_33_34;
-	if (caps & SDHCI_CAN_VDD_300)
+
+		max_current_330 = ((max_current_caps &
+				   SDHCI_MAX_CURRENT_330_MASK) >>
+				   SDHCI_MAX_CURRENT_330_SHIFT) *
+				   SDHCI_MAX_CURRENT_MULTIPLIER;
+
+		if (max_current_330 > 150)
+			mmc->caps |= MMC_CAP_SET_XPC_330;
+	}
+	if (caps[0] & SDHCI_CAN_VDD_300) {
+		int max_current_300;
+
 		ocr_avail |= MMC_VDD_29_30 | MMC_VDD_30_31;
-	if (caps & SDHCI_CAN_VDD_180)
+
+		max_current_300 = ((max_current_caps &
+				   SDHCI_MAX_CURRENT_300_MASK) >>
+				   SDHCI_MAX_CURRENT_300_SHIFT) *
+				   SDHCI_MAX_CURRENT_MULTIPLIER;
+
+		if (max_current_300 > 150)
+			mmc->caps |= MMC_CAP_SET_XPC_300;
+	}
+	if (caps[0] & SDHCI_CAN_VDD_180) {
+		int max_current_180;
+
 		ocr_avail |= MMC_VDD_165_195;
 
+		max_current_180 = ((max_current_caps &
+				   SDHCI_MAX_CURRENT_180_MASK) >>
+				   SDHCI_MAX_CURRENT_180_SHIFT) *
+				   SDHCI_MAX_CURRENT_MULTIPLIER;
+
+		if (max_current_180 > 150)
+			mmc->caps |= MMC_CAP_SET_XPC_180;
+	}
+
 	mmc->ocr_avail = ocr_avail;
 	mmc->ocr_avail_sdio = ocr_avail;
 	if (host->ocr_avail_sdio)
@@ -2029,7 +2195,7 @@ int sdhci_add_host(struct sdhci_host *host)
 	if (host->quirks & SDHCI_QUIRK_FORCE_BLK_SZ_2048) {
 		mmc->max_blk_size = 2;
 	} else {
-		mmc->max_blk_size = (caps & SDHCI_MAX_BLOCK_MASK) >>
+		mmc->max_blk_size = (caps[0] & SDHCI_MAX_BLOCK_MASK) >>
 				SDHCI_MAX_BLOCK_SHIFT;
 		if (mmc->max_blk_size >= 3) {
 			printk(KERN_WARNING "%s: Invalid maximum block size, "
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index c6e25a76d269..5cba2fea46e0 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -68,6 +68,8 @@
 #define  SDHCI_DATA_AVAILABLE	0x00000800
 #define  SDHCI_CARD_PRESENT	0x00010000
 #define  SDHCI_WRITE_PROTECT	0x00080000
+#define  SDHCI_DATA_LVL_MASK	0x00F00000
+#define   SDHCI_DATA_LVL_SHIFT	20
 
 #define SDHCI_HOST_CONTROL 	0x28
 #define  SDHCI_CTRL_LED		0x01
@@ -146,7 +148,8 @@
 
 #define SDHCI_ACMD12_ERR	0x3C
 
-/* 3E-3F reserved */
+#define SDHCI_HOST_CONTROL2		0x3E
+#define  SDHCI_CTRL_VDD_180		0x0008
 
 #define SDHCI_CAPABILITIES	0x40
 #define  SDHCI_TIMEOUT_CLK_MASK	0x0000003F
@@ -167,9 +170,20 @@
 #define  SDHCI_CAN_VDD_180	0x04000000
 #define  SDHCI_CAN_64BIT	0x10000000
 
+#define  SDHCI_SUPPORT_SDR50	0x00000001
+#define  SDHCI_SUPPORT_SDR104	0x00000002
+#define  SDHCI_SUPPORT_DDR50	0x00000004
+
 #define SDHCI_CAPABILITIES_1	0x44
 
-#define SDHCI_MAX_CURRENT	0x48
+#define SDHCI_MAX_CURRENT		0x48
+#define  SDHCI_MAX_CURRENT_330_MASK	0x0000FF
+#define  SDHCI_MAX_CURRENT_330_SHIFT	0
+#define  SDHCI_MAX_CURRENT_300_MASK	0x00FF00
+#define  SDHCI_MAX_CURRENT_300_SHIFT	8
+#define  SDHCI_MAX_CURRENT_180_MASK	0xFF0000
+#define  SDHCI_MAX_CURRENT_180_SHIFT	16
+#define   SDHCI_MAX_CURRENT_MULTIPLIER	4
 
 /* 4C-4F reserved for more max current */
 
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 0fffa5cdc183..bde5a0b1c47e 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -56,6 +56,11 @@ struct mmc_ios {
 #define MMC_SDR_MODE		0
 #define MMC_1_2V_DDR_MODE	1
 #define MMC_1_8V_DDR_MODE	2
+
+	unsigned char	signal_voltage;		/* signalling voltage (1.8V or 3.3V) */
+
+#define MMC_SIGNAL_VOLTAGE_330	0
+#define MMC_SIGNAL_VOLTAGE_180	1
 };
 
 struct mmc_host_ops {
@@ -117,6 +122,8 @@ struct mmc_host_ops {
 
 	/* optional callback for HC quirks */
 	void	(*init_card)(struct mmc_host *host, struct mmc_card *card);
+
+	int	(*start_signal_voltage_switch)(struct mmc_host *host, struct mmc_ios *ios);
 };
 
 struct mmc_card;
@@ -173,6 +180,14 @@ struct mmc_host {
 						/* DDR mode at 1.2V */
 #define MMC_CAP_POWER_OFF_CARD	(1 << 13)	/* Can power off after boot */
 #define MMC_CAP_BUS_WIDTH_TEST	(1 << 14)	/* CMD14/CMD19 bus width ok */
+#define MMC_CAP_UHS_SDR12	(1 << 15)	/* Host supports UHS SDR12 mode */
+#define MMC_CAP_UHS_SDR25	(1 << 16)	/* Host supports UHS SDR25 mode */
+#define MMC_CAP_UHS_SDR50	(1 << 17)	/* Host supports UHS SDR50 mode */
+#define MMC_CAP_UHS_SDR104	(1 << 18)	/* Host supports UHS SDR104 mode */
+#define MMC_CAP_UHS_DDR50	(1 << 19)	/* Host supports UHS DDR50 mode */
+#define MMC_CAP_SET_XPC_330	(1 << 20)	/* Host supports >150mA current at 3.3V */
+#define MMC_CAP_SET_XPC_300	(1 << 21)	/* Host supports >150mA current at 3.0V */
+#define MMC_CAP_SET_XPC_180	(1 << 22)	/* Host supports >150mA current at 1.8V */
 
 	mmc_pm_flag_t		pm_caps;	/* supported pm features */
 
diff --git a/include/linux/mmc/sd.h b/include/linux/mmc/sd.h
index 3fd85e088cc3..c648878f6734 100644
--- a/include/linux/mmc/sd.h
+++ b/include/linux/mmc/sd.h
@@ -17,6 +17,7 @@
 /* This is basically the same command as for MMC with some quirks. */
 #define SD_SEND_RELATIVE_ADDR     3   /* bcr                     R6  */
 #define SD_SEND_IF_COND           8   /* bcr  [11:0] See below   R7  */
+#define SD_SWITCH_VOLTAGE         11  /* ac                      R1  */
 
   /* class 10 */
 #define SD_SWITCH                 6   /* adtc [31:0] See below   R1  */
@@ -32,6 +33,12 @@
 #define SD_APP_OP_COND           41   /* bcr  [31:0] OCR         R3  */
 #define SD_APP_SEND_SCR          51   /* adtc                    R1  */
 
+/* OCR bit definitions */
+#define SD_OCR_S18R		(1 << 24)    /* 1.8V switching request */
+#define SD_ROCR_S18A		SD_OCR_S18R  /* 1.8V switching accepted by card */
+#define SD_OCR_XPC		(1 << 28)    /* SDXC power control */
+#define SD_OCR_CCS		(1 << 30)    /* Card Capacity Status */
+
 /*
  * SD_SWITCH argument format:
  *
-- 
cgit v1.2.3


From 013909c4ffd16ded4895528b856fd8782df04dc6 Mon Sep 17 00:00:00 2001
From: Arindam Nath <arindam.nath@amd.com>
Date: Thu, 5 May 2011 12:18:58 +0530
Subject: mmc: sd: query function modes for uhs cards

SD cards which conform to Physical Layer Spec v3.01 can support
additional Bus Speed Modes, Driver Strength, and Current Limit
other than the default values. We use CMD6 mode 0 to read these
additional card functions. The values read here will be used
during UHS-I initialization steps.

Tested by Zhangfei Gao with a Toshiba uhs card and general hs card,
on mmp2 in SDMA mode.

Signed-off-by: Arindam Nath <arindam.nath@amd.com>
Reviewed-by: Philip Rakity <prakity@marvell.com>
Tested-by: Philip Rakity <prakity@marvell.com>
Acked-by: Zhangfei Gao <zhangfei.gao@marvell.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/sd.c    | 68 +++++++++++++++++++++++++++++++++++++++++-------
 include/linux/mmc/card.h |  4 +++
 2 files changed, 62 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index b0cd285d272a..8285842f19e9 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -189,6 +189,9 @@ static int mmc_decode_scr(struct mmc_card *card)
 
 	scr->sda_vsn = UNSTUFF_BITS(resp, 56, 4);
 	scr->bus_widths = UNSTUFF_BITS(resp, 48, 4);
+	if (scr->sda_vsn == SCR_SPEC_VER_2)
+		/* Check if Physical Layer Spec v3.0 is supported */
+		scr->sda_spec3 = UNSTUFF_BITS(resp, 47, 1);
 
 	if (UNSTUFF_BITS(resp, 55, 1))
 		card->erased_byte = 0xFF;
@@ -274,29 +277,74 @@ static int mmc_read_switch(struct mmc_card *card)
 	status = kmalloc(64, GFP_KERNEL);
 	if (!status) {
 		printk(KERN_ERR "%s: could not allocate a buffer for "
-			"switch capabilities.\n", mmc_hostname(card->host));
+			"switch capabilities.\n",
+			mmc_hostname(card->host));
 		return -ENOMEM;
 	}
 
+	/* Find out the supported Bus Speed Modes. */
 	err = mmc_sd_switch(card, 0, 0, 1, status);
 	if (err) {
-		/* If the host or the card can't do the switch,
-		 * fail more gracefully. */
-		if ((err != -EINVAL)
-		 && (err != -ENOSYS)
-		 && (err != -EFAULT))
+		/*
+		 * If the host or the card can't do the switch,
+		 * fail more gracefully.
+		 */
+		if (err != -EINVAL && err != -ENOSYS && err != -EFAULT)
 			goto out;
 
-		printk(KERN_WARNING "%s: problem reading switch "
-			"capabilities, performance might suffer.\n",
+		printk(KERN_WARNING "%s: problem reading Bus Speed modes.\n",
 			mmc_hostname(card->host));
 		err = 0;
 
 		goto out;
 	}
 
-	if (status[13] & 0x02)
-		card->sw_caps.hs_max_dtr = 50000000;
+	if (card->scr.sda_spec3) {
+		card->sw_caps.sd3_bus_mode = status[13];
+
+		/* Find out Driver Strengths supported by the card */
+		err = mmc_sd_switch(card, 0, 2, 1, status);
+		if (err) {
+			/*
+			 * If the host or the card can't do the switch,
+			 * fail more gracefully.
+			 */
+			if (err != -EINVAL && err != -ENOSYS && err != -EFAULT)
+				goto out;
+
+			printk(KERN_WARNING "%s: problem reading "
+				"Driver Strength.\n",
+				mmc_hostname(card->host));
+			err = 0;
+
+			goto out;
+		}
+
+		card->sw_caps.sd3_drv_type = status[9];
+
+		/* Find out Current Limits supported by the card */
+		err = mmc_sd_switch(card, 0, 3, 1, status);
+		if (err) {
+			/*
+			 * If the host or the card can't do the switch,
+			 * fail more gracefully.
+			 */
+			if (err != -EINVAL && err != -ENOSYS && err != -EFAULT)
+				goto out;
+
+			printk(KERN_WARNING "%s: problem reading "
+				"Current Limit.\n",
+				mmc_hostname(card->host));
+			err = 0;
+
+			goto out;
+		}
+
+		card->sw_caps.sd3_curr_limit = status[7];
+	} else {
+		if (status[13] & 0x02)
+			card->sw_caps.hs_max_dtr = 50000000;
+	}
 
 out:
 	kfree(status);
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 72a98681ef47..56f4d9234a66 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -67,6 +67,7 @@ struct mmc_ext_csd {
 
 struct sd_scr {
 	unsigned char		sda_vsn;
+	unsigned char		sda_spec3;
 	unsigned char		bus_widths;
 #define SD_SCR_BUS_WIDTH_1	(1<<0)
 #define SD_SCR_BUS_WIDTH_4	(1<<2)
@@ -80,6 +81,9 @@ struct sd_ssr {
 
 struct sd_switch_caps {
 	unsigned int		hs_max_dtr;
+	unsigned int		sd3_bus_mode;
+	unsigned int		sd3_drv_type;
+	unsigned int		sd3_curr_limit;
 };
 
 struct sdio_cccr {
-- 
cgit v1.2.3


From d6d50a15a2897d4133d536dd4343b5cf21163db3 Mon Sep 17 00:00:00 2001
From: Arindam Nath <arindam.nath@amd.com>
Date: Thu, 5 May 2011 12:18:59 +0530
Subject: mmc: sd: add support for driver type selection

This patch adds support for setting driver strength during UHS-I
initialization procedure. Since UHS-I cards set S18A (bit 24) in
response to ACMD41, we use this as a base for UHS-I initialization.
We modify the parameter list of mmc_sd_get_cid() so that we can
save the ROCR from ACMD41 to check whether bit 24 is set.

We decide whether the Host Controller supports A, C, or D driver
type depending on the Capabilities register. Driver type B is
suported by default. We then set the appropriate driver type for
the card using CMD6 mode 1. As per Host Controller spec v3.00, we
set driver type for the host only if Preset Value Enable in the
Host Control2 register is not set. SDHCI_HOST_CONTROL has been
renamed to SDHCI_HOST_CONTROL1 to conform to the spec.

Tested by Zhangfei Gao with a Toshiba uhs card and general hs card,
on mmp2 in SDMA mode.

Signed-off-by: Arindam Nath <arindam.nath@amd.com>
Reviewed-by: Philip Rakity <prakity@marvell.com>
Tested-by: Philip Rakity <prakity@marvell.com>
Acked-by: Zhangfei Gao <zhangfei.gao@marvell.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/core.c  |   9 +++
 drivers/mmc/core/core.h  |   1 +
 drivers/mmc/core/sd.c    | 152 +++++++++++++++++++++++++++++++++++++++--------
 drivers/mmc/core/sd.h    |   2 +-
 drivers/mmc/core/sdio.c  |   4 +-
 drivers/mmc/host/sdhci.c |  27 +++++++++
 drivers/mmc/host/sdhci.h |  11 +++-
 include/linux/mmc/card.h |   4 ++
 include/linux/mmc/host.h |  10 ++++
 9 files changed, 190 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 5005a6323165..61c6c0b8f0e0 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -983,6 +983,15 @@ void mmc_set_timing(struct mmc_host *host, unsigned int timing)
 	mmc_set_ios(host);
 }
 
+/*
+ * Select appropriate driver type for host.
+ */
+void mmc_set_driver_type(struct mmc_host *host, unsigned int drv_type)
+{
+	host->ios.drv_type = drv_type;
+	mmc_set_ios(host);
+}
+
 /*
  * Apply power to the MMC stack.  This is a two-stage process.
  * First, we enable power to the card without the clock running.
diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h
index 7745dea430b8..93f33973de39 100644
--- a/drivers/mmc/core/core.h
+++ b/drivers/mmc/core/core.h
@@ -43,6 +43,7 @@ void mmc_set_bus_width_ddr(struct mmc_host *host, unsigned int width,
 u32 mmc_select_voltage(struct mmc_host *host, u32 ocr);
 int mmc_set_signal_voltage(struct mmc_host *host, int signal_voltage);
 void mmc_set_timing(struct mmc_host *host, unsigned int timing);
+void mmc_set_driver_type(struct mmc_host *host, unsigned int drv_type);
 
 static inline void mmc_delay(unsigned int ms)
 {
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 8285842f19e9..5b7c99855635 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -400,6 +400,98 @@ out:
 	return err;
 }
 
+static int sd_select_driver_type(struct mmc_card *card, u8 *status)
+{
+	int host_drv_type = 0, card_drv_type = 0;
+	int err;
+
+	/*
+	 * If the host doesn't support any of the Driver Types A,C or D,
+	 * default Driver Type B is used.
+	 */
+	if (!(card->host->caps & (MMC_CAP_DRIVER_TYPE_A | MMC_CAP_DRIVER_TYPE_C
+	    | MMC_CAP_DRIVER_TYPE_D)))
+		return 0;
+
+	if (card->host->caps & MMC_CAP_DRIVER_TYPE_A) {
+		host_drv_type = MMC_SET_DRIVER_TYPE_A;
+		if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_A)
+			card_drv_type = MMC_SET_DRIVER_TYPE_A;
+		else if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_B)
+			card_drv_type = MMC_SET_DRIVER_TYPE_B;
+		else if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C)
+			card_drv_type = MMC_SET_DRIVER_TYPE_C;
+	} else if (card->host->caps & MMC_CAP_DRIVER_TYPE_C) {
+		host_drv_type = MMC_SET_DRIVER_TYPE_C;
+		if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C)
+			card_drv_type = MMC_SET_DRIVER_TYPE_C;
+	} else if (!(card->host->caps & MMC_CAP_DRIVER_TYPE_D)) {
+		/*
+		 * If we are here, that means only the default driver type
+		 * B is supported by the host.
+		 */
+		host_drv_type = MMC_SET_DRIVER_TYPE_B;
+		if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_B)
+			card_drv_type = MMC_SET_DRIVER_TYPE_B;
+		else if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C)
+			card_drv_type = MMC_SET_DRIVER_TYPE_C;
+	}
+
+	err = mmc_sd_switch(card, 1, 2, card_drv_type, status);
+	if (err)
+		return err;
+
+	if ((status[15] & 0xF) != card_drv_type) {
+		printk(KERN_WARNING "%s: Problem setting driver strength!\n",
+			mmc_hostname(card->host));
+		return 0;
+	}
+
+	mmc_set_driver_type(card->host, host_drv_type);
+
+	return 0;
+}
+
+/*
+ * UHS-I specific initialization procedure
+ */
+static int mmc_sd_init_uhs_card(struct mmc_card *card)
+{
+	int err;
+	u8 *status;
+
+	if (!card->scr.sda_spec3)
+		return 0;
+
+	if (!(card->csd.cmdclass & CCC_SWITCH))
+		return 0;
+
+	status = kmalloc(64, GFP_KERNEL);
+	if (!status) {
+		printk(KERN_ERR "%s: could not allocate a buffer for "
+			"switch capabilities.\n", mmc_hostname(card->host));
+		return -ENOMEM;
+	}
+
+	/* Set 4-bit bus width */
+	if ((card->host->caps & MMC_CAP_4_BIT_DATA) &&
+	    (card->scr.bus_widths & SD_SCR_BUS_WIDTH_4)) {
+		err = mmc_app_set_bus_width(card, MMC_BUS_WIDTH_4);
+		if (err)
+			goto out;
+
+		mmc_set_bus_width(card->host, MMC_BUS_WIDTH_4);
+	}
+
+	/* Set the driver strength for the card */
+	err = sd_select_driver_type(card, status);
+
+out:
+	kfree(status);
+
+	return err;
+}
+
 MMC_DEV_ATTR(cid, "%08x%08x%08x%08x\n", card->raw_cid[0], card->raw_cid[1],
 	card->raw_cid[2], card->raw_cid[3]);
 MMC_DEV_ATTR(csd, "%08x%08x%08x%08x\n", card->raw_csd[0], card->raw_csd[1],
@@ -448,10 +540,9 @@ struct device_type sd_type = {
 /*
  * Fetch CID from card.
  */
-int mmc_sd_get_cid(struct mmc_host *host, u32 ocr, u32 *cid)
+int mmc_sd_get_cid(struct mmc_host *host, u32 ocr, u32 *cid, u32 *rocr)
 {
 	int err;
-	u32 rocr;
 
 	/*
 	 * Since we're changing the OCR value, we seem to
@@ -485,7 +576,7 @@ int mmc_sd_get_cid(struct mmc_host *host, u32 ocr, u32 *cid)
 		ocr |= SD_OCR_XPC;
 
 try_again:
-	err = mmc_send_app_op_cond(host, ocr, &rocr);
+	err = mmc_send_app_op_cond(host, ocr, rocr);
 	if (err)
 		return err;
 
@@ -493,7 +584,8 @@ try_again:
 	 * In case CCS and S18A in the response is set, start Signal Voltage
 	 * Switch procedure. SPI mode doesn't support CMD11.
 	 */
-	if (!mmc_host_is_spi(host) && ((rocr & 0x41000000) == 0x41000000)) {
+	if (!mmc_host_is_spi(host) && rocr &&
+	   ((*rocr & 0x41000000) == 0x41000000)) {
 		err = mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180);
 		if (err) {
 			ocr &= ~SD_OCR_S18R;
@@ -628,11 +720,12 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr,
 	struct mmc_card *card;
 	int err;
 	u32 cid[4];
+	u32 rocr = 0;
 
 	BUG_ON(!host);
 	WARN_ON(!host->claimed);
 
-	err = mmc_sd_get_cid(host, ocr, cid);
+	err = mmc_sd_get_cid(host, ocr, cid, &rocr);
 	if (err)
 		return err;
 
@@ -685,30 +778,37 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr,
 	if (err)
 		goto free_card;
 
-	/*
-	 * Attempt to change to high-speed (if supported)
-	 */
-	err = mmc_sd_switch_hs(card);
-	if (err > 0)
-		mmc_sd_go_highspeed(card);
-	else if (err)
-		goto free_card;
-
-	/*
-	 * Set bus speed.
-	 */
-	mmc_set_clock(host, mmc_sd_get_max_clock(card));
-
-	/*
-	 * Switch to wider bus (if supported).
-	 */
-	if ((host->caps & MMC_CAP_4_BIT_DATA) &&
-		(card->scr.bus_widths & SD_SCR_BUS_WIDTH_4)) {
-		err = mmc_app_set_bus_width(card, MMC_BUS_WIDTH_4);
+	/* Initialization sequence for UHS-I cards */
+	if (rocr & SD_ROCR_S18A) {
+		err = mmc_sd_init_uhs_card(card);
 		if (err)
 			goto free_card;
+	} else {
+		/*
+		 * Attempt to change to high-speed (if supported)
+		 */
+		err = mmc_sd_switch_hs(card);
+		if (err > 0)
+			mmc_sd_go_highspeed(card);
+		else if (err)
+			goto free_card;
+
+		/*
+		 * Set bus speed.
+		 */
+		mmc_set_clock(host, mmc_sd_get_max_clock(card));
 
-		mmc_set_bus_width(host, MMC_BUS_WIDTH_4);
+		/*
+		 * Switch to wider bus (if supported).
+		 */
+		if ((host->caps & MMC_CAP_4_BIT_DATA) &&
+			(card->scr.bus_widths & SD_SCR_BUS_WIDTH_4)) {
+			err = mmc_app_set_bus_width(card, MMC_BUS_WIDTH_4);
+			if (err)
+				goto free_card;
+
+			mmc_set_bus_width(host, MMC_BUS_WIDTH_4);
+		}
 	}
 
 	host->card = card;
diff --git a/drivers/mmc/core/sd.h b/drivers/mmc/core/sd.h
index 3d8800fa7600..4b34b24f3f76 100644
--- a/drivers/mmc/core/sd.h
+++ b/drivers/mmc/core/sd.h
@@ -5,7 +5,7 @@
 
 extern struct device_type sd_type;
 
-int mmc_sd_get_cid(struct mmc_host *host, u32 ocr, u32 *cid);
+int mmc_sd_get_cid(struct mmc_host *host, u32 ocr, u32 *cid, u32 *rocr);
 int mmc_sd_get_csd(struct mmc_host *host, struct mmc_card *card);
 void mmc_decode_cid(struct mmc_card *card);
 int mmc_sd_setup_card(struct mmc_host *host, struct mmc_card *card,
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index 1e6095961500..4d0c15bfa514 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -369,8 +369,8 @@ static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr,
 		goto err;
 	}
 
-	if (ocr & R4_MEMORY_PRESENT
-	    && mmc_sd_get_cid(host, host->ocr & ocr, card->raw_cid) == 0) {
+	if ((ocr & R4_MEMORY_PRESENT) &&
+	    mmc_sd_get_cid(host, host->ocr & ocr, card->raw_cid, NULL) == 0) {
 		card->type = MMC_TYPE_SD_COMBO;
 
 		if (oldcard && (oldcard->type != MMC_TYPE_SD_COMBO ||
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 4e2031449a23..8072e16d6d46 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1245,6 +1245,25 @@ static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 
 	sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
 
+	if (host->version >= SDHCI_SPEC_300) {
+		u16 ctrl_2;
+
+		ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+		if (!(ctrl_2 & SDHCI_CTRL_PRESET_VAL_ENABLE)) {
+			/*
+			 * We only need to set Driver Strength if the
+			 * preset value enable is not set.
+			 */
+			ctrl_2 &= ~SDHCI_CTRL_DRV_TYPE_MASK;
+			if (ios->drv_type == MMC_SET_DRIVER_TYPE_A)
+				ctrl_2 |= SDHCI_CTRL_DRV_TYPE_A;
+			else if (ios->drv_type == MMC_SET_DRIVER_TYPE_C)
+				ctrl_2 |= SDHCI_CTRL_DRV_TYPE_C;
+
+			sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2);
+		}
+	}
+
 	/*
 	 * Some (ENE) controllers go apeshit on some ios operation,
 	 * signalling timeout and CRC errors even on CMD0. Resetting
@@ -2086,6 +2105,14 @@ int sdhci_add_host(struct sdhci_host *host)
 	if (caps[1] & SDHCI_SUPPORT_DDR50)
 		mmc->caps |= MMC_CAP_UHS_DDR50;
 
+	/* Driver Type(s) (A, C, D) supported by the host */
+	if (caps[1] & SDHCI_DRIVER_TYPE_A)
+		mmc->caps |= MMC_CAP_DRIVER_TYPE_A;
+	if (caps[1] & SDHCI_DRIVER_TYPE_C)
+		mmc->caps |= MMC_CAP_DRIVER_TYPE_C;
+	if (caps[1] & SDHCI_DRIVER_TYPE_D)
+		mmc->caps |= MMC_CAP_DRIVER_TYPE_D;
+
 	ocr_avail = 0;
 	/*
 	 * According to SD Host Controller spec v3.00, if the Host System
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 5cba2fea46e0..667bf8874be7 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -71,7 +71,7 @@
 #define  SDHCI_DATA_LVL_MASK	0x00F00000
 #define   SDHCI_DATA_LVL_SHIFT	20
 
-#define SDHCI_HOST_CONTROL 	0x28
+#define SDHCI_HOST_CONTROL	0x28
 #define  SDHCI_CTRL_LED		0x01
 #define  SDHCI_CTRL_4BITBUS	0x02
 #define  SDHCI_CTRL_HISPD	0x04
@@ -150,6 +150,12 @@
 
 #define SDHCI_HOST_CONTROL2		0x3E
 #define  SDHCI_CTRL_VDD_180		0x0008
+#define  SDHCI_CTRL_DRV_TYPE_MASK	0x0030
+#define   SDHCI_CTRL_DRV_TYPE_B		0x0000
+#define   SDHCI_CTRL_DRV_TYPE_A		0x0010
+#define   SDHCI_CTRL_DRV_TYPE_C		0x0020
+#define   SDHCI_CTRL_DRV_TYPE_D		0x0030
+#define  SDHCI_CTRL_PRESET_VAL_ENABLE	0x8000
 
 #define SDHCI_CAPABILITIES	0x40
 #define  SDHCI_TIMEOUT_CLK_MASK	0x0000003F
@@ -173,6 +179,9 @@
 #define  SDHCI_SUPPORT_SDR50	0x00000001
 #define  SDHCI_SUPPORT_SDR104	0x00000002
 #define  SDHCI_SUPPORT_DDR50	0x00000004
+#define  SDHCI_DRIVER_TYPE_A	0x00000010
+#define  SDHCI_DRIVER_TYPE_C	0x00000020
+#define  SDHCI_DRIVER_TYPE_D	0x00000040
 
 #define SDHCI_CAPABILITIES_1	0x44
 
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 56f4d9234a66..539327260dc1 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -83,6 +83,10 @@ struct sd_switch_caps {
 	unsigned int		hs_max_dtr;
 	unsigned int		sd3_bus_mode;
 	unsigned int		sd3_drv_type;
+#define SD_DRIVER_TYPE_B	0x01
+#define SD_DRIVER_TYPE_A	0x02
+#define SD_DRIVER_TYPE_C	0x04
+#define SD_DRIVER_TYPE_D	0x08
 	unsigned int		sd3_curr_limit;
 };
 
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index bde5a0b1c47e..949e4d525989 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -61,6 +61,13 @@ struct mmc_ios {
 
 #define MMC_SIGNAL_VOLTAGE_330	0
 #define MMC_SIGNAL_VOLTAGE_180	1
+
+	unsigned char	drv_type;		/* driver type (A, B, C, D) */
+
+#define MMC_SET_DRIVER_TYPE_B	0
+#define MMC_SET_DRIVER_TYPE_A	1
+#define MMC_SET_DRIVER_TYPE_C	2
+#define MMC_SET_DRIVER_TYPE_D	3
 };
 
 struct mmc_host_ops {
@@ -188,6 +195,9 @@ struct mmc_host {
 #define MMC_CAP_SET_XPC_330	(1 << 20)	/* Host supports >150mA current at 3.3V */
 #define MMC_CAP_SET_XPC_300	(1 << 21)	/* Host supports >150mA current at 3.0V */
 #define MMC_CAP_SET_XPC_180	(1 << 22)	/* Host supports >150mA current at 1.8V */
+#define MMC_CAP_DRIVER_TYPE_A	(1 << 23)	/* Host supports Driver Type A */
+#define MMC_CAP_DRIVER_TYPE_C	(1 << 24)	/* Host supports Driver Type C */
+#define MMC_CAP_DRIVER_TYPE_D	(1 << 25)	/* Host supports Driver Type D */
 
 	mmc_pm_flag_t		pm_caps;	/* supported pm features */
 
-- 
cgit v1.2.3


From 49c468fcf878d2c86e31920cf54aa90c88418a66 Mon Sep 17 00:00:00 2001
From: Arindam Nath <arindam.nath@amd.com>
Date: Thu, 5 May 2011 12:19:01 +0530
Subject: mmc: sd: add support for uhs bus speed mode selection

This patch adds support for setting UHS-I bus speed mode during UHS-I
initialization procedure. Since both the host and card can support
more than one bus speed, we select the highest speed based on both of
their capabilities. First we set the bus speed mode for the card using
CMD6 mode 1, and then we program the host controller to support the
required speed mode. We also set High Speed Enable in case one of the
UHS-I modes is selected. We take care to reset SD clock before setting
UHS mode in the Host Control2 register, and then re-enable it as per
the Host Controller spec v3.00. We then set the clock frequency for
the UHS-I mode selected.

Tested by Zhangfei Gao with a Toshiba uhs card and general hs card,
on mmp2 in SDMA mode.

Signed-off-by: Arindam Nath <arindam.nath@amd.com>
Reviewed-by: Philip Rakity <prakity@marvell.com>
Tested-by: Philip Rakity <prakity@marvell.com>
Acked-by: Zhangfei Gao <zhangfei.gao@marvell.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/sd.c    | 65 ++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/mmc/host/sdhci.c | 40 ++++++++++++++++++++++++++---
 drivers/mmc/host/sdhci.h |  6 +++++
 include/linux/mmc/card.h | 19 ++++++++++++++
 include/linux/mmc/host.h |  5 ++++
 5 files changed, 132 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 5b7c99855635..6970b82171f7 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -452,6 +452,66 @@ static int sd_select_driver_type(struct mmc_card *card, u8 *status)
 	return 0;
 }
 
+static int sd_set_bus_speed_mode(struct mmc_card *card, u8 *status)
+{
+	unsigned int bus_speed = 0, timing = 0;
+	int err;
+
+	/*
+	 * If the host doesn't support any of the UHS-I modes, fallback on
+	 * default speed.
+	 */
+	if (!(card->host->caps & (MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 |
+	    MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 | MMC_CAP_UHS_DDR50)))
+		return 0;
+
+	if ((card->host->caps & MMC_CAP_UHS_SDR104) &&
+	    (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR104)) {
+			bus_speed = UHS_SDR104_BUS_SPEED;
+			timing = MMC_TIMING_UHS_SDR104;
+			card->sw_caps.uhs_max_dtr = UHS_SDR104_MAX_DTR;
+	} else if ((card->host->caps & MMC_CAP_UHS_DDR50) &&
+		   (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_DDR50)) {
+			bus_speed = UHS_DDR50_BUS_SPEED;
+			timing = MMC_TIMING_UHS_DDR50;
+			card->sw_caps.uhs_max_dtr = UHS_DDR50_MAX_DTR;
+	} else if ((card->host->caps & (MMC_CAP_UHS_SDR104 |
+		    MMC_CAP_UHS_SDR50)) && (card->sw_caps.sd3_bus_mode &
+		    SD_MODE_UHS_SDR50)) {
+			bus_speed = UHS_SDR50_BUS_SPEED;
+			timing = MMC_TIMING_UHS_SDR50;
+			card->sw_caps.uhs_max_dtr = UHS_SDR50_MAX_DTR;
+	} else if ((card->host->caps & (MMC_CAP_UHS_SDR104 |
+		    MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR25)) &&
+		   (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR25)) {
+			bus_speed = UHS_SDR25_BUS_SPEED;
+			timing = MMC_TIMING_UHS_SDR25;
+			card->sw_caps.uhs_max_dtr = UHS_SDR25_MAX_DTR;
+	} else if ((card->host->caps & (MMC_CAP_UHS_SDR104 |
+		    MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR25 |
+		    MMC_CAP_UHS_SDR12)) && (card->sw_caps.sd3_bus_mode &
+		    SD_MODE_UHS_SDR12)) {
+			bus_speed = UHS_SDR12_BUS_SPEED;
+			timing = MMC_TIMING_UHS_SDR12;
+			card->sw_caps.uhs_max_dtr = UHS_SDR12_MAX_DTR;
+	}
+
+	card->sd_bus_speed = bus_speed;
+	err = mmc_sd_switch(card, 1, 0, bus_speed, status);
+	if (err)
+		return err;
+
+	if ((status[16] & 0xF) != bus_speed)
+		printk(KERN_WARNING "%s: Problem setting bus speed mode!\n",
+			mmc_hostname(card->host));
+	else {
+		mmc_set_timing(card->host, timing);
+		mmc_set_clock(card->host, card->sw_caps.uhs_max_dtr);
+	}
+
+	return 0;
+}
+
 /*
  * UHS-I specific initialization procedure
  */
@@ -485,6 +545,11 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card)
 
 	/* Set the driver strength for the card */
 	err = sd_select_driver_type(card, status);
+	if (err)
+		goto out;
+
+	/* Set bus speed mode of the card */
+	err = sd_set_bus_speed_mode(card, status);
 
 out:
 	kfree(status);
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 409cde5970ae..8994493dd940 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1244,7 +1244,16 @@ static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 		ctrl &= ~SDHCI_CTRL_HISPD;
 
 	if (host->version >= SDHCI_SPEC_300) {
-		u16 ctrl_2;
+		u16 clk, ctrl_2;
+		unsigned int clock;
+
+		/* In case of UHS-I modes, set High Speed Enable */
+		if ((ios->timing == MMC_TIMING_UHS_SDR50) ||
+		    (ios->timing == MMC_TIMING_UHS_SDR104) ||
+		    (ios->timing == MMC_TIMING_UHS_DDR50) ||
+		    (ios->timing == MMC_TIMING_UHS_SDR25) ||
+		    (ios->timing == MMC_TIMING_UHS_SDR12))
+			ctrl |= SDHCI_CTRL_HISPD;
 
 		ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
 		if (!(ctrl_2 & SDHCI_CTRL_PRESET_VAL_ENABLE)) {
@@ -1267,8 +1276,6 @@ static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 			 * need to reset SD Clock Enable before changing High
 			 * Speed Enable to avoid generating clock gliches.
 			 */
-			u16 clk;
-			unsigned int clock;
 
 			/* Reset SD Clock Enable */
 			clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
@@ -1282,6 +1289,33 @@ static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 			host->clock = 0;
 			sdhci_set_clock(host, clock);
 		}
+
+		ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+
+		/* Select Bus Speed Mode for host */
+		ctrl_2 &= ~SDHCI_CTRL_UHS_MASK;
+		if (ios->timing == MMC_TIMING_UHS_SDR12)
+			ctrl_2 |= SDHCI_CTRL_UHS_SDR12;
+		else if (ios->timing == MMC_TIMING_UHS_SDR25)
+			ctrl_2 |= SDHCI_CTRL_UHS_SDR25;
+		else if (ios->timing == MMC_TIMING_UHS_SDR50)
+			ctrl_2 |= SDHCI_CTRL_UHS_SDR50;
+		else if (ios->timing == MMC_TIMING_UHS_SDR104)
+			ctrl_2 |= SDHCI_CTRL_UHS_SDR104;
+		else if (ios->timing == MMC_TIMING_UHS_DDR50)
+			ctrl_2 |= SDHCI_CTRL_UHS_DDR50;
+
+		/* Reset SD Clock Enable */
+		clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
+		clk &= ~SDHCI_CLOCK_CARD_EN;
+		sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
+
+		sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2);
+
+		/* Re-enable SD Clock */
+		clock = host->clock;
+		host->clock = 0;
+		sdhci_set_clock(host, clock);
 	} else
 		sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
 
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 667bf8874be7..d96f6afcca1f 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -149,6 +149,12 @@
 #define SDHCI_ACMD12_ERR	0x3C
 
 #define SDHCI_HOST_CONTROL2		0x3E
+#define  SDHCI_CTRL_UHS_MASK		0x0007
+#define   SDHCI_CTRL_UHS_SDR12		0x0000
+#define   SDHCI_CTRL_UHS_SDR25		0x0001
+#define   SDHCI_CTRL_UHS_SDR50		0x0002
+#define   SDHCI_CTRL_UHS_SDR104		0x0003
+#define   SDHCI_CTRL_UHS_DDR50		0x0004
 #define  SDHCI_CTRL_VDD_180		0x0008
 #define  SDHCI_CTRL_DRV_TYPE_MASK	0x0030
 #define   SDHCI_CTRL_DRV_TYPE_B		0x0000
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 539327260dc1..4ef6ded6347d 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -81,7 +81,24 @@ struct sd_ssr {
 
 struct sd_switch_caps {
 	unsigned int		hs_max_dtr;
+	unsigned int		uhs_max_dtr;
+#define UHS_SDR104_MAX_DTR	208000000
+#define UHS_SDR50_MAX_DTR	100000000
+#define UHS_DDR50_MAX_DTR	50000000
+#define UHS_SDR25_MAX_DTR	UHS_DDR50_MAX_DTR
+#define UHS_SDR12_MAX_DTR	25000000
 	unsigned int		sd3_bus_mode;
+#define UHS_SDR12_BUS_SPEED	0
+#define UHS_SDR25_BUS_SPEED	1
+#define UHS_SDR50_BUS_SPEED	2
+#define UHS_SDR104_BUS_SPEED	3
+#define UHS_DDR50_BUS_SPEED	4
+
+#define SD_MODE_UHS_SDR12	(1 << UHS_SDR12_BUS_SPEED)
+#define SD_MODE_UHS_SDR25	(1 << UHS_SDR25_BUS_SPEED)
+#define SD_MODE_UHS_SDR50	(1 << UHS_SDR50_BUS_SPEED)
+#define SD_MODE_UHS_SDR104	(1 << UHS_SDR104_BUS_SPEED)
+#define SD_MODE_UHS_DDR50	(1 << UHS_DDR50_BUS_SPEED)
 	unsigned int		sd3_drv_type;
 #define SD_DRIVER_TYPE_B	0x01
 #define SD_DRIVER_TYPE_A	0x02
@@ -166,6 +183,8 @@ struct mmc_card {
 	const char		**info;		/* info strings */
 	struct sdio_func_tuple	*tuples;	/* unknown common tuples */
 
+	unsigned int		sd_bus_speed;	/* Bus Speed Mode set for the card */
+
 	struct dentry		*debugfs_root;
 };
 
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 949e4d525989..62375992bdd6 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -50,6 +50,11 @@ struct mmc_ios {
 #define MMC_TIMING_LEGACY	0
 #define MMC_TIMING_MMC_HS	1
 #define MMC_TIMING_SD_HS	2
+#define MMC_TIMING_UHS_SDR12	MMC_TIMING_LEGACY
+#define MMC_TIMING_UHS_SDR25	MMC_TIMING_SD_HS
+#define MMC_TIMING_UHS_SDR50	3
+#define MMC_TIMING_UHS_SDR104	4
+#define MMC_TIMING_UHS_DDR50	5
 
 	unsigned char	ddr;			/* dual data rate used */
 
-- 
cgit v1.2.3


From 5371c927bcd06a5c9dd6785bab2d452b87d9abc6 Mon Sep 17 00:00:00 2001
From: Arindam Nath <arindam.nath@amd.com>
Date: Thu, 5 May 2011 12:19:02 +0530
Subject: mmc: sd: set current limit for uhs cards

We decide on the current limit to be set for the card based on the
Capability of Host Controller to provide current at 1.8V signalling,
and the maximum current limit of the card as indicated by CMD6
mode 0. We then set the current limit for the card using CMD6 mode 1.
As per the Physical Layer Spec v3.01, the current limit switch is
only applicable for SDR50, SDR104, and DDR50 bus speed modes. For
other UHS-I modes, we set the default current limit of 200mA.

Tested by Zhangfei Gao with a Toshiba uhs card and general hs card,
on mmp2 in SDMA mode.

Signed-off-by: Arindam Nath <arindam.nath@amd.com>
Reviewed-by: Philip Rakity <prakity@marvell.com>
Tested-by: Philip Rakity <prakity@marvell.com>
Acked-by: Zhangfei Gao <zhangfei.gao@marvell.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/sd.c    | 63 ++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/mmc/host/sdhci.c | 10 ++++++++
 include/linux/mmc/card.h |  9 +++++++
 include/linux/mmc/host.h |  4 +++
 4 files changed, 86 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 6970b82171f7..8e2d8012e4cb 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -512,6 +512,64 @@ static int sd_set_bus_speed_mode(struct mmc_card *card, u8 *status)
 	return 0;
 }
 
+static int sd_set_current_limit(struct mmc_card *card, u8 *status)
+{
+	int current_limit = 0;
+	int err;
+
+	/*
+	 * Current limit switch is only defined for SDR50, SDR104, and DDR50
+	 * bus speed modes. For other bus speed modes, we set the default
+	 * current limit of 200mA.
+	 */
+	if ((card->sd_bus_speed == UHS_SDR50_BUS_SPEED) ||
+	    (card->sd_bus_speed == UHS_SDR104_BUS_SPEED) ||
+	    (card->sd_bus_speed == UHS_DDR50_BUS_SPEED)) {
+		if (card->host->caps & MMC_CAP_MAX_CURRENT_800) {
+			if (card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_800)
+				current_limit = SD_SET_CURRENT_LIMIT_800;
+			else if (card->sw_caps.sd3_curr_limit &
+					SD_MAX_CURRENT_600)
+				current_limit = SD_SET_CURRENT_LIMIT_600;
+			else if (card->sw_caps.sd3_curr_limit &
+					SD_MAX_CURRENT_400)
+				current_limit = SD_SET_CURRENT_LIMIT_400;
+			else if (card->sw_caps.sd3_curr_limit &
+					SD_MAX_CURRENT_200)
+				current_limit = SD_SET_CURRENT_LIMIT_200;
+		} else if (card->host->caps & MMC_CAP_MAX_CURRENT_600) {
+			if (card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_600)
+				current_limit = SD_SET_CURRENT_LIMIT_600;
+			else if (card->sw_caps.sd3_curr_limit &
+					SD_MAX_CURRENT_400)
+				current_limit = SD_SET_CURRENT_LIMIT_400;
+			else if (card->sw_caps.sd3_curr_limit &
+					SD_MAX_CURRENT_200)
+				current_limit = SD_SET_CURRENT_LIMIT_200;
+		} else if (card->host->caps & MMC_CAP_MAX_CURRENT_400) {
+			if (card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_400)
+				current_limit = SD_SET_CURRENT_LIMIT_400;
+			else if (card->sw_caps.sd3_curr_limit &
+					SD_MAX_CURRENT_200)
+				current_limit = SD_SET_CURRENT_LIMIT_200;
+		} else if (card->host->caps & MMC_CAP_MAX_CURRENT_200) {
+			if (card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_200)
+				current_limit = SD_SET_CURRENT_LIMIT_200;
+		}
+	} else
+		current_limit = SD_SET_CURRENT_LIMIT_200;
+
+	err = mmc_sd_switch(card, 1, 3, current_limit, status);
+	if (err)
+		return err;
+
+	if (((status[15] >> 4) & 0x0F) != current_limit)
+		printk(KERN_WARNING "%s: Problem setting current limit!\n",
+			mmc_hostname(card->host));
+
+	return 0;
+}
+
 /*
  * UHS-I specific initialization procedure
  */
@@ -550,6 +608,11 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card)
 
 	/* Set bus speed mode of the card */
 	err = sd_set_bus_speed_mode(card, status);
+	if (err)
+		goto out;
+
+	/* Set current limit for the card */
+	err = sd_set_current_limit(card, status);
 
 out:
 	kfree(status);
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 8994493dd940..2a15aad2eba5 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -2216,6 +2216,16 @@ int sdhci_add_host(struct sdhci_host *host)
 
 		if (max_current_180 > 150)
 			mmc->caps |= MMC_CAP_SET_XPC_180;
+
+		/* Maximum current capabilities of the host at 1.8V */
+		if (max_current_180 >= 800)
+			mmc->caps |= MMC_CAP_MAX_CURRENT_800;
+		else if (max_current_180 >= 600)
+			mmc->caps |= MMC_CAP_MAX_CURRENT_600;
+		else if (max_current_180 >= 400)
+			mmc->caps |= MMC_CAP_MAX_CURRENT_400;
+		else
+			mmc->caps |= MMC_CAP_MAX_CURRENT_200;
 	}
 
 	mmc->ocr_avail = ocr_avail;
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 4ef6ded6347d..47b5ad3960b7 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -105,6 +105,15 @@ struct sd_switch_caps {
 #define SD_DRIVER_TYPE_C	0x04
 #define SD_DRIVER_TYPE_D	0x08
 	unsigned int		sd3_curr_limit;
+#define SD_SET_CURRENT_LIMIT_200	0
+#define SD_SET_CURRENT_LIMIT_400	1
+#define SD_SET_CURRENT_LIMIT_600	2
+#define SD_SET_CURRENT_LIMIT_800	3
+
+#define SD_MAX_CURRENT_200	(1 << SD_SET_CURRENT_LIMIT_200)
+#define SD_MAX_CURRENT_400	(1 << SD_SET_CURRENT_LIMIT_400)
+#define SD_MAX_CURRENT_600	(1 << SD_SET_CURRENT_LIMIT_600)
+#define SD_MAX_CURRENT_800	(1 << SD_SET_CURRENT_LIMIT_800)
 };
 
 struct sdio_cccr {
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 62375992bdd6..52b5dc914a8c 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -203,6 +203,10 @@ struct mmc_host {
 #define MMC_CAP_DRIVER_TYPE_A	(1 << 23)	/* Host supports Driver Type A */
 #define MMC_CAP_DRIVER_TYPE_C	(1 << 24)	/* Host supports Driver Type C */
 #define MMC_CAP_DRIVER_TYPE_D	(1 << 25)	/* Host supports Driver Type D */
+#define MMC_CAP_MAX_CURRENT_200	(1 << 26)	/* Host max current limit is 200mA */
+#define MMC_CAP_MAX_CURRENT_400	(1 << 27)	/* Host max current limit is 400mA */
+#define MMC_CAP_MAX_CURRENT_600	(1 << 28)	/* Host max current limit is 600mA */
+#define MMC_CAP_MAX_CURRENT_800	(1 << 29)	/* Host max current limit is 800mA */
 
 	mmc_pm_flag_t		pm_caps;	/* supported pm features */
 
-- 
cgit v1.2.3


From 3a3035114307cd55e024662bb295a87b849f0bd4 Mon Sep 17 00:00:00 2001
From: Arindam Nath <arindam.nath@amd.com>
Date: Thu, 5 May 2011 12:19:03 +0530
Subject: mmc: sd: report correct speed and capacity of uhs cards

Since only UHS-I cards respond with S18A set in response to ACMD41,
we set the card as ultra-high-speed after successfull initialization.
We need to decide whether a card is SDXC based on the C_SIZE field
of CSDv2.0 register. According to Physical Layer spec v3.01, the
minimum value of C_SIZE for SDXC card is 00FFFFh.

Tested by Zhangfei Gao with a Toshiba uhs card and general hs card,
on mmp2 in SDMA mode.

Signed-off-by: Arindam Nath <arindam.nath@amd.com>
Reviewed-by: Philip Rakity <prakity@marvell.com>
Tested-by: Philip Rakity <prakity@marvell.com>
Acked-by: Zhangfei Gao <zhangfei.gao@marvell.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/bus.c   | 11 ++++++++---
 drivers/mmc/core/sd.c    | 10 +++++++++-
 include/linux/mmc/card.h |  7 +++++++
 3 files changed, 24 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c
index d6d62fd07ee9..393d817ed040 100644
--- a/drivers/mmc/core/bus.c
+++ b/drivers/mmc/core/bus.c
@@ -274,8 +274,12 @@ int mmc_add_card(struct mmc_card *card)
 		break;
 	case MMC_TYPE_SD:
 		type = "SD";
-		if (mmc_card_blockaddr(card))
-			type = "SDHC";
+		if (mmc_card_blockaddr(card)) {
+			if (mmc_card_ext_capacity(card))
+				type = "SDXC";
+			else
+				type = "SDHC";
+		}
 		break;
 	case MMC_TYPE_SDIO:
 		type = "SDIO";
@@ -299,7 +303,8 @@ int mmc_add_card(struct mmc_card *card)
 	} else {
 		printk(KERN_INFO "%s: new %s%s%s card at address %04x\n",
 			mmc_hostname(card->host),
-			mmc_card_highspeed(card) ? "high speed " : "",
+			mmc_sd_card_uhs(card) ? "ultra high speed " :
+			(mmc_card_highspeed(card) ? "high speed " : ""),
 			mmc_card_ddr_mode(card) ? "DDR " : "",
 			type, card->rca);
 	}
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 8e2d8012e4cb..732c3171ceca 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -130,7 +130,7 @@ static int mmc_decode_csd(struct mmc_card *card)
 		break;
 	case 1:
 		/*
-		 * This is a block-addressed SDHC card. Most
+		 * This is a block-addressed SDHC or SDXC card. Most
 		 * interesting fields are unused and have fixed
 		 * values. To avoid getting tripped by buggy cards,
 		 * we assume those fixed values ourselves.
@@ -144,6 +144,11 @@ static int mmc_decode_csd(struct mmc_card *card)
 		e = UNSTUFF_BITS(resp, 96, 3);
 		csd->max_dtr	  = tran_exp[e] * tran_mant[m];
 		csd->cmdclass	  = UNSTUFF_BITS(resp, 84, 12);
+		csd->c_size	  = UNSTUFF_BITS(resp, 48, 22);
+
+		/* SDXC cards have a minimum C_SIZE of 0x00FFFF */
+		if (csd->c_size >= 0xFFFF)
+			mmc_card_set_ext_capacity(card);
 
 		m = UNSTUFF_BITS(resp, 48, 22);
 		csd->capacity     = (1 + m) << 10;
@@ -911,6 +916,9 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr,
 		err = mmc_sd_init_uhs_card(card);
 		if (err)
 			goto free_card;
+
+		/* Card is an ultra-high-speed card */
+		mmc_sd_card_set_uhs(card);
 	} else {
 		/*
 		 * Attempt to change to high-speed (if supported)
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 47b5ad3960b7..d8dffc992ce2 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -30,6 +30,7 @@ struct mmc_csd {
 	unsigned short		cmdclass;
 	unsigned short		tacc_clks;
 	unsigned int		tacc_ns;
+	unsigned int		c_size;
 	unsigned int		r2w_factor;
 	unsigned int		max_dtr;
 	unsigned int		erase_size;		/* In sectors */
@@ -158,6 +159,8 @@ struct mmc_card {
 #define MMC_STATE_HIGHSPEED	(1<<2)		/* card is in high speed mode */
 #define MMC_STATE_BLOCKADDR	(1<<3)		/* card uses block-addressing */
 #define MMC_STATE_HIGHSPEED_DDR (1<<4)		/* card is in high speed mode */
+#define MMC_STATE_ULTRAHIGHSPEED (1<<5)		/* card is in ultra high speed mode */
+#define MMC_CARD_SDXC		(1<<6)		/* card is SDXC */
 	unsigned int		quirks; 	/* card quirks */
 #define MMC_QUIRK_LENIENT_FN0	(1<<0)		/* allow SDIO FN0 writes outside of the VS CCCR range */
 #define MMC_QUIRK_BLKSZ_FOR_BYTE_MODE (1<<1)	/* use func->cur_blksize */
@@ -293,12 +296,16 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
 #define mmc_card_highspeed(c)	((c)->state & MMC_STATE_HIGHSPEED)
 #define mmc_card_blockaddr(c)	((c)->state & MMC_STATE_BLOCKADDR)
 #define mmc_card_ddr_mode(c)	((c)->state & MMC_STATE_HIGHSPEED_DDR)
+#define mmc_sd_card_uhs(c) ((c)->state & MMC_STATE_ULTRAHIGHSPEED)
+#define mmc_card_ext_capacity(c) ((c)->state & MMC_CARD_SDXC)
 
 #define mmc_card_set_present(c)	((c)->state |= MMC_STATE_PRESENT)
 #define mmc_card_set_readonly(c) ((c)->state |= MMC_STATE_READONLY)
 #define mmc_card_set_highspeed(c) ((c)->state |= MMC_STATE_HIGHSPEED)
 #define mmc_card_set_blockaddr(c) ((c)->state |= MMC_STATE_BLOCKADDR)
 #define mmc_card_set_ddr_mode(c) ((c)->state |= MMC_STATE_HIGHSPEED_DDR)
+#define mmc_sd_card_set_uhs(c) ((c)->state |= MMC_STATE_ULTRAHIGHSPEED)
+#define mmc_card_set_ext_capacity(c) ((c)->state |= MMC_CARD_SDXC)
 
 static inline int mmc_card_lenient_fn0(const struct mmc_card *c)
 {
-- 
cgit v1.2.3


From b513ea250eb7c36a8afb3df938d632ca6b4df7cd Mon Sep 17 00:00:00 2001
From: Arindam Nath <arindam.nath@amd.com>
Date: Thu, 5 May 2011 12:19:04 +0530
Subject: mmc: sd: add support for tuning during uhs initialization

Host Controller needs tuning during initialization to operate SDR50
and SDR104 UHS-I cards. Whether SDR50 mode actually needs tuning is
indicated by bit 45 of the Host Controller Capabilities register.
A new command CMD19 has been defined in the Physical Layer spec
v3.01 to request the card to send tuning pattern.

We enable Buffer Read Ready interrupt at the very begining of tuning
procedure, because that is the only interrupt generated by the Host
Controller during tuning. We program the block size to 64 in the
Block Size register. We make sure that DMA Enable and Multi Block
Select in the Transfer Mode register are set to 0 before actually
sending CMD19. The tuning block is sent by the card to the Host
Controller using DAT lines, so we set Data Present Select (bit 5) in
the Command register. The Host Controller is responsible for doing
the verfication of tuning block sent by the card at the hardware
level. After sending CMD19, we wait for Buffer Read Ready interrupt.
In case we don't receive an interrupt after the specified timeout
value, we fall back on fixed sampling clock by setting Execute
Tuning (bit 6) and Sampling Clock Select (bit 7) of Host Control2
register to 0. Before exiting the tuning procedure, we disable Buffer
Read Ready interrupt and re-enable other interrupts.

Tested by Zhangfei Gao with a Toshiba uhs card and general hs card,
on mmp2 in SDMA mode.

Signed-off-by: Arindam Nath <arindam.nath@amd.com>
Reviewed-by: Philip Rakity <prakity@marvell.com>
Tested-by: Philip Rakity <prakity@marvell.com>
Acked-by: Zhangfei Gao <zhangfei.gao@marvell.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/sd.c     |   6 ++
 drivers/mmc/host/sdhci.c  | 168 +++++++++++++++++++++++++++++++++++++++++++++-
 drivers/mmc/host/sdhci.h  |   3 +
 include/linux/mmc/host.h  |   1 +
 include/linux/mmc/mmc.h   |   1 +
 include/linux/mmc/sdhci.h |   4 ++
 6 files changed, 182 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 732c3171ceca..fc65475a26ee 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -618,6 +618,12 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card)
 
 	/* Set current limit for the card */
 	err = sd_set_current_limit(card, status);
+	if (err)
+		goto out;
+
+	/* SPI mode doesn't define CMD19 */
+	if (!mmc_host_is_spi(card->host) && card->host->ops->execute_tuning)
+		err = card->host->ops->execute_tuning(card->host);
 
 out:
 	kfree(status);
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 2a15aad2eba5..8a56eacea34d 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -38,6 +38,8 @@
 #define SDHCI_USE_LEDS_CLASS
 #endif
 
+#define MAX_TUNING_LOOP 40
+
 static unsigned int debug_quirks = 0;
 
 static void sdhci_finish_data(struct sdhci_host *);
@@ -968,7 +970,9 @@ static void sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd)
 		flags |= SDHCI_CMD_CRC;
 	if (cmd->flags & MMC_RSP_OPCODE)
 		flags |= SDHCI_CMD_INDEX;
-	if (cmd->data)
+
+	/* CMD19 is special in that the Data Present Select should be set */
+	if (cmd->data || (cmd->opcode == MMC_SEND_TUNING_BLOCK))
 		flags |= SDHCI_CMD_DATA;
 
 	sdhci_writew(host, SDHCI_MAKE_CMD(cmd->opcode, flags), SDHCI_COMMAND);
@@ -1501,12 +1505,157 @@ static int sdhci_start_signal_voltage_switch(struct mmc_host *mmc,
 		return 0;
 }
 
+static int sdhci_execute_tuning(struct mmc_host *mmc)
+{
+	struct sdhci_host *host;
+	u16 ctrl;
+	u32 ier;
+	int tuning_loop_counter = MAX_TUNING_LOOP;
+	unsigned long timeout;
+	int err = 0;
+
+	host = mmc_priv(mmc);
+
+	disable_irq(host->irq);
+	spin_lock(&host->lock);
+
+	ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+
+	/*
+	 * Host Controller needs tuning only in case of SDR104 mode
+	 * and for SDR50 mode when Use Tuning for SDR50 is set in
+	 * Capabilities register.
+	 */
+	if (((ctrl & SDHCI_CTRL_UHS_MASK) == SDHCI_CTRL_UHS_SDR104) ||
+	    (((ctrl & SDHCI_CTRL_UHS_MASK) == SDHCI_CTRL_UHS_SDR50) &&
+	    (host->flags & SDHCI_SDR50_NEEDS_TUNING)))
+		ctrl |= SDHCI_CTRL_EXEC_TUNING;
+	else {
+		spin_unlock(&host->lock);
+		enable_irq(host->irq);
+		return 0;
+	}
+
+	sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
+
+	/*
+	 * As per the Host Controller spec v3.00, tuning command
+	 * generates Buffer Read Ready interrupt, so enable that.
+	 *
+	 * Note: The spec clearly says that when tuning sequence
+	 * is being performed, the controller does not generate
+	 * interrupts other than Buffer Read Ready interrupt. But
+	 * to make sure we don't hit a controller bug, we _only_
+	 * enable Buffer Read Ready interrupt here.
+	 */
+	ier = sdhci_readl(host, SDHCI_INT_ENABLE);
+	sdhci_clear_set_irqs(host, ier, SDHCI_INT_DATA_AVAIL);
+
+	/*
+	 * Issue CMD19 repeatedly till Execute Tuning is set to 0 or the number
+	 * of loops reaches 40 times or a timeout of 150ms occurs.
+	 */
+	timeout = 150;
+	do {
+		struct mmc_command cmd = {0};
+		struct mmc_request mrq = {0};
+
+		if (!tuning_loop_counter && !timeout)
+			break;
+
+		cmd.opcode = MMC_SEND_TUNING_BLOCK;
+		cmd.arg = 0;
+		cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC;
+		cmd.retries = 0;
+		cmd.data = NULL;
+		cmd.error = 0;
+
+		mrq.cmd = &cmd;
+		host->mrq = &mrq;
+
+		/*
+		 * In response to CMD19, the card sends 64 bytes of tuning
+		 * block to the Host Controller. So we set the block size
+		 * to 64 here.
+		 */
+		sdhci_writew(host, SDHCI_MAKE_BLKSZ(7, 64), SDHCI_BLOCK_SIZE);
+
+		/*
+		 * The tuning block is sent by the card to the host controller.
+		 * So we set the TRNS_READ bit in the Transfer Mode register.
+		 * This also takes care of setting DMA Enable and Multi Block
+		 * Select in the same register to 0.
+		 */
+		sdhci_writew(host, SDHCI_TRNS_READ, SDHCI_TRANSFER_MODE);
+
+		sdhci_send_command(host, &cmd);
+
+		host->cmd = NULL;
+		host->mrq = NULL;
+
+		spin_unlock(&host->lock);
+		enable_irq(host->irq);
+
+		/* Wait for Buffer Read Ready interrupt */
+		wait_event_interruptible_timeout(host->buf_ready_int,
+					(host->tuning_done == 1),
+					msecs_to_jiffies(50));
+		disable_irq(host->irq);
+		spin_lock(&host->lock);
+
+		if (!host->tuning_done) {
+			printk(KERN_INFO DRIVER_NAME ": Timeout waiting for "
+				"Buffer Read Ready interrupt during tuning "
+				"procedure, falling back to fixed sampling "
+				"clock\n");
+			ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+			ctrl &= ~SDHCI_CTRL_TUNED_CLK;
+			ctrl &= ~SDHCI_CTRL_EXEC_TUNING;
+			sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
+
+			err = -EIO;
+			goto out;
+		}
+
+		host->tuning_done = 0;
+
+		ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+		tuning_loop_counter--;
+		timeout--;
+		mdelay(1);
+	} while (ctrl & SDHCI_CTRL_EXEC_TUNING);
+
+	/*
+	 * The Host Driver has exhausted the maximum number of loops allowed,
+	 * so use fixed sampling frequency.
+	 */
+	if (!tuning_loop_counter || !timeout) {
+		ctrl &= ~SDHCI_CTRL_TUNED_CLK;
+		sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
+	} else {
+		if (!(ctrl & SDHCI_CTRL_TUNED_CLK)) {
+			printk(KERN_INFO DRIVER_NAME ": Tuning procedure"
+				" failed, falling back to fixed sampling"
+				" clock\n");
+			err = -EIO;
+		}
+	}
+
+out:
+	sdhci_clear_set_irqs(host, SDHCI_INT_DATA_AVAIL, ier);
+	spin_unlock(&host->lock);
+	enable_irq(host->irq);
+
+	return err;
+}
+
 static const struct mmc_host_ops sdhci_ops = {
 	.request	= sdhci_request,
 	.set_ios	= sdhci_set_ios,
 	.get_ro		= sdhci_get_ro,
 	.enable_sdio_irq = sdhci_enable_sdio_irq,
 	.start_signal_voltage_switch	= sdhci_start_signal_voltage_switch,
+	.execute_tuning			= sdhci_execute_tuning,
 };
 
 /*****************************************************************************\
@@ -1724,6 +1873,16 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
 {
 	BUG_ON(intmask == 0);
 
+	/* CMD19 generates _only_ Buffer Read Ready interrupt */
+	if (intmask & SDHCI_INT_DATA_AVAIL) {
+		if (SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND)) ==
+		    MMC_SEND_TUNING_BLOCK) {
+			host->tuning_done = 1;
+			wake_up(&host->buf_ready_int);
+			return;
+		}
+	}
+
 	if (!host->data) {
 		/*
 		 * The "data complete" interrupt is also used to
@@ -2160,6 +2319,10 @@ int sdhci_add_host(struct sdhci_host *host)
 	if (caps[1] & SDHCI_SUPPORT_DDR50)
 		mmc->caps |= MMC_CAP_UHS_DDR50;
 
+	/* Does the host needs tuning for SDR50? */
+	if (caps[1] & SDHCI_USE_SDR50_TUNING)
+		host->flags |= SDHCI_SDR50_NEEDS_TUNING;
+
 	/* Driver Type(s) (A, C, D) supported by the host */
 	if (caps[1] & SDHCI_DRIVER_TYPE_A)
 		mmc->caps |= MMC_CAP_DRIVER_TYPE_A;
@@ -2313,6 +2476,9 @@ int sdhci_add_host(struct sdhci_host *host)
 
 	setup_timer(&host->timer, sdhci_timeout_timer, (unsigned long)host);
 
+	if (host->version >= SDHCI_SPEC_300)
+		init_waitqueue_head(&host->buf_ready_int);
+
 	ret = request_irq(host->irq, sdhci_irq, IRQF_SHARED,
 		mmc_hostname(mmc), host);
 	if (ret)
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index d96f6afcca1f..e62367491eee 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -161,6 +161,8 @@
 #define   SDHCI_CTRL_DRV_TYPE_A		0x0010
 #define   SDHCI_CTRL_DRV_TYPE_C		0x0020
 #define   SDHCI_CTRL_DRV_TYPE_D		0x0030
+#define  SDHCI_CTRL_EXEC_TUNING		0x0040
+#define  SDHCI_CTRL_TUNED_CLK		0x0080
 #define  SDHCI_CTRL_PRESET_VAL_ENABLE	0x8000
 
 #define SDHCI_CAPABILITIES	0x40
@@ -188,6 +190,7 @@
 #define  SDHCI_DRIVER_TYPE_A	0x00000010
 #define  SDHCI_DRIVER_TYPE_C	0x00000020
 #define  SDHCI_DRIVER_TYPE_D	0x00000040
+#define  SDHCI_USE_SDR50_TUNING	0x00002000
 
 #define SDHCI_CAPABILITIES_1	0x44
 
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 52b5dc914a8c..ca7007fdb399 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -136,6 +136,7 @@ struct mmc_host_ops {
 	void	(*init_card)(struct mmc_host *host, struct mmc_card *card);
 
 	int	(*start_signal_voltage_switch)(struct mmc_host *host, struct mmc_ios *ios);
+	int	(*execute_tuning)(struct mmc_host *host);
 };
 
 struct mmc_card;
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 373b2bf5e5b5..9fa5a73f393d 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -50,6 +50,7 @@
 #define MMC_SET_BLOCKLEN         16   /* ac   [31:0] block len   R1  */
 #define MMC_READ_SINGLE_BLOCK    17   /* adtc [31:0] data addr   R1  */
 #define MMC_READ_MULTIPLE_BLOCK  18   /* adtc [31:0] data addr   R1  */
+#define MMC_SEND_TUNING_BLOCK    19   /* adtc                    R1  */
 
   /* class 3 */
 #define MMC_WRITE_DAT_UNTIL_STOP 20   /* adtc [31:0] data addr   R1  */
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index 92e1c9ad126c..b74c8530e959 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -111,6 +111,7 @@ struct sdhci_host {
 #define SDHCI_USE_ADMA		(1<<1)	/* Host is ADMA capable */
 #define SDHCI_REQ_USE_DMA	(1<<2)	/* Use DMA for this req. */
 #define SDHCI_DEVICE_DEAD	(1<<3)	/* Device unresponsive */
+#define SDHCI_SDR50_NEEDS_TUNING (1<<4)	/* SDR50 needs tuning */
 
 	unsigned int version;	/* SDHCI spec. version */
 
@@ -147,6 +148,9 @@ struct sdhci_host {
 	unsigned int            ocr_avail_sd;
 	unsigned int            ocr_avail_mmc;
 
+	wait_queue_head_t	buf_ready_int;	/* Waitqueue for Buffer Read Ready interrupt */
+	unsigned int		tuning_done;	/* Condition flag set when CMD19 succeeds */
+
 	unsigned long private[0] ____cacheline_aligned;
 };
 #endif /* __SDHCI_H */
-- 
cgit v1.2.3


From 4d55c5a13a189a80d40383f02c8026f9a87d7c87 Mon Sep 17 00:00:00 2001
From: Arindam Nath <arindam.nath@amd.com>
Date: Thu, 5 May 2011 12:19:05 +0530
Subject: mmc: sdhci: enable preset value after uhs initialization

According to the Host Controller spec v3.00, setting Preset Value Enable
in the Host Control2 register lets SDCLK Frequency Select, Clock Generator
Select and Driver Strength Select to be set automatically by the Host
Controller based on the UHS-I mode set. This patch enables this feature.
Since Preset Value Enable makes sense only for UHS-I cards, we enable this
feature after successfull UHS-I initialization. We also reset Preset Value
Enable next time before initialization.

Tested by Zhangfei Gao with a Toshiba uhs card and general hs card,
on mmp2 in SDMA mode.

Signed-off-by: Arindam Nath <arindam.nath@amd.com>
Reviewed-by: Philip Rakity <prakity@marvell.com>
Tested-by: Philip Rakity <prakity@marvell.com>
Acked-by: Zhangfei Gao <zhangfei.gao@marvell.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/sd.c    | 11 +++++++++++
 drivers/mmc/host/sdhci.c | 32 ++++++++++++++++++++++++++++++++
 include/linux/mmc/host.h |  1 +
 3 files changed, 44 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index fc65475a26ee..b461b290ce25 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -925,6 +925,13 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr,
 
 		/* Card is an ultra-high-speed card */
 		mmc_sd_card_set_uhs(card);
+
+		/*
+		 * Since initialization is now complete, enable preset
+		 * value registers for UHS-I cards.
+		 */
+		if (host->ops->enable_preset_value)
+			host->ops->enable_preset_value(host, true);
 	} else {
 		/*
 		 * Attempt to change to high-speed (if supported)
@@ -1095,6 +1102,10 @@ int mmc_attach_sd(struct mmc_host *host)
 	if (err)
 		return err;
 
+	/* Disable preset value enable if already set since last time */
+	if (host->ops->enable_preset_value)
+		host->ops->enable_preset_value(host, false);
+
 	err = mmc_send_app_op_cond(host, 0, &ocr);
 	if (err)
 		return err;
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 8a56eacea34d..a1ab22415883 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1649,6 +1649,37 @@ out:
 	return err;
 }
 
+static void sdhci_enable_preset_value(struct mmc_host *mmc, bool enable)
+{
+	struct sdhci_host *host;
+	u16 ctrl;
+	unsigned long flags;
+
+	host = mmc_priv(mmc);
+
+	/* Host Controller v3.00 defines preset value registers */
+	if (host->version < SDHCI_SPEC_300)
+		return;
+
+	spin_lock_irqsave(&host->lock, flags);
+
+	ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+
+	/*
+	 * We only enable or disable Preset Value if they are not already
+	 * enabled or disabled respectively. Otherwise, we bail out.
+	 */
+	if (enable && !(ctrl & SDHCI_CTRL_PRESET_VAL_ENABLE)) {
+		ctrl |= SDHCI_CTRL_PRESET_VAL_ENABLE;
+		sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
+	} else if (!enable && (ctrl & SDHCI_CTRL_PRESET_VAL_ENABLE)) {
+		ctrl &= ~SDHCI_CTRL_PRESET_VAL_ENABLE;
+		sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
+	}
+
+	spin_unlock_irqrestore(&host->lock, flags);
+}
+
 static const struct mmc_host_ops sdhci_ops = {
 	.request	= sdhci_request,
 	.set_ios	= sdhci_set_ios,
@@ -1656,6 +1687,7 @@ static const struct mmc_host_ops sdhci_ops = {
 	.enable_sdio_irq = sdhci_enable_sdio_irq,
 	.start_signal_voltage_switch	= sdhci_start_signal_voltage_switch,
 	.execute_tuning			= sdhci_execute_tuning,
+	.enable_preset_value		= sdhci_enable_preset_value,
 };
 
 /*****************************************************************************\
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index ca7007fdb399..6716bd12eccd 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -137,6 +137,7 @@ struct mmc_host_ops {
 
 	int	(*start_signal_voltage_switch)(struct mmc_host *host, struct mmc_ios *ios);
 	int	(*execute_tuning)(struct mmc_host *host);
+	void	(*enable_preset_value)(struct mmc_host *host, bool enable);
 };
 
 struct mmc_card;
-- 
cgit v1.2.3


From c3ed3877625f10d600b0eca2ca48a68c46aed660 Mon Sep 17 00:00:00 2001
From: Arindam Nath <arindam.nath@amd.com>
Date: Thu, 5 May 2011 12:19:06 +0530
Subject: mmc: sdhci: add support for programmable clock mode

Host Controller v3.00 supports programmable clock mode as an optional
feature. The support for this mode is indicated by non-zero value in
bits 48-55 of the Capabilities register. If supported, the actual
value of Clock Multiplier is one more than the value provided in the
bit fields. We only set Clock Generator Select (bit 5) and SDCLK
Frequency Select (bits 8-15) of the Clock Control register in case
Preset Value Enable is not set, otherwise these fields are automatically
set by the Host Controller based on the UHS mode selected. Also, since
the maximum and minimum clock frequency in this mode can be
(Base Clock * Clock Mul) and (Base Clock * Clock Mul)/1024 respectively,
f_max and f_min have been recalculated to reflect this change.

Tested by Zhangfei Gao with a Toshiba uhs card and general hs card,
on mmp2 in SDMA mode.

Signed-off-by: Arindam Nath <arindam.nath@amd.com>
Reviewed-by: Philip Rakity <prakity@marvell.com>
Tested-by: Philip Rakity <prakity@marvell.com>
Acked-by: Zhangfei Gao <zhangfei.gao@marvell.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/sdhci.c  | 81 ++++++++++++++++++++++++++++++++++++++---------
 drivers/mmc/host/sdhci.h  |  3 ++
 include/linux/mmc/sdhci.h |  1 +
 3 files changed, 70 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index a1ab22415883..07cca557c4b5 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1013,8 +1013,8 @@ static void sdhci_finish_command(struct sdhci_host *host)
 
 static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
 {
-	int div;
-	u16 clk;
+	int div = 0; /* Initialized for compiler warning */
+	u16 clk = 0;
 	unsigned long timeout;
 
 	if (clock == host->clock)
@@ -1032,14 +1032,45 @@ static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
 		goto out;
 
 	if (host->version >= SDHCI_SPEC_300) {
-		/* Version 3.00 divisors must be a multiple of 2. */
-		if (host->max_clk <= clock)
-			div = 1;
-		else {
-			for (div = 2; div < SDHCI_MAX_DIV_SPEC_300; div += 2) {
-				if ((host->max_clk / div) <= clock)
-					break;
+		/*
+		 * Check if the Host Controller supports Programmable Clock
+		 * Mode.
+		 */
+		if (host->clk_mul) {
+			u16 ctrl;
+
+			/*
+			 * We need to figure out whether the Host Driver needs
+			 * to select Programmable Clock Mode, or the value can
+			 * be set automatically by the Host Controller based on
+			 * the Preset Value registers.
+			 */
+			ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+			if (!(ctrl & SDHCI_CTRL_PRESET_VAL_ENABLE)) {
+				for (div = 1; div <= 1024; div++) {
+					if (((host->max_clk * host->clk_mul) /
+					      div) <= clock)
+						break;
+				}
+				/*
+				 * Set Programmable Clock Mode in the Clock
+				 * Control register.
+				 */
+				clk = SDHCI_PROG_CLOCK_MODE;
+				div--;
+			}
+		} else {
+			/* Version 3.00 divisors must be a multiple of 2. */
+			if (host->max_clk <= clock)
+				div = 1;
+			else {
+				for (div = 2; div < SDHCI_MAX_DIV_SPEC_300;
+				     div += 2) {
+					if ((host->max_clk / div) <= clock)
+						break;
+				}
 			}
+			div >>= 1;
 		}
 	} else {
 		/* Version 2.00 divisors must be a power of 2. */
@@ -1047,10 +1078,10 @@ static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
 			if ((host->max_clk / div) <= clock)
 				break;
 		}
+		div >>= 1;
 	}
-	div >>= 1;
 
-	clk = (div & SDHCI_DIV_MASK) << SDHCI_DIVIDER_SHIFT;
+	clk |= (div & SDHCI_DIV_MASK) << SDHCI_DIVIDER_SHIFT;
 	clk |= ((div & SDHCI_DIV_HI_MASK) >> SDHCI_DIV_MASK_LEN)
 		<< SDHCI_DIVIDER_HI_SHIFT;
 	clk |= SDHCI_CLOCK_INT_EN;
@@ -2307,18 +2338,38 @@ int sdhci_add_host(struct sdhci_host *host)
 	if (caps[0] & SDHCI_TIMEOUT_CLK_UNIT)
 		host->timeout_clk *= 1000;
 
+	/*
+	 * In case of Host Controller v3.00, find out whether clock
+	 * multiplier is supported.
+	 */
+	host->clk_mul = (caps[1] & SDHCI_CLOCK_MUL_MASK) >>
+			SDHCI_CLOCK_MUL_SHIFT;
+
+	/*
+	 * In case the value in Clock Multiplier is 0, then programmable
+	 * clock mode is not supported, otherwise the actual clock
+	 * multiplier is one more than the value of Clock Multiplier
+	 * in the Capabilities Register.
+	 */
+	if (host->clk_mul)
+		host->clk_mul += 1;
+
 	/*
 	 * Set host parameters.
 	 */
 	mmc->ops = &sdhci_ops;
+	mmc->f_max = host->max_clk;
 	if (host->ops->get_min_clock)
 		mmc->f_min = host->ops->get_min_clock(host);
-	else if (host->version >= SDHCI_SPEC_300)
-		mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_300;
-	else
+	else if (host->version >= SDHCI_SPEC_300) {
+		if (host->clk_mul) {
+			mmc->f_min = (host->max_clk * host->clk_mul) / 1024;
+			mmc->f_max = host->max_clk * host->clk_mul;
+		} else
+			mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_300;
+	} else
 		mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_200;
 
-	mmc->f_max = host->max_clk;
 	mmc->caps |= MMC_CAP_SDIO_IRQ | MMC_CAP_ERASE;
 
 	/*
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index e62367491eee..6b0a0ee9ac6e 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -101,6 +101,7 @@
 #define  SDHCI_DIV_MASK	0xFF
 #define  SDHCI_DIV_MASK_LEN	8
 #define  SDHCI_DIV_HI_MASK	0x300
+#define  SDHCI_PROG_CLOCK_MODE	0x0020
 #define  SDHCI_CLOCK_CARD_EN	0x0004
 #define  SDHCI_CLOCK_INT_STABLE	0x0002
 #define  SDHCI_CLOCK_INT_EN	0x0001
@@ -191,6 +192,8 @@
 #define  SDHCI_DRIVER_TYPE_C	0x00000020
 #define  SDHCI_DRIVER_TYPE_D	0x00000040
 #define  SDHCI_USE_SDR50_TUNING	0x00002000
+#define  SDHCI_CLOCK_MUL_MASK	0x00FF0000
+#define  SDHCI_CLOCK_MUL_SHIFT	16
 
 #define SDHCI_CAPABILITIES_1	0x44
 
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index b74c8530e959..a88a799ed7c3 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -117,6 +117,7 @@ struct sdhci_host {
 
 	unsigned int max_clk;	/* Max possible freq (MHz) */
 	unsigned int timeout_clk;	/* Timeout freq (KHz) */
+	unsigned int clk_mul;	/* Clock Muliplier value */
 
 	unsigned int clock;	/* Current clock (MHz) */
 	u8 pwr;			/* Current voltage */
-- 
cgit v1.2.3


From cf2b5eea1ea0ff9b3184bc6771bcb93a9fdcd1d9 Mon Sep 17 00:00:00 2001
From: Arindam Nath <arindam.nath@amd.com>
Date: Thu, 5 May 2011 12:19:07 +0530
Subject: mmc: sdhci: add support for retuning mode 1

Host Controller v3.00 can support retuning modes 1,2 or 3 depending on
the bits 46-47 of the Capabilities register. Also, the timer count for
retuning is indicated by bits 40-43 of the same register. We initialize
timer_list for retuning the first time we execute tuning procedure. This
condition is indicated by SDHCI_NEEDS_RETUNING not being set. Since
retuning mode 1 sets a limit of 4MB on the maximum data length, we set
max_blk_count appropriately. Once the tuning timer expires, we set
SDHCI_NEEDS_RETUNING flag, and if the flag is set, we execute tuning
procedure before sending the next command. We need to restore mmc_request
structure after executing retuning procedure since host->mrq is used
inside the procedure to send CMD19. We also disable and re-enable this
flag during suspend and resume respectively, as per the spec v3.00.

Tested by Zhangfei Gao with a Toshiba uhs card and general hs card,
on mmp2 in SDMA mode.

Signed-off-by: Arindam Nath <arindam.nath@amd.com>
Reviewed-by: Philip Rakity <prakity@marvell.com>
Tested-by: Philip Rakity <prakity@marvell.com>
Acked-by: Zhangfei Gao <zhangfei.gao@marvell.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/sdhci.c  | 109 +++++++++++++++++++++++++++++++++++++++++++++-
 drivers/mmc/host/sdhci.h  |   6 ++-
 include/linux/mmc/sdhci.h |   6 +++
 3 files changed, 118 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 07cca557c4b5..fa3301644b15 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -46,6 +46,8 @@ static void sdhci_finish_data(struct sdhci_host *);
 
 static void sdhci_send_command(struct sdhci_host *, struct mmc_command *);
 static void sdhci_finish_command(struct sdhci_host *);
+static int sdhci_execute_tuning(struct mmc_host *mmc);
+static void sdhci_tuning_timer(unsigned long data);
 
 static void sdhci_dumpregs(struct sdhci_host *host)
 {
@@ -1206,8 +1208,27 @@ static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq)
 	if (!present || host->flags & SDHCI_DEVICE_DEAD) {
 		host->mrq->cmd->error = -ENOMEDIUM;
 		tasklet_schedule(&host->finish_tasklet);
-	} else
+	} else {
+		u32 present_state;
+
+		present_state = sdhci_readl(host, SDHCI_PRESENT_STATE);
+		/*
+		 * Check if the re-tuning timer has already expired and there
+		 * is no on-going data transfer. If so, we need to execute
+		 * tuning procedure before sending command.
+		 */
+		if ((host->flags & SDHCI_NEEDS_RETUNING) &&
+		    !(present_state & (SDHCI_DOING_WRITE | SDHCI_DOING_READ))) {
+			spin_unlock_irqrestore(&host->lock, flags);
+			sdhci_execute_tuning(mmc);
+			spin_lock_irqsave(&host->lock, flags);
+
+			/* Restore original mmc_request structure */
+			host->mrq = mrq;
+		}
+
 		sdhci_send_command(host, mrq->cmd);
+	}
 
 	mmiowb();
 	spin_unlock_irqrestore(&host->lock, flags);
@@ -1673,6 +1694,37 @@ static int sdhci_execute_tuning(struct mmc_host *mmc)
 	}
 
 out:
+	/*
+	 * If this is the very first time we are here, we start the retuning
+	 * timer. Since only during the first time, SDHCI_NEEDS_RETUNING
+	 * flag won't be set, we check this condition before actually starting
+	 * the timer.
+	 */
+	if (!(host->flags & SDHCI_NEEDS_RETUNING) && host->tuning_count &&
+	    (host->tuning_mode == SDHCI_TUNING_MODE_1)) {
+		mod_timer(&host->tuning_timer, jiffies +
+			host->tuning_count * HZ);
+		/* Tuning mode 1 limits the maximum data length to 4MB */
+		mmc->max_blk_count = (4 * 1024 * 1024) / mmc->max_blk_size;
+	} else {
+		host->flags &= ~SDHCI_NEEDS_RETUNING;
+		/* Reload the new initial value for timer */
+		if (host->tuning_mode == SDHCI_TUNING_MODE_1)
+			mod_timer(&host->tuning_timer, jiffies +
+				host->tuning_count * HZ);
+	}
+
+	/*
+	 * In case tuning fails, host controllers which support re-tuning can
+	 * try tuning again at a later time, when the re-tuning timer expires.
+	 * So for these controllers, we return 0. Since there might be other
+	 * controllers who do not have this capability, we return error for
+	 * them.
+	 */
+	if (err && host->tuning_count &&
+	    host->tuning_mode == SDHCI_TUNING_MODE_1)
+		err = 0;
+
 	sdhci_clear_set_irqs(host, SDHCI_INT_DATA_AVAIL, ier);
 	spin_unlock(&host->lock);
 	enable_irq(host->irq);
@@ -1775,6 +1827,9 @@ static void sdhci_tasklet_finish(unsigned long param)
 
 	del_timer(&host->timer);
 
+	if (host->version >= SDHCI_SPEC_300)
+		del_timer(&host->tuning_timer);
+
 	mrq = host->mrq;
 
 	/*
@@ -1848,6 +1903,20 @@ static void sdhci_timeout_timer(unsigned long data)
 	spin_unlock_irqrestore(&host->lock, flags);
 }
 
+static void sdhci_tuning_timer(unsigned long data)
+{
+	struct sdhci_host *host;
+	unsigned long flags;
+
+	host = (struct sdhci_host *)data;
+
+	spin_lock_irqsave(&host->lock, flags);
+
+	host->flags |= SDHCI_NEEDS_RETUNING;
+
+	spin_unlock_irqrestore(&host->lock, flags);
+}
+
 /*****************************************************************************\
  *                                                                           *
  * Interrupt handling                                                        *
@@ -2122,6 +2191,14 @@ int sdhci_suspend_host(struct sdhci_host *host, pm_message_t state)
 
 	sdhci_disable_card_detection(host);
 
+	/* Disable tuning since we are suspending */
+	if (host->version >= SDHCI_SPEC_300 && host->tuning_count &&
+	    host->tuning_mode == SDHCI_TUNING_MODE_1) {
+		host->flags &= ~SDHCI_NEEDS_RETUNING;
+		mod_timer(&host->tuning_timer, jiffies +
+			host->tuning_count * HZ);
+	}
+
 	ret = mmc_suspend_host(host->mmc);
 	if (ret)
 		return ret;
@@ -2163,6 +2240,11 @@ int sdhci_resume_host(struct sdhci_host *host)
 	ret = mmc_resume_host(host->mmc);
 	sdhci_enable_card_detection(host);
 
+	/* Set the re-tuning expiration flag */
+	if ((host->version >= SDHCI_SPEC_300) && host->tuning_count &&
+	    (host->tuning_mode == SDHCI_TUNING_MODE_1))
+		host->flags |= SDHCI_NEEDS_RETUNING;
+
 	return ret;
 }
 
@@ -2414,6 +2496,21 @@ int sdhci_add_host(struct sdhci_host *host)
 	if (caps[1] & SDHCI_DRIVER_TYPE_D)
 		mmc->caps |= MMC_CAP_DRIVER_TYPE_D;
 
+	/* Initial value for re-tuning timer count */
+	host->tuning_count = (caps[1] & SDHCI_RETUNING_TIMER_COUNT_MASK) >>
+			      SDHCI_RETUNING_TIMER_COUNT_SHIFT;
+
+	/*
+	 * In case Re-tuning Timer is not disabled, the actual value of
+	 * re-tuning timer will be 2 ^ (n - 1).
+	 */
+	if (host->tuning_count)
+		host->tuning_count = 1 << (host->tuning_count - 1);
+
+	/* Re-tuning mode supported by the Host Controller */
+	host->tuning_mode = (caps[1] & SDHCI_RETUNING_MODE_MASK) >>
+			     SDHCI_RETUNING_MODE_SHIFT;
+
 	ocr_avail = 0;
 	/*
 	 * According to SD Host Controller spec v3.00, if the Host System
@@ -2559,9 +2656,15 @@ int sdhci_add_host(struct sdhci_host *host)
 
 	setup_timer(&host->timer, sdhci_timeout_timer, (unsigned long)host);
 
-	if (host->version >= SDHCI_SPEC_300)
+	if (host->version >= SDHCI_SPEC_300) {
 		init_waitqueue_head(&host->buf_ready_int);
 
+		/* Initialize re-tuning timer */
+		init_timer(&host->tuning_timer);
+		host->tuning_timer.data = (unsigned long)host;
+		host->tuning_timer.function = sdhci_tuning_timer;
+	}
+
 	ret = request_irq(host->irq, sdhci_irq, IRQF_SHARED,
 		mmc_hostname(mmc), host);
 	if (ret)
@@ -2655,6 +2758,8 @@ void sdhci_remove_host(struct sdhci_host *host, int dead)
 	free_irq(host->irq, host);
 
 	del_timer_sync(&host->timer);
+	if (host->version >= SDHCI_SPEC_300)
+		del_timer_sync(&host->tuning_timer);
 
 	tasklet_kill(&host->card_tasklet);
 	tasklet_kill(&host->finish_tasklet);
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 6b0a0ee9ac6e..8ea11b7cf89a 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -191,7 +191,11 @@
 #define  SDHCI_DRIVER_TYPE_A	0x00000010
 #define  SDHCI_DRIVER_TYPE_C	0x00000020
 #define  SDHCI_DRIVER_TYPE_D	0x00000040
-#define  SDHCI_USE_SDR50_TUNING	0x00002000
+#define  SDHCI_RETUNING_TIMER_COUNT_MASK	0x00000F00
+#define  SDHCI_RETUNING_TIMER_COUNT_SHIFT	8
+#define  SDHCI_USE_SDR50_TUNING			0x00002000
+#define  SDHCI_RETUNING_MODE_MASK		0x0000C000
+#define  SDHCI_RETUNING_MODE_SHIFT		14
 #define  SDHCI_CLOCK_MUL_MASK	0x00FF0000
 #define  SDHCI_CLOCK_MUL_SHIFT	16
 
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index a88a799ed7c3..e902618d68f4 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -112,6 +112,7 @@ struct sdhci_host {
 #define SDHCI_REQ_USE_DMA	(1<<2)	/* Use DMA for this req. */
 #define SDHCI_DEVICE_DEAD	(1<<3)	/* Device unresponsive */
 #define SDHCI_SDR50_NEEDS_TUNING (1<<4)	/* SDR50 needs tuning */
+#define SDHCI_NEEDS_RETUNING	(1<<5)	/* Host needs retuning */
 
 	unsigned int version;	/* SDHCI spec. version */
 
@@ -152,6 +153,11 @@ struct sdhci_host {
 	wait_queue_head_t	buf_ready_int;	/* Waitqueue for Buffer Read Ready interrupt */
 	unsigned int		tuning_done;	/* Condition flag set when CMD19 succeeds */
 
+	unsigned int		tuning_count;	/* Timer count for re-tuning */
+	unsigned int		tuning_mode;	/* Re-tuning mode supported by host */
+#define SDHCI_TUNING_MODE_1	0
+	struct timer_list	tuning_timer;	/* Timer for tuning */
+
 	unsigned long private[0] ____cacheline_aligned;
 };
 #endif /* __SDHCI_H */
-- 
cgit v1.2.3


From 06e8935febe687e2a561707d4c7ca4245d261dbe Mon Sep 17 00:00:00 2001
From: Stefan Nilsson XK <stefan.xk.nilsson@stericsson.com>
Date: Wed, 11 May 2011 17:48:05 +0200
Subject: mmc: sdio: optimized SDIO IRQ handling for single irq

If there is only 1 function interrupt registered it is possible to
improve performance by directly calling the irq handler and avoiding
the overhead of reading the CCCR registers.

Signed-off-by: Per Forlin <per.forlin@linaro.org>
Acked-by: Ulf Hansson <ulf.hansson@stericsson.com>
Reviewed-by: Nicolas Pitre <nicolas.pitre@linaro.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/sdio_irq.c | 33 ++++++++++++++++++++++++++++++++-
 include/linux/mmc/card.h    |  1 +
 2 files changed, 33 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/sdio_irq.c b/drivers/mmc/core/sdio_irq.c
index b3001617e67d..03ead028d2ce 100644
--- a/drivers/mmc/core/sdio_irq.c
+++ b/drivers/mmc/core/sdio_irq.c
@@ -31,6 +31,17 @@ static int process_sdio_pending_irqs(struct mmc_card *card)
 {
 	int i, ret, count;
 	unsigned char pending;
+	struct sdio_func *func;
+
+	/*
+	 * Optimization, if there is only 1 function interrupt registered
+	 * call irq handler directly
+	 */
+	func = card->sdio_single_irq;
+	if (func) {
+		func->irq_handler(func);
+		return 1;
+	}
 
 	ret = mmc_io_rw_direct(card, 0, 0, SDIO_CCCR_INTx, 0, &pending);
 	if (ret) {
@@ -42,7 +53,7 @@ static int process_sdio_pending_irqs(struct mmc_card *card)
 	count = 0;
 	for (i = 1; i <= 7; i++) {
 		if (pending & (1 << i)) {
-			struct sdio_func *func = card->sdio_func[i - 1];
+			func = card->sdio_func[i - 1];
 			if (!func) {
 				printk(KERN_WARNING "%s: pending IRQ for "
 					"non-existent function\n",
@@ -186,6 +197,24 @@ static int sdio_card_irq_put(struct mmc_card *card)
 	return 0;
 }
 
+/* If there is only 1 function registered set sdio_single_irq */
+static void sdio_single_irq_set(struct mmc_card *card)
+{
+	struct sdio_func *func;
+	int i;
+
+	card->sdio_single_irq = NULL;
+	if ((card->host->caps & MMC_CAP_SDIO_IRQ) &&
+	    card->host->sdio_irqs == 1)
+		for (i = 0; i < card->sdio_funcs; i++) {
+		       func = card->sdio_func[i];
+		       if (func && func->irq_handler) {
+			       card->sdio_single_irq = func;
+			       break;
+		       }
+	       }
+}
+
 /**
  *	sdio_claim_irq - claim the IRQ for a SDIO function
  *	@func: SDIO function
@@ -227,6 +256,7 @@ int sdio_claim_irq(struct sdio_func *func, sdio_irq_handler_t *handler)
 	ret = sdio_card_irq_get(func->card);
 	if (ret)
 		func->irq_handler = NULL;
+	sdio_single_irq_set(func->card);
 
 	return ret;
 }
@@ -251,6 +281,7 @@ int sdio_release_irq(struct sdio_func *func)
 	if (func->irq_handler) {
 		func->irq_handler = NULL;
 		sdio_card_irq_put(func->card);
+		sdio_single_irq_set(func->card);
 	}
 
 	ret = mmc_io_rw_direct(func->card, 0, 0, SDIO_CCCR_IENx, 0, &reg);
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index d8dffc992ce2..4910dec47bb7 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -191,6 +191,7 @@ struct mmc_card {
 	struct sdio_cccr	cccr;		/* common card info */
 	struct sdio_cis		cis;		/* common tuple info */
 	struct sdio_func	*sdio_func[SDIO_MAX_FUNCS]; /* SDIO functions (devices) */
+	struct sdio_func	*sdio_single_irq; /* SDIO function when only one IRQ active */
 	unsigned		num_info;	/* number of info strings */
 	const char		**info;		/* info strings */
 	struct sdio_func_tuple	*tuples;	/* unknown common tuples */
-- 
cgit v1.2.3


From 7311bef0697bcfbbcb898c3c22e61e23f203ae9d Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Wed, 11 May 2011 16:51:11 +0000
Subject: mmc: tmio: runtime suspend the controller, where possible

The TMIO MMC controller cannot be powered off to save power, when no
card is plugged in, because then it will not be able to detect a new
card-insertion event. On some implementations, however, it is
possible to switch to using another source to detect card insertion.
This patch adds support for such implementations.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/tmio_mmc.h     |  3 ++
 drivers/mmc/host/tmio_mmc_pio.c | 64 +++++++++++++++++++++++++++++++++++++----
 include/linux/mfd/tmio.h        | 17 +++++++++++
 3 files changed, 78 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h
index c6bf726c8f44..8260bc2c34e3 100644
--- a/drivers/mmc/host/tmio_mmc.h
+++ b/drivers/mmc/host/tmio_mmc.h
@@ -131,4 +131,7 @@ int tmio_mmc_host_resume(struct device *dev);
 #define tmio_mmc_host_resume NULL
 #endif
 
+int tmio_mmc_host_runtime_suspend(struct device *dev);
+int tmio_mmc_host_runtime_resume(struct device *dev);
+
 #endif
diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c
index af5d4f6d2233..ad6347bb02dd 100644
--- a/drivers/mmc/host/tmio_mmc_pio.c
+++ b/drivers/mmc/host/tmio_mmc_pio.c
@@ -746,6 +746,7 @@ fail:
 static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 {
 	struct tmio_mmc_host *host = mmc_priv(mmc);
+	struct tmio_mmc_data *pdata = host->pdata;
 	unsigned long flags;
 
 	spin_lock_irqsave(&host->lock, flags);
@@ -775,13 +776,24 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 
 	/* Power sequence - OFF -> UP -> ON */
 	if (ios->power_mode == MMC_POWER_UP) {
+		if ((pdata->flags & TMIO_MMC_HAS_COLD_CD) && !pdata->power) {
+			pm_runtime_get_sync(&host->pdev->dev);
+			pdata->power = true;
+		}
 		/* power up SD bus */
 		if (host->set_pwr)
 			host->set_pwr(host->pdev, 1);
 	} else if (ios->power_mode == MMC_POWER_OFF || !ios->clock) {
 		/* power down SD bus */
-		if (ios->power_mode == MMC_POWER_OFF && host->set_pwr)
-			host->set_pwr(host->pdev, 0);
+		if (ios->power_mode == MMC_POWER_OFF) {
+			if (host->set_pwr)
+				host->set_pwr(host->pdev, 0);
+			if ((pdata->flags & TMIO_MMC_HAS_COLD_CD) &&
+			    pdata->power) {
+				pdata->power = false;
+				pm_runtime_put(&host->pdev->dev);
+			}
+		}
 		tmio_mmc_clk_stop(host);
 	} else {
 		/* start bus clock */
@@ -853,6 +865,7 @@ int __devinit tmio_mmc_host_probe(struct tmio_mmc_host **host,
 	if (!mmc)
 		return -ENOMEM;
 
+	pdata->dev = &pdev->dev;
 	_host = mmc_priv(mmc);
 	_host->pdata = pdata;
 	_host->mmc = mmc;
@@ -886,6 +899,7 @@ int __devinit tmio_mmc_host_probe(struct tmio_mmc_host **host,
 	else
 		mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
 
+	pdata->power = false;
 	pm_runtime_enable(&pdev->dev);
 	ret = pm_runtime_resume(&pdev->dev);
 	if (ret < 0)
@@ -907,7 +921,8 @@ int __devinit tmio_mmc_host_probe(struct tmio_mmc_host **host,
 	tmio_mmc_request_dma(_host, pdata);
 
 	/* We have to keep the device powered for its card detection to work */
-	pm_runtime_get_noresume(&pdev->dev);
+	if (!(pdata->flags & TMIO_MMC_HAS_COLD_CD))
+		pm_runtime_get_noresume(&pdev->dev);
 
 	mmc_add_host(mmc);
 
@@ -937,15 +952,25 @@ void tmio_mmc_host_remove(struct tmio_mmc_host *host)
 {
 	struct platform_device *pdev = host->pdev;
 
+	/*
+	 * We don't have to manipulate pdata->power here: if there is a card in
+	 * the slot, the runtime PM is active and our .runtime_resume() will not
+	 * be run. If there is no card in the slot and the platform can suspend
+	 * the controller, the runtime PM is suspended and pdata->power == false,
+	 * so, our .runtime_resume() will not try to detect a card in the slot.
+	 */
+	if (host->pdata->flags & TMIO_MMC_HAS_COLD_CD)
+		pm_runtime_get_sync(&pdev->dev);
+
 	mmc_remove_host(host->mmc);
 	cancel_delayed_work_sync(&host->delayed_reset_work);
 	tmio_mmc_release_dma(host);
-	iounmap(host->ctl);
-	mmc_free_host(host->mmc);
 
-	/* Compensate for pm_runtime_get_sync() in probe() above */
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
+
+	iounmap(host->ctl);
+	mmc_free_host(host->mmc);
 }
 EXPORT_SYMBOL(tmio_mmc_host_remove);
 
@@ -970,6 +995,9 @@ int tmio_mmc_host_resume(struct device *dev)
 	struct mmc_host *mmc = dev_get_drvdata(dev);
 	struct tmio_mmc_host *host = mmc_priv(mmc);
 
+	/* The MMC core will perform the complete set up */
+	host->pdata->power = false;
+
 	if (!host->pm_error)
 		pm_runtime_get_sync(dev);
 
@@ -982,4 +1010,28 @@ EXPORT_SYMBOL(tmio_mmc_host_resume);
 
 #endif	/* CONFIG_PM */
 
+int tmio_mmc_host_runtime_suspend(struct device *dev)
+{
+	return 0;
+}
+EXPORT_SYMBOL(tmio_mmc_host_runtime_suspend);
+
+int tmio_mmc_host_runtime_resume(struct device *dev)
+{
+	struct mmc_host *mmc = dev_get_drvdata(dev);
+	struct tmio_mmc_host *host = mmc_priv(mmc);
+	struct tmio_mmc_data *pdata = host->pdata;
+
+	tmio_mmc_reset(host);
+
+	if (pdata->power) {
+		/* Only entered after a card-insert interrupt */
+		tmio_mmc_set_ios(mmc, &mmc->ios);
+		mmc_detect_change(mmc, msecs_to_jiffies(100));
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(tmio_mmc_host_runtime_resume);
+
 MODULE_LICENSE("GPL v2");
diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index 8e70310ee945..5a90266c3a5a 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -4,6 +4,7 @@
 #include <linux/fb.h>
 #include <linux/io.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 
 #define tmio_ioread8(addr) readb(addr)
 #define tmio_ioread16(addr) readw(addr)
@@ -61,6 +62,12 @@
  * Some controllers can support SDIO IRQ signalling.
  */
 #define TMIO_MMC_SDIO_IRQ		(1 << 2)
+/*
+ * Some platforms can detect card insertion events with controller powered
+ * down, in which case they have to call tmio_mmc_cd_wakeup() to power up the
+ * controller and report the event to the driver.
+ */
+#define TMIO_MMC_HAS_COLD_CD		(1 << 3)
 
 int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base);
 int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base);
@@ -82,11 +89,21 @@ struct tmio_mmc_data {
 	unsigned long			flags;
 	u32				ocr_mask;	/* available voltages */
 	struct tmio_mmc_dma		*dma;
+	struct device			*dev;
+	bool				power;
 	void (*set_pwr)(struct platform_device *host, int state);
 	void (*set_clk_div)(struct platform_device *host, int state);
 	int (*get_cd)(struct platform_device *host);
 };
 
+static inline void tmio_mmc_cd_wakeup(struct tmio_mmc_data *pdata)
+{
+	if (pdata && !pdata->power) {
+		pdata->power = true;
+		pm_runtime_get(pdata->dev);
+	}
+}
+
 /*
  * data for the NAND controller
  */
-- 
cgit v1.2.3


From 2595880481ac894d390365092de9aaf92b44e147 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Wed, 11 May 2011 16:51:15 +0000
Subject: mmc: sdhi: allow powering down controller with no card inserted

Supply a link to TMIO private data for platforms to implement their
own card detection.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/sh_mobile_sdhi.c  | 6 ++++++
 include/linux/mmc/sh_mobile_sdhi.h | 4 ++++
 2 files changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c
index d264bbeb529b..b3654293017b 100644
--- a/drivers/mmc/host/sh_mobile_sdhi.c
+++ b/drivers/mmc/host/sh_mobile_sdhi.c
@@ -71,6 +71,7 @@ static int __devinit sh_mobile_sdhi_probe(struct platform_device *pdev)
 	}
 
 	mmc_data = &priv->mmc_data;
+	p->pdata = mmc_data;
 
 	snprintf(clk_name, sizeof(clk_name), "sdhi%d", pdev->id);
 	priv->clk = clk_get(&pdev->dev, clk_name);
@@ -159,8 +160,11 @@ static int sh_mobile_sdhi_remove(struct platform_device *pdev)
 	struct mmc_host *mmc = platform_get_drvdata(pdev);
 	struct tmio_mmc_host *host = mmc_priv(mmc);
 	struct sh_mobile_sdhi *priv = container_of(host->pdata, struct sh_mobile_sdhi, mmc_data);
+	struct sh_mobile_sdhi_info *p = pdev->dev.platform_data;
 	int i, irq;
 
+	p->pdata = NULL;
+
 	for (i = 0; i < 3; i++) {
 		irq = platform_get_irq(pdev, i);
 		if (irq >= 0)
@@ -178,6 +182,8 @@ static int sh_mobile_sdhi_remove(struct platform_device *pdev)
 static const struct dev_pm_ops tmio_mmc_dev_pm_ops = {
 	.suspend = tmio_mmc_host_suspend,
 	.resume = tmio_mmc_host_resume,
+	.runtime_suspend = tmio_mmc_host_runtime_suspend,
+	.runtime_resume = tmio_mmc_host_runtime_resume,
 };
 
 static struct platform_driver sh_mobile_sdhi_driver = {
diff --git a/include/linux/mmc/sh_mobile_sdhi.h b/include/linux/mmc/sh_mobile_sdhi.h
index c981b959760f..faf32b6ec185 100644
--- a/include/linux/mmc/sh_mobile_sdhi.h
+++ b/include/linux/mmc/sh_mobile_sdhi.h
@@ -3,12 +3,16 @@
 
 #include <linux/types.h>
 
+struct platform_device;
+struct tmio_mmc_data;
+
 struct sh_mobile_sdhi_info {
 	int dma_slave_tx;
 	int dma_slave_rx;
 	unsigned long tmio_flags;
 	unsigned long tmio_caps;
 	u32 tmio_ocr_mask;	/* available MMC voltages */
+	struct tmio_mmc_data *pdata;
 	void (*set_pwr)(struct platform_device *pdev, int state);
 	int (*get_cd)(struct platform_device *pdev);
 };
-- 
cgit v1.2.3


From 4c4cb171054230c2e58ed6574d7faa1871c75bbe Mon Sep 17 00:00:00 2001
From: Philip Rakity <prakity@marvell.com>
Date: Fri, 13 May 2011 11:17:18 +0530
Subject: mmc: core: add support for eMMC Dual Data Rate

eMMC voltage change not required for 1.8V.  3.3V and 1.8V vcc
are capable of doing DDR. vccq of 1.8v is not required.

Signed-off-by: Philip Rakity <prakity@marvell.com>
Reviewed-by: Arindam Nath <arindam.nath@amd.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/core.c  | 14 ++------------
 drivers/mmc/core/core.h  |  2 --
 drivers/mmc/core/mmc.c   | 35 ++++++++++++++++++++++++++++++-----
 include/linux/mmc/host.h |  1 +
 4 files changed, 33 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 94c8d5a9ecae..7863eedf3d9a 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -717,23 +717,13 @@ void mmc_set_bus_mode(struct mmc_host *host, unsigned int mode)
 	mmc_set_ios(host);
 }
 
-/*
- * Change data bus width and DDR mode of a host.
- */
-void mmc_set_bus_width_ddr(struct mmc_host *host, unsigned int width,
-			   unsigned int ddr)
-{
-	host->ios.bus_width = width;
-	host->ios.ddr = ddr;
-	mmc_set_ios(host);
-}
-
 /*
  * Change data bus width of a host.
  */
 void mmc_set_bus_width(struct mmc_host *host, unsigned int width)
 {
-	mmc_set_bus_width_ddr(host, width, MMC_SDR_MODE);
+	host->ios.bus_width = width;
+	mmc_set_ios(host);
 }
 
 /**
diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h
index 53d23c240324..d9411ed2a39b 100644
--- a/drivers/mmc/core/core.h
+++ b/drivers/mmc/core/core.h
@@ -38,8 +38,6 @@ void mmc_ungate_clock(struct mmc_host *host);
 void mmc_set_ungated(struct mmc_host *host);
 void mmc_set_bus_mode(struct mmc_host *host, unsigned int mode);
 void mmc_set_bus_width(struct mmc_host *host, unsigned int width);
-void mmc_set_bus_width_ddr(struct mmc_host *host, unsigned int width,
-			   unsigned int ddr);
 u32 mmc_select_voltage(struct mmc_host *host, u32 ocr);
 int mmc_set_signal_voltage(struct mmc_host *host, int signal_voltage,
 			   bool cmd11);
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index a2c795e8f9dd..0433fe66cbad 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -20,6 +20,7 @@
 #include "core.h"
 #include "bus.h"
 #include "mmc_ops.h"
+#include "sd_ops.h"
 
 static const unsigned int tran_exp[] = {
 	10000,		100000,		1000000,	10000000,
@@ -633,10 +634,14 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 	 */
 	if (mmc_card_highspeed(card)) {
 		if ((card->ext_csd.card_type & EXT_CSD_CARD_TYPE_DDR_1_8V)
-			&& (host->caps & (MMC_CAP_1_8V_DDR)))
+			&& ((host->caps & (MMC_CAP_1_8V_DDR |
+			     MMC_CAP_UHS_DDR50))
+				== (MMC_CAP_1_8V_DDR | MMC_CAP_UHS_DDR50)))
 				ddr = MMC_1_8V_DDR_MODE;
 		else if ((card->ext_csd.card_type & EXT_CSD_CARD_TYPE_DDR_1_2V)
-			&& (host->caps & (MMC_CAP_1_2V_DDR)))
+			&& ((host->caps & (MMC_CAP_1_2V_DDR |
+			     MMC_CAP_UHS_DDR50))
+				== (MMC_CAP_1_2V_DDR | MMC_CAP_UHS_DDR50)))
 				ddr = MMC_1_2V_DDR_MODE;
 	}
 
@@ -670,8 +675,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 					 ext_csd_bits[idx][0],
 					 0);
 			if (!err) {
-				mmc_set_bus_width_ddr(card->host,
-						      bus_width, MMC_SDR_MODE);
+				mmc_set_bus_width(card->host, bus_width);
 				/*
 				 * If controller can't handle bus width test,
 				 * use the highest bus width to maintain
@@ -697,8 +701,29 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 				1 << bus_width, ddr);
 			goto free_card;
 		} else if (ddr) {
+			/*
+			 * eMMC cards can support 3.3V to 1.2V i/o (vccq)
+			 * signaling.
+			 *
+			 * EXT_CSD_CARD_TYPE_DDR_1_8V means 3.3V or 1.8V vccq.
+			 *
+			 * 1.8V vccq at 3.3V core voltage (vcc) is not required
+			 * in the JEDEC spec for DDR.
+			 *
+			 * Do not force change in vccq since we are obviously
+			 * working and no change to vccq is needed.
+			 *
+			 * WARNING: eMMC rules are NOT the same as SD DDR
+			 */
+			if (ddr == EXT_CSD_CARD_TYPE_DDR_1_2V) {
+				err = mmc_set_signal_voltage(host,
+					MMC_SIGNAL_VOLTAGE_120, 0);
+				if (err)
+					goto err;
+			}
 			mmc_card_set_ddr_mode(card);
-			mmc_set_bus_width_ddr(card->host, bus_width, ddr);
+			mmc_set_timing(card->host, MMC_TIMING_UHS_DDR50);
+			mmc_set_bus_width(card->host, bus_width);
 		}
 	}
 
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 6716bd12eccd..de32e6aa018a 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -66,6 +66,7 @@ struct mmc_ios {
 
 #define MMC_SIGNAL_VOLTAGE_330	0
 #define MMC_SIGNAL_VOLTAGE_180	1
+#define MMC_SIGNAL_VOLTAGE_120	2
 
 	unsigned char	drv_type;		/* driver type (A, B, C, D) */
 
-- 
cgit v1.2.3


From c59de9287993b5c36f9005f745a3ce0b1008131d Mon Sep 17 00:00:00 2001
From: Andrei Warkentin <andreiw@motorola.com>
Date: Mon, 23 May 2011 15:06:35 -0500
Subject: mmc: quirks: Add/remove quirks conditional support.

Conditional add/remove quirks for MMC and SD.

Signed-off-by: Andrei Warkentin <andreiw@motorola.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 include/linux/mmc/card.h | 40 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 36 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 4910dec47bb7..7190aa2096f7 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -273,16 +273,14 @@ struct mmc_fixup {
 		    card->cid.month)
 
 /*
- * This hook just adds a quirk unconditionally.
+ * Unconditionally quirk add/remove.
  */
+
 static inline void __maybe_unused add_quirk(struct mmc_card *card, int data)
 {
 	card->quirks |= data;
 }
 
-/*
- * This hook just removes a quirk unconditionally.
- */
 static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
 {
 	card->quirks &= ~data;
@@ -308,6 +306,40 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
 #define mmc_sd_card_set_uhs(c) ((c)->state |= MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_card_set_ext_capacity(c) ((c)->state |= MMC_CARD_SDXC)
 
+/*
+ * Quirk add/remove for MMC products.
+ */
+
+static inline void __maybe_unused add_quirk_mmc(struct mmc_card *card, int data)
+{
+	if (mmc_card_mmc(card))
+		card->quirks |= data;
+}
+
+static inline void __maybe_unused remove_quirk_mmc(struct mmc_card *card,
+						   int data)
+{
+	if (mmc_card_mmc(card))
+		card->quirks &= ~data;
+}
+
+/*
+ * Quirk add/remove for SD products.
+ */
+
+static inline void __maybe_unused add_quirk_sd(struct mmc_card *card, int data)
+{
+	if (mmc_card_sd(card))
+		card->quirks |= data;
+}
+
+static inline void __maybe_unused remove_quirk_sd(struct mmc_card *card,
+						   int data)
+{
+	if (mmc_card_sd(card))
+		card->quirks &= ~data;
+}
+
 static inline int mmc_card_lenient_fn0(const struct mmc_card *c)
 {
 	return c->quirks & MMC_QUIRK_LENIENT_FN0;
-- 
cgit v1.2.3


From 7bf02ea22c6cdd09e2d3f1d3c3fe366b834ae9af Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Tue, 24 May 2011 17:11:16 -0700
Subject: arch, mm: filter disallowed nodes from arch specific show_mem
 functions

Architectures that implement their own show_mem() function did not pass
the filter argument to show_free_areas() to appropriately avoid emitting
the state of nodes that are disallowed in the current context.  This patch
now passes the filter argument to show_free_areas() so those nodes are now
avoided.

This patch also removes the show_free_areas() wrapper around
__show_free_areas() and converts existing callers to pass an empty filter.

ia64 emits additional information for each node, so skip_free_areas_zone()
must be made global to filter disallowed nodes and it is converted to use
a nid argument rather than a zone for this use case.

Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Helge Deller <deller@gmx.de>
Cc: James Bottomley <jejb@parisc-linux.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Guan Xuetao <gxt@mprc.pku.edu.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/mm/init.c               |  2 +-
 arch/ia64/mm/contig.c            | 10 ++++++----
 arch/ia64/mm/discontig.c         | 10 ++++++----
 arch/parisc/mm/init.c            |  2 +-
 arch/sparc/kernel/setup_32.c     |  2 +-
 arch/sparc/mm/init_32.c          |  2 +-
 arch/unicore32/mm/init.c         |  2 +-
 drivers/net/ioc3-eth.c           |  2 +-
 drivers/tty/serial/68328serial.c |  2 +-
 include/linux/mm.h               |  6 +++---
 lib/show_mem.c                   |  2 +-
 mm/nommu.c                       |  6 +++---
 mm/page_alloc.c                  | 22 ++++++++--------------
 13 files changed, 34 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 76f82ae44efb..3f17ea146f0e 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -85,7 +85,7 @@ void show_mem(unsigned int filter)
 	struct meminfo * mi = &meminfo;
 
 	printk("Mem-info:\n");
-	show_free_areas();
+	show_free_areas(filter);
 
 	for_each_bank (i, mi) {
 		struct membank *bank = &mi->bank[i];
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index 9a018cde5d84..f114a3b14c6a 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -44,13 +44,16 @@ void show_mem(unsigned int filter)
 	pg_data_t *pgdat;
 
 	printk(KERN_INFO "Mem-info:\n");
-	show_free_areas();
+	show_free_areas(filter);
 	printk(KERN_INFO "Node memory in pages:\n");
 	for_each_online_pgdat(pgdat) {
 		unsigned long present;
 		unsigned long flags;
 		int shared = 0, cached = 0, reserved = 0;
+		int nid = pgdat->node_id;
 
+		if (skip_free_areas_node(filter, nid))
+			continue;
 		pgdat_resize_lock(pgdat, &flags);
 		present = pgdat->node_present_pages;
 		for(i = 0; i < pgdat->node_spanned_pages; i++) {
@@ -64,8 +67,7 @@ void show_mem(unsigned int filter)
 				if (max_gap < LARGE_GAP)
 					continue;
 #endif
-				i = vmemmap_find_next_valid_pfn(pgdat->node_id,
-					 i) - 1;
+				i = vmemmap_find_next_valid_pfn(nid, i) - 1;
 				continue;
 			}
 			if (PageReserved(page))
@@ -81,7 +83,7 @@ void show_mem(unsigned int filter)
 		total_cached += cached;
 		total_shared += shared;
 		printk(KERN_INFO "Node %4d:  RAM: %11ld, rsvd: %8d, "
-		       "shrd: %10d, swpd: %10d\n", pgdat->node_id,
+		       "shrd: %10d, swpd: %10d\n", nid,
 		       present, reserved, shared, cached);
 	}
 	printk(KERN_INFO "%ld pages of RAM\n", total_present);
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 82ab1bc6afb1..c641333cd997 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -622,13 +622,16 @@ void show_mem(unsigned int filter)
 	pg_data_t *pgdat;
 
 	printk(KERN_INFO "Mem-info:\n");
-	show_free_areas();
+	show_free_areas(filter);
 	printk(KERN_INFO "Node memory in pages:\n");
 	for_each_online_pgdat(pgdat) {
 		unsigned long present;
 		unsigned long flags;
 		int shared = 0, cached = 0, reserved = 0;
+		int nid = pgdat->node_id;
 
+		if (skip_free_areas_node(filter, nid))
+			continue;
 		pgdat_resize_lock(pgdat, &flags);
 		present = pgdat->node_present_pages;
 		for(i = 0; i < pgdat->node_spanned_pages; i++) {
@@ -638,8 +641,7 @@ void show_mem(unsigned int filter)
 			if (pfn_valid(pgdat->node_start_pfn + i))
 				page = pfn_to_page(pgdat->node_start_pfn + i);
 			else {
-				i = vmemmap_find_next_valid_pfn(pgdat->node_id,
-					 i) - 1;
+				i = vmemmap_find_next_valid_pfn(nid, i) - 1;
 				continue;
 			}
 			if (PageReserved(page))
@@ -655,7 +657,7 @@ void show_mem(unsigned int filter)
 		total_cached += cached;
 		total_shared += shared;
 		printk(KERN_INFO "Node %4d:  RAM: %11ld, rsvd: %8d, "
-		       "shrd: %10d, swpd: %10d\n", pgdat->node_id,
+		       "shrd: %10d, swpd: %10d\n", nid,
 		       present, reserved, shared, cached);
 	}
 	printk(KERN_INFO "%ld pages of RAM\n", total_present);
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 5fa1e273006e..c5c9c65e502d 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -686,7 +686,7 @@ void show_mem(unsigned int filter)
 	int shared = 0, cached = 0;
 
 	printk(KERN_INFO "Mem-info:\n");
-	show_free_areas();
+	show_free_areas(filter);
 #ifndef CONFIG_DISCONTIGMEM
 	i = max_mapnr;
 	while (i-- > 0) {
diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c
index 3609bdee9ed2..3249d3f3234d 100644
--- a/arch/sparc/kernel/setup_32.c
+++ b/arch/sparc/kernel/setup_32.c
@@ -82,7 +82,7 @@ static void prom_sync_me(void)
 			     "nop\n\t" : : "r" (&trapbase));
 
 	prom_printf("PROM SYNC COMMAND...\n");
-	show_free_areas();
+	show_free_areas(0);
 	if(current->pid != 0) {
 		local_irq_enable();
 		sys_sync();
diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c
index 4c31e2b6e71b..28c2cc81c9a9 100644
--- a/arch/sparc/mm/init_32.c
+++ b/arch/sparc/mm/init_32.c
@@ -78,7 +78,7 @@ void __init kmap_init(void)
 void show_mem(unsigned int filter)
 {
 	printk("Mem-info:\n");
-	show_free_areas();
+	show_free_areas(filter);
 	printk("Free swap:       %6ldkB\n",
 	       nr_swap_pages << (PAGE_SHIFT-10));
 	printk("%ld pages of RAM\n", totalram_pages);
diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c
index 1fc02633f700..2d3e7112d2a3 100644
--- a/arch/unicore32/mm/init.c
+++ b/arch/unicore32/mm/init.c
@@ -62,7 +62,7 @@ void show_mem(unsigned int filter)
 	struct meminfo *mi = &meminfo;
 
 	printk(KERN_DEFAULT "Mem-info:\n");
-	show_free_areas();
+	show_free_areas(filter);
 
 	for_each_bank(i, mi) {
 		struct membank *bank = &mi->bank[i];
diff --git a/drivers/net/ioc3-eth.c b/drivers/net/ioc3-eth.c
index 96c95617195f..32f07f868d89 100644
--- a/drivers/net/ioc3-eth.c
+++ b/drivers/net/ioc3-eth.c
@@ -915,7 +915,7 @@ static void ioc3_alloc_rings(struct net_device *dev)
 
 			skb = ioc3_alloc_skb(RX_BUF_ALLOC_SIZE, GFP_ATOMIC);
 			if (!skb) {
-				show_free_areas();
+				show_free_areas(0);
 				continue;
 			}
 
diff --git a/drivers/tty/serial/68328serial.c b/drivers/tty/serial/68328serial.c
index d5bfd41707e7..e0a77540b8ca 100644
--- a/drivers/tty/serial/68328serial.c
+++ b/drivers/tty/serial/68328serial.c
@@ -281,7 +281,7 @@ static void receive_chars(struct m68k_serial *info, unsigned short rx)
 #ifdef CONFIG_MAGIC_SYSRQ
 			} else if (ch == 0x10) { /* ^P */
 				show_state();
-				show_free_areas();
+				show_free_areas(0);
 				show_buffers();
 /*				show_net_buffers(); */
 				return;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6507dde38b16..1746f67c33de 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -862,13 +862,13 @@ extern void pagefault_out_of_memory(void);
 #define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)
 
 /*
- * Flags passed to show_mem() and __show_free_areas() to suppress output in
+ * Flags passed to show_mem() and show_free_areas() to suppress output in
  * various contexts.
  */
 #define SHOW_MEM_FILTER_NODES	(0x0001u)	/* filter disallowed nodes */
 
-extern void show_free_areas(void);
-extern void __show_free_areas(unsigned int flags);
+extern void show_free_areas(unsigned int flags);
+extern bool skip_free_areas_node(unsigned int flags, int nid);
 
 int shmem_lock(struct file *file, int lock, struct user_struct *user);
 struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags);
diff --git a/lib/show_mem.c b/lib/show_mem.c
index 90cbe4bb5960..4407f8c9b1f7 100644
--- a/lib/show_mem.c
+++ b/lib/show_mem.c
@@ -16,7 +16,7 @@ void show_mem(unsigned int filter)
 		nonshared = 0, highmem = 0;
 
 	printk("Mem-Info:\n");
-	__show_free_areas(filter);
+	show_free_areas(filter);
 
 	for_each_online_pgdat(pgdat) {
 		unsigned long i, flags;
diff --git a/mm/nommu.c b/mm/nommu.c
index c4c542c736a9..5afce48ce339 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1235,7 +1235,7 @@ error_free:
 enomem:
 	printk("Allocation of length %lu from process %d (%s) failed\n",
 	       len, current->pid, current->comm);
-	show_free_areas();
+	show_free_areas(0);
 	return -ENOMEM;
 }
 
@@ -1468,14 +1468,14 @@ error_getting_vma:
 	printk(KERN_WARNING "Allocation of vma for %lu byte allocation"
 	       " from process %d failed\n",
 	       len, current->pid);
-	show_free_areas();
+	show_free_areas(0);
 	return -ENOMEM;
 
 error_getting_region:
 	printk(KERN_WARNING "Allocation of vm region for %lu byte allocation"
 	       " from process %d failed\n",
 	       len, current->pid);
-	show_free_areas();
+	show_free_areas(0);
 	return -ENOMEM;
 }
 EXPORT_SYMBOL(do_mmap_pgoff);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9d5498e2d0f5..b1447522d346 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2473,10 +2473,10 @@ void si_meminfo_node(struct sysinfo *val, int nid)
 #endif
 
 /*
- * Determine whether the zone's node should be displayed or not, depending on
- * whether SHOW_MEM_FILTER_NODES was passed to __show_free_areas().
+ * Determine whether the node should be displayed or not, depending on whether
+ * SHOW_MEM_FILTER_NODES was passed to show_free_areas().
  */
-static bool skip_free_areas_zone(unsigned int flags, const struct zone *zone)
+bool skip_free_areas_node(unsigned int flags, int nid)
 {
 	bool ret = false;
 
@@ -2484,8 +2484,7 @@ static bool skip_free_areas_zone(unsigned int flags, const struct zone *zone)
 		goto out;
 
 	get_mems_allowed();
-	ret = !node_isset(zone->zone_pgdat->node_id,
-				cpuset_current_mems_allowed);
+	ret = !node_isset(nid, cpuset_current_mems_allowed);
 	put_mems_allowed();
 out:
 	return ret;
@@ -2500,13 +2499,13 @@ out:
  * Suppresses nodes that are not allowed by current's cpuset if
  * SHOW_MEM_FILTER_NODES is passed.
  */
-void __show_free_areas(unsigned int filter)
+void show_free_areas(unsigned int filter)
 {
 	int cpu;
 	struct zone *zone;
 
 	for_each_populated_zone(zone) {
-		if (skip_free_areas_zone(filter, zone))
+		if (skip_free_areas_node(filter, zone_to_nid(zone)))
 			continue;
 		show_node(zone);
 		printk("%s per-cpu:\n", zone->name);
@@ -2549,7 +2548,7 @@ void __show_free_areas(unsigned int filter)
 	for_each_populated_zone(zone) {
 		int i;
 
-		if (skip_free_areas_zone(filter, zone))
+		if (skip_free_areas_node(filter, zone_to_nid(zone)))
 			continue;
 		show_node(zone);
 		printk("%s"
@@ -2618,7 +2617,7 @@ void __show_free_areas(unsigned int filter)
 	for_each_populated_zone(zone) {
  		unsigned long nr[MAX_ORDER], flags, order, total = 0;
 
-		if (skip_free_areas_zone(filter, zone))
+		if (skip_free_areas_node(filter, zone_to_nid(zone)))
 			continue;
 		show_node(zone);
 		printk("%s: ", zone->name);
@@ -2639,11 +2638,6 @@ void __show_free_areas(unsigned int filter)
 	show_swap_cache_info();
 }
 
-void show_free_areas(void)
-{
-	__show_free_areas(0);
-}
-
 static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
 {
 	zoneref->zone = zone;
-- 
cgit v1.2.3


From fa25c503dfa203b921199ea42c0046c89f2ed49f Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Tue, 24 May 2011 17:11:28 -0700
Subject: mm: per-node vmstat: show proper vmstats

commit 2ac390370a ("writeback: add
/sys/devices/system/node/<node>/vmstat") added vmstat entry.  But
strangely it only show nr_written and nr_dirtied.

        # cat /sys/devices/system/node/node20/vmstat
        nr_written 0
        nr_dirtied 0

Of course, It's not adequate.  With this patch, the vmstat show all vm
stastics as /proc/vmstat.

        # cat /sys/devices/system/node/node0/vmstat
	nr_free_pages 899224
	nr_inactive_anon 201
	nr_active_anon 17380
	nr_inactive_file 31572
	nr_active_file 28277
	nr_unevictable 0
	nr_mlock 0
	nr_anon_pages 17321
	nr_mapped 8640
	nr_file_pages 60107
	nr_dirty 33
	nr_writeback 0
	nr_slab_reclaimable 6850
	nr_slab_unreclaimable 7604
	nr_page_table_pages 3105
	nr_kernel_stack 175
	nr_unstable 0
	nr_bounce 0
	nr_vmscan_write 0
	nr_writeback_temp 0
	nr_isolated_anon 0
	nr_isolated_file 0
	nr_shmem 260
	nr_dirtied 1050
	nr_written 938
	numa_hit 962872
	numa_miss 0
	numa_foreign 0
	numa_interleave 8617
	numa_local 962872
	numa_other 0
	nr_anon_transparent_hugepages 0

[akpm@linux-foundation.org: no externs in .c files]
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Michael Rubin <mrubin@google.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/node.c    |  14 ++-
 include/linux/vmstat.h |   4 +-
 mm/vmstat.c            | 261 +++++++++++++++++++++++++------------------------
 3 files changed, 144 insertions(+), 135 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/node.c b/drivers/base/node.c
index b3b72d64e805..793f796c4da3 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -7,6 +7,7 @@
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/memory.h>
+#include <linux/vmstat.h>
 #include <linux/node.h>
 #include <linux/hugetlb.h>
 #include <linux/compaction.h>
@@ -179,11 +180,14 @@ static ssize_t node_read_vmstat(struct sys_device *dev,
 				struct sysdev_attribute *attr, char *buf)
 {
 	int nid = dev->id;
-	return sprintf(buf,
-		"nr_written %lu\n"
-		"nr_dirtied %lu\n",
-		node_page_state(nid, NR_WRITTEN),
-		node_page_state(nid, NR_DIRTIED));
+	int i;
+	int n = 0;
+
+	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
+		n += sprintf(buf+n, "%s %lu\n", vmstat_text[i],
+			     node_page_state(nid, i));
+
+	return n;
 }
 static SYSDEV_ATTR(vmstat, S_IRUGO, node_read_vmstat, NULL);
 
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 2b3831b58aa4..e73d1030f2f7 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -313,6 +313,8 @@ static inline void __dec_zone_page_state(struct page *page,
 #define set_pgdat_percpu_threshold(pgdat, callback) { }
 
 static inline void refresh_cpu_vm_stats(int cpu) { }
-#endif
+#endif		/* CONFIG_SMP */
+
+extern const char * const vmstat_text[];
 
 #endif /* _LINUX_VMSTAT_H */
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 897ea9e88238..209546a8bdd5 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -659,6 +659,138 @@ static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
 }
 #endif
 
+#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS)
+#ifdef CONFIG_ZONE_DMA
+#define TEXT_FOR_DMA(xx) xx "_dma",
+#else
+#define TEXT_FOR_DMA(xx)
+#endif
+
+#ifdef CONFIG_ZONE_DMA32
+#define TEXT_FOR_DMA32(xx) xx "_dma32",
+#else
+#define TEXT_FOR_DMA32(xx)
+#endif
+
+#ifdef CONFIG_HIGHMEM
+#define TEXT_FOR_HIGHMEM(xx) xx "_high",
+#else
+#define TEXT_FOR_HIGHMEM(xx)
+#endif
+
+#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
+					TEXT_FOR_HIGHMEM(xx) xx "_movable",
+
+const char * const vmstat_text[] = {
+	/* Zoned VM counters */
+	"nr_free_pages",
+	"nr_inactive_anon",
+	"nr_active_anon",
+	"nr_inactive_file",
+	"nr_active_file",
+	"nr_unevictable",
+	"nr_mlock",
+	"nr_anon_pages",
+	"nr_mapped",
+	"nr_file_pages",
+	"nr_dirty",
+	"nr_writeback",
+	"nr_slab_reclaimable",
+	"nr_slab_unreclaimable",
+	"nr_page_table_pages",
+	"nr_kernel_stack",
+	"nr_unstable",
+	"nr_bounce",
+	"nr_vmscan_write",
+	"nr_writeback_temp",
+	"nr_isolated_anon",
+	"nr_isolated_file",
+	"nr_shmem",
+	"nr_dirtied",
+	"nr_written",
+
+#ifdef CONFIG_NUMA
+	"numa_hit",
+	"numa_miss",
+	"numa_foreign",
+	"numa_interleave",
+	"numa_local",
+	"numa_other",
+#endif
+	"nr_anon_transparent_hugepages",
+	"nr_dirty_threshold",
+	"nr_dirty_background_threshold",
+
+#ifdef CONFIG_VM_EVENT_COUNTERS
+	"pgpgin",
+	"pgpgout",
+	"pswpin",
+	"pswpout",
+
+	TEXTS_FOR_ZONES("pgalloc")
+
+	"pgfree",
+	"pgactivate",
+	"pgdeactivate",
+
+	"pgfault",
+	"pgmajfault",
+
+	TEXTS_FOR_ZONES("pgrefill")
+	TEXTS_FOR_ZONES("pgsteal")
+	TEXTS_FOR_ZONES("pgscan_kswapd")
+	TEXTS_FOR_ZONES("pgscan_direct")
+
+#ifdef CONFIG_NUMA
+	"zone_reclaim_failed",
+#endif
+	"pginodesteal",
+	"slabs_scanned",
+	"kswapd_steal",
+	"kswapd_inodesteal",
+	"kswapd_low_wmark_hit_quickly",
+	"kswapd_high_wmark_hit_quickly",
+	"kswapd_skip_congestion_wait",
+	"pageoutrun",
+	"allocstall",
+
+	"pgrotated",
+
+#ifdef CONFIG_COMPACTION
+	"compact_blocks_moved",
+	"compact_pages_moved",
+	"compact_pagemigrate_failed",
+	"compact_stall",
+	"compact_fail",
+	"compact_success",
+#endif
+
+#ifdef CONFIG_HUGETLB_PAGE
+	"htlb_buddy_alloc_success",
+	"htlb_buddy_alloc_fail",
+#endif
+	"unevictable_pgs_culled",
+	"unevictable_pgs_scanned",
+	"unevictable_pgs_rescued",
+	"unevictable_pgs_mlocked",
+	"unevictable_pgs_munlocked",
+	"unevictable_pgs_cleared",
+	"unevictable_pgs_stranded",
+	"unevictable_pgs_mlockfreed",
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	"thp_fault_alloc",
+	"thp_fault_fallback",
+	"thp_collapse_alloc",
+	"thp_collapse_alloc_failed",
+	"thp_split",
+#endif
+
+#endif /* CONFIG_VM_EVENTS_COUNTERS */
+};
+#endif /* CONFIG_PROC_FS || CONFIG_SYSFS */
+
+
 #ifdef CONFIG_PROC_FS
 static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
 						struct zone *zone)
@@ -831,135 +963,6 @@ static const struct file_operations pagetypeinfo_file_ops = {
 	.release	= seq_release,
 };
 
-#ifdef CONFIG_ZONE_DMA
-#define TEXT_FOR_DMA(xx) xx "_dma",
-#else
-#define TEXT_FOR_DMA(xx)
-#endif
-
-#ifdef CONFIG_ZONE_DMA32
-#define TEXT_FOR_DMA32(xx) xx "_dma32",
-#else
-#define TEXT_FOR_DMA32(xx)
-#endif
-
-#ifdef CONFIG_HIGHMEM
-#define TEXT_FOR_HIGHMEM(xx) xx "_high",
-#else
-#define TEXT_FOR_HIGHMEM(xx)
-#endif
-
-#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
-					TEXT_FOR_HIGHMEM(xx) xx "_movable",
-
-static const char * const vmstat_text[] = {
-	/* Zoned VM counters */
-	"nr_free_pages",
-	"nr_inactive_anon",
-	"nr_active_anon",
-	"nr_inactive_file",
-	"nr_active_file",
-	"nr_unevictable",
-	"nr_mlock",
-	"nr_anon_pages",
-	"nr_mapped",
-	"nr_file_pages",
-	"nr_dirty",
-	"nr_writeback",
-	"nr_slab_reclaimable",
-	"nr_slab_unreclaimable",
-	"nr_page_table_pages",
-	"nr_kernel_stack",
-	"nr_unstable",
-	"nr_bounce",
-	"nr_vmscan_write",
-	"nr_writeback_temp",
-	"nr_isolated_anon",
-	"nr_isolated_file",
-	"nr_shmem",
-	"nr_dirtied",
-	"nr_written",
-
-#ifdef CONFIG_NUMA
-	"numa_hit",
-	"numa_miss",
-	"numa_foreign",
-	"numa_interleave",
-	"numa_local",
-	"numa_other",
-#endif
-	"nr_anon_transparent_hugepages",
-	"nr_dirty_threshold",
-	"nr_dirty_background_threshold",
-
-#ifdef CONFIG_VM_EVENT_COUNTERS
-	"pgpgin",
-	"pgpgout",
-	"pswpin",
-	"pswpout",
-
-	TEXTS_FOR_ZONES("pgalloc")
-
-	"pgfree",
-	"pgactivate",
-	"pgdeactivate",
-
-	"pgfault",
-	"pgmajfault",
-
-	TEXTS_FOR_ZONES("pgrefill")
-	TEXTS_FOR_ZONES("pgsteal")
-	TEXTS_FOR_ZONES("pgscan_kswapd")
-	TEXTS_FOR_ZONES("pgscan_direct")
-
-#ifdef CONFIG_NUMA
-	"zone_reclaim_failed",
-#endif
-	"pginodesteal",
-	"slabs_scanned",
-	"kswapd_steal",
-	"kswapd_inodesteal",
-	"kswapd_low_wmark_hit_quickly",
-	"kswapd_high_wmark_hit_quickly",
-	"kswapd_skip_congestion_wait",
-	"pageoutrun",
-	"allocstall",
-
-	"pgrotated",
-
-#ifdef CONFIG_COMPACTION
-	"compact_blocks_moved",
-	"compact_pages_moved",
-	"compact_pagemigrate_failed",
-	"compact_stall",
-	"compact_fail",
-	"compact_success",
-#endif
-
-#ifdef CONFIG_HUGETLB_PAGE
-	"htlb_buddy_alloc_success",
-	"htlb_buddy_alloc_fail",
-#endif
-	"unevictable_pgs_culled",
-	"unevictable_pgs_scanned",
-	"unevictable_pgs_rescued",
-	"unevictable_pgs_mlocked",
-	"unevictable_pgs_munlocked",
-	"unevictable_pgs_cleared",
-	"unevictable_pgs_stranded",
-	"unevictable_pgs_mlockfreed",
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-	"thp_fault_alloc",
-	"thp_fault_fallback",
-	"thp_collapse_alloc",
-	"thp_collapse_alloc_failed",
-	"thp_split",
-#endif
-
-#endif /* CONFIG_VM_EVENTS_COUNTERS */
-};
-
 static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 							struct zone *zone)
 {
-- 
cgit v1.2.3


From f62e00cc3a00bfbd394a79fc22b334c31f91bd5f Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Tue, 24 May 2011 17:11:29 -0700
Subject: mm: introduce wait_on_page_locked_killable()

commit 2687a356 ("Add lock_page_killable") introduced killable
lock_page().  Similarly this patch introdues killable
wait_on_page_locked().

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Cc: Matthew Wilcox <willy@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pagemap.h |  9 +++++++++
 mm/filemap.c            | 11 +++++++++++
 2 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index c11950652646..ea268080380d 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -357,6 +357,15 @@ static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
  */
 extern void wait_on_page_bit(struct page *page, int bit_nr);
 
+extern int wait_on_page_bit_killable(struct page *page, int bit_nr);
+
+static inline int wait_on_page_locked_killable(struct page *page)
+{
+	if (PageLocked(page))
+		return wait_on_page_bit_killable(page, PG_locked);
+	return 0;
+}
+
 /* 
  * Wait for a page to be unlocked.
  *
diff --git a/mm/filemap.c b/mm/filemap.c
index c641edf553a9..dea8a38bb2bb 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -562,6 +562,17 @@ void wait_on_page_bit(struct page *page, int bit_nr)
 }
 EXPORT_SYMBOL(wait_on_page_bit);
 
+int wait_on_page_bit_killable(struct page *page, int bit_nr)
+{
+	DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
+
+	if (!test_bit(bit_nr, &page->flags))
+		return 0;
+
+	return __wait_on_bit(page_waitqueue(page), &wait,
+			     sleep_on_page_killable, TASK_KILLABLE);
+}
+
 /**
  * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue
  * @page: Page defining the wait queue of interest
-- 
cgit v1.2.3


From 37b23e0525d393d48a7d59f870b3bc061a30ccdb Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Tue, 24 May 2011 17:11:30 -0700
Subject: x86,mm: make pagefault killable

When an oom killing occurs, almost all processes are getting stuck at the
following two points.

	1) __alloc_pages_nodemask
	2) __lock_page_or_retry

1) is not very problematic because TIF_MEMDIE leads to an allocation
failure and getting out from page allocator.

2) is more problematic.  In an OOM situation, zones typically don't have
page cache at all and memory starvation might lead to greatly reduced IO
performance.  When a fork bomb occurs, TIF_MEMDIE tasks don't die quickly,
meaning that a fork bomb may create new process quickly rather than the
oom-killer killing it.  Then, the system may become livelocked.

This patch makes the pagefault interruptible by SIGKILL.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Matthew Wilcox <willy@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/fault.c | 12 +++++++++++-
 include/linux/mm.h  |  1 +
 mm/filemap.c        | 31 ++++++++++++++++++++++++-------
 3 files changed, 36 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index bcb394dfbb35..f7a2a054a3c0 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -965,7 +965,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	struct mm_struct *mm;
 	int fault;
 	int write = error_code & PF_WRITE;
-	unsigned int flags = FAULT_FLAG_ALLOW_RETRY |
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
 					(write ? FAULT_FLAG_WRITE : 0);
 
 	tsk = current;
@@ -1138,6 +1138,16 @@ good_area:
 		return;
 	}
 
+	/*
+	 * Pagefault was interrupted by SIGKILL. We have no reason to
+	 * continue pagefault.
+	 */
+	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
+		if (!(error_code & PF_USER))
+			no_context(regs, error_code, address);
+		return;
+	}
+
 	/*
 	 * Major/minor page fault accounting is only done on the
 	 * initial attempt. If we go through a retry, it is extremely
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1746f67c33de..57d3d5fade16 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -153,6 +153,7 @@ extern pgprot_t protection_map[16];
 #define FAULT_FLAG_MKWRITE	0x04	/* Fault was mkwrite of existing pte */
 #define FAULT_FLAG_ALLOW_RETRY	0x08	/* Retry fault if blocking */
 #define FAULT_FLAG_RETRY_NOWAIT	0x10	/* Don't drop mmap_sem and wait when retrying */
+#define FAULT_FLAG_KILLABLE	0x20	/* The fault task is in SIGKILL killable region */
 
 /*
  * This interface is used by x86 PAT code to identify a pfn mapping that is
diff --git a/mm/filemap.c b/mm/filemap.c
index dea8a38bb2bb..8144f87dcbb4 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -654,15 +654,32 @@ EXPORT_SYMBOL_GPL(__lock_page_killable);
 int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
 			 unsigned int flags)
 {
-	if (!(flags & FAULT_FLAG_ALLOW_RETRY)) {
-		__lock_page(page);
-		return 1;
-	} else {
-		if (!(flags & FAULT_FLAG_RETRY_NOWAIT)) {
-			up_read(&mm->mmap_sem);
+	if (flags & FAULT_FLAG_ALLOW_RETRY) {
+		/*
+		 * CAUTION! In this case, mmap_sem is not released
+		 * even though return 0.
+		 */
+		if (flags & FAULT_FLAG_RETRY_NOWAIT)
+			return 0;
+
+		up_read(&mm->mmap_sem);
+		if (flags & FAULT_FLAG_KILLABLE)
+			wait_on_page_locked_killable(page);
+		else
 			wait_on_page_locked(page);
-		}
 		return 0;
+	} else {
+		if (flags & FAULT_FLAG_KILLABLE) {
+			int ret;
+
+			ret = __lock_page_killable(page);
+			if (ret) {
+				up_read(&mm->mmap_sem);
+				return 0;
+			}
+		} else
+			__lock_page(page);
+		return 1;
 	}
 }
 
-- 
cgit v1.2.3


From 1b79acc91115ba47e744b70bb166b77bd94f5855 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Tue, 24 May 2011 17:11:32 -0700
Subject: mm, mem-hotplug: recalculate lowmem_reserve when memory hotplug
 occurs

Currently, memory hotplug calls setup_per_zone_wmarks() and
calculate_zone_inactive_ratio(), but doesn't call
setup_per_zone_lowmem_reserve().

It means the number of reserved pages aren't updated even if memory hot
plug occur.  This patch fixes it.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h  | 2 +-
 mm/memory_hotplug.c | 9 +++++----
 mm/page_alloc.c     | 4 ++--
 3 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 57d3d5fade16..e173cd297d88 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1381,7 +1381,7 @@ extern void set_dma_reserve(unsigned long new_dma_reserve);
 extern void memmap_init_zone(unsigned long, int, unsigned long,
 				unsigned long, enum memmap_context);
 extern void setup_per_zone_wmarks(void);
-extern void calculate_zone_inactive_ratio(struct zone *zone);
+extern int __meminit init_per_zone_wmark_min(void);
 extern void mem_init(void);
 extern void __init mmap_init(void);
 extern void show_mem(unsigned int flags);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 2c4edc459fb0..59ac18fefd65 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -459,8 +459,9 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages)
 		zone_pcp_update(zone);
 
 	mutex_unlock(&zonelists_mutex);
-	setup_per_zone_wmarks();
-	calculate_zone_inactive_ratio(zone);
+
+	init_per_zone_wmark_min();
+
 	if (onlined_pages) {
 		kswapd_run(zone_to_nid(zone));
 		node_set_state(zone_to_nid(zone), N_HIGH_MEMORY);
@@ -893,8 +894,8 @@ repeat:
 	zone->zone_pgdat->node_present_pages -= offlined_pages;
 	totalram_pages -= offlined_pages;
 
-	setup_per_zone_wmarks();
-	calculate_zone_inactive_ratio(zone);
+	init_per_zone_wmark_min();
+
 	if (!node_present_pages(node)) {
 		node_clear_state(node, N_HIGH_MEMORY);
 		kswapd_stop(node);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 56d0be36be9d..e133cea36932 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5094,7 +5094,7 @@ void setup_per_zone_wmarks(void)
  *    1TB     101        10GB
  *   10TB     320        32GB
  */
-void __meminit calculate_zone_inactive_ratio(struct zone *zone)
+static void __meminit calculate_zone_inactive_ratio(struct zone *zone)
 {
 	unsigned int gb, ratio;
 
@@ -5140,7 +5140,7 @@ static void __meminit setup_per_zone_inactive_ratio(void)
  * 8192MB:	11584k
  * 16384MB:	16384k
  */
-static int __init init_per_zone_wmark_min(void)
+int __meminit init_per_zone_wmark_min(void)
 {
 	unsigned long lowmem_kbytes;
 
-- 
cgit v1.2.3


From a6cccdc36c966e51fd969560d870cfd37afbfa9c Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Tue, 24 May 2011 17:11:33 -0700
Subject: mm, mem-hotplug: update pcp->stat_threshold when memory hotplug occur

Currently, cpu hotplug updates pcp->stat_threshold, but memory hotplug
doesn't.  There is no reason for this.

[akpm@linux-foundation.org: fix CONFIG_SMP=n build]
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vmstat.h | 3 +++
 mm/page_alloc.c        | 2 ++
 mm/vmstat.c            | 3 +--
 3 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index e73d1030f2f7..51359837511a 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -261,6 +261,7 @@ extern void dec_zone_state(struct zone *, enum zone_stat_item);
 extern void __dec_zone_state(struct zone *, enum zone_stat_item);
 
 void refresh_cpu_vm_stats(int);
+void refresh_zone_stat_thresholds(void);
 
 int calculate_pressure_threshold(struct zone *zone);
 int calculate_normal_threshold(struct zone *zone);
@@ -313,6 +314,8 @@ static inline void __dec_zone_page_state(struct page *page,
 #define set_pgdat_percpu_threshold(pgdat, callback) { }
 
 static inline void refresh_cpu_vm_stats(int cpu) { }
+static inline void refresh_zone_stat_thresholds(void) { }
+
 #endif		/* CONFIG_SMP */
 
 extern const char * const vmstat_text[];
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e133cea36932..77773329aa72 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -39,6 +39,7 @@
 #include <linux/memory_hotplug.h>
 #include <linux/nodemask.h>
 #include <linux/vmalloc.h>
+#include <linux/vmstat.h>
 #include <linux/mempolicy.h>
 #include <linux/stop_machine.h>
 #include <linux/sort.h>
@@ -5152,6 +5153,7 @@ int __meminit init_per_zone_wmark_min(void)
 	if (min_free_kbytes > 65536)
 		min_free_kbytes = 65536;
 	setup_per_zone_wmarks();
+	refresh_zone_stat_thresholds();
 	setup_per_zone_lowmem_reserve();
 	setup_per_zone_inactive_ratio();
 	return 0;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 209546a8bdd5..20c18b7694b2 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -157,7 +157,7 @@ int calculate_normal_threshold(struct zone *zone)
 /*
  * Refresh the thresholds for each zone.
  */
-static void refresh_zone_stat_thresholds(void)
+void refresh_zone_stat_thresholds(void)
 {
 	struct zone *zone;
 	int cpu;
@@ -1201,7 +1201,6 @@ static int __init setup_vmstat(void)
 #ifdef CONFIG_SMP
 	int cpu;
 
-	refresh_zone_stat_thresholds();
 	register_cpu_notifier(&vmstat_notifier);
 
 	for_each_online_cpu(cpu)
-- 
cgit v1.2.3


From 72788c385604523422592249c19cba0187021e9b Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Tue, 24 May 2011 17:11:40 -0700
Subject: oom: replace PF_OOM_ORIGIN with toggling oom_score_adj

There's a kernel-wide shortage of per-process flags, so it's always
helpful to trim one when possible without incurring a significant penalty.
 It's even more important when you're planning on adding a per- process
flag yourself, which I plan to do shortly for transparent hugepages.

PF_OOM_ORIGIN is used by ksm and swapoff to prefer current since it has a
tendency to allocate large amounts of memory and should be preferred for
killing over other tasks.  We'd rather immediately kill the task making
the errant syscall rather than penalizing an innocent task.

This patch removes PF_OOM_ORIGIN since its behavior is equivalent to
setting the process's oom_score_adj to OOM_SCORE_ADJ_MAX.

The process's old oom_score_adj is stored and then set to
OOM_SCORE_ADJ_MAX during the time it used to have PF_OOM_ORIGIN.  The old
value is then reinstated when the process should no longer be considered a
high priority for oom killing.

Signed-off-by: David Rientjes <rientjes@google.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Izik Eidus <ieidus@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/oom.h   |  2 ++
 include/linux/sched.h |  1 -
 mm/ksm.c              |  7 +++++--
 mm/oom_kill.c         | 36 +++++++++++++++++++++++++++---------
 mm/swapfile.c         |  6 ++++--
 5 files changed, 38 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/oom.h b/include/linux/oom.h
index 5e3aa8311c5e..4952fb874ad3 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -40,6 +40,8 @@ enum oom_constraint {
 	CONSTRAINT_MEMCG,
 };
 
+extern int test_set_oom_score_adj(int new_val);
+
 extern unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
 			const nodemask_t *nodemask, unsigned long totalpages);
 extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index aaf71e08222c..44b8faaac7c0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1753,7 +1753,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
 #define PF_FROZEN	0x00010000	/* frozen for system suspend */
 #define PF_FSTRANS	0x00020000	/* inside a filesystem transaction */
 #define PF_KSWAPD	0x00040000	/* I am kswapd */
-#define PF_OOM_ORIGIN	0x00080000	/* Allocating much memory to others */
 #define PF_LESS_THROTTLE 0x00100000	/* Throttle me less: I clean memory */
 #define PF_KTHREAD	0x00200000	/* I am a kernel thread */
 #define PF_RANDOMIZE	0x00400000	/* randomize virtual address space */
diff --git a/mm/ksm.c b/mm/ksm.c
index 942dfc73a2ff..d708b3ef2260 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -35,6 +35,7 @@
 #include <linux/ksm.h>
 #include <linux/hash.h>
 #include <linux/freezer.h>
+#include <linux/oom.h>
 
 #include <asm/tlbflush.h>
 #include "internal.h"
@@ -1894,9 +1895,11 @@ static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
 	if (ksm_run != flags) {
 		ksm_run = flags;
 		if (flags & KSM_RUN_UNMERGE) {
-			current->flags |= PF_OOM_ORIGIN;
+			int oom_score_adj;
+
+			oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX);
 			err = unmerge_and_remove_all_rmap_items();
-			current->flags &= ~PF_OOM_ORIGIN;
+			test_set_oom_score_adj(oom_score_adj);
 			if (err) {
 				ksm_run = KSM_RUN_STOP;
 				count = err;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index f52e85c80e8d..e4b0991ca351 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -38,6 +38,33 @@ int sysctl_oom_kill_allocating_task;
 int sysctl_oom_dump_tasks = 1;
 static DEFINE_SPINLOCK(zone_scan_lock);
 
+/**
+ * test_set_oom_score_adj() - set current's oom_score_adj and return old value
+ * @new_val: new oom_score_adj value
+ *
+ * Sets the oom_score_adj value for current to @new_val with proper
+ * synchronization and returns the old value.  Usually used to temporarily
+ * set a value, save the old value in the caller, and then reinstate it later.
+ */
+int test_set_oom_score_adj(int new_val)
+{
+	struct sighand_struct *sighand = current->sighand;
+	int old_val;
+
+	spin_lock_irq(&sighand->siglock);
+	old_val = current->signal->oom_score_adj;
+	if (new_val != old_val) {
+		if (new_val == OOM_SCORE_ADJ_MIN)
+			atomic_inc(&current->mm->oom_disable_count);
+		else if (old_val == OOM_SCORE_ADJ_MIN)
+			atomic_dec(&current->mm->oom_disable_count);
+		current->signal->oom_score_adj = new_val;
+	}
+	spin_unlock_irq(&sighand->siglock);
+
+	return old_val;
+}
+
 #ifdef CONFIG_NUMA
 /**
  * has_intersects_mems_allowed() - check task eligiblity for kill
@@ -154,15 +181,6 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
 		return 0;
 	}
 
-	/*
-	 * When the PF_OOM_ORIGIN bit is set, it indicates the task should have
-	 * priority for oom killing.
-	 */
-	if (p->flags & PF_OOM_ORIGIN) {
-		task_unlock(p);
-		return 1000;
-	}
-
 	/*
 	 * The memory controller may have a limit of 0 bytes, so avoid a divide
 	 * by zero, if necessary.
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 8c6b3ce38f09..d537d29e9b7b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -31,6 +31,7 @@
 #include <linux/syscalls.h>
 #include <linux/memcontrol.h>
 #include <linux/poll.h>
+#include <linux/oom.h>
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
@@ -1555,6 +1556,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	struct address_space *mapping;
 	struct inode *inode;
 	char *pathname;
+	int oom_score_adj;
 	int i, type, prev;
 	int err;
 
@@ -1613,9 +1615,9 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	p->flags &= ~SWP_WRITEOK;
 	spin_unlock(&swap_lock);
 
-	current->flags |= PF_OOM_ORIGIN;
+	oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX);
 	err = try_to_unuse(type);
-	current->flags &= ~PF_OOM_ORIGIN;
+	test_set_oom_score_adj(oom_score_adj);
 
 	if (err) {
 		/*
-- 
cgit v1.2.3


From 15fa8f425557a0d698f933627771f520ef4ae34b Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave@linux.vnet.ibm.com>
Date: Tue, 24 May 2011 17:11:41 -0700
Subject: include/linux/gfp.h: work around apparent sparse confusion

Running sparse on page_alloc.c today, it errors out:
        include/linux/gfp.h:254:17: error: bad constant expression
        include/linux/gfp.h:254:17: error: cannot size expression

which is a line in gfp_zone():

        BUILD_BUG_ON((GFP_ZONE_BAD >> bit) & 1);

That's really unfortunate, because it ends up hiding all of the other
legitimate sparse messages like this:
        mm/page_alloc.c:5315:59: warning: incorrect type in argument 1 (different base types)
        mm/page_alloc.c:5315:59:    expected unsigned long [unsigned] [usertype] size
        mm/page_alloc.c:5315:59:    got restricted gfp_t [usertype] <noident>
...

Having sparse be able to catch these very oopsable bugs is a lot more
important than keeping a BUILD_BUG_ON().  Kill the BUILD_BUG_ON().

Compiles on x86_64 with and without CONFIG_DEBUG_VM=y.  defconfig boots
fine for me.

Signed-off-by: Dave Hansen <dave@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 56d8fc87fbbc..7e8f5d863c76 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -249,14 +249,9 @@ static inline enum zone_type gfp_zone(gfp_t flags)
 
 	z = (GFP_ZONE_TABLE >> (bit * ZONES_SHIFT)) &
 					 ((1 << ZONES_SHIFT) - 1);
-
-	if (__builtin_constant_p(bit))
-		BUILD_BUG_ON((GFP_ZONE_BAD >> bit) & 1);
-	else {
 #ifdef CONFIG_DEBUG_VM
-		BUG_ON((GFP_ZONE_BAD >> bit) & 1);
+	BUG_ON((GFP_ZONE_BAD >> bit) & 1);
 #endif
-	}
 	return z;
 }
 
-- 
cgit v1.2.3


From 82d4b5779a75887750748609f3415f01c1bb9f81 Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave@linux.vnet.ibm.com>
Date: Tue, 24 May 2011 17:11:42 -0700
Subject: include/linux/gfp.h: convert BUG_ON() into VM_BUG_ON()

VM_BUG_ON() if effectively a BUG_ON() undef #ifdef CONFIG_DEBUG_VM.  That
is exactly what we have here now, and two different folks have suggested
doing it this way.

Signed-off-by: Dave Hansen <dave@linux.vnet.ibm.com>
Cc: Christoph Lameter <cl@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 7e8f5d863c76..cb4089254f01 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -249,9 +249,7 @@ static inline enum zone_type gfp_zone(gfp_t flags)
 
 	z = (GFP_ZONE_TABLE >> (bit * ZONES_SHIFT)) &
 					 ((1 << ZONES_SHIFT) - 1);
-#ifdef CONFIG_DEBUG_VM
-	BUG_ON((GFP_ZONE_BAD >> bit) & 1);
-#endif
+	VM_BUG_ON((GFP_ZONE_BAD >> bit) & 1);
 	return z;
 }
 
-- 
cgit v1.2.3


From d05f3169c0fbca16132ec7c2be71685c6de638b5 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.cz>
Date: Tue, 24 May 2011 17:11:44 -0700
Subject: mm: make expand_downwards() symmetrical with expand_upwards()

Currently we have expand_upwards exported while expand_downwards is
accessible only via expand_stack or expand_stack_downwards.

check_stack_guard_page is a nice example of the asymmetry.  It uses
expand_stack for VM_GROWSDOWN while expand_upwards is called for
VM_GROWSUP case.

Let's clean this up by exporting both functions and make those names
consistent.  Let's use expand_{upwards,downwards} because expanding
doesn't always involve stack manipulation (an example is
ia64_do_page_fault which uses expand_upwards for registers backing store
expansion).  expand_downwards has to be defined for both
CONFIG_STACK_GROWS{UP,DOWN} because get_arg_page calls the downwards
version in the early process initialization phase for growsup
configuration.

Signed-off-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c          | 2 +-
 include/linux/mm.h | 8 +++++---
 mm/memory.c        | 2 +-
 mm/mmap.c          | 7 +------
 4 files changed, 8 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exec.c b/fs/exec.c
index c1cf372f17a7..e276d5e0abb9 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -200,7 +200,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 
 #ifdef CONFIG_STACK_GROWSUP
 	if (write) {
-		ret = expand_stack_downwards(bprm->vma, pos);
+		ret = expand_downwards(bprm->vma, pos);
 		if (ret < 0)
 			return NULL;
 	}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e173cd297d88..d2948af126ca 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1518,15 +1518,17 @@ unsigned long ra_submit(struct file_ra_state *ra,
 			struct address_space *mapping,
 			struct file *filp);
 
-/* Do stack extension */
+/* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
 extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
+
+/* CONFIG_STACK_GROWSUP still needs to to grow downwards at some places */
+extern int expand_downwards(struct vm_area_struct *vma,
+		unsigned long address);
 #if VM_GROWSUP
 extern int expand_upwards(struct vm_area_struct *vma, unsigned long address);
 #else
   #define expand_upwards(vma, address) do { } while (0)
 #endif
-extern int expand_stack_downwards(struct vm_area_struct *vma,
-				  unsigned long address);
 
 /* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
 extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);
diff --git a/mm/memory.c b/mm/memory.c
index 61e66f026563..4c6ea10f3d18 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2966,7 +2966,7 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo
 		if (prev && prev->vm_end == address)
 			return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
 
-		expand_stack(vma, address - PAGE_SIZE);
+		expand_downwards(vma, address - PAGE_SIZE);
 	}
 	if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
 		struct vm_area_struct *next = vma->vm_next;
diff --git a/mm/mmap.c b/mm/mmap.c
index e76f8d752884..adb12527fd0e 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1774,7 +1774,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 /*
  * vma is the first one with address < vma->vm_start.  Have to extend vma.
  */
-static int expand_downwards(struct vm_area_struct *vma,
+int expand_downwards(struct vm_area_struct *vma,
 				   unsigned long address)
 {
 	int error;
@@ -1821,11 +1821,6 @@ static int expand_downwards(struct vm_area_struct *vma,
 	return error;
 }
 
-int expand_stack_downwards(struct vm_area_struct *vma, unsigned long address)
-{
-	return expand_downwards(vma, address);
-}
-
 #ifdef CONFIG_STACK_GROWSUP
 int expand_stack(struct vm_area_struct *vma, unsigned long address)
 {
-- 
cgit v1.2.3


From d16dfc550f5326a4000f3322582a7c05dec91d7a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 24 May 2011 17:11:45 -0700
Subject: mm: mmu_gather rework

Rework the existing mmu_gather infrastructure.

The direct purpose of these patches was to allow preemptible mmu_gather,
but even without that I think these patches provide an improvement to the
status quo.

The first 9 patches rework the mmu_gather infrastructure.  For review
purpose I've split them into generic and per-arch patches with the last of
those a generic cleanup.

The next patch provides generic RCU page-table freeing, and the followup
is a patch converting s390 to use this.  I've also got 4 patches from
DaveM lined up (not included in this series) that uses this to implement
gup_fast() for sparc64.

Then there is one patch that extends the generic mmu_gather batching.

After that follow the mm preemptibility patches, these make part of the mm
a lot more preemptible.  It converts i_mmap_lock and anon_vma->lock to
mutexes which together with the mmu_gather rework makes mmu_gather
preemptible as well.

Making i_mmap_lock a mutex also enables a clean-up of the truncate code.

This also allows for preemptible mmu_notifiers, something that XPMEM I
think wants.

Furthermore, it removes the new and universially detested unmap_mutex.

This patch:

Remove the first obstacle towards a fully preemptible mmu_gather.

The current scheme assumes mmu_gather is always done with preemption
disabled and uses per-cpu storage for the page batches.  Change this to
try and allocate a page for batching and in case of failure, use a small
on-stack array to make some progress.

Preemptible mmu_gather is desired in general and usable once i_mmap_lock
becomes a mutex.  Doing it before the mutex conversion saves us from
having to rework the code by moving the mmu_gather bits inside the
pte_lock.

Also avoid flushing the tlb batches from under the pte lock, this is
useful even without the i_mmap_lock conversion as it significantly reduces
pte lock hold times.

[akpm@linux-foundation.org: fix comment tpyo]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Miller <davem@davemloft.net>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Tony Luck <tony.luck@intel.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Hugh Dickins <hughd@google.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Namhyung Kim <namhyung@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c                 | 10 ++---
 include/asm-generic/tlb.h | 96 ++++++++++++++++++++++++++++++++++-------------
 include/linux/mm.h        |  2 +-
 mm/memory.c               | 46 +++++++++++------------
 mm/mmap.c                 | 18 ++++-----
 5 files changed, 107 insertions(+), 65 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exec.c b/fs/exec.c
index e276d5e0abb9..936f5776655c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -600,7 +600,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 	unsigned long length = old_end - old_start;
 	unsigned long new_start = old_start - shift;
 	unsigned long new_end = old_end - shift;
-	struct mmu_gather *tlb;
+	struct mmu_gather tlb;
 
 	BUG_ON(new_start > new_end);
 
@@ -626,12 +626,12 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 		return -ENOMEM;
 
 	lru_add_drain();
-	tlb = tlb_gather_mmu(mm, 0);
+	tlb_gather_mmu(&tlb, mm, 0);
 	if (new_end > old_start) {
 		/*
 		 * when the old and new regions overlap clear from new_end.
 		 */
-		free_pgd_range(tlb, new_end, old_end, new_end,
+		free_pgd_range(&tlb, new_end, old_end, new_end,
 			vma->vm_next ? vma->vm_next->vm_start : 0);
 	} else {
 		/*
@@ -640,10 +640,10 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 		 * have constraints on va-space that make this illegal (IA64) -
 		 * for the others its just a little faster.
 		 */
-		free_pgd_range(tlb, old_start, old_end, new_end,
+		free_pgd_range(&tlb, old_start, old_end, new_end,
 			vma->vm_next ? vma->vm_next->vm_start : 0);
 	}
-	tlb_finish_mmu(tlb, new_end, old_end);
+	tlb_finish_mmu(&tlb, new_end, old_end);
 
 	/*
 	 * Shrink the vma to just the new range.  Always succeeds.
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index e43f9766259f..2d3547c84235 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -5,6 +5,8 @@
  * Copyright 2001 Red Hat, Inc.
  * Based on code from mm/memory.c Copyright Linus Torvalds and others.
  *
+ * Copyright 2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version
@@ -22,51 +24,71 @@
  * and page free order so much..
  */
 #ifdef CONFIG_SMP
-  #ifdef ARCH_FREE_PTR_NR
-    #define FREE_PTR_NR   ARCH_FREE_PTR_NR
-  #else
-    #define FREE_PTE_NR	506
-  #endif
   #define tlb_fast_mode(tlb) ((tlb)->nr == ~0U)
 #else
-  #define FREE_PTE_NR	1
   #define tlb_fast_mode(tlb) 1
 #endif
 
+/*
+ * If we can't allocate a page to make a big batch of page pointers
+ * to work on, then just handle a few from the on-stack structure.
+ */
+#define MMU_GATHER_BUNDLE	8
+
 /* struct mmu_gather is an opaque type used by the mm code for passing around
  * any data needed by arch specific code for tlb_remove_page.
  */
 struct mmu_gather {
 	struct mm_struct	*mm;
 	unsigned int		nr;	/* set to ~0U means fast mode */
+	unsigned int		max;	/* nr < max */
 	unsigned int		need_flush;/* Really unmapped some ptes? */
 	unsigned int		fullmm; /* non-zero means full mm flush */
-	struct page *		pages[FREE_PTE_NR];
+#ifdef HAVE_ARCH_MMU_GATHER
+	struct arch_mmu_gather	arch;
+#endif
+	struct page		**pages;
+	struct page		*local[MMU_GATHER_BUNDLE];
 };
 
-/* Users of the generic TLB shootdown code must declare this storage space. */
-DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
+static inline void __tlb_alloc_page(struct mmu_gather *tlb)
+{
+	unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
+
+	if (addr) {
+		tlb->pages = (void *)addr;
+		tlb->max = PAGE_SIZE / sizeof(struct page *);
+	}
+}
 
 /* tlb_gather_mmu
- *	Return a pointer to an initialized struct mmu_gather.
+ *	Called to initialize an (on-stack) mmu_gather structure for page-table
+ *	tear-down from @mm. The @fullmm argument is used when @mm is without
+ *	users and we're going to destroy the full address space (exit/execve).
  */
-static inline struct mmu_gather *
-tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
+static inline void
+tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm)
 {
-	struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
-
 	tlb->mm = mm;
 
-	/* Use fast mode if only one CPU is online */
-	tlb->nr = num_online_cpus() > 1 ? 0U : ~0U;
+	tlb->max = ARRAY_SIZE(tlb->local);
+	tlb->pages = tlb->local;
+
+	if (num_online_cpus() > 1) {
+		tlb->nr = 0;
+		__tlb_alloc_page(tlb);
+	} else /* Use fast mode if only one CPU is online */
+		tlb->nr = ~0U;
 
-	tlb->fullmm = full_mm_flush;
+	tlb->fullmm = fullmm;
 
-	return tlb;
+#ifdef HAVE_ARCH_MMU_GATHER
+	tlb->arch = ARCH_MMU_GATHER_INIT;
+#endif
 }
 
 static inline void
-tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+tlb_flush_mmu(struct mmu_gather *tlb)
 {
 	if (!tlb->need_flush)
 		return;
@@ -75,6 +97,13 @@ tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 	if (!tlb_fast_mode(tlb)) {
 		free_pages_and_swap_cache(tlb->pages, tlb->nr);
 		tlb->nr = 0;
+		/*
+		 * If we are using the local on-stack array of pages for MMU
+		 * gather, try allocating an off-stack array again as we have
+		 * recently freed pages.
+		 */
+		if (tlb->pages == tlb->local)
+			__tlb_alloc_page(tlb);
 	}
 }
 
@@ -85,29 +114,42 @@ tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 static inline void
 tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 {
-	tlb_flush_mmu(tlb, start, end);
+	tlb_flush_mmu(tlb);
 
 	/* keep the page table cache within bounds */
 	check_pgt_cache();
 
-	put_cpu_var(mmu_gathers);
+	if (tlb->pages != tlb->local)
+		free_pages((unsigned long)tlb->pages, 0);
 }
 
-/* tlb_remove_page
+/* __tlb_remove_page
  *	Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)), while
  *	handling the additional races in SMP caused by other CPUs caching valid
- *	mappings in their TLBs.
+ *	mappings in their TLBs. Returns the number of free page slots left.
+ *	When out of page slots we must call tlb_flush_mmu().
  */
-static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
 	tlb->need_flush = 1;
 	if (tlb_fast_mode(tlb)) {
 		free_page_and_swap_cache(page);
-		return;
+		return 1; /* avoid calling tlb_flush_mmu() */
 	}
 	tlb->pages[tlb->nr++] = page;
-	if (tlb->nr >= FREE_PTE_NR)
-		tlb_flush_mmu(tlb, 0, 0);
+	VM_BUG_ON(tlb->nr > tlb->max);
+
+	return tlb->max - tlb->nr;
+}
+
+/* tlb_remove_page
+ *	Similar to __tlb_remove_page but will call tlb_flush_mmu() itself when
+ *	required.
+ */
+static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+	if (!__tlb_remove_page(tlb, page))
+		tlb_flush_mmu(tlb);
 }
 
 /**
diff --git a/include/linux/mm.h b/include/linux/mm.h
index d2948af126ca..ffcce9bf2b54 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -906,7 +906,7 @@ int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
 		unsigned long size);
 unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
 		unsigned long size, struct zap_details *);
-unsigned long unmap_vmas(struct mmu_gather **tlb,
+unsigned long unmap_vmas(struct mmu_gather *tlb,
 		struct vm_area_struct *start_vma, unsigned long start_addr,
 		unsigned long end_addr, unsigned long *nr_accounted,
 		struct zap_details *);
diff --git a/mm/memory.c b/mm/memory.c
index 4c6ea10f3d18..19b2d44de9f0 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -912,12 +912,13 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 				long *zap_work, struct zap_details *details)
 {
 	struct mm_struct *mm = tlb->mm;
+	int force_flush = 0;
 	pte_t *pte;
 	spinlock_t *ptl;
 	int rss[NR_MM_COUNTERS];
 
 	init_rss_vec(rss);
-
+again:
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	arch_enter_lazy_mmu_mode();
 	do {
@@ -974,7 +975,9 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 			page_remove_rmap(page);
 			if (unlikely(page_mapcount(page) < 0))
 				print_bad_pte(vma, addr, ptent, page);
-			tlb_remove_page(tlb, page);
+			force_flush = !__tlb_remove_page(tlb, page);
+			if (force_flush)
+				break;
 			continue;
 		}
 		/*
@@ -1001,6 +1004,18 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
 
+	/*
+	 * mmu_gather ran out of room to batch pages, we break out of
+	 * the PTE lock to avoid doing the potential expensive TLB invalidate
+	 * and page-free while holding it.
+	 */
+	if (force_flush) {
+		force_flush = 0;
+		tlb_flush_mmu(tlb);
+		if (addr != end)
+			goto again;
+	}
+
 	return addr;
 }
 
@@ -1121,17 +1136,14 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
  * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
  * drops the lock and schedules.
  */
-unsigned long unmap_vmas(struct mmu_gather **tlbp,
+unsigned long unmap_vmas(struct mmu_gather *tlb,
 		struct vm_area_struct *vma, unsigned long start_addr,
 		unsigned long end_addr, unsigned long *nr_accounted,
 		struct zap_details *details)
 {
 	long zap_work = ZAP_BLOCK_SIZE;
-	unsigned long tlb_start = 0;	/* For tlb_finish_mmu */
-	int tlb_start_valid = 0;
 	unsigned long start = start_addr;
 	spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
-	int fullmm = (*tlbp)->fullmm;
 	struct mm_struct *mm = vma->vm_mm;
 
 	mmu_notifier_invalidate_range_start(mm, start_addr, end_addr);
@@ -1152,11 +1164,6 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 			untrack_pfn_vma(vma, 0, 0);
 
 		while (start != end) {
-			if (!tlb_start_valid) {
-				tlb_start = start;
-				tlb_start_valid = 1;
-			}
-
 			if (unlikely(is_vm_hugetlb_page(vma))) {
 				/*
 				 * It is undesirable to test vma->vm_file as it
@@ -1177,7 +1184,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 
 				start = end;
 			} else
-				start = unmap_page_range(*tlbp, vma,
+				start = unmap_page_range(tlb, vma,
 						start, end, &zap_work, details);
 
 			if (zap_work > 0) {
@@ -1185,19 +1192,13 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 				break;
 			}
 
-			tlb_finish_mmu(*tlbp, tlb_start, start);
-
 			if (need_resched() ||
 				(i_mmap_lock && spin_needbreak(i_mmap_lock))) {
-				if (i_mmap_lock) {
-					*tlbp = NULL;
+				if (i_mmap_lock)
 					goto out;
-				}
 				cond_resched();
 			}
 
-			*tlbp = tlb_gather_mmu(vma->vm_mm, fullmm);
-			tlb_start_valid = 0;
 			zap_work = ZAP_BLOCK_SIZE;
 		}
 	}
@@ -1217,16 +1218,15 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
 		unsigned long size, struct zap_details *details)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	struct mmu_gather *tlb;
+	struct mmu_gather tlb;
 	unsigned long end = address + size;
 	unsigned long nr_accounted = 0;
 
 	lru_add_drain();
-	tlb = tlb_gather_mmu(mm, 0);
+	tlb_gather_mmu(&tlb, mm, 0);
 	update_hiwater_rss(mm);
 	end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details);
-	if (tlb)
-		tlb_finish_mmu(tlb, address, end);
+	tlb_finish_mmu(&tlb, address, end);
 	return end;
 }
 
diff --git a/mm/mmap.c b/mm/mmap.c
index adb12527fd0e..40d49986e714 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1903,17 +1903,17 @@ static void unmap_region(struct mm_struct *mm,
 		unsigned long start, unsigned long end)
 {
 	struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
-	struct mmu_gather *tlb;
+	struct mmu_gather tlb;
 	unsigned long nr_accounted = 0;
 
 	lru_add_drain();
-	tlb = tlb_gather_mmu(mm, 0);
+	tlb_gather_mmu(&tlb, mm, 0);
 	update_hiwater_rss(mm);
 	unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
 	vm_unacct_memory(nr_accounted);
-	free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
-				 next? next->vm_start: 0);
-	tlb_finish_mmu(tlb, start, end);
+	free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
+				 next ? next->vm_start : 0);
+	tlb_finish_mmu(&tlb, start, end);
 }
 
 /*
@@ -2255,7 +2255,7 @@ EXPORT_SYMBOL(do_brk);
 /* Release all mmaps. */
 void exit_mmap(struct mm_struct *mm)
 {
-	struct mmu_gather *tlb;
+	struct mmu_gather tlb;
 	struct vm_area_struct *vma;
 	unsigned long nr_accounted = 0;
 	unsigned long end;
@@ -2280,14 +2280,14 @@ void exit_mmap(struct mm_struct *mm)
 
 	lru_add_drain();
 	flush_cache_mm(mm);
-	tlb = tlb_gather_mmu(mm, 1);
+	tlb_gather_mmu(&tlb, mm, 1);
 	/* update_hiwater_rss(mm) here? but nobody should be looking */
 	/* Use -1 here to ensure all VMAs in the mm are unmapped */
 	end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
 	vm_unacct_memory(nr_accounted);
 
-	free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
-	tlb_finish_mmu(tlb, 0, end);
+	free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
+	tlb_finish_mmu(&tlb, 0, end);
 
 	/*
 	 * Walk the list again, actually closing and freeing it,
-- 
cgit v1.2.3


From e4c70a6629f9c74c4b0de258a3951890e9047c82 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 24 May 2011 17:12:03 -0700
Subject: lockdep, mutex: provide mutex_lock_nest_lock

In order to convert i_mmap_lock to a mutex we need a mutex equivalent to
spin_lock_nest_lock(), thus provide the mutex_lock_nest_lock() annotation.

As with spin_lock_nest_lock(), mutex_lock_nest_lock() allows annotation of
the locking pattern where an outer lock serializes the acquisition order
of nested locks.  That is, if every time you lock multiple locks A, say A1
and A2 you first acquire N, the order of acquiring A1 and A2 is
irrelevant.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Miller <davem@davemloft.net>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Tony Luck <tony.luck@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Namhyung Kim <namhyung@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/lockdep.h |  3 +++
 include/linux/mutex.h   |  9 +++++++++
 kernel/mutex.c          | 25 +++++++++++++++++--------
 3 files changed, 29 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 4aef1dda6406..ef820a3c378b 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -487,12 +487,15 @@ static inline void print_irqtrace_events(struct task_struct *curr)
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # ifdef CONFIG_PROVE_LOCKING
 #  define mutex_acquire(l, s, t, i)		lock_acquire(l, s, t, 0, 2, NULL, i)
+#  define mutex_acquire_nest(l, s, t, n, i)	lock_acquire(l, s, t, 0, 2, n, i)
 # else
 #  define mutex_acquire(l, s, t, i)		lock_acquire(l, s, t, 0, 1, NULL, i)
+#  define mutex_acquire_nest(l, s, t, n, i)	lock_acquire(l, s, t, 0, 1, n, i)
 # endif
 # define mutex_release(l, n, i)			lock_release(l, n, i)
 #else
 # define mutex_acquire(l, s, t, i)		do { } while (0)
+# define mutex_acquire_nest(l, s, t, n, i)	do { } while (0)
 # define mutex_release(l, n, i)			do { } while (0)
 #endif
 
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index c75471db576e..a940fe435aca 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -132,6 +132,7 @@ static inline int mutex_is_locked(struct mutex *lock)
  */
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
+extern void _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock);
 extern int __must_check mutex_lock_interruptible_nested(struct mutex *lock,
 					unsigned int subclass);
 extern int __must_check mutex_lock_killable_nested(struct mutex *lock,
@@ -140,6 +141,13 @@ extern int __must_check mutex_lock_killable_nested(struct mutex *lock,
 #define mutex_lock(lock) mutex_lock_nested(lock, 0)
 #define mutex_lock_interruptible(lock) mutex_lock_interruptible_nested(lock, 0)
 #define mutex_lock_killable(lock) mutex_lock_killable_nested(lock, 0)
+
+#define mutex_lock_nest_lock(lock, nest_lock)				\
+do {									\
+	typecheck(struct lockdep_map *, &(nest_lock)->dep_map);		\
+	_mutex_lock_nest_lock(lock, &(nest_lock)->dep_map);		\
+} while (0)
+
 #else
 extern void mutex_lock(struct mutex *lock);
 extern int __must_check mutex_lock_interruptible(struct mutex *lock);
@@ -148,6 +156,7 @@ extern int __must_check mutex_lock_killable(struct mutex *lock);
 # define mutex_lock_nested(lock, subclass) mutex_lock(lock)
 # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock)
 # define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock)
+# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock)
 #endif
 
 /*
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 2c938e2337cd..d607ed5dd441 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -131,14 +131,14 @@ EXPORT_SYMBOL(mutex_unlock);
  */
 static inline int __sched
 __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
-	       	unsigned long ip)
+		    struct lockdep_map *nest_lock, unsigned long ip)
 {
 	struct task_struct *task = current;
 	struct mutex_waiter waiter;
 	unsigned long flags;
 
 	preempt_disable();
-	mutex_acquire(&lock->dep_map, subclass, 0, ip);
+	mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
 
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
 	/*
@@ -269,16 +269,25 @@ void __sched
 mutex_lock_nested(struct mutex *lock, unsigned int subclass)
 {
 	might_sleep();
-	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, _RET_IP_);
+	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_);
 }
 
 EXPORT_SYMBOL_GPL(mutex_lock_nested);
 
+void __sched
+_mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest)
+{
+	might_sleep();
+	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, nest, _RET_IP_);
+}
+
+EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock);
+
 int __sched
 mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass)
 {
 	might_sleep();
-	return __mutex_lock_common(lock, TASK_KILLABLE, subclass, _RET_IP_);
+	return __mutex_lock_common(lock, TASK_KILLABLE, subclass, NULL, _RET_IP_);
 }
 EXPORT_SYMBOL_GPL(mutex_lock_killable_nested);
 
@@ -287,7 +296,7 @@ mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass)
 {
 	might_sleep();
 	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE,
-				   subclass, _RET_IP_);
+				   subclass, NULL, _RET_IP_);
 }
 
 EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);
@@ -393,7 +402,7 @@ __mutex_lock_slowpath(atomic_t *lock_count)
 {
 	struct mutex *lock = container_of(lock_count, struct mutex, count);
 
-	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, _RET_IP_);
+	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_);
 }
 
 static noinline int __sched
@@ -401,7 +410,7 @@ __mutex_lock_killable_slowpath(atomic_t *lock_count)
 {
 	struct mutex *lock = container_of(lock_count, struct mutex, count);
 
-	return __mutex_lock_common(lock, TASK_KILLABLE, 0, _RET_IP_);
+	return __mutex_lock_common(lock, TASK_KILLABLE, 0, NULL, _RET_IP_);
 }
 
 static noinline int __sched
@@ -409,7 +418,7 @@ __mutex_lock_interruptible_slowpath(atomic_t *lock_count)
 {
 	struct mutex *lock = container_of(lock_count, struct mutex, count);
 
-	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, _RET_IP_);
+	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_);
 }
 #endif
 
-- 
cgit v1.2.3


From 97a894136f29802da19a15541de3c019e1ca147e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 24 May 2011 17:12:04 -0700
Subject: mm: Remove i_mmap_lock lockbreak

Hugh says:
 "The only significant loser, I think, would be page reclaim (when
  concurrent with truncation): could spin for a long time waiting for
  the i_mmap_mutex it expects would soon be dropped? "

Counter points:
 - cpu contention makes the spin stop (need_resched())
 - zap pages should be freeing pages at a higher rate than reclaim
   ever can

I think the simplification of the truncate code is definitely worth it.

Effectively reverts: 2aa15890f3c ("mm: prevent concurrent
unmap_mapping_range() on the same inode") and takes out the code that
caused its problem.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Miller <davem@davemloft.net>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Namhyung Kim <namhyung@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/inode.c               |   1 -
 include/linux/fs.h       |   2 -
 include/linux/mm.h       |   2 -
 include/linux/mm_types.h |   1 -
 kernel/fork.c            |   1 -
 mm/memory.c              | 195 +++++++----------------------------------------
 mm/mmap.c                |  13 +---
 mm/mremap.c              |   1 -
 8 files changed, 28 insertions(+), 188 deletions(-)

(limited to 'include/linux')

diff --git a/fs/inode.c b/fs/inode.c
index 05f4fa521325..7a7284c71abd 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -331,7 +331,6 @@ void address_space_init_once(struct address_space *mapping)
 	spin_lock_init(&mapping->private_lock);
 	INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
 	INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
-	mutex_init(&mapping->unmap_mutex);
 }
 EXPORT_SYMBOL(address_space_init_once);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cdf9495df204..5d2c86bdf5ba 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -635,7 +635,6 @@ struct address_space {
 	struct prio_tree_root	i_mmap;		/* tree of private and shared mappings */
 	struct list_head	i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
 	spinlock_t		i_mmap_lock;	/* protect tree, count, list */
-	unsigned int		truncate_count;	/* Cover race condition with truncate */
 	unsigned long		nrpages;	/* number of total pages */
 	pgoff_t			writeback_index;/* writeback starts here */
 	const struct address_space_operations *a_ops;	/* methods */
@@ -644,7 +643,6 @@ struct address_space {
 	spinlock_t		private_lock;	/* for use by the address_space */
 	struct list_head	private_list;	/* ditto */
 	struct address_space	*assoc_mapping;	/* ditto */
-	struct mutex		unmap_mutex;    /* to protect unmapping */
 } __attribute__((aligned(sizeof(long))));
 	/*
 	 * On most architectures that alignment is already the case; but
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ffcce9bf2b54..2ad0ac8c3f32 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -895,8 +895,6 @@ struct zap_details {
 	struct address_space *check_mapping;	/* Check page->mapping if set */
 	pgoff_t	first_index;			/* Lowest page->index to unmap */
 	pgoff_t last_index;			/* Highest page->index to unmap */
-	spinlock_t *i_mmap_lock;		/* For unmap_mapping_range: */
-	unsigned long truncate_count;		/* Compare vm_truncate_count */
 };
 
 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 02aa5619709b..201998e5b530 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -175,7 +175,6 @@ struct vm_area_struct {
 					   units, *not* PAGE_CACHE_SIZE */
 	struct file * vm_file;		/* File we map to (can be NULL). */
 	void * vm_private_data;		/* was vm_pte (shared mem) */
-	unsigned long vm_truncate_count;/* truncate_count or restart_addr */
 
 #ifndef CONFIG_MMU
 	struct vm_region *vm_region;	/* NOMMU mapping region */
diff --git a/kernel/fork.c b/kernel/fork.c
index 2b44d82b8237..4eef925477fc 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -386,7 +386,6 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 			spin_lock(&mapping->i_mmap_lock);
 			if (tmp->vm_flags & VM_SHARED)
 				mapping->i_mmap_writable++;
-			tmp->vm_truncate_count = mpnt->vm_truncate_count;
 			flush_dcache_mmap_lock(mapping);
 			/* insert tmp into the share list, just after mpnt */
 			vma_prio_tree_add(tmp, mpnt);
diff --git a/mm/memory.c b/mm/memory.c
index 17193d74f302..18655878b9f8 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -986,13 +986,13 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 static unsigned long zap_pte_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma, pmd_t *pmd,
 				unsigned long addr, unsigned long end,
-				long *zap_work, struct zap_details *details)
+				struct zap_details *details)
 {
 	struct mm_struct *mm = tlb->mm;
 	int force_flush = 0;
-	pte_t *pte;
-	spinlock_t *ptl;
 	int rss[NR_MM_COUNTERS];
+	spinlock_t *ptl;
+	pte_t *pte;
 
 again:
 	init_rss_vec(rss);
@@ -1001,12 +1001,9 @@ again:
 	do {
 		pte_t ptent = *pte;
 		if (pte_none(ptent)) {
-			(*zap_work)--;
 			continue;
 		}
 
-		(*zap_work) -= PAGE_SIZE;
-
 		if (pte_present(ptent)) {
 			struct page *page;
 
@@ -1075,7 +1072,7 @@ again:
 				print_bad_pte(vma, addr, ptent, NULL);
 		}
 		pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
-	} while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
+	} while (pte++, addr += PAGE_SIZE, addr != end);
 
 	add_mm_rss_vec(mm, rss);
 	arch_leave_lazy_mmu_mode();
@@ -1099,7 +1096,7 @@ again:
 static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma, pud_t *pud,
 				unsigned long addr, unsigned long end,
-				long *zap_work, struct zap_details *details)
+				struct zap_details *details)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -1111,19 +1108,15 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
 			if (next-addr != HPAGE_PMD_SIZE) {
 				VM_BUG_ON(!rwsem_is_locked(&tlb->mm->mmap_sem));
 				split_huge_page_pmd(vma->vm_mm, pmd);
-			} else if (zap_huge_pmd(tlb, vma, pmd)) {
-				(*zap_work)--;
+			} else if (zap_huge_pmd(tlb, vma, pmd))
 				continue;
-			}
 			/* fall through */
 		}
-		if (pmd_none_or_clear_bad(pmd)) {
-			(*zap_work)--;
+		if (pmd_none_or_clear_bad(pmd))
 			continue;
-		}
-		next = zap_pte_range(tlb, vma, pmd, addr, next,
-						zap_work, details);
-	} while (pmd++, addr = next, (addr != end && *zap_work > 0));
+		next = zap_pte_range(tlb, vma, pmd, addr, next, details);
+		cond_resched();
+	} while (pmd++, addr = next, addr != end);
 
 	return addr;
 }
@@ -1131,7 +1124,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
 static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma, pgd_t *pgd,
 				unsigned long addr, unsigned long end,
-				long *zap_work, struct zap_details *details)
+				struct zap_details *details)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -1139,13 +1132,10 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
 	pud = pud_offset(pgd, addr);
 	do {
 		next = pud_addr_end(addr, end);
-		if (pud_none_or_clear_bad(pud)) {
-			(*zap_work)--;
+		if (pud_none_or_clear_bad(pud))
 			continue;
-		}
-		next = zap_pmd_range(tlb, vma, pud, addr, next,
-						zap_work, details);
-	} while (pud++, addr = next, (addr != end && *zap_work > 0));
+		next = zap_pmd_range(tlb, vma, pud, addr, next, details);
+	} while (pud++, addr = next, addr != end);
 
 	return addr;
 }
@@ -1153,7 +1143,7 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
 static unsigned long unmap_page_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma,
 				unsigned long addr, unsigned long end,
-				long *zap_work, struct zap_details *details)
+				struct zap_details *details)
 {
 	pgd_t *pgd;
 	unsigned long next;
@@ -1167,13 +1157,10 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
 	pgd = pgd_offset(vma->vm_mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
-		if (pgd_none_or_clear_bad(pgd)) {
-			(*zap_work)--;
+		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		}
-		next = zap_pud_range(tlb, vma, pgd, addr, next,
-						zap_work, details);
-	} while (pgd++, addr = next, (addr != end && *zap_work > 0));
+		next = zap_pud_range(tlb, vma, pgd, addr, next, details);
+	} while (pgd++, addr = next, addr != end);
 	tlb_end_vma(tlb, vma);
 	mem_cgroup_uncharge_end();
 
@@ -1218,9 +1205,7 @@ unsigned long unmap_vmas(struct mmu_gather *tlb,
 		unsigned long end_addr, unsigned long *nr_accounted,
 		struct zap_details *details)
 {
-	long zap_work = ZAP_BLOCK_SIZE;
 	unsigned long start = start_addr;
-	spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
 	struct mm_struct *mm = vma->vm_mm;
 
 	mmu_notifier_invalidate_range_start(mm, start_addr, end_addr);
@@ -1253,33 +1238,15 @@ unsigned long unmap_vmas(struct mmu_gather *tlb,
 				 * Since no pte has actually been setup, it is
 				 * safe to do nothing in this case.
 				 */
-				if (vma->vm_file) {
+				if (vma->vm_file)
 					unmap_hugepage_range(vma, start, end, NULL);
-					zap_work -= (end - start) /
-					pages_per_huge_page(hstate_vma(vma));
-				}
 
 				start = end;
 			} else
-				start = unmap_page_range(tlb, vma,
-						start, end, &zap_work, details);
-
-			if (zap_work > 0) {
-				BUG_ON(start != end);
-				break;
-			}
-
-			if (need_resched() ||
-				(i_mmap_lock && spin_needbreak(i_mmap_lock))) {
-				if (i_mmap_lock)
-					goto out;
-				cond_resched();
-			}
-
-			zap_work = ZAP_BLOCK_SIZE;
+				start = unmap_page_range(tlb, vma, start, end, details);
 		}
 	}
-out:
+
 	mmu_notifier_invalidate_range_end(mm, start_addr, end_addr);
 	return start;	/* which is now the end (or restart) address */
 }
@@ -2612,96 +2579,11 @@ unwritable_page:
 	return ret;
 }
 
-/*
- * Helper functions for unmap_mapping_range().
- *
- * __ Notes on dropping i_mmap_lock to reduce latency while unmapping __
- *
- * We have to restart searching the prio_tree whenever we drop the lock,
- * since the iterator is only valid while the lock is held, and anyway
- * a later vma might be split and reinserted earlier while lock dropped.
- *
- * The list of nonlinear vmas could be handled more efficiently, using
- * a placeholder, but handle it in the same way until a need is shown.
- * It is important to search the prio_tree before nonlinear list: a vma
- * may become nonlinear and be shifted from prio_tree to nonlinear list
- * while the lock is dropped; but never shifted from list to prio_tree.
- *
- * In order to make forward progress despite restarting the search,
- * vm_truncate_count is used to mark a vma as now dealt with, so we can
- * quickly skip it next time around.  Since the prio_tree search only
- * shows us those vmas affected by unmapping the range in question, we
- * can't efficiently keep all vmas in step with mapping->truncate_count:
- * so instead reset them all whenever it wraps back to 0 (then go to 1).
- * mapping->truncate_count and vma->vm_truncate_count are protected by
- * i_mmap_lock.
- *
- * In order to make forward progress despite repeatedly restarting some
- * large vma, note the restart_addr from unmap_vmas when it breaks out:
- * and restart from that address when we reach that vma again.  It might
- * have been split or merged, shrunk or extended, but never shifted: so
- * restart_addr remains valid so long as it remains in the vma's range.
- * unmap_mapping_range forces truncate_count to leap over page-aligned
- * values so we can save vma's restart_addr in its truncate_count field.
- */
-#define is_restart_addr(truncate_count) (!((truncate_count) & ~PAGE_MASK))
-
-static void reset_vma_truncate_counts(struct address_space *mapping)
-{
-	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
-
-	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX)
-		vma->vm_truncate_count = 0;
-	list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
-		vma->vm_truncate_count = 0;
-}
-
-static int unmap_mapping_range_vma(struct vm_area_struct *vma,
+static void unmap_mapping_range_vma(struct vm_area_struct *vma,
 		unsigned long start_addr, unsigned long end_addr,
 		struct zap_details *details)
 {
-	unsigned long restart_addr;
-	int need_break;
-
-	/*
-	 * files that support invalidating or truncating portions of the
-	 * file from under mmaped areas must have their ->fault function
-	 * return a locked page (and set VM_FAULT_LOCKED in the return).
-	 * This provides synchronisation against concurrent unmapping here.
-	 */
-
-again:
-	restart_addr = vma->vm_truncate_count;
-	if (is_restart_addr(restart_addr) && start_addr < restart_addr) {
-		start_addr = restart_addr;
-		if (start_addr >= end_addr) {
-			/* Top of vma has been split off since last time */
-			vma->vm_truncate_count = details->truncate_count;
-			return 0;
-		}
-	}
-
-	restart_addr = zap_page_range(vma, start_addr,
-					end_addr - start_addr, details);
-	need_break = need_resched() || spin_needbreak(details->i_mmap_lock);
-
-	if (restart_addr >= end_addr) {
-		/* We have now completed this vma: mark it so */
-		vma->vm_truncate_count = details->truncate_count;
-		if (!need_break)
-			return 0;
-	} else {
-		/* Note restart_addr in vma's truncate_count field */
-		vma->vm_truncate_count = restart_addr;
-		if (!need_break)
-			goto again;
-	}
-
-	spin_unlock(details->i_mmap_lock);
-	cond_resched();
-	spin_lock(details->i_mmap_lock);
-	return -EINTR;
+	zap_page_range(vma, start_addr, end_addr - start_addr, details);
 }
 
 static inline void unmap_mapping_range_tree(struct prio_tree_root *root,
@@ -2711,12 +2593,8 @@ static inline void unmap_mapping_range_tree(struct prio_tree_root *root,
 	struct prio_tree_iter iter;
 	pgoff_t vba, vea, zba, zea;
 
-restart:
 	vma_prio_tree_foreach(vma, &iter, root,
 			details->first_index, details->last_index) {
-		/* Skip quickly over those we have already dealt with */
-		if (vma->vm_truncate_count == details->truncate_count)
-			continue;
 
 		vba = vma->vm_pgoff;
 		vea = vba + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) - 1;
@@ -2728,11 +2606,10 @@ restart:
 		if (zea > vea)
 			zea = vea;
 
-		if (unmap_mapping_range_vma(vma,
+		unmap_mapping_range_vma(vma,
 			((zba - vba) << PAGE_SHIFT) + vma->vm_start,
 			((zea - vba + 1) << PAGE_SHIFT) + vma->vm_start,
-				details) < 0)
-			goto restart;
+				details);
 	}
 }
 
@@ -2747,15 +2624,9 @@ static inline void unmap_mapping_range_list(struct list_head *head,
 	 * across *all* the pages in each nonlinear VMA, not just the pages
 	 * whose virtual address lies outside the file truncation point.
 	 */
-restart:
 	list_for_each_entry(vma, head, shared.vm_set.list) {
-		/* Skip quickly over those we have already dealt with */
-		if (vma->vm_truncate_count == details->truncate_count)
-			continue;
 		details->nonlinear_vma = vma;
-		if (unmap_mapping_range_vma(vma, vma->vm_start,
-					vma->vm_end, details) < 0)
-			goto restart;
+		unmap_mapping_range_vma(vma, vma->vm_start, vma->vm_end, details);
 	}
 }
 
@@ -2794,26 +2665,14 @@ void unmap_mapping_range(struct address_space *mapping,
 	details.last_index = hba + hlen - 1;
 	if (details.last_index < details.first_index)
 		details.last_index = ULONG_MAX;
-	details.i_mmap_lock = &mapping->i_mmap_lock;
 
-	mutex_lock(&mapping->unmap_mutex);
-	spin_lock(&mapping->i_mmap_lock);
-
-	/* Protect against endless unmapping loops */
-	mapping->truncate_count++;
-	if (unlikely(is_restart_addr(mapping->truncate_count))) {
-		if (mapping->truncate_count == 0)
-			reset_vma_truncate_counts(mapping);
-		mapping->truncate_count++;
-	}
-	details.truncate_count = mapping->truncate_count;
 
+	spin_lock(&mapping->i_mmap_lock);
 	if (unlikely(!prio_tree_empty(&mapping->i_mmap)))
 		unmap_mapping_range_tree(&mapping->i_mmap, &details);
 	if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
 		unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
 	spin_unlock(&mapping->i_mmap_lock);
-	mutex_unlock(&mapping->unmap_mutex);
 }
 EXPORT_SYMBOL(unmap_mapping_range);
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 40d49986e714..50cb04bb56bf 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -445,10 +445,8 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (vma->vm_file)
 		mapping = vma->vm_file->f_mapping;
 
-	if (mapping) {
+	if (mapping)
 		spin_lock(&mapping->i_mmap_lock);
-		vma->vm_truncate_count = mapping->truncate_count;
-	}
 
 	__vma_link(mm, vma, prev, rb_link, rb_parent);
 	__vma_link_file(vma);
@@ -558,16 +556,7 @@ again:			remove_next = 1 + (end > next->vm_end);
 		if (!(vma->vm_flags & VM_NONLINEAR))
 			root = &mapping->i_mmap;
 		spin_lock(&mapping->i_mmap_lock);
-		if (importer &&
-		    vma->vm_truncate_count != next->vm_truncate_count) {
-			/*
-			 * unmap_mapping_range might be in progress:
-			 * ensure that the expanding vma is rescanned.
-			 */
-			importer->vm_truncate_count = 0;
-		}
 		if (insert) {
-			insert->vm_truncate_count = vma->vm_truncate_count;
 			/*
 			 * Put into prio_tree now, so instantiated pages
 			 * are visible to arm/parisc __flush_dcache_page
diff --git a/mm/mremap.c b/mm/mremap.c
index a7c1f9f9b941..909e1e1e99b1 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -94,7 +94,6 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		 */
 		mapping = vma->vm_file->f_mapping;
 		spin_lock(&mapping->i_mmap_lock);
-		new_vma->vm_truncate_count = 0;
 	}
 
 	/*
-- 
cgit v1.2.3


From 3d48ae45e72390ddf8cc5256ac32ed6f7a19cbea Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 24 May 2011 17:12:06 -0700
Subject: mm: Convert i_mmap_lock to a mutex

Straightforward conversion of i_mmap_lock to a mutex.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Miller <davem@davemloft.net>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Tony Luck <tony.luck@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Namhyung Kim <namhyung@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/lockstat.txt   |  2 +-
 Documentation/vm/locking     |  2 +-
 arch/x86/mm/hugetlbpage.c    |  4 ++--
 fs/hugetlbfs/inode.c         |  4 ++--
 fs/inode.c                   |  2 +-
 include/linux/fs.h           |  2 +-
 include/linux/mmu_notifier.h |  2 +-
 kernel/fork.c                |  4 ++--
 mm/filemap.c                 | 10 +++++-----
 mm/filemap_xip.c             |  4 ++--
 mm/fremap.c                  |  4 ++--
 mm/hugetlb.c                 | 14 +++++++-------
 mm/memory-failure.c          |  4 ++--
 mm/memory.c                  |  4 ++--
 mm/mmap.c                    | 22 +++++++++++-----------
 mm/mremap.c                  |  4 ++--
 mm/rmap.c                    | 28 ++++++++++++++--------------
 17 files changed, 58 insertions(+), 58 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt
index 65f4c795015d..9c0a80d17a23 100644
--- a/Documentation/lockstat.txt
+++ b/Documentation/lockstat.txt
@@ -136,7 +136,7 @@ View the top contending locks:
                              dcache_lock:          1037           1161           0.38          45.32         774.51           6611         243371           0.15         306.48       77387.24
                          &inode->i_mutex:           161            286 18446744073709       62882.54     1244614.55           3653          20598 18446744073709       62318.60     1693822.74
                          &zone->lru_lock:            94             94           0.53           7.33          92.10           4366          32690           0.29          59.81       16350.06
-              &inode->i_data.i_mmap_lock:            79             79           0.40           3.77          53.03          11779          87755           0.28         116.93       29898.44
+              &inode->i_data.i_mmap_mutex:            79             79           0.40           3.77          53.03          11779          87755           0.28         116.93       29898.44
                         &q->__queue_lock:            48             50           0.52          31.62          86.31            774          13131           0.17         113.08       12277.52
                         &rq->rq_lock_key:            43             47           0.74          68.50         170.63           3706          33929           0.22         107.99       17460.62
                       &rq->rq_lock_key#2:            39             46           0.75           6.68          49.03           2979          32292           0.17         125.17       17137.63
diff --git a/Documentation/vm/locking b/Documentation/vm/locking
index 25fadb448760..f61228bd6395 100644
--- a/Documentation/vm/locking
+++ b/Documentation/vm/locking
@@ -66,7 +66,7 @@ in some cases it is not really needed. Eg, vm_start is modified by
 expand_stack(), it is hard to come up with a destructive scenario without 
 having the vmlist protection in this case.
 
-The page_table_lock nests with the inode i_mmap_lock and the kmem cache
+The page_table_lock nests with the inode i_mmap_mutex and the kmem cache
 c_spinlock spinlocks.  This is okay, since the kmem code asks for pages after
 dropping c_spinlock.  The page_table_lock also nests with pagecache_lock and
 pagemap_lru_lock spinlocks, and no code asks for memory with these locks
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index d4203988504a..f581a18c0d4d 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -72,7 +72,7 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 	if (!vma_shareable(vma, addr))
 		return;
 
-	spin_lock(&mapping->i_mmap_lock);
+	mutex_lock(&mapping->i_mmap_mutex);
 	vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) {
 		if (svma == vma)
 			continue;
@@ -97,7 +97,7 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 		put_page(virt_to_page(spte));
 	spin_unlock(&mm->page_table_lock);
 out:
-	spin_unlock(&mapping->i_mmap_lock);
+	mutex_unlock(&mapping->i_mmap_mutex);
 }
 
 /*
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index b9eeb1cd03ff..e7a035781b7d 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -412,10 +412,10 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 	pgoff = offset >> PAGE_SHIFT;
 
 	i_size_write(inode, offset);
-	spin_lock(&mapping->i_mmap_lock);
+	mutex_lock(&mapping->i_mmap_mutex);
 	if (!prio_tree_empty(&mapping->i_mmap))
 		hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
-	spin_unlock(&mapping->i_mmap_lock);
+	mutex_unlock(&mapping->i_mmap_mutex);
 	truncate_hugepages(inode, offset);
 	return 0;
 }
diff --git a/fs/inode.c b/fs/inode.c
index 7a7284c71abd..88aa3bcc7681 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -326,7 +326,7 @@ void address_space_init_once(struct address_space *mapping)
 	memset(mapping, 0, sizeof(*mapping));
 	INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
 	spin_lock_init(&mapping->tree_lock);
-	spin_lock_init(&mapping->i_mmap_lock);
+	mutex_init(&mapping->i_mmap_mutex);
 	INIT_LIST_HEAD(&mapping->private_list);
 	spin_lock_init(&mapping->private_lock);
 	INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5d2c86bdf5ba..5bb9e826019b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -634,7 +634,7 @@ struct address_space {
 	unsigned int		i_mmap_writable;/* count VM_SHARED mappings */
 	struct prio_tree_root	i_mmap;		/* tree of private and shared mappings */
 	struct list_head	i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
-	spinlock_t		i_mmap_lock;	/* protect tree, count, list */
+	struct mutex		i_mmap_mutex;	/* protect tree, count, list */
 	unsigned long		nrpages;	/* number of total pages */
 	pgoff_t			writeback_index;/* writeback starts here */
 	const struct address_space_operations *a_ops;	/* methods */
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index cc2e7dfea9d7..a877dfc243eb 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -150,7 +150,7 @@ struct mmu_notifier_ops {
  * Therefore notifier chains can only be traversed when either
  *
  * 1. mmap_sem is held.
- * 2. One of the reverse map locks is held (i_mmap_lock or anon_vma->lock).
+ * 2. One of the reverse map locks is held (i_mmap_mutex or anon_vma->lock).
  * 3. No other concurrent thread can access the list (release)
  */
 struct mmu_notifier {
diff --git a/kernel/fork.c b/kernel/fork.c
index 4eef925477fc..927692734bcf 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -383,14 +383,14 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 			get_file(file);
 			if (tmp->vm_flags & VM_DENYWRITE)
 				atomic_dec(&inode->i_writecount);
-			spin_lock(&mapping->i_mmap_lock);
+			mutex_lock(&mapping->i_mmap_mutex);
 			if (tmp->vm_flags & VM_SHARED)
 				mapping->i_mmap_writable++;
 			flush_dcache_mmap_lock(mapping);
 			/* insert tmp into the share list, just after mpnt */
 			vma_prio_tree_add(tmp, mpnt);
 			flush_dcache_mmap_unlock(mapping);
-			spin_unlock(&mapping->i_mmap_lock);
+			mutex_unlock(&mapping->i_mmap_mutex);
 		}
 
 		/*
diff --git a/mm/filemap.c b/mm/filemap.c
index 8144f87dcbb4..88354ae0b1fd 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -58,16 +58,16 @@
 /*
  * Lock ordering:
  *
- *  ->i_mmap_lock		(truncate_pagecache)
+ *  ->i_mmap_mutex		(truncate_pagecache)
  *    ->private_lock		(__free_pte->__set_page_dirty_buffers)
  *      ->swap_lock		(exclusive_swap_page, others)
  *        ->mapping->tree_lock
  *
  *  ->i_mutex
- *    ->i_mmap_lock		(truncate->unmap_mapping_range)
+ *    ->i_mmap_mutex		(truncate->unmap_mapping_range)
  *
  *  ->mmap_sem
- *    ->i_mmap_lock
+ *    ->i_mmap_mutex
  *      ->page_table_lock or pte_lock	(various, mainly in memory.c)
  *        ->mapping->tree_lock	(arch-dependent flush_dcache_mmap_lock)
  *
@@ -84,7 +84,7 @@
  *    sb_lock			(fs/fs-writeback.c)
  *    ->mapping->tree_lock	(__sync_single_inode)
  *
- *  ->i_mmap_lock
+ *  ->i_mmap_mutex
  *    ->anon_vma.lock		(vma_adjust)
  *
  *  ->anon_vma.lock
@@ -106,7 +106,7 @@
  *
  *  (code doesn't rely on that order, so you could switch it around)
  *  ->tasklist_lock             (memory_failure, collect_procs_ao)
- *    ->i_mmap_lock
+ *    ->i_mmap_mutex
  */
 
 /*
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 83364df74a33..93356cd12828 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -183,7 +183,7 @@ __xip_unmap (struct address_space * mapping,
 		return;
 
 retry:
-	spin_lock(&mapping->i_mmap_lock);
+	mutex_lock(&mapping->i_mmap_mutex);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
 		mm = vma->vm_mm;
 		address = vma->vm_start +
@@ -201,7 +201,7 @@ retry:
 			page_cache_release(page);
 		}
 	}
-	spin_unlock(&mapping->i_mmap_lock);
+	mutex_unlock(&mapping->i_mmap_mutex);
 
 	if (locked) {
 		mutex_unlock(&xip_sparse_mutex);
diff --git a/mm/fremap.c b/mm/fremap.c
index ec520c7b28df..7f4123056e06 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -211,13 +211,13 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
 			}
 			goto out;
 		}
-		spin_lock(&mapping->i_mmap_lock);
+		mutex_lock(&mapping->i_mmap_mutex);
 		flush_dcache_mmap_lock(mapping);
 		vma->vm_flags |= VM_NONLINEAR;
 		vma_prio_tree_remove(vma, &mapping->i_mmap);
 		vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
 		flush_dcache_mmap_unlock(mapping);
-		spin_unlock(&mapping->i_mmap_lock);
+		mutex_unlock(&mapping->i_mmap_mutex);
 	}
 
 	if (vma->vm_flags & VM_LOCKED) {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index bbb4a5bbb958..5fd68b95c671 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2205,7 +2205,7 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 	unsigned long sz = huge_page_size(h);
 
 	/*
-	 * A page gathering list, protected by per file i_mmap_lock. The
+	 * A page gathering list, protected by per file i_mmap_mutex. The
 	 * lock is used to avoid list corruption from multiple unmapping
 	 * of the same page since we are using page->lru.
 	 */
@@ -2274,9 +2274,9 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 			  unsigned long end, struct page *ref_page)
 {
-	spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
+	mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex);
 	__unmap_hugepage_range(vma, start, end, ref_page);
-	spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
+	mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
 }
 
 /*
@@ -2308,7 +2308,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * this mapping should be shared between all the VMAs,
 	 * __unmap_hugepage_range() is called as the lock is already held
 	 */
-	spin_lock(&mapping->i_mmap_lock);
+	mutex_lock(&mapping->i_mmap_mutex);
 	vma_prio_tree_foreach(iter_vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
 		/* Do not unmap the current VMA */
 		if (iter_vma == vma)
@@ -2326,7 +2326,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
 				address, address + huge_page_size(h),
 				page);
 	}
-	spin_unlock(&mapping->i_mmap_lock);
+	mutex_unlock(&mapping->i_mmap_mutex);
 
 	return 1;
 }
@@ -2810,7 +2810,7 @@ void hugetlb_change_protection(struct vm_area_struct *vma,
 	BUG_ON(address >= end);
 	flush_cache_range(vma, address, end);
 
-	spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
+	mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex);
 	spin_lock(&mm->page_table_lock);
 	for (; address < end; address += huge_page_size(h)) {
 		ptep = huge_pte_offset(mm, address);
@@ -2825,7 +2825,7 @@ void hugetlb_change_protection(struct vm_area_struct *vma,
 		}
 	}
 	spin_unlock(&mm->page_table_lock);
-	spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
+	mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
 
 	flush_tlb_range(vma, start, end);
 }
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 2b9a5eef39e0..12178ec32ab5 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -429,7 +429,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
 	 */
 
 	read_lock(&tasklist_lock);
-	spin_lock(&mapping->i_mmap_lock);
+	mutex_lock(&mapping->i_mmap_mutex);
 	for_each_process(tsk) {
 		pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 
@@ -449,7 +449,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
 				add_to_kill(tsk, page, vma, to_kill, tkc);
 		}
 	}
-	spin_unlock(&mapping->i_mmap_lock);
+	mutex_unlock(&mapping->i_mmap_mutex);
 	read_unlock(&tasklist_lock);
 }
 
diff --git a/mm/memory.c b/mm/memory.c
index 18655878b9f8..7bbe4d3df756 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2667,12 +2667,12 @@ void unmap_mapping_range(struct address_space *mapping,
 		details.last_index = ULONG_MAX;
 
 
-	spin_lock(&mapping->i_mmap_lock);
+	mutex_lock(&mapping->i_mmap_mutex);
 	if (unlikely(!prio_tree_empty(&mapping->i_mmap)))
 		unmap_mapping_range_tree(&mapping->i_mmap, &details);
 	if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
 		unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
-	spin_unlock(&mapping->i_mmap_lock);
+	mutex_unlock(&mapping->i_mmap_mutex);
 }
 EXPORT_SYMBOL(unmap_mapping_range);
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 50cb04bb56bf..26efbfca0b20 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -194,7 +194,7 @@ error:
 }
 
 /*
- * Requires inode->i_mapping->i_mmap_lock
+ * Requires inode->i_mapping->i_mmap_mutex
  */
 static void __remove_shared_vm_struct(struct vm_area_struct *vma,
 		struct file *file, struct address_space *mapping)
@@ -222,9 +222,9 @@ void unlink_file_vma(struct vm_area_struct *vma)
 
 	if (file) {
 		struct address_space *mapping = file->f_mapping;
-		spin_lock(&mapping->i_mmap_lock);
+		mutex_lock(&mapping->i_mmap_mutex);
 		__remove_shared_vm_struct(vma, file, mapping);
-		spin_unlock(&mapping->i_mmap_lock);
+		mutex_unlock(&mapping->i_mmap_mutex);
 	}
 }
 
@@ -446,13 +446,13 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
 		mapping = vma->vm_file->f_mapping;
 
 	if (mapping)
-		spin_lock(&mapping->i_mmap_lock);
+		mutex_lock(&mapping->i_mmap_mutex);
 
 	__vma_link(mm, vma, prev, rb_link, rb_parent);
 	__vma_link_file(vma);
 
 	if (mapping)
-		spin_unlock(&mapping->i_mmap_lock);
+		mutex_unlock(&mapping->i_mmap_mutex);
 
 	mm->map_count++;
 	validate_mm(mm);
@@ -555,7 +555,7 @@ again:			remove_next = 1 + (end > next->vm_end);
 		mapping = file->f_mapping;
 		if (!(vma->vm_flags & VM_NONLINEAR))
 			root = &mapping->i_mmap;
-		spin_lock(&mapping->i_mmap_lock);
+		mutex_lock(&mapping->i_mmap_mutex);
 		if (insert) {
 			/*
 			 * Put into prio_tree now, so instantiated pages
@@ -622,7 +622,7 @@ again:			remove_next = 1 + (end > next->vm_end);
 	if (anon_vma)
 		anon_vma_unlock(anon_vma);
 	if (mapping)
-		spin_unlock(&mapping->i_mmap_lock);
+		mutex_unlock(&mapping->i_mmap_mutex);
 
 	if (remove_next) {
 		if (file) {
@@ -2290,7 +2290,7 @@ void exit_mmap(struct mm_struct *mm)
 
 /* Insert vm structure into process list sorted by address
  * and into the inode's i_mmap tree.  If vm_file is non-NULL
- * then i_mmap_lock is taken here.
+ * then i_mmap_mutex is taken here.
  */
 int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
 {
@@ -2532,7 +2532,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
 		 */
 		if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
 			BUG();
-		spin_lock_nest_lock(&mapping->i_mmap_lock, &mm->mmap_sem);
+		mutex_lock_nest_lock(&mapping->i_mmap_mutex, &mm->mmap_sem);
 	}
 }
 
@@ -2559,7 +2559,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
  * vma in this mm is backed by the same anon_vma or address_space.
  *
  * We can take all the locks in random order because the VM code
- * taking i_mmap_lock or anon_vma->lock outside the mmap_sem never
+ * taking i_mmap_mutex or anon_vma->lock outside the mmap_sem never
  * takes more than one of them in a row. Secondly we're protected
  * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
  *
@@ -2631,7 +2631,7 @@ static void vm_unlock_mapping(struct address_space *mapping)
 		 * AS_MM_ALL_LOCKS can't change to 0 from under us
 		 * because we hold the mm_all_locks_mutex.
 		 */
-		spin_unlock(&mapping->i_mmap_lock);
+		mutex_unlock(&mapping->i_mmap_mutex);
 		if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
 					&mapping->flags))
 			BUG();
diff --git a/mm/mremap.c b/mm/mremap.c
index 909e1e1e99b1..506fa44403df 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -93,7 +93,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		 * and we propagate stale pages into the dst afterward.
 		 */
 		mapping = vma->vm_file->f_mapping;
-		spin_lock(&mapping->i_mmap_lock);
+		mutex_lock(&mapping->i_mmap_mutex);
 	}
 
 	/*
@@ -122,7 +122,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 	pte_unmap(new_pte - 1);
 	pte_unmap_unlock(old_pte - 1, old_ptl);
 	if (mapping)
-		spin_unlock(&mapping->i_mmap_lock);
+		mutex_unlock(&mapping->i_mmap_mutex);
 	mmu_notifier_invalidate_range_end(vma->vm_mm, old_start, old_end);
 }
 
diff --git a/mm/rmap.c b/mm/rmap.c
index 522e4a93cadd..f0ef7ea5423a 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -24,7 +24,7 @@
  *   inode->i_alloc_sem (vmtruncate_range)
  *   mm->mmap_sem
  *     page->flags PG_locked (lock_page)
- *       mapping->i_mmap_lock
+ *       mapping->i_mmap_mutex
  *         anon_vma->lock
  *           mm->page_table_lock or pte_lock
  *             zone->lru_lock (in mark_page_accessed, isolate_lru_page)
@@ -646,14 +646,14 @@ static int page_referenced_file(struct page *page,
 	 * The page lock not only makes sure that page->mapping cannot
 	 * suddenly be NULLified by truncation, it makes sure that the
 	 * structure at mapping cannot be freed and reused yet,
-	 * so we can safely take mapping->i_mmap_lock.
+	 * so we can safely take mapping->i_mmap_mutex.
 	 */
 	BUG_ON(!PageLocked(page));
 
-	spin_lock(&mapping->i_mmap_lock);
+	mutex_lock(&mapping->i_mmap_mutex);
 
 	/*
-	 * i_mmap_lock does not stabilize mapcount at all, but mapcount
+	 * i_mmap_mutex does not stabilize mapcount at all, but mapcount
 	 * is more likely to be accurate if we note it after spinning.
 	 */
 	mapcount = page_mapcount(page);
@@ -675,7 +675,7 @@ static int page_referenced_file(struct page *page,
 			break;
 	}
 
-	spin_unlock(&mapping->i_mmap_lock);
+	mutex_unlock(&mapping->i_mmap_mutex);
 	return referenced;
 }
 
@@ -762,7 +762,7 @@ static int page_mkclean_file(struct address_space *mapping, struct page *page)
 
 	BUG_ON(PageAnon(page));
 
-	spin_lock(&mapping->i_mmap_lock);
+	mutex_lock(&mapping->i_mmap_mutex);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
 		if (vma->vm_flags & VM_SHARED) {
 			unsigned long address = vma_address(page, vma);
@@ -771,7 +771,7 @@ static int page_mkclean_file(struct address_space *mapping, struct page *page)
 			ret += page_mkclean_one(page, vma, address);
 		}
 	}
-	spin_unlock(&mapping->i_mmap_lock);
+	mutex_unlock(&mapping->i_mmap_mutex);
 	return ret;
 }
 
@@ -1119,7 +1119,7 @@ out_mlock:
 	/*
 	 * We need mmap_sem locking, Otherwise VM_LOCKED check makes
 	 * unstable result and race. Plus, We can't wait here because
-	 * we now hold anon_vma->lock or mapping->i_mmap_lock.
+	 * we now hold anon_vma->lock or mapping->i_mmap_mutex.
 	 * if trylock failed, the page remain in evictable lru and later
 	 * vmscan could retry to move the page to unevictable lru if the
 	 * page is actually mlocked.
@@ -1345,7 +1345,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
 	unsigned long max_nl_size = 0;
 	unsigned int mapcount;
 
-	spin_lock(&mapping->i_mmap_lock);
+	mutex_lock(&mapping->i_mmap_mutex);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
@@ -1391,7 +1391,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
 	mapcount = page_mapcount(page);
 	if (!mapcount)
 		goto out;
-	cond_resched_lock(&mapping->i_mmap_lock);
+	cond_resched();
 
 	max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
 	if (max_nl_cursor == 0)
@@ -1413,7 +1413,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
 			}
 			vma->vm_private_data = (void *) max_nl_cursor;
 		}
-		cond_resched_lock(&mapping->i_mmap_lock);
+		cond_resched();
 		max_nl_cursor += CLUSTER_SIZE;
 	} while (max_nl_cursor <= max_nl_size);
 
@@ -1425,7 +1425,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
 	list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
 		vma->vm_private_data = NULL;
 out:
-	spin_unlock(&mapping->i_mmap_lock);
+	mutex_unlock(&mapping->i_mmap_mutex);
 	return ret;
 }
 
@@ -1544,7 +1544,7 @@ static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *,
 
 	if (!mapping)
 		return ret;
-	spin_lock(&mapping->i_mmap_lock);
+	mutex_lock(&mapping->i_mmap_mutex);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
@@ -1558,7 +1558,7 @@ static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *,
 	 * never contain migration ptes.  Decide what to do about this
 	 * limitation to linear when we need rmap_walk() on nonlinear.
 	 */
-	spin_unlock(&mapping->i_mmap_lock);
+	mutex_unlock(&mapping->i_mmap_mutex);
 	return ret;
 }
 
-- 
cgit v1.2.3


From 25aeeb046e695c3093a86aa9386128ffb3b1bc32 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 24 May 2011 17:12:07 -0700
Subject: mm: revert page_lock_anon_vma() lock annotation

Its beyond ugly and gets in the way.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Miller <davem@davemloft.net>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Tony Luck <tony.luck@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Namhyung Kim <namhyung@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rmap.h | 15 +--------------
 mm/rmap.c            |  4 +---
 2 files changed, 2 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 830e65dc01ee..590c291a8cd9 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -218,20 +218,7 @@ int try_to_munlock(struct page *);
 /*
  * Called by memory-failure.c to kill processes.
  */
-struct anon_vma *__page_lock_anon_vma(struct page *page);
-
-static inline struct anon_vma *page_lock_anon_vma(struct page *page)
-{
-	struct anon_vma *anon_vma;
-
-	__cond_lock(RCU, anon_vma = __page_lock_anon_vma(page));
-
-	/* (void) is needed to make gcc happy */
-	(void) __cond_lock(&anon_vma->root->lock, anon_vma);
-
-	return anon_vma;
-}
-
+struct anon_vma *page_lock_anon_vma(struct page *page);
 void page_unlock_anon_vma(struct anon_vma *anon_vma);
 int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
 
diff --git a/mm/rmap.c b/mm/rmap.c
index f0ef7ea5423a..c6044761617e 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -323,7 +323,7 @@ void __init anon_vma_init(void)
  * Getting a lock on a stable anon_vma from a page off the LRU is
  * tricky: page_lock_anon_vma rely on RCU to guard against the races.
  */
-struct anon_vma *__page_lock_anon_vma(struct page *page)
+struct anon_vma *page_lock_anon_vma(struct page *page)
 {
 	struct anon_vma *anon_vma, *root_anon_vma;
 	unsigned long anon_mapping;
@@ -357,8 +357,6 @@ out:
 }
 
 void page_unlock_anon_vma(struct anon_vma *anon_vma)
-	__releases(&anon_vma->root->lock)
-	__releases(RCU)
 {
 	anon_vma_unlock(anon_vma);
 	rcu_read_unlock();
-- 
cgit v1.2.3


From 2b575eb64f7a9c701fb4bfdb12388ac547f6c2b6 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 24 May 2011 17:12:11 -0700
Subject: mm: convert anon_vma->lock to a mutex

Straightforward conversion of anon_vma->lock to a mutex.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Hugh Dickins <hughd@google.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Miller <davem@davemloft.net>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Tony Luck <tony.luck@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Namhyung Kim <namhyung@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/huge_mm.h      |  8 ++------
 include/linux/mmu_notifier.h |  2 +-
 include/linux/rmap.h         | 14 +++++++-------
 mm/huge_memory.c             |  4 ++--
 mm/mmap.c                    | 10 +++++-----
 mm/rmap.c                    |  8 ++++----
 6 files changed, 21 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 8847c8c29791..48c32ebf65a7 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -92,12 +92,8 @@ extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd);
 #define wait_split_huge_page(__anon_vma, __pmd)				\
 	do {								\
 		pmd_t *____pmd = (__pmd);				\
-		spin_unlock_wait(&(__anon_vma)->root->lock);		\
-		/*							\
-		 * spin_unlock_wait() is just a loop in C and so the	\
-		 * CPU can reorder anything around it.			\
-		 */							\
-		smp_mb();						\
+		anon_vma_lock(__anon_vma);				\
+		anon_vma_unlock(__anon_vma);				\
 		BUG_ON(pmd_trans_splitting(*____pmd) ||			\
 		       pmd_trans_huge(*____pmd));			\
 	} while (0)
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index a877dfc243eb..1d1b1e13f79f 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -150,7 +150,7 @@ struct mmu_notifier_ops {
  * Therefore notifier chains can only be traversed when either
  *
  * 1. mmap_sem is held.
- * 2. One of the reverse map locks is held (i_mmap_mutex or anon_vma->lock).
+ * 2. One of the reverse map locks is held (i_mmap_mutex or anon_vma->mutex).
  * 3. No other concurrent thread can access the list (release)
  */
 struct mmu_notifier {
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 590c291a8cd9..2148b122779b 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -7,7 +7,7 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
-#include <linux/spinlock.h>
+#include <linux/mutex.h>
 #include <linux/memcontrol.h>
 
 /*
@@ -26,7 +26,7 @@
  */
 struct anon_vma {
 	struct anon_vma *root;	/* Root of this anon_vma tree */
-	spinlock_t lock;	/* Serialize access to vma list */
+	struct mutex mutex;	/* Serialize access to vma list */
 	/*
 	 * The refcount is taken on an anon_vma when there is no
 	 * guarantee that the vma of page tables will exist for
@@ -64,7 +64,7 @@ struct anon_vma_chain {
 	struct vm_area_struct *vma;
 	struct anon_vma *anon_vma;
 	struct list_head same_vma;   /* locked by mmap_sem & page_table_lock */
-	struct list_head same_anon_vma;	/* locked by anon_vma->lock */
+	struct list_head same_anon_vma;	/* locked by anon_vma->mutex */
 };
 
 #ifdef CONFIG_MMU
@@ -93,24 +93,24 @@ static inline void vma_lock_anon_vma(struct vm_area_struct *vma)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
 	if (anon_vma)
-		spin_lock(&anon_vma->root->lock);
+		mutex_lock(&anon_vma->root->mutex);
 }
 
 static inline void vma_unlock_anon_vma(struct vm_area_struct *vma)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
 	if (anon_vma)
-		spin_unlock(&anon_vma->root->lock);
+		mutex_unlock(&anon_vma->root->mutex);
 }
 
 static inline void anon_vma_lock(struct anon_vma *anon_vma)
 {
-	spin_lock(&anon_vma->root->lock);
+	mutex_lock(&anon_vma->root->mutex);
 }
 
 static inline void anon_vma_unlock(struct anon_vma *anon_vma)
 {
-	spin_unlock(&anon_vma->root->lock);
+	mutex_unlock(&anon_vma->root->mutex);
 }
 
 /*
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 83326ad66d9b..90eef404ec2e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1139,7 +1139,7 @@ static int __split_huge_page_splitting(struct page *page,
 		 * We can't temporarily set the pmd to null in order
 		 * to split it, the pmd must remain marked huge at all
 		 * times or the VM won't take the pmd_trans_huge paths
-		 * and it won't wait on the anon_vma->root->lock to
+		 * and it won't wait on the anon_vma->root->mutex to
 		 * serialize against split_huge_page*.
 		 */
 		pmdp_splitting_flush_notify(vma, address, pmd);
@@ -1333,7 +1333,7 @@ static int __split_huge_page_map(struct page *page,
 	return ret;
 }
 
-/* must be called with anon_vma->root->lock hold */
+/* must be called with anon_vma->root->mutex hold */
 static void __split_huge_page(struct page *page,
 			      struct anon_vma *anon_vma)
 {
diff --git a/mm/mmap.c b/mm/mmap.c
index 26efbfca0b20..ac2631b7477f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2502,15 +2502,15 @@ static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
 		 * The LSB of head.next can't change from under us
 		 * because we hold the mm_all_locks_mutex.
 		 */
-		spin_lock_nest_lock(&anon_vma->root->lock, &mm->mmap_sem);
+		mutex_lock_nest_lock(&anon_vma->root->mutex, &mm->mmap_sem);
 		/*
 		 * We can safely modify head.next after taking the
-		 * anon_vma->root->lock. If some other vma in this mm shares
+		 * anon_vma->root->mutex. If some other vma in this mm shares
 		 * the same anon_vma we won't take it again.
 		 *
 		 * No need of atomic instructions here, head.next
 		 * can't change from under us thanks to the
-		 * anon_vma->root->lock.
+		 * anon_vma->root->mutex.
 		 */
 		if (__test_and_set_bit(0, (unsigned long *)
 				       &anon_vma->root->head.next))
@@ -2559,7 +2559,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
  * vma in this mm is backed by the same anon_vma or address_space.
  *
  * We can take all the locks in random order because the VM code
- * taking i_mmap_mutex or anon_vma->lock outside the mmap_sem never
+ * taking i_mmap_mutex or anon_vma->mutex outside the mmap_sem never
  * takes more than one of them in a row. Secondly we're protected
  * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
  *
@@ -2615,7 +2615,7 @@ static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
 		 *
 		 * No need of atomic instructions here, head.next
 		 * can't change from under us until we release the
-		 * anon_vma->root->lock.
+		 * anon_vma->root->mutex.
 		 */
 		if (!__test_and_clear_bit(0, (unsigned long *)
 					  &anon_vma->root->head.next))
diff --git a/mm/rmap.c b/mm/rmap.c
index d271845d7d15..ce29d405d093 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -25,7 +25,7 @@
  *   mm->mmap_sem
  *     page->flags PG_locked (lock_page)
  *       mapping->i_mmap_mutex
- *         anon_vma->lock
+ *         anon_vma->mutex
  *           mm->page_table_lock or pte_lock
  *             zone->lru_lock (in mark_page_accessed, isolate_lru_page)
  *             swap_lock (in swap_duplicate, swap_info_get)
@@ -40,7 +40,7 @@
  *
  * (code doesn't rely on that order so it could be switched around)
  * ->tasklist_lock
- *   anon_vma->lock      (memory_failure, collect_procs_anon)
+ *   anon_vma->mutex      (memory_failure, collect_procs_anon)
  *     pte map lock
  */
 
@@ -307,7 +307,7 @@ static void anon_vma_ctor(void *data)
 {
 	struct anon_vma *anon_vma = data;
 
-	spin_lock_init(&anon_vma->lock);
+	mutex_init(&anon_vma->mutex);
 	atomic_set(&anon_vma->refcount, 0);
 	INIT_LIST_HEAD(&anon_vma->head);
 }
@@ -1143,7 +1143,7 @@ out_mlock:
 	/*
 	 * We need mmap_sem locking, Otherwise VM_LOCKED check makes
 	 * unstable result and race. Plus, We can't wait here because
-	 * we now hold anon_vma->lock or mapping->i_mmap_mutex.
+	 * we now hold anon_vma->mutex or mapping->i_mmap_mutex.
 	 * if trylock failed, the page remain in evictable lru and later
 	 * vmscan could retry to move the page to unevictable lru if the
 	 * page is actually mlocked.
-- 
cgit v1.2.3


From de03c72cfce5b263a674d04348b58475ec50163c Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Tue, 24 May 2011 17:12:15 -0700
Subject: mm: convert mm->cpu_vm_cpumask into cpumask_var_t

cpumask_t is very big struct and cpu_vm_mask is placed wrong position.
It might lead to reduce cache hit ratio.

This patch has two change.
1) Move the place of cpumask into last of mm_struct. Because usually cpumask
   is accessed only front bits when the system has cpu-hotplug capability
2) Convert cpu_vm_mask into cpumask_var_t. It may help to reduce memory
   footprint if cpumask_size() will use nr_cpumask_bits properly in future.

In addition, this patch change the name of cpu_vm_mask with cpu_vm_mask_var.
It may help to detect out of tree cpu_vm_mask users.

This patch has no functional change.

[akpm@linux-foundation.org: build fix]
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Koichi Yasutake <yasutake.koichi@jp.panasonic.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/cachetlb.txt |  2 +-
 arch/x86/kernel/tboot.c    |  1 -
 include/linux/mm_types.h   |  9 ++++++---
 include/linux/sched.h      |  1 +
 init/main.c                |  2 ++
 kernel/fork.c              | 37 ++++++++++++++++++++++++++++++++++---
 mm/init-mm.c               |  1 -
 7 files changed, 44 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cachetlb.txt b/Documentation/cachetlb.txt
index 9164ae3b83bc..9b728dc17535 100644
--- a/Documentation/cachetlb.txt
+++ b/Documentation/cachetlb.txt
@@ -16,7 +16,7 @@ on all processors in the system.  Don't let this scare you into
 thinking SMP cache/tlb flushing must be so inefficient, this is in
 fact an area where many optimizations are possible.  For example,
 if it can be proven that a user address space has never executed
-on a cpu (see vma->cpu_vm_mask), one need not perform a flush
+on a cpu (see mm_cpumask()), one need not perform a flush
 for this address space on that cpu.
 
 First, the TLB flushing interfaces, since they are the simplest.  The
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index 998e972f3b1a..30ac65df7d4e 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -110,7 +110,6 @@ static struct mm_struct tboot_mm = {
 	.mmap_sem       = __RWSEM_INITIALIZER(init_mm.mmap_sem),
 	.page_table_lock =  __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
 	.mmlist         = LIST_HEAD_INIT(init_mm.mmlist),
-	.cpu_vm_mask    = CPU_MASK_ALL,
 };
 
 static inline void switch_to_tboot_pt(void)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 201998e5b530..c2f9ea7922f4 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -265,8 +265,6 @@ struct mm_struct {
 
 	struct linux_binfmt *binfmt;
 
-	cpumask_t cpu_vm_mask;
-
 	/* Architecture-specific MM context */
 	mm_context_t context;
 
@@ -316,9 +314,14 @@ struct mm_struct {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	pgtable_t pmd_huge_pte; /* protected by page_table_lock */
 #endif
+
+	cpumask_var_t cpu_vm_mask_var;
 };
 
 /* Future-safe accessor for struct mm_struct's cpu_vm_mask. */
-#define mm_cpumask(mm) (&(mm)->cpu_vm_mask)
+static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
+{
+	return mm->cpu_vm_mask_var;
+}
 
 #endif /* _LINUX_MM_TYPES_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 44b8faaac7c0..f18300eddfcb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2176,6 +2176,7 @@ static inline void mmdrop(struct mm_struct * mm)
 	if (unlikely(atomic_dec_and_test(&mm->mm_count)))
 		__mmdrop(mm);
 }
+extern int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm);
 
 /* mmput gets rid of the mappings and all user-space */
 extern void mmput(struct mm_struct *);
diff --git a/init/main.c b/init/main.c
index 48df882d51d2..22da33918aef 100644
--- a/init/main.c
+++ b/init/main.c
@@ -509,6 +509,8 @@ asmlinkage void __init start_kernel(void)
 	sort_main_extable();
 	trap_init();
 	mm_init();
+	BUG_ON(mm_init_cpumask(&init_mm, 0));
+
 	/*
 	 * Set up the scheduler prior starting any interrupts (such as the
 	 * timer interrupt). Full topology setup happens at smp_init()
diff --git a/kernel/fork.c b/kernel/fork.c
index 927692734bcf..8e7e135d0817 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -485,6 +485,20 @@ static void mm_init_aio(struct mm_struct *mm)
 #endif
 }
 
+int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+#ifdef CONFIG_CPUMASK_OFFSTACK
+	if (!alloc_cpumask_var(&mm->cpu_vm_mask_var, GFP_KERNEL))
+		return -ENOMEM;
+
+	if (oldmm)
+		cpumask_copy(mm_cpumask(mm), mm_cpumask(oldmm));
+	else
+		memset(mm_cpumask(mm), 0, cpumask_size());
+#endif
+	return 0;
+}
+
 static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
 {
 	atomic_set(&mm->mm_users, 1);
@@ -521,10 +535,20 @@ struct mm_struct * mm_alloc(void)
 	struct mm_struct * mm;
 
 	mm = allocate_mm();
-	if (mm) {
-		memset(mm, 0, sizeof(*mm));
-		mm = mm_init(mm, current);
+	if (!mm)
+		return NULL;
+
+	memset(mm, 0, sizeof(*mm));
+	mm = mm_init(mm, current);
+	if (!mm)
+		return NULL;
+
+	if (mm_init_cpumask(mm, NULL)) {
+		mm_free_pgd(mm);
+		free_mm(mm);
+		return NULL;
 	}
+
 	return mm;
 }
 
@@ -536,6 +560,7 @@ struct mm_struct * mm_alloc(void)
 void __mmdrop(struct mm_struct *mm)
 {
 	BUG_ON(mm == &init_mm);
+	free_cpumask_var(mm->cpu_vm_mask_var);
 	mm_free_pgd(mm);
 	destroy_context(mm);
 	mmu_notifier_mm_destroy(mm);
@@ -690,6 +715,9 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
 	if (!mm_init(mm, tsk))
 		goto fail_nomem;
 
+	if (mm_init_cpumask(mm, oldmm))
+		goto fail_nocpumask;
+
 	if (init_new_context(tsk, mm))
 		goto fail_nocontext;
 
@@ -716,6 +744,9 @@ fail_nomem:
 	return NULL;
 
 fail_nocontext:
+	free_cpumask_var(mm->cpu_vm_mask_var);
+
+fail_nocpumask:
 	/*
 	 * If init_new_context() failed, we cannot use mmput() to free the mm
 	 * because it calls destroy_context()
diff --git a/mm/init-mm.c b/mm/init-mm.c
index 1d29cdfe8ebb..4019979b2637 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -21,6 +21,5 @@ struct mm_struct init_mm = {
 	.mmap_sem	= __RWSEM_INITIALIZER(init_mm.mmap_sem),
 	.page_table_lock =  __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
 	.mmlist		= LIST_HEAD_INIT(init_mm.mmlist),
-	.cpu_vm_mask	= CPU_MASK_ALL,
 	INIT_MM_CONTEXT(init_mm)
 };
-- 
cgit v1.2.3


From a238ab5b0239575c179f4976064192c3f7409dad Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave@linux.vnet.ibm.com>
Date: Tue, 24 May 2011 17:12:16 -0700
Subject: mm: break out page allocation warning code

This originally started as a simple patch to give vmalloc() some more
verbose output on failure on top of the plain page allocator messages.
Johannes suggested that it might be nicer to lead with the vmalloc() info
_before_ the page allocator messages.

But, I do think there's a lot of value in what __alloc_pages_slowpath()
does with its filtering and so forth.

This patch creates a new function which other allocators can call instead
of relying on the internal page allocator warnings.  It also gives this
function private rate-limiting which separates it from other
printk_ratelimit() users.

Signed-off-by: Dave Hansen <dave@linux.vnet.ibm.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  2 ++
 mm/page_alloc.c    | 62 ++++++++++++++++++++++++++++++++++++------------------
 2 files changed, 43 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2ad0ac8c3f32..8e2f2cd821f7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1387,6 +1387,8 @@ extern void si_meminfo(struct sysinfo * val);
 extern void si_meminfo_node(struct sysinfo *val, int nid);
 extern int after_bootmem;
 
+extern void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...);
+
 extern void setup_per_cpu_pageset(void);
 
 extern void zone_pcp_update(struct zone *zone);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 83eaa2eb72f8..44019da9632e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -30,6 +30,7 @@
 #include <linux/pagevec.h>
 #include <linux/blkdev.h>
 #include <linux/slab.h>
+#include <linux/ratelimit.h>
 #include <linux/oom.h>
 #include <linux/notifier.h>
 #include <linux/topology.h>
@@ -1736,6 +1737,45 @@ static inline bool should_suppress_show_mem(void)
 	return ret;
 }
 
+static DEFINE_RATELIMIT_STATE(nopage_rs,
+		DEFAULT_RATELIMIT_INTERVAL,
+		DEFAULT_RATELIMIT_BURST);
+
+void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
+{
+	va_list args;
+	unsigned int filter = SHOW_MEM_FILTER_NODES;
+
+	if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs))
+		return;
+
+	/*
+	 * This documents exceptions given to allocations in certain
+	 * contexts that are allowed to allocate outside current's set
+	 * of allowed nodes.
+	 */
+	if (!(gfp_mask & __GFP_NOMEMALLOC))
+		if (test_thread_flag(TIF_MEMDIE) ||
+		    (current->flags & (PF_MEMALLOC | PF_EXITING)))
+			filter &= ~SHOW_MEM_FILTER_NODES;
+	if (in_interrupt() || !(gfp_mask & __GFP_WAIT))
+		filter &= ~SHOW_MEM_FILTER_NODES;
+
+	if (fmt) {
+		printk(KERN_WARNING);
+		va_start(args, fmt);
+		vprintk(fmt, args);
+		va_end(args);
+	}
+
+	pr_warning("%s: page allocation failure: order:%d, mode:0x%x\n",
+		   current->comm, order, gfp_mask);
+
+	dump_stack();
+	if (!should_suppress_show_mem())
+		show_mem(filter);
+}
+
 static inline int
 should_alloc_retry(gfp_t gfp_mask, unsigned int order,
 				unsigned long pages_reclaimed)
@@ -2178,27 +2218,7 @@ rebalance:
 	}
 
 nopage:
-	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) {
-		unsigned int filter = SHOW_MEM_FILTER_NODES;
-
-		/*
-		 * This documents exceptions given to allocations in certain
-		 * contexts that are allowed to allocate outside current's set
-		 * of allowed nodes.
-		 */
-		if (!(gfp_mask & __GFP_NOMEMALLOC))
-			if (test_thread_flag(TIF_MEMDIE) ||
-			    (current->flags & (PF_MEMALLOC | PF_EXITING)))
-				filter &= ~SHOW_MEM_FILTER_NODES;
-		if (in_interrupt() || !wait)
-			filter &= ~SHOW_MEM_FILTER_NODES;
-
-		pr_warning("%s: page allocation failure. order:%d, mode:0x%x\n",
-			current->comm, order, gfp_mask);
-		dump_stack();
-		if (!should_suppress_show_mem())
-			show_mem(filter);
-	}
+	warn_alloc_failed(gfp_mask, order, NULL);
 	return page;
 got_pg:
 	if (kmemcheck_enabled)
-- 
cgit v1.2.3


From 7b1de5868b124d8f399d8791ed30a9b679d64d4d Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Tue, 24 May 2011 17:12:25 -0700
Subject: readahead: readahead page allocations are OK to fail

Pass __GFP_NORETRY|__GFP_NOWARN for readahead page allocations.

readahead page allocations are completely optional.  They are OK to fail
and in particular shall not trigger OOM on themselves.

Reported-by: Dave Young <hidave.darkstar@gmail.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pagemap.h | 6 ++++++
 mm/readahead.c          | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index ea268080380d..716875e53520 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -219,6 +219,12 @@ static inline struct page *page_cache_alloc_cold(struct address_space *x)
 	return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD);
 }
 
+static inline struct page *page_cache_alloc_readahead(struct address_space *x)
+{
+	return __page_cache_alloc(mapping_gfp_mask(x) |
+				  __GFP_COLD | __GFP_NORETRY | __GFP_NOWARN);
+}
+
 typedef int filler_t(void *, struct page *);
 
 extern struct page * find_get_page(struct address_space *mapping,
diff --git a/mm/readahead.c b/mm/readahead.c
index 2c0cc489e288..867f9dd82dcd 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -180,7 +180,7 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
 		if (page)
 			continue;
 
-		page = page_cache_alloc_cold(mapping);
+		page = page_cache_alloc_readahead(mapping);
 		if (!page)
 			break;
 		page->index = page_offset;
-- 
cgit v1.2.3


From a09ed5e00084448453c8bada4dcd31e5fbfc2f21 Mon Sep 17 00:00:00 2001
From: Ying Han <yinghan@google.com>
Date: Tue, 24 May 2011 17:12:26 -0700
Subject: vmscan: change shrink_slab() interfaces by passing shrink_control

Consolidate the existing parameters to shrink_slab() into a new
shrink_control struct.  This is needed later to pass the same struct to
shrinkers.

Signed-off-by: Ying Han <yinghan@google.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Acked-by: Pavel Emelyanov <xemul@openvz.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/drop_caches.c    |  6 +++++-
 include/linux/mm.h  | 13 +++++++++++--
 mm/memory-failure.c |  7 ++++++-
 mm/vmscan.c         | 46 +++++++++++++++++++++++++++++++++-------------
 4 files changed, 55 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 98b77c89494c..440999c24353 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -40,9 +40,13 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
 static void drop_slab(void)
 {
 	int nr_objects;
+	struct shrink_control shrink = {
+		.gfp_mask = GFP_KERNEL,
+		.nr_scanned = 1000,
+	};
 
 	do {
-		nr_objects = shrink_slab(1000, GFP_KERNEL, 1000);
+		nr_objects = shrink_slab(&shrink, 1000);
 	} while (nr_objects > 10);
 }
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8e2f2cd821f7..32cfa9602d00 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1161,6 +1161,15 @@ static inline void sync_mm_rss(struct task_struct *task, struct mm_struct *mm)
 }
 #endif
 
+/*
+ * This struct is used to pass information from page reclaim to the shrinkers.
+ * We consolidate the values for easier extention later.
+ */
+struct shrink_control {
+	unsigned long nr_scanned;
+	gfp_t gfp_mask;
+};
+
 /*
  * A callback you can register to apply pressure to ageable caches.
  *
@@ -1630,8 +1639,8 @@ int in_gate_area_no_mm(unsigned long addr);
 
 int drop_caches_sysctl_handler(struct ctl_table *, int,
 					void __user *, size_t *, loff_t *);
-unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
-			unsigned long lru_pages);
+unsigned long shrink_slab(struct shrink_control *shrink,
+				unsigned long lru_pages);
 
 #ifndef CONFIG_MMU
 #define randomize_va_space 0
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 369b80e81416..341341b2b47b 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -239,7 +239,12 @@ void shake_page(struct page *p, int access)
 	if (access) {
 		int nr;
 		do {
-			nr = shrink_slab(1000, GFP_KERNEL, 1000);
+			struct shrink_control shrink = {
+				.gfp_mask = GFP_KERNEL,
+				.nr_scanned = 1000,
+			};
+
+			nr = shrink_slab(&shrink, 1000);
 			if (page_count(p) == 1)
 				break;
 		} while (nr > 10);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 890f54184d9a..e4e245ed1a5b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -222,11 +222,13 @@ EXPORT_SYMBOL(unregister_shrinker);
  *
  * Returns the number of slab objects which we shrunk.
  */
-unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
-			unsigned long lru_pages)
+unsigned long shrink_slab(struct shrink_control *shrink,
+			  unsigned long lru_pages)
 {
 	struct shrinker *shrinker;
 	unsigned long ret = 0;
+	unsigned long scanned = shrink->nr_scanned;
+	gfp_t gfp_mask = shrink->gfp_mask;
 
 	if (scanned == 0)
 		scanned = SWAP_CLUSTER_MAX;
@@ -2035,7 +2037,8 @@ static bool all_unreclaimable(struct zonelist *zonelist,
  * 		else, the number of pages reclaimed
  */
 static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
-					struct scan_control *sc)
+					struct scan_control *sc,
+					struct shrink_control *shrink)
 {
 	int priority;
 	unsigned long total_scanned = 0;
@@ -2069,7 +2072,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 				lru_pages += zone_reclaimable_pages(zone);
 			}
 
-			shrink_slab(sc->nr_scanned, sc->gfp_mask, lru_pages);
+			shrink->nr_scanned = sc->nr_scanned;
+			shrink_slab(shrink, lru_pages);
 			if (reclaim_state) {
 				sc->nr_reclaimed += reclaim_state->reclaimed_slab;
 				reclaim_state->reclaimed_slab = 0;
@@ -2141,12 +2145,15 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 		.mem_cgroup = NULL,
 		.nodemask = nodemask,
 	};
+	struct shrink_control shrink = {
+		.gfp_mask = sc.gfp_mask,
+	};
 
 	trace_mm_vmscan_direct_reclaim_begin(order,
 				sc.may_writepage,
 				gfp_mask);
 
-	nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
+	nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink);
 
 	trace_mm_vmscan_direct_reclaim_end(nr_reclaimed);
 
@@ -2206,17 +2213,20 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 		.order = 0,
 		.mem_cgroup = mem_cont,
 		.nodemask = NULL, /* we don't care the placement */
+		.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
+				(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
+	};
+	struct shrink_control shrink = {
+		.gfp_mask = sc.gfp_mask,
 	};
 
-	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
-			(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
 	zonelist = NODE_DATA(numa_node_id())->node_zonelists;
 
 	trace_mm_vmscan_memcg_reclaim_begin(0,
 					    sc.may_writepage,
 					    sc.gfp_mask);
 
-	nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
+	nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink);
 
 	trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
 
@@ -2344,6 +2354,9 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 		.order = order,
 		.mem_cgroup = NULL,
 	};
+	struct shrink_control shrink = {
+		.gfp_mask = sc.gfp_mask,
+	};
 loop_again:
 	total_scanned = 0;
 	sc.nr_reclaimed = 0;
@@ -2443,8 +2456,8 @@ loop_again:
 					end_zone, 0))
 				shrink_zone(priority, zone, &sc);
 			reclaim_state->reclaimed_slab = 0;
-			nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
-						lru_pages);
+			shrink.nr_scanned = sc.nr_scanned;
+			nr_slab = shrink_slab(&shrink, lru_pages);
 			sc.nr_reclaimed += reclaim_state->reclaimed_slab;
 			total_scanned += sc.nr_scanned;
 
@@ -2796,7 +2809,10 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
 		.swappiness = vm_swappiness,
 		.order = 0,
 	};
-	struct zonelist * zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
+	struct shrink_control shrink = {
+		.gfp_mask = sc.gfp_mask,
+	};
+	struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
 	struct task_struct *p = current;
 	unsigned long nr_reclaimed;
 
@@ -2805,7 +2821,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
 
-	nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
+	nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink);
 
 	p->reclaim_state = NULL;
 	lockdep_clear_current_reclaim_state();
@@ -2980,6 +2996,9 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 		.swappiness = vm_swappiness,
 		.order = order,
 	};
+	struct shrink_control shrink = {
+		.gfp_mask = sc.gfp_mask,
+	};
 	unsigned long nr_slab_pages0, nr_slab_pages1;
 
 	cond_resched();
@@ -3006,6 +3025,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	}
 
 	nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
+	shrink.nr_scanned = sc.nr_scanned;
 	if (nr_slab_pages0 > zone->min_slab_pages) {
 		/*
 		 * shrink_slab() does not currently allow us to determine how
@@ -3021,7 +3041,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 			unsigned long lru_pages = zone_reclaimable_pages(zone);
 
 			/* No reclaimable slab or very low memory pressure */
-			if (!shrink_slab(sc.nr_scanned, gfp_mask, lru_pages))
+			if (!shrink_slab(&shrink, lru_pages))
 				break;
 
 			/* Freed enough memory */
-- 
cgit v1.2.3


From 1495f230fa7750479c79e3656286b9183d662077 Mon Sep 17 00:00:00 2001
From: Ying Han <yinghan@google.com>
Date: Tue, 24 May 2011 17:12:27 -0700
Subject: vmscan: change shrinker API by passing shrink_control struct

Change each shrinker's API by consolidating the existing parameters into
shrink_control struct.  This will simplify any further features added w/o
touching each file of shrinker.

[akpm@linux-foundation.org: fix build]
[akpm@linux-foundation.org: fix warning]
[kosaki.motohiro@jp.fujitsu.com: fix up new shrinker API]
[akpm@linux-foundation.org: fix xfs warning]
[akpm@linux-foundation.org: update gfs2]
Signed-off-by: Ying Han <yinghan@google.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Acked-by: Pavel Emelyanov <xemul@openvz.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kvm/mmu.c                   |  3 ++-
 drivers/gpu/drm/i915/i915_gem.c      |  9 +++------
 drivers/gpu/drm/ttm/ttm_page_alloc.c |  4 +++-
 drivers/staging/zcache/zcache.c      |  5 ++++-
 fs/dcache.c                          |  8 ++++++--
 fs/drop_caches.c                     |  3 +--
 fs/gfs2/glock.c                      |  5 ++++-
 fs/gfs2/quota.c                      | 12 +++++++-----
 fs/gfs2/quota.h                      |  4 +++-
 fs/inode.c                           |  6 +++++-
 fs/mbcache.c                         | 10 ++++++----
 fs/nfs/dir.c                         |  5 ++++-
 fs/nfs/internal.h                    |  2 +-
 fs/quota/dquot.c                     |  5 ++++-
 fs/xfs/linux-2.6/xfs_buf.c           |  4 ++--
 fs/xfs/linux-2.6/xfs_sync.c          |  5 +++--
 fs/xfs/quota/xfs_qm.c                |  6 +++---
 include/linux/mm.h                   | 19 +++++++++++--------
 mm/memory-failure.c                  |  3 +--
 mm/vmscan.c                          | 34 +++++++++++++++++++---------------
 net/sunrpc/auth.c                    |  4 +++-
 21 files changed, 95 insertions(+), 61 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 28418054b880..bd14bb4c8594 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3545,10 +3545,11 @@ static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm,
 	return kvm_mmu_prepare_zap_page(kvm, page, invalid_list);
 }
 
-static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
+static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
 {
 	struct kvm *kvm;
 	struct kvm *kvm_freed = NULL;
+	int nr_to_scan = sc->nr_to_scan;
 
 	if (nr_to_scan == 0)
 		goto out;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c6289034e29a..0b2e167d2bce 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -56,9 +56,7 @@ static int i915_gem_phys_pwrite(struct drm_device *dev,
 static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj);
 
 static int i915_gem_inactive_shrink(struct shrinker *shrinker,
-				    int nr_to_scan,
-				    gfp_t gfp_mask);
-
+				    struct shrink_control *sc);
 
 /* some bookkeeping */
 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
@@ -4092,9 +4090,7 @@ i915_gpu_is_active(struct drm_device *dev)
 }
 
 static int
-i915_gem_inactive_shrink(struct shrinker *shrinker,
-			 int nr_to_scan,
-			 gfp_t gfp_mask)
+i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
 {
 	struct drm_i915_private *dev_priv =
 		container_of(shrinker,
@@ -4102,6 +4098,7 @@ i915_gem_inactive_shrink(struct shrinker *shrinker,
 			     mm.inactive_shrinker);
 	struct drm_device *dev = dev_priv->dev;
 	struct drm_i915_gem_object *obj, *next;
+	int nr_to_scan = sc->nr_to_scan;
 	int cnt;
 
 	if (!mutex_trylock(&dev->struct_mutex))
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index 9d9d92945f8c..d948575717bf 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -395,12 +395,14 @@ static int ttm_pool_get_num_unused_pages(void)
 /**
  * Callback for mm to request pool to reduce number of page held.
  */
-static int ttm_pool_mm_shrink(struct shrinker *shrink, int shrink_pages, gfp_t gfp_mask)
+static int ttm_pool_mm_shrink(struct shrinker *shrink,
+			      struct shrink_control *sc)
 {
 	static atomic_t start_pool = ATOMIC_INIT(0);
 	unsigned i;
 	unsigned pool_offset = atomic_add_return(1, &start_pool);
 	struct ttm_page_pool *pool;
+	int shrink_pages = sc->nr_to_scan;
 
 	pool_offset = pool_offset % NUM_POOLS;
 	/* select start pool in round robin fashion */
diff --git a/drivers/staging/zcache/zcache.c b/drivers/staging/zcache/zcache.c
index b8a2b30a1572..77ac2d4d3ef1 100644
--- a/drivers/staging/zcache/zcache.c
+++ b/drivers/staging/zcache/zcache.c
@@ -1181,9 +1181,12 @@ static bool zcache_freeze;
 /*
  * zcache shrinker interface (only useful for ephemeral pages, so zbud only)
  */
-static int shrink_zcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
+static int shrink_zcache_memory(struct shrinker *shrink,
+				struct shrink_control *sc)
 {
 	int ret = -1;
+	int nr = sc->nr_to_scan;
+	gfp_t gfp_mask = sc->gfp_mask;
 
 	if (nr >= 0) {
 		if (!(gfp_mask & __GFP_FS))
diff --git a/fs/dcache.c b/fs/dcache.c
index 18b2a1f10ed8..37f72ee5bf7c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1220,7 +1220,7 @@ void shrink_dcache_parent(struct dentry * parent)
 EXPORT_SYMBOL(shrink_dcache_parent);
 
 /*
- * Scan `nr' dentries and return the number which remain.
+ * Scan `sc->nr_slab_to_reclaim' dentries and return the number which remain.
  *
  * We need to avoid reentering the filesystem if the caller is performing a
  * GFP_NOFS allocation attempt.  One example deadlock is:
@@ -1231,8 +1231,12 @@ EXPORT_SYMBOL(shrink_dcache_parent);
  *
  * In this case we return -1 to tell the caller that we baled.
  */
-static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
+static int shrink_dcache_memory(struct shrinker *shrink,
+				struct shrink_control *sc)
 {
+	int nr = sc->nr_to_scan;
+	gfp_t gfp_mask = sc->gfp_mask;
+
 	if (nr) {
 		if (!(gfp_mask & __GFP_FS))
 			return -1;
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 440999c24353..c00e055b6282 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -42,11 +42,10 @@ static void drop_slab(void)
 	int nr_objects;
 	struct shrink_control shrink = {
 		.gfp_mask = GFP_KERNEL,
-		.nr_scanned = 1000,
 	};
 
 	do {
-		nr_objects = shrink_slab(&shrink, 1000);
+		nr_objects = shrink_slab(&shrink, 1000, 1000);
 	} while (nr_objects > 10);
 }
 
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index a2a6abbccc07..2792a790e50b 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1346,11 +1346,14 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
 }
 
 
-static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
+static int gfs2_shrink_glock_memory(struct shrinker *shrink,
+				    struct shrink_control *sc)
 {
 	struct gfs2_glock *gl;
 	int may_demote;
 	int nr_skipped = 0;
+	int nr = sc->nr_to_scan;
+	gfp_t gfp_mask = sc->gfp_mask;
 	LIST_HEAD(skipped);
 
 	if (nr == 0)
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index e23d9864c418..42e8d23bc047 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -38,6 +38,7 @@
 
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/mm.h>
 #include <linux/spinlock.h>
 #include <linux/completion.h>
 #include <linux/buffer_head.h>
@@ -77,19 +78,20 @@ static LIST_HEAD(qd_lru_list);
 static atomic_t qd_lru_count = ATOMIC_INIT(0);
 static DEFINE_SPINLOCK(qd_lru_lock);
 
-int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
+int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc)
 {
 	struct gfs2_quota_data *qd;
 	struct gfs2_sbd *sdp;
+	int nr_to_scan = sc->nr_to_scan;
 
-	if (nr == 0)
+	if (nr_to_scan == 0)
 		goto out;
 
-	if (!(gfp_mask & __GFP_FS))
+	if (!(sc->gfp_mask & __GFP_FS))
 		return -1;
 
 	spin_lock(&qd_lru_lock);
-	while (nr && !list_empty(&qd_lru_list)) {
+	while (nr_to_scan && !list_empty(&qd_lru_list)) {
 		qd = list_entry(qd_lru_list.next,
 				struct gfs2_quota_data, qd_reclaim);
 		sdp = qd->qd_gl->gl_sbd;
@@ -110,7 +112,7 @@ int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
 		spin_unlock(&qd_lru_lock);
 		kmem_cache_free(gfs2_quotad_cachep, qd);
 		spin_lock(&qd_lru_lock);
-		nr--;
+		nr_to_scan--;
 	}
 	spin_unlock(&qd_lru_lock);
 
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index e7d236ca48bd..90bf1c302a98 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -12,6 +12,7 @@
 
 struct gfs2_inode;
 struct gfs2_sbd;
+struct shrink_control;
 
 #define NO_QUOTA_CHANGE ((u32)-1)
 
@@ -51,7 +52,8 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
 	return ret;
 }
 
-extern int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask);
+extern int gfs2_shrink_qd_memory(struct shrinker *shrink,
+				 struct shrink_control *sc);
 extern const struct quotactl_ops gfs2_quotactl_ops;
 
 #endif /* __QUOTA_DOT_H__ */
diff --git a/fs/inode.c b/fs/inode.c
index 88aa3bcc7681..990d284877a1 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -751,8 +751,12 @@ static void prune_icache(int nr_to_scan)
  * This function is passed the number of inodes to scan, and it returns the
  * total number of remaining possibly-reclaimable inodes.
  */
-static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
+static int shrink_icache_memory(struct shrinker *shrink,
+				struct shrink_control *sc)
 {
+	int nr = sc->nr_to_scan;
+	gfp_t gfp_mask = sc->gfp_mask;
+
 	if (nr) {
 		/*
 		 * Nasty deadlock avoidance.  We may hold various FS locks,
diff --git a/fs/mbcache.c b/fs/mbcache.c
index 2f174be06555..8c32ef3ba88e 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -90,7 +90,8 @@ static DEFINE_SPINLOCK(mb_cache_spinlock);
  * What the mbcache registers as to get shrunk dynamically.
  */
 
-static int mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask);
+static int mb_cache_shrink_fn(struct shrinker *shrink,
+			      struct shrink_control *sc);
 
 static struct shrinker mb_cache_shrinker = {
 	.shrink = mb_cache_shrink_fn,
@@ -156,18 +157,19 @@ forget:
  * gets low.
  *
  * @shrink: (ignored)
- * @nr_to_scan: Number of objects to scan
- * @gfp_mask: (ignored)
+ * @sc: shrink_control passed from reclaim
  *
  * Returns the number of objects which are present in the cache.
  */
 static int
-mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
+mb_cache_shrink_fn(struct shrinker *shrink, struct shrink_control *sc)
 {
 	LIST_HEAD(free_list);
 	struct mb_cache *cache;
 	struct mb_cache_entry *entry, *tmp;
 	int count = 0;
+	int nr_to_scan = sc->nr_to_scan;
+	gfp_t gfp_mask = sc->gfp_mask;
 
 	mb_debug("trying to free %d entries", nr_to_scan);
 	spin_lock(&mb_cache_spinlock);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 7237672216c8..424e47773a84 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2042,11 +2042,14 @@ static void nfs_access_free_list(struct list_head *head)
 	}
 }
 
-int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
+int nfs_access_cache_shrinker(struct shrinker *shrink,
+			      struct shrink_control *sc)
 {
 	LIST_HEAD(head);
 	struct nfs_inode *nfsi, *next;
 	struct nfs_access_entry *cache;
+	int nr_to_scan = sc->nr_to_scan;
+	gfp_t gfp_mask = sc->gfp_mask;
 
 	if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
 		return (nr_to_scan == 0) ? 0 : -1;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index ce118ce885dd..2df6ca7b5898 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -234,7 +234,7 @@ extern int nfs_init_client(struct nfs_client *clp,
 
 /* dir.c */
 extern int nfs_access_cache_shrinker(struct shrinker *shrink,
-					int nr_to_scan, gfp_t gfp_mask);
+					struct shrink_control *sc);
 
 /* inode.c */
 extern struct workqueue_struct *nfsiod_workqueue;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index d3c032f5fa0a..5b572c89e6c4 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -691,8 +691,11 @@ static void prune_dqcache(int count)
  * This is called from kswapd when we think we need some
  * more memory
  */
-static int shrink_dqcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
+static int shrink_dqcache_memory(struct shrinker *shrink,
+				 struct shrink_control *sc)
 {
+	int nr = sc->nr_to_scan;
+
 	if (nr) {
 		spin_lock(&dq_list_lock);
 		prune_dqcache(nr);
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 52b2b5da566e..5e68099db2a5 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1422,12 +1422,12 @@ restart:
 int
 xfs_buftarg_shrink(
 	struct shrinker		*shrink,
-	int			nr_to_scan,
-	gfp_t			mask)
+	struct shrink_control	*sc)
 {
 	struct xfs_buftarg	*btp = container_of(shrink,
 					struct xfs_buftarg, bt_shrinker);
 	struct xfs_buf		*bp;
+	int nr_to_scan = sc->nr_to_scan;
 	LIST_HEAD(dispose);
 
 	if (!nr_to_scan)
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index cb1bb2080e44..8ecad5ff9f9b 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -1032,13 +1032,14 @@ xfs_reclaim_inodes(
 static int
 xfs_reclaim_inode_shrink(
 	struct shrinker	*shrink,
-	int		nr_to_scan,
-	gfp_t		gfp_mask)
+	struct shrink_control *sc)
 {
 	struct xfs_mount *mp;
 	struct xfs_perag *pag;
 	xfs_agnumber_t	ag;
 	int		reclaimable;
+	int nr_to_scan = sc->nr_to_scan;
+	gfp_t gfp_mask = sc->gfp_mask;
 
 	mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
 	if (nr_to_scan) {
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 69228aa8605a..b94dace4e785 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -60,7 +60,7 @@ STATIC void	xfs_qm_list_destroy(xfs_dqlist_t *);
 
 STATIC int	xfs_qm_init_quotainos(xfs_mount_t *);
 STATIC int	xfs_qm_init_quotainfo(xfs_mount_t *);
-STATIC int	xfs_qm_shake(struct shrinker *, int, gfp_t);
+STATIC int	xfs_qm_shake(struct shrinker *, struct shrink_control *);
 
 static struct shrinker xfs_qm_shaker = {
 	.shrink = xfs_qm_shake,
@@ -2009,10 +2009,10 @@ xfs_qm_shake_freelist(
 STATIC int
 xfs_qm_shake(
 	struct shrinker	*shrink,
-	int		nr_to_scan,
-	gfp_t		gfp_mask)
+	struct shrink_control *sc)
 {
 	int	ndqused, nfree, n;
+	gfp_t gfp_mask = sc->gfp_mask;
 
 	if (!kmem_shake_allow(gfp_mask))
 		return 0;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 32cfa9602d00..5cbbf78eaac7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1166,18 +1166,20 @@ static inline void sync_mm_rss(struct task_struct *task, struct mm_struct *mm)
  * We consolidate the values for easier extention later.
  */
 struct shrink_control {
-	unsigned long nr_scanned;
 	gfp_t gfp_mask;
+
+	/* How many slab objects shrinker() should scan and try to reclaim */
+	unsigned long nr_to_scan;
 };
 
 /*
  * A callback you can register to apply pressure to ageable caches.
  *
- * 'shrink' is passed a count 'nr_to_scan' and a 'gfpmask'.  It should
- * look through the least-recently-used 'nr_to_scan' entries and
- * attempt to free them up.  It should return the number of objects
- * which remain in the cache.  If it returns -1, it means it cannot do
- * any scanning at this time (eg. there is a risk of deadlock).
+ * 'sc' is passed shrink_control which includes a count 'nr_to_scan'
+ * and a 'gfpmask'.  It should look through the least-recently-used
+ * 'nr_to_scan' entries and attempt to free them up.  It should return
+ * the number of objects which remain in the cache.  If it returns -1, it means
+ * it cannot do any scanning at this time (eg. there is a risk of deadlock).
  *
  * The 'gfpmask' refers to the allocation we are currently trying to
  * fulfil.
@@ -1186,7 +1188,7 @@ struct shrink_control {
  * querying the cache size, so a fastpath for that case is appropriate.
  */
 struct shrinker {
-	int (*shrink)(struct shrinker *, int nr_to_scan, gfp_t gfp_mask);
+	int (*shrink)(struct shrinker *, struct shrink_control *sc);
 	int seeks;	/* seeks to recreate an obj */
 
 	/* These are for internal use */
@@ -1640,7 +1642,8 @@ int in_gate_area_no_mm(unsigned long addr);
 int drop_caches_sysctl_handler(struct ctl_table *, int,
 					void __user *, size_t *, loff_t *);
 unsigned long shrink_slab(struct shrink_control *shrink,
-				unsigned long lru_pages);
+			  unsigned long nr_pages_scanned,
+			  unsigned long lru_pages);
 
 #ifndef CONFIG_MMU
 #define randomize_va_space 0
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 341341b2b47b..5c8f7e08928d 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -241,10 +241,9 @@ void shake_page(struct page *p, int access)
 		do {
 			struct shrink_control shrink = {
 				.gfp_mask = GFP_KERNEL,
-				.nr_scanned = 1000,
 			};
 
-			nr = shrink_slab(&shrink, 1000);
+			nr = shrink_slab(&shrink, 1000, 1000);
 			if (page_count(p) == 1)
 				break;
 		} while (nr > 10);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e4e245ed1a5b..7e0116150dc7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -202,6 +202,14 @@ void unregister_shrinker(struct shrinker *shrinker)
 }
 EXPORT_SYMBOL(unregister_shrinker);
 
+static inline int do_shrinker_shrink(struct shrinker *shrinker,
+				     struct shrink_control *sc,
+				     unsigned long nr_to_scan)
+{
+	sc->nr_to_scan = nr_to_scan;
+	return (*shrinker->shrink)(shrinker, sc);
+}
+
 #define SHRINK_BATCH 128
 /*
  * Call the shrink functions to age shrinkable caches
@@ -223,15 +231,14 @@ EXPORT_SYMBOL(unregister_shrinker);
  * Returns the number of slab objects which we shrunk.
  */
 unsigned long shrink_slab(struct shrink_control *shrink,
+			  unsigned long nr_pages_scanned,
 			  unsigned long lru_pages)
 {
 	struct shrinker *shrinker;
 	unsigned long ret = 0;
-	unsigned long scanned = shrink->nr_scanned;
-	gfp_t gfp_mask = shrink->gfp_mask;
 
-	if (scanned == 0)
-		scanned = SWAP_CLUSTER_MAX;
+	if (nr_pages_scanned == 0)
+		nr_pages_scanned = SWAP_CLUSTER_MAX;
 
 	if (!down_read_trylock(&shrinker_rwsem)) {
 		/* Assume we'll be able to shrink next time */
@@ -244,8 +251,8 @@ unsigned long shrink_slab(struct shrink_control *shrink,
 		unsigned long total_scan;
 		unsigned long max_pass;
 
-		max_pass = (*shrinker->shrink)(shrinker, 0, gfp_mask);
-		delta = (4 * scanned) / shrinker->seeks;
+		max_pass = do_shrinker_shrink(shrinker, shrink, 0);
+		delta = (4 * nr_pages_scanned) / shrinker->seeks;
 		delta *= max_pass;
 		do_div(delta, lru_pages + 1);
 		shrinker->nr += delta;
@@ -272,9 +279,9 @@ unsigned long shrink_slab(struct shrink_control *shrink,
 			int shrink_ret;
 			int nr_before;
 
-			nr_before = (*shrinker->shrink)(shrinker, 0, gfp_mask);
-			shrink_ret = (*shrinker->shrink)(shrinker, this_scan,
-								gfp_mask);
+			nr_before = do_shrinker_shrink(shrinker, shrink, 0);
+			shrink_ret = do_shrinker_shrink(shrinker, shrink,
+							this_scan);
 			if (shrink_ret == -1)
 				break;
 			if (shrink_ret < nr_before)
@@ -2072,8 +2079,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 				lru_pages += zone_reclaimable_pages(zone);
 			}
 
-			shrink->nr_scanned = sc->nr_scanned;
-			shrink_slab(shrink, lru_pages);
+			shrink_slab(shrink, sc->nr_scanned, lru_pages);
 			if (reclaim_state) {
 				sc->nr_reclaimed += reclaim_state->reclaimed_slab;
 				reclaim_state->reclaimed_slab = 0;
@@ -2456,8 +2462,7 @@ loop_again:
 					end_zone, 0))
 				shrink_zone(priority, zone, &sc);
 			reclaim_state->reclaimed_slab = 0;
-			shrink.nr_scanned = sc.nr_scanned;
-			nr_slab = shrink_slab(&shrink, lru_pages);
+			nr_slab = shrink_slab(&shrink, sc.nr_scanned, lru_pages);
 			sc.nr_reclaimed += reclaim_state->reclaimed_slab;
 			total_scanned += sc.nr_scanned;
 
@@ -3025,7 +3030,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	}
 
 	nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
-	shrink.nr_scanned = sc.nr_scanned;
 	if (nr_slab_pages0 > zone->min_slab_pages) {
 		/*
 		 * shrink_slab() does not currently allow us to determine how
@@ -3041,7 +3045,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 			unsigned long lru_pages = zone_reclaimable_pages(zone);
 
 			/* No reclaimable slab or very low memory pressure */
-			if (!shrink_slab(&shrink, lru_pages))
+			if (!shrink_slab(&shrink, sc.nr_scanned, lru_pages))
 				break;
 
 			/* Freed enough memory */
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 67e31276682a..cd6e4aa19dbf 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -326,10 +326,12 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
  * Run memory cache shrinker.
  */
 static int
-rpcauth_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
+rpcauth_cache_shrinker(struct shrinker *shrink, struct shrink_control *sc)
 {
 	LIST_HEAD(free);
 	int res;
+	int nr_to_scan = sc->nr_to_scan;
+	gfp_t gfp_mask = sc->gfp_mask;
 
 	if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
 		return (nr_to_scan == 0) ? 0 : -1;
-- 
cgit v1.2.3


From bf4e8902ee5080f5d2c810b639e7e778c8082b52 Mon Sep 17 00:00:00 2001
From: Daniel Kiper <dkiper@net-space.pl>
Date: Tue, 24 May 2011 17:12:32 -0700
Subject: mm: enable set_page_section() only if CONFIG_SPARSEMEM and
 !CONFIG_SPARSEMEM_VMEMMAP

set_page_section() is meaningful only in CONFIG_SPARSEMEM and
!CONFIG_SPARSEMEM_VMEMMAP context.  Move it to proper place and amend
accordingly functions which are using it.

Signed-off-by: Daniel Kiper <dkiper@net-space.pl>
Acked-by: Dave Hansen <dave@linux.vnet.ibm.com>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5cbbf78eaac7..6702171f8d0a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -682,6 +682,12 @@ static inline struct zone *page_zone(struct page *page)
 }
 
 #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
+static inline void set_page_section(struct page *page, unsigned long section)
+{
+	page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT);
+	page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT;
+}
+
 static inline unsigned long page_to_section(struct page *page)
 {
 	return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK;
@@ -700,18 +706,14 @@ static inline void set_page_node(struct page *page, unsigned long node)
 	page->flags |= (node & NODES_MASK) << NODES_PGSHIFT;
 }
 
-static inline void set_page_section(struct page *page, unsigned long section)
-{
-	page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT);
-	page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT;
-}
-
 static inline void set_page_links(struct page *page, enum zone_type zone,
 	unsigned long node, unsigned long pfn)
 {
 	set_page_zone(page, zone);
 	set_page_node(page, node);
+#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
 	set_page_section(page, pfn_to_section_nr(pfn));
+#endif
 }
 
 /*
-- 
cgit v1.2.3


From e3c40f379a144f35e53864a2cd970e238071afd7 Mon Sep 17 00:00:00 2001
From: Daniel Kiper <dkiper@net-space.pl>
Date: Tue, 24 May 2011 17:12:33 -0700
Subject: mm: pfn_to_section_nr()/section_nr_to_pfn() is valid only in
 CONFIG_SPARSEMEM context

pfn_to_section_nr()/section_nr_to_pfn() is valid only in CONFIG_SPARSEMEM
context.  Move it to proper place.

Signed-off-by: Daniel Kiper <dkiper@net-space.pl>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index e56f835274c9..d7152002e18d 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -928,9 +928,6 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn)
 #define pfn_to_nid(pfn)		(0)
 #endif
 
-#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT)
-#define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT)
-
 #ifdef CONFIG_SPARSEMEM
 
 /*
@@ -956,6 +953,9 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn)
 #error Allocator MAX_ORDER exceeds SECTION_SIZE
 #endif
 
+#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT)
+#define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT)
+
 struct page;
 struct page_cgroup;
 struct mem_section {
-- 
cgit v1.2.3


From ba93fa81b5f2bf0076407a3a777fff122ce16220 Mon Sep 17 00:00:00 2001
From: Daniel Kiper <dkiper@net-space.pl>
Date: Tue, 24 May 2011 17:12:34 -0700
Subject: mm: do not define PFN_SECTION_SHIFT if !CONFIG_SPARSEMEM

Do not define PFN_SECTION_SHIFT if !CONFIG_SPARSEMEM.

Signed-off-by: Daniel Kiper <dkiper@net-space.pl>
Acked-by: Dave Hansen <dave@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6702171f8d0a..32309f6542e8 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -605,10 +605,6 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 #define NODE_NOT_IN_PAGE_FLAGS
 #endif
 
-#ifndef PFN_SECTION_SHIFT
-#define PFN_SECTION_SHIFT 0
-#endif
-
 /*
  * Define the bit shifts to access each section.  For non-existent
  * sections we define the shift as 0; that plus a 0 mask ensures
-- 
cgit v1.2.3


From 172703b08cd05e2d5196ac13e94cc186f629d58b Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt.fleming@linux.intel.com>
Date: Tue, 24 May 2011 17:12:36 -0700
Subject: mm: delete non-atomic mm counter implementation

The problem with having two different types of counters is that developers
adding new code need to keep in mind whether it's safe to use both the
atomic and non-atomic implementations.  For example, when adding new
callers of the *_mm_counter() functions a developer needs to ensure that
those paths are always executed with page_table_lock held, in case we're
using the non-atomic implementation of mm counters.

Hugh Dickins introduced the atomic mm counters in commit f412ac08c986
("[PATCH] mm: fix rss and mmlist locking").  When asked why he left the
non-atomic counters around he said,

  | The only reason was to avoid adding costly atomic operations into a
  | configuration that had no need for them there: the page_table_lock
  | sufficed.
  |
  | Certainly it would be simpler just to delete the non-atomic variant.
  |
  | And I think it's fair to say that any configuration on which we're
  | measuring performance to that degree (rather than "does it boot fast?"
  | type measurements), would already be going the split ptlocks route.

Removing the non-atomic counters eases the maintenance burden because
developers no longer have to mindful of the two implementations when using
*_mm_counter().

Note that all architectures provide a means of atomically updating
atomic_long_t variables, even if they have to revert to the generic
spinlock implementation because they don't support 64-bit atomic
instructions (see lib/atomic64.c).

Signed-off-by: Matt Fleming <matt.fleming@linux.intel.com>
Acked-by: Dave Hansen <dave@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h       | 44 +++++++-------------------------------------
 include/linux/mm_types.h |  9 +++------
 2 files changed, 10 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 32309f6542e8..48e458190d88 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1053,65 +1053,35 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
 /*
  * per-process(per-mm_struct) statistics.
  */
-#if defined(SPLIT_RSS_COUNTING)
-/*
- * The mm counters are not protected by its page_table_lock,
- * so must be incremented atomically.
- */
 static inline void set_mm_counter(struct mm_struct *mm, int member, long value)
 {
 	atomic_long_set(&mm->rss_stat.count[member], value);
 }
 
+#if defined(SPLIT_RSS_COUNTING)
 unsigned long get_mm_counter(struct mm_struct *mm, int member);
-
-static inline void add_mm_counter(struct mm_struct *mm, int member, long value)
-{
-	atomic_long_add(value, &mm->rss_stat.count[member]);
-}
-
-static inline void inc_mm_counter(struct mm_struct *mm, int member)
-{
-	atomic_long_inc(&mm->rss_stat.count[member]);
-}
-
-static inline void dec_mm_counter(struct mm_struct *mm, int member)
-{
-	atomic_long_dec(&mm->rss_stat.count[member]);
-}
-
-#else  /* !USE_SPLIT_PTLOCKS */
-/*
- * The mm counters are protected by its page_table_lock,
- * so can be incremented directly.
- */
-static inline void set_mm_counter(struct mm_struct *mm, int member, long value)
-{
-	mm->rss_stat.count[member] = value;
-}
-
+#else
 static inline unsigned long get_mm_counter(struct mm_struct *mm, int member)
 {
-	return mm->rss_stat.count[member];
+	return atomic_long_read(&mm->rss_stat.count[member]);
 }
+#endif
 
 static inline void add_mm_counter(struct mm_struct *mm, int member, long value)
 {
-	mm->rss_stat.count[member] += value;
+	atomic_long_add(value, &mm->rss_stat.count[member]);
 }
 
 static inline void inc_mm_counter(struct mm_struct *mm, int member)
 {
-	mm->rss_stat.count[member]++;
+	atomic_long_inc(&mm->rss_stat.count[member]);
 }
 
 static inline void dec_mm_counter(struct mm_struct *mm, int member)
 {
-	mm->rss_stat.count[member]--;
+	atomic_long_dec(&mm->rss_stat.count[member]);
 }
 
-#endif /* !USE_SPLIT_PTLOCKS */
-
 static inline unsigned long get_mm_rss(struct mm_struct *mm)
 {
 	return get_mm_counter(mm, MM_FILEPAGES) +
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index c2f9ea7922f4..071d459e866b 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -204,19 +204,16 @@ enum {
 
 #if USE_SPLIT_PTLOCKS && defined(CONFIG_MMU)
 #define SPLIT_RSS_COUNTING
-struct mm_rss_stat {
-	atomic_long_t count[NR_MM_COUNTERS];
-};
 /* per-thread cached information, */
 struct task_rss_stat {
 	int events;	/* for synchronization threshold */
 	int count[NR_MM_COUNTERS];
 };
-#else  /* !USE_SPLIT_PTLOCKS */
+#endif /* USE_SPLIT_PTLOCKS */
+
 struct mm_rss_stat {
-	unsigned long count[NR_MM_COUNTERS];
+	atomic_long_t count[NR_MM_COUNTERS];
 };
-#endif /* !USE_SPLIT_PTLOCKS */
 
 struct mm_struct {
 	struct vm_area_struct * mmap;		/* list of VMAs */
-- 
cgit v1.2.3


From 8bba154ef29e16331e578029e9050c74b87b7ff9 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Tue, 24 May 2011 17:12:37 -0700
Subject: memblock/nobootmem: allow alloc_bootmem() to take 0 as low limit

The bootmem wrapper with memblock supports top-down now, so we do not need
to set the low limit to __pa(MAX_DMA_ADDRESS).

The logic should be: good to allocate above __pa(MAX_DMA_ADDRESS), but it
is ok if we can not find memory above 16M on system that has a small
amount of RAM.

Signed-off-by: Yinghai LU <yinghai@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Olaf Hering <olaf@aepfle.de>
Cc: Tejun Heo <tj@kernel.org>
Cc: Lucas De Marchi <lucas.demarchi@profusion.mobi>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bootmem.h | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 01eca1794e14..ab344a521105 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -99,24 +99,31 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
 				      unsigned long align,
 				      unsigned long goal);
 
+#ifdef CONFIG_NO_BOOTMEM
+/* We are using top down, so it is safe to use 0 here */
+#define BOOTMEM_LOW_LIMIT 0
+#else
+#define BOOTMEM_LOW_LIMIT __pa(MAX_DMA_ADDRESS)
+#endif
+
 #define alloc_bootmem(x) \
-	__alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
 #define alloc_bootmem_align(x, align) \
-	__alloc_bootmem(x, align, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem(x, align, BOOTMEM_LOW_LIMIT)
 #define alloc_bootmem_nopanic(x) \
-	__alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
 #define alloc_bootmem_pages(x) \
-	__alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem(x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
 #define alloc_bootmem_pages_nopanic(x) \
-	__alloc_bootmem_nopanic(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem_nopanic(x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
 #define alloc_bootmem_node(pgdat, x) \
-	__alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
 #define alloc_bootmem_node_nopanic(pgdat, x) \
-	__alloc_bootmem_node_nopanic(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem_node_nopanic(pgdat, x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
 #define alloc_bootmem_pages_node(pgdat, x) \
-	__alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem_node(pgdat, x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
 #define alloc_bootmem_pages_node_nopanic(pgdat, x) \
-	__alloc_bootmem_node_nopanic(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem_node_nopanic(pgdat, x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
 
 #define alloc_bootmem_low(x) \
 	__alloc_bootmem_low(x, SMP_CACHE_BYTES, 0)
-- 
cgit v1.2.3


From b09e0fa4b4ea66266058eead43350bd7d55fec67 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 24 May 2011 17:12:39 -0700
Subject: tmpfs: implement generic xattr support

Implement generic xattrs for tmpfs filesystems.  The Feodra project, while
trying to replace suid apps with file capabilities, realized that tmpfs,
which is used on the build systems, does not support file capabilities and
thus cannot be used to build packages which use file capabilities.  Xattrs
are also needed for overlayfs.

The xattr interface is a bit odd.  If a filesystem does not implement any
{get,set,list}xattr functions the VFS will call into some random LSM hooks
and the running LSM can then implement some method for handling xattrs.
SELinux for example provides a method to support security.selinux but no
other security.* xattrs.

As it stands today when one enables CONFIG_TMPFS_POSIX_ACL tmpfs will have
xattr handler routines specifically to handle acls.  Because of this tmpfs
would loose the VFS/LSM helpers to support the running LSM.  To make up
for that tmpfs had stub functions that did nothing but call into the LSM
hooks which implement the helpers.

This new patch does not use the LSM fallback functions and instead just
implements a native get/set/list xattr feature for the full security.* and
trusted.* namespace like a normal filesystem.  This means that tmpfs can
now support both security.selinux and security.capability, which was not
previously possible.

The basic implementation is that I attach a:

struct shmem_xattr {
	struct list_head list; /* anchored by shmem_inode_info->xattr_list */
	char *name;
	size_t size;
	char value[0];
};

Into the struct shmem_inode_info for each xattr that is set.  This
implementation could easily support the user.* namespace as well, except
some care needs to be taken to prevent large amounts of unswappable memory
being allocated for unprivileged users.

[mszeredi@suse.cz: new config option, suport trusted.*, support symlinks]
Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Acked-by: Serge Hallyn <serge.hallyn@ubuntu.com>
Tested-by: Serge Hallyn <serge.hallyn@ubuntu.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Acked-by: Hugh Dickins <hughd@google.com>
Tested-by: Jordi Pujol <jordipujolp@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/Kconfig               |  18 ++-
 include/linux/shmem_fs.h |   8 +-
 mm/shmem.c               | 320 +++++++++++++++++++++++++++++++++++++++--------
 3 files changed, 290 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/fs/Kconfig b/fs/Kconfig
index f3aa9b08b228..979992dcb386 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -121,9 +121,25 @@ config TMPFS
 
 	  See <file:Documentation/filesystems/tmpfs.txt> for details.
 
+config TMPFS_XATTR
+	bool "Tmpfs extended attributes"
+	depends on TMPFS
+	default n
+	help
+	  Extended attributes are name:value pairs associated with inodes by
+	  the kernel or by users (see the attr(5) manual page, or visit
+	  <http://acl.bestbits.at/> for details).
+
+	  Currently this enables support for the trusted.* and
+	  security.* namespaces.
+
+	  If unsure, say N.
+
+	  You need this for POSIX ACL support on tmpfs.
+
 config TMPFS_POSIX_ACL
 	bool "Tmpfs POSIX Access Control Lists"
-	depends on TMPFS
+	depends on TMPFS_XATTR
 	select GENERIC_ACL
 	help
 	  POSIX Access Control Lists (ACLs) support permissions for users and
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 399be5ad2f99..2b7fec840517 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -9,6 +9,8 @@
 
 #define SHMEM_NR_DIRECT 16
 
+#define SHMEM_SYMLINK_INLINE_LEN (SHMEM_NR_DIRECT * sizeof(swp_entry_t))
+
 struct shmem_inode_info {
 	spinlock_t		lock;
 	unsigned long		flags;
@@ -17,8 +19,12 @@ struct shmem_inode_info {
 	unsigned long		next_index;	/* highest alloced index + 1 */
 	struct shared_policy	policy;		/* NUMA memory alloc policy */
 	struct page		*i_indirect;	/* top indirect blocks page */
-	swp_entry_t		i_direct[SHMEM_NR_DIRECT]; /* first blocks */
+	union {
+		swp_entry_t	i_direct[SHMEM_NR_DIRECT]; /* first blocks */
+		char		inline_symlink[SHMEM_SYMLINK_INLINE_LEN];
+	};
 	struct list_head	swaplist;	/* chain of maybes on swap */
+	struct list_head	xattr_list;	/* list of shmem_xattr */
 	struct inode		vfs_inode;
 };
 
diff --git a/mm/shmem.c b/mm/shmem.c
index ba4ad28b7db6..69edb45a9f28 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -99,6 +99,13 @@ static struct vfsmount *shm_mnt;
 /* Pretend that each entry is of this size in directory's i_size */
 #define BOGO_DIRENT_SIZE 20
 
+struct shmem_xattr {
+	struct list_head list;	/* anchored by shmem_inode_info->xattr_list */
+	char *name;		/* xattr name */
+	size_t size;
+	char value[0];
+};
+
 /* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */
 enum sgp_type {
 	SGP_READ,	/* don't exceed i_size, don't allocate page */
@@ -822,6 +829,7 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
 static void shmem_evict_inode(struct inode *inode)
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
+	struct shmem_xattr *xattr, *nxattr;
 
 	if (inode->i_mapping->a_ops == &shmem_aops) {
 		truncate_inode_pages(inode->i_mapping, 0);
@@ -834,6 +842,11 @@ static void shmem_evict_inode(struct inode *inode)
 			mutex_unlock(&shmem_swaplist_mutex);
 		}
 	}
+
+	list_for_each_entry_safe(xattr, nxattr, &info->xattr_list, list) {
+		kfree(xattr->name);
+		kfree(xattr);
+	}
 	BUG_ON(inode->i_blocks);
 	shmem_free_inode(inode->i_sb);
 	end_writeback(inode);
@@ -1615,6 +1628,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
 		spin_lock_init(&info->lock);
 		info->flags = flags & VM_NORESERVE;
 		INIT_LIST_HEAD(&info->swaplist);
+		INIT_LIST_HEAD(&info->xattr_list);
 		cache_no_acl(inode);
 
 		switch (mode & S_IFMT) {
@@ -2014,9 +2028,9 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
 
 	info = SHMEM_I(inode);
 	inode->i_size = len-1;
-	if (len <= (char *)inode - (char *)info) {
+	if (len <= SHMEM_SYMLINK_INLINE_LEN) {
 		/* do it inline */
-		memcpy(info, symname, len);
+		memcpy(info->inline_symlink, symname, len);
 		inode->i_op = &shmem_symlink_inline_operations;
 	} else {
 		error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
@@ -2042,7 +2056,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
 
 static void *shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
 {
-	nd_set_link(nd, (char *)SHMEM_I(dentry->d_inode));
+	nd_set_link(nd, SHMEM_I(dentry->d_inode)->inline_symlink);
 	return NULL;
 }
 
@@ -2066,63 +2080,253 @@ static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *co
 	}
 }
 
-static const struct inode_operations shmem_symlink_inline_operations = {
-	.readlink	= generic_readlink,
-	.follow_link	= shmem_follow_link_inline,
-};
-
-static const struct inode_operations shmem_symlink_inode_operations = {
-	.readlink	= generic_readlink,
-	.follow_link	= shmem_follow_link,
-	.put_link	= shmem_put_link,
-};
-
-#ifdef CONFIG_TMPFS_POSIX_ACL
+#ifdef CONFIG_TMPFS_XATTR
 /*
- * Superblocks without xattr inode operations will get security.* xattr
- * support from the VFS "for free". As soon as we have any other xattrs
+ * Superblocks without xattr inode operations may get some security.* xattr
+ * support from the LSM "for free". As soon as we have any other xattrs
  * like ACLs, we also need to implement the security.* handlers at
  * filesystem level, though.
  */
 
-static size_t shmem_xattr_security_list(struct dentry *dentry, char *list,
-					size_t list_len, const char *name,
-					size_t name_len, int handler_flags)
+static int shmem_xattr_get(struct dentry *dentry, const char *name,
+			   void *buffer, size_t size)
 {
-	return security_inode_listsecurity(dentry->d_inode, list, list_len);
-}
+	struct shmem_inode_info *info;
+	struct shmem_xattr *xattr;
+	int ret = -ENODATA;
 
-static int shmem_xattr_security_get(struct dentry *dentry, const char *name,
-		void *buffer, size_t size, int handler_flags)
-{
-	if (strcmp(name, "") == 0)
-		return -EINVAL;
-	return xattr_getsecurity(dentry->d_inode, name, buffer, size);
+	info = SHMEM_I(dentry->d_inode);
+
+	spin_lock(&info->lock);
+	list_for_each_entry(xattr, &info->xattr_list, list) {
+		if (strcmp(name, xattr->name))
+			continue;
+
+		ret = xattr->size;
+		if (buffer) {
+			if (size < xattr->size)
+				ret = -ERANGE;
+			else
+				memcpy(buffer, xattr->value, xattr->size);
+		}
+		break;
+	}
+	spin_unlock(&info->lock);
+	return ret;
 }
 
-static int shmem_xattr_security_set(struct dentry *dentry, const char *name,
-		const void *value, size_t size, int flags, int handler_flags)
+static int shmem_xattr_set(struct dentry *dentry, const char *name,
+			   const void *value, size_t size, int flags)
 {
-	if (strcmp(name, "") == 0)
-		return -EINVAL;
-	return security_inode_setsecurity(dentry->d_inode, name, value,
-					  size, flags);
+	struct inode *inode = dentry->d_inode;
+	struct shmem_inode_info *info = SHMEM_I(inode);
+	struct shmem_xattr *xattr;
+	struct shmem_xattr *new_xattr = NULL;
+	size_t len;
+	int err = 0;
+
+	/* value == NULL means remove */
+	if (value) {
+		/* wrap around? */
+		len = sizeof(*new_xattr) + size;
+		if (len <= sizeof(*new_xattr))
+			return -ENOMEM;
+
+		new_xattr = kmalloc(len, GFP_KERNEL);
+		if (!new_xattr)
+			return -ENOMEM;
+
+		new_xattr->name = kstrdup(name, GFP_KERNEL);
+		if (!new_xattr->name) {
+			kfree(new_xattr);
+			return -ENOMEM;
+		}
+
+		new_xattr->size = size;
+		memcpy(new_xattr->value, value, size);
+	}
+
+	spin_lock(&info->lock);
+	list_for_each_entry(xattr, &info->xattr_list, list) {
+		if (!strcmp(name, xattr->name)) {
+			if (flags & XATTR_CREATE) {
+				xattr = new_xattr;
+				err = -EEXIST;
+			} else if (new_xattr) {
+				list_replace(&xattr->list, &new_xattr->list);
+			} else {
+				list_del(&xattr->list);
+			}
+			goto out;
+		}
+	}
+	if (flags & XATTR_REPLACE) {
+		xattr = new_xattr;
+		err = -ENODATA;
+	} else {
+		list_add(&new_xattr->list, &info->xattr_list);
+		xattr = NULL;
+	}
+out:
+	spin_unlock(&info->lock);
+	if (xattr)
+		kfree(xattr->name);
+	kfree(xattr);
+	return err;
 }
 
-static const struct xattr_handler shmem_xattr_security_handler = {
-	.prefix = XATTR_SECURITY_PREFIX,
-	.list   = shmem_xattr_security_list,
-	.get    = shmem_xattr_security_get,
-	.set    = shmem_xattr_security_set,
-};
 
 static const struct xattr_handler *shmem_xattr_handlers[] = {
+#ifdef CONFIG_TMPFS_POSIX_ACL
 	&generic_acl_access_handler,
 	&generic_acl_default_handler,
-	&shmem_xattr_security_handler,
+#endif
 	NULL
 };
+
+static int shmem_xattr_validate(const char *name)
+{
+	struct { const char *prefix; size_t len; } arr[] = {
+		{ XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN },
+		{ XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN }
+	};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(arr); i++) {
+		size_t preflen = arr[i].len;
+		if (strncmp(name, arr[i].prefix, preflen) == 0) {
+			if (!name[preflen])
+				return -EINVAL;
+			return 0;
+		}
+	}
+	return -EOPNOTSUPP;
+}
+
+static ssize_t shmem_getxattr(struct dentry *dentry, const char *name,
+			      void *buffer, size_t size)
+{
+	int err;
+
+	/*
+	 * If this is a request for a synthetic attribute in the system.*
+	 * namespace use the generic infrastructure to resolve a handler
+	 * for it via sb->s_xattr.
+	 */
+	if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
+		return generic_getxattr(dentry, name, buffer, size);
+
+	err = shmem_xattr_validate(name);
+	if (err)
+		return err;
+
+	return shmem_xattr_get(dentry, name, buffer, size);
+}
+
+static int shmem_setxattr(struct dentry *dentry, const char *name,
+			  const void *value, size_t size, int flags)
+{
+	int err;
+
+	/*
+	 * If this is a request for a synthetic attribute in the system.*
+	 * namespace use the generic infrastructure to resolve a handler
+	 * for it via sb->s_xattr.
+	 */
+	if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
+		return generic_setxattr(dentry, name, value, size, flags);
+
+	err = shmem_xattr_validate(name);
+	if (err)
+		return err;
+
+	if (size == 0)
+		value = "";  /* empty EA, do not remove */
+
+	return shmem_xattr_set(dentry, name, value, size, flags);
+
+}
+
+static int shmem_removexattr(struct dentry *dentry, const char *name)
+{
+	int err;
+
+	/*
+	 * If this is a request for a synthetic attribute in the system.*
+	 * namespace use the generic infrastructure to resolve a handler
+	 * for it via sb->s_xattr.
+	 */
+	if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
+		return generic_removexattr(dentry, name);
+
+	err = shmem_xattr_validate(name);
+	if (err)
+		return err;
+
+	return shmem_xattr_set(dentry, name, NULL, 0, XATTR_REPLACE);
+}
+
+static bool xattr_is_trusted(const char *name)
+{
+	return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN);
+}
+
+static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
+{
+	bool trusted = capable(CAP_SYS_ADMIN);
+	struct shmem_xattr *xattr;
+	struct shmem_inode_info *info;
+	size_t used = 0;
+
+	info = SHMEM_I(dentry->d_inode);
+
+	spin_lock(&info->lock);
+	list_for_each_entry(xattr, &info->xattr_list, list) {
+		size_t len;
+
+		/* skip "trusted." attributes for unprivileged callers */
+		if (!trusted && xattr_is_trusted(xattr->name))
+			continue;
+
+		len = strlen(xattr->name) + 1;
+		used += len;
+		if (buffer) {
+			if (size < used) {
+				used = -ERANGE;
+				break;
+			}
+			memcpy(buffer, xattr->name, len);
+			buffer += len;
+		}
+	}
+	spin_unlock(&info->lock);
+
+	return used;
+}
+#endif /* CONFIG_TMPFS_XATTR */
+
+static const struct inode_operations shmem_symlink_inline_operations = {
+	.readlink	= generic_readlink,
+	.follow_link	= shmem_follow_link_inline,
+#ifdef CONFIG_TMPFS_XATTR
+	.setxattr	= shmem_setxattr,
+	.getxattr	= shmem_getxattr,
+	.listxattr	= shmem_listxattr,
+	.removexattr	= shmem_removexattr,
+#endif
+};
+
+static const struct inode_operations shmem_symlink_inode_operations = {
+	.readlink	= generic_readlink,
+	.follow_link	= shmem_follow_link,
+	.put_link	= shmem_put_link,
+#ifdef CONFIG_TMPFS_XATTR
+	.setxattr	= shmem_setxattr,
+	.getxattr	= shmem_getxattr,
+	.listxattr	= shmem_listxattr,
+	.removexattr	= shmem_removexattr,
 #endif
+};
 
 static struct dentry *shmem_get_parent(struct dentry *child)
 {
@@ -2402,8 +2606,10 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_magic = TMPFS_MAGIC;
 	sb->s_op = &shmem_ops;
 	sb->s_time_gran = 1;
-#ifdef CONFIG_TMPFS_POSIX_ACL
+#ifdef CONFIG_TMPFS_XATTR
 	sb->s_xattr = shmem_xattr_handlers;
+#endif
+#ifdef CONFIG_TMPFS_POSIX_ACL
 	sb->s_flags |= MS_POSIXACL;
 #endif
 
@@ -2501,11 +2707,13 @@ static const struct file_operations shmem_file_operations = {
 static const struct inode_operations shmem_inode_operations = {
 	.setattr	= shmem_notify_change,
 	.truncate_range	= shmem_truncate_range,
+#ifdef CONFIG_TMPFS_XATTR
+	.setxattr	= shmem_setxattr,
+	.getxattr	= shmem_getxattr,
+	.listxattr	= shmem_listxattr,
+	.removexattr	= shmem_removexattr,
+#endif
 #ifdef CONFIG_TMPFS_POSIX_ACL
-	.setxattr	= generic_setxattr,
-	.getxattr	= generic_getxattr,
-	.listxattr	= generic_listxattr,
-	.removexattr	= generic_removexattr,
 	.check_acl	= generic_check_acl,
 #endif
 
@@ -2523,23 +2731,27 @@ static const struct inode_operations shmem_dir_inode_operations = {
 	.mknod		= shmem_mknod,
 	.rename		= shmem_rename,
 #endif
+#ifdef CONFIG_TMPFS_XATTR
+	.setxattr	= shmem_setxattr,
+	.getxattr	= shmem_getxattr,
+	.listxattr	= shmem_listxattr,
+	.removexattr	= shmem_removexattr,
+#endif
 #ifdef CONFIG_TMPFS_POSIX_ACL
 	.setattr	= shmem_notify_change,
-	.setxattr	= generic_setxattr,
-	.getxattr	= generic_getxattr,
-	.listxattr	= generic_listxattr,
-	.removexattr	= generic_removexattr,
 	.check_acl	= generic_check_acl,
 #endif
 };
 
 static const struct inode_operations shmem_special_inode_operations = {
+#ifdef CONFIG_TMPFS_XATTR
+	.setxattr	= shmem_setxattr,
+	.getxattr	= shmem_getxattr,
+	.listxattr	= shmem_listxattr,
+	.removexattr	= shmem_removexattr,
+#endif
 #ifdef CONFIG_TMPFS_POSIX_ACL
 	.setattr	= shmem_notify_change,
-	.setxattr	= generic_setxattr,
-	.getxattr	= generic_getxattr,
-	.listxattr	= generic_listxattr,
-	.removexattr	= generic_removexattr,
 	.check_acl	= generic_check_acl,
 #endif
 };
-- 
cgit v1.2.3


From c856507f2b2b47a49d8587afb58930b463f6bff4 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Tue, 24 May 2011 17:12:40 -0700
Subject: mm: remove last trace of shmem_get_unmapped_area

Remove noMMU declaration of shmem_get_unmapped_area() from mm.h: it fell
out of use in 2.6.21 and ceased to exist in 2.6.29.

Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 48e458190d88..8eb969ebf904 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -873,14 +873,6 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user);
 struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags);
 int shmem_zero_setup(struct vm_area_struct *);
 
-#ifndef CONFIG_MMU
-extern unsigned long shmem_get_unmapped_area(struct file *file,
-					     unsigned long addr,
-					     unsigned long len,
-					     unsigned long pgoff,
-					     unsigned long flags);
-#endif
-
 extern int can_do_mlock(void);
 extern int user_shm_lock(size_t, struct user_struct *);
 extern void user_shm_unlock(size_t, struct user_struct *);
-- 
cgit v1.2.3


From d98f6cb67fb5b9376d4957d7ba9f32eac35c2e08 Mon Sep 17 00:00:00 2001
From: Stephen Wilson <wilsons@start.ca>
Date: Tue, 24 May 2011 17:12:41 -0700
Subject: mm: export get_vma_policy()

In commit 48fce3429d ("mempolicies: unexport get_vma_policy()")
get_vma_policy() was marked static as all clients were local to
mempolicy.c.

However, the decision to generate /proc/pid/numa_maps in the numa memory
policy code and outside the procfs subsystem introduces an artificial
interdependency between the two systems.  Exporting get_vma_policy() once
again is the first step to clean up this interdependency.

Signed-off-by: Stephen Wilson <wilsons@start.ca>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mempolicy.h | 3 +++
 mm/mempolicy.c            | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 31ac26ca4acf..c2f603218fbc 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -199,6 +199,9 @@ void mpol_free_shared_policy(struct shared_policy *p);
 struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
 					    unsigned long idx);
 
+struct mempolicy *get_vma_policy(struct task_struct *tsk,
+		struct vm_area_struct *vma, unsigned long addr);
+
 extern void numa_default_policy(void);
 extern void numa_policy_init(void);
 extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new,
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 959a8b8c7350..5bfb03ef3cb0 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1489,7 +1489,7 @@ asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len,
  * freeing by another task.  It is the caller's responsibility to free the
  * extra reference for shared policies.
  */
-static struct mempolicy *get_vma_policy(struct task_struct *task,
+struct mempolicy *get_vma_policy(struct task_struct *task,
 		struct vm_area_struct *vma, unsigned long addr)
 {
 	struct mempolicy *pol = task->mempolicy;
-- 
cgit v1.2.3


From 13057efb0a0063eb8042d99093ec88a52c4f1593 Mon Sep 17 00:00:00 2001
From: Stephen Wilson <wilsons@start.ca>
Date: Tue, 24 May 2011 17:12:46 -0700
Subject: mm: declare mpol_to_str() when CONFIG_TMPFS=n

When CONFIG_TMPFS=n mpol_to_str() is not declared in mempolicy.h.
However, in the NUMA case, the definition is always compiled.

Since it is not strictly true that tmpfs is the only client, and since the
symbol was always lurking around anyways, export mpol_to_str()
unconditionally.  Furthermore, this will allow us to move show_numa_map()
out of mempolicy.c and into the procfs subsystem.

Signed-off-by: Stephen Wilson <wilsons@start.ca>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mempolicy.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index c2f603218fbc..7978eec1b7d9 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -231,10 +231,10 @@ int do_migrate_pages(struct mm_struct *mm,
 
 #ifdef CONFIG_TMPFS
 extern int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context);
+#endif
 
 extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol,
 			int no_context);
-#endif
 
 /* Check if a vma is migratable */
 static inline int vma_migratable(struct vm_area_struct *vma)
@@ -371,13 +371,13 @@ static inline int mpol_parse_str(char *str, struct mempolicy **mpol,
 {
 	return 1;	/* error */
 }
+#endif
 
 static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol,
 				int no_context)
 {
 	return 0;
 }
-#endif
 
 #endif /* CONFIG_NUMA */
 #endif /* __KERNEL__ */
-- 
cgit v1.2.3


From f2beb7983613ecca20a61604f01ab50cc7a797e6 Mon Sep 17 00:00:00 2001
From: Stephen Wilson <wilsons@start.ca>
Date: Tue, 24 May 2011 17:12:48 -0700
Subject: proc: make struct proc_maps_private truly private

Now that mm/mempolicy.c is no longer implementing /proc/pid/numa_maps
there is no need to export struct proc_maps_private to the world.  Move it
to fs/proc/internal.h instead.

Signed-off-by: Stephen Wilson <wilsons@start.ca>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/internal.h      | 8 ++++++++
 include/linux/proc_fs.h | 8 --------
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index c03e8d3a3a5b..3763b436e69d 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -61,6 +61,14 @@ extern const struct file_operations proc_pagemap_operations;
 extern const struct file_operations proc_net_operations;
 extern const struct inode_operations proc_net_inode_operations;
 
+struct proc_maps_private {
+	struct pid *pid;
+	struct task_struct *task;
+#ifdef CONFIG_MMU
+	struct vm_area_struct *tail_vma;
+#endif
+};
+
 void proc_init_inodecache(void);
 
 static inline struct pid *proc_pid(struct inode *inode)
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index eaf4350c0f90..3686cd6c9aca 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -288,12 +288,4 @@ static inline struct net *PDE_NET(struct proc_dir_entry *pde)
 	return pde->parent->data;
 }
 
-struct proc_maps_private {
-	struct pid *pid;
-	struct task_struct *task;
-#ifdef CONFIG_MMU
-	struct vm_area_struct *tail_vma;
-#endif
-};
-
 #endif /* _LINUX_PROC_FS_H */
-- 
cgit v1.2.3


From a539f3533b78e39a22723d6d3e1e11b6c14454d9 Mon Sep 17 00:00:00 2001
From: Daniel Kiper <dkiper@net-space.pl>
Date: Tue, 24 May 2011 17:12:51 -0700
Subject: mm: add SECTION_ALIGN_UP() and SECTION_ALIGN_DOWN() macro

Add SECTION_ALIGN_UP() and SECTION_ALIGN_DOWN() macro which aligns given
pfn to upper section and lower section boundary accordingly.

Required for the latest memory hotplug support for the Xen balloon driver.

Signed-off-by: Daniel Kiper <dkiper@net-space.pl>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index d7152002e18d..217bcf6bca77 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -956,6 +956,9 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn)
 #define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT)
 #define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT)
 
+#define SECTION_ALIGN_UP(pfn)	(((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK)
+#define SECTION_ALIGN_DOWN(pfn)	((pfn) & PAGE_SECTION_MASK)
+
 struct page;
 struct page_cgroup;
 struct mem_section {
-- 
cgit v1.2.3


From 0ac1ee0bfec2a4ad118f907ce586d0dfd8db7641 Mon Sep 17 00:00:00 2001
From: Tim Gardner <tim.gardner@canonical.com>
Date: Tue, 24 May 2011 17:13:05 -0700
Subject: ulimit: raise default hard ulimit on number of files to 4096

Apps are increasingly using more than 1024 file descriptors.  See
discussion in several distro bug trackers, e.g.  BugLink:
http://bugs.launchpad.net/bugs/663090
https://issues.rpath.com/browse/RPL-2054

You don't want to raise the default soft limit, since that might break
apps that use select(), but it's safe to raise the default hard limit;
that way, apps that know they need lots of file descriptors can raise
their soft limit without needing root, and without user intervention.

Ubuntu is doing this with a kernel change because they have a policy of
not changing kernel defaults in userland.

While 4096 might not be enough for *all* apps, it seems to be plenty for
the apps I've seen lately that are unhappy with 1024.

Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
Cc: Dan Kegel <dank@kegel.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/resource.h | 2 +-
 include/linux/fs.h             | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-generic/resource.h b/include/asm-generic/resource.h
index 587566f95f6c..61fa862fe08d 100644
--- a/include/asm-generic/resource.h
+++ b/include/asm-generic/resource.h
@@ -78,7 +78,7 @@
 	[RLIMIT_CORE]		= {              0,  RLIM_INFINITY },	\
 	[RLIMIT_RSS]		= {  RLIM_INFINITY,  RLIM_INFINITY },	\
 	[RLIMIT_NPROC]		= {              0,              0 },	\
-	[RLIMIT_NOFILE]		= {       INR_OPEN,       INR_OPEN },	\
+	[RLIMIT_NOFILE]		= {   INR_OPEN_CUR,   INR_OPEN_MAX },	\
 	[RLIMIT_MEMLOCK]	= {    MLOCK_LIMIT,    MLOCK_LIMIT },	\
 	[RLIMIT_AS]		= {  RLIM_INFINITY,  RLIM_INFINITY },	\
 	[RLIMIT_LOCKS]		= {  RLIM_INFINITY,  RLIM_INFINITY },	\
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5bb9e826019b..3f9d3251790d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -23,7 +23,8 @@
 
 /* Fixed constants first: */
 #undef NR_OPEN
-#define INR_OPEN 1024		/* Initial setting for nfile rlimits */
+#define INR_OPEN_CUR 1024	/* Initial setting for nfile rlimits */
+#define INR_OPEN_MAX 4096	/* Hard limit for nfile rlimits */
 
 #define BLOCK_SIZE_BITS 10
 #define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
-- 
cgit v1.2.3


From 21b7d815b2cc380929181e1a4a7129e8b569e574 Mon Sep 17 00:00:00 2001
From: Wanlong Gao <wanlong.gao@gmail.com>
Date: Tue, 24 May 2011 17:13:06 -0700
Subject: include/linux/c2port.h: remove wrong and never used macros

The macro to_class_dev() uses the deprecated structure class_device, and
the c2port_device has no member named class in the definition of the macro
to_c2port_device.

Signed-off-by: Wanlong Gao <wanlong.gao@gmail.com>
Cc: Rodolfo Giometti <giometti@linux.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/c2port.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/c2port.h b/include/linux/c2port.h
index 2a5cd867c365..a2f7d7413f30 100644
--- a/include/linux/c2port.h
+++ b/include/linux/c2port.h
@@ -60,9 +60,6 @@ struct c2port_ops {
  * Exported functions
  */
 
-#define to_class_dev(obj) container_of((obj), struct class_device, kobj)
-#define to_c2port_device(obj) container_of((obj), struct c2port_device, class)
-
 extern struct c2port_device *c2port_device_register(char *name,
 					struct c2port_ops *ops, void *devdata);
 extern void c2port_device_unregister(struct c2port_device *dev);
-- 
cgit v1.2.3


From e50c1f609c63223adaa38f5a79b18759a00adf72 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Tue, 24 May 2011 17:13:11 -0700
Subject: fscache: remove dead code under CONFIG_WORKQUEUE_DEBUGFS

There is no CONFIG_WORKQUEUE_DEBUGFS any more, so this code is dead.

Signed-off-by: WANG Cong <amwang@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fscache/operation.c        | 10 ----------
 fs/fscache/page.c             | 13 -------------
 include/linux/fscache-cache.h | 12 ------------
 3 files changed, 35 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index 48a18f184d50..30afdfa7aec7 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -33,8 +33,6 @@ void fscache_enqueue_operation(struct fscache_operation *op)
 	_enter("{OBJ%x OP%x,%u}",
 	       op->object->debug_id, op->debug_id, atomic_read(&op->usage));
 
-	fscache_set_op_state(op, "EnQ");
-
 	ASSERT(list_empty(&op->pend_link));
 	ASSERT(op->processor != NULL);
 	ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE);
@@ -66,8 +64,6 @@ EXPORT_SYMBOL(fscache_enqueue_operation);
 static void fscache_run_op(struct fscache_object *object,
 			   struct fscache_operation *op)
 {
-	fscache_set_op_state(op, "Run");
-
 	object->n_in_progress++;
 	if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags))
 		wake_up_bit(&op->flags, FSCACHE_OP_WAITING);
@@ -88,8 +84,6 @@ int fscache_submit_exclusive_op(struct fscache_object *object,
 
 	_enter("{OBJ%x OP%x},", object->debug_id, op->debug_id);
 
-	fscache_set_op_state(op, "SubmitX");
-
 	spin_lock(&object->lock);
 	ASSERTCMP(object->n_ops, >=, object->n_in_progress);
 	ASSERTCMP(object->n_ops, >=, object->n_exclusive);
@@ -194,8 +188,6 @@ int fscache_submit_op(struct fscache_object *object,
 
 	ASSERTCMP(atomic_read(&op->usage), >, 0);
 
-	fscache_set_op_state(op, "Submit");
-
 	spin_lock(&object->lock);
 	ASSERTCMP(object->n_ops, >=, object->n_in_progress);
 	ASSERTCMP(object->n_ops, >=, object->n_exclusive);
@@ -335,8 +327,6 @@ void fscache_put_operation(struct fscache_operation *op)
 	if (!atomic_dec_and_test(&op->usage))
 		return;
 
-	fscache_set_op_state(op, "Put");
-
 	_debug("PUT OP");
 	if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags))
 		BUG();
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 41c441c2058d..a2a5d19ece6a 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -155,11 +155,9 @@ static void fscache_attr_changed_op(struct fscache_operation *op)
 	fscache_stat(&fscache_n_attr_changed_calls);
 
 	if (fscache_object_is_active(object)) {
-		fscache_set_op_state(op, "CallFS");
 		fscache_stat(&fscache_n_cop_attr_changed);
 		ret = object->cache->ops->attr_changed(object);
 		fscache_stat_d(&fscache_n_cop_attr_changed);
-		fscache_set_op_state(op, "Done");
 		if (ret < 0)
 			fscache_abort_object(object);
 	}
@@ -190,7 +188,6 @@ int __fscache_attr_changed(struct fscache_cookie *cookie)
 
 	fscache_operation_init(op, fscache_attr_changed_op, NULL);
 	op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE);
-	fscache_set_op_name(op, "Attr");
 
 	spin_lock(&cookie->lock);
 
@@ -257,7 +254,6 @@ static struct fscache_retrieval *fscache_alloc_retrieval(
 	op->context	= context;
 	op->start_time	= jiffies;
 	INIT_LIST_HEAD(&op->to_do);
-	fscache_set_op_name(&op->op, "Retr");
 	return op;
 }
 
@@ -368,7 +364,6 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,
 		_leave(" = -ENOMEM");
 		return -ENOMEM;
 	}
-	fscache_set_op_name(&op->op, "RetrRA1");
 
 	spin_lock(&cookie->lock);
 
@@ -487,7 +482,6 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
 	op = fscache_alloc_retrieval(mapping, end_io_func, context);
 	if (!op)
 		return -ENOMEM;
-	fscache_set_op_name(&op->op, "RetrRAN");
 
 	spin_lock(&cookie->lock);
 
@@ -589,7 +583,6 @@ int __fscache_alloc_page(struct fscache_cookie *cookie,
 	op = fscache_alloc_retrieval(page->mapping, NULL, NULL);
 	if (!op)
 		return -ENOMEM;
-	fscache_set_op_name(&op->op, "RetrAL1");
 
 	spin_lock(&cookie->lock);
 
@@ -662,8 +655,6 @@ static void fscache_write_op(struct fscache_operation *_op)
 
 	_enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage));
 
-	fscache_set_op_state(&op->op, "GetPage");
-
 	spin_lock(&object->lock);
 	cookie = object->cookie;
 
@@ -698,15 +689,12 @@ static void fscache_write_op(struct fscache_operation *_op)
 	spin_unlock(&cookie->stores_lock);
 	spin_unlock(&object->lock);
 
-	fscache_set_op_state(&op->op, "Store");
 	fscache_stat(&fscache_n_store_pages);
 	fscache_stat(&fscache_n_cop_write_page);
 	ret = object->cache->ops->write_page(op, page);
 	fscache_stat_d(&fscache_n_cop_write_page);
-	fscache_set_op_state(&op->op, "EndWrite");
 	fscache_end_page_write(object, page);
 	if (ret < 0) {
-		fscache_set_op_state(&op->op, "Abort");
 		fscache_abort_object(object);
 	} else {
 		fscache_enqueue_operation(&op->op);
@@ -778,7 +766,6 @@ int __fscache_write_page(struct fscache_cookie *cookie,
 	fscache_operation_init(&op->op, fscache_write_op,
 			       fscache_release_write_op);
 	op->op.flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_WAITING);
-	fscache_set_op_name(&op->op, "Write1");
 
 	ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM);
 	if (ret < 0)
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 76427e688d15..af095b54502e 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -100,17 +100,6 @@ struct fscache_operation {
 
 	/* operation releaser */
 	fscache_operation_release_t release;
-
-#ifdef CONFIG_WORKQUEUE_DEBUGFS
-	struct work_struct put_work;	/* work to delay operation put */
-	const char *name;		/* operation name */
-	const char *state;		/* operation state */
-#define fscache_set_op_name(OP, N)	do { (OP)->name  = (N); } while(0)
-#define fscache_set_op_state(OP, S)	do { (OP)->state = (S); } while(0)
-#else
-#define fscache_set_op_name(OP, N)	do { } while(0)
-#define fscache_set_op_state(OP, S)	do { } while(0)
-#endif
 };
 
 extern atomic_t fscache_op_debug_id;
@@ -137,7 +126,6 @@ static inline void fscache_operation_init(struct fscache_operation *op,
 	op->processor = processor;
 	op->release = release;
 	INIT_LIST_HEAD(&op->pend_link);
-	fscache_set_op_state(op, "Init");
 }
 
 /*
-- 
cgit v1.2.3


From 4b060420a596095869a6d7849caa798d23839cd1 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 24 May 2011 17:13:12 -0700
Subject: bitmap, irq: add smp_affinity_list interface to /proc/irq

Manually adjusting the smp_affinity for IRQ's becomes unwieldy when the
cpu count is large.

Setting smp affinity to cpus 256 to 263 would be:

	echo 000000ff,00000000,00000000,00000000,00000000,00000000,00000000,00000000 > smp_affinity

instead of:

	echo 256-263 > smp_affinity_list

Think about what it looks like for cpus around say, 4088 to 4095.

We already have many alternate "list" interfaces:

/sys/devices/system/cpu/cpuX/indexY/shared_cpu_list
/sys/devices/system/cpu/cpuX/topology/thread_siblings_list
/sys/devices/system/cpu/cpuX/topology/core_siblings_list
/sys/devices/system/node/nodeX/cpulist
/sys/devices/pci***/***/local_cpulist

Add a companion interface, smp_affinity_list to use cpu lists instead of
cpu maps.  This conforms to other companion interfaces where both a map
and a list interface exists.

This required adding a bitmap_parselist_user() function in a manner
similar to the bitmap_parse_user() function.

[akpm@linux-foundation.org: make __bitmap_parselist() static]
Signed-off-by: Mike Travis <travis@sgi.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Jack Steiner <steiner@sgi.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/IRQ-affinity.txt     |  17 ++++--
 Documentation/filesystems/proc.txt |  11 +++-
 include/linux/bitmap.h             |   5 +-
 include/linux/cpumask.h            |  15 +++++
 kernel/irq/proc.c                  |  54 ++++++++++++++++--
 lib/bitmap.c                       | 109 +++++++++++++++++++++++++++++++++----
 6 files changed, 188 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/IRQ-affinity.txt b/Documentation/IRQ-affinity.txt
index b4a615b78403..7890fae18529 100644
--- a/Documentation/IRQ-affinity.txt
+++ b/Documentation/IRQ-affinity.txt
@@ -4,10 +4,11 @@ ChangeLog:
 
 SMP IRQ affinity
 
-/proc/irq/IRQ#/smp_affinity specifies which target CPUs are permitted
-for a given IRQ source. It's a bitmask of allowed CPUs. It's not allowed
-to turn off all CPUs, and if an IRQ controller does not support IRQ
-affinity then the value will not change from the default 0xffffffff.
+/proc/irq/IRQ#/smp_affinity and /proc/irq/IRQ#/smp_affinity_list specify
+which target CPUs are permitted for a given IRQ source.  It's a bitmask
+(smp_affinity) or cpu list (smp_affinity_list) of allowed CPUs.  It's not
+allowed to turn off all CPUs, and if an IRQ controller does not support
+IRQ affinity then the value will not change from the default of all cpus.
 
 /proc/irq/default_smp_affinity specifies default affinity mask that applies
 to all non-active IRQs. Once IRQ is allocated/activated its affinity bitmask
@@ -54,3 +55,11 @@ round-trip min/avg/max = 0.1/0.5/585.4 ms
 This time around IRQ44 was delivered only to the last four processors.
 i.e counters for the CPU0-3 did not change.
 
+Here is an example of limiting that same irq (44) to cpus 1024 to 1031:
+
+[root@moon 44]# echo 1024-1031 > smp_affinity
+[root@moon 44]# cat smp_affinity
+1024-1031
+
+Note that to do this with a bitmask would require 32 bitmasks of zero
+to follow the pertinent one.
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 60740e8ecb37..f48178024067 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -574,6 +574,12 @@ The contents of each smp_affinity file is the same by default:
   > cat /proc/irq/0/smp_affinity
   ffffffff
 
+There is an alternate interface, smp_affinity_list which allows specifying
+a cpu range instead of a bitmask:
+
+  > cat /proc/irq/0/smp_affinity_list
+  1024-1031
+
 The default_smp_affinity mask applies to all non-active IRQs, which are the
 IRQs which have not yet been allocated/activated, and hence which lack a
 /proc/irq/[0-9]* directory.
@@ -583,12 +589,13 @@ reports itself as being attached. This hardware locality information does not
 include information about any possible driver locality preference.
 
 prof_cpu_mask specifies which CPUs are to be profiled by the system wide
-profiler. Default value is ffffffff (all cpus).
+profiler. Default value is ffffffff (all cpus if there are only 32 of them).
 
 The way IRQs are routed is handled by the IO-APIC, and it's Round Robin
 between all the CPUs which are allowed to handle it. As usual the kernel has
 more info than you and does a better job than you, so the defaults are the
-best choice for almost everyone.
+best choice for almost everyone.  [Note this applies only to those IO-APIC's
+that support "Round Robin" interrupt distribution.]
 
 There are  three  more  important subdirectories in /proc: net, scsi, and sys.
 The general  rule  is  that  the  contents,  or  even  the  existence of these
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index daf8c480c786..dcafe0bf0005 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -55,7 +55,8 @@
  * bitmap_parse(buf, buflen, dst, nbits)	Parse bitmap dst from kernel buf
  * bitmap_parse_user(ubuf, ulen, dst, nbits)	Parse bitmap dst from user buf
  * bitmap_scnlistprintf(buf, len, src, nbits)	Print bitmap src as list to buf
- * bitmap_parselist(buf, dst, nbits)		Parse bitmap dst from list
+ * bitmap_parselist(buf, dst, nbits)		Parse bitmap dst from kernel buf
+ * bitmap_parselist_user(buf, dst, nbits)	Parse bitmap dst from user buf
  * bitmap_find_free_region(bitmap, bits, order)	Find and allocate bit region
  * bitmap_release_region(bitmap, pos, order)	Free specified bit region
  * bitmap_allocate_region(bitmap, pos, order)	Allocate specified bit region
@@ -129,6 +130,8 @@ extern int bitmap_scnlistprintf(char *buf, unsigned int len,
 			const unsigned long *src, int nbits);
 extern int bitmap_parselist(const char *buf, unsigned long *maskp,
 			int nmaskbits);
+extern int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen,
+			unsigned long *dst, int nbits);
 extern void bitmap_remap(unsigned long *dst, const unsigned long *src,
 		const unsigned long *old, const unsigned long *new, int bits);
 extern int bitmap_bitremap(int oldbit,
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index bae6fe24d1f9..b24ac56477b4 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -546,6 +546,21 @@ static inline int cpumask_parse_user(const char __user *buf, int len,
 	return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits);
 }
 
+/**
+ * cpumask_parselist_user - extract a cpumask from a user string
+ * @buf: the buffer to extract from
+ * @len: the length of the buffer
+ * @dstp: the cpumask to set.
+ *
+ * Returns -errno, or 0 for success.
+ */
+static inline int cpumask_parselist_user(const char __user *buf, int len,
+				     struct cpumask *dstp)
+{
+	return bitmap_parselist_user(buf, len, cpumask_bits(dstp),
+							nr_cpumask_bits);
+}
+
 /**
  * cpulist_scnprintf - print a cpumask into a string as comma-separated list
  * @buf: the buffer to sprintf into
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 834899f2500f..64e3df6ab1ef 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -19,7 +19,7 @@ static struct proc_dir_entry *root_irq_dir;
 
 #ifdef CONFIG_SMP
 
-static int irq_affinity_proc_show(struct seq_file *m, void *v)
+static int show_irq_affinity(int type, struct seq_file *m, void *v)
 {
 	struct irq_desc *desc = irq_to_desc((long)m->private);
 	const struct cpumask *mask = desc->irq_data.affinity;
@@ -28,7 +28,10 @@ static int irq_affinity_proc_show(struct seq_file *m, void *v)
 	if (irqd_is_setaffinity_pending(&desc->irq_data))
 		mask = desc->pending_mask;
 #endif
-	seq_cpumask(m, mask);
+	if (type)
+		seq_cpumask_list(m, mask);
+	else
+		seq_cpumask(m, mask);
 	seq_putc(m, '\n');
 	return 0;
 }
@@ -59,7 +62,18 @@ static int irq_affinity_hint_proc_show(struct seq_file *m, void *v)
 #endif
 
 int no_irq_affinity;
-static ssize_t irq_affinity_proc_write(struct file *file,
+static int irq_affinity_proc_show(struct seq_file *m, void *v)
+{
+	return show_irq_affinity(0, m, v);
+}
+
+static int irq_affinity_list_proc_show(struct seq_file *m, void *v)
+{
+	return show_irq_affinity(1, m, v);
+}
+
+
+static ssize_t write_irq_affinity(int type, struct file *file,
 		const char __user *buffer, size_t count, loff_t *pos)
 {
 	unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data;
@@ -72,7 +86,10 @@ static ssize_t irq_affinity_proc_write(struct file *file,
 	if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
 		return -ENOMEM;
 
-	err = cpumask_parse_user(buffer, count, new_value);
+	if (type)
+		err = cpumask_parselist_user(buffer, count, new_value);
+	else
+		err = cpumask_parse_user(buffer, count, new_value);
 	if (err)
 		goto free_cpumask;
 
@@ -100,11 +117,28 @@ free_cpumask:
 	return err;
 }
 
+static ssize_t irq_affinity_proc_write(struct file *file,
+		const char __user *buffer, size_t count, loff_t *pos)
+{
+	return write_irq_affinity(0, file, buffer, count, pos);
+}
+
+static ssize_t irq_affinity_list_proc_write(struct file *file,
+		const char __user *buffer, size_t count, loff_t *pos)
+{
+	return write_irq_affinity(1, file, buffer, count, pos);
+}
+
 static int irq_affinity_proc_open(struct inode *inode, struct file *file)
 {
 	return single_open(file, irq_affinity_proc_show, PDE(inode)->data);
 }
 
+static int irq_affinity_list_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, irq_affinity_list_proc_show, PDE(inode)->data);
+}
+
 static int irq_affinity_hint_proc_open(struct inode *inode, struct file *file)
 {
 	return single_open(file, irq_affinity_hint_proc_show, PDE(inode)->data);
@@ -125,6 +159,14 @@ static const struct file_operations irq_affinity_hint_proc_fops = {
 	.release	= single_release,
 };
 
+static const struct file_operations irq_affinity_list_proc_fops = {
+	.open		= irq_affinity_list_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+	.write		= irq_affinity_list_proc_write,
+};
+
 static int default_affinity_show(struct seq_file *m, void *v)
 {
 	seq_cpumask(m, irq_default_affinity);
@@ -289,6 +331,10 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
 	proc_create_data("affinity_hint", 0400, desc->dir,
 			 &irq_affinity_hint_proc_fops, (void *)(long)irq);
 
+	/* create /proc/irq/<irq>/smp_affinity_list */
+	proc_create_data("smp_affinity_list", 0600, desc->dir,
+			 &irq_affinity_list_proc_fops, (void *)(long)irq);
+
 	proc_create_data("node", 0444, desc->dir,
 			 &irq_node_proc_fops, (void *)(long)irq);
 #endif
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 91e0ccfdb424..41baf02924e6 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -571,8 +571,11 @@ int bitmap_scnlistprintf(char *buf, unsigned int buflen,
 EXPORT_SYMBOL(bitmap_scnlistprintf);
 
 /**
- * bitmap_parselist - convert list format ASCII string to bitmap
+ * __bitmap_parselist - convert list format ASCII string to bitmap
  * @bp: read nul-terminated user string from this buffer
+ * @buflen: buffer size in bytes.  If string is smaller than this
+ *    then it must be terminated with a \0.
+ * @is_user: location of buffer, 0 indicates kernel space
  * @maskp: write resulting mask here
  * @nmaskbits: number of bits in mask to be written
  *
@@ -587,20 +590,63 @@ EXPORT_SYMBOL(bitmap_scnlistprintf);
  *    %-EINVAL: invalid character in string
  *    %-ERANGE: bit number specified too large for mask
  */
-int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits)
+static int __bitmap_parselist(const char *buf, unsigned int buflen,
+		int is_user, unsigned long *maskp,
+		int nmaskbits)
 {
 	unsigned a, b;
+	int c, old_c, totaldigits;
+	const char __user *ubuf = buf;
+	int exp_digit, in_range;
 
+	totaldigits = c = 0;
 	bitmap_zero(maskp, nmaskbits);
 	do {
-		if (!isdigit(*bp))
-			return -EINVAL;
-		b = a = simple_strtoul(bp, (char **)&bp, BASEDEC);
-		if (*bp == '-') {
-			bp++;
-			if (!isdigit(*bp))
+		exp_digit = 1;
+		in_range = 0;
+		a = b = 0;
+
+		/* Get the next cpu# or a range of cpu#'s */
+		while (buflen) {
+			old_c = c;
+			if (is_user) {
+				if (__get_user(c, ubuf++))
+					return -EFAULT;
+			} else
+				c = *buf++;
+			buflen--;
+			if (isspace(c))
+				continue;
+
+			/*
+			 * If the last character was a space and the current
+			 * character isn't '\0', we've got embedded whitespace.
+			 * This is a no-no, so throw an error.
+			 */
+			if (totaldigits && c && isspace(old_c))
+				return -EINVAL;
+
+			/* A '\0' or a ',' signal the end of a cpu# or range */
+			if (c == '\0' || c == ',')
+				break;
+
+			if (c == '-') {
+				if (exp_digit || in_range)
+					return -EINVAL;
+				b = 0;
+				in_range = 1;
+				exp_digit = 1;
+				continue;
+			}
+
+			if (!isdigit(c))
 				return -EINVAL;
-			b = simple_strtoul(bp, (char **)&bp, BASEDEC);
+
+			b = b * 10 + (c - '0');
+			if (!in_range)
+				a = b;
+			exp_digit = 0;
+			totaldigits++;
 		}
 		if (!(a <= b))
 			return -EINVAL;
@@ -610,13 +656,52 @@ int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits)
 			set_bit(a, maskp);
 			a++;
 		}
-		if (*bp == ',')
-			bp++;
-	} while (*bp != '\0' && *bp != '\n');
+	} while (buflen && c == ',');
 	return 0;
 }
+
+int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits)
+{
+	char *nl  = strchr(bp, '\n');
+	int len;
+
+	if (nl)
+		len = nl - bp;
+	else
+		len = strlen(bp);
+
+	return __bitmap_parselist(bp, len, 0, maskp, nmaskbits);
+}
 EXPORT_SYMBOL(bitmap_parselist);
 
+
+/**
+ * bitmap_parselist_user()
+ *
+ * @ubuf: pointer to user buffer containing string.
+ * @ulen: buffer size in bytes.  If string is smaller than this
+ *    then it must be terminated with a \0.
+ * @maskp: pointer to bitmap array that will contain result.
+ * @nmaskbits: size of bitmap, in bits.
+ *
+ * Wrapper for bitmap_parselist(), providing it with user buffer.
+ *
+ * We cannot have this as an inline function in bitmap.h because it needs
+ * linux/uaccess.h to get the access_ok() declaration and this causes
+ * cyclic dependencies.
+ */
+int bitmap_parselist_user(const char __user *ubuf,
+			unsigned int ulen, unsigned long *maskp,
+			int nmaskbits)
+{
+	if (!access_ok(VERIFY_READ, ubuf, ulen))
+		return -EFAULT;
+	return __bitmap_parselist((const char *)ubuf,
+					ulen, 1, maskp, nmaskbits);
+}
+EXPORT_SYMBOL(bitmap_parselist_user);
+
+
 /**
  * bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap
  *	@buf: pointer to a bitmap
-- 
cgit v1.2.3


From 1dbe39424a43e56a6c9aed12661192af51dcdb9f Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 24 May 2011 17:13:13 -0700
Subject: xattr.h: expose string defines to userspace
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

af4f136056c9 ("security: move LSM xattrnames to xattr.h") moved the
XATTR_CAPS_SUFFIX define from capability.h to xattr.h.  This makes sense
except it was previously exports to userspace but xattr.h does not export
it to userspace.  This patch exports these headers to userspace to fix the
ABI regression.

There is some slight possibility that this will cause problems in other
applications which used these #defines differently (wrongly) and I could
JUST export the capabilities xattr name that we broke.  Does anyonehave an
idea how exposing these headers could cause a problem?

Below is what is being exposed to userspace, included here since it isn't
clear exactly what is going to be made available from the patch.

/* Namespaces */
#define XATTR_OS2_PREFIX "os2."
#define XATTR_OS2_PREFIX_LEN (sizeof (XATTR_OS2_PREFIX) - 1)

#define XATTR_SECURITY_PREFIX   "security."
#define XATTR_SECURITY_PREFIX_LEN (sizeof (XATTR_SECURITY_PREFIX) - 1)

#define XATTR_SYSTEM_PREFIX "system."
#define XATTR_SYSTEM_PREFIX_LEN (sizeof (XATTR_SYSTEM_PREFIX) - 1)

#define XATTR_TRUSTED_PREFIX "trusted."
#define XATTR_TRUSTED_PREFIX_LEN (sizeof (XATTR_TRUSTED_PREFIX) - 1)

#define XATTR_USER_PREFIX "user."
#define XATTR_USER_PREFIX_LEN (sizeof (XATTR_USER_PREFIX) - 1)

/* Security namespace */
#define XATTR_SELINUX_SUFFIX "selinux"
#define XATTR_NAME_SELINUX XATTR_SECURITY_PREFIX XATTR_SELINUX_SUFFIX

#define XATTR_SMACK_SUFFIX "SMACK64"
#define XATTR_SMACK_IPIN "SMACK64IPIN"
#define XATTR_SMACK_IPOUT "SMACK64IPOUT"
#define XATTR_NAME_SMACK XATTR_SECURITY_PREFIX XATTR_SMACK_SUFFIX
#define XATTR_NAME_SMACKIPIN    XATTR_SECURITY_PREFIX XATTR_SMACK_IPIN
#define XATTR_NAME_SMACKIPOUT   XATTR_SECURITY_PREFIX XATTR_SMACK_IPOUT

#define XATTR_CAPS_SUFFIX "capability"
#define XATTR_NAME_CAPS XATTR_SECURITY_PREFIX XATTR_CAPS_SUFFIX

Reported-by: Ozan Çaglayan <ozan@pardus.org.tr>
Signed-off-by: Eric Paris <eparis@redhat.com>
Cc: Mimi Zohar <zohar@us.ibm.com>
Cc: Serge Hallyn <serue@us.ibm.com>
Cc: James Morris <jmorris@namei.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/xattr.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index 6050783005bd..aed54c50aa66 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -13,10 +13,6 @@
 #define XATTR_CREATE	0x1	/* set value, fail if attr already exists */
 #define XATTR_REPLACE	0x2	/* set value, fail if attr does not exist */
 
-#ifdef  __KERNEL__
-
-#include <linux/types.h>
-
 /* Namespaces */
 #define XATTR_OS2_PREFIX "os2."
 #define XATTR_OS2_PREFIX_LEN (sizeof (XATTR_OS2_PREFIX) - 1)
@@ -53,6 +49,10 @@
 #define XATTR_CAPS_SUFFIX "capability"
 #define XATTR_NAME_CAPS XATTR_SECURITY_PREFIX XATTR_CAPS_SUFFIX
 
+#ifdef  __KERNEL__
+
+#include <linux/types.h>
+
 struct inode;
 struct dentry;
 
-- 
cgit v1.2.3


From 903c0c7cdc21f2ccb7562a7bbc70289c0c2b16ad Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Tue, 24 May 2011 17:13:16 -0700
Subject: sparse: define dummy BUILD_BUG_ON definition for sparse

BUILD_BUG_ON() causes a syntax error to detect coding errors.  So it
causes sparse to detect an error too.  This reduces sparse's usefulness.

This patch makes a dummy BUILD_BUG_ON() definition for sparse.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index f37ba716ef8b..22295244de18 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -638,6 +638,13 @@ struct sysinfo {
 	char _f[20-2*sizeof(long)-sizeof(int)];	/* Padding: libc5 uses this.. */
 };
 
+#ifdef __CHECKER__
+#define BUILD_BUG_ON_NOT_POWER_OF_2(n)
+#define BUILD_BUG_ON_ZERO(e)
+#define BUILD_BUG_ON_NULL(e)
+#define BUILD_BUG_ON(condition)
+#else /* __CHECKER__ */
+
 /* Force a compilation error if a constant expression is not a power of 2 */
 #define BUILD_BUG_ON_NOT_POWER_OF_2(n)			\
 	BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0))
@@ -674,6 +681,7 @@ extern int __build_bug_on_failed;
 		if (condition) __build_bug_on_failed = 1;	\
 	} while(0)
 #endif
+#endif	/* __CHECKER__ */
 
 /* Trap pasters of __FUNCTION__ at compile-time */
 #define __FUNCTION__ (__func__)
-- 
cgit v1.2.3


From 5bd7e6a30e1eb38f58f0046c0cd42dfc38e90d64 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Tue, 24 May 2011 17:13:17 -0700
Subject: sparse: define __must_be_array() for __CHECKER__

commit c5e631cf65f ("ARRAY_SIZE: check for type") added __must_be_array().
But sparse can't parse this gcc extention.

Now make C=2 makes following sparse errors a lot.

  kernel/futex.c:2699:25: error: No right hand side of '+'-expression

Because __must_be_array() is used for ARRAY_SIZE() macro and it is
used very widely.

This patch fixes it.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-gcc.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index cb4c1eb7778e..59e4028e833d 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -34,8 +34,12 @@
     __asm__ ("" : "=r"(__ptr) : "0"(ptr));		\
     (typeof(ptr)) (__ptr + (off)); })
 
+#ifdef __CHECKER__
+#define __must_be_array(arr) 0
+#else
 /* &a[0] degrades to a pointer: a different type from an array */
 #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
+#endif
 
 /*
  * Force always-inline if the user requests it so via the .config,
-- 
cgit v1.2.3


From 746a2a838deec3ef86ef6b7c3edd4207b9a351aa Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Tue, 24 May 2011 17:13:17 -0700
Subject: sparse: Undef __compiletime_{warning,error} if __CHECKER__ is defined

sparse can't parse warning and error attribute.  then they should be
hidden from sparse.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-gcc4.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 64b7c003fd7a..dfadc96e9d63 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -51,7 +51,7 @@
 #if __GNUC_MINOR__ > 0
 #define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
 #endif
-#if __GNUC_MINOR__ >= 4
+#if __GNUC_MINOR__ >= 4 && !defined(__CHECKER__)
 #define __compiletime_warning(message) __attribute__((warning(message)))
 #define __compiletime_error(message) __attribute__((error(message)))
 #endif
-- 
cgit v1.2.3


From 95dde501907b06e7203c74f8435acfdab9eb2659 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Tue, 24 May 2011 17:13:19 -0700
Subject: memblock: add error return when CONFIG_HAVE_MEMBLOCK is not set

On larger systems, information in the kernel log is lost because there is
so much early text printed, that it overflows the static log buffer before
the log_buf_len kernel parameter can be processed, and a bigger log buffer
allocated.

Distros are relunctant to increase memory usage by increasing the size of
the static log buffer, so minimize the problem by allocating the new log
buffer as early as possible.

This patch:

Add an error return if CONFIG_HAVE_MEMBLOCK is not set instead of having
to add #ifdef CONFIG_HAVE_MEMBLOCK around blocks of code calling that
function.

Signed-off-by: Mike Travis <travis@sgi.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Jack Steiner <steiner@sgi.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memblock.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 62a10c2a11f2..7525e38c434d 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -2,6 +2,8 @@
 #define _LINUX_MEMBLOCK_H
 #ifdef __KERNEL__
 
+#define MEMBLOCK_ERROR	0
+
 #ifdef CONFIG_HAVE_MEMBLOCK
 /*
  * Logical memory blocks.
@@ -20,7 +22,6 @@
 #include <asm/memblock.h>
 
 #define INIT_MEMBLOCK_REGIONS	128
-#define MEMBLOCK_ERROR		0
 
 struct memblock_region {
 	phys_addr_t base;
@@ -160,6 +161,12 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo
 #define __initdata_memblock
 #endif
 
+#else
+static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align)
+{
+	return MEMBLOCK_ERROR;
+}
+
 #endif /* CONFIG_HAVE_MEMBLOCK */
 
 #endif /* __KERNEL__ */
-- 
cgit v1.2.3


From 162a7e7500f9664636e649ba59defe541b7c2c60 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 24 May 2011 17:13:20 -0700
Subject: printk: allocate kernel log buffer earlier

On larger systems, because of the numerous ACPI, Bootmem and EFI messages,
the static log buffer overflows before the larger one specified by the
log_buf_len param is allocated.  Minimize the overflow by allocating the
new log buffer as soon as possible.

On kernels without memblock, a later call to setup_log_buf from
kernel/init.c is the fallback.

[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: fix CONFIG_PRINTK=n build]
Signed-off-by: Mike Travis <travis@sgi.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Jack Steiner <steiner@sgi.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/setup.c |  2 ++
 include/linux/printk.h  |  7 ++++
 init/main.c             |  1 +
 kernel/printk.c         | 87 ++++++++++++++++++++++++++++++++-----------------
 4 files changed, 68 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 605e5ae19c7f..a3e5948670c2 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -946,6 +946,8 @@ void __init setup_arch(char **cmdline_p)
 	if (init_ohci1394_dma_early)
 		init_ohci1394_dma_on_all_controllers();
 #endif
+	/* Allocate bigger log buffer */
+	setup_log_buf(1);
 
 	reserve_initrd();
 
diff --git a/include/linux/printk.h b/include/linux/printk.h
index ee048e77e1ae..0101d55d9651 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -1,6 +1,8 @@
 #ifndef __KERNEL_PRINTK__
 #define __KERNEL_PRINTK__
 
+#include <linux/init.h>
+
 extern const char linux_banner[];
 extern const char linux_proc_banner[];
 
@@ -113,6 +115,7 @@ extern int dmesg_restrict;
 extern int kptr_restrict;
 
 void log_buf_kexec_setup(void);
+void __init setup_log_buf(int early);
 #else
 static inline __attribute__ ((format (printf, 1, 0)))
 int vprintk(const char *s, va_list args)
@@ -137,6 +140,10 @@ static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies,
 static inline void log_buf_kexec_setup(void)
 {
 }
+
+static inline void setup_log_buf(int early)
+{
+}
 #endif
 
 extern void dump_stack(void) __cold;
diff --git a/init/main.c b/init/main.c
index 22da33918aef..d2f1e086bf33 100644
--- a/init/main.c
+++ b/init/main.c
@@ -504,6 +504,7 @@ asmlinkage void __init start_kernel(void)
 	 * These use large bootmem allocations and must precede
 	 * kmem_cache_init()
 	 */
+	setup_log_buf(0);
 	pidhash_init();
 	vfs_caches_init_early();
 	sort_main_extable();
diff --git a/kernel/printk.c b/kernel/printk.c
index da8ca817eae3..35185392173f 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -31,6 +31,7 @@
 #include <linux/smp.h>
 #include <linux/security.h>
 #include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/syscalls.h>
 #include <linux/kexec.h>
 #include <linux/kdb.h>
@@ -167,46 +168,74 @@ void log_buf_kexec_setup(void)
 }
 #endif
 
+/* requested log_buf_len from kernel cmdline */
+static unsigned long __initdata new_log_buf_len;
+
+/* save requested log_buf_len since it's too early to process it */
 static int __init log_buf_len_setup(char *str)
 {
 	unsigned size = memparse(str, &str);
-	unsigned long flags;
 
 	if (size)
 		size = roundup_pow_of_two(size);
-	if (size > log_buf_len) {
-		unsigned start, dest_idx, offset;
-		char *new_log_buf;
+	if (size > log_buf_len)
+		new_log_buf_len = size;
 
-		new_log_buf = alloc_bootmem(size);
-		if (!new_log_buf) {
-			printk(KERN_WARNING "log_buf_len: allocation failed\n");
-			goto out;
-		}
+	return 0;
+}
+early_param("log_buf_len", log_buf_len_setup);
 
-		spin_lock_irqsave(&logbuf_lock, flags);
-		log_buf_len = size;
-		log_buf = new_log_buf;
-
-		offset = start = min(con_start, log_start);
-		dest_idx = 0;
-		while (start != log_end) {
-			log_buf[dest_idx] = __log_buf[start & (__LOG_BUF_LEN - 1)];
-			start++;
-			dest_idx++;
-		}
-		log_start -= offset;
-		con_start -= offset;
-		log_end -= offset;
-		spin_unlock_irqrestore(&logbuf_lock, flags);
+void __init setup_log_buf(int early)
+{
+	unsigned long flags;
+	unsigned start, dest_idx, offset;
+	char *new_log_buf;
+	int free;
+
+	if (!new_log_buf_len)
+		return;
+
+	if (early) {
+		unsigned long mem;
 
-		printk(KERN_NOTICE "log_buf_len: %d\n", log_buf_len);
+		mem = memblock_alloc(new_log_buf_len, PAGE_SIZE);
+		if (mem == MEMBLOCK_ERROR)
+			return;
+		new_log_buf = __va(mem);
+	} else {
+		new_log_buf = alloc_bootmem_nopanic(new_log_buf_len);
 	}
-out:
-	return 1;
-}
 
-__setup("log_buf_len=", log_buf_len_setup);
+	if (unlikely(!new_log_buf)) {
+		pr_err("log_buf_len: %ld bytes not available\n",
+			new_log_buf_len);
+		return;
+	}
+
+	spin_lock_irqsave(&logbuf_lock, flags);
+	log_buf_len = new_log_buf_len;
+	log_buf = new_log_buf;
+	new_log_buf_len = 0;
+	free = __LOG_BUF_LEN - log_end;
+
+	offset = start = min(con_start, log_start);
+	dest_idx = 0;
+	while (start != log_end) {
+		unsigned log_idx_mask = start & (__LOG_BUF_LEN - 1);
+
+		log_buf[dest_idx] = __log_buf[log_idx_mask];
+		start++;
+		dest_idx++;
+	}
+	log_start -= offset;
+	con_start -= offset;
+	log_end -= offset;
+	spin_unlock_irqrestore(&logbuf_lock, flags);
+
+	pr_info("log_buf_len: %d\n", log_buf_len);
+	pr_info("early log buf free: %d(%d%%)\n",
+		free, (free * 100) / __LOG_BUF_LEN);
+}
 
 #ifdef CONFIG_BOOT_PRINTK_DELAY
 
-- 
cgit v1.2.3


From 3c1ab50d0a31b27bb4e55168f4901dd91e6e5ea4 Mon Sep 17 00:00:00 2001
From: Joachim Eastwood <manabian@gmail.com>
Date: Tue, 24 May 2011 17:13:23 -0700
Subject: drivers/leds/leds-pca9532.c: add gpio capability

Allow unused leds on pca9532 to be used as gpio.  The board I am working
on now has no less than 6 pca9532 chips.  One chips is used for only leds,
one has 14 leds and 2 gpio and the rest of the chips are gpio only.

There is also one board in mainline which could use this capabilty;
arch/arm/mach-iop32x/n2100.c
 232         {       .type = PCA9532_TYPE_NONE }, /* power OFF gpio */
 233         {       .type = PCA9532_TYPE_NONE }, /* reset gpio */

This patch defines a new pin type, PCA9532_TYPE_GPIO, and registers a
gpiochip if any pin has this type set.  The gpio will registers all chip
pins but will filter on gpio_request.

[randy.dunlap@oracle.com: fix build when GPIOLIB is not enabled]
Signed-off-by: Joachim Eastwood <joachim.eastwood@jotron.com>
Reviewed-by: Wolfram Sang <w.sang@pengutronix.de>
Reviewed-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Cc: Richard Purdie <rpurdie@rpsys.net>
Cc: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Jan Weitzel <j.weitzel@phytec.de>
Cc: Juergen Kilb <j.kilb@phytec.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/leds/Kconfig         |  10 ++++
 drivers/leds/leds-pca9532.c  | 113 +++++++++++++++++++++++++++++++++++++++++--
 include/linux/leds-pca9532.h |   3 +-
 3 files changed, 122 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index 9bec8699b8a3..09de8dac8a73 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -162,6 +162,16 @@ config LEDS_PCA9532
 	  LED controller. It is generally only useful
 	  as a platform driver
 
+config LEDS_PCA9532_GPIO
+	bool "Enable GPIO support for PCA9532"
+	depends on LEDS_PCA9532
+	depends on GPIOLIB
+	help
+	  Allow unused pins on PCA9532 to be used as gpio.
+
+	  To use a pin as gpio pca9532_type in pca9532_platform data needs to
+	  set to PCA9532_TYPE_GPIO.
+
 config LEDS_GPIO
 	tristate "LED Support for GPIO connected LEDs"
 	depends on LEDS_CLASS
diff --git a/drivers/leds/leds-pca9532.c b/drivers/leds/leds-pca9532.c
index 5bf63af09ddf..ebea85603f43 100644
--- a/drivers/leds/leds-pca9532.c
+++ b/drivers/leds/leds-pca9532.c
@@ -19,7 +19,9 @@
 #include <linux/mutex.h>
 #include <linux/workqueue.h>
 #include <linux/leds-pca9532.h>
+#include <linux/gpio.h>
 
+#define PCA9532_REG_INPUT(i) ((i)/8)
 #define PCA9532_REG_PSC(i) (0x2+(i)*2)
 #define PCA9532_REG_PWM(i) (0x3+(i)*2)
 #define PCA9532_REG_LS0  0x6
@@ -34,6 +36,9 @@ struct pca9532_data {
 	struct mutex update_lock;
 	struct input_dev *idev;
 	struct work_struct work;
+#ifdef CONFIG_LEDS_PCA9532_GPIO
+	struct gpio_chip gpio;
+#endif
 	u8 pwm[2];
 	u8 psc[2];
 };
@@ -200,16 +205,68 @@ static void pca9532_led_work(struct work_struct *work)
 	pca9532_setled(led);
 }
 
-static void pca9532_destroy_devices(struct pca9532_data *data, int n_devs)
+#ifdef CONFIG_LEDS_PCA9532_GPIO
+static int pca9532_gpio_request_pin(struct gpio_chip *gc, unsigned offset)
+{
+	struct pca9532_data *data = container_of(gc, struct pca9532_data, gpio);
+	struct pca9532_led *led = &data->leds[offset];
+
+	if (led->type == PCA9532_TYPE_GPIO)
+		return 0;
+
+	return -EBUSY;
+}
+
+static void pca9532_gpio_set_value(struct gpio_chip *gc, unsigned offset, int val)
+{
+	struct pca9532_data *data = container_of(gc, struct pca9532_data, gpio);
+	struct pca9532_led *led = &data->leds[offset];
+
+	if (val)
+		led->state = PCA9532_ON;
+	else
+		led->state = PCA9532_OFF;
+
+	pca9532_setled(led);
+}
+
+static int pca9532_gpio_get_value(struct gpio_chip *gc, unsigned offset)
+{
+	struct pca9532_data *data = container_of(gc, struct pca9532_data, gpio);
+	unsigned char reg;
+
+	reg = i2c_smbus_read_byte_data(data->client, PCA9532_REG_INPUT(offset));
+
+	return !!(reg & (1 << (offset % 8)));
+}
+
+static int pca9532_gpio_direction_input(struct gpio_chip *gc, unsigned offset)
+{
+	/* To use as input ensure pin is not driven */
+	pca9532_gpio_set_value(gc, offset, 0);
+
+	return 0;
+}
+
+static int pca9532_gpio_direction_output(struct gpio_chip *gc, unsigned offset, int val)
+{
+	pca9532_gpio_set_value(gc, offset, val);
+
+	return 0;
+}
+#endif /* CONFIG_LEDS_PCA9532_GPIO */
+
+static int pca9532_destroy_devices(struct pca9532_data *data, int n_devs)
 {
 	int i = n_devs;
 
 	if (!data)
-		return;
+		return -EINVAL;
 
 	while (--i >= 0) {
 		switch (data->leds[i].type) {
 		case PCA9532_TYPE_NONE:
+		case PCA9532_TYPE_GPIO:
 			break;
 		case PCA9532_TYPE_LED:
 			led_classdev_unregister(&data->leds[i].ldev);
@@ -224,12 +281,26 @@ static void pca9532_destroy_devices(struct pca9532_data *data, int n_devs)
 			break;
 		}
 	}
+
+#ifdef CONFIG_LEDS_PCA9532_GPIO
+	if (data->gpio.dev) {
+		int err = gpiochip_remove(&data->gpio);
+		if (err) {
+			dev_err(&data->client->dev, "%s failed, %d\n",
+						"gpiochip_remove()", err);
+			return err;
+		}
+	}
+#endif
+
+	return 0;
 }
 
 static int pca9532_configure(struct i2c_client *client,
 	struct pca9532_data *data, struct pca9532_platform_data *pdata)
 {
 	int i, err = 0;
+	int gpios = 0;
 
 	for (i = 0; i < 2; i++)	{
 		data->pwm[i] = pdata->pwm[i];
@@ -249,6 +320,9 @@ static int pca9532_configure(struct i2c_client *client,
 		switch (led->type) {
 		case PCA9532_TYPE_NONE:
 			break;
+		case PCA9532_TYPE_GPIO:
+			gpios++;
+			break;
 		case PCA9532_TYPE_LED:
 			led->state = pled->state;
 			led->name = pled->name;
@@ -297,6 +371,34 @@ static int pca9532_configure(struct i2c_client *client,
 			break;
 		}
 	}
+
+#ifdef CONFIG_LEDS_PCA9532_GPIO
+	if (gpios) {
+		data->gpio.label = "gpio-pca9532";
+		data->gpio.direction_input = pca9532_gpio_direction_input;
+		data->gpio.direction_output = pca9532_gpio_direction_output;
+		data->gpio.set = pca9532_gpio_set_value;
+		data->gpio.get = pca9532_gpio_get_value;
+		data->gpio.request = pca9532_gpio_request_pin;
+		data->gpio.can_sleep = 1;
+		data->gpio.base = pdata->gpio_base;
+		data->gpio.ngpio = 16;
+		data->gpio.dev = &client->dev;
+		data->gpio.owner = THIS_MODULE;
+
+		err = gpiochip_add(&data->gpio);
+		if (err) {
+			/* Use data->gpio.dev as a flag for freeing gpiochip */
+			data->gpio.dev = NULL;
+			dev_warn(&client->dev, "could not add gpiochip\n");
+		} else {
+			dev_info(&client->dev, "gpios %i...%i\n",
+				data->gpio.base, data->gpio.base +
+				data->gpio.ngpio - 1);
+		}
+	}
+#endif
+
 	return 0;
 
 exit:
@@ -337,7 +439,12 @@ static int pca9532_probe(struct i2c_client *client,
 static int pca9532_remove(struct i2c_client *client)
 {
 	struct pca9532_data *data = i2c_get_clientdata(client);
-	pca9532_destroy_devices(data, 16);
+	int err;
+
+	err = pca9532_destroy_devices(data, 16);
+	if (err)
+		return err;
+
 	kfree(data);
 	return 0;
 }
diff --git a/include/linux/leds-pca9532.h b/include/linux/leds-pca9532.h
index f158eb1149aa..b8d6fffed4d8 100644
--- a/include/linux/leds-pca9532.h
+++ b/include/linux/leds-pca9532.h
@@ -25,7 +25,7 @@ enum pca9532_state {
 };
 
 enum pca9532_type { PCA9532_TYPE_NONE, PCA9532_TYPE_LED,
-	PCA9532_TYPE_N2100_BEEP };
+	PCA9532_TYPE_N2100_BEEP, PCA9532_TYPE_GPIO };
 
 struct pca9532_led {
 	u8 id;
@@ -41,6 +41,7 @@ struct pca9532_platform_data {
 	struct pca9532_led leds[16];
 	u8 pwm[2];
 	u8 psc[2];
+	int gpio_base;
 };
 
 #endif /* __LINUX_PCA9532_H */
-- 
cgit v1.2.3


From 4440673a95e63ad888a41db596edaa0c55d3a332 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Tue, 24 May 2011 17:13:29 -0700
Subject: leds: provide helper to register "leds-gpio" devices
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This function makes a deep copy of the platform data to allow it to live
in init memory.  For a kernel that supports several machines and so
includes the definition for several leds-gpio devices this saves quite
some memory because all but one definition can be free'd after boot.

As the function is used by arch code it must be builtin and so cannot go
into leds-gpio.c.

[akpm@linux-foundation.org: s/CONFIG_LED_REGISTER_GPIO/CONFIG_LEDS_REGISTER_GPIO/]
Signed-off-by: Uwe Kleine-König  <u.kleine-koenig@pengutronix.de>
Cc: Russell King <rmk@arm.linux.org.uk>
Acked-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Cc: Fabio Estevam <fabio.estevam@freescale.com>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Tested-by: H Hartley Sweeten <hartleys@visionengravers.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/Makefile                  |  2 +-
 drivers/leds/Kconfig              |  7 +++++++
 drivers/leds/Makefile             |  1 +
 drivers/leds/leds-gpio-register.c | 42 +++++++++++++++++++++++++++++++++++++++
 include/linux/leds.h              |  2 ++
 5 files changed, 53 insertions(+), 1 deletion(-)
 create mode 100644 drivers/leds/leds-gpio-register.c

(limited to 'include/linux')

diff --git a/drivers/Makefile b/drivers/Makefile
index 145aeadb6c03..bceb60c85c01 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -94,7 +94,7 @@ obj-$(CONFIG_CPU_IDLE)		+= cpuidle/
 obj-$(CONFIG_DMA_ENGINE)	+= dma/
 obj-$(CONFIG_MMC)		+= mmc/
 obj-$(CONFIG_MEMSTICK)		+= memstick/
-obj-$(CONFIG_NEW_LEDS)		+= leds/
+obj-y				+= leds/
 obj-$(CONFIG_INFINIBAND)	+= infiniband/
 obj-$(CONFIG_SGI_SN)		+= sn/
 obj-y				+= firmware/
diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index a4a77734ab6e..1d027b475b22 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -14,6 +14,13 @@ config LEDS_CLASS
 	  This option enables the led sysfs class in /sys/class/leds.  You'll
 	  need this to do anything useful with LEDs.  If unsure, say N.
 
+config LEDS_GPIO_REGISTER
+	bool
+	help
+	  This option provides the function gpio_led_register_device.
+	  As this function is used by arch code it must not be compiled as a
+	  module.
+
 if NEW_LEDS
 
 comment "LED drivers"
diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
index 0b6ad375bfdb..bccb96c9bb45 100644
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_LEDS_COBALT_QUBE)		+= leds-cobalt-qube.o
 obj-$(CONFIG_LEDS_COBALT_RAQ)		+= leds-cobalt-raq.o
 obj-$(CONFIG_LEDS_SUNFIRE)		+= leds-sunfire.o
 obj-$(CONFIG_LEDS_PCA9532)		+= leds-pca9532.o
+obj-$(CONFIG_LEDS_GPIO_REGISTER)	+= leds-gpio-register.o
 obj-$(CONFIG_LEDS_GPIO)			+= leds-gpio.o
 obj-$(CONFIG_LEDS_LP3944)		+= leds-lp3944.o
 obj-$(CONFIG_LEDS_LP5521)		+= leds-lp5521.o
diff --git a/drivers/leds/leds-gpio-register.c b/drivers/leds/leds-gpio-register.c
new file mode 100644
index 000000000000..1c4ed5510f35
--- /dev/null
+++ b/drivers/leds/leds-gpio-register.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2011 Pengutronix
+ * Uwe Kleine-Koenig <u.kleine-koenig@pengutronix.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ */
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/leds.h>
+
+/**
+ * gpio_led_register_device - register a gpio-led device
+ * @pdata: the platform data used for the new device
+ *
+ * Makes a copy of pdata and pdata->leds and registers a new leds-gpio device
+ * with the result. This allows to have pdata and pdata-leds in .init.rodata
+ * and so saves some bytes compared to a static struct platform_device with
+ * static platform data.
+ *
+ * Returns the registered device or an error pointer.
+ */
+struct platform_device *__init gpio_led_register_device(
+		int id, const struct gpio_led_platform_data *pdata)
+{
+	struct platform_device *ret;
+	struct gpio_led_platform_data _pdata = *pdata;
+
+	_pdata.leds = kmemdup(pdata->leds,
+			pdata->num_leds * sizeof(*pdata->leds), GFP_KERNEL);
+	if (!_pdata.leds)
+		return ERR_PTR(-ENOMEM);
+
+	ret = platform_device_register_resndata(NULL, "leds-gpio", id,
+			NULL, 0, &_pdata, sizeof(_pdata));
+	if (IS_ERR(ret))
+		kfree(_pdata.leds);
+
+	return ret;
+}
diff --git a/include/linux/leds.h b/include/linux/leds.h
index 61e0340a4b77..5884def15a24 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -207,5 +207,7 @@ struct gpio_led_platform_data {
 					unsigned long *delay_off);
 };
 
+struct platform_device *gpio_led_register_device(
+		int id, const struct gpio_led_platform_data *pdata);
 
 #endif		/* __LINUX_LEDS_H_INCLUDED */
-- 
cgit v1.2.3


From c196e32a111b0ee356d67acceb938ae0b5e63ef0 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 24 May 2011 17:13:31 -0700
Subject: lib: add kstrto*_from_user()

There is quite a lot of code which does copy_from_user() + strict_strto*()
or simple_strto*() combo in slightly different ways.

Before doing conversions all over tree, let's get final API correct.

Enter kstrtoull_from_user() and friends.

Typical code which uses them looks very simple:

	TYPE val;
	int rv;

	rv = kstrtoTYPE_from_user(buf, count, 0, &val);
	if (rv < 0)
		return rv;
	[use val]
	return count;

There is a tiny semantic difference from the plain kstrto*() API -- the
latter allows any amount of leading zeroes, while the former copies data
into buffer on stack and thus allows leading zeroes as long as it fits
into buffer.

This shouldn't be a problem for typical usecase "echo 42 > /proc/x".

The point is to make reading one integer from userspace _very_ simple and
very bug free.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 31 +++++++++++++++++++++++++++++++
 lib/kstrtox.c          | 26 ++++++++++++++++++++++++++
 2 files changed, 57 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 22295244de18..fb0e7329fee1 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -248,6 +248,37 @@ int __must_check kstrtos16(const char *s, unsigned int base, s16 *res);
 int __must_check kstrtou8(const char *s, unsigned int base, u8 *res);
 int __must_check kstrtos8(const char *s, unsigned int base, s8 *res);
 
+int __must_check kstrtoull_from_user(const char __user *s, size_t count, unsigned int base, unsigned long long *res);
+int __must_check kstrtoll_from_user(const char __user *s, size_t count, unsigned int base, long long *res);
+int __must_check kstrtoul_from_user(const char __user *s, size_t count, unsigned int base, unsigned long *res);
+int __must_check kstrtol_from_user(const char __user *s, size_t count, unsigned int base, long *res);
+int __must_check kstrtouint_from_user(const char __user *s, size_t count, unsigned int base, unsigned int *res);
+int __must_check kstrtoint_from_user(const char __user *s, size_t count, unsigned int base, int *res);
+int __must_check kstrtou16_from_user(const char __user *s, size_t count, unsigned int base, u16 *res);
+int __must_check kstrtos16_from_user(const char __user *s, size_t count, unsigned int base, s16 *res);
+int __must_check kstrtou8_from_user(const char __user *s, size_t count, unsigned int base, u8 *res);
+int __must_check kstrtos8_from_user(const char __user *s, size_t count, unsigned int base, s8 *res);
+
+static inline int __must_check kstrtou64_from_user(const char __user *s, size_t count, unsigned int base, u64 *res)
+{
+	return kstrtoull_from_user(s, count, base, res);
+}
+
+static inline int __must_check kstrtos64_from_user(const char __user *s, size_t count, unsigned int base, s64 *res)
+{
+	return kstrtoll_from_user(s, count, base, res);
+}
+
+static inline int __must_check kstrtou32_from_user(const char __user *s, size_t count, unsigned int base, u32 *res)
+{
+	return kstrtouint_from_user(s, count, base, res);
+}
+
+static inline int __must_check kstrtos32_from_user(const char __user *s, size_t count, unsigned int base, s32 *res)
+{
+	return kstrtoint_from_user(s, count, base, res);
+}
+
 extern unsigned long simple_strtoul(const char *,char **,unsigned int);
 extern long simple_strtol(const char *,char **,unsigned int);
 extern unsigned long long simple_strtoull(const char *,char **,unsigned int);
diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index a235f3cc471c..2dbae88090ac 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -17,6 +17,7 @@
 #include <linux/math64.h>
 #include <linux/module.h>
 #include <linux/types.h>
+#include <asm/uaccess.h>
 
 static inline char _tolower(const char c)
 {
@@ -222,3 +223,28 @@ int kstrtos8(const char *s, unsigned int base, s8 *res)
 	return 0;
 }
 EXPORT_SYMBOL(kstrtos8);
+
+#define kstrto_from_user(f, g, type)					\
+int f(const char __user *s, size_t count, unsigned int base, type *res)	\
+{									\
+	/* sign, base 2 representation, newline, terminator */		\
+	char buf[1 + sizeof(type) * 8 + 1 + 1];				\
+									\
+	count = min(count, sizeof(buf) - 1);				\
+	if (copy_from_user(buf, s, count))				\
+		return -EFAULT;						\
+	buf[count] = '\0';						\
+	return g(buf, base, res);					\
+}									\
+EXPORT_SYMBOL(f)
+
+kstrto_from_user(kstrtoull_from_user,	kstrtoull,	unsigned long long);
+kstrto_from_user(kstrtoll_from_user,	kstrtoll,	long long);
+kstrto_from_user(kstrtoul_from_user,	kstrtoul,	unsigned long);
+kstrto_from_user(kstrtol_from_user,	kstrtol,	long);
+kstrto_from_user(kstrtouint_from_user,	kstrtouint,	unsigned int);
+kstrto_from_user(kstrtoint_from_user,	kstrtoint,	int);
+kstrto_from_user(kstrtou16_from_user,	kstrtou16,	u16);
+kstrto_from_user(kstrtos16_from_user,	kstrtos16,	s16);
+kstrto_from_user(kstrtou8_from_user,	kstrtou8,	u8);
+kstrto_from_user(kstrtos8_from_user,	kstrtos8,	s8);
-- 
cgit v1.2.3


From 6aae6e0304d33e537298867dafb2703ec58c2e4f Mon Sep 17 00:00:00 2001
From: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Date: Tue, 24 May 2011 17:13:33 -0700
Subject: include/linux/genalloc.h: add multiple-inclusion guards

Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Cc: Nicolas Ferre <nicolas.ferre@atmel.com>
Cc: Patrice VILCHEZ <patrice.vilchez@atmel.com>
Cc: Jes Sorensen <jes@wildopensource.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/genalloc.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h
index 9869ef3674ac..b1c70f10c42b 100644
--- a/include/linux/genalloc.h
+++ b/include/linux/genalloc.h
@@ -9,6 +9,8 @@
  */
 
 
+#ifndef __GENALLOC_H__
+#define __GENALLOC_H__
 /*
  *  General purpose special memory pool descriptor.
  */
@@ -34,3 +36,4 @@ extern int gen_pool_add(struct gen_pool *, unsigned long, size_t, int);
 extern void gen_pool_destroy(struct gen_pool *);
 extern unsigned long gen_pool_alloc(struct gen_pool *, size_t);
 extern void gen_pool_free(struct gen_pool *, unsigned long, size_t);
+#endif /* __GENALLOC_H__ */
-- 
cgit v1.2.3


From 3c8f370ded3483b27f1218ff0051fcf0c7a2facd Mon Sep 17 00:00:00 2001
From: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Date: Tue, 24 May 2011 17:13:34 -0700
Subject: lib/genalloc.c: add support for specifying the physical address

So we can specify the virtual address as the base of the pool chunk and
then get physical addresses for hardware IP.

For example on at91 we will use this on spi, uart or macb

Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Cc: Nicolas Ferre <nicolas.ferre@atmel.com>
Cc: Patrice VILCHEZ <patrice.vilchez@atmel.com>
Cc: Jes Sorensen <jes@wildopensource.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/genalloc.h | 22 +++++++++++++++++++++-
 lib/genalloc.c           | 45 +++++++++++++++++++++++++++++++++++++--------
 2 files changed, 58 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h
index b1c70f10c42b..5bbebda78b02 100644
--- a/include/linux/genalloc.h
+++ b/include/linux/genalloc.h
@@ -26,13 +26,33 @@ struct gen_pool {
 struct gen_pool_chunk {
 	spinlock_t lock;
 	struct list_head next_chunk;	/* next chunk in pool */
+	phys_addr_t phys_addr;		/* physical starting address of memory chunk */
 	unsigned long start_addr;	/* starting address of memory chunk */
 	unsigned long end_addr;		/* ending address of memory chunk */
 	unsigned long bits[0];		/* bitmap for allocating memory chunk */
 };
 
 extern struct gen_pool *gen_pool_create(int, int);
-extern int gen_pool_add(struct gen_pool *, unsigned long, size_t, int);
+extern phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long);
+extern int gen_pool_add_virt(struct gen_pool *, unsigned long, phys_addr_t,
+			     size_t, int);
+/**
+ * gen_pool_add - add a new chunk of special memory to the pool
+ * @pool: pool to add new memory chunk to
+ * @addr: starting address of memory chunk to add to pool
+ * @size: size in bytes of the memory chunk to add to pool
+ * @nid: node id of the node the chunk structure and bitmap should be
+ *       allocated on, or -1
+ *
+ * Add a new chunk of special memory to the specified pool.
+ *
+ * Returns 0 on success or a -ve errno on failure.
+ */
+static inline int gen_pool_add(struct gen_pool *pool, unsigned long addr,
+			       size_t size, int nid)
+{
+	return gen_pool_add_virt(pool, addr, -1, size, nid);
+}
 extern void gen_pool_destroy(struct gen_pool *);
 extern unsigned long gen_pool_alloc(struct gen_pool *, size_t);
 extern void gen_pool_free(struct gen_pool *, unsigned long, size_t);
diff --git a/lib/genalloc.c b/lib/genalloc.c
index 1923f1490e72..577ddf805975 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -39,17 +39,20 @@ struct gen_pool *gen_pool_create(int min_alloc_order, int nid)
 EXPORT_SYMBOL(gen_pool_create);
 
 /**
- * gen_pool_add - add a new chunk of special memory to the pool
+ * gen_pool_add_virt - add a new chunk of special memory to the pool
  * @pool: pool to add new memory chunk to
- * @addr: starting address of memory chunk to add to pool
+ * @virt: virtual starting address of memory chunk to add to pool
+ * @phys: physical starting address of memory chunk to add to pool
  * @size: size in bytes of the memory chunk to add to pool
  * @nid: node id of the node the chunk structure and bitmap should be
  *       allocated on, or -1
  *
  * Add a new chunk of special memory to the specified pool.
+ *
+ * Returns 0 on success or a -ve errno on failure.
  */
-int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size,
-		 int nid)
+int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phys,
+		 size_t size, int nid)
 {
 	struct gen_pool_chunk *chunk;
 	int nbits = size >> pool->min_alloc_order;
@@ -58,11 +61,12 @@ int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size,
 
 	chunk = kmalloc_node(nbytes, GFP_KERNEL | __GFP_ZERO, nid);
 	if (unlikely(chunk == NULL))
-		return -1;
+		return -ENOMEM;
 
 	spin_lock_init(&chunk->lock);
-	chunk->start_addr = addr;
-	chunk->end_addr = addr + size;
+	chunk->phys_addr = phys;
+	chunk->start_addr = virt;
+	chunk->end_addr = virt + size;
 
 	write_lock(&pool->lock);
 	list_add(&chunk->next_chunk, &pool->chunks);
@@ -70,7 +74,32 @@ int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size,
 
 	return 0;
 }
-EXPORT_SYMBOL(gen_pool_add);
+EXPORT_SYMBOL(gen_pool_add_virt);
+
+/**
+ * gen_pool_virt_to_phys - return the physical address of memory
+ * @pool: pool to allocate from
+ * @addr: starting address of memory
+ *
+ * Returns the physical address on success, or -1 on error.
+ */
+phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long addr)
+{
+	struct list_head *_chunk;
+	struct gen_pool_chunk *chunk;
+
+	read_lock(&pool->lock);
+	list_for_each(_chunk, &pool->chunks) {
+		chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk);
+
+		if (addr >= chunk->start_addr && addr < chunk->end_addr)
+			return chunk->phys_addr + addr - chunk->start_addr;
+	}
+	read_unlock(&pool->lock);
+
+	return -1;
+}
+EXPORT_SYMBOL(gen_pool_virt_to_phys);
 
 /**
  * gen_pool_destroy - destroy a special memory pool
-- 
cgit v1.2.3


From c84598bbfa756b7d042da31aa4e198ae866a6c7d Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Tue, 24 May 2011 17:13:35 -0700
Subject: percpu_counter: change return value and add comments

The percpu_counter_*_positive() API in UP case doesn't check if return
value is positive.  Add comments to explain why we don't.  Also if count <
0, returns 0 instead of 1 for *read_positive().

[akpm@linux-foundation.org: tweak comment]
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/percpu_counter.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 46f6ba56fa91..5edc9014263a 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -75,7 +75,7 @@ static inline s64 percpu_counter_read_positive(struct percpu_counter *fbc)
 	barrier();		/* Prevent reloads of fbc->count */
 	if (ret >= 0)
 		return ret;
-	return 1;
+	return 0;
 }
 
 static inline int percpu_counter_initialized(struct percpu_counter *fbc)
@@ -133,6 +133,10 @@ static inline s64 percpu_counter_read(struct percpu_counter *fbc)
 	return fbc->count;
 }
 
+/*
+ * percpu_counter is intended to track positive numbers. In the UP case the
+ * number should never be negative.
+ */
 static inline s64 percpu_counter_read_positive(struct percpu_counter *fbc)
 {
 	return fbc->count;
-- 
cgit v1.2.3


From 774466add7c810fd7e4c8bcf41995b6799608880 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Wed, 25 May 2011 20:43:32 +0200
Subject: hwmon: (jc42) Change detection class

While the JC42-compatible chips are temperature sensors, I2C_CLASS_SPD
makes more sense because these chips always live on memory modules.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Cc: Guenter Roeck <guenter.roeck@ericsson.com>
---
 drivers/hwmon/jc42.c | 2 +-
 include/linux/i2c.h  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hwmon/jc42.c b/drivers/hwmon/jc42.c
index 934991237061..02cebb74e206 100644
--- a/drivers/hwmon/jc42.c
+++ b/drivers/hwmon/jc42.c
@@ -213,7 +213,7 @@ static const struct dev_pm_ops jc42_dev_pm_ops = {
 
 /* This is the driver that will be inserted */
 static struct i2c_driver jc42_driver = {
-	.class		= I2C_CLASS_HWMON,
+	.class		= I2C_CLASS_SPD,
 	.driver = {
 		.name	= "jc42",
 		.pm = JC42_DEV_PM_OPS,
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index f1e3ff5880a9..a6c652ef516d 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -409,7 +409,7 @@ void i2c_unlock_adapter(struct i2c_adapter *);
 /* i2c adapter classes (bitmask) */
 #define I2C_CLASS_HWMON		(1<<0)	/* lm_sensors, ... */
 #define I2C_CLASS_DDC		(1<<3)	/* DDC bus on graphics adapters */
-#define I2C_CLASS_SPD		(1<<7)	/* SPD EEPROMs and similar */
+#define I2C_CLASS_SPD		(1<<7)	/* Memory modules */
 
 /* Internal numbers to terminate lists */
 #define I2C_CLIENT_END		0xfffeU
-- 
cgit v1.2.3


From d0c97cfb81ebc5b416c0f92fa2fc18d2773e3023 Mon Sep 17 00:00:00 2001
From: Andrei Warkentin <andreiw@motorola.com>
Date: Mon, 23 May 2011 15:06:36 -0500
Subject: mmc: core: Use CMD23 for multiblock transfers when we can.

CMD23-prefixed instead of open-ended multiblock transfers
have a performance advantage on some MMC cards.

Signed-off-by: Andrei Warkentin <andreiw@motorola.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/card/block.c | 108 ++++++++++++++++++++++++++++++++++-------------
 include/linux/mmc/card.h |   1 +
 include/linux/mmc/core.h |   1 +
 include/linux/mmc/host.h |   6 +++
 include/linux/mmc/mmc.h  |   6 +++
 5 files changed, 93 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 126c7f41c5a3..a380accaba9a 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -59,10 +59,6 @@ MODULE_ALIAS("mmc:block");
 #define INAND_CMD38_ARG_SECTRIM1 0x81
 #define INAND_CMD38_ARG_SECTRIM2 0x88
 
-#define REL_WRITES_SUPPORTED(card) (mmc_card_mmc((card)) &&	\
-    (((card)->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN) ||	\
-     ((card)->ext_csd.rel_sectors)))
-
 static DEFINE_MUTEX(block_mutex);
 
 /*
@@ -90,6 +86,10 @@ struct mmc_blk_data {
 	struct mmc_queue queue;
 	struct list_head part;
 
+	unsigned int	flags;
+#define MMC_BLK_CMD23	(1 << 0)	/* Can do SET_BLOCK_COUNT for multiblock */
+#define MMC_BLK_REL_WR	(1 << 1)	/* MMC Reliable write support */
+
 	unsigned int	usage;
 	unsigned int	read_only;
 	unsigned int	part_type;
@@ -429,6 +429,7 @@ static const struct block_device_operations mmc_bdops = {
 
 struct mmc_blk_request {
 	struct mmc_request	mrq;
+	struct mmc_command	sbc;
 	struct mmc_command	cmd;
 	struct mmc_command	stop;
 	struct mmc_data		data;
@@ -652,13 +653,10 @@ static int mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req)
  * reliable write can handle, thus finish the request in
  * partial completions.
  */
-static inline int mmc_apply_rel_rw(struct mmc_blk_request *brq,
-				   struct mmc_card *card,
-				   struct request *req)
+static inline void mmc_apply_rel_rw(struct mmc_blk_request *brq,
+				    struct mmc_card *card,
+				    struct request *req)
 {
-	int err;
-	struct mmc_command set_count = {0};
-
 	if (!(card->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN)) {
 		/* Legacy mode imposes restrictions on transfers. */
 		if (!IS_ALIGNED(brq->cmd.arg, card->ext_csd.rel_sectors))
@@ -669,15 +667,6 @@ static inline int mmc_apply_rel_rw(struct mmc_blk_request *brq,
 		else if (brq->data.blocks < card->ext_csd.rel_sectors)
 			brq->data.blocks = 1;
 	}
-
-	set_count.opcode = MMC_SET_BLOCK_COUNT;
-	set_count.arg = brq->data.blocks | (1 << 31);
-	set_count.flags = MMC_RSP_R1 | MMC_CMD_AC;
-	err = mmc_wait_for_cmd(card->host, &set_count, 0);
-	if (err)
-		printk(KERN_ERR "%s: error %d SET_BLOCK_COUNT\n",
-		       req->rq_disk->disk_name, err);
-	return err;
 }
 
 static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req)
@@ -694,7 +683,7 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req)
 	bool do_rel_wr = ((req->cmd_flags & REQ_FUA) ||
 			  (req->cmd_flags & REQ_META)) &&
 		(rq_data_dir(req) == WRITE) &&
-		REL_WRITES_SUPPORTED(card);
+		(md->flags & MMC_BLK_REL_WR);
 
 	do {
 		struct mmc_command cmd = {0};
@@ -732,11 +721,9 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req)
 
 		if (brq.data.blocks > 1 || do_rel_wr) {
 			/* SPI multiblock writes terminate using a special
-			 * token, not a STOP_TRANSMISSION request. Reliable
-			 * writes use SET_BLOCK_COUNT and do not use a
-			 * STOP_TRANSMISSION request either.
+			 * token, not a STOP_TRANSMISSION request.
 			 */
-			if ((!mmc_host_is_spi(card->host) && !do_rel_wr) ||
+			if (!mmc_host_is_spi(card->host) ||
 			    rq_data_dir(req) == READ)
 				brq.mrq.stop = &brq.stop;
 			readcmd = MMC_READ_MULTIPLE_BLOCK;
@@ -754,8 +741,37 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req)
 			brq.data.flags |= MMC_DATA_WRITE;
 		}
 
-		if (do_rel_wr && mmc_apply_rel_rw(&brq, card, req))
-			goto cmd_err;
+		if (do_rel_wr)
+			mmc_apply_rel_rw(&brq, card, req);
+
+		/*
+		 * Pre-defined multi-block transfers are preferable to
+		 * open ended-ones (and necessary for reliable writes).
+		 * However, it is not sufficient to just send CMD23,
+		 * and avoid the final CMD12, as on an error condition
+		 * CMD12 (stop) needs to be sent anyway. This, coupled
+		 * with Auto-CMD23 enhancements provided by some
+		 * hosts, means that the complexity of dealing
+		 * with this is best left to the host. If CMD23 is
+		 * supported by card and host, we'll fill sbc in and let
+		 * the host deal with handling it correctly. This means
+		 * that for hosts that don't expose MMC_CAP_CMD23, no
+		 * change of behavior will be observed.
+		 *
+		 * N.B: Some MMC cards experience perf degradation.
+		 * We'll avoid using CMD23-bounded multiblock writes for
+		 * these, while retaining features like reliable writes.
+		 */
+
+		if ((md->flags & MMC_BLK_CMD23) &&
+		    mmc_op_multi(brq.cmd.opcode) &&
+		    (do_rel_wr || !(card->quirks & MMC_QUIRK_BLK_NO_CMD23))) {
+			brq.sbc.opcode = MMC_SET_BLOCK_COUNT;
+			brq.sbc.arg = brq.data.blocks |
+				(do_rel_wr ? (1 << 31) : 0);
+			brq.sbc.flags = MMC_RSP_R1 | MMC_CMD_AC;
+			brq.mrq.sbc = &brq.sbc;
+		}
 
 		mmc_set_data_timeout(&brq.data, card);
 
@@ -792,7 +808,8 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req)
 		 * until later as we need to wait for the card to leave
 		 * programming mode even when things go wrong.
 		 */
-		if (brq.cmd.error || brq.data.error || brq.stop.error) {
+		if (brq.sbc.error || brq.cmd.error ||
+		    brq.data.error || brq.stop.error) {
 			if (brq.data.blocks > 1 && rq_data_dir(req) == READ) {
 				/* Redo read one sector at a time */
 				printk(KERN_WARNING "%s: retrying using single "
@@ -803,6 +820,13 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req)
 			status = get_card_status(card, req);
 		}
 
+		if (brq.sbc.error) {
+			printk(KERN_ERR "%s: error %d sending SET_BLOCK_COUNT "
+			       "command, response %#x, card status %#x\n",
+			       req->rq_disk->disk_name, brq.sbc.error,
+			       brq.sbc.resp[0], status);
+		}
+
 		if (brq.cmd.error) {
 			printk(KERN_ERR "%s: error %d sending read/write "
 			       "command, response %#x, card status %#x\n",
@@ -1014,8 +1038,6 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
 	md->disk->queue = md->queue.queue;
 	md->disk->driverfs_dev = parent;
 	set_disk_ro(md->disk, md->read_only || default_ro);
-	if (REL_WRITES_SUPPORTED(card))
-		blk_queue_flush(md->queue.queue, REQ_FLUSH | REQ_FUA);
 
 	/*
 	 * As discussed on lkml, GENHD_FL_REMOVABLE should:
@@ -1034,6 +1056,19 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
 
 	blk_queue_logical_block_size(md->queue.queue, 512);
 	set_capacity(md->disk, size);
+
+	if (mmc_host_cmd23(card->host) &&
+	    mmc_card_mmc(card))
+		md->flags |= MMC_BLK_CMD23;
+
+	if (mmc_card_mmc(card) &&
+	    md->flags & MMC_BLK_CMD23 &&
+	    ((card->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN) ||
+	     card->ext_csd.rel_sectors)) {
+		md->flags |= MMC_BLK_REL_WR;
+		blk_queue_flush(md->queue.queue, REQ_FLUSH | REQ_FUA);
+	}
+
 	return md;
 
  err_putdisk:
@@ -1189,6 +1224,21 @@ static const struct mmc_fixup blk_fixups[] =
 	MMC_FIXUP("SEM08G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38),
 	MMC_FIXUP("SEM16G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38),
 	MMC_FIXUP("SEM32G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38),
+
+	/*
+	 * Some MMC cards experience performance degradation with CMD23
+	 * instead of CMD12-bounded multiblock transfers. For now we'll
+	 * black list what's bad...
+	 * - Certain Toshiba cards.
+	 *
+	 * N.B. This doesn't affect SD cards.
+	 */
+	MMC_FIXUP("MMC08G", 0x11, CID_OEMID_ANY, add_quirk_mmc,
+		  MMC_QUIRK_BLK_NO_CMD23),
+	MMC_FIXUP("MMC16G", 0x11, CID_OEMID_ANY, add_quirk_mmc,
+		  MMC_QUIRK_BLK_NO_CMD23),
+	MMC_FIXUP("MMC32G", 0x11, CID_OEMID_ANY, add_quirk_mmc,
+		  MMC_QUIRK_BLK_NO_CMD23),
 	END_FIXUP
 };
 
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 7190aa2096f7..4a0e27baaea0 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -171,6 +171,7 @@ struct mmc_card {
 #define MMC_QUIRK_NONSTD_FUNC_IF (1<<4)		/* SDIO card has nonstd function interfaces */
 #define MMC_QUIRK_DISABLE_CD	(1<<5)		/* disconnect CD/DAT[3] resistor */
 #define MMC_QUIRK_INAND_CMD38	(1<<6)		/* iNAND devices have broken CMD38 */
+#define MMC_QUIRK_BLK_NO_CMD23	(1<<7)		/* Avoid CMD23 for regular multiblock */
 
 	unsigned int		erase_size;	/* erase size in sectors */
  	unsigned int		erase_shift;	/* if erase unit is power 2 */
diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index cbe8d55f64c4..b6718e549a51 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -120,6 +120,7 @@ struct mmc_data {
 };
 
 struct mmc_request {
+	struct mmc_command	*sbc;		/* SET_BLOCK_COUNT for multiblock */
 	struct mmc_command	*cmd;
 	struct mmc_data		*data;
 	struct mmc_command	*stop;
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index de32e6aa018a..e946bd10fe3f 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -210,6 +210,7 @@ struct mmc_host {
 #define MMC_CAP_MAX_CURRENT_400	(1 << 27)	/* Host max current limit is 400mA */
 #define MMC_CAP_MAX_CURRENT_600	(1 << 28)	/* Host max current limit is 600mA */
 #define MMC_CAP_MAX_CURRENT_800	(1 << 29)	/* Host max current limit is 800mA */
+#define MMC_CAP_CMD23		(1 << 30)	/* CMD23 supported. */
 
 	mmc_pm_flag_t		pm_caps;	/* supported pm features */
 
@@ -366,5 +367,10 @@ static inline int mmc_card_wake_sdio_irq(struct mmc_host *host)
 {
 	return host->pm_flags & MMC_PM_WAKE_SDIO_IRQ;
 }
+
+static inline int mmc_host_cmd23(struct mmc_host *host)
+{
+	return host->caps & MMC_CAP_CMD23;
+}
 #endif
 
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 9fa5a73f393d..ac26a685cca8 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -83,6 +83,12 @@
 #define MMC_APP_CMD              55   /* ac   [31:16] RCA        R1  */
 #define MMC_GEN_CMD              56   /* adtc [0] RD/WR          R1  */
 
+static inline bool mmc_op_multi(u32 opcode)
+{
+	return opcode == MMC_WRITE_MULTIPLE_BLOCK ||
+	       opcode == MMC_READ_MULTIPLE_BLOCK;
+}
+
 /*
  * MMC_SWITCH argument format:
  *
-- 
cgit v1.2.3


From e89d456fcdde2df008c032bf928e69e628e07a28 Mon Sep 17 00:00:00 2001
From: Andrei Warkentin <andreiw@motorola.com>
Date: Mon, 23 May 2011 15:06:37 -0500
Subject: mmc: sdhci: Implement MMC_CAP_CMD23 for SDHCI.

Implements support for multiblock transfers bounded
by SET_BLOCK_COUNT (CMD23).

Signed-off-by: Andrei Warkentin <andreiw@motorola.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/sdhci.c  | 63 +++++++++++++++++++++++++++++++++++------------
 drivers/mmc/host/sdhci.h  |  2 +-
 include/linux/mmc/sdhci.h |  1 +
 3 files changed, 49 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index cc63f5ed2310..b9ed66307ac3 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -838,9 +838,10 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command *cmd)
 }
 
 static void sdhci_set_transfer_mode(struct sdhci_host *host,
-	struct mmc_data *data)
+	struct mmc_command *cmd)
 {
 	u16 mode;
+	struct mmc_data *data = cmd->data;
 
 	if (data == NULL)
 		return;
@@ -848,11 +849,14 @@ static void sdhci_set_transfer_mode(struct sdhci_host *host,
 	WARN_ON(!host->data);
 
 	mode = SDHCI_TRNS_BLK_CNT_EN;
-	if (data->blocks > 1) {
-		if (host->quirks & SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12)
-			mode |= SDHCI_TRNS_MULTI | SDHCI_TRNS_ACMD12;
-		else
-			mode |= SDHCI_TRNS_MULTI;
+	if (mmc_op_multi(cmd->opcode) || data->blocks > 1) {
+		mode |= SDHCI_TRNS_MULTI;
+		/*
+		 * If we are sending CMD23, CMD12 never gets sent
+		 * on successful completion (so no Auto-CMD12).
+		 */
+		if (!host->mrq->sbc && (host->flags & SDHCI_AUTO_CMD12))
+			mode |= SDHCI_TRNS_AUTO_CMD12;
 	}
 	if (data->flags & MMC_DATA_READ)
 		mode |= SDHCI_TRNS_READ;
@@ -893,7 +897,15 @@ static void sdhci_finish_data(struct sdhci_host *host)
 	else
 		data->bytes_xfered = data->blksz * data->blocks;
 
-	if (data->stop) {
+	/*
+	 * Need to send CMD12 if -
+	 * a) open-ended multiblock transfer (no CMD23)
+	 * b) error in multiblock transfer
+	 */
+	if (data->stop &&
+	    (data->error ||
+	     !host->mrq->sbc)) {
+
 		/*
 		 * The controller needs a reset of internal state machines
 		 * upon error conditions.
@@ -949,7 +961,7 @@ static void sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd)
 
 	sdhci_writel(host, cmd->arg, SDHCI_ARGUMENT);
 
-	sdhci_set_transfer_mode(host, cmd->data);
+	sdhci_set_transfer_mode(host, cmd);
 
 	if ((cmd->flags & MMC_RSP_136) && (cmd->flags & MMC_RSP_BUSY)) {
 		printk(KERN_ERR "%s: Unsupported response type!\n",
@@ -1004,13 +1016,21 @@ static void sdhci_finish_command(struct sdhci_host *host)
 
 	host->cmd->error = 0;
 
-	if (host->data && host->data_early)
-		sdhci_finish_data(host);
+	/* Finished CMD23, now send actual command. */
+	if (host->cmd == host->mrq->sbc) {
+		host->cmd = NULL;
+		sdhci_send_command(host, host->mrq->cmd);
+	} else {
 
-	if (!host->cmd->data)
-		tasklet_schedule(&host->finish_tasklet);
+		/* Processed actual command. */
+		if (host->data && host->data_early)
+			sdhci_finish_data(host);
 
-	host->cmd = NULL;
+		if (!host->cmd->data)
+			tasklet_schedule(&host->finish_tasklet);
+
+		host->cmd = NULL;
+	}
 }
 
 static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
@@ -1189,7 +1209,12 @@ static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq)
 #ifndef SDHCI_USE_LEDS_CLASS
 	sdhci_activate_led(host);
 #endif
-	if (host->quirks & SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12) {
+
+	/*
+	 * Ensure we don't send the STOP for non-SET_BLOCK_COUNTED
+	 * requests if Auto-CMD12 is enabled.
+	 */
+	if (!mrq->sbc && (host->flags & SDHCI_AUTO_CMD12)) {
 		if (mrq->stop) {
 			mrq->data->stop = NULL;
 			mrq->stop = NULL;
@@ -1227,7 +1252,10 @@ static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq)
 			host->mrq = mrq;
 		}
 
-		sdhci_send_command(host, mrq->cmd);
+		if (mrq->sbc)
+			sdhci_send_command(host, mrq->sbc);
+		else
+			sdhci_send_command(host, mrq->cmd);
 	}
 
 	mmiowb();
@@ -2455,7 +2483,10 @@ int sdhci_add_host(struct sdhci_host *host)
 	} else
 		mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_200;
 
-	mmc->caps |= MMC_CAP_SDIO_IRQ | MMC_CAP_ERASE;
+	mmc->caps |= MMC_CAP_SDIO_IRQ | MMC_CAP_ERASE | MMC_CAP_CMD23;
+
+	if (host->quirks & SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12)
+		host->flags |= SDHCI_AUTO_CMD12;
 
 	/*
 	 * A controller may support 8-bit width, but the board itself
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 7e28eec97f04..2c3fbc5a4c07 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -36,7 +36,7 @@
 #define SDHCI_TRANSFER_MODE	0x0C
 #define  SDHCI_TRNS_DMA		0x01
 #define  SDHCI_TRNS_BLK_CNT_EN	0x02
-#define  SDHCI_TRNS_ACMD12	0x04
+#define  SDHCI_TRNS_AUTO_CMD12	0x04
 #define  SDHCI_TRNS_READ	0x10
 #define  SDHCI_TRNS_MULTI	0x20
 
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index e902618d68f4..73e27ba51e99 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -113,6 +113,7 @@ struct sdhci_host {
 #define SDHCI_DEVICE_DEAD	(1<<3)	/* Device unresponsive */
 #define SDHCI_SDR50_NEEDS_TUNING (1<<4)	/* SDR50 needs tuning */
 #define SDHCI_NEEDS_RETUNING	(1<<5)	/* Host needs retuning */
+#define SDHCI_AUTO_CMD12	(1<<6)	/* Auto CMD12 support */
 
 	unsigned int version;	/* SDHCI spec. version */
 
-- 
cgit v1.2.3


From f0d89972b01798cf9d245dfa1cacfa0ee78a3593 Mon Sep 17 00:00:00 2001
From: Andrei Warkentin <andreiw@motorola.com>
Date: Mon, 23 May 2011 15:06:38 -0500
Subject: mmc: core: Block CMD23 support for UHS104/SDXC cards.

SD cards operating at UHS104 or better support SET_BLOCK_COUNT.

Signed-off-by: Andrei Warkentin <andreiw@motorola.com>
Reviewed-by: Arindam Nath <arindam.nath@amd.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/card/block.c | 9 ++++++---
 drivers/mmc/core/sd.c    | 2 ++
 include/linux/mmc/card.h | 3 +++
 include/linux/mmc/sd.h   | 2 +-
 4 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index a380accaba9a..71da5641e258 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -1057,9 +1057,12 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
 	blk_queue_logical_block_size(md->queue.queue, 512);
 	set_capacity(md->disk, size);
 
-	if (mmc_host_cmd23(card->host) &&
-	    mmc_card_mmc(card))
-		md->flags |= MMC_BLK_CMD23;
+	if (mmc_host_cmd23(card->host)) {
+		if (mmc_card_mmc(card) ||
+		    (mmc_card_sd(card) &&
+		     card->scr.cmds & SD_SCR_CMD23_SUPPORT))
+			md->flags |= MMC_BLK_CMD23;
+	}
 
 	if (mmc_card_mmc(card) &&
 	    md->flags & MMC_BLK_CMD23 &&
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 596d0b9d30b8..ff2774128aa9 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -203,6 +203,8 @@ static int mmc_decode_scr(struct mmc_card *card)
 	else
 		card->erased_byte = 0x0;
 
+	if (scr->sda_spec3)
+		scr->cmds = UNSTUFF_BITS(resp, 32, 2);
 	return 0;
 }
 
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 4a0e27baaea0..c6927a4d157f 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -72,6 +72,9 @@ struct sd_scr {
 	unsigned char		bus_widths;
 #define SD_SCR_BUS_WIDTH_1	(1<<0)
 #define SD_SCR_BUS_WIDTH_4	(1<<2)
+	unsigned char		cmds;
+#define SD_SCR_CMD20_SUPPORT   (1<<0)
+#define SD_SCR_CMD23_SUPPORT   (1<<1)
 };
 
 struct sd_ssr {
diff --git a/include/linux/mmc/sd.h b/include/linux/mmc/sd.h
index c648878f6734..7d35d52c3df3 100644
--- a/include/linux/mmc/sd.h
+++ b/include/linux/mmc/sd.h
@@ -66,7 +66,7 @@
 
 #define SCR_SPEC_VER_0		0	/* Implements system specification 1.0 - 1.01 */
 #define SCR_SPEC_VER_1		1	/* Implements system specification 1.10 */
-#define SCR_SPEC_VER_2		2	/* Implements system specification 2.00 */
+#define SCR_SPEC_VER_2		2	/* Implements system specification 2.00-3.0X */
 
 /*
  * SD bus widths
-- 
cgit v1.2.3


From 8edf63710bd43e62d59bfe017df542fa0713bbb3 Mon Sep 17 00:00:00 2001
From: Andrei Warkentin <andreiw@motorola.com>
Date: Mon, 23 May 2011 15:06:39 -0500
Subject: mmc: sdhci: Auto-CMD23 support.

Enables Auto-CMD23 support where available (SDHCI 3.0 controllers)

Signed-off-by: Andrei Warkentin <andreiw@motorola.com>
Tested-by: Arindam Nath <arindam.nath@amd.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/sdhci.c  | 17 ++++++++++++++++-
 drivers/mmc/host/sdhci.h  |  2 ++
 include/linux/mmc/sdhci.h |  1 +
 3 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index b9ed66307ac3..f845fd6a64ae 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -857,7 +857,12 @@ static void sdhci_set_transfer_mode(struct sdhci_host *host,
 		 */
 		if (!host->mrq->sbc && (host->flags & SDHCI_AUTO_CMD12))
 			mode |= SDHCI_TRNS_AUTO_CMD12;
+		else if (host->mrq->sbc && (host->flags & SDHCI_AUTO_CMD23)) {
+			mode |= SDHCI_TRNS_AUTO_CMD23;
+			sdhci_writel(host, host->mrq->sbc->arg, SDHCI_ARGUMENT2);
+		}
 	}
+
 	if (data->flags & MMC_DATA_READ)
 		mode |= SDHCI_TRNS_READ;
 	if (host->flags & SDHCI_REQ_USE_DMA)
@@ -1252,7 +1257,7 @@ static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq)
 			host->mrq = mrq;
 		}
 
-		if (mrq->sbc)
+		if (mrq->sbc && !(host->flags & SDHCI_AUTO_CMD23))
 			sdhci_send_command(host, mrq->sbc);
 		else
 			sdhci_send_command(host, mrq->cmd);
@@ -2488,6 +2493,16 @@ int sdhci_add_host(struct sdhci_host *host)
 	if (host->quirks & SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12)
 		host->flags |= SDHCI_AUTO_CMD12;
 
+	/* Auto-CMD23 stuff only works in ADMA or PIO. */
+	if ((host->version == SDHCI_SPEC_300) &&
+	    ((host->flags & SDHCI_USE_ADMA) ||
+	     !(host->flags & SDHCI_REQ_USE_DMA))) {
+		host->flags |= SDHCI_AUTO_CMD23;
+		DBG("%s: Auto-CMD23 available\n", mmc_hostname(mmc));
+	} else {
+		DBG("%s: Auto-CMD23 unavailable\n", mmc_hostname(mmc));
+	}
+
 	/*
 	 * A controller may support 8-bit width, but the board itself
 	 * might not have the pins brought out.  Boards that support
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 2c3fbc5a4c07..745c42fa41ed 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -25,6 +25,7 @@
  */
 
 #define SDHCI_DMA_ADDRESS	0x00
+#define SDHCI_ARGUMENT2		SDHCI_DMA_ADDRESS
 
 #define SDHCI_BLOCK_SIZE	0x04
 #define  SDHCI_MAKE_BLKSZ(dma, blksz) (((dma & 0x7) << 12) | (blksz & 0xFFF))
@@ -37,6 +38,7 @@
 #define  SDHCI_TRNS_DMA		0x01
 #define  SDHCI_TRNS_BLK_CNT_EN	0x02
 #define  SDHCI_TRNS_AUTO_CMD12	0x04
+#define  SDHCI_TRNS_AUTO_CMD23	0x08
 #define  SDHCI_TRNS_READ	0x10
 #define  SDHCI_TRNS_MULTI	0x20
 
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index 73e27ba51e99..6a68c4eb4e44 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -114,6 +114,7 @@ struct sdhci_host {
 #define SDHCI_SDR50_NEEDS_TUNING (1<<4)	/* SDR50 needs tuning */
 #define SDHCI_NEEDS_RETUNING	(1<<5)	/* Host needs retuning */
 #define SDHCI_AUTO_CMD12	(1<<6)	/* Auto CMD12 support */
+#define SDHCI_AUTO_CMD23	(1<<7)	/* Auto CMD23 support */
 
 	unsigned int version;	/* SDHCI spec. version */
 
-- 
cgit v1.2.3


From 6dcbbe25dcc9bd2bdeb4f685f8fb874ffc10e6be Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Tue, 24 May 2011 08:31:08 +0000
Subject: net: move is_vlan_dev into public header file (v2)

Migrate is_vlan_dev() to if_vlan.h so that core networkig can use it

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
CC: davem@davemloft.net
CC: bhutchings@solarflare.com
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 5 +++++
 net/8021q/vlan.h        | 5 -----
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 290bd8ac94cf..dc01681fbb42 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -110,6 +110,11 @@ static inline void vlan_group_set_device(struct vlan_group *vg,
 	array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] = dev;
 }
 
+static inline int is_vlan_dev(struct net_device *dev)
+{
+        return dev->priv_flags & IFF_802_1Q_VLAN;
+}
+
 #define vlan_tx_tag_present(__skb)	((__skb)->vlan_tci & VLAN_TAG_PRESENT)
 #define vlan_tx_tag_get(__skb)		((__skb)->vlan_tci & ~VLAN_TAG_PRESENT)
 
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index c3408def8a19..9da07e30d1a2 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -118,11 +118,6 @@ extern void vlan_netlink_fini(void);
 
 extern struct rtnl_link_ops vlan_link_ops;
 
-static inline int is_vlan_dev(struct net_device *dev)
-{
-	return dev->priv_flags & IFF_802_1Q_VLAN;
-}
-
 extern int vlan_net_id;
 
 struct proc_dir_entry;
-- 
cgit v1.2.3