From d4dc210f69bcb0b4bef5a83b1c323817be89bad1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 21 Apr 2011 20:54:46 +0200 Subject: block: don't block events on excl write for non-optical devices Disk event code automatically blocks events on excl write. This is primarily to avoid issuing polling commands while burning is in progress. This behavior doesn't fit other types of devices with removeable media where polling commands don't have adverse side effects and door locking usually doesn't exist. This patch introduces new genhd flag which controls the auto-blocking behavior and uses it to enable auto-blocking only on optical devices. Note for stable: 2.6.38 and later only Cc: stable@kernel.org Signed-off-by: Tejun Heo Reported-by: Kay Sievers Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index d764a426e9fd..300d7582006e 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -127,6 +127,7 @@ struct hd_struct { #define GENHD_FL_SUPPRESS_PARTITION_INFO 32 #define GENHD_FL_EXT_DEVT 64 /* allow extended devt */ #define GENHD_FL_NATIVE_CAPACITY 128 +#define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE 256 enum { DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */ -- cgit v1.2.3 From f3876930952390a31c3a7fd68dd621464a36eb80 Mon Sep 17 00:00:00 2001 From: "shaohua.li@intel.com" Date: Fri, 6 May 2011 11:34:32 -0600 Subject: block: add a non-queueable flush flag flush request isn't queueable in some drives. Add a flag to let driver notify block layer about this. We can optimize flush performance with the knowledge. Stable: 2.6.39 only Cc: stable@kernel.org Signed-off-by: Shaohua Li Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-settings.c | 6 ++++++ include/linux/blkdev.h | 7 +++++++ 2 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/block/blk-settings.c b/block/blk-settings.c index 1fa769293597..cd3c428e194f 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -790,6 +790,12 @@ void blk_queue_flush(struct request_queue *q, unsigned int flush) } EXPORT_SYMBOL_GPL(blk_queue_flush); +void blk_queue_flush_queueable(struct request_queue *q, bool queueable) +{ + q->flush_not_queueable = !queueable; +} +EXPORT_SYMBOL_GPL(blk_queue_flush_queueable); + static int __init blk_settings_init(void) { blk_max_low_pfn = max_low_pfn - 1; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cbbfd98ad4a3..8bd2a271b2d8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -364,6 +364,7 @@ struct request_queue * for flush operations */ unsigned int flush_flags; + unsigned int flush_not_queueable:1; unsigned int flush_pending_idx:1; unsigned int flush_running_idx:1; unsigned long flush_pending_since; @@ -843,6 +844,7 @@ extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush(struct request_queue *q, unsigned int flush); +extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); @@ -1111,6 +1113,11 @@ static inline unsigned int block_size(struct block_device *bdev) return bdev->bd_block_size; } +static inline bool queue_flush_queueable(struct request_queue *q) +{ + return !q->flush_not_queueable; +} + typedef struct {struct page *v;} Sector; unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *); -- cgit v1.2.3 From 3ac0cc4508709d42ec9aa351086c7d38bfc0660c Mon Sep 17 00:00:00 2001 From: "shaohua.li@intel.com" Date: Fri, 6 May 2011 11:34:41 -0600 Subject: block: hold queue if flush is running for non-queueable flush drive In some drives, flush requests are non-queueable. When flush request is running, normal read/write requests can't run. If block layer dispatches such request, driver can't handle it and requeue it. Tejun suggested we can hold the queue when flush is running. This can avoid unnecessary requeue. Also this can improve performance. For example, we have request flush1, write1, flush 2. flush1 is dispatched, then queue is hold, write1 isn't inserted to queue. After flush1 is finished, flush2 will be dispatched. Since disk cache is already clean, flush2 will be finished very soon, so looks like flush2 is folded to flush1. In my test, the queue holding completely solves a regression introduced by commit 53d63e6b0dfb95882ec0219ba6bbd50cde423794: block: make the flush insertion use the tail of the dispatch list It's not a preempt type request, in fact we have to insert it behind requests that do specify INSERT_FRONT. which causes about 20% regression running a sysbench fileio workload. Stable: 2.6.39 only Cc: stable@kernel.org Signed-off-by: Shaohua Li Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-flush.c | 16 +++++++++++----- block/blk.h | 21 ++++++++++++++++++++- include/linux/blkdev.h | 1 + 3 files changed, 32 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/block/blk-flush.c b/block/blk-flush.c index 6c9b5e189e62..bb21e4c36f70 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -212,13 +212,19 @@ static void flush_end_io(struct request *flush_rq, int error) } /* - * Moving a request silently to empty queue_head may stall the - * queue. Kick the queue in those cases. This function is called - * from request completion path and calling directly into - * request_fn may confuse the driver. Always use kblockd. + * Kick the queue to avoid stall for two cases: + * 1. Moving a request silently to empty queue_head may stall the + * queue. + * 2. When flush request is running in non-queueable queue, the + * queue is hold. Restart the queue after flush request is finished + * to avoid stall. + * This function is called from request completion path and calling + * directly into request_fn may confuse the driver. Always use + * kblockd. */ - if (queued) + if (queued || q->flush_queue_delayed) blk_run_queue_async(q); + q->flush_queue_delayed = 0; } /** diff --git a/block/blk.h b/block/blk.h index c9df8fc3c999..83e4bff36201 100644 --- a/block/blk.h +++ b/block/blk.h @@ -62,7 +62,26 @@ static inline struct request *__elv_next_request(struct request_queue *q) rq = list_entry_rq(q->queue_head.next); return rq; } - + /* + * Flush request is running and flush request isn't queueable + * in the drive, we can hold the queue till flush request is + * finished. Even we don't do this, driver can't dispatch next + * requests and will requeue them. And this can improve + * throughput too. For example, we have request flush1, write1, + * flush 2. flush1 is dispatched, then queue is hold, write1 + * isn't inserted to queue. After flush1 is finished, flush2 + * will be dispatched. Since disk cache is already clean, + * flush2 will be finished very soon, so looks like flush2 is + * folded to flush1. + * Since the queue is hold, a flag is set to indicate the queue + * should be restarted later. Please see flush_end_io() for + * details. + */ + if (q->flush_pending_idx != q->flush_running_idx && + !queue_flush_queueable(q)) { + q->flush_queue_delayed = 1; + return NULL; + } if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) return NULL; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8bd2a271b2d8..9f921bf4bf8c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -365,6 +365,7 @@ struct request_queue */ unsigned int flush_flags; unsigned int flush_not_queueable:1; + unsigned int flush_queue_delayed:1; unsigned int flush_pending_idx:1; unsigned int flush_running_idx:1; unsigned long flush_pending_since; -- cgit v1.2.3 From 23ceb5b7719e9276d4fa72a3ecf94dd396755276 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Fri, 6 May 2011 19:30:02 -0600 Subject: block: Remove extra discard_alignment from hd_struct. Currently, hd_struct.discard_alignment is only used when we show /sys/block/sdx/sdx/discard_alignment. So remove it and calculate when it is asked to show. Signed-off-by: Tao Ma Signed-off-by: Jens Axboe --- fs/partitions/check.c | 9 ++++++--- include/linux/genhd.h | 1 - 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/partitions/check.c b/fs/partitions/check.c index d545e97d99c3..b7e16bccd5e5 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -255,7 +255,12 @@ ssize_t part_discard_alignment_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); - return sprintf(buf, "%u\n", p->discard_alignment); + struct gendisk *disk = dev_to_disk(dev); + + return sprintf(buf, "%u\n", + (unsigned long long)queue_limit_discard_alignment( + &disk->queue->limits, + p->start_sect)); } ssize_t part_stat_show(struct device *dev, @@ -449,8 +454,6 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, p->start_sect = start; p->alignment_offset = queue_limit_alignment_offset(&disk->queue->limits, start); - p->discard_alignment = - queue_limit_discard_alignment(&disk->queue->limits, start); p->nr_sects = len; p->partno = partno; p->policy = get_disk_ro(disk); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 300d7582006e..b78956b3c2e7 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -100,7 +100,6 @@ struct hd_struct { sector_t start_sect; sector_t nr_sects; sector_t alignment_offset; - unsigned int discard_alignment; struct device __dev; struct kobject *holder_dir; int policy, partno; -- cgit v1.2.3 From 6b4e306aa3dc94a0545eb9279475b1ab6209a31f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 7 Mar 2010 16:41:34 -0800 Subject: ns: proc files for namespace naming policy. Create files under /proc//ns/ to allow controlling the namespaces of a process. This addresses three specific problems that can make namespaces hard to work with. - Namespaces require a dedicated process to pin them in memory. - It is not possible to use a namespace unless you are the child of the original creator. - Namespaces don't have names that userspace can use to talk about them. The namespace files under /proc//ns/ can be opened and the file descriptor can be used to talk about a specific namespace, and to keep the specified namespace alive. A namespace can be kept alive by either holding the file descriptor open or bind mounting the file someplace else. aka: mount --bind /proc/self/ns/net /some/filesystem/path mount --bind /proc/self/fd/ /some/filesystem/path This allows namespaces to be named with userspace policy. It requires additional support to make use of these filedescriptors and that will be comming in the following patches. Acked-by: Daniel Lezcano Signed-off-by: Eric W. Biederman --- fs/proc/Makefile | 1 + fs/proc/base.c | 20 +++--- fs/proc/inode.c | 7 ++ fs/proc/internal.h | 18 +++++ fs/proc/namespaces.c | 188 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/proc_fs.h | 18 +++++ 6 files changed, 241 insertions(+), 11 deletions(-) create mode 100644 fs/proc/namespaces.c (limited to 'include/linux') diff --git a/fs/proc/Makefile b/fs/proc/Makefile index df434c5f28fb..c1c729335924 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -20,6 +20,7 @@ proc-y += stat.o proc-y += uptime.o proc-y += version.o proc-y += softirqs.o +proc-y += namespaces.o proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o proc-$(CONFIG_NET) += proc_net.o proc-$(CONFIG_PROC_KCORE) += kcore.o diff --git a/fs/proc/base.c b/fs/proc/base.c index dfa532730e55..dc8bca72b002 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -600,7 +600,7 @@ static int proc_fd_access_allowed(struct inode *inode) return allowed; } -static int proc_setattr(struct dentry *dentry, struct iattr *attr) +int proc_setattr(struct dentry *dentry, struct iattr *attr) { int error; struct inode *inode = dentry->d_inode; @@ -1736,8 +1736,7 @@ static int task_dumpable(struct task_struct *task) return 0; } - -static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) +struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) { struct inode * inode; struct proc_inode *ei; @@ -1779,7 +1778,7 @@ out_unlock: return NULL; } -static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = dentry->d_inode; struct task_struct *task; @@ -1820,7 +1819,7 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat * made this apply to all per process world readable and executable * directories. */ -static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) +int pid_revalidate(struct dentry *dentry, struct nameidata *nd) { struct inode *inode; struct task_struct *task; @@ -1862,7 +1861,7 @@ static int pid_delete_dentry(const struct dentry * dentry) return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; } -static const struct dentry_operations pid_dentry_operations = +const struct dentry_operations pid_dentry_operations = { .d_revalidate = pid_revalidate, .d_delete = pid_delete_dentry, @@ -1870,9 +1869,6 @@ static const struct dentry_operations pid_dentry_operations = /* Lookups */ -typedef struct dentry *instantiate_t(struct inode *, struct dentry *, - struct task_struct *, const void *); - /* * Fill a directory entry. * @@ -1885,8 +1881,8 @@ typedef struct dentry *instantiate_t(struct inode *, struct dentry *, * reported by readdir in sync with the inode numbers reported * by stat. */ -static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, - char *name, int len, +int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, + const char *name, int len, instantiate_t instantiate, struct task_struct *task, const void *ptr) { struct dentry *child, *dir = filp->f_path.dentry; @@ -2820,6 +2816,7 @@ static const struct pid_entry tgid_base_stuff[] = { DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), + DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), #ifdef CONFIG_NET DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), #endif @@ -3168,6 +3165,7 @@ out_no_task: static const struct pid_entry tid_base_stuff[] = { DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), + DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), REG("environ", S_IRUSR, proc_environ_operations), INF("auxv", S_IRUSR, proc_pid_auxv), ONE("status", S_IRUGO, proc_pid_status), diff --git a/fs/proc/inode.c b/fs/proc/inode.c index d15aa1b1cc8f..74b48cfa1bb2 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -28,6 +28,7 @@ static void proc_evict_inode(struct inode *inode) { struct proc_dir_entry *de; struct ctl_table_header *head; + const struct proc_ns_operations *ns_ops; truncate_inode_pages(&inode->i_data, 0); end_writeback(inode); @@ -44,6 +45,10 @@ static void proc_evict_inode(struct inode *inode) rcu_assign_pointer(PROC_I(inode)->sysctl, NULL); sysctl_head_put(head); } + /* Release any associated namespace */ + ns_ops = PROC_I(inode)->ns_ops; + if (ns_ops && ns_ops->put) + ns_ops->put(PROC_I(inode)->ns); } static struct kmem_cache * proc_inode_cachep; @@ -62,6 +67,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb) ei->pde = NULL; ei->sysctl = NULL; ei->sysctl_entry = NULL; + ei->ns = NULL; + ei->ns_ops = NULL; inode = &ei->vfs_inode; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; return inode; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index c03e8d3a3a5b..96245a1b1a7c 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -119,3 +119,21 @@ struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); */ int proc_readdir(struct file *, void *, filldir_t); struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *); + + + +/* Lookups */ +typedef struct dentry *instantiate_t(struct inode *, struct dentry *, + struct task_struct *, const void *); +int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, + const char *name, int len, + instantiate_t instantiate, struct task_struct *task, const void *ptr); +int pid_revalidate(struct dentry *dentry, struct nameidata *nd); +struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task); +extern const struct dentry_operations pid_dentry_operations; +int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); +int proc_setattr(struct dentry *dentry, struct iattr *attr); + +extern const struct inode_operations proc_ns_dir_inode_operations; +extern const struct file_operations proc_ns_dir_operations; + diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c new file mode 100644 index 000000000000..6ae9f07d59ee --- /dev/null +++ b/fs/proc/namespaces.c @@ -0,0 +1,188 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + + +static const struct proc_ns_operations *ns_entries[] = { +}; + +static const struct file_operations ns_file_operations = { + .llseek = no_llseek, +}; + +static struct dentry *proc_ns_instantiate(struct inode *dir, + struct dentry *dentry, struct task_struct *task, const void *ptr) +{ + const struct proc_ns_operations *ns_ops = ptr; + struct inode *inode; + struct proc_inode *ei; + struct dentry *error = ERR_PTR(-ENOENT); + + inode = proc_pid_make_inode(dir->i_sb, task); + if (!inode) + goto out; + + ei = PROC_I(inode); + inode->i_mode = S_IFREG|S_IRUSR; + inode->i_fop = &ns_file_operations; + ei->ns_ops = ns_ops; + ei->ns = ns_ops->get(task); + if (!ei->ns) + goto out_iput; + + dentry->d_op = &pid_dentry_operations; + d_add(dentry, inode); + /* Close the race of the process dying before we return the dentry */ + if (pid_revalidate(dentry, NULL)) + error = NULL; +out: + return error; +out_iput: + iput(inode); + goto out; +} + +static int proc_ns_fill_cache(struct file *filp, void *dirent, + filldir_t filldir, struct task_struct *task, + const struct proc_ns_operations *ops) +{ + return proc_fill_cache(filp, dirent, filldir, + ops->name, strlen(ops->name), + proc_ns_instantiate, task, ops); +} + +static int proc_ns_dir_readdir(struct file *filp, void *dirent, + filldir_t filldir) +{ + int i; + struct dentry *dentry = filp->f_path.dentry; + struct inode *inode = dentry->d_inode; + struct task_struct *task = get_proc_task(inode); + const struct proc_ns_operations **entry, **last; + ino_t ino; + int ret; + + ret = -ENOENT; + if (!task) + goto out_no_task; + + ret = -EPERM; + if (!ptrace_may_access(task, PTRACE_MODE_READ)) + goto out; + + ret = 0; + i = filp->f_pos; + switch (i) { + case 0: + ino = inode->i_ino; + if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) + goto out; + i++; + filp->f_pos++; + /* fall through */ + case 1: + ino = parent_ino(dentry); + if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) + goto out; + i++; + filp->f_pos++; + /* fall through */ + default: + i -= 2; + if (i >= ARRAY_SIZE(ns_entries)) { + ret = 1; + goto out; + } + entry = ns_entries + i; + last = &ns_entries[ARRAY_SIZE(ns_entries) - 1]; + while (entry <= last) { + if (proc_ns_fill_cache(filp, dirent, filldir, + task, *entry) < 0) + goto out; + filp->f_pos++; + entry++; + } + } + + ret = 1; +out: + put_task_struct(task); +out_no_task: + return ret; +} + +const struct file_operations proc_ns_dir_operations = { + .read = generic_read_dir, + .readdir = proc_ns_dir_readdir, +}; + +static struct dentry *proc_ns_dir_lookup(struct inode *dir, + struct dentry *dentry, struct nameidata *nd) +{ + struct dentry *error; + struct task_struct *task = get_proc_task(dir); + const struct proc_ns_operations **entry, **last; + unsigned int len = dentry->d_name.len; + + error = ERR_PTR(-ENOENT); + + if (!task) + goto out_no_task; + + error = ERR_PTR(-EPERM); + if (!ptrace_may_access(task, PTRACE_MODE_READ)) + goto out; + + last = &ns_entries[ARRAY_SIZE(ns_entries) - 1]; + for (entry = ns_entries; entry <= last; entry++) { + if (strlen((*entry)->name) != len) + continue; + if (!memcmp(dentry->d_name.name, (*entry)->name, len)) + break; + } + if (entry > last) + goto out; + + error = proc_ns_instantiate(dir, dentry, task, *entry); +out: + put_task_struct(task); +out_no_task: + return error; +} + +const struct inode_operations proc_ns_dir_inode_operations = { + .lookup = proc_ns_dir_lookup, + .getattr = pid_getattr, + .setattr = proc_setattr, +}; + +struct file *proc_ns_fget(int fd) +{ + struct file *file; + + file = fget(fd); + if (!file) + return ERR_PTR(-EBADF); + + if (file->f_op != &ns_file_operations) + goto out_invalid; + + return file; + +out_invalid: + fput(file); + return ERR_PTR(-EINVAL); +} + diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 838c1149251a..a6d2c6da5e5a 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -179,6 +179,8 @@ extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); extern struct file *get_mm_exe_file(struct mm_struct *mm); extern void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm); +extern struct file *proc_ns_fget(int fd); + #else #define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; }) @@ -239,6 +241,11 @@ static inline void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm) {} +static inline struct file *proc_ns_fget(int fd) +{ + return ERR_PTR(-EINVAL); +} + #endif /* CONFIG_PROC_FS */ #if !defined(CONFIG_PROC_KCORE) @@ -250,6 +257,15 @@ kclist_add(struct kcore_list *new, void *addr, size_t size, int type) extern void kclist_add(struct kcore_list *, void *, size_t, int type); #endif +struct nsproxy; +struct proc_ns_operations { + const char *name; + int type; + void *(*get)(struct task_struct *task); + void (*put)(void *ns); + int (*install)(struct nsproxy *nsproxy, void *ns); +}; + union proc_op { int (*proc_get_link)(struct inode *, struct path *); int (*proc_read)(struct task_struct *task, char *page); @@ -268,6 +284,8 @@ struct proc_inode { struct proc_dir_entry *pde; struct ctl_table_header *sysctl; struct ctl_table *sysctl_entry; + void *ns; + const struct proc_ns_operations *ns_ops; struct inode vfs_inode; }; -- cgit v1.2.3 From 13b6f57623bc485e116344fe91fbcb29f149242b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 7 Mar 2010 18:14:23 -0800 Subject: ns proc: Add support for the network namespace. Implementing file descriptors for the network namespace is simple and straight forward. Acked-by: David S. Miller Acked-by: Daniel Lezcano Signed-off-by: Eric W. Biederman --- fs/proc/namespaces.c | 3 +++ include/linux/proc_fs.h | 1 + net/core/net_namespace.c | 31 +++++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+) (limited to 'include/linux') diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 6ae9f07d59ee..dcbd483e9915 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -16,6 +16,9 @@ static const struct proc_ns_operations *ns_entries[] = { +#ifdef CONFIG_NET_NS + &netns_operations, +#endif }; static const struct file_operations ns_file_operations = { diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index a6d2c6da5e5a..62126ec6ede9 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -265,6 +265,7 @@ struct proc_ns_operations { void (*put)(void *ns); int (*install)(struct nsproxy *nsproxy, void *ns); }; +extern const struct proc_ns_operations netns_operations; union proc_op { int (*proc_get_link)(struct inode *, struct path *); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 3f860261c5ee..bf7707e09a80 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -573,3 +573,34 @@ void unregister_pernet_device(struct pernet_operations *ops) mutex_unlock(&net_mutex); } EXPORT_SYMBOL_GPL(unregister_pernet_device); + +#ifdef CONFIG_NET_NS +static void *netns_get(struct task_struct *task) +{ + struct net *net; + rcu_read_lock(); + net = get_net(task->nsproxy->net_ns); + rcu_read_unlock(); + return net; +} + +static void netns_put(void *ns) +{ + put_net(ns); +} + +static int netns_install(struct nsproxy *nsproxy, void *ns) +{ + put_net(nsproxy->net_ns); + nsproxy->net_ns = get_net(ns); + return 0; +} + +const struct proc_ns_operations netns_operations = { + .name = "net", + .type = CLONE_NEWNET, + .get = netns_get, + .put = netns_put, + .install = netns_install, +}; +#endif -- cgit v1.2.3 From 34482e89a5218f0f9317abf1cfba3bb38b5c29dd Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 7 Mar 2010 18:43:27 -0800 Subject: ns proc: Add support for the uts namespace Acked-by: Daniel Lezcano Signed-off-by: Eric W. Biederman --- fs/proc/namespaces.c | 3 +++ include/linux/proc_fs.h | 1 + kernel/utsname.c | 39 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+) (limited to 'include/linux') diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index dcbd483e9915..b017181f1273 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -19,6 +19,9 @@ static const struct proc_ns_operations *ns_entries[] = { #ifdef CONFIG_NET_NS &netns_operations, #endif +#ifdef CONFIG_UTS_NS + &utsns_operations, +#endif }; static const struct file_operations ns_file_operations = { diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 62126ec6ede9..52aa89df8a6d 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -266,6 +266,7 @@ struct proc_ns_operations { int (*install)(struct nsproxy *nsproxy, void *ns); }; extern const struct proc_ns_operations netns_operations; +extern const struct proc_ns_operations utsns_operations; union proc_op { int (*proc_get_link)(struct inode *, struct path *); diff --git a/kernel/utsname.c b/kernel/utsname.c index 44646179eaba..bff131b9510a 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -15,6 +15,7 @@ #include #include #include +#include static struct uts_namespace *create_uts_ns(void) { @@ -79,3 +80,41 @@ void free_uts_ns(struct kref *kref) put_user_ns(ns->user_ns); kfree(ns); } + +static void *utsns_get(struct task_struct *task) +{ + struct uts_namespace *ns = NULL; + struct nsproxy *nsproxy; + + rcu_read_lock(); + nsproxy = task_nsproxy(task); + if (nsproxy) { + ns = nsproxy->uts_ns; + get_uts_ns(ns); + } + rcu_read_unlock(); + + return ns; +} + +static void utsns_put(void *ns) +{ + put_uts_ns(ns); +} + +static int utsns_install(struct nsproxy *nsproxy, void *ns) +{ + get_uts_ns(ns); + put_uts_ns(nsproxy->uts_ns); + nsproxy->uts_ns = ns; + return 0; +} + +const struct proc_ns_operations utsns_operations = { + .name = "uts", + .type = CLONE_NEWUTS, + .get = utsns_get, + .put = utsns_put, + .install = utsns_install, +}; + -- cgit v1.2.3 From a00eaf11a223c63fbb212369d6db69ce4c55a2d1 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 7 Mar 2010 18:48:39 -0800 Subject: ns proc: Add support for the ipc namespace Acked-by: Daniel Lezcano Signed-off-by: Eric W. Biederman --- fs/proc/namespaces.c | 3 +++ include/linux/proc_fs.h | 1 + ipc/namespace.c | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+) (limited to 'include/linux') diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index b017181f1273..f18d6d58bf79 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -22,6 +22,9 @@ static const struct proc_ns_operations *ns_entries[] = { #ifdef CONFIG_UTS_NS &utsns_operations, #endif +#ifdef CONFIG_IPC_NS + &ipcns_operations, +#endif }; static const struct file_operations ns_file_operations = { diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 52aa89df8a6d..a23f0b72a023 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -267,6 +267,7 @@ struct proc_ns_operations { }; extern const struct proc_ns_operations netns_operations; extern const struct proc_ns_operations utsns_operations; +extern const struct proc_ns_operations ipcns_operations; union proc_op { int (*proc_get_link)(struct inode *, struct path *); diff --git a/ipc/namespace.c b/ipc/namespace.c index 8054c8e5faf1..ce0a647869b1 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "util.h" @@ -140,3 +141,39 @@ void put_ipc_ns(struct ipc_namespace *ns) free_ipc_ns(ns); } } + +static void *ipcns_get(struct task_struct *task) +{ + struct ipc_namespace *ns = NULL; + struct nsproxy *nsproxy; + + rcu_read_lock(); + nsproxy = task_nsproxy(task); + if (nsproxy) + ns = get_ipc_ns(nsproxy->ipc_ns); + rcu_read_unlock(); + + return ns; +} + +static void ipcns_put(void *ns) +{ + return put_ipc_ns(ns); +} + +static int ipcns_install(struct nsproxy *nsproxy, void *ns) +{ + /* Ditch state from the old ipc namespace */ + exit_sem(current); + put_ipc_ns(nsproxy->ipc_ns); + nsproxy->ipc_ns = get_ipc_ns(ns); + return 0; +} + +const struct proc_ns_operations ipcns_operations = { + .name = "ipc", + .type = CLONE_NEWIPC, + .get = ipcns_get, + .put = ipcns_put, + .install = ipcns_install, +}; -- cgit v1.2.3 From f063052947f770845a6252f7fa24f6f624592a24 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 4 May 2011 17:51:50 -0700 Subject: net: Allow setting the network namespace by fd Take advantage of the new abstraction and allow network devices to be placed in any network namespace that we have a fd to talk about. Acked-by: David S. Miller Acked-by: Daniel Lezcano Signed-off-by: Eric W. Biederman --- include/linux/if_link.h | 1 + include/net/net_namespace.h | 1 + net/core/net_namespace.c | 33 +++++++++++++++++++++++++++++++-- net/core/rtnetlink.c | 5 ++++- 4 files changed, 37 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index f4a2e6b1b864..0ee969a5593d 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -136,6 +136,7 @@ enum { IFLA_PORT_SELF, IFLA_AF_SPEC, IFLA_GROUP, /* Group the device belongs to */ + IFLA_NET_NS_FD, __IFLA_MAX }; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 3ae491932bc8..dcc8f5749d3f 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -119,6 +119,7 @@ static inline struct net *copy_net_ns(unsigned long flags, struct net *net_ns) extern struct list_head net_namespace_list; extern struct net *get_net_ns_by_pid(pid_t pid); +extern struct net *get_net_ns_by_fd(int pid); #ifdef CONFIG_NET_NS extern void __put_net(struct net *net); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index bf7707e09a80..b7403ff4d6c6 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include #include @@ -343,6 +345,28 @@ struct net *get_net_ns_by_pid(pid_t pid) } EXPORT_SYMBOL_GPL(get_net_ns_by_pid); +struct net *get_net_ns_by_fd(int fd) +{ + struct proc_inode *ei; + struct file *file; + struct net *net; + + net = ERR_PTR(-EINVAL); + file = proc_ns_fget(fd); + if (!file) + goto out; + + ei = PROC_I(file->f_dentry->d_inode); + if (ei->ns_ops != &netns_operations) + goto out; + + net = get_net(ei->ns); +out: + if (file) + fput(file); + return net; +} + static int __init net_ns_init(void) { struct net_generic *ng; @@ -577,10 +601,15 @@ EXPORT_SYMBOL_GPL(unregister_pernet_device); #ifdef CONFIG_NET_NS static void *netns_get(struct task_struct *task) { - struct net *net; + struct net *net = NULL; + struct nsproxy *nsproxy; + rcu_read_lock(); - net = get_net(task->nsproxy->net_ns); + nsproxy = task_nsproxy(task); + if (nsproxy) + net = get_net(nsproxy->net_ns); rcu_read_unlock(); + return net; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d7c4bb4b1820..dca9602c62e4 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1043,6 +1043,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_LINKMODE] = { .type = NLA_U8 }, [IFLA_LINKINFO] = { .type = NLA_NESTED }, [IFLA_NET_NS_PID] = { .type = NLA_U32 }, + [IFLA_NET_NS_FD] = { .type = NLA_U32 }, [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, [IFLA_VFINFO_LIST] = {. type = NLA_NESTED }, [IFLA_VF_PORTS] = { .type = NLA_NESTED }, @@ -1091,6 +1092,8 @@ struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) */ if (tb[IFLA_NET_NS_PID]) net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID])); + else if (tb[IFLA_NET_NS_FD]) + net = get_net_ns_by_fd(nla_get_u32(tb[IFLA_NET_NS_FD])); else net = get_net(src_net); return net; @@ -1221,7 +1224,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, int send_addr_notify = 0; int err; - if (tb[IFLA_NET_NS_PID]) { + if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) { struct net *net = rtnl_link_get_net(dev_net(dev), tb); if (IS_ERR(net)) { err = PTR_ERR(net); -- cgit v1.2.3 From 81f8115305f821335cf9e16110bf0806f7b93283 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Thu, 28 Apr 2011 13:25:36 +0900 Subject: i2c: i2c-sh_mobile bus speed platform data V2 Add support to the i2c-sh_mobile driver for setting the I2C bus speed using platform data. Signed-off-by: Magnus Damm Signed-off-by: Ben Dooks --- drivers/i2c/busses/i2c-sh_mobile.c | 13 +++++++++++-- include/linux/i2c/i2c-sh_mobile.h | 10 ++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 include/linux/i2c/i2c-sh_mobile.h (limited to 'include/linux') diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c index 5a1d37d0b62f..d917dd1cc091 100644 --- a/drivers/i2c/busses/i2c-sh_mobile.c +++ b/drivers/i2c/busses/i2c-sh_mobile.c @@ -32,6 +32,7 @@ #include #include #include +#include /* Transmit operation: */ /* */ @@ -117,7 +118,7 @@ struct sh_mobile_i2c_data { struct device *dev; void __iomem *reg; struct i2c_adapter adap; - + unsigned long bus_speed; struct clk *clk; u_int8_t icic; u_int8_t iccl; @@ -205,7 +206,7 @@ static void activate_ch(struct sh_mobile_i2c_data *pd) * We also round off the result. */ num = i2c_clk * 5; - denom = NORMAL_SPEED * 9; + denom = pd->bus_speed * 9; tmp = num * 10 / denom; if (tmp % 10 >= 5) pd->iccl = (u_int8_t)((num/denom) + 1); @@ -574,6 +575,7 @@ static int sh_mobile_i2c_hook_irqs(struct platform_device *dev, int hook) static int sh_mobile_i2c_probe(struct platform_device *dev) { + struct i2c_sh_mobile_platform_data *pdata = dev->dev.platform_data; struct sh_mobile_i2c_data *pd; struct i2c_adapter *adap; struct resource *res; @@ -618,6 +620,11 @@ static int sh_mobile_i2c_probe(struct platform_device *dev) goto err_irq; } + /* Use platformd data bus speed or NORMAL_SPEED */ + pd->bus_speed = NORMAL_SPEED; + if (pdata && pdata->bus_speed) + pd->bus_speed = pdata->bus_speed; + /* The IIC blocks on SH-Mobile ARM processors * come with two new bits in ICIC. */ @@ -658,6 +665,8 @@ static int sh_mobile_i2c_probe(struct platform_device *dev) goto err_all; } + dev_info(&dev->dev, "I2C adapter %d with bus speed %lu Hz\n", + adap->nr, pd->bus_speed); return 0; err_all: diff --git a/include/linux/i2c/i2c-sh_mobile.h b/include/linux/i2c/i2c-sh_mobile.h new file mode 100644 index 000000000000..beda7081aead --- /dev/null +++ b/include/linux/i2c/i2c-sh_mobile.h @@ -0,0 +1,10 @@ +#ifndef __I2C_SH_MOBILE_H__ +#define __I2C_SH_MOBILE_H__ + +#include + +struct i2c_sh_mobile_platform_data { + unsigned long bus_speed; +}; + +#endif /* __I2C_SH_MOBILE_H__ */ -- cgit v1.2.3 From be84cb43833ee40a42e08f5425d20310f16229c7 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 9 May 2011 13:12:30 -0400 Subject: compat: fixes to allow working with tile arch The existing mechanism doesn't really provide enough to create the 64-bit "compat" ABI properly in a generic way, since the compat ABI is a mix of things were you can re-use the 64-bit versions of syscalls and things where you need a compat wrapper. To provide this in the most direct way possible, I added two new macros to go along with the existing __SYSCALL and __SC_3264 macros: __SC_COMP and SC_COMP_3264. These macros take an additional argument, typically a "compat_sys_xxx" function, which is passed to __SYSCALL if you define __SYSCALL_COMPAT when including the header, resulting in a pointer to the compat function being placed in the generated syscall table. The change also adds some missing definitions to so that it actually has declarations for all the compat syscalls, since the "[nr] = ##call" approach requires proper C declarations for all the functions included in the syscall table. Finally, compat.c defines compat_sys_sigpending() and compat_sys_sigprocmask() even if the underlying architecture doesn't request it, which tries to pull in undefined compat_old_sigset_t defines. We need to guard those compat syscall definitions with appropriate __ARCH_WANT_SYS_xxx ifdefs. Acked-by: Arnd Bergmann Signed-off-by: Chris Metcalf --- arch/tile/kernel/compat.c | 2 +- include/asm-generic/unistd.h | 221 ++++++++++++++++++++++++------------------- include/linux/compat.h | 187 ++++++++++++++++++++++++++++++++++++ kernel/compat.c | 8 ++ 4 files changed, 320 insertions(+), 98 deletions(-) (limited to 'include/linux') diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c index dbc213adf5e1..e35a3975ca3b 100644 --- a/arch/tile/kernel/compat.c +++ b/arch/tile/kernel/compat.c @@ -135,7 +135,7 @@ long tile_compat_sys_msgrcv(int msqid, /* Provide the compat syscall number to call mapping. */ #undef __SYSCALL -#define __SYSCALL(nr, call) [nr] = (compat_##call), +#define __SYSCALL(nr, call) [nr] = (call), /* The generic versions of these don't work for Tile. */ #define compat_sys_msgrcv tile_compat_sys_msgrcv diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h index 07c40d5149de..33d524704883 100644 --- a/include/asm-generic/unistd.h +++ b/include/asm-generic/unistd.h @@ -24,16 +24,24 @@ #define __SC_3264(_nr, _32, _64) __SYSCALL(_nr, _64) #endif +#ifdef __SYSCALL_COMPAT +#define __SC_COMP(_nr, _sys, _comp) __SYSCALL(_nr, _comp) +#define __SC_COMP_3264(_nr, _32, _64, _comp) __SYSCALL(_nr, _comp) +#else +#define __SC_COMP(_nr, _sys, _comp) __SYSCALL(_nr, _sys) +#define __SC_COMP_3264(_nr, _32, _64, _comp) __SC_3264(_nr, _32, _64) +#endif + #define __NR_io_setup 0 -__SYSCALL(__NR_io_setup, sys_io_setup) +__SC_COMP(__NR_io_setup, sys_io_setup, compat_sys_io_setup) #define __NR_io_destroy 1 __SYSCALL(__NR_io_destroy, sys_io_destroy) #define __NR_io_submit 2 -__SYSCALL(__NR_io_submit, sys_io_submit) +__SC_COMP(__NR_io_submit, sys_io_submit, compat_sys_io_submit) #define __NR_io_cancel 3 __SYSCALL(__NR_io_cancel, sys_io_cancel) #define __NR_io_getevents 4 -__SYSCALL(__NR_io_getevents, sys_io_getevents) +__SC_COMP(__NR_io_getevents, sys_io_getevents, compat_sys_io_getevents) /* fs/xattr.c */ #define __NR_setxattr 5 @@ -67,7 +75,7 @@ __SYSCALL(__NR_getcwd, sys_getcwd) /* fs/cookies.c */ #define __NR_lookup_dcookie 18 -__SYSCALL(__NR_lookup_dcookie, sys_lookup_dcookie) +__SC_COMP(__NR_lookup_dcookie, sys_lookup_dcookie, compat_sys_lookup_dcookie) /* fs/eventfd.c */ #define __NR_eventfd2 19 @@ -79,7 +87,7 @@ __SYSCALL(__NR_epoll_create1, sys_epoll_create1) #define __NR_epoll_ctl 21 __SYSCALL(__NR_epoll_ctl, sys_epoll_ctl) #define __NR_epoll_pwait 22 -__SYSCALL(__NR_epoll_pwait, sys_epoll_pwait) +__SC_COMP(__NR_epoll_pwait, sys_epoll_pwait, compat_sys_epoll_pwait) /* fs/fcntl.c */ #define __NR_dup 23 @@ -87,7 +95,7 @@ __SYSCALL(__NR_dup, sys_dup) #define __NR_dup3 24 __SYSCALL(__NR_dup3, sys_dup3) #define __NR3264_fcntl 25 -__SC_3264(__NR3264_fcntl, sys_fcntl64, sys_fcntl) +__SC_COMP_3264(__NR3264_fcntl, sys_fcntl64, sys_fcntl, compat_sys_fcntl64) /* fs/inotify_user.c */ #define __NR_inotify_init1 26 @@ -99,7 +107,7 @@ __SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch) /* fs/ioctl.c */ #define __NR_ioctl 29 -__SYSCALL(__NR_ioctl, sys_ioctl) +__SC_COMP(__NR_ioctl, sys_ioctl, compat_sys_ioctl) /* fs/ioprio.c */ #define __NR_ioprio_set 30 @@ -129,26 +137,30 @@ __SYSCALL(__NR_renameat, sys_renameat) #define __NR_umount2 39 __SYSCALL(__NR_umount2, sys_umount) #define __NR_mount 40 -__SYSCALL(__NR_mount, sys_mount) +__SC_COMP(__NR_mount, sys_mount, compat_sys_mount) #define __NR_pivot_root 41 __SYSCALL(__NR_pivot_root, sys_pivot_root) /* fs/nfsctl.c */ #define __NR_nfsservctl 42 -__SYSCALL(__NR_nfsservctl, sys_nfsservctl) +__SC_COMP(__NR_nfsservctl, sys_nfsservctl, compat_sys_nfsservctl) /* fs/open.c */ #define __NR3264_statfs 43 -__SC_3264(__NR3264_statfs, sys_statfs64, sys_statfs) +__SC_COMP_3264(__NR3264_statfs, sys_statfs64, sys_statfs, \ + compat_sys_statfs64) #define __NR3264_fstatfs 44 -__SC_3264(__NR3264_fstatfs, sys_fstatfs64, sys_fstatfs) +__SC_COMP_3264(__NR3264_fstatfs, sys_fstatfs64, sys_fstatfs, \ + compat_sys_fstatfs64) #define __NR3264_truncate 45 -__SC_3264(__NR3264_truncate, sys_truncate64, sys_truncate) +__SC_COMP_3264(__NR3264_truncate, sys_truncate64, sys_truncate, \ + compat_sys_truncate64) #define __NR3264_ftruncate 46 -__SC_3264(__NR3264_ftruncate, sys_ftruncate64, sys_ftruncate) +__SC_COMP_3264(__NR3264_ftruncate, sys_ftruncate64, sys_ftruncate, \ + compat_sys_ftruncate64) #define __NR_fallocate 47 -__SYSCALL(__NR_fallocate, sys_fallocate) +__SC_COMP(__NR_fallocate, sys_fallocate, compat_sys_fallocate) #define __NR_faccessat 48 __SYSCALL(__NR_faccessat, sys_faccessat) #define __NR_chdir 49 @@ -166,7 +178,7 @@ __SYSCALL(__NR_fchownat, sys_fchownat) #define __NR_fchown 55 __SYSCALL(__NR_fchown, sys_fchown) #define __NR_openat 56 -__SYSCALL(__NR_openat, sys_openat) +__SC_COMP(__NR_openat, sys_openat, compat_sys_openat) #define __NR_close 57 __SYSCALL(__NR_close, sys_close) #define __NR_vhangup 58 @@ -182,7 +194,7 @@ __SYSCALL(__NR_quotactl, sys_quotactl) /* fs/readdir.c */ #define __NR_getdents64 61 -__SYSCALL(__NR_getdents64, sys_getdents64) +__SC_COMP(__NR_getdents64, sys_getdents64, compat_sys_getdents64) /* fs/read_write.c */ #define __NR3264_lseek 62 @@ -192,17 +204,17 @@ __SYSCALL(__NR_read, sys_read) #define __NR_write 64 __SYSCALL(__NR_write, sys_write) #define __NR_readv 65 -__SYSCALL(__NR_readv, sys_readv) +__SC_COMP(__NR_readv, sys_readv, compat_sys_readv) #define __NR_writev 66 -__SYSCALL(__NR_writev, sys_writev) +__SC_COMP(__NR_writev, sys_writev, compat_sys_writev) #define __NR_pread64 67 -__SYSCALL(__NR_pread64, sys_pread64) +__SC_COMP(__NR_pread64, sys_pread64, compat_sys_pread64) #define __NR_pwrite64 68 -__SYSCALL(__NR_pwrite64, sys_pwrite64) +__SC_COMP(__NR_pwrite64, sys_pwrite64, compat_sys_pwrite64) #define __NR_preadv 69 -__SYSCALL(__NR_preadv, sys_preadv) +__SC_COMP(__NR_preadv, sys_preadv, compat_sys_preadv) #define __NR_pwritev 70 -__SYSCALL(__NR_pwritev, sys_pwritev) +__SC_COMP(__NR_pwritev, sys_pwritev, compat_sys_pwritev) /* fs/sendfile.c */ #define __NR3264_sendfile 71 @@ -210,17 +222,17 @@ __SC_3264(__NR3264_sendfile, sys_sendfile64, sys_sendfile) /* fs/select.c */ #define __NR_pselect6 72 -__SYSCALL(__NR_pselect6, sys_pselect6) +__SC_COMP(__NR_pselect6, sys_pselect6, compat_sys_pselect6) #define __NR_ppoll 73 -__SYSCALL(__NR_ppoll, sys_ppoll) +__SC_COMP(__NR_ppoll, sys_ppoll, compat_sys_ppoll) /* fs/signalfd.c */ #define __NR_signalfd4 74 -__SYSCALL(__NR_signalfd4, sys_signalfd4) +__SC_COMP(__NR_signalfd4, sys_signalfd4, compat_sys_signalfd4) /* fs/splice.c */ #define __NR_vmsplice 75 -__SYSCALL(__NR_vmsplice, sys_vmsplice) +__SC_COMP(__NR_vmsplice, sys_vmsplice, compat_sys_vmsplice) #define __NR_splice 76 __SYSCALL(__NR_splice, sys_splice) #define __NR_tee 77 @@ -243,23 +255,27 @@ __SYSCALL(__NR_fsync, sys_fsync) __SYSCALL(__NR_fdatasync, sys_fdatasync) #ifdef __ARCH_WANT_SYNC_FILE_RANGE2 #define __NR_sync_file_range2 84 -__SYSCALL(__NR_sync_file_range2, sys_sync_file_range2) +__SC_COMP(__NR_sync_file_range2, sys_sync_file_range2, \ + compat_sys_sync_file_range2) #else #define __NR_sync_file_range 84 -__SYSCALL(__NR_sync_file_range, sys_sync_file_range) +__SC_COMP(__NR_sync_file_range, sys_sync_file_range, \ + compat_sys_sync_file_range) #endif /* fs/timerfd.c */ #define __NR_timerfd_create 85 __SYSCALL(__NR_timerfd_create, sys_timerfd_create) #define __NR_timerfd_settime 86 -__SYSCALL(__NR_timerfd_settime, sys_timerfd_settime) +__SC_COMP(__NR_timerfd_settime, sys_timerfd_settime, \ + compat_sys_timerfd_settime) #define __NR_timerfd_gettime 87 -__SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime) +__SC_COMP(__NR_timerfd_gettime, sys_timerfd_gettime, \ + compat_sys_timerfd_gettime) /* fs/utimes.c */ #define __NR_utimensat 88 -__SYSCALL(__NR_utimensat, sys_utimensat) +__SC_COMP(__NR_utimensat, sys_utimensat, compat_sys_utimensat) /* kernel/acct.c */ #define __NR_acct 89 @@ -281,7 +297,7 @@ __SYSCALL(__NR_exit, sys_exit) #define __NR_exit_group 94 __SYSCALL(__NR_exit_group, sys_exit_group) #define __NR_waitid 95 -__SYSCALL(__NR_waitid, sys_waitid) +__SC_COMP(__NR_waitid, sys_waitid, compat_sys_waitid) /* kernel/fork.c */ #define __NR_set_tid_address 96 @@ -291,25 +307,27 @@ __SYSCALL(__NR_unshare, sys_unshare) /* kernel/futex.c */ #define __NR_futex 98 -__SYSCALL(__NR_futex, sys_futex) +__SC_COMP(__NR_futex, sys_futex, compat_sys_futex) #define __NR_set_robust_list 99 -__SYSCALL(__NR_set_robust_list, sys_set_robust_list) +__SC_COMP(__NR_set_robust_list, sys_set_robust_list, \ + compat_sys_set_robust_list) #define __NR_get_robust_list 100 -__SYSCALL(__NR_get_robust_list, sys_get_robust_list) +__SC_COMP(__NR_get_robust_list, sys_get_robust_list, \ + compat_sys_get_robust_list) /* kernel/hrtimer.c */ #define __NR_nanosleep 101 -__SYSCALL(__NR_nanosleep, sys_nanosleep) +__SC_COMP(__NR_nanosleep, sys_nanosleep, compat_sys_nanosleep) /* kernel/itimer.c */ #define __NR_getitimer 102 -__SYSCALL(__NR_getitimer, sys_getitimer) +__SC_COMP(__NR_getitimer, sys_getitimer, compat_sys_getitimer) #define __NR_setitimer 103 -__SYSCALL(__NR_setitimer, sys_setitimer) +__SC_COMP(__NR_setitimer, sys_setitimer, compat_sys_setitimer) /* kernel/kexec.c */ #define __NR_kexec_load 104 -__SYSCALL(__NR_kexec_load, sys_kexec_load) +__SC_COMP(__NR_kexec_load, sys_kexec_load, compat_sys_kexec_load) /* kernel/module.c */ #define __NR_init_module 105 @@ -319,23 +337,24 @@ __SYSCALL(__NR_delete_module, sys_delete_module) /* kernel/posix-timers.c */ #define __NR_timer_create 107 -__SYSCALL(__NR_timer_create, sys_timer_create) +__SC_COMP(__NR_timer_create, sys_timer_create, compat_sys_timer_create) #define __NR_timer_gettime 108 -__SYSCALL(__NR_timer_gettime, sys_timer_gettime) +__SC_COMP(__NR_timer_gettime, sys_timer_gettime, compat_sys_timer_gettime) #define __NR_timer_getoverrun 109 __SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun) #define __NR_timer_settime 110 -__SYSCALL(__NR_timer_settime, sys_timer_settime) +__SC_COMP(__NR_timer_settime, sys_timer_settime, compat_sys_timer_settime) #define __NR_timer_delete 111 __SYSCALL(__NR_timer_delete, sys_timer_delete) #define __NR_clock_settime 112 -__SYSCALL(__NR_clock_settime, sys_clock_settime) +__SC_COMP(__NR_clock_settime, sys_clock_settime, compat_sys_clock_settime) #define __NR_clock_gettime 113 -__SYSCALL(__NR_clock_gettime, sys_clock_gettime) +__SC_COMP(__NR_clock_gettime, sys_clock_gettime, compat_sys_clock_gettime) #define __NR_clock_getres 114 -__SYSCALL(__NR_clock_getres, sys_clock_getres) +__SC_COMP(__NR_clock_getres, sys_clock_getres, compat_sys_clock_getres) #define __NR_clock_nanosleep 115 -__SYSCALL(__NR_clock_nanosleep, sys_clock_nanosleep) +__SC_COMP(__NR_clock_nanosleep, sys_clock_nanosleep, \ + compat_sys_clock_nanosleep) /* kernel/printk.c */ #define __NR_syslog 116 @@ -355,9 +374,11 @@ __SYSCALL(__NR_sched_getscheduler, sys_sched_getscheduler) #define __NR_sched_getparam 121 __SYSCALL(__NR_sched_getparam, sys_sched_getparam) #define __NR_sched_setaffinity 122 -__SYSCALL(__NR_sched_setaffinity, sys_sched_setaffinity) +__SC_COMP(__NR_sched_setaffinity, sys_sched_setaffinity, \ + compat_sys_sched_setaffinity) #define __NR_sched_getaffinity 123 -__SYSCALL(__NR_sched_getaffinity, sys_sched_getaffinity) +__SC_COMP(__NR_sched_getaffinity, sys_sched_getaffinity, \ + compat_sys_sched_getaffinity) #define __NR_sched_yield 124 __SYSCALL(__NR_sched_yield, sys_sched_yield) #define __NR_sched_get_priority_max 125 @@ -365,7 +386,8 @@ __SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max) #define __NR_sched_get_priority_min 126 __SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min) #define __NR_sched_rr_get_interval 127 -__SYSCALL(__NR_sched_rr_get_interval, sys_sched_rr_get_interval) +__SC_COMP(__NR_sched_rr_get_interval, sys_sched_rr_get_interval, \ + compat_sys_sched_rr_get_interval) /* kernel/signal.c */ #define __NR_restart_syscall 128 @@ -377,21 +399,23 @@ __SYSCALL(__NR_tkill, sys_tkill) #define __NR_tgkill 131 __SYSCALL(__NR_tgkill, sys_tgkill) #define __NR_sigaltstack 132 -__SYSCALL(__NR_sigaltstack, sys_sigaltstack) +__SC_COMP(__NR_sigaltstack, sys_sigaltstack, compat_sys_sigaltstack) #define __NR_rt_sigsuspend 133 -__SYSCALL(__NR_rt_sigsuspend, sys_rt_sigsuspend) /* __ARCH_WANT_SYS_RT_SIGSUSPEND */ +__SC_COMP(__NR_rt_sigsuspend, sys_rt_sigsuspend, compat_sys_rt_sigsuspend) #define __NR_rt_sigaction 134 -__SYSCALL(__NR_rt_sigaction, sys_rt_sigaction) /* __ARCH_WANT_SYS_RT_SIGACTION */ +__SC_COMP(__NR_rt_sigaction, sys_rt_sigaction, compat_sys_rt_sigaction) #define __NR_rt_sigprocmask 135 __SYSCALL(__NR_rt_sigprocmask, sys_rt_sigprocmask) #define __NR_rt_sigpending 136 __SYSCALL(__NR_rt_sigpending, sys_rt_sigpending) #define __NR_rt_sigtimedwait 137 -__SYSCALL(__NR_rt_sigtimedwait, sys_rt_sigtimedwait) +__SC_COMP(__NR_rt_sigtimedwait, sys_rt_sigtimedwait, \ + compat_sys_rt_sigtimedwait) #define __NR_rt_sigqueueinfo 138 -__SYSCALL(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo) +__SC_COMP(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo, \ + compat_sys_rt_sigqueueinfo) #define __NR_rt_sigreturn 139 -__SYSCALL(__NR_rt_sigreturn, sys_rt_sigreturn) /* sys_rt_sigreturn_wrapper, */ +__SC_COMP(__NR_rt_sigreturn, sys_rt_sigreturn, compat_sys_rt_sigreturn) /* kernel/sys.c */ #define __NR_setpriority 140 @@ -421,7 +445,7 @@ __SYSCALL(__NR_setfsuid, sys_setfsuid) #define __NR_setfsgid 152 __SYSCALL(__NR_setfsgid, sys_setfsgid) #define __NR_times 153 -__SYSCALL(__NR_times, sys_times) +__SC_COMP(__NR_times, sys_times, compat_sys_times) #define __NR_setpgid 154 __SYSCALL(__NR_setpgid, sys_setpgid) #define __NR_getpgid 155 @@ -441,11 +465,11 @@ __SYSCALL(__NR_sethostname, sys_sethostname) #define __NR_setdomainname 162 __SYSCALL(__NR_setdomainname, sys_setdomainname) #define __NR_getrlimit 163 -__SYSCALL(__NR_getrlimit, sys_getrlimit) +__SC_COMP(__NR_getrlimit, sys_getrlimit, compat_sys_getrlimit) #define __NR_setrlimit 164 -__SYSCALL(__NR_setrlimit, sys_setrlimit) +__SC_COMP(__NR_setrlimit, sys_setrlimit, compat_sys_setrlimit) #define __NR_getrusage 165 -__SYSCALL(__NR_getrusage, sys_getrusage) +__SC_COMP(__NR_getrusage, sys_getrusage, compat_sys_getrusage) #define __NR_umask 166 __SYSCALL(__NR_umask, sys_umask) #define __NR_prctl 167 @@ -455,11 +479,11 @@ __SYSCALL(__NR_getcpu, sys_getcpu) /* kernel/time.c */ #define __NR_gettimeofday 169 -__SYSCALL(__NR_gettimeofday, sys_gettimeofday) +__SC_COMP(__NR_gettimeofday, sys_gettimeofday, compat_sys_gettimeofday) #define __NR_settimeofday 170 -__SYSCALL(__NR_settimeofday, sys_settimeofday) +__SC_COMP(__NR_settimeofday, sys_settimeofday, compat_sys_settimeofday) #define __NR_adjtimex 171 -__SYSCALL(__NR_adjtimex, sys_adjtimex) +__SC_COMP(__NR_adjtimex, sys_adjtimex, compat_sys_adjtimex) /* kernel/timer.c */ #define __NR_getpid 172 @@ -477,39 +501,40 @@ __SYSCALL(__NR_getegid, sys_getegid) #define __NR_gettid 178 __SYSCALL(__NR_gettid, sys_gettid) #define __NR_sysinfo 179 -__SYSCALL(__NR_sysinfo, sys_sysinfo) +__SC_COMP(__NR_sysinfo, sys_sysinfo, compat_sys_sysinfo) /* ipc/mqueue.c */ #define __NR_mq_open 180 -__SYSCALL(__NR_mq_open, sys_mq_open) +__SC_COMP(__NR_mq_open, sys_mq_open, compat_sys_mq_open) #define __NR_mq_unlink 181 __SYSCALL(__NR_mq_unlink, sys_mq_unlink) #define __NR_mq_timedsend 182 -__SYSCALL(__NR_mq_timedsend, sys_mq_timedsend) +__SC_COMP(__NR_mq_timedsend, sys_mq_timedsend, compat_sys_mq_timedsend) #define __NR_mq_timedreceive 183 -__SYSCALL(__NR_mq_timedreceive, sys_mq_timedreceive) +__SC_COMP(__NR_mq_timedreceive, sys_mq_timedreceive, \ + compat_sys_mq_timedreceive) #define __NR_mq_notify 184 -__SYSCALL(__NR_mq_notify, sys_mq_notify) +__SC_COMP(__NR_mq_notify, sys_mq_notify, compat_sys_mq_notify) #define __NR_mq_getsetattr 185 -__SYSCALL(__NR_mq_getsetattr, sys_mq_getsetattr) +__SC_COMP(__NR_mq_getsetattr, sys_mq_getsetattr, compat_sys_mq_getsetattr) /* ipc/msg.c */ #define __NR_msgget 186 __SYSCALL(__NR_msgget, sys_msgget) #define __NR_msgctl 187 -__SYSCALL(__NR_msgctl, sys_msgctl) +__SC_COMP(__NR_msgctl, sys_msgctl, compat_sys_msgctl) #define __NR_msgrcv 188 -__SYSCALL(__NR_msgrcv, sys_msgrcv) +__SC_COMP(__NR_msgrcv, sys_msgrcv, compat_sys_msgrcv) #define __NR_msgsnd 189 -__SYSCALL(__NR_msgsnd, sys_msgsnd) +__SC_COMP(__NR_msgsnd, sys_msgsnd, compat_sys_msgsnd) /* ipc/sem.c */ #define __NR_semget 190 __SYSCALL(__NR_semget, sys_semget) #define __NR_semctl 191 -__SYSCALL(__NR_semctl, sys_semctl) +__SC_COMP(__NR_semctl, sys_semctl, compat_sys_semctl) #define __NR_semtimedop 192 -__SYSCALL(__NR_semtimedop, sys_semtimedop) +__SC_COMP(__NR_semtimedop, sys_semtimedop, compat_sys_semtimedop) #define __NR_semop 193 __SYSCALL(__NR_semop, sys_semop) @@ -517,9 +542,9 @@ __SYSCALL(__NR_semop, sys_semop) #define __NR_shmget 194 __SYSCALL(__NR_shmget, sys_shmget) #define __NR_shmctl 195 -__SYSCALL(__NR_shmctl, sys_shmctl) +__SC_COMP(__NR_shmctl, sys_shmctl, compat_sys_shmctl) #define __NR_shmat 196 -__SYSCALL(__NR_shmat, sys_shmat) +__SC_COMP(__NR_shmat, sys_shmat, compat_sys_shmat) #define __NR_shmdt 197 __SYSCALL(__NR_shmdt, sys_shmdt) @@ -543,21 +568,21 @@ __SYSCALL(__NR_getpeername, sys_getpeername) #define __NR_sendto 206 __SYSCALL(__NR_sendto, sys_sendto) #define __NR_recvfrom 207 -__SYSCALL(__NR_recvfrom, sys_recvfrom) +__SC_COMP(__NR_recvfrom, sys_recvfrom, compat_sys_recvfrom) #define __NR_setsockopt 208 -__SYSCALL(__NR_setsockopt, sys_setsockopt) +__SC_COMP(__NR_setsockopt, sys_setsockopt, compat_sys_setsockopt) #define __NR_getsockopt 209 -__SYSCALL(__NR_getsockopt, sys_getsockopt) +__SC_COMP(__NR_getsockopt, sys_getsockopt, compat_sys_getsockopt) #define __NR_shutdown 210 __SYSCALL(__NR_shutdown, sys_shutdown) #define __NR_sendmsg 211 -__SYSCALL(__NR_sendmsg, sys_sendmsg) +__SC_COMP(__NR_sendmsg, sys_sendmsg, compat_sys_sendmsg) #define __NR_recvmsg 212 -__SYSCALL(__NR_recvmsg, sys_recvmsg) +__SC_COMP(__NR_recvmsg, sys_recvmsg, compat_sys_recvmsg) /* mm/filemap.c */ #define __NR_readahead 213 -__SYSCALL(__NR_readahead, sys_readahead) +__SC_COMP(__NR_readahead, sys_readahead, compat_sys_readahead) /* mm/nommu.c, also with MMU */ #define __NR_brk 214 @@ -573,19 +598,19 @@ __SYSCALL(__NR_add_key, sys_add_key) #define __NR_request_key 218 __SYSCALL(__NR_request_key, sys_request_key) #define __NR_keyctl 219 -__SYSCALL(__NR_keyctl, sys_keyctl) +__SC_COMP(__NR_keyctl, sys_keyctl, compat_sys_keyctl) /* arch/example/kernel/sys_example.c */ #define __NR_clone 220 -__SYSCALL(__NR_clone, sys_clone) /* .long sys_clone_wrapper */ +__SYSCALL(__NR_clone, sys_clone) #define __NR_execve 221 -__SYSCALL(__NR_execve, sys_execve) /* .long sys_execve_wrapper */ +__SC_COMP(__NR_execve, sys_execve, compat_sys_execve) #define __NR3264_mmap 222 __SC_3264(__NR3264_mmap, sys_mmap2, sys_mmap) /* mm/fadvise.c */ #define __NR3264_fadvise64 223 -__SYSCALL(__NR3264_fadvise64, sys_fadvise64_64) +__SC_COMP(__NR3264_fadvise64, sys_fadvise64_64, compat_sys_fadvise64_64) /* mm/, CONFIG_MMU only */ #ifndef __ARCH_NOMMU @@ -612,25 +637,26 @@ __SYSCALL(__NR_madvise, sys_madvise) #define __NR_remap_file_pages 234 __SYSCALL(__NR_remap_file_pages, sys_remap_file_pages) #define __NR_mbind 235 -__SYSCALL(__NR_mbind, sys_mbind) +__SC_COMP(__NR_mbind, sys_mbind, compat_sys_mbind) #define __NR_get_mempolicy 236 -__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy) +__SC_COMP(__NR_get_mempolicy, sys_get_mempolicy, compat_sys_get_mempolicy) #define __NR_set_mempolicy 237 -__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy) +__SC_COMP(__NR_set_mempolicy, sys_set_mempolicy, compat_sys_set_mempolicy) #define __NR_migrate_pages 238 -__SYSCALL(__NR_migrate_pages, sys_migrate_pages) +__SC_COMP(__NR_migrate_pages, sys_migrate_pages, compat_sys_migrate_pages) #define __NR_move_pages 239 -__SYSCALL(__NR_move_pages, sys_move_pages) +__SC_COMP(__NR_move_pages, sys_move_pages, compat_sys_move_pages) #endif #define __NR_rt_tgsigqueueinfo 240 -__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) +__SC_COMP(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo, \ + compat_sys_rt_tgsigqueueinfo) #define __NR_perf_event_open 241 __SYSCALL(__NR_perf_event_open, sys_perf_event_open) #define __NR_accept4 242 __SYSCALL(__NR_accept4, sys_accept4) #define __NR_recvmmsg 243 -__SYSCALL(__NR_recvmmsg, sys_recvmmsg) +__SC_COMP(__NR_recvmmsg, sys_recvmmsg, compat_sys_recvmmsg) /* * Architectures may provide up to 16 syscalls of their own @@ -639,19 +665,20 @@ __SYSCALL(__NR_recvmmsg, sys_recvmmsg) #define __NR_arch_specific_syscall 244 #define __NR_wait4 260 -__SYSCALL(__NR_wait4, sys_wait4) +__SC_COMP(__NR_wait4, sys_wait4, compat_sys_wait4) #define __NR_prlimit64 261 __SYSCALL(__NR_prlimit64, sys_prlimit64) #define __NR_fanotify_init 262 __SYSCALL(__NR_fanotify_init, sys_fanotify_init) #define __NR_fanotify_mark 263 __SYSCALL(__NR_fanotify_mark, sys_fanotify_mark) -#define __NR_name_to_handle_at 264 +#define __NR_name_to_handle_at 264 __SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at) -#define __NR_open_by_handle_at 265 -__SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at) +#define __NR_open_by_handle_at 265 +__SC_COMP(__NR_open_by_handle_at, sys_open_by_handle_at, \ + compat_sys_open_by_handle_at) #define __NR_clock_adjtime 266 -__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime) +__SC_COMP(__NR_clock_adjtime, sys_clock_adjtime, compat_sys_clock_adjtime) #define __NR_syncfs 267 __SYSCALL(__NR_syncfs, sys_syncfs) diff --git a/include/linux/compat.h b/include/linux/compat.h index 5778b559d59c..e94184834b71 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -209,6 +210,18 @@ struct compat_robust_list_head { compat_uptr_t list_op_pending; }; +struct compat_statfs; +struct compat_statfs64; +struct compat_old_linux_dirent; +struct compat_linux_dirent; +struct linux_dirent64; +struct compat_msghdr; +struct compat_mmsghdr; +struct compat_sysinfo; +struct compat_sysctl_args; +struct compat_kexec_segment; +struct compat_mq_attr; + extern void compat_exit_robust_list(struct task_struct *curr); asmlinkage long @@ -331,9 +344,13 @@ asmlinkage long compat_sys_epoll_pwait(int epfd, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize); +asmlinkage long compat_sys_utime(const char __user *filename, + struct compat_utimbuf __user *t); asmlinkage long compat_sys_utimensat(unsigned int dfd, const char __user *filename, struct compat_timespec __user *t, int flags); +asmlinkage long compat_sys_time(compat_time_t __user *tloc); +asmlinkage long compat_sys_stime(compat_time_t __user *tptr); asmlinkage long compat_sys_signalfd(int ufd, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize); @@ -350,11 +367,181 @@ asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_page, int flags); asmlinkage long compat_sys_futimesat(unsigned int dfd, const char __user *filename, struct compat_timeval __user *t); +asmlinkage long compat_sys_utimes(const char __user *filename, + struct compat_timeval __user *t); +asmlinkage long compat_sys_newstat(const char __user * filename, + struct compat_stat __user *statbuf); +asmlinkage long compat_sys_newlstat(const char __user * filename, + struct compat_stat __user *statbuf); asmlinkage long compat_sys_newfstatat(unsigned int dfd, const char __user * filename, struct compat_stat __user *statbuf, int flag); +asmlinkage long compat_sys_newfstat(unsigned int fd, + struct compat_stat __user * statbuf); +asmlinkage long compat_sys_statfs(const char __user *pathname, + struct compat_statfs __user *buf); +asmlinkage long compat_sys_fstatfs(unsigned int fd, + struct compat_statfs __user *buf); +asmlinkage long compat_sys_statfs64(const char __user *pathname, + compat_size_t sz, + struct compat_statfs64 __user *buf); +asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, + struct compat_statfs64 __user *buf); +asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd, + unsigned long arg); +asmlinkage long compat_sys_fcntl(unsigned int fd, unsigned int cmd, + unsigned long arg); +asmlinkage long compat_sys_io_setup(unsigned nr_reqs, u32 __user *ctx32p); +asmlinkage long compat_sys_io_getevents(aio_context_t ctx_id, + unsigned long min_nr, + unsigned long nr, + struct io_event __user *events, + struct compat_timespec __user *timeout); +asmlinkage long compat_sys_io_submit(aio_context_t ctx_id, int nr, + u32 __user *iocb); +asmlinkage long compat_sys_mount(const char __user * dev_name, + const char __user * dir_name, + const char __user * type, unsigned long flags, + const void __user * data); +asmlinkage long compat_sys_old_readdir(unsigned int fd, + struct compat_old_linux_dirent __user *, + unsigned int count); +asmlinkage long compat_sys_getdents(unsigned int fd, + struct compat_linux_dirent __user *dirent, + unsigned int count); +asmlinkage long compat_sys_getdents64(unsigned int fd, + struct linux_dirent64 __user * dirent, + unsigned int count); +asmlinkage long compat_sys_vmsplice(int fd, const struct compat_iovec __user *, + unsigned int nr_segs, unsigned int flags); +asmlinkage long compat_sys_open(const char __user *filename, int flags, + int mode); asmlinkage long compat_sys_openat(unsigned int dfd, const char __user *filename, int flags, int mode); +asmlinkage long compat_sys_open_by_handle_at(int mountdirfd, + struct file_handle __user *handle, + int flags); +asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp, + compat_ulong_t __user *outp, + compat_ulong_t __user *exp, + struct compat_timespec __user *tsp, + void __user *sig); +asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, + unsigned int nfds, + struct compat_timespec __user *tsp, + const compat_sigset_t __user *sigmask, + compat_size_t sigsetsize); +#if (defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)) && !defined(CONFIG_NFSD_DEPRECATED) +union compat_nfsctl_res; +struct compat_nfsctl_arg; +asmlinkage long compat_sys_nfsservctl(int cmd, + struct compat_nfsctl_arg __user *arg, + union compat_nfsctl_res __user *res); +#else +long asmlinkage compat_sys_nfsservctl(int cmd, void *notused, void *notused2); +#endif +asmlinkage long compat_sys_signalfd4(int ufd, + const compat_sigset_t __user *sigmask, + compat_size_t sigsetsize, int flags); +asmlinkage long compat_sys_get_mempolicy(int __user *policy, + compat_ulong_t __user *nmask, + compat_ulong_t maxnode, + compat_ulong_t addr, + compat_ulong_t flags); +asmlinkage long compat_sys_set_mempolicy(int mode, compat_ulong_t __user *nmask, + compat_ulong_t maxnode); +asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len, + compat_ulong_t mode, + compat_ulong_t __user *nmask, + compat_ulong_t maxnode, compat_ulong_t flags); + +asmlinkage long compat_sys_setsockopt(int fd, int level, int optname, + char __user *optval, unsigned int optlen); +asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, + unsigned flags); +asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, + unsigned int flags); +asmlinkage long compat_sys_recv(int fd, void __user *buf, size_t len, + unsigned flags); +asmlinkage long compat_sys_recvfrom(int fd, void __user *buf, size_t len, + unsigned flags, struct sockaddr __user *addr, + int __user *addrlen); +asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg, + unsigned vlen, unsigned int flags, + struct compat_timespec __user *timeout); +asmlinkage long compat_sys_nanosleep(struct compat_timespec __user *rqtp, + struct compat_timespec __user *rmtp); +asmlinkage long compat_sys_getitimer(int which, + struct compat_itimerval __user *it); +asmlinkage long compat_sys_setitimer(int which, + struct compat_itimerval __user *in, + struct compat_itimerval __user *out); +asmlinkage long compat_sys_times(struct compat_tms __user *tbuf); +asmlinkage long compat_sys_setrlimit(unsigned int resource, + struct compat_rlimit __user *rlim); +asmlinkage long compat_sys_getrlimit (unsigned int resource, + struct compat_rlimit __user *rlim); +asmlinkage long compat_sys_getrusage(int who, struct compat_rusage __user *ru); +asmlinkage long compat_sys_sched_setaffinity(compat_pid_t pid, + unsigned int len, + compat_ulong_t __user *user_mask_ptr); +asmlinkage long compat_sys_sched_getaffinity(compat_pid_t pid, + unsigned int len, + compat_ulong_t __user *user_mask_ptr); +asmlinkage long compat_sys_timer_create(clockid_t which_clock, + struct compat_sigevent __user *timer_event_spec, + timer_t __user *created_timer_id); +asmlinkage long compat_sys_timer_settime(timer_t timer_id, int flags, + struct compat_itimerspec __user *new, + struct compat_itimerspec __user *old); +asmlinkage long compat_sys_timer_gettime(timer_t timer_id, + struct compat_itimerspec __user *setting); +asmlinkage long compat_sys_clock_settime(clockid_t which_clock, + struct compat_timespec __user *tp); +asmlinkage long compat_sys_clock_gettime(clockid_t which_clock, + struct compat_timespec __user *tp); +asmlinkage long compat_sys_clock_adjtime(clockid_t which_clock, + struct compat_timex __user *tp); +asmlinkage long compat_sys_clock_getres(clockid_t which_clock, + struct compat_timespec __user *tp); +asmlinkage long compat_sys_clock_nanosleep(clockid_t which_clock, int flags, + struct compat_timespec __user *rqtp, + struct compat_timespec __user *rmtp); +asmlinkage long compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese, + struct compat_siginfo __user *uinfo, + struct compat_timespec __user *uts, compat_size_t sigsetsize); +asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset, + compat_size_t sigsetsize); +asmlinkage long compat_sys_sysinfo(struct compat_sysinfo __user *info); +asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, + unsigned long arg); +asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val, + struct compat_timespec __user *utime, u32 __user *uaddr2, + u32 val3); +asmlinkage long compat_sys_getsockopt(int fd, int level, int optname, + char __user *optval, int __user *optlen); +asmlinkage long compat_sys_kexec_load(unsigned long entry, + unsigned long nr_segments, + struct compat_kexec_segment __user *, + unsigned long flags); +asmlinkage long compat_sys_mq_getsetattr(mqd_t mqdes, + const struct compat_mq_attr __user *u_mqstat, + struct compat_mq_attr __user *u_omqstat); +asmlinkage long compat_sys_mq_notify(mqd_t mqdes, + const struct compat_sigevent __user *u_notification); +asmlinkage long compat_sys_mq_open(const char __user *u_name, + int oflag, compat_mode_t mode, + struct compat_mq_attr __user *u_attr); +asmlinkage long compat_sys_mq_timedsend(mqd_t mqdes, + const char __user *u_msg_ptr, + size_t msg_len, unsigned int msg_prio, + const struct compat_timespec __user *u_abs_timeout); +asmlinkage ssize_t compat_sys_mq_timedreceive(mqd_t mqdes, + char __user *u_msg_ptr, + size_t msg_len, unsigned int __user *u_msg_prio, + const struct compat_timespec __user *u_abs_timeout); +asmlinkage long compat_sys_socketcall(int call, u32 __user *args); +asmlinkage long compat_sys_sysctl(struct compat_sysctl_args __user *args); extern ssize_t compat_rw_copy_check_uvector(int type, const struct compat_iovec __user *uvector, unsigned long nr_segs, diff --git a/kernel/compat.c b/kernel/compat.c index 38b1d2c1cbe8..80c28562f57b 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -293,6 +293,8 @@ asmlinkage long compat_sys_times(struct compat_tms __user *tbuf) return compat_jiffies_to_clock_t(jiffies); } +#ifdef __ARCH_WANT_SYS_SIGPENDING + /* * Assumption: old_sigset_t and compat_old_sigset_t are both * types that can be passed to put_user()/get_user(). @@ -312,6 +314,10 @@ asmlinkage long compat_sys_sigpending(compat_old_sigset_t __user *set) return ret; } +#endif + +#ifdef __ARCH_WANT_SYS_SIGPROCMASK + asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *set, compat_old_sigset_t __user *oset) { @@ -333,6 +339,8 @@ asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *set, return ret; } +#endif + asmlinkage long compat_sys_setrlimit(unsigned int resource, struct compat_rlimit __user *rlim) { -- cgit v1.2.3 From a934a00a69e940b126b9bdbf83e630ef5fe43523 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 18 May 2011 10:37:35 +0200 Subject: block: Fix discard topology stacking and reporting In some cases we would end up stacking discard_zeroes_data incorrectly. Fix this by enabling the feature by default for stacking drivers and clearing it for low-level drivers. Incorporating a device that does not support dzd will then cause the feature to be disabled in the stacking driver. Also ensure that the maximum discard value does not overflow when exported in sysfs and return 0 in the alignment and dzd fields for devices that don't support discard. Reported-by: Lukas Czerner Signed-off-by: Martin K. Petersen Acked-by: Mike Snitzer Cc: stable@kernel.org Signed-off-by: Jens Axboe --- block/blk-settings.c | 3 ++- block/blk-sysfs.c | 3 ++- include/linux/blkdev.h | 7 +++++-- 3 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/block/blk-settings.c b/block/blk-settings.c index cd3c428e194f..fa1eb0449a05 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -120,7 +120,7 @@ void blk_set_default_limits(struct queue_limits *lim) lim->discard_granularity = 0; lim->discard_alignment = 0; lim->discard_misaligned = 0; - lim->discard_zeroes_data = -1; + lim->discard_zeroes_data = 1; lim->logical_block_size = lim->physical_block_size = lim->io_min = 512; lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT); lim->alignment_offset = 0; @@ -166,6 +166,7 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn) blk_set_default_limits(&q->limits); blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS); + q->limits.discard_zeroes_data = 0; /* * by default assume old behaviour and bounce for any highmem page diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 6d735122bc59..53bd0c77bfda 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -152,7 +152,8 @@ static ssize_t queue_discard_granularity_show(struct request_queue *q, char *pag static ssize_t queue_discard_max_show(struct request_queue *q, char *page) { - return queue_var_show(q->limits.max_discard_sectors << 9, page); + return sprintf(page, "%llu\n", + (unsigned long long)q->limits.max_discard_sectors << 9); } static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9f921bf4bf8c..520d8618ed76 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -257,7 +257,7 @@ struct queue_limits { unsigned char misaligned; unsigned char discard_misaligned; unsigned char cluster; - signed char discard_zeroes_data; + unsigned char discard_zeroes_data; }; struct request_queue @@ -1069,13 +1069,16 @@ static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector { unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1); + if (!lim->max_discard_sectors) + return 0; + return (lim->discard_granularity + lim->discard_alignment - alignment) & (lim->discard_granularity - 1); } static inline unsigned int queue_discard_zeroes_data(struct request_queue *q) { - if (q->limits.discard_zeroes_data == 1) + if (q->limits.max_discard_sectors && q->limits.discard_zeroes_data == 1) return 1; return 0; -- cgit v1.2.3 From 7176ba23f8b589b1df3229574ff46fb904ce9ec5 Mon Sep 17 00:00:00 2001 From: Rhyland Klein Date: Mon, 16 May 2011 14:41:48 -0700 Subject: net: rfkill: add generic gpio rfkill driver This adds a new generic gpio rfkill driver to support rfkill switches which are controlled by gpios. The driver also supports passing in data about the clock for the radio, so that when rfkill is blocking, it can disable the clock. This driver assumes platform data is passed from the board files to configure it for specific devices. Original-patch-by: Anantha Idapalapati Signed-off-by: Rhyland Klein Signed-off-by: John W. Linville --- include/linux/rfkill-gpio.h | 43 +++++++++ net/rfkill/Kconfig | 9 ++ net/rfkill/Makefile | 1 + net/rfkill/rfkill-gpio.c | 227 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 280 insertions(+) create mode 100644 include/linux/rfkill-gpio.h create mode 100644 net/rfkill/rfkill-gpio.c (limited to 'include/linux') diff --git a/include/linux/rfkill-gpio.h b/include/linux/rfkill-gpio.h new file mode 100644 index 000000000000..a175d0598033 --- /dev/null +++ b/include/linux/rfkill-gpio.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2011, NVIDIA Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + + +#ifndef __RFKILL_GPIO_H +#define __RFKILL_GPIO_H + +#include +#include + +/** + * struct rfkill_gpio_platform_data - platform data for rfkill gpio device. + * for unused gpio's, the expected value is -1. + * @name: name for the gpio rf kill instance + * @reset_gpio: GPIO which is used for reseting rfkill switch + * @shutdown_gpio: GPIO which is used for shutdown of rfkill switch + * @power_clk_name: [optional] name of clk to turn off while blocked + */ + +struct rfkill_gpio_platform_data { + char *name; + int reset_gpio; + int shutdown_gpio; + const char *power_clk_name; + enum rfkill_type type; +}; + +#endif /* __RFKILL_GPIO_H */ diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig index 48464ca13b24..78efe895b663 100644 --- a/net/rfkill/Kconfig +++ b/net/rfkill/Kconfig @@ -33,3 +33,12 @@ config RFKILL_REGULATOR To compile this driver as a module, choose M here: the module will be called rfkill-regulator. + +config RFKILL_GPIO + tristate "GPIO RFKILL driver" + depends on RFKILL && GPIOLIB && HAVE_CLK + default n + help + If you say yes here you get support of a generic gpio RFKILL + driver. The platform should fill in the appropriate fields in the + rfkill_gpio_platform_data structure and pass that to the driver. diff --git a/net/rfkill/Makefile b/net/rfkill/Makefile index d9a5a58ffd8c..311768783f4a 100644 --- a/net/rfkill/Makefile +++ b/net/rfkill/Makefile @@ -6,3 +6,4 @@ rfkill-y += core.o rfkill-$(CONFIG_RFKILL_INPUT) += input.o obj-$(CONFIG_RFKILL) += rfkill.o obj-$(CONFIG_RFKILL_REGULATOR) += rfkill-regulator.o +obj-$(CONFIG_RFKILL_GPIO) += rfkill-gpio.o diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c new file mode 100644 index 000000000000..256c5ddd2d72 --- /dev/null +++ b/net/rfkill/rfkill-gpio.c @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2011, NVIDIA Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +enum rfkill_gpio_clk_state { + UNSPECIFIED = 0, + PWR_ENABLED, + PWR_DISABLED +}; + +#define PWR_CLK_SET(_RF, _EN) \ + ((_RF)->pwr_clk_enabled = (!(_EN) ? PWR_ENABLED : PWR_DISABLED)) +#define PWR_CLK_ENABLED(_RF) ((_RF)->pwr_clk_enabled == PWR_ENABLED) +#define PWR_CLK_DISABLED(_RF) ((_RF)->pwr_clk_enabled != PWR_ENABLED) + +struct rfkill_gpio_data { + struct rfkill_gpio_platform_data *pdata; + struct rfkill *rfkill_dev; + char *reset_name; + char *shutdown_name; + enum rfkill_gpio_clk_state pwr_clk_enabled; + struct clk *pwr_clk; +}; + +static int rfkill_gpio_set_power(void *data, bool blocked) +{ + struct rfkill_gpio_data *rfkill = data; + + if (blocked) { + if (gpio_is_valid(rfkill->pdata->shutdown_gpio)) + gpio_direction_output(rfkill->pdata->shutdown_gpio, 0); + if (gpio_is_valid(rfkill->pdata->reset_gpio)) + gpio_direction_output(rfkill->pdata->reset_gpio, 0); + if (rfkill->pwr_clk && PWR_CLK_ENABLED(rfkill)) + clk_disable(rfkill->pwr_clk); + } else { + if (rfkill->pwr_clk && PWR_CLK_DISABLED(rfkill)) + clk_enable(rfkill->pwr_clk); + if (gpio_is_valid(rfkill->pdata->reset_gpio)) + gpio_direction_output(rfkill->pdata->reset_gpio, 1); + if (gpio_is_valid(rfkill->pdata->shutdown_gpio)) + gpio_direction_output(rfkill->pdata->shutdown_gpio, 1); + } + + if (rfkill->pwr_clk) + PWR_CLK_SET(rfkill, blocked); + + return 0; +} + +static const struct rfkill_ops rfkill_gpio_ops = { + .set_block = rfkill_gpio_set_power, +}; + +static int rfkill_gpio_probe(struct platform_device *pdev) +{ + struct rfkill_gpio_data *rfkill; + struct rfkill_gpio_platform_data *pdata = pdev->dev.platform_data; + int ret = 0; + int len = 0; + + if (!pdata) { + pr_warn("%s: No platform data specified\n", __func__); + return -EINVAL; + } + + /* make sure at-least one of the GPIO is defined and that + * a name is specified for this instance */ + if (!pdata->name || (!gpio_is_valid(pdata->reset_gpio) && + !gpio_is_valid(pdata->shutdown_gpio))) { + pr_warn("%s: invalid platform data\n", __func__); + return -EINVAL; + } + + rfkill = kzalloc(sizeof(*rfkill), GFP_KERNEL); + if (!rfkill) + return -ENOMEM; + + rfkill->pdata = pdata; + + len = strlen(pdata->name); + rfkill->reset_name = kzalloc(len + 7, GFP_KERNEL); + if (!rfkill->reset_name) { + ret = -ENOMEM; + goto fail_alloc; + } + + rfkill->shutdown_name = kzalloc(len + 10, GFP_KERNEL); + if (!rfkill->shutdown_name) { + ret = -ENOMEM; + goto fail_reset_name; + } + + snprintf(rfkill->reset_name, len + 6 , "%s_reset", pdata->name); + snprintf(rfkill->shutdown_name, len + 9, "%s_shutdown", pdata->name); + + if (pdata->power_clk_name) { + rfkill->pwr_clk = clk_get(&pdev->dev, pdata->power_clk_name); + if (IS_ERR(rfkill->pwr_clk)) { + pr_warn("%s: can't find pwr_clk.\n", __func__); + goto fail_shutdown_name; + } + } + + if (gpio_is_valid(pdata->reset_gpio)) { + ret = gpio_request(pdata->reset_gpio, rfkill->reset_name); + if (ret) { + pr_warn("%s: failed to get reset gpio.\n", __func__); + goto fail_clock; + } + } + + if (gpio_is_valid(pdata->shutdown_gpio)) { + ret = gpio_request(pdata->shutdown_gpio, rfkill->shutdown_name); + if (ret) { + pr_warn("%s: failed to get shutdown gpio.\n", __func__); + goto fail_reset; + } + } + + rfkill->rfkill_dev = rfkill_alloc(pdata->name, &pdev->dev, pdata->type, + &rfkill_gpio_ops, rfkill); + if (!rfkill->rfkill_dev) + goto fail_shutdown; + + ret = rfkill_register(rfkill->rfkill_dev); + if (ret < 0) + goto fail_rfkill; + + platform_set_drvdata(pdev, rfkill); + + dev_info(&pdev->dev, "%s device registered.\n", pdata->name); + + return 0; + +fail_rfkill: + rfkill_destroy(rfkill->rfkill_dev); +fail_shutdown: + if (gpio_is_valid(pdata->shutdown_gpio)) + gpio_free(pdata->shutdown_gpio); +fail_reset: + if (gpio_is_valid(pdata->reset_gpio)) + gpio_free(pdata->reset_gpio); +fail_clock: + if (rfkill->pwr_clk) + clk_put(rfkill->pwr_clk); +fail_shutdown_name: + kfree(rfkill->shutdown_name); +fail_reset_name: + kfree(rfkill->reset_name); +fail_alloc: + kfree(rfkill); + + return ret; +} + +static int rfkill_gpio_remove(struct platform_device *pdev) +{ + struct rfkill_gpio_data *rfkill = platform_get_drvdata(pdev); + + rfkill_unregister(rfkill->rfkill_dev); + rfkill_destroy(rfkill->rfkill_dev); + if (gpio_is_valid(rfkill->pdata->shutdown_gpio)) + gpio_free(rfkill->pdata->shutdown_gpio); + if (gpio_is_valid(rfkill->pdata->reset_gpio)) + gpio_free(rfkill->pdata->reset_gpio); + if (rfkill->pwr_clk && PWR_CLK_ENABLED(rfkill)) + clk_disable(rfkill->pwr_clk); + if (rfkill->pwr_clk) + clk_put(rfkill->pwr_clk); + kfree(rfkill->shutdown_name); + kfree(rfkill->reset_name); + kfree(rfkill); + + return 0; +} + +static struct platform_driver rfkill_gpio_driver = { + .probe = rfkill_gpio_probe, + .remove = __devexit_p(rfkill_gpio_remove), + .driver = { + .name = "rfkill_gpio", + .owner = THIS_MODULE, + }, +}; + +static int __init rfkill_gpio_init(void) +{ + return platform_driver_register(&rfkill_gpio_driver); +} + +static void __exit rfkill_gpio_exit(void) +{ + platform_driver_unregister(&rfkill_gpio_driver); +} + +module_init(rfkill_gpio_init); +module_exit(rfkill_gpio_exit); + +MODULE_DESCRIPTION("gpio rfkill"); +MODULE_AUTHOR("NVIDIA"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 4800a5bb13c09a572f7c74662a77c9eca229eba1 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 17 May 2011 14:41:06 -0400 Subject: include/linux/compat.h: coding-style fixes I touched this file when adding support for the "tilegx" sub-architecture, and Andrew Morton observed "The file's a mismash of old-style, wrong-style and right-style. There's no point in doing mishmash preservation! May as well fix things up when we touch them." Accordingly, this change makes as checkpatch-clean as possible. It makes no semantic changes whatsoever. Signed-off-by: Chris Metcalf Signed-off-by: Andrew Morton --- include/linux/compat.h | 74 ++++++++++++++++++++++++++++---------------------- 1 file changed, 41 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index e94184834b71..644e1a4692b1 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -27,7 +27,7 @@ typedef __compat_gid32_t compat_gid_t; struct compat_sel_arg_struct; struct rusage; -struct compat_itimerspec { +struct compat_itimerspec { struct compat_timespec it_interval; struct compat_timespec it_value; }; @@ -71,9 +71,9 @@ struct compat_timex { compat_long_t stbcnt; compat_int_t tai; - compat_int_t :32; compat_int_t :32; compat_int_t :32; compat_int_t :32; - compat_int_t :32; compat_int_t :32; compat_int_t :32; compat_int_t :32; - compat_int_t :32; compat_int_t :32; compat_int_t :32; + compat_int_t:32; compat_int_t:32; compat_int_t:32; compat_int_t:32; + compat_int_t:32; compat_int_t:32; compat_int_t:32; compat_int_t:32; + compat_int_t:32; compat_int_t:32; compat_int_t:32; }; #define _COMPAT_NSIG_WORDS (_COMPAT_NSIG / _COMPAT_NSIG_BPW) @@ -82,8 +82,10 @@ typedef struct { compat_sigset_word sig[_COMPAT_NSIG_WORDS]; } compat_sigset_t; -extern int get_compat_timespec(struct timespec *, const struct compat_timespec __user *); -extern int put_compat_timespec(const struct timespec *, struct compat_timespec __user *); +extern int get_compat_timespec(struct timespec *, + const struct compat_timespec __user *); +extern int put_compat_timespec(const struct timespec *, + struct compat_timespec __user *); struct compat_iovec { compat_uptr_t iov_base; @@ -114,7 +116,8 @@ struct compat_rusage { compat_long_t ru_nivcsw; }; -extern int put_compat_rusage(const struct rusage *, struct compat_rusage __user *); +extern int put_compat_rusage(const struct rusage *, + struct compat_rusage __user *); struct compat_siginfo; @@ -167,8 +170,7 @@ struct compat_ifmap { unsigned char port; }; -struct compat_if_settings -{ +struct compat_if_settings { unsigned int type; /* Type of physical device or protocol */ unsigned int size; /* Size of the data allocated by the caller */ compat_uptr_t ifs_ifsu; /* union of pointers */ @@ -196,8 +198,8 @@ struct compat_ifreq { }; struct compat_ifconf { - compat_int_t ifc_len; /* size of buffer */ - compat_caddr_t ifcbuf; + compat_int_t ifc_len; /* size of buffer */ + compat_caddr_t ifcbuf; }; struct compat_robust_list { @@ -256,8 +258,8 @@ asmlinkage ssize_t compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec, unsigned long vlen, u32 pos_low, u32 pos_high); -int compat_do_execve(char * filename, compat_uptr_t __user *argv, - compat_uptr_t __user *envp, struct pt_regs * regs); +int compat_do_execve(char *filename, compat_uptr_t __user *argv, + compat_uptr_t __user *envp, struct pt_regs *regs); asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, @@ -346,14 +348,16 @@ asmlinkage long compat_sys_epoll_pwait(int epfd, asmlinkage long compat_sys_utime(const char __user *filename, struct compat_utimbuf __user *t); -asmlinkage long compat_sys_utimensat(unsigned int dfd, const char __user *filename, - struct compat_timespec __user *t, int flags); +asmlinkage long compat_sys_utimensat(unsigned int dfd, + const char __user *filename, + struct compat_timespec __user *t, + int flags); asmlinkage long compat_sys_time(compat_time_t __user *tloc); asmlinkage long compat_sys_stime(compat_time_t __user *tptr); asmlinkage long compat_sys_signalfd(int ufd, - const compat_sigset_t __user *sigmask, - compat_size_t sigsetsize); + const compat_sigset_t __user *sigmask, + compat_size_t sigsetsize); asmlinkage long compat_sys_timerfd_settime(int ufd, int flags, const struct compat_itimerspec __user *utmr, struct compat_itimerspec __user *otmr); @@ -365,19 +369,21 @@ asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_page, const int __user *nodes, int __user *status, int flags); -asmlinkage long compat_sys_futimesat(unsigned int dfd, const char __user *filename, +asmlinkage long compat_sys_futimesat(unsigned int dfd, + const char __user *filename, struct compat_timeval __user *t); asmlinkage long compat_sys_utimes(const char __user *filename, struct compat_timeval __user *t); -asmlinkage long compat_sys_newstat(const char __user * filename, +asmlinkage long compat_sys_newstat(const char __user *filename, struct compat_stat __user *statbuf); -asmlinkage long compat_sys_newlstat(const char __user * filename, +asmlinkage long compat_sys_newlstat(const char __user *filename, struct compat_stat __user *statbuf); -asmlinkage long compat_sys_newfstatat(unsigned int dfd, const char __user * filename, +asmlinkage long compat_sys_newfstatat(unsigned int dfd, + const char __user *filename, struct compat_stat __user *statbuf, int flag); asmlinkage long compat_sys_newfstat(unsigned int fd, - struct compat_stat __user * statbuf); + struct compat_stat __user *statbuf); asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf); asmlinkage long compat_sys_fstatfs(unsigned int fd, @@ -399,10 +405,10 @@ asmlinkage long compat_sys_io_getevents(aio_context_t ctx_id, struct compat_timespec __user *timeout); asmlinkage long compat_sys_io_submit(aio_context_t ctx_id, int nr, u32 __user *iocb); -asmlinkage long compat_sys_mount(const char __user * dev_name, - const char __user * dir_name, - const char __user * type, unsigned long flags, - const void __user * data); +asmlinkage long compat_sys_mount(const char __user *dev_name, + const char __user *dir_name, + const char __user *type, unsigned long flags, + const void __user *data); asmlinkage long compat_sys_old_readdir(unsigned int fd, struct compat_old_linux_dirent __user *, unsigned int count); @@ -410,7 +416,7 @@ asmlinkage long compat_sys_getdents(unsigned int fd, struct compat_linux_dirent __user *dirent, unsigned int count); asmlinkage long compat_sys_getdents64(unsigned int fd, - struct linux_dirent64 __user * dirent, + struct linux_dirent64 __user *dirent, unsigned int count); asmlinkage long compat_sys_vmsplice(int fd, const struct compat_iovec __user *, unsigned int nr_segs, unsigned int flags); @@ -431,14 +437,15 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, struct compat_timespec __user *tsp, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize); -#if (defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)) && !defined(CONFIG_NFSD_DEPRECATED) +#if (defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)) && \ + !defined(CONFIG_NFSD_DEPRECATED) union compat_nfsctl_res; struct compat_nfsctl_arg; asmlinkage long compat_sys_nfsservctl(int cmd, struct compat_nfsctl_arg __user *arg, union compat_nfsctl_res __user *res); #else -long asmlinkage compat_sys_nfsservctl(int cmd, void *notused, void *notused2); +asmlinkage long compat_sys_nfsservctl(int cmd, void *notused, void *notused2); #endif asmlinkage long compat_sys_signalfd4(int ufd, const compat_sigset_t __user *sigmask, @@ -479,8 +486,8 @@ asmlinkage long compat_sys_setitimer(int which, asmlinkage long compat_sys_times(struct compat_tms __user *tbuf); asmlinkage long compat_sys_setrlimit(unsigned int resource, struct compat_rlimit __user *rlim); -asmlinkage long compat_sys_getrlimit (unsigned int resource, - struct compat_rlimit __user *rlim); +asmlinkage long compat_sys_getrlimit(unsigned int resource, + struct compat_rlimit __user *rlim); asmlinkage long compat_sys_getrusage(int who, struct compat_rusage __user *ru); asmlinkage long compat_sys_sched_setaffinity(compat_pid_t pid, unsigned int len, @@ -507,7 +514,7 @@ asmlinkage long compat_sys_clock_getres(clockid_t which_clock, asmlinkage long compat_sys_clock_nanosleep(clockid_t which_clock, int flags, struct compat_timespec __user *rqtp, struct compat_timespec __user *rmtp); -asmlinkage long compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese, +asmlinkage long compat_sys_rt_sigtimedwait(compat_sigset_t __user *uthese, struct compat_siginfo __user *uinfo, struct compat_timespec __user *uts, compat_size_t sigsetsize); asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset, @@ -544,7 +551,8 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args); asmlinkage long compat_sys_sysctl(struct compat_sysctl_args __user *args); extern ssize_t compat_rw_copy_check_uvector(int type, - const struct compat_iovec __user *uvector, unsigned long nr_segs, + const struct compat_iovec __user *uvector, + unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_pointer, struct iovec **ret_pointer); -- cgit v1.2.3 From 771949d03b4f5295f648f09141325fd478f6c7ce Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 20 May 2011 20:52:16 +0200 Subject: block: get rid of on-stack plugging debug checks We don't need them anymore, so kill: - REQ_ON_PLUG checks in various places - !rq_mergeable() check in plug merging Signed-off-by: Jens Axboe --- block/blk-core.c | 27 --------------------------- block/elevator.c | 4 ---- include/linux/blk_types.h | 2 -- 3 files changed, 33 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 9e8e297374b9..7369eeeafe23 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -569,8 +569,6 @@ int blk_get_queue(struct request_queue *q) static inline void blk_free_request(struct request_queue *q, struct request *rq) { - BUG_ON(rq->cmd_flags & REQ_ON_PLUG); - if (rq->cmd_flags & REQ_ELVPRIV) elv_put_request(q, rq); mempool_free(rq, q->rq.rq_pool); @@ -1110,14 +1108,6 @@ static bool bio_attempt_back_merge(struct request_queue *q, struct request *req, { const int ff = bio->bi_rw & REQ_FAILFAST_MASK; - /* - * Debug stuff, kill later - */ - if (!rq_mergeable(req)) { - blk_dump_rq_flags(req, "back"); - return false; - } - if (!ll_back_merge_fn(q, req, bio)) return false; @@ -1141,14 +1131,6 @@ static bool bio_attempt_front_merge(struct request_queue *q, const int ff = bio->bi_rw & REQ_FAILFAST_MASK; sector_t sector; - /* - * Debug stuff, kill later - */ - if (!rq_mergeable(req)) { - blk_dump_rq_flags(req, "front"); - return false; - } - if (!ll_front_merge_fn(q, req, bio)) return false; @@ -1258,14 +1240,12 @@ static int __make_request(struct request_queue *q, struct bio *bio) el_ret = elv_merge(q, &req, bio); if (el_ret == ELEVATOR_BACK_MERGE) { - BUG_ON(req->cmd_flags & REQ_ON_PLUG); if (bio_attempt_back_merge(q, req, bio)) { if (!attempt_back_merge(q, req)) elv_merged_request(q, req, el_ret); goto out_unlock; } } else if (el_ret == ELEVATOR_FRONT_MERGE) { - BUG_ON(req->cmd_flags & REQ_ON_PLUG); if (bio_attempt_front_merge(q, req, bio)) { if (!attempt_front_merge(q, req)) elv_merged_request(q, req, el_ret); @@ -1320,10 +1300,6 @@ get_rq: if (__rq->q != q) plug->should_sort = 1; } - /* - * Debug flag, kill later - */ - req->cmd_flags |= REQ_ON_PLUG; list_add_tail(&req->queuelist, &plug->list); drive_stat_acct(req, 1); } else { @@ -2749,7 +2725,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) while (!list_empty(&list)) { rq = list_entry_rq(list.next); list_del_init(&rq->queuelist); - BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG)); BUG_ON(!rq->q); if (rq->q != q) { /* @@ -2761,8 +2736,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) depth = 0; spin_lock(q->queue_lock); } - rq->cmd_flags &= ~REQ_ON_PLUG; - /* * rq is already accounted, so use raw insert */ diff --git a/block/elevator.c b/block/elevator.c index 2a0b653c90fd..b0b38ce0dcb6 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -416,8 +416,6 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq) struct list_head *entry; int stop_flags; - BUG_ON(rq->cmd_flags & REQ_ON_PLUG); - if (q->last_merge == rq) q->last_merge = NULL; @@ -656,8 +654,6 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where) rq->q = q; - BUG_ON(rq->cmd_flags & REQ_ON_PLUG); - if (rq->cmd_flags & REQ_SOFTBARRIER) { /* barriers are scheduling boundary, update end_sector */ if (rq->cmd_type == REQ_TYPE_FS || diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index be50d9e70a7d..2a7cea53ca0d 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -151,7 +151,6 @@ enum rq_flag_bits { __REQ_IO_STAT, /* account I/O stat */ __REQ_MIXED_MERGE, /* merge of different types, fail separately */ __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */ - __REQ_ON_PLUG, /* on plug list */ __REQ_NR_BITS, /* stops here */ }; @@ -192,6 +191,5 @@ enum rq_flag_bits { #define REQ_IO_STAT (1 << __REQ_IO_STAT) #define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE) #define REQ_SECURE (1 << __REQ_SECURE) -#define REQ_ON_PLUG (1 << __REQ_ON_PLUG) #endif /* __LINUX_BLK_TYPES_H */ -- cgit v1.2.3 From d94ba80ebbea17f036cecb104398fbcd788aa742 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Fri, 22 Apr 2011 12:03:08 +0200 Subject: ptp: Added a brand new class driver for ptp clocks. This patch adds an infrastructure for hardware clocks that implement IEEE 1588, the Precision Time Protocol (PTP). A class driver offers a registration method to particular hardware clock drivers. Each clock is presented as a standard POSIX clock. The ancillary clock features are exposed in two different ways, via the sysfs and by a character device. Signed-off-by: Richard Cochran Acked-by: Arnd Bergmann Acked-by: David S. Miller Signed-off-by: John Stultz --- Documentation/ABI/testing/sysfs-ptp | 98 ++++++++++ Documentation/ptp/ptp.txt | 89 +++++++++ Documentation/ptp/testptp.c | 381 ++++++++++++++++++++++++++++++++++++ Documentation/ptp/testptp.mk | 33 ++++ drivers/Kconfig | 2 + drivers/Makefile | 1 + drivers/ptp/Kconfig | 30 +++ drivers/ptp/Makefile | 6 + drivers/ptp/ptp_chardev.c | 159 +++++++++++++++ drivers/ptp/ptp_clock.c | 343 ++++++++++++++++++++++++++++++++ drivers/ptp/ptp_private.h | 92 +++++++++ drivers/ptp/ptp_sysfs.c | 230 ++++++++++++++++++++++ include/linux/Kbuild | 1 + include/linux/ptp_classify.h | 7 + include/linux/ptp_clock.h | 84 ++++++++ include/linux/ptp_clock_kernel.h | 139 +++++++++++++ 16 files changed, 1695 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-ptp create mode 100644 Documentation/ptp/ptp.txt create mode 100644 Documentation/ptp/testptp.c create mode 100644 Documentation/ptp/testptp.mk create mode 100644 drivers/ptp/Kconfig create mode 100644 drivers/ptp/Makefile create mode 100644 drivers/ptp/ptp_chardev.c create mode 100644 drivers/ptp/ptp_clock.c create mode 100644 drivers/ptp/ptp_private.h create mode 100644 drivers/ptp/ptp_sysfs.c create mode 100644 include/linux/ptp_clock.h create mode 100644 include/linux/ptp_clock_kernel.h (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-ptp b/Documentation/ABI/testing/sysfs-ptp new file mode 100644 index 000000000000..d40d2b550502 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-ptp @@ -0,0 +1,98 @@ +What: /sys/class/ptp/ +Date: September 2010 +Contact: Richard Cochran +Description: + This directory contains files and directories + providing a standardized interface to the ancillary + features of PTP hardware clocks. + +What: /sys/class/ptp/ptpN/ +Date: September 2010 +Contact: Richard Cochran +Description: + This directory contains the attributes of the Nth PTP + hardware clock registered into the PTP class driver + subsystem. + +What: /sys/class/ptp/ptpN/clock_name +Date: September 2010 +Contact: Richard Cochran +Description: + This file contains the name of the PTP hardware clock + as a human readable string. + +What: /sys/class/ptp/ptpN/max_adjustment +Date: September 2010 +Contact: Richard Cochran +Description: + This file contains the PTP hardware clock's maximum + frequency adjustment value (a positive integer) in + parts per billion. + +What: /sys/class/ptp/ptpN/n_alarms +Date: September 2010 +Contact: Richard Cochran +Description: + This file contains the number of periodic or one shot + alarms offer by the PTP hardware clock. + +What: /sys/class/ptp/ptpN/n_external_timestamps +Date: September 2010 +Contact: Richard Cochran +Description: + This file contains the number of external timestamp + channels offered by the PTP hardware clock. + +What: /sys/class/ptp/ptpN/n_periodic_outputs +Date: September 2010 +Contact: Richard Cochran +Description: + This file contains the number of programmable periodic + output channels offered by the PTP hardware clock. + +What: /sys/class/ptp/ptpN/pps_avaiable +Date: September 2010 +Contact: Richard Cochran +Description: + This file indicates whether the PTP hardware clock + supports a Pulse Per Second to the host CPU. Reading + "1" means that the PPS is supported, while "0" means + not supported. + +What: /sys/class/ptp/ptpN/extts_enable +Date: September 2010 +Contact: Richard Cochran +Description: + This write-only file enables or disables external + timestamps. To enable external timestamps, write the + channel index followed by a "1" into the file. + To disable external timestamps, write the channel + index followed by a "0" into the file. + +What: /sys/class/ptp/ptpN/fifo +Date: September 2010 +Contact: Richard Cochran +Description: + This file provides timestamps on external events, in + the form of three integers: channel index, seconds, + and nanoseconds. + +What: /sys/class/ptp/ptpN/period +Date: September 2010 +Contact: Richard Cochran +Description: + This write-only file enables or disables periodic + outputs. To enable a periodic output, write five + integers into the file: channel index, start time + seconds, start time nanoseconds, period seconds, and + period nanoseconds. To disable a periodic output, set + all the seconds and nanoseconds values to zero. + +What: /sys/class/ptp/ptpN/pps_enable +Date: September 2010 +Contact: Richard Cochran +Description: + This write-only file enables or disables delivery of + PPS events to the Linux PPS subsystem. To enable PPS + events, write a "1" into the file. To disable events, + write a "0" into the file. diff --git a/Documentation/ptp/ptp.txt b/Documentation/ptp/ptp.txt new file mode 100644 index 000000000000..ae8fef86b832 --- /dev/null +++ b/Documentation/ptp/ptp.txt @@ -0,0 +1,89 @@ + +* PTP hardware clock infrastructure for Linux + + This patch set introduces support for IEEE 1588 PTP clocks in + Linux. Together with the SO_TIMESTAMPING socket options, this + presents a standardized method for developing PTP user space + programs, synchronizing Linux with external clocks, and using the + ancillary features of PTP hardware clocks. + + A new class driver exports a kernel interface for specific clock + drivers and a user space interface. The infrastructure supports a + complete set of PTP hardware clock functionality. + + + Basic clock operations + - Set time + - Get time + - Shift the clock by a given offset atomically + - Adjust clock frequency + + + Ancillary clock features + - One short or periodic alarms, with signal delivery to user program + - Time stamp external events + - Period output signals configurable from user space + - Synchronization of the Linux system time via the PPS subsystem + +** PTP hardware clock kernel API + + A PTP clock driver registers itself with the class driver. The + class driver handles all of the dealings with user space. The + author of a clock driver need only implement the details of + programming the clock hardware. The clock driver notifies the class + driver of asynchronous events (alarms and external time stamps) via + a simple message passing interface. + + The class driver supports multiple PTP clock drivers. In normal use + cases, only one PTP clock is needed. However, for testing and + development, it can be useful to have more than one clock in a + single system, in order to allow performance comparisons. + +** PTP hardware clock user space API + + The class driver also creates a character device for each + registered clock. User space can use an open file descriptor from + the character device as a POSIX clock id and may call + clock_gettime, clock_settime, and clock_adjtime. These calls + implement the basic clock operations. + + User space programs may control the clock using standardized + ioctls. A program may query, enable, configure, and disable the + ancillary clock features. User space can receive time stamped + events via blocking read() and poll(). One shot and periodic + signals may be configured via the POSIX timer_settime() system + call. + +** Writing clock drivers + + Clock drivers include include/linux/ptp_clock_kernel.h and register + themselves by presenting a 'struct ptp_clock_info' to the + registration method. Clock drivers must implement all of the + functions in the interface. If a clock does not offer a particular + ancillary feature, then the driver should just return -EOPNOTSUPP + from those functions. + + Drivers must ensure that all of the methods in interface are + reentrant. Since most hardware implementations treat the time value + as a 64 bit integer accessed as two 32 bit registers, drivers + should use spin_lock_irqsave/spin_unlock_irqrestore to protect + against concurrent access. This locking cannot be accomplished in + class driver, since the lock may also be needed by the clock + driver's interrupt service routine. + +** Supported hardware + + + Freescale eTSEC gianfar + - 2 Time stamp external triggers, programmable polarity (opt. interrupt) + - 2 Alarm registers (optional interrupt) + - 3 Periodic signals (optional interrupt) + + + National DP83640 + - 6 GPIOs programmable as inputs or outputs + - 6 GPIOs with dedicated functions (LED/JTAG/clock) can also be + used as general inputs or outputs + - GPIO inputs can time stamp external triggers + - GPIO outputs can produce periodic signals + - 1 interrupt pin + + + Intel IXP465 + - Auxiliary Slave/Master Mode Snapshot (optional interrupt) + - Target Time (optional interrupt) diff --git a/Documentation/ptp/testptp.c b/Documentation/ptp/testptp.c new file mode 100644 index 000000000000..f59ded066108 --- /dev/null +++ b/Documentation/ptp/testptp.c @@ -0,0 +1,381 @@ +/* + * PTP 1588 clock support - User space test program + * + * Copyright (C) 2010 OMICRON electronics GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define DEVICE "/dev/ptp0" + +#ifndef ADJ_SETOFFSET +#define ADJ_SETOFFSET 0x0100 +#endif + +#ifndef CLOCK_INVALID +#define CLOCK_INVALID -1 +#endif + +/* When glibc offers the syscall, this will go away. */ +#include +static int clock_adjtime(clockid_t id, struct timex *tx) +{ + return syscall(__NR_clock_adjtime, id, tx); +} + +static clockid_t get_clockid(int fd) +{ +#define CLOCKFD 3 +#define FD_TO_CLOCKID(fd) ((~(clockid_t) (fd) << 3) | CLOCKFD) + + return FD_TO_CLOCKID(fd); +} + +static void handle_alarm(int s) +{ + printf("received signal %d\n", s); +} + +static int install_handler(int signum, void (*handler)(int)) +{ + struct sigaction action; + sigset_t mask; + + /* Unblock the signal. */ + sigemptyset(&mask); + sigaddset(&mask, signum); + sigprocmask(SIG_UNBLOCK, &mask, NULL); + + /* Install the signal handler. */ + action.sa_handler = handler; + action.sa_flags = 0; + sigemptyset(&action.sa_mask); + sigaction(signum, &action, NULL); + + return 0; +} + +static long ppb_to_scaled_ppm(int ppb) +{ + /* + * The 'freq' field in the 'struct timex' is in parts per + * million, but with a 16 bit binary fractional field. + * Instead of calculating either one of + * + * scaled_ppm = (ppb / 1000) << 16 [1] + * scaled_ppm = (ppb << 16) / 1000 [2] + * + * we simply use double precision math, in order to avoid the + * truncation in [1] and the possible overflow in [2]. + */ + return (long) (ppb * 65.536); +} + +static void usage(char *progname) +{ + fprintf(stderr, + "usage: %s [options]\n" + " -a val request a one-shot alarm after 'val' seconds\n" + " -A val request a periodic alarm every 'val' seconds\n" + " -c query the ptp clock's capabilities\n" + " -d name device to open\n" + " -e val read 'val' external time stamp events\n" + " -f val adjust the ptp clock frequency by 'val' ppb\n" + " -g get the ptp clock time\n" + " -h prints this message\n" + " -p val enable output with a period of 'val' nanoseconds\n" + " -P val enable or disable (val=1|0) the system clock PPS\n" + " -s set the ptp clock time from the system time\n" + " -S set the system time from the ptp clock time\n" + " -t val shift the ptp clock time by 'val' seconds\n", + progname); +} + +int main(int argc, char *argv[]) +{ + struct ptp_clock_caps caps; + struct ptp_extts_event event; + struct ptp_extts_request extts_request; + struct ptp_perout_request perout_request; + struct timespec ts; + struct timex tx; + + static timer_t timerid; + struct itimerspec timeout; + struct sigevent sigevent; + + char *progname; + int c, cnt, fd; + + char *device = DEVICE; + clockid_t clkid; + int adjfreq = 0x7fffffff; + int adjtime = 0; + int capabilities = 0; + int extts = 0; + int gettime = 0; + int oneshot = 0; + int periodic = 0; + int perout = -1; + int pps = -1; + int settime = 0; + + progname = strrchr(argv[0], '/'); + progname = progname ? 1+progname : argv[0]; + while (EOF != (c = getopt(argc, argv, "a:A:cd:e:f:ghp:P:sSt:v"))) { + switch (c) { + case 'a': + oneshot = atoi(optarg); + break; + case 'A': + periodic = atoi(optarg); + break; + case 'c': + capabilities = 1; + break; + case 'd': + device = optarg; + break; + case 'e': + extts = atoi(optarg); + break; + case 'f': + adjfreq = atoi(optarg); + break; + case 'g': + gettime = 1; + break; + case 'p': + perout = atoi(optarg); + break; + case 'P': + pps = atoi(optarg); + break; + case 's': + settime = 1; + break; + case 'S': + settime = 2; + break; + case 't': + adjtime = atoi(optarg); + break; + case 'h': + usage(progname); + return 0; + case '?': + default: + usage(progname); + return -1; + } + } + + fd = open(device, O_RDWR); + if (fd < 0) { + fprintf(stderr, "opening %s: %s\n", device, strerror(errno)); + return -1; + } + + clkid = get_clockid(fd); + if (CLOCK_INVALID == clkid) { + fprintf(stderr, "failed to read clock id\n"); + return -1; + } + + if (capabilities) { + if (ioctl(fd, PTP_CLOCK_GETCAPS, &caps)) { + perror("PTP_CLOCK_GETCAPS"); + } else { + printf("capabilities:\n" + " %d maximum frequency adjustment (ppb)\n" + " %d programmable alarms\n" + " %d external time stamp channels\n" + " %d programmable periodic signals\n" + " %d pulse per second\n", + caps.max_adj, + caps.n_alarm, + caps.n_ext_ts, + caps.n_per_out, + caps.pps); + } + } + + if (0x7fffffff != adjfreq) { + memset(&tx, 0, sizeof(tx)); + tx.modes = ADJ_FREQUENCY; + tx.freq = ppb_to_scaled_ppm(adjfreq); + if (clock_adjtime(clkid, &tx)) { + perror("clock_adjtime"); + } else { + puts("frequency adjustment okay"); + } + } + + if (adjtime) { + memset(&tx, 0, sizeof(tx)); + tx.modes = ADJ_SETOFFSET; + tx.time.tv_sec = adjtime; + tx.time.tv_usec = 0; + if (clock_adjtime(clkid, &tx) < 0) { + perror("clock_adjtime"); + } else { + puts("time shift okay"); + } + } + + if (gettime) { + if (clock_gettime(clkid, &ts)) { + perror("clock_gettime"); + } else { + printf("clock time: %ld.%09ld or %s", + ts.tv_sec, ts.tv_nsec, ctime(&ts.tv_sec)); + } + } + + if (settime == 1) { + clock_gettime(CLOCK_REALTIME, &ts); + if (clock_settime(clkid, &ts)) { + perror("clock_settime"); + } else { + puts("set time okay"); + } + } + + if (settime == 2) { + clock_gettime(clkid, &ts); + if (clock_settime(CLOCK_REALTIME, &ts)) { + perror("clock_settime"); + } else { + puts("set time okay"); + } + } + + if (extts) { + memset(&extts_request, 0, sizeof(extts_request)); + extts_request.index = 0; + extts_request.flags = PTP_ENABLE_FEATURE; + if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) { + perror("PTP_EXTTS_REQUEST"); + extts = 0; + } else { + puts("external time stamp request okay"); + } + for (; extts; extts--) { + cnt = read(fd, &event, sizeof(event)); + if (cnt != sizeof(event)) { + perror("read"); + break; + } + printf("event index %u at %lld.%09u\n", event.index, + event.t.sec, event.t.nsec); + fflush(stdout); + } + /* Disable the feature again. */ + extts_request.flags = 0; + if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) { + perror("PTP_EXTTS_REQUEST"); + } + } + + if (oneshot) { + install_handler(SIGALRM, handle_alarm); + /* Create a timer. */ + sigevent.sigev_notify = SIGEV_SIGNAL; + sigevent.sigev_signo = SIGALRM; + if (timer_create(clkid, &sigevent, &timerid)) { + perror("timer_create"); + return -1; + } + /* Start the timer. */ + memset(&timeout, 0, sizeof(timeout)); + timeout.it_value.tv_sec = oneshot; + if (timer_settime(timerid, 0, &timeout, NULL)) { + perror("timer_settime"); + return -1; + } + pause(); + timer_delete(timerid); + } + + if (periodic) { + install_handler(SIGALRM, handle_alarm); + /* Create a timer. */ + sigevent.sigev_notify = SIGEV_SIGNAL; + sigevent.sigev_signo = SIGALRM; + if (timer_create(clkid, &sigevent, &timerid)) { + perror("timer_create"); + return -1; + } + /* Start the timer. */ + memset(&timeout, 0, sizeof(timeout)); + timeout.it_interval.tv_sec = periodic; + timeout.it_value.tv_sec = periodic; + if (timer_settime(timerid, 0, &timeout, NULL)) { + perror("timer_settime"); + return -1; + } + while (1) { + pause(); + } + timer_delete(timerid); + } + + if (perout >= 0) { + if (clock_gettime(clkid, &ts)) { + perror("clock_gettime"); + return -1; + } + memset(&perout_request, 0, sizeof(perout_request)); + perout_request.index = 0; + perout_request.start.sec = ts.tv_sec + 2; + perout_request.start.nsec = 0; + perout_request.period.sec = 0; + perout_request.period.nsec = perout; + if (ioctl(fd, PTP_PEROUT_REQUEST, &perout_request)) { + perror("PTP_PEROUT_REQUEST"); + } else { + puts("periodic output request okay"); + } + } + + if (pps != -1) { + int enable = pps ? 1 : 0; + if (ioctl(fd, PTP_ENABLE_PPS, enable)) { + perror("PTP_ENABLE_PPS"); + } else { + puts("pps for system time request okay"); + } + } + + close(fd); + return 0; +} diff --git a/Documentation/ptp/testptp.mk b/Documentation/ptp/testptp.mk new file mode 100644 index 000000000000..4ef2d9755421 --- /dev/null +++ b/Documentation/ptp/testptp.mk @@ -0,0 +1,33 @@ +# PTP 1588 clock support - User space test program +# +# Copyright (C) 2010 OMICRON electronics GmbH +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +CC = $(CROSS_COMPILE)gcc +INC = -I$(KBUILD_OUTPUT)/usr/include +CFLAGS = -Wall $(INC) +LDLIBS = -lrt +PROGS = testptp + +all: $(PROGS) + +testptp: testptp.o + +clean: + rm -f testptp.o + +distclean: clean + rm -f $(PROGS) diff --git a/drivers/Kconfig b/drivers/Kconfig index 61631edfecc2..3bb154d8c8cc 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -54,6 +54,8 @@ source "drivers/spi/Kconfig" source "drivers/pps/Kconfig" +source "drivers/ptp/Kconfig" + source "drivers/gpio/Kconfig" source "drivers/w1/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index a29527f4ded6..3c2960128a7f 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -76,6 +76,7 @@ obj-$(CONFIG_I2O) += message/ obj-$(CONFIG_RTC_LIB) += rtc/ obj-y += i2c/ media/ obj-$(CONFIG_PPS) += pps/ +obj-$(CONFIG_PTP_1588_CLOCK) += ptp/ obj-$(CONFIG_W1) += w1/ obj-$(CONFIG_POWER_SUPPLY) += power/ obj-$(CONFIG_HWMON) += hwmon/ diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig new file mode 100644 index 000000000000..70d4bb1cbdab --- /dev/null +++ b/drivers/ptp/Kconfig @@ -0,0 +1,30 @@ +# +# PTP clock support configuration +# + +menu "PTP clock support" + +comment "Enable Device Drivers -> PPS to see the PTP clock options." + depends on PPS=n + +config PTP_1588_CLOCK + tristate "PTP clock support" + depends on EXPERIMENTAL + depends on PPS + help + The IEEE 1588 standard defines a method to precisely + synchronize distributed clocks over Ethernet networks. The + standard defines a Precision Time Protocol (PTP), which can + be used to achieve synchronization within a few dozen + microseconds. In addition, with the help of special hardware + time stamping units, it can be possible to achieve + synchronization to within a few hundred nanoseconds. + + This driver adds support for PTP clocks as character + devices. If you want to use a PTP clock, then you should + also enable at least one clock driver as well. + + To compile this driver as a module, choose M here: the module + will be called ptp. + +endmenu diff --git a/drivers/ptp/Makefile b/drivers/ptp/Makefile new file mode 100644 index 000000000000..480e2afdc999 --- /dev/null +++ b/drivers/ptp/Makefile @@ -0,0 +1,6 @@ +# +# Makefile for PTP 1588 clock support. +# + +ptp-y := ptp_clock.o ptp_chardev.o ptp_sysfs.o +obj-$(CONFIG_PTP_1588_CLOCK) += ptp.o diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c new file mode 100644 index 000000000000..a8d03aeb4051 --- /dev/null +++ b/drivers/ptp/ptp_chardev.c @@ -0,0 +1,159 @@ +/* + * PTP 1588 clock support - character device implementation. + * + * Copyright (C) 2010 OMICRON electronics GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include +#include +#include +#include + +#include "ptp_private.h" + +int ptp_open(struct posix_clock *pc, fmode_t fmode) +{ + return 0; +} + +long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) +{ + struct ptp_clock_caps caps; + struct ptp_clock_request req; + struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); + struct ptp_clock_info *ops = ptp->info; + int enable, err = 0; + + switch (cmd) { + + case PTP_CLOCK_GETCAPS: + memset(&caps, 0, sizeof(caps)); + caps.max_adj = ptp->info->max_adj; + caps.n_alarm = ptp->info->n_alarm; + caps.n_ext_ts = ptp->info->n_ext_ts; + caps.n_per_out = ptp->info->n_per_out; + caps.pps = ptp->info->pps; + err = copy_to_user((void __user *)arg, &caps, sizeof(caps)); + break; + + case PTP_EXTTS_REQUEST: + if (copy_from_user(&req.extts, (void __user *)arg, + sizeof(req.extts))) { + err = -EFAULT; + break; + } + if (req.extts.index >= ops->n_ext_ts) { + err = -EINVAL; + break; + } + req.type = PTP_CLK_REQ_EXTTS; + enable = req.extts.flags & PTP_ENABLE_FEATURE ? 1 : 0; + err = ops->enable(ops, &req, enable); + break; + + case PTP_PEROUT_REQUEST: + if (copy_from_user(&req.perout, (void __user *)arg, + sizeof(req.perout))) { + err = -EFAULT; + break; + } + if (req.perout.index >= ops->n_per_out) { + err = -EINVAL; + break; + } + req.type = PTP_CLK_REQ_PEROUT; + enable = req.perout.period.sec || req.perout.period.nsec; + err = ops->enable(ops, &req, enable); + break; + + case PTP_ENABLE_PPS: + if (!capable(CAP_SYS_TIME)) + return -EPERM; + req.type = PTP_CLK_REQ_PPS; + enable = arg ? 1 : 0; + err = ops->enable(ops, &req, enable); + break; + + default: + err = -ENOTTY; + break; + } + return err; +} + +unsigned int ptp_poll(struct posix_clock *pc, struct file *fp, poll_table *wait) +{ + struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); + + poll_wait(fp, &ptp->tsev_wq, wait); + + return queue_cnt(&ptp->tsevq) ? POLLIN : 0; +} + +ssize_t ptp_read(struct posix_clock *pc, + uint rdflags, char __user *buf, size_t cnt) +{ + struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); + struct timestamp_event_queue *queue = &ptp->tsevq; + struct ptp_extts_event event[PTP_BUF_TIMESTAMPS]; + unsigned long flags; + size_t qcnt, i; + + if (cnt % sizeof(struct ptp_extts_event) != 0) + return -EINVAL; + + if (cnt > sizeof(event)) + cnt = sizeof(event); + + cnt = cnt / sizeof(struct ptp_extts_event); + + if (mutex_lock_interruptible(&ptp->tsevq_mux)) + return -ERESTARTSYS; + + if (wait_event_interruptible(ptp->tsev_wq, + ptp->defunct || queue_cnt(queue))) { + mutex_unlock(&ptp->tsevq_mux); + return -ERESTARTSYS; + } + + if (ptp->defunct) + return -ENODEV; + + spin_lock_irqsave(&queue->lock, flags); + + qcnt = queue_cnt(queue); + + if (cnt > qcnt) + cnt = qcnt; + + for (i = 0; i < cnt; i++) { + event[i] = queue->buf[queue->head]; + queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS; + } + + spin_unlock_irqrestore(&queue->lock, flags); + + cnt = cnt * sizeof(struct ptp_extts_event); + + mutex_unlock(&ptp->tsevq_mux); + + if (copy_to_user(buf, event, cnt)) { + mutex_unlock(&ptp->tsevq_mux); + return -EFAULT; + } + + return cnt; +} diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c new file mode 100644 index 000000000000..cf3f9997546d --- /dev/null +++ b/drivers/ptp/ptp_clock.c @@ -0,0 +1,343 @@ +/* + * PTP 1588 clock support + * + * Copyright (C) 2010 OMICRON electronics GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ptp_private.h" + +#define PTP_MAX_ALARMS 4 +#define PTP_MAX_CLOCKS 8 +#define PTP_PPS_DEFAULTS (PPS_CAPTUREASSERT | PPS_OFFSETASSERT) +#define PTP_PPS_EVENT PPS_CAPTUREASSERT +#define PTP_PPS_MODE (PTP_PPS_DEFAULTS | PPS_CANWAIT | PPS_TSFMT_TSPEC) + +/* private globals */ + +static dev_t ptp_devt; +static struct class *ptp_class; + +static DECLARE_BITMAP(ptp_clocks_map, PTP_MAX_CLOCKS); +static DEFINE_MUTEX(ptp_clocks_mutex); /* protects 'ptp_clocks_map' */ + +/* time stamp event queue operations */ + +static inline int queue_free(struct timestamp_event_queue *q) +{ + return PTP_MAX_TIMESTAMPS - queue_cnt(q) - 1; +} + +static void enqueue_external_timestamp(struct timestamp_event_queue *queue, + struct ptp_clock_event *src) +{ + struct ptp_extts_event *dst; + unsigned long flags; + s64 seconds; + u32 remainder; + + seconds = div_u64_rem(src->timestamp, 1000000000, &remainder); + + spin_lock_irqsave(&queue->lock, flags); + + dst = &queue->buf[queue->tail]; + dst->index = src->index; + dst->t.sec = seconds; + dst->t.nsec = remainder; + + if (!queue_free(queue)) + queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS; + + queue->tail = (queue->tail + 1) % PTP_MAX_TIMESTAMPS; + + spin_unlock_irqrestore(&queue->lock, flags); +} + +static s32 scaled_ppm_to_ppb(long ppm) +{ + /* + * The 'freq' field in the 'struct timex' is in parts per + * million, but with a 16 bit binary fractional field. + * + * We want to calculate + * + * ppb = scaled_ppm * 1000 / 2^16 + * + * which simplifies to + * + * ppb = scaled_ppm * 125 / 2^13 + */ + s64 ppb = 1 + ppm; + ppb *= 125; + ppb >>= 13; + return (s32) ppb; +} + +/* posix clock implementation */ + +static int ptp_clock_getres(struct posix_clock *pc, struct timespec *tp) +{ + return 1; /* always round timer functions to one nanosecond */ +} + +static int ptp_clock_settime(struct posix_clock *pc, const struct timespec *tp) +{ + struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); + return ptp->info->settime(ptp->info, tp); +} + +static int ptp_clock_gettime(struct posix_clock *pc, struct timespec *tp) +{ + struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); + return ptp->info->gettime(ptp->info, tp); +} + +static int ptp_clock_adjtime(struct posix_clock *pc, struct timex *tx) +{ + struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); + struct ptp_clock_info *ops; + int err = -EOPNOTSUPP; + + ops = ptp->info; + + if (tx->modes & ADJ_SETOFFSET) { + struct timespec ts; + ktime_t kt; + s64 delta; + + ts.tv_sec = tx->time.tv_sec; + ts.tv_nsec = tx->time.tv_usec; + + if (!(tx->modes & ADJ_NANO)) + ts.tv_nsec *= 1000; + + if ((unsigned long) ts.tv_nsec >= NSEC_PER_SEC) + return -EINVAL; + + kt = timespec_to_ktime(ts); + delta = ktime_to_ns(kt); + err = ops->adjtime(ops, delta); + + } else if (tx->modes & ADJ_FREQUENCY) { + + err = ops->adjfreq(ops, scaled_ppm_to_ppb(tx->freq)); + } + + return err; +} + +static struct posix_clock_operations ptp_clock_ops = { + .owner = THIS_MODULE, + .clock_adjtime = ptp_clock_adjtime, + .clock_gettime = ptp_clock_gettime, + .clock_getres = ptp_clock_getres, + .clock_settime = ptp_clock_settime, + .ioctl = ptp_ioctl, + .open = ptp_open, + .poll = ptp_poll, + .read = ptp_read, +}; + +static void delete_ptp_clock(struct posix_clock *pc) +{ + struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); + + mutex_destroy(&ptp->tsevq_mux); + + /* Remove the clock from the bit map. */ + mutex_lock(&ptp_clocks_mutex); + clear_bit(ptp->index, ptp_clocks_map); + mutex_unlock(&ptp_clocks_mutex); + + kfree(ptp); +} + +/* public interface */ + +struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info) +{ + struct ptp_clock *ptp; + int err = 0, index, major = MAJOR(ptp_devt); + + if (info->n_alarm > PTP_MAX_ALARMS) + return ERR_PTR(-EINVAL); + + /* Find a free clock slot and reserve it. */ + err = -EBUSY; + mutex_lock(&ptp_clocks_mutex); + index = find_first_zero_bit(ptp_clocks_map, PTP_MAX_CLOCKS); + if (index < PTP_MAX_CLOCKS) + set_bit(index, ptp_clocks_map); + else + goto no_slot; + + /* Initialize a clock structure. */ + err = -ENOMEM; + ptp = kzalloc(sizeof(struct ptp_clock), GFP_KERNEL); + if (ptp == NULL) + goto no_memory; + + ptp->clock.ops = ptp_clock_ops; + ptp->clock.release = delete_ptp_clock; + ptp->info = info; + ptp->devid = MKDEV(major, index); + ptp->index = index; + spin_lock_init(&ptp->tsevq.lock); + mutex_init(&ptp->tsevq_mux); + init_waitqueue_head(&ptp->tsev_wq); + + /* Create a new device in our class. */ + ptp->dev = device_create(ptp_class, NULL, ptp->devid, ptp, + "ptp%d", ptp->index); + if (IS_ERR(ptp->dev)) + goto no_device; + + dev_set_drvdata(ptp->dev, ptp); + + err = ptp_populate_sysfs(ptp); + if (err) + goto no_sysfs; + + /* Register a new PPS source. */ + if (info->pps) { + struct pps_source_info pps; + memset(&pps, 0, sizeof(pps)); + snprintf(pps.name, PPS_MAX_NAME_LEN, "ptp%d", index); + pps.mode = PTP_PPS_MODE; + pps.owner = info->owner; + ptp->pps_source = pps_register_source(&pps, PTP_PPS_DEFAULTS); + if (!ptp->pps_source) { + pr_err("failed to register pps source\n"); + goto no_pps; + } + } + + /* Create a posix clock. */ + err = posix_clock_register(&ptp->clock, ptp->devid); + if (err) { + pr_err("failed to create posix clock\n"); + goto no_clock; + } + + mutex_unlock(&ptp_clocks_mutex); + return ptp; + +no_clock: + if (ptp->pps_source) + pps_unregister_source(ptp->pps_source); +no_pps: + ptp_cleanup_sysfs(ptp); +no_sysfs: + device_destroy(ptp_class, ptp->devid); +no_device: + mutex_destroy(&ptp->tsevq_mux); + kfree(ptp); +no_memory: + clear_bit(index, ptp_clocks_map); +no_slot: + mutex_unlock(&ptp_clocks_mutex); + return ERR_PTR(err); +} +EXPORT_SYMBOL(ptp_clock_register); + +int ptp_clock_unregister(struct ptp_clock *ptp) +{ + ptp->defunct = 1; + wake_up_interruptible(&ptp->tsev_wq); + + /* Release the clock's resources. */ + if (ptp->pps_source) + pps_unregister_source(ptp->pps_source); + ptp_cleanup_sysfs(ptp); + device_destroy(ptp_class, ptp->devid); + + posix_clock_unregister(&ptp->clock); + return 0; +} +EXPORT_SYMBOL(ptp_clock_unregister); + +void ptp_clock_event(struct ptp_clock *ptp, struct ptp_clock_event *event) +{ + struct pps_event_time evt; + + switch (event->type) { + + case PTP_CLOCK_ALARM: + break; + + case PTP_CLOCK_EXTTS: + enqueue_external_timestamp(&ptp->tsevq, event); + wake_up_interruptible(&ptp->tsev_wq); + break; + + case PTP_CLOCK_PPS: + pps_get_ts(&evt); + pps_event(ptp->pps_source, &evt, PTP_PPS_EVENT, NULL); + break; + } +} +EXPORT_SYMBOL(ptp_clock_event); + +/* module operations */ + +static void __exit ptp_exit(void) +{ + class_destroy(ptp_class); + unregister_chrdev_region(ptp_devt, PTP_MAX_CLOCKS); +} + +static int __init ptp_init(void) +{ + int err; + + ptp_class = class_create(THIS_MODULE, "ptp"); + if (IS_ERR(ptp_class)) { + pr_err("ptp: failed to allocate class\n"); + return PTR_ERR(ptp_class); + } + + err = alloc_chrdev_region(&ptp_devt, 0, PTP_MAX_CLOCKS, "ptp"); + if (err < 0) { + pr_err("ptp: failed to allocate device region\n"); + goto no_region; + } + + ptp_class->dev_attrs = ptp_dev_attrs; + pr_info("PTP clock support registered\n"); + return 0; + +no_region: + class_destroy(ptp_class); + return err; +} + +subsys_initcall(ptp_init); +module_exit(ptp_exit); + +MODULE_AUTHOR("Richard Cochran "); +MODULE_DESCRIPTION("PTP clocks support"); +MODULE_LICENSE("GPL"); diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h new file mode 100644 index 000000000000..4d5b5082c3b1 --- /dev/null +++ b/drivers/ptp/ptp_private.h @@ -0,0 +1,92 @@ +/* + * PTP 1588 clock support - private declarations for the core module. + * + * Copyright (C) 2010 OMICRON electronics GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#ifndef _PTP_PRIVATE_H_ +#define _PTP_PRIVATE_H_ + +#include +#include +#include +#include +#include +#include +#include + +#define PTP_MAX_TIMESTAMPS 128 +#define PTP_BUF_TIMESTAMPS 30 + +struct timestamp_event_queue { + struct ptp_extts_event buf[PTP_MAX_TIMESTAMPS]; + int head; + int tail; + spinlock_t lock; +}; + +struct ptp_clock { + struct posix_clock clock; + struct device *dev; + struct ptp_clock_info *info; + dev_t devid; + int index; /* index into clocks.map */ + struct pps_device *pps_source; + struct timestamp_event_queue tsevq; /* simple fifo for time stamps */ + struct mutex tsevq_mux; /* one process at a time reading the fifo */ + wait_queue_head_t tsev_wq; + int defunct; /* tells readers to go away when clock is being removed */ +}; + +/* + * The function queue_cnt() is safe for readers to call without + * holding q->lock. Readers use this function to verify that the queue + * is nonempty before proceeding with a dequeue operation. The fact + * that a writer might concurrently increment the tail does not + * matter, since the queue remains nonempty nonetheless. + */ +static inline int queue_cnt(struct timestamp_event_queue *q) +{ + int cnt = q->tail - q->head; + return cnt < 0 ? PTP_MAX_TIMESTAMPS + cnt : cnt; +} + +/* + * see ptp_chardev.c + */ + +long ptp_ioctl(struct posix_clock *pc, + unsigned int cmd, unsigned long arg); + +int ptp_open(struct posix_clock *pc, fmode_t fmode); + +ssize_t ptp_read(struct posix_clock *pc, + uint flags, char __user *buf, size_t cnt); + +uint ptp_poll(struct posix_clock *pc, + struct file *fp, poll_table *wait); + +/* + * see ptp_sysfs.c + */ + +extern struct device_attribute ptp_dev_attrs[]; + +int ptp_cleanup_sysfs(struct ptp_clock *ptp); + +int ptp_populate_sysfs(struct ptp_clock *ptp); + +#endif diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c new file mode 100644 index 000000000000..2f93926ac976 --- /dev/null +++ b/drivers/ptp/ptp_sysfs.c @@ -0,0 +1,230 @@ +/* + * PTP 1588 clock support - sysfs interface. + * + * Copyright (C) 2010 OMICRON electronics GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include + +#include "ptp_private.h" + +static ssize_t clock_name_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct ptp_clock *ptp = dev_get_drvdata(dev); + return snprintf(page, PAGE_SIZE-1, "%s\n", ptp->info->name); +} + +#define PTP_SHOW_INT(name) \ +static ssize_t name##_show(struct device *dev, \ + struct device_attribute *attr, char *page) \ +{ \ + struct ptp_clock *ptp = dev_get_drvdata(dev); \ + return snprintf(page, PAGE_SIZE-1, "%d\n", ptp->info->name); \ +} + +PTP_SHOW_INT(max_adj); +PTP_SHOW_INT(n_alarm); +PTP_SHOW_INT(n_ext_ts); +PTP_SHOW_INT(n_per_out); +PTP_SHOW_INT(pps); + +#define PTP_RO_ATTR(_var, _name) { \ + .attr = { .name = __stringify(_name), .mode = 0444 }, \ + .show = _var##_show, \ +} + +struct device_attribute ptp_dev_attrs[] = { + PTP_RO_ATTR(clock_name, clock_name), + PTP_RO_ATTR(max_adj, max_adjustment), + PTP_RO_ATTR(n_alarm, n_alarms), + PTP_RO_ATTR(n_ext_ts, n_external_timestamps), + PTP_RO_ATTR(n_per_out, n_periodic_outputs), + PTP_RO_ATTR(pps, pps_available), + __ATTR_NULL, +}; + +static ssize_t extts_enable_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ptp_clock *ptp = dev_get_drvdata(dev); + struct ptp_clock_info *ops = ptp->info; + struct ptp_clock_request req = { .type = PTP_CLK_REQ_EXTTS }; + int cnt, enable; + int err = -EINVAL; + + cnt = sscanf(buf, "%u %d", &req.extts.index, &enable); + if (cnt != 2) + goto out; + if (req.extts.index >= ops->n_ext_ts) + goto out; + + err = ops->enable(ops, &req, enable ? 1 : 0); + if (err) + goto out; + + return count; +out: + return err; +} + +static ssize_t extts_fifo_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct ptp_clock *ptp = dev_get_drvdata(dev); + struct timestamp_event_queue *queue = &ptp->tsevq; + struct ptp_extts_event event; + unsigned long flags; + size_t qcnt; + int cnt = 0; + + memset(&event, 0, sizeof(event)); + + if (mutex_lock_interruptible(&ptp->tsevq_mux)) + return -ERESTARTSYS; + + spin_lock_irqsave(&queue->lock, flags); + qcnt = queue_cnt(queue); + if (qcnt) { + event = queue->buf[queue->head]; + queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS; + } + spin_unlock_irqrestore(&queue->lock, flags); + + if (!qcnt) + goto out; + + cnt = snprintf(page, PAGE_SIZE, "%u %lld %u\n", + event.index, event.t.sec, event.t.nsec); +out: + mutex_unlock(&ptp->tsevq_mux); + return cnt; +} + +static ssize_t period_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ptp_clock *ptp = dev_get_drvdata(dev); + struct ptp_clock_info *ops = ptp->info; + struct ptp_clock_request req = { .type = PTP_CLK_REQ_PEROUT }; + int cnt, enable, err = -EINVAL; + + cnt = sscanf(buf, "%u %lld %u %lld %u", &req.perout.index, + &req.perout.start.sec, &req.perout.start.nsec, + &req.perout.period.sec, &req.perout.period.nsec); + if (cnt != 5) + goto out; + if (req.perout.index >= ops->n_per_out) + goto out; + + enable = req.perout.period.sec || req.perout.period.nsec; + err = ops->enable(ops, &req, enable); + if (err) + goto out; + + return count; +out: + return err; +} + +static ssize_t pps_enable_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ptp_clock *ptp = dev_get_drvdata(dev); + struct ptp_clock_info *ops = ptp->info; + struct ptp_clock_request req = { .type = PTP_CLK_REQ_PPS }; + int cnt, enable; + int err = -EINVAL; + + if (!capable(CAP_SYS_TIME)) + return -EPERM; + + cnt = sscanf(buf, "%d", &enable); + if (cnt != 1) + goto out; + + err = ops->enable(ops, &req, enable ? 1 : 0); + if (err) + goto out; + + return count; +out: + return err; +} + +static DEVICE_ATTR(extts_enable, 0220, NULL, extts_enable_store); +static DEVICE_ATTR(fifo, 0444, extts_fifo_show, NULL); +static DEVICE_ATTR(period, 0220, NULL, period_store); +static DEVICE_ATTR(pps_enable, 0220, NULL, pps_enable_store); + +int ptp_cleanup_sysfs(struct ptp_clock *ptp) +{ + struct device *dev = ptp->dev; + struct ptp_clock_info *info = ptp->info; + + if (info->n_ext_ts) { + device_remove_file(dev, &dev_attr_extts_enable); + device_remove_file(dev, &dev_attr_fifo); + } + if (info->n_per_out) + device_remove_file(dev, &dev_attr_period); + + if (info->pps) + device_remove_file(dev, &dev_attr_pps_enable); + + return 0; +} + +int ptp_populate_sysfs(struct ptp_clock *ptp) +{ + struct device *dev = ptp->dev; + struct ptp_clock_info *info = ptp->info; + int err; + + if (info->n_ext_ts) { + err = device_create_file(dev, &dev_attr_extts_enable); + if (err) + goto out1; + err = device_create_file(dev, &dev_attr_fifo); + if (err) + goto out2; + } + if (info->n_per_out) { + err = device_create_file(dev, &dev_attr_period); + if (err) + goto out3; + } + if (info->pps) { + err = device_create_file(dev, &dev_attr_pps_enable); + if (err) + goto out4; + } + return 0; +out4: + if (info->n_per_out) + device_remove_file(dev, &dev_attr_period); +out3: + if (info->n_ext_ts) + device_remove_file(dev, &dev_attr_fifo); +out2: + if (info->n_ext_ts) + device_remove_file(dev, &dev_attr_extts_enable); +out1: + return err; +} diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 75cf611641e6..4585836ba664 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -302,6 +302,7 @@ header-y += ppp-comp.h header-y += ppp_defs.h header-y += pps.h header-y += prctl.h +header-y += ptp_clock.h header-y += ptrace.h header-y += qnx4_fs.h header-y += qnxtypes.h diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h index 943a85ab0020..e07e2742a865 100644 --- a/include/linux/ptp_classify.h +++ b/include/linux/ptp_classify.h @@ -25,6 +25,7 @@ #include #include +#include #include #ifdef __KERNEL__ #include @@ -58,6 +59,12 @@ #define OFF_NEXT 6 #define OFF_UDP_DST 2 +#define OFF_PTP_SOURCE_UUID 22 /* PTPv1 only */ +#define OFF_PTP_SEQUENCE_ID 30 +#define OFF_PTP_CONTROL 32 /* PTPv1 only */ + +#define IPV4_HLEN(data) (((struct iphdr *)(data + OFF_IHL))->ihl << 2) + #define IP6_HLEN 40 #define UDP_HLEN 8 diff --git a/include/linux/ptp_clock.h b/include/linux/ptp_clock.h new file mode 100644 index 000000000000..94e981f810a2 --- /dev/null +++ b/include/linux/ptp_clock.h @@ -0,0 +1,84 @@ +/* + * PTP 1588 clock support - user space interface + * + * Copyright (C) 2010 OMICRON electronics GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _PTP_CLOCK_H_ +#define _PTP_CLOCK_H_ + +#include +#include + +/* PTP_xxx bits, for the flags field within the request structures. */ +#define PTP_ENABLE_FEATURE (1<<0) +#define PTP_RISING_EDGE (1<<1) +#define PTP_FALLING_EDGE (1<<2) + +/* + * struct ptp_clock_time - represents a time value + * + * The sign of the seconds field applies to the whole value. The + * nanoseconds field is always unsigned. The reserved field is + * included for sub-nanosecond resolution, should the demand for + * this ever appear. + * + */ +struct ptp_clock_time { + __s64 sec; /* seconds */ + __u32 nsec; /* nanoseconds */ + __u32 reserved; +}; + +struct ptp_clock_caps { + int max_adj; /* Maximum frequency adjustment in parts per billon. */ + int n_alarm; /* Number of programmable alarms. */ + int n_ext_ts; /* Number of external time stamp channels. */ + int n_per_out; /* Number of programmable periodic signals. */ + int pps; /* Whether the clock supports a PPS callback. */ + int rsv[15]; /* Reserved for future use. */ +}; + +struct ptp_extts_request { + unsigned int index; /* Which channel to configure. */ + unsigned int flags; /* Bit field for PTP_xxx flags. */ + unsigned int rsv[2]; /* Reserved for future use. */ +}; + +struct ptp_perout_request { + struct ptp_clock_time start; /* Absolute start time. */ + struct ptp_clock_time period; /* Desired period, zero means disable. */ + unsigned int index; /* Which channel to configure. */ + unsigned int flags; /* Reserved for future use. */ + unsigned int rsv[4]; /* Reserved for future use. */ +}; + +#define PTP_CLK_MAGIC '=' + +#define PTP_CLOCK_GETCAPS _IOR(PTP_CLK_MAGIC, 1, struct ptp_clock_caps) +#define PTP_EXTTS_REQUEST _IOW(PTP_CLK_MAGIC, 2, struct ptp_extts_request) +#define PTP_PEROUT_REQUEST _IOW(PTP_CLK_MAGIC, 3, struct ptp_perout_request) +#define PTP_ENABLE_PPS _IOW(PTP_CLK_MAGIC, 4, int) + +struct ptp_extts_event { + struct ptp_clock_time t; /* Time event occured. */ + unsigned int index; /* Which channel produced the event. */ + unsigned int flags; /* Reserved for future use. */ + unsigned int rsv[2]; /* Reserved for future use. */ +}; + +#endif diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h new file mode 100644 index 000000000000..dd2e44fba63e --- /dev/null +++ b/include/linux/ptp_clock_kernel.h @@ -0,0 +1,139 @@ +/* + * PTP 1588 clock support + * + * Copyright (C) 2010 OMICRON electronics GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _PTP_CLOCK_KERNEL_H_ +#define _PTP_CLOCK_KERNEL_H_ + +#include + + +struct ptp_clock_request { + enum { + PTP_CLK_REQ_EXTTS, + PTP_CLK_REQ_PEROUT, + PTP_CLK_REQ_PPS, + } type; + union { + struct ptp_extts_request extts; + struct ptp_perout_request perout; + }; +}; + +/** + * struct ptp_clock_info - decribes a PTP hardware clock + * + * @owner: The clock driver should set to THIS_MODULE. + * @name: A short name to identify the clock. + * @max_adj: The maximum possible frequency adjustment, in parts per billon. + * @n_alarm: The number of programmable alarms. + * @n_ext_ts: The number of external time stamp channels. + * @n_per_out: The number of programmable periodic signals. + * @pps: Indicates whether the clock supports a PPS callback. + * + * clock operations + * + * @adjfreq: Adjusts the frequency of the hardware clock. + * parameter delta: Desired period change in parts per billion. + * + * @adjtime: Shifts the time of the hardware clock. + * parameter delta: Desired change in nanoseconds. + * + * @gettime: Reads the current time from the hardware clock. + * parameter ts: Holds the result. + * + * @settime: Set the current time on the hardware clock. + * parameter ts: Time value to set. + * + * @enable: Request driver to enable or disable an ancillary feature. + * parameter request: Desired resource to enable or disable. + * parameter on: Caller passes one to enable or zero to disable. + * + * Drivers should embed their ptp_clock_info within a private + * structure, obtaining a reference to it using container_of(). + * + * The callbacks must all return zero on success, non-zero otherwise. + */ + +struct ptp_clock_info { + struct module *owner; + char name[16]; + s32 max_adj; + int n_alarm; + int n_ext_ts; + int n_per_out; + int pps; + int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta); + int (*adjtime)(struct ptp_clock_info *ptp, s64 delta); + int (*gettime)(struct ptp_clock_info *ptp, struct timespec *ts); + int (*settime)(struct ptp_clock_info *ptp, const struct timespec *ts); + int (*enable)(struct ptp_clock_info *ptp, + struct ptp_clock_request *request, int on); +}; + +struct ptp_clock; + +/** + * ptp_clock_register() - register a PTP hardware clock driver + * + * @info: Structure describing the new clock. + */ + +extern struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info); + +/** + * ptp_clock_unregister() - unregister a PTP hardware clock driver + * + * @ptp: The clock to remove from service. + */ + +extern int ptp_clock_unregister(struct ptp_clock *ptp); + + +enum ptp_clock_events { + PTP_CLOCK_ALARM, + PTP_CLOCK_EXTTS, + PTP_CLOCK_PPS, +}; + +/** + * struct ptp_clock_event - decribes a PTP hardware clock event + * + * @type: One of the ptp_clock_events enumeration values. + * @index: Identifies the source of the event. + * @timestamp: When the event occured. + */ + +struct ptp_clock_event { + int type; + int index; + u64 timestamp; +}; + +/** + * ptp_clock_event() - notify the PTP layer about an event + * + * @ptp: The clock obtained from ptp_clock_register(). + * @event: Message structure describing the event. + */ + +extern void ptp_clock_event(struct ptp_clock *ptp, + struct ptp_clock_event *event); + +#endif -- cgit v1.2.3 From 600942e0fdb7ed1565d056d7305c46c7c0544a3e Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 27 Jan 2011 15:24:58 +0100 Subject: lru_cache.h: fix comments referring to ts_ instead of lc_ For some time we contemplated calling the "struct lru_cache" a "struct tracked_set", and some comments kept the ts_ prefix. Fix those to match the member field names. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/lru_cache.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h index 6a4fab7c6e09..7a71ffad037c 100644 --- a/include/linux/lru_cache.h +++ b/include/linux/lru_cache.h @@ -139,9 +139,9 @@ write intent log information, three of which are mentioned here. * .list is on one of three lists: * in_use: currently in use (refcnt > 0, lc_number != LC_FREE) * lru: unused but ready to be reused or recycled - * (ts_refcnt == 0, lc_number != LC_FREE), + * (lc_refcnt == 0, lc_number != LC_FREE), * free: unused but ready to be recycled - * (ts_refcnt == 0, lc_number == LC_FREE), + * (lc_refcnt == 0, lc_number == LC_FREE), * * an element is said to be "in the active set", * if either on "in_use" or "lru", i.e. lc_number != LC_FREE. @@ -160,8 +160,8 @@ struct lc_element { struct hlist_node colision; struct list_head list; /* LRU list or free list */ unsigned refcnt; - /* back "pointer" into ts_cache->element[index], - * for paranoia, and for "ts_element_to_index" */ + /* back "pointer" into lc_cache->element[index], + * for paranoia, and for "lc_element_to_index" */ unsigned lc_index; /* if we want to track a larger set of objects, * it needs to become arch independend u64 */ @@ -190,8 +190,8 @@ struct lru_cache { /* Arbitrary limit on maximum tracked objects. Practical limit is much * lower due to allocation failures, probably. For typical use cases, * nr_elements should be a few thousand at most. - * This also limits the maximum value of ts_element.ts_index, allowing the - * 8 high bits of .ts_index to be overloaded with flags in the future. */ + * This also limits the maximum value of lc_element.lc_index, allowing the + * 8 high bits of .lc_index to be overloaded with flags in the future. */ #define LC_MAX_ACTIVE (1<<24) /* statistics */ -- cgit v1.2.3 From 9a0d9d0389ef769e4b01abf50fcc11407706270b Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 2 May 2011 11:51:31 +0200 Subject: drbd: fix schedule in atomic An administrative detach used to request a state change directly to D_DISKLESS, first suspending IO to avoid the last put_ldev() occuring from an endio handler, potentially in irq context. This is not enough on the receiving side (typically secondary), we may miss some peer_req on the way to local disk, which then may do the last put_ldev() from their drbd_peer_request_endio(). This patch makes the detach always go through the intermediate D_FAILED state. We may consider to rename it D_DETACHING. Alternative approach would be to create yet an other work item to be scheduled on the worker, do the destructor work from there, and get the timing right. manually picked commit 564040f from the drbd 8.4 branch. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 ++++ drivers/block/drbd/drbd_nl.c | 14 +++++++++++--- include/linux/drbd.h | 2 +- 3 files changed, 16 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 8aa10391115b..a74d3ee04ba8 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -2157,6 +2157,10 @@ static inline int get_net_conf(struct drbd_conf *mdev) static inline void put_ldev(struct drbd_conf *mdev) { int i = atomic_dec_return(&mdev->local_cnt); + + /* This may be called from some endio handler, + * so we must not sleep here. */ + __release(local); D_ASSERT(i >= 0); if (i == 0) { diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 7c64ec042124..13569635b922 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1334,11 +1334,19 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { + enum drbd_ret_code retcode; + int ret; drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */ - reply->ret_code = drbd_request_state(mdev, NS(disk, D_DISKLESS)); - if (mdev->state.disk == D_DISKLESS) - wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt)); + retcode = drbd_request_state(mdev, NS(disk, D_FAILED)); + /* D_FAILED will transition to DISKLESS. */ + ret = wait_event_interruptible(mdev->misc_wait, + mdev->state.disk != D_FAILED); drbd_resume_io(mdev); + if (retcode == SS_IS_DISKLESS) + retcode = SS_NOTHING_TO_DO; + if (ret) + retcode = ERR_INTR; + reply->ret_code = retcode; return 0; } diff --git a/include/linux/drbd.h b/include/linux/drbd.h index cec467f5d676..c86283b4fbab 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -53,7 +53,7 @@ extern const char *drbd_buildtag(void); -#define REL_VERSION "8.3.10" +#define REL_VERSION "8.3.11" #define API_VERSION 88 #define PRO_VERSION_MIN 86 #define PRO_VERSION_MAX 96 -- cgit v1.2.3 From 24c4830c8ec3cbc904d84c213126a35f41a4e455 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sat, 21 May 2011 18:32:29 +0200 Subject: drbd: Fix spelling Found these with the help of ispell -l. Signed-off-by: Bart Van Assche Signed-off-by: Lars Ellenberg Signed-off-by: Philipp Reisner --- drivers/block/drbd/drbd_actlog.c | 2 +- drivers/block/drbd/drbd_bitmap.c | 6 +++--- drivers/block/drbd/drbd_int.h | 6 +++--- drivers/block/drbd/drbd_main.c | 4 ++-- drivers/block/drbd/drbd_nl.c | 6 +++--- drivers/block/drbd/drbd_receiver.c | 30 +++++++++++++++--------------- drivers/block/drbd/drbd_req.c | 18 +++++++++--------- drivers/block/drbd/drbd_req.h | 4 ++-- drivers/block/drbd/drbd_worker.c | 4 ++-- include/linux/drbd.h | 8 ++++---- include/linux/drbd_tag_magic.h | 2 +- 11 files changed, 45 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index c6828b68d77b..09ef9a878ef0 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -28,7 +28,7 @@ #include "drbd_int.h" #include "drbd_wrappers.h" -/* We maintain a trivial check sum in our on disk activity log. +/* We maintain a trivial checksum in our on disk activity log. * With that we can ensure correct operation even when the storage * device might do a partial (last) sector write while losing power. */ diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 76210ba401ac..f440a02dfdb1 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -74,7 +74,7 @@ * as we are "attached" to a local disk, which at 32 GiB for 1PiB storage * seems excessive. * - * We plan to reduce the amount of in-core bitmap pages by pageing them in + * We plan to reduce the amount of in-core bitmap pages by paging them in * and out against their on-disk location as necessary, but need to make * sure we don't cause too much meta data IO, and must not deadlock in * tight memory situations. This needs some more work. @@ -200,7 +200,7 @@ void drbd_bm_unlock(struct drbd_conf *mdev) * we if bits have been cleared since last IO. */ #define BM_PAGE_LAZY_WRITEOUT 28 -/* store_page_idx uses non-atomic assingment. It is only used directly after +/* store_page_idx uses non-atomic assignment. It is only used directly after * allocating the page. All other bm_set_page_* and bm_clear_page_* need to * use atomic bit manipulation, as set_out_of_sync (and therefore bitmap * changes) may happen from various contexts, and wait_on_bit/wake_up_bit @@ -318,7 +318,7 @@ static void bm_unmap(unsigned long *p_addr) /* word offset from start of bitmap to word number _in_page_ * modulo longs per page #define MLPP(X) ((X) % (PAGE_SIZE/sizeof(long)) - hm, well, Philipp thinks gcc might not optimze the % into & (... - 1) + hm, well, Philipp thinks gcc might not optimize the % into & (... - 1) so do it explicitly: */ #define MLPP(X) ((X) & ((PAGE_SIZE/sizeof(long))-1)) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index a74d3ee04ba8..7952eb90d17f 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -699,7 +699,7 @@ struct drbd_request { * see drbd_endio_pri(). */ struct bio *private_bio; - struct hlist_node colision; + struct hlist_node collision; sector_t sector; unsigned int size; unsigned int epoch; /* barrier_nr */ @@ -765,7 +765,7 @@ struct digest_info { struct drbd_epoch_entry { struct drbd_work w; - struct hlist_node colision; + struct hlist_node collision; struct drbd_epoch *epoch; /* for writes */ struct drbd_conf *mdev; struct page *pages; @@ -1520,7 +1520,7 @@ extern void drbd_resume_io(struct drbd_conf *mdev); extern char *ppsize(char *buf, unsigned long long size); extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, int); enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 }; -extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local); +extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local); extern void resync_after_online_grow(struct drbd_conf *); extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev); extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev, diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index cfeb13b5a216..0358e55356c8 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2732,7 +2732,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) /* double check digest, sometimes buffers have been modified in flight. */ if (dgs > 0 && dgs <= 64) { - /* 64 byte, 512 bit, is the larges digest size + /* 64 byte, 512 bit, is the largest digest size * currently supported in kernel crypto. */ unsigned char digest[64]; drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, digest); @@ -3287,7 +3287,7 @@ static void drbd_delete_device(unsigned int minor) drbd_release_ee_lists(mdev); - /* should be free'd on disconnect? */ + /* should be freed on disconnect? */ kfree(mdev->ee_hash); /* mdev->ee_hash_s = 0; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 13569635b922..259ca0b20961 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -596,7 +596,7 @@ void drbd_resume_io(struct drbd_conf *mdev) * Returns 0 on success, negative return values indicate errors. * You should call drbd_md_sync() after calling this function. */ -enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local) +enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local) { sector_t prev_first_sect, prev_size; /* previous meta location */ sector_t la_size; @@ -1205,7 +1205,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND)) set_bit(USE_DEGR_WFC_T, &mdev->flags); - dd = drbd_determin_dev_size(mdev, 0); + dd = drbd_determine_dev_size(mdev, 0); if (dd == dev_size_error) { retcode = ERR_NOMEM_BITMAP; goto force_diskless_dec; @@ -1719,7 +1719,7 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, mdev->ldev->dc.disk_size = (sector_t)rs.resize_size; ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0); - dd = drbd_determin_dev_size(mdev, ddsf); + dd = drbd_determine_dev_size(mdev, ddsf); drbd_md_sync(mdev); put_ldev(mdev); if (dd == dev_size_error) { diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 6ea0a4b51ece..d9f2109fd9e6 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -333,7 +333,7 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, if (!page) goto fail; - INIT_HLIST_NODE(&e->colision); + INIT_HLIST_NODE(&e->collision); e->epoch = NULL; e->mdev = mdev; e->pages = page; @@ -356,7 +356,7 @@ void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, int i kfree(e->digest); drbd_pp_free(mdev, e->pages, is_net); D_ASSERT(atomic_read(&e->pending_bios) == 0); - D_ASSERT(hlist_unhashed(&e->colision)); + D_ASSERT(hlist_unhashed(&e->collision)); mempool_free(e, drbd_ee_mempool); } @@ -1413,7 +1413,7 @@ static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int u sector_t sector = e->sector; int ok; - D_ASSERT(hlist_unhashed(&e->colision)); + D_ASSERT(hlist_unhashed(&e->collision)); if (likely((e->flags & EE_WAS_ERROR) == 0)) { drbd_set_in_sync(mdev, sector, e->size); @@ -1482,7 +1482,7 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsi return false; } - /* hlist_del(&req->colision) is done in _req_may_be_done, to avoid + /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid * special casing it there for the various failure cases. * still no race with drbd_fail_pending_reads */ ok = recv_dless_read(mdev, req, sector, data_size); @@ -1553,11 +1553,11 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */ if (mdev->net_conf->two_primaries) { spin_lock_irq(&mdev->req_lock); - D_ASSERT(!hlist_unhashed(&e->colision)); - hlist_del_init(&e->colision); + D_ASSERT(!hlist_unhashed(&e->collision)); + hlist_del_init(&e->collision); spin_unlock_irq(&mdev->req_lock); } else { - D_ASSERT(hlist_unhashed(&e->colision)); + D_ASSERT(hlist_unhashed(&e->collision)); } drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0)); @@ -1574,8 +1574,8 @@ static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int u ok = drbd_send_ack(mdev, P_DISCARD_ACK, e); spin_lock_irq(&mdev->req_lock); - D_ASSERT(!hlist_unhashed(&e->colision)); - hlist_del_init(&e->colision); + D_ASSERT(!hlist_unhashed(&e->collision)); + hlist_del_init(&e->collision); spin_unlock_irq(&mdev->req_lock); dec_unacked(mdev); @@ -1750,7 +1750,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned spin_lock_irq(&mdev->req_lock); - hlist_add_head(&e->colision, ee_hash_slot(mdev, sector)); + hlist_add_head(&e->collision, ee_hash_slot(mdev, sector)); #define OVERLAPS overlaps(i->sector, i->size, sector, size) slot = tl_hash_slot(mdev, sector); @@ -1760,7 +1760,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int have_conflict = 0; prepare_to_wait(&mdev->misc_wait, &wait, TASK_INTERRUPTIBLE); - hlist_for_each_entry(i, n, slot, colision) { + hlist_for_each_entry(i, n, slot, collision) { if (OVERLAPS) { /* only ALERT on first iteration, * we may be woken up early... */ @@ -1799,7 +1799,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned } if (signal_pending(current)) { - hlist_del_init(&e->colision); + hlist_del_init(&e->collision); spin_unlock_irq(&mdev->req_lock); @@ -1857,7 +1857,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned dev_err(DEV, "submit failed, triggering re-connect\n"); spin_lock_irq(&mdev->req_lock); list_del(&e->w.list); - hlist_del_init(&e->colision); + hlist_del_init(&e->collision); spin_unlock_irq(&mdev->req_lock); if (e->flags & EE_CALL_AL_COMPLETE_IO) drbd_al_complete_io(mdev, e->sector); @@ -2988,7 +2988,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned ddsf = be16_to_cpu(p->dds_flags); if (get_ldev(mdev)) { - dd = drbd_determin_dev_size(mdev, ddsf); + dd = drbd_determine_dev_size(mdev, ddsf); put_ldev(mdev); if (dd == dev_size_error) return false; @@ -4261,7 +4261,7 @@ static struct drbd_request *_ack_id_to_req(struct drbd_conf *mdev, struct hlist_node *n; struct drbd_request *req; - hlist_for_each_entry(req, n, slot, colision) { + hlist_for_each_entry(req, n, slot, collision) { if ((unsigned long)req == (unsigned long)id) { if (req->sector != sector) { dev_err(DEV, "_ack_id_to_req: found req %p but it has " diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 14645bd40092..3424d675b769 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -163,7 +163,7 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, * they must have been failed on the spot */ #define OVERLAPS overlaps(sector, size, i->sector, i->size) slot = tl_hash_slot(mdev, sector); - hlist_for_each_entry(i, n, slot, colision) { + hlist_for_each_entry(i, n, slot, collision) { if (OVERLAPS) { dev_alert(DEV, "LOGIC BUG: completed: %p %llus +%u; " "other: %p %llus +%u\n", @@ -187,7 +187,7 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, #undef OVERLAPS #define OVERLAPS overlaps(sector, size, e->sector, e->size) slot = ee_hash_slot(mdev, req->sector); - hlist_for_each_entry(e, n, slot, colision) { + hlist_for_each_entry(e, n, slot, collision) { if (OVERLAPS) { wake_up(&mdev->misc_wait); break; @@ -260,8 +260,8 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) /* remove the request from the conflict detection * respective block_id verification hash */ - if (!hlist_unhashed(&req->colision)) - hlist_del(&req->colision); + if (!hlist_unhashed(&req->collision)) + hlist_del(&req->collision); else D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0); @@ -329,7 +329,7 @@ static int _req_conflicts(struct drbd_request *req) struct hlist_node *n; struct hlist_head *slot; - D_ASSERT(hlist_unhashed(&req->colision)); + D_ASSERT(hlist_unhashed(&req->collision)); if (!get_net_conf(mdev)) return 0; @@ -341,7 +341,7 @@ static int _req_conflicts(struct drbd_request *req) #define OVERLAPS overlaps(i->sector, i->size, sector, size) slot = tl_hash_slot(mdev, sector); - hlist_for_each_entry(i, n, slot, colision) { + hlist_for_each_entry(i, n, slot, collision) { if (OVERLAPS) { dev_alert(DEV, "%s[%u] Concurrent local write detected! " "[DISCARD L] new: %llus +%u; " @@ -359,7 +359,7 @@ static int _req_conflicts(struct drbd_request *req) #undef OVERLAPS #define OVERLAPS overlaps(e->sector, e->size, sector, size) slot = ee_hash_slot(mdev, sector); - hlist_for_each_entry(e, n, slot, colision) { + hlist_for_each_entry(e, n, slot, collision) { if (OVERLAPS) { dev_alert(DEV, "%s[%u] Concurrent remote write detected!" " [DISCARD L] new: %llus +%u; " @@ -491,7 +491,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* so we can verify the handle in the answer packet * corresponding hlist_del is in _req_may_be_done() */ - hlist_add_head(&req->colision, ar_hash_slot(mdev, req->sector)); + hlist_add_head(&req->collision, ar_hash_slot(mdev, req->sector)); set_bit(UNPLUG_REMOTE, &mdev->flags); @@ -507,7 +507,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* assert something? */ /* from drbd_make_request_common only */ - hlist_add_head(&req->colision, tl_hash_slot(mdev, req->sector)); + hlist_add_head(&req->collision, tl_hash_slot(mdev, req->sector)); /* corresponding hlist_del is in _req_may_be_done() */ /* NOTE diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 32e2c3e6a813..281342dca2c8 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -256,7 +256,7 @@ static inline struct drbd_request *_ar_id_to_req(struct drbd_conf *mdev, struct hlist_node *n; struct drbd_request *req; - hlist_for_each_entry(req, n, slot, colision) { + hlist_for_each_entry(req, n, slot, collision) { if ((unsigned long)req == (unsigned long)id) { D_ASSERT(req->sector == sector); return req; @@ -291,7 +291,7 @@ static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev, req->epoch = 0; req->sector = bio_src->bi_sector; req->size = bio_src->bi_size; - INIT_HLIST_NODE(&req->colision); + INIT_HLIST_NODE(&req->collision); INIT_LIST_HEAD(&req->tl_requests); INIT_LIST_HEAD(&req->w.list); } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index b5e53695fd7e..4d76b06b6b20 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -126,7 +126,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo list_del(&e->w.list); /* has been on active_ee or sync_ee */ list_add_tail(&e->w.list, &mdev->done_ee); - /* No hlist_del_init(&e->colision) here, we did not send the Ack yet, + /* No hlist_del_init(&e->collision) here, we did not send the Ack yet, * neither did we wake possibly waiting conflicting requests. * done from "drbd_process_done_ee" within the appropriate w.cb * (e_end_block/e_end_resync_block) or from _drbd_clear_done_ee */ @@ -840,7 +840,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) const int ratio = (t == 0) ? 0 : (t < 100000) ? ((s*100)/t) : (s/(t/100)); - dev_info(DEV, "%u %% had equal check sums, eliminated: %luK; " + dev_info(DEV, "%u %% had equal checksums, eliminated: %luK; " "transferred %luK total %luK\n", ratio, Bit2KB(mdev->rs_same_csum), diff --git a/include/linux/drbd.h b/include/linux/drbd.h index c86283b4fbab..9e5f5607eba3 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -38,7 +38,7 @@ /* Although the Linux source code makes a difference between generic endianness and the bitfields' endianness, there is no - architecture as of Linux-2.6.24-rc4 where the bitfileds' endianness + architecture as of Linux-2.6.24-rc4 where the bitfields' endianness does not match the generic endianness. */ #if __BYTE_ORDER == __LITTLE_ENDIAN @@ -195,7 +195,7 @@ enum drbd_conns { C_WF_REPORT_PARAMS, /* we have a socket */ C_CONNECTED, /* we have introduced each other */ C_STARTING_SYNC_S, /* starting full sync by admin request. */ - C_STARTING_SYNC_T, /* stariing full sync by admin request. */ + C_STARTING_SYNC_T, /* starting full sync by admin request. */ C_WF_BITMAP_S, C_WF_BITMAP_T, C_WF_SYNC_UUID, @@ -236,7 +236,7 @@ union drbd_state { * pointed out by Maxim Uvarov q * even though we transmit as "cpu_to_be32(state)", * the offsets of the bitfields still need to be swapped - * on different endianess. + * on different endianness. */ struct { #if defined(__LITTLE_ENDIAN_BITFIELD) @@ -266,7 +266,7 @@ union drbd_state { unsigned peer:2 ; /* 3/4 primary/secondary/unknown */ unsigned role:2 ; /* 3/4 primary/secondary/unknown */ #else -# error "this endianess is not supported" +# error "this endianness is not supported" #endif }; unsigned int i; diff --git a/include/linux/drbd_tag_magic.h b/include/linux/drbd_tag_magic.h index f14a165e82dc..069543190516 100644 --- a/include/linux/drbd_tag_magic.h +++ b/include/linux/drbd_tag_magic.h @@ -30,7 +30,7 @@ enum packet_types { int tag_and_len ## member; #include "linux/drbd_nl.h" -/* declate tag-list-sizes */ +/* declare tag-list-sizes */ static const int tag_list_sizes[] = { #define NL_PACKET(name, number, fields) 2 fields , #define NL_INTEGER(pn, pr, member) + 4 + 4 -- cgit v1.2.3 From 8af088710d1eb3c980e0ef3779c8d47f3f217b48 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 24 May 2011 11:12:58 +0200 Subject: posix-timers: RCU conversion Ben Nagy reported a scalability problem with KVM/QEMU that hit very hard a single spinlock (idr_lock) in posix-timers code, on its 48 core machine. Even on a 16 cpu machine (2x4x2), a single test can show 98% of cpu time used in ticket_spin_lock, from lock_timer Ref: http://www.spinics.net/lists/kvm/msg51526.html Switching to RCU is quite easy, IDR being already RCU ready. idr_lock should be locked only for an insert/delete, not a lookup. Benchmark on a 2x4x2 machine, 16 processes calling timer_gettime(). Before : real 1m18.669s user 0m1.346s sys 1m17.180s After : real 0m3.296s user 0m1.366s sys 0m1.926s Reported-by: Ben Nagy Signed-off-by: Eric Dumazet Tested-by: Ben Nagy Cc: Oleg Nesterov Cc: Avi Kivity Cc: John Stultz Cc: Richard Cochran Cc: Paul E. McKenney Cc: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/posix-timers.h | 1 + kernel/posix-timers.c | 25 ++++++++++++++----------- 2 files changed, 15 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 808227d40a64..959c14132f46 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -82,6 +82,7 @@ struct k_itimer { unsigned long expires; } mmtimer; struct alarm alarmtimer; + struct rcu_head rcu; } it; }; diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index a1b5edf1bf92..4556182527f3 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -491,6 +491,13 @@ static struct k_itimer * alloc_posix_timer(void) return tmr; } +static void k_itimer_rcu_free(struct rcu_head *head) +{ + struct k_itimer *tmr = container_of(head, struct k_itimer, it.rcu); + + kmem_cache_free(posix_timers_cache, tmr); +} + #define IT_ID_SET 1 #define IT_ID_NOT_SET 0 static void release_posix_timer(struct k_itimer *tmr, int it_id_set) @@ -503,7 +510,7 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set) } put_pid(tmr->it_pid); sigqueue_free(tmr->sigq); - kmem_cache_free(posix_timers_cache, tmr); + call_rcu(&tmr->it.rcu, k_itimer_rcu_free); } static struct k_clock *clockid_to_kclock(const clockid_t id) @@ -631,22 +638,18 @@ out: static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags) { struct k_itimer *timr; - /* - * Watch out here. We do a irqsave on the idr_lock and pass the - * flags part over to the timer lock. Must not let interrupts in - * while we are moving the lock. - */ - spin_lock_irqsave(&idr_lock, *flags); + + rcu_read_lock(); timr = idr_find(&posix_timers_id, (int)timer_id); if (timr) { - spin_lock(&timr->it_lock); + spin_lock_irqsave(&timr->it_lock, *flags); if (timr->it_signal == current->signal) { - spin_unlock(&idr_lock); + rcu_read_unlock(); return timr; } - spin_unlock(&timr->it_lock); + spin_unlock_irqrestore(&timr->it_lock, *flags); } - spin_unlock_irqrestore(&idr_lock, *flags); + rcu_read_unlock(); return NULL; } -- cgit v1.2.3 From 45e9683e87b69328175df3a9e42039b9892ca47e Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 24 May 2011 13:28:54 +1000 Subject: compat: include aio_abi.h for aio_context_t fixes this build error on sparc64 (at least): In file included from arch/sparc/include/asm/siginfo.h:19, from include/linux/signal.h:5, from include/linux/sched.h:73, from arch/sparc/kernel/asm-offsets.c:13: include/linux/compat.h:401: error: expected ')' before 'ctx_id' include/linux/compat.h:406: error: expected ')' before 'ctx_id' Signed-off-by: Stephen Rothwell Signed-off-by: Chris Metcalf --- include/linux/compat.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 644e1a4692b1..ddcb7db38e67 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -13,6 +13,7 @@ #include #include #include +#include /* for aio_context_t */ #include #include -- cgit v1.2.3 From 3c454cf21645bc96668e286f6352ac2c4c895fa2 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 6 Apr 2011 09:31:40 -0700 Subject: ceph: use LOOKUPINO to make unconnected nfs fh more reliable If we are unable to locate an inode by ino, ask the MDS using the new LOOKUPINO command. Signed-off-by: Sage Weil --- fs/ceph/export.c | 19 +++++++++++++++++-- include/linux/ceph/ceph_fs.h | 1 + 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/ceph/export.c b/fs/ceph/export.c index e41056174bf8..f1828af09912 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -86,6 +86,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, static struct dentry *__fh_to_dentry(struct super_block *sb, struct ceph_nfs_fh *fh) { + struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; struct inode *inode; struct dentry *dentry; struct ceph_vino vino; @@ -95,8 +96,22 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, vino.ino = fh->ino; vino.snap = CEPH_NOSNAP; inode = ceph_find_inode(sb, vino); - if (!inode) - return ERR_PTR(-ESTALE); + if (!inode) { + struct ceph_mds_request *req; + + req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO, + USE_ANY_MDS); + if (IS_ERR(req)) + return ERR_CAST(req); + + req->r_ino1 = vino; + req->r_num_caps = 1; + err = ceph_mdsc_do_request(mdsc, NULL, req); + ceph_mdsc_put_request(req); + inode = ceph_find_inode(sb, vino); + if (!inode) + return ERR_PTR(-ESTALE); + } dentry = d_obtain_alias(inode); if (IS_ERR(dentry)) { diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index b8e995fbd867..b8c60694b2b0 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -313,6 +313,7 @@ enum { CEPH_MDS_OP_GETATTR = 0x00101, CEPH_MDS_OP_LOOKUPHASH = 0x00102, CEPH_MDS_OP_LOOKUPPARENT = 0x00103, + CEPH_MDS_OP_LOOKUPINO = 0x00104, CEPH_MDS_OP_SETXATTR = 0x01105, CEPH_MDS_OP_RMXATTR = 0x01106, -- cgit v1.2.3 From 650c2a2145981696c414be1d540a32447d0e353e Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 15 May 2011 22:53:56 +0200 Subject: mach-ux500: move the DB8500 PRCMU driver to MFD We have decided that this function arbiter fits better in the MFD subsystem. Since we need to concatenate the split header files we move it basically like this: mv mach-ux500/prcmu-db8500.c drivers/mfd/db8500-prcmu.c mv mach-ux500/include/mach/prcmu-defs.h include/linux/mfd/db8500-prcmu.h mv mach-ux500/include/mach/prcmu-regs.h drivers/mfd/db8500-prcmu-regs.h mach-ux500/include/mach/prcmu.h >> include/linux/mfd/db8500-prcmu.h rm arch/arm/mach-ux500/include/mach/prcmu.h Then we update different #include statements and Makefile orders etc to make the PRCMU driver compile, link and boot in the new place without really changing any code. Acked-by: Samuel Ortiz Signed-off-by: Linus Walleij --- arch/arm/mach-ux500/Kconfig | 1 + arch/arm/mach-ux500/Makefile | 2 +- arch/arm/mach-ux500/cpu.c | 2 +- arch/arm/mach-ux500/cpufreq.c | 3 +- arch/arm/mach-ux500/include/mach/prcmu-defs.h | 30 -- arch/arm/mach-ux500/include/mach/prcmu-regs.h | 94 ------ arch/arm/mach-ux500/include/mach/prcmu.h | 28 -- arch/arm/mach-ux500/prcmu-db8500.c | 394 ------------------------- drivers/mfd/Kconfig | 12 +- drivers/mfd/Makefile | 4 +- drivers/mfd/ab8500-i2c.c | 3 +- drivers/mfd/db8500-prcmu-regs.h | 94 ++++++ drivers/mfd/db8500-prcmu.c | 395 ++++++++++++++++++++++++++ include/linux/mfd/db8500-prcmu.h | 58 ++++ 14 files changed, 566 insertions(+), 554 deletions(-) delete mode 100644 arch/arm/mach-ux500/include/mach/prcmu-defs.h delete mode 100644 arch/arm/mach-ux500/include/mach/prcmu-regs.h delete mode 100644 arch/arm/mach-ux500/include/mach/prcmu.h delete mode 100644 arch/arm/mach-ux500/prcmu-db8500.c create mode 100644 drivers/mfd/db8500-prcmu-regs.h create mode 100644 drivers/mfd/db8500-prcmu.c create mode 100644 include/linux/mfd/db8500-prcmu.h (limited to 'include/linux') diff --git a/arch/arm/mach-ux500/Kconfig b/arch/arm/mach-ux500/Kconfig index 58626013aa32..8071d2746f70 100644 --- a/arch/arm/mach-ux500/Kconfig +++ b/arch/arm/mach-ux500/Kconfig @@ -15,6 +15,7 @@ config UX500_SOC_DB5500 config UX500_SOC_DB8500 bool "DB8500" + select MFD_DB8500_PRCMU endmenu diff --git a/arch/arm/mach-ux500/Makefile b/arch/arm/mach-ux500/Makefile index 2a08a10e09da..7a1d43e04f97 100644 --- a/arch/arm/mach-ux500/Makefile +++ b/arch/arm/mach-ux500/Makefile @@ -5,7 +5,7 @@ obj-y := clock.o cpu.o devices.o devices-common.o \ id.o usb.o obj-$(CONFIG_UX500_SOC_DB5500) += cpu-db5500.o dma-db5500.o -obj-$(CONFIG_UX500_SOC_DB8500) += cpu-db8500.o devices-db8500.o prcmu-db8500.o +obj-$(CONFIG_UX500_SOC_DB8500) += cpu-db8500.o devices-db8500.o obj-$(CONFIG_MACH_U8500) += board-mop500.o board-mop500-sdi.o \ board-mop500-regulators.o \ board-mop500-uib.o board-mop500-stuib.o \ diff --git a/arch/arm/mach-ux500/cpu.c b/arch/arm/mach-ux500/cpu.c index 0190e0e68b4d..11360f734cec 100644 --- a/arch/arm/mach-ux500/cpu.c +++ b/arch/arm/mach-ux500/cpu.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -19,7 +20,6 @@ #include #include #include -#include #include "clock.h" diff --git a/arch/arm/mach-ux500/cpufreq.c b/arch/arm/mach-ux500/cpufreq.c index 5c5b747f134d..d196939fcdb9 100644 --- a/arch/arm/mach-ux500/cpufreq.c +++ b/arch/arm/mach-ux500/cpufreq.c @@ -17,10 +17,9 @@ #include #include #include +#include #include -#include -#include #define DRIVER_NAME "cpufreq-u8500" #define CPUFREQ_NAME "u8500" diff --git a/arch/arm/mach-ux500/include/mach/prcmu-defs.h b/arch/arm/mach-ux500/include/mach/prcmu-defs.h deleted file mode 100644 index 848ba64b561f..000000000000 --- a/arch/arm/mach-ux500/include/mach/prcmu-defs.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) STMicroelectronics 2009 - * Copyright (C) ST-Ericsson SA 2010 - * - * Author: Sundar Iyer - * Author: Martin Persson - * - * License Terms: GNU General Public License v2 - * - * PRCM Unit definitions - */ - -#ifndef __MACH_PRCMU_DEFS_H -#define __MACH_PRCMU_DEFS_H - -enum prcmu_cpu_opp { - CPU_OPP_INIT = 0x00, - CPU_OPP_NO_CHANGE = 0x01, - CPU_OPP_100 = 0x02, - CPU_OPP_50 = 0x03, - CPU_OPP_MAX = 0x04, - CPU_OPP_EXT_CLK = 0x07 -}; -enum prcmu_ape_opp { - APE_OPP_NO_CHANGE = 0x00, - APE_OPP_100 = 0x02, - APE_OPP_50 = 0x03, -}; - -#endif /* __MACH_PRCMU_DEFS_H */ diff --git a/arch/arm/mach-ux500/include/mach/prcmu-regs.h b/arch/arm/mach-ux500/include/mach/prcmu-regs.h deleted file mode 100644 index c1226da19bfb..000000000000 --- a/arch/arm/mach-ux500/include/mach/prcmu-regs.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (C) STMicroelectronics 2009 - * Copyright (C) ST-Ericsson SA 2010 - * - * Author: Kumar Sanghvi - * Author: Sundar Iyer - * - * License Terms: GNU General Public License v2 - * - * PRCM Unit registers - */ - -#ifndef __MACH_PRCMU_REGS_H -#define __MACH_PRCMU_REGS_H - -#include - -#define PRCM_ARM_PLLDIVPS (_PRCMU_BASE + 0x118) -#define PRCM_ARM_CHGCLKREQ (_PRCMU_BASE + 0x114) -#define PRCM_PLLARM_ENABLE (_PRCMU_BASE + 0x98) -#define PRCM_ARMCLKFIX_MGT (_PRCMU_BASE + 0x0) -#define PRCM_A9_RESETN_CLR (_PRCMU_BASE + 0x1f4) -#define PRCM_A9_RESETN_SET (_PRCMU_BASE + 0x1f0) -#define PRCM_ARM_LS_CLAMP (_PRCMU_BASE + 0x30c) -#define PRCM_SRAM_A9 (_PRCMU_BASE + 0x308) - -/* ARM WFI Standby signal register */ -#define PRCM_ARM_WFI_STANDBY (_PRCMU_BASE + 0x130) -#define PRCMU_IOCR (_PRCMU_BASE + 0x310) - -/* CPU mailbox registers */ -#define PRCM_MBOX_CPU_VAL (_PRCMU_BASE + 0x0fc) -#define PRCM_MBOX_CPU_SET (_PRCMU_BASE + 0x100) -#define PRCM_MBOX_CPU_CLR (_PRCMU_BASE + 0x104) - -/* Dual A9 core interrupt management unit registers */ -#define PRCM_A9_MASK_REQ (_PRCMU_BASE + 0x328) -#define PRCM_A9_MASK_ACK (_PRCMU_BASE + 0x32c) -#define PRCM_ARMITMSK31TO0 (_PRCMU_BASE + 0x11c) -#define PRCM_ARMITMSK63TO32 (_PRCMU_BASE + 0x120) -#define PRCM_ARMITMSK95TO64 (_PRCMU_BASE + 0x124) -#define PRCM_ARMITMSK127TO96 (_PRCMU_BASE + 0x128) -#define PRCM_POWER_STATE_VAL (_PRCMU_BASE + 0x25C) -#define PRCM_ARMITVAL31TO0 (_PRCMU_BASE + 0x260) -#define PRCM_ARMITVAL63TO32 (_PRCMU_BASE + 0x264) -#define PRCM_ARMITVAL95TO64 (_PRCMU_BASE + 0x268) -#define PRCM_ARMITVAL127TO96 (_PRCMU_BASE + 0x26C) - -#define PRCM_HOSTACCESS_REQ (_PRCMU_BASE + 0x334) -#define ARM_WAKEUP_MODEM 0x1 - -#define PRCM_ARM_IT1_CLEAR (_PRCMU_BASE + 0x48C) -#define PRCM_ARM_IT1_VAL (_PRCMU_BASE + 0x494) -#define PRCM_HOLD_EVT (_PRCMU_BASE + 0x174) - -#define PRCM_ITSTATUS0 (_PRCMU_BASE + 0x148) -#define PRCM_ITSTATUS1 (_PRCMU_BASE + 0x150) -#define PRCM_ITSTATUS2 (_PRCMU_BASE + 0x158) -#define PRCM_ITSTATUS3 (_PRCMU_BASE + 0x160) -#define PRCM_ITSTATUS4 (_PRCMU_BASE + 0x168) -#define PRCM_ITSTATUS5 (_PRCMU_BASE + 0x484) -#define PRCM_ITCLEAR5 (_PRCMU_BASE + 0x488) -#define PRCM_ARMIT_MASKXP70_IT (_PRCMU_BASE + 0x1018) - -/* System reset register */ -#define PRCM_APE_SOFTRST (_PRCMU_BASE + 0x228) - -/* Level shifter and clamp control registers */ -#define PRCM_MMIP_LS_CLAMP_SET (_PRCMU_BASE + 0x420) -#define PRCM_MMIP_LS_CLAMP_CLR (_PRCMU_BASE + 0x424) - -/* PRCMU clock/PLL/reset registers */ -#define PRCM_PLLDSI_FREQ (_PRCMU_BASE + 0x500) -#define PRCM_PLLDSI_ENABLE (_PRCMU_BASE + 0x504) -#define PRCM_LCDCLK_MGT (_PRCMU_BASE + 0x044) -#define PRCM_MCDECLK_MGT (_PRCMU_BASE + 0x064) -#define PRCM_HDMICLK_MGT (_PRCMU_BASE + 0x058) -#define PRCM_TVCLK_MGT (_PRCMU_BASE + 0x07c) -#define PRCM_DSI_PLLOUT_SEL (_PRCMU_BASE + 0x530) -#define PRCM_DSITVCLK_DIV (_PRCMU_BASE + 0x52C) -#define PRCM_APE_RESETN_SET (_PRCMU_BASE + 0x1E4) -#define PRCM_APE_RESETN_CLR (_PRCMU_BASE + 0x1E8) - -/* ePOD and memory power signal control registers */ -#define PRCM_EPOD_C_SET (_PRCMU_BASE + 0x410) -#define PRCM_SRAM_LS_SLEEP (_PRCMU_BASE + 0x304) - -/* Debug power control unit registers */ -#define PRCM_POWER_STATE_SET (_PRCMU_BASE + 0x254) - -/* Miscellaneous unit registers */ -#define PRCM_DSI_SW_RESET (_PRCMU_BASE + 0x324) - -#endif /* __MACH_PRCMU_REGS_H */ diff --git a/arch/arm/mach-ux500/include/mach/prcmu.h b/arch/arm/mach-ux500/include/mach/prcmu.h deleted file mode 100644 index c49e456162ef..000000000000 --- a/arch/arm/mach-ux500/include/mach/prcmu.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (C) STMicroelectronics 2009 - * Copyright (C) ST-Ericsson SA 2010 - * - * Author: Kumar Sanghvi - * Author: Sundar Iyer - * Author: Mattias Nilsson - * - * License Terms: GNU General Public License v2 - * - * PRCM Unit f/w API - */ -#ifndef __MACH_PRCMU_H -#define __MACH_PRCMU_H -#include - -void __init prcmu_early_init(void); -int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size); -int prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size); -int prcmu_set_ape_opp(enum prcmu_ape_opp opp); -int prcmu_set_cpu_opp(enum prcmu_cpu_opp opp); -int prcmu_set_ape_cpu_opps(enum prcmu_ape_opp ape_opp, - enum prcmu_cpu_opp cpu_opp); -int prcmu_get_ape_opp(void); -int prcmu_get_cpu_opp(void); -bool prcmu_has_arm_maxopp(void); - -#endif /* __MACH_PRCMU_H */ diff --git a/arch/arm/mach-ux500/prcmu-db8500.c b/arch/arm/mach-ux500/prcmu-db8500.c deleted file mode 100644 index c522d26ef348..000000000000 --- a/arch/arm/mach-ux500/prcmu-db8500.c +++ /dev/null @@ -1,394 +0,0 @@ -/* - * Copyright (C) STMicroelectronics 2009 - * Copyright (C) ST-Ericsson SA 2010 - * - * License Terms: GNU General Public License v2 - * Author: Kumar Sanghvi - * Author: Sundar Iyer - * Author: Mattias Nilsson - * - * U8500 PRCM Unit interface driver - * - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -/* Global var to runtime determine TCDM base for v2 or v1 */ -static __iomem void *tcdm_base; - -#define _MBOX_HEADER (tcdm_base + 0xFE8) -#define MBOX_HEADER_REQ_MB0 (_MBOX_HEADER + 0x0) - -#define REQ_MB1 (tcdm_base + 0xFD0) -#define REQ_MB5 (tcdm_base + 0xE44) - -#define REQ_MB1_ARMOPP (REQ_MB1 + 0x0) -#define REQ_MB1_APEOPP (REQ_MB1 + 0x1) -#define REQ_MB1_BOOSTOPP (REQ_MB1 + 0x2) - -#define ACK_MB1 (tcdm_base + 0xE04) -#define ACK_MB5 (tcdm_base + 0xDF4) - -#define ACK_MB1_CURR_ARMOPP (ACK_MB1 + 0x0) -#define ACK_MB1_CURR_APEOPP (ACK_MB1 + 0x1) - -#define REQ_MB5_I2C_SLAVE_OP (REQ_MB5) -#define REQ_MB5_I2C_HW_BITS (REQ_MB5 + 1) -#define REQ_MB5_I2C_REG (REQ_MB5 + 2) -#define REQ_MB5_I2C_VAL (REQ_MB5 + 3) - -#define ACK_MB5_I2C_STATUS (ACK_MB5 + 1) -#define ACK_MB5_I2C_VAL (ACK_MB5 + 3) - -#define PRCM_AVS_VARM_MAX_OPP (tcdm_base + 0x2E4) -#define PRCM_AVS_ISMODEENABLE 7 -#define PRCM_AVS_ISMODEENABLE_MASK (1 << PRCM_AVS_ISMODEENABLE) - -#define I2C_WRITE(slave) \ - (((slave) << 1) | (cpu_is_u8500v2() ? BIT(6) : 0)) -#define I2C_READ(slave) \ - (((slave) << 1) | (cpu_is_u8500v2() ? BIT(6) : 0) | BIT(0)) -#define I2C_STOP_EN BIT(3) - -enum mb1_h { - MB1H_ARM_OPP = 1, - MB1H_APE_OPP, - MB1H_ARM_APE_OPP, -}; - -static struct { - struct mutex lock; - struct completion work; - struct { - u8 arm_opp; - u8 ape_opp; - u8 arm_status; - u8 ape_status; - } ack; -} mb1_transfer; - -enum ack_mb5_status { - I2C_WR_OK = 0x01, - I2C_RD_OK = 0x02, -}; - -#define MBOX_BIT BIT -#define NUM_MBOX 8 - -static struct { - struct mutex lock; - struct completion work; - bool failed; - struct { - u8 status; - u8 value; - } ack; -} mb5_transfer; - -/** - * prcmu_abb_read() - Read register value(s) from the ABB. - * @slave: The I2C slave address. - * @reg: The (start) register address. - * @value: The read out value(s). - * @size: The number of registers to read. - * - * Reads register value(s) from the ABB. - * @size has to be 1 for the current firmware version. - */ -int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size) -{ - int r; - - if (size != 1) - return -EINVAL; - - r = mutex_lock_interruptible(&mb5_transfer.lock); - if (r) - return r; - - while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(5)) - cpu_relax(); - - writeb(I2C_READ(slave), REQ_MB5_I2C_SLAVE_OP); - writeb(I2C_STOP_EN, REQ_MB5_I2C_HW_BITS); - writeb(reg, REQ_MB5_I2C_REG); - - writel(MBOX_BIT(5), PRCM_MBOX_CPU_SET); - if (!wait_for_completion_timeout(&mb5_transfer.work, - msecs_to_jiffies(500))) { - pr_err("prcmu: prcmu_abb_read timed out.\n"); - r = -EIO; - goto unlock_and_return; - } - r = ((mb5_transfer.ack.status == I2C_RD_OK) ? 0 : -EIO); - if (!r) - *value = mb5_transfer.ack.value; - -unlock_and_return: - mutex_unlock(&mb5_transfer.lock); - return r; -} -EXPORT_SYMBOL(prcmu_abb_read); - -/** - * prcmu_abb_write() - Write register value(s) to the ABB. - * @slave: The I2C slave address. - * @reg: The (start) register address. - * @value: The value(s) to write. - * @size: The number of registers to write. - * - * Reads register value(s) from the ABB. - * @size has to be 1 for the current firmware version. - */ -int prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size) -{ - int r; - - if (size != 1) - return -EINVAL; - - r = mutex_lock_interruptible(&mb5_transfer.lock); - if (r) - return r; - - - while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(5)) - cpu_relax(); - - writeb(I2C_WRITE(slave), REQ_MB5_I2C_SLAVE_OP); - writeb(I2C_STOP_EN, REQ_MB5_I2C_HW_BITS); - writeb(reg, REQ_MB5_I2C_REG); - writeb(*value, REQ_MB5_I2C_VAL); - - writel(MBOX_BIT(5), PRCM_MBOX_CPU_SET); - if (!wait_for_completion_timeout(&mb5_transfer.work, - msecs_to_jiffies(500))) { - pr_err("prcmu: prcmu_abb_write timed out.\n"); - r = -EIO; - goto unlock_and_return; - } - r = ((mb5_transfer.ack.status == I2C_WR_OK) ? 0 : -EIO); - -unlock_and_return: - mutex_unlock(&mb5_transfer.lock); - return r; -} -EXPORT_SYMBOL(prcmu_abb_write); - -static int set_ape_cpu_opps(u8 header, enum prcmu_ape_opp ape_opp, - enum prcmu_cpu_opp cpu_opp) -{ - bool do_ape; - bool do_arm; - int err = 0; - - do_ape = ((header == MB1H_APE_OPP) || (header == MB1H_ARM_APE_OPP)); - do_arm = ((header == MB1H_ARM_OPP) || (header == MB1H_ARM_APE_OPP)); - - mutex_lock(&mb1_transfer.lock); - - while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(1)) - cpu_relax(); - - writeb(0, MBOX_HEADER_REQ_MB0); - writeb(cpu_opp, REQ_MB1_ARMOPP); - writeb(ape_opp, REQ_MB1_APEOPP); - writeb(0, REQ_MB1_BOOSTOPP); - writel(MBOX_BIT(1), PRCM_MBOX_CPU_SET); - wait_for_completion(&mb1_transfer.work); - if ((do_ape) && (mb1_transfer.ack.ape_status != 0)) - err = -EIO; - if ((do_arm) && (mb1_transfer.ack.arm_status != 0)) - err = -EIO; - - mutex_unlock(&mb1_transfer.lock); - - return err; -} - -/** - * prcmu_set_ape_opp() - Set the OPP of the APE. - * @opp: The OPP to set. - * - * This function sets the OPP of the APE. - */ -int prcmu_set_ape_opp(enum prcmu_ape_opp opp) -{ - return set_ape_cpu_opps(MB1H_APE_OPP, opp, APE_OPP_NO_CHANGE); -} -EXPORT_SYMBOL(prcmu_set_ape_opp); - -/** - * prcmu_set_cpu_opp() - Set the OPP of the CPU. - * @opp: The OPP to set. - * - * This function sets the OPP of the CPU. - */ -int prcmu_set_cpu_opp(enum prcmu_cpu_opp opp) -{ - return set_ape_cpu_opps(MB1H_ARM_OPP, CPU_OPP_NO_CHANGE, opp); -} -EXPORT_SYMBOL(prcmu_set_cpu_opp); - -/** - * prcmu_set_ape_cpu_opps() - Set the OPPs of the APE and the CPU. - * @ape_opp: The APE OPP to set. - * @cpu_opp: The CPU OPP to set. - * - * This function sets the OPPs of the APE and the CPU. - */ -int prcmu_set_ape_cpu_opps(enum prcmu_ape_opp ape_opp, - enum prcmu_cpu_opp cpu_opp) -{ - return set_ape_cpu_opps(MB1H_ARM_APE_OPP, ape_opp, cpu_opp); -} -EXPORT_SYMBOL(prcmu_set_ape_cpu_opps); - -/** - * prcmu_get_ape_opp() - Get the OPP of the APE. - * - * This function gets the OPP of the APE. - */ -enum prcmu_ape_opp prcmu_get_ape_opp(void) -{ - return readb(ACK_MB1_CURR_APEOPP); -} -EXPORT_SYMBOL(prcmu_get_ape_opp); - -/** - * prcmu_get_cpu_opp() - Get the OPP of the CPU. - * - * This function gets the OPP of the CPU. The OPP is specified in %%. - * PRCMU_OPP_EXT is a special OPP value, not specified in %%. - */ -int prcmu_get_cpu_opp(void) -{ - return readb(ACK_MB1_CURR_ARMOPP); -} -EXPORT_SYMBOL(prcmu_get_cpu_opp); - -bool prcmu_has_arm_maxopp(void) -{ - return (readb(PRCM_AVS_VARM_MAX_OPP) & PRCM_AVS_ISMODEENABLE_MASK) - == PRCM_AVS_ISMODEENABLE_MASK; -} - -static void read_mailbox_0(void) -{ - writel(MBOX_BIT(0), PRCM_ARM_IT1_CLEAR); -} - -static void read_mailbox_1(void) -{ - mb1_transfer.ack.arm_opp = readb(ACK_MB1_CURR_ARMOPP); - mb1_transfer.ack.ape_opp = readb(ACK_MB1_CURR_APEOPP); - complete(&mb1_transfer.work); - writel(MBOX_BIT(1), PRCM_ARM_IT1_CLEAR); -} - -static void read_mailbox_2(void) -{ - writel(MBOX_BIT(2), PRCM_ARM_IT1_CLEAR); -} - -static void read_mailbox_3(void) -{ - writel(MBOX_BIT(3), PRCM_ARM_IT1_CLEAR); -} - -static void read_mailbox_4(void) -{ - writel(MBOX_BIT(4), PRCM_ARM_IT1_CLEAR); -} - -static void read_mailbox_5(void) -{ - mb5_transfer.ack.status = readb(ACK_MB5_I2C_STATUS); - mb5_transfer.ack.value = readb(ACK_MB5_I2C_VAL); - complete(&mb5_transfer.work); - writel(MBOX_BIT(5), PRCM_ARM_IT1_CLEAR); -} - -static void read_mailbox_6(void) -{ - writel(MBOX_BIT(6), PRCM_ARM_IT1_CLEAR); -} - -static void read_mailbox_7(void) -{ - writel(MBOX_BIT(7), PRCM_ARM_IT1_CLEAR); -} - -static void (* const read_mailbox[NUM_MBOX])(void) = { - read_mailbox_0, - read_mailbox_1, - read_mailbox_2, - read_mailbox_3, - read_mailbox_4, - read_mailbox_5, - read_mailbox_6, - read_mailbox_7 -}; - -static irqreturn_t prcmu_irq_handler(int irq, void *data) -{ - u32 bits; - u8 n; - - bits = (readl(PRCM_ARM_IT1_VAL) & (MBOX_BIT(NUM_MBOX) - 1)); - if (unlikely(!bits)) - return IRQ_NONE; - - for (n = 0; bits; n++) { - if (bits & MBOX_BIT(n)) { - bits -= MBOX_BIT(n); - read_mailbox[n](); - } - } - return IRQ_HANDLED; -} - -void __init prcmu_early_init(void) -{ - if (cpu_is_u8500v11() || cpu_is_u8500ed()) { - tcdm_base = __io_address(U8500_PRCMU_TCDM_BASE_V1); - } else if (cpu_is_u8500v2()) { - tcdm_base = __io_address(U8500_PRCMU_TCDM_BASE); - } else { - pr_err("prcmu: Unsupported chip version\n"); - BUG(); - } -} - -static int __init prcmu_init(void) -{ - if (cpu_is_u8500ed()) { - pr_err("prcmu: Unsupported chip version\n"); - return 0; - } - - mutex_init(&mb1_transfer.lock); - init_completion(&mb1_transfer.work); - mutex_init(&mb5_transfer.lock); - init_completion(&mb5_transfer.work); - - /* Clean up the mailbox interrupts after pre-kernel code. */ - writel((MBOX_BIT(NUM_MBOX) - 1), PRCM_ARM_IT1_CLEAR); - - return request_irq(IRQ_DB8500_PRCMU1, prcmu_irq_handler, 0, - "prcmu", NULL); -} - -arch_initcall(prcmu_init); diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 3ed3ff06be5d..7eaeb9750793 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -538,7 +538,7 @@ config AB8500_CORE config AB8500_I2C_CORE bool "AB8500 register access via PRCMU I2C" - depends on AB8500_CORE && UX500_SOC_DB8500 + depends on AB8500_CORE && MFD_DB8500_PRCMU default y help This enables register access to the AB8500 chip via PRCMU I2C. @@ -575,6 +575,16 @@ config AB3550_CORE LEDs, vibrator, system power and temperature, power management and ALSA sound. +config MFD_DB8500_PRCMU + bool "ST-Ericsson DB8500 Power Reset Control Management Unit" + depends on UX500_SOC_DB8500 + select MFD_CORE + help + Select this option to enable support for the DB8500 Power Reset + and Control Management Unit. This is basically an autonomous + system controller running an XP70 microprocessor, which is accessed + through a register map. + config MFD_CS5535 tristate "Support for CS5535 and CS5536 southbridge core functions" select MFD_CORE diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 419caa9d7dcf..814c57a692a9 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -74,9 +74,11 @@ obj-$(CONFIG_AB3100_CORE) += ab3100-core.o obj-$(CONFIG_AB3100_OTP) += ab3100-otp.o obj-$(CONFIG_AB3550_CORE) += ab3550-core.o obj-$(CONFIG_AB8500_CORE) += ab8500-core.o ab8500-sysctrl.o -obj-$(CONFIG_AB8500_I2C_CORE) += ab8500-i2c.o obj-$(CONFIG_AB8500_DEBUG) += ab8500-debugfs.o obj-$(CONFIG_AB8500_GPADC) += ab8500-gpadc.o +obj-$(CONFIG_MFD_DB8500_PRCMU) += db8500-prcmu.o +# ab8500-i2c need to come after db8500-prcmu (which provides the channel) +obj-$(CONFIG_AB8500_I2C_CORE) += ab8500-i2c.o obj-$(CONFIG_MFD_TIMBERDALE) += timberdale.o obj-$(CONFIG_PMIC_ADP5520) += adp5520.o obj-$(CONFIG_LPC_SCH) += lpc_sch.o diff --git a/drivers/mfd/ab8500-i2c.c b/drivers/mfd/ab8500-i2c.c index 821e6b86afd2..9be541c6b004 100644 --- a/drivers/mfd/ab8500-i2c.c +++ b/drivers/mfd/ab8500-i2c.c @@ -11,8 +11,7 @@ #include #include #include - -#include +#include static int ab8500_i2c_write(struct ab8500 *ab8500, u16 addr, u8 data) { diff --git a/drivers/mfd/db8500-prcmu-regs.h b/drivers/mfd/db8500-prcmu-regs.h new file mode 100644 index 000000000000..c1226da19bfb --- /dev/null +++ b/drivers/mfd/db8500-prcmu-regs.h @@ -0,0 +1,94 @@ +/* + * Copyright (C) STMicroelectronics 2009 + * Copyright (C) ST-Ericsson SA 2010 + * + * Author: Kumar Sanghvi + * Author: Sundar Iyer + * + * License Terms: GNU General Public License v2 + * + * PRCM Unit registers + */ + +#ifndef __MACH_PRCMU_REGS_H +#define __MACH_PRCMU_REGS_H + +#include + +#define PRCM_ARM_PLLDIVPS (_PRCMU_BASE + 0x118) +#define PRCM_ARM_CHGCLKREQ (_PRCMU_BASE + 0x114) +#define PRCM_PLLARM_ENABLE (_PRCMU_BASE + 0x98) +#define PRCM_ARMCLKFIX_MGT (_PRCMU_BASE + 0x0) +#define PRCM_A9_RESETN_CLR (_PRCMU_BASE + 0x1f4) +#define PRCM_A9_RESETN_SET (_PRCMU_BASE + 0x1f0) +#define PRCM_ARM_LS_CLAMP (_PRCMU_BASE + 0x30c) +#define PRCM_SRAM_A9 (_PRCMU_BASE + 0x308) + +/* ARM WFI Standby signal register */ +#define PRCM_ARM_WFI_STANDBY (_PRCMU_BASE + 0x130) +#define PRCMU_IOCR (_PRCMU_BASE + 0x310) + +/* CPU mailbox registers */ +#define PRCM_MBOX_CPU_VAL (_PRCMU_BASE + 0x0fc) +#define PRCM_MBOX_CPU_SET (_PRCMU_BASE + 0x100) +#define PRCM_MBOX_CPU_CLR (_PRCMU_BASE + 0x104) + +/* Dual A9 core interrupt management unit registers */ +#define PRCM_A9_MASK_REQ (_PRCMU_BASE + 0x328) +#define PRCM_A9_MASK_ACK (_PRCMU_BASE + 0x32c) +#define PRCM_ARMITMSK31TO0 (_PRCMU_BASE + 0x11c) +#define PRCM_ARMITMSK63TO32 (_PRCMU_BASE + 0x120) +#define PRCM_ARMITMSK95TO64 (_PRCMU_BASE + 0x124) +#define PRCM_ARMITMSK127TO96 (_PRCMU_BASE + 0x128) +#define PRCM_POWER_STATE_VAL (_PRCMU_BASE + 0x25C) +#define PRCM_ARMITVAL31TO0 (_PRCMU_BASE + 0x260) +#define PRCM_ARMITVAL63TO32 (_PRCMU_BASE + 0x264) +#define PRCM_ARMITVAL95TO64 (_PRCMU_BASE + 0x268) +#define PRCM_ARMITVAL127TO96 (_PRCMU_BASE + 0x26C) + +#define PRCM_HOSTACCESS_REQ (_PRCMU_BASE + 0x334) +#define ARM_WAKEUP_MODEM 0x1 + +#define PRCM_ARM_IT1_CLEAR (_PRCMU_BASE + 0x48C) +#define PRCM_ARM_IT1_VAL (_PRCMU_BASE + 0x494) +#define PRCM_HOLD_EVT (_PRCMU_BASE + 0x174) + +#define PRCM_ITSTATUS0 (_PRCMU_BASE + 0x148) +#define PRCM_ITSTATUS1 (_PRCMU_BASE + 0x150) +#define PRCM_ITSTATUS2 (_PRCMU_BASE + 0x158) +#define PRCM_ITSTATUS3 (_PRCMU_BASE + 0x160) +#define PRCM_ITSTATUS4 (_PRCMU_BASE + 0x168) +#define PRCM_ITSTATUS5 (_PRCMU_BASE + 0x484) +#define PRCM_ITCLEAR5 (_PRCMU_BASE + 0x488) +#define PRCM_ARMIT_MASKXP70_IT (_PRCMU_BASE + 0x1018) + +/* System reset register */ +#define PRCM_APE_SOFTRST (_PRCMU_BASE + 0x228) + +/* Level shifter and clamp control registers */ +#define PRCM_MMIP_LS_CLAMP_SET (_PRCMU_BASE + 0x420) +#define PRCM_MMIP_LS_CLAMP_CLR (_PRCMU_BASE + 0x424) + +/* PRCMU clock/PLL/reset registers */ +#define PRCM_PLLDSI_FREQ (_PRCMU_BASE + 0x500) +#define PRCM_PLLDSI_ENABLE (_PRCMU_BASE + 0x504) +#define PRCM_LCDCLK_MGT (_PRCMU_BASE + 0x044) +#define PRCM_MCDECLK_MGT (_PRCMU_BASE + 0x064) +#define PRCM_HDMICLK_MGT (_PRCMU_BASE + 0x058) +#define PRCM_TVCLK_MGT (_PRCMU_BASE + 0x07c) +#define PRCM_DSI_PLLOUT_SEL (_PRCMU_BASE + 0x530) +#define PRCM_DSITVCLK_DIV (_PRCMU_BASE + 0x52C) +#define PRCM_APE_RESETN_SET (_PRCMU_BASE + 0x1E4) +#define PRCM_APE_RESETN_CLR (_PRCMU_BASE + 0x1E8) + +/* ePOD and memory power signal control registers */ +#define PRCM_EPOD_C_SET (_PRCMU_BASE + 0x410) +#define PRCM_SRAM_LS_SLEEP (_PRCMU_BASE + 0x304) + +/* Debug power control unit registers */ +#define PRCM_POWER_STATE_SET (_PRCMU_BASE + 0x254) + +/* Miscellaneous unit registers */ +#define PRCM_DSI_SW_RESET (_PRCMU_BASE + 0x324) + +#endif /* __MACH_PRCMU_REGS_H */ diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c new file mode 100644 index 000000000000..31f18c8c6bf8 --- /dev/null +++ b/drivers/mfd/db8500-prcmu.c @@ -0,0 +1,395 @@ +/* + * Copyright (C) STMicroelectronics 2009 + * Copyright (C) ST-Ericsson SA 2010 + * + * License Terms: GNU General Public License v2 + * Author: Kumar Sanghvi + * Author: Sundar Iyer + * Author: Mattias Nilsson + * + * U8500 PRCM Unit interface driver + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "db8500-prcmu-regs.h" + +/* Global var to runtime determine TCDM base for v2 or v1 */ +static __iomem void *tcdm_base; + +#define _MBOX_HEADER (tcdm_base + 0xFE8) +#define MBOX_HEADER_REQ_MB0 (_MBOX_HEADER + 0x0) + +#define REQ_MB1 (tcdm_base + 0xFD0) +#define REQ_MB5 (tcdm_base + 0xE44) + +#define REQ_MB1_ARMOPP (REQ_MB1 + 0x0) +#define REQ_MB1_APEOPP (REQ_MB1 + 0x1) +#define REQ_MB1_BOOSTOPP (REQ_MB1 + 0x2) + +#define ACK_MB1 (tcdm_base + 0xE04) +#define ACK_MB5 (tcdm_base + 0xDF4) + +#define ACK_MB1_CURR_ARMOPP (ACK_MB1 + 0x0) +#define ACK_MB1_CURR_APEOPP (ACK_MB1 + 0x1) + +#define REQ_MB5_I2C_SLAVE_OP (REQ_MB5) +#define REQ_MB5_I2C_HW_BITS (REQ_MB5 + 1) +#define REQ_MB5_I2C_REG (REQ_MB5 + 2) +#define REQ_MB5_I2C_VAL (REQ_MB5 + 3) + +#define ACK_MB5_I2C_STATUS (ACK_MB5 + 1) +#define ACK_MB5_I2C_VAL (ACK_MB5 + 3) + +#define PRCM_AVS_VARM_MAX_OPP (tcdm_base + 0x2E4) +#define PRCM_AVS_ISMODEENABLE 7 +#define PRCM_AVS_ISMODEENABLE_MASK (1 << PRCM_AVS_ISMODEENABLE) + +#define I2C_WRITE(slave) \ + (((slave) << 1) | (cpu_is_u8500v2() ? BIT(6) : 0)) +#define I2C_READ(slave) \ + (((slave) << 1) | (cpu_is_u8500v2() ? BIT(6) : 0) | BIT(0)) +#define I2C_STOP_EN BIT(3) + +enum mb1_h { + MB1H_ARM_OPP = 1, + MB1H_APE_OPP, + MB1H_ARM_APE_OPP, +}; + +static struct { + struct mutex lock; + struct completion work; + struct { + u8 arm_opp; + u8 ape_opp; + u8 arm_status; + u8 ape_status; + } ack; +} mb1_transfer; + +enum ack_mb5_status { + I2C_WR_OK = 0x01, + I2C_RD_OK = 0x02, +}; + +#define MBOX_BIT BIT +#define NUM_MBOX 8 + +static struct { + struct mutex lock; + struct completion work; + bool failed; + struct { + u8 status; + u8 value; + } ack; +} mb5_transfer; + +/** + * prcmu_abb_read() - Read register value(s) from the ABB. + * @slave: The I2C slave address. + * @reg: The (start) register address. + * @value: The read out value(s). + * @size: The number of registers to read. + * + * Reads register value(s) from the ABB. + * @size has to be 1 for the current firmware version. + */ +int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size) +{ + int r; + + if (size != 1) + return -EINVAL; + + r = mutex_lock_interruptible(&mb5_transfer.lock); + if (r) + return r; + + while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(5)) + cpu_relax(); + + writeb(I2C_READ(slave), REQ_MB5_I2C_SLAVE_OP); + writeb(I2C_STOP_EN, REQ_MB5_I2C_HW_BITS); + writeb(reg, REQ_MB5_I2C_REG); + + writel(MBOX_BIT(5), PRCM_MBOX_CPU_SET); + if (!wait_for_completion_timeout(&mb5_transfer.work, + msecs_to_jiffies(500))) { + pr_err("prcmu: prcmu_abb_read timed out.\n"); + r = -EIO; + goto unlock_and_return; + } + r = ((mb5_transfer.ack.status == I2C_RD_OK) ? 0 : -EIO); + if (!r) + *value = mb5_transfer.ack.value; + +unlock_and_return: + mutex_unlock(&mb5_transfer.lock); + return r; +} +EXPORT_SYMBOL(prcmu_abb_read); + +/** + * prcmu_abb_write() - Write register value(s) to the ABB. + * @slave: The I2C slave address. + * @reg: The (start) register address. + * @value: The value(s) to write. + * @size: The number of registers to write. + * + * Reads register value(s) from the ABB. + * @size has to be 1 for the current firmware version. + */ +int prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size) +{ + int r; + + if (size != 1) + return -EINVAL; + + r = mutex_lock_interruptible(&mb5_transfer.lock); + if (r) + return r; + + + while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(5)) + cpu_relax(); + + writeb(I2C_WRITE(slave), REQ_MB5_I2C_SLAVE_OP); + writeb(I2C_STOP_EN, REQ_MB5_I2C_HW_BITS); + writeb(reg, REQ_MB5_I2C_REG); + writeb(*value, REQ_MB5_I2C_VAL); + + writel(MBOX_BIT(5), PRCM_MBOX_CPU_SET); + if (!wait_for_completion_timeout(&mb5_transfer.work, + msecs_to_jiffies(500))) { + pr_err("prcmu: prcmu_abb_write timed out.\n"); + r = -EIO; + goto unlock_and_return; + } + r = ((mb5_transfer.ack.status == I2C_WR_OK) ? 0 : -EIO); + +unlock_and_return: + mutex_unlock(&mb5_transfer.lock); + return r; +} +EXPORT_SYMBOL(prcmu_abb_write); + +static int set_ape_cpu_opps(u8 header, enum prcmu_ape_opp ape_opp, + enum prcmu_cpu_opp cpu_opp) +{ + bool do_ape; + bool do_arm; + int err = 0; + + do_ape = ((header == MB1H_APE_OPP) || (header == MB1H_ARM_APE_OPP)); + do_arm = ((header == MB1H_ARM_OPP) || (header == MB1H_ARM_APE_OPP)); + + mutex_lock(&mb1_transfer.lock); + + while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(1)) + cpu_relax(); + + writeb(0, MBOX_HEADER_REQ_MB0); + writeb(cpu_opp, REQ_MB1_ARMOPP); + writeb(ape_opp, REQ_MB1_APEOPP); + writeb(0, REQ_MB1_BOOSTOPP); + writel(MBOX_BIT(1), PRCM_MBOX_CPU_SET); + wait_for_completion(&mb1_transfer.work); + if ((do_ape) && (mb1_transfer.ack.ape_status != 0)) + err = -EIO; + if ((do_arm) && (mb1_transfer.ack.arm_status != 0)) + err = -EIO; + + mutex_unlock(&mb1_transfer.lock); + + return err; +} + +/** + * prcmu_set_ape_opp() - Set the OPP of the APE. + * @opp: The OPP to set. + * + * This function sets the OPP of the APE. + */ +int prcmu_set_ape_opp(enum prcmu_ape_opp opp) +{ + return set_ape_cpu_opps(MB1H_APE_OPP, opp, APE_OPP_NO_CHANGE); +} +EXPORT_SYMBOL(prcmu_set_ape_opp); + +/** + * prcmu_set_cpu_opp() - Set the OPP of the CPU. + * @opp: The OPP to set. + * + * This function sets the OPP of the CPU. + */ +int prcmu_set_cpu_opp(enum prcmu_cpu_opp opp) +{ + return set_ape_cpu_opps(MB1H_ARM_OPP, CPU_OPP_NO_CHANGE, opp); +} +EXPORT_SYMBOL(prcmu_set_cpu_opp); + +/** + * prcmu_set_ape_cpu_opps() - Set the OPPs of the APE and the CPU. + * @ape_opp: The APE OPP to set. + * @cpu_opp: The CPU OPP to set. + * + * This function sets the OPPs of the APE and the CPU. + */ +int prcmu_set_ape_cpu_opps(enum prcmu_ape_opp ape_opp, + enum prcmu_cpu_opp cpu_opp) +{ + return set_ape_cpu_opps(MB1H_ARM_APE_OPP, ape_opp, cpu_opp); +} +EXPORT_SYMBOL(prcmu_set_ape_cpu_opps); + +/** + * prcmu_get_ape_opp() - Get the OPP of the APE. + * + * This function gets the OPP of the APE. + */ +enum prcmu_ape_opp prcmu_get_ape_opp(void) +{ + return readb(ACK_MB1_CURR_APEOPP); +} +EXPORT_SYMBOL(prcmu_get_ape_opp); + +/** + * prcmu_get_cpu_opp() - Get the OPP of the CPU. + * + * This function gets the OPP of the CPU. The OPP is specified in %%. + * PRCMU_OPP_EXT is a special OPP value, not specified in %%. + */ +int prcmu_get_cpu_opp(void) +{ + return readb(ACK_MB1_CURR_ARMOPP); +} +EXPORT_SYMBOL(prcmu_get_cpu_opp); + +bool prcmu_has_arm_maxopp(void) +{ + return (readb(PRCM_AVS_VARM_MAX_OPP) & PRCM_AVS_ISMODEENABLE_MASK) + == PRCM_AVS_ISMODEENABLE_MASK; +} + +static void read_mailbox_0(void) +{ + writel(MBOX_BIT(0), PRCM_ARM_IT1_CLEAR); +} + +static void read_mailbox_1(void) +{ + mb1_transfer.ack.arm_opp = readb(ACK_MB1_CURR_ARMOPP); + mb1_transfer.ack.ape_opp = readb(ACK_MB1_CURR_APEOPP); + complete(&mb1_transfer.work); + writel(MBOX_BIT(1), PRCM_ARM_IT1_CLEAR); +} + +static void read_mailbox_2(void) +{ + writel(MBOX_BIT(2), PRCM_ARM_IT1_CLEAR); +} + +static void read_mailbox_3(void) +{ + writel(MBOX_BIT(3), PRCM_ARM_IT1_CLEAR); +} + +static void read_mailbox_4(void) +{ + writel(MBOX_BIT(4), PRCM_ARM_IT1_CLEAR); +} + +static void read_mailbox_5(void) +{ + mb5_transfer.ack.status = readb(ACK_MB5_I2C_STATUS); + mb5_transfer.ack.value = readb(ACK_MB5_I2C_VAL); + complete(&mb5_transfer.work); + writel(MBOX_BIT(5), PRCM_ARM_IT1_CLEAR); +} + +static void read_mailbox_6(void) +{ + writel(MBOX_BIT(6), PRCM_ARM_IT1_CLEAR); +} + +static void read_mailbox_7(void) +{ + writel(MBOX_BIT(7), PRCM_ARM_IT1_CLEAR); +} + +static void (* const read_mailbox[NUM_MBOX])(void) = { + read_mailbox_0, + read_mailbox_1, + read_mailbox_2, + read_mailbox_3, + read_mailbox_4, + read_mailbox_5, + read_mailbox_6, + read_mailbox_7 +}; + +static irqreturn_t prcmu_irq_handler(int irq, void *data) +{ + u32 bits; + u8 n; + + bits = (readl(PRCM_ARM_IT1_VAL) & (MBOX_BIT(NUM_MBOX) - 1)); + if (unlikely(!bits)) + return IRQ_NONE; + + for (n = 0; bits; n++) { + if (bits & MBOX_BIT(n)) { + bits -= MBOX_BIT(n); + read_mailbox[n](); + } + } + return IRQ_HANDLED; +} + +void __init prcmu_early_init(void) +{ + if (cpu_is_u8500v11() || cpu_is_u8500ed()) { + tcdm_base = __io_address(U8500_PRCMU_TCDM_BASE_V1); + } else if (cpu_is_u8500v2()) { + tcdm_base = __io_address(U8500_PRCMU_TCDM_BASE); + } else { + pr_err("prcmu: Unsupported chip version\n"); + BUG(); + } +} + +static int __init prcmu_init(void) +{ + if (cpu_is_u8500ed()) { + pr_err("prcmu: Unsupported chip version\n"); + return 0; + } + + mutex_init(&mb1_transfer.lock); + init_completion(&mb1_transfer.work); + mutex_init(&mb5_transfer.lock); + init_completion(&mb5_transfer.work); + + /* Clean up the mailbox interrupts after pre-kernel code. */ + writel((MBOX_BIT(NUM_MBOX) - 1), PRCM_ARM_IT1_CLEAR); + + return request_irq(IRQ_DB8500_PRCMU1, prcmu_irq_handler, 0, + "prcmu", NULL); +} + +arch_initcall(prcmu_init); diff --git a/include/linux/mfd/db8500-prcmu.h b/include/linux/mfd/db8500-prcmu.h new file mode 100644 index 000000000000..d591d79aa6f0 --- /dev/null +++ b/include/linux/mfd/db8500-prcmu.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) STMicroelectronics 2009 + * Copyright (C) ST-Ericsson SA 2010 + * + * Author: Sundar Iyer + * Author: Martin Persson + * + * License Terms: GNU General Public License v2 + * + * PRCM Unit definitions + */ + +#ifndef __MACH_PRCMU_DEFS_H +#define __MACH_PRCMU_DEFS_H + +enum prcmu_cpu_opp { + CPU_OPP_INIT = 0x00, + CPU_OPP_NO_CHANGE = 0x01, + CPU_OPP_100 = 0x02, + CPU_OPP_50 = 0x03, + CPU_OPP_MAX = 0x04, + CPU_OPP_EXT_CLK = 0x07 +}; +enum prcmu_ape_opp { + APE_OPP_NO_CHANGE = 0x00, + APE_OPP_100 = 0x02, + APE_OPP_50 = 0x03, +}; + +#endif /* __MACH_PRCMU_DEFS_H */ + +/* + * Copyright (C) STMicroelectronics 2009 + * Copyright (C) ST-Ericsson SA 2010 + * + * Author: Kumar Sanghvi + * Author: Sundar Iyer + * Author: Mattias Nilsson + * + * License Terms: GNU General Public License v2 + * + * PRCM Unit f/w API + */ +#ifndef __MACH_PRCMU_H +#define __MACH_PRCMU_H + +void __init prcmu_early_init(void); +int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size); +int prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size); +int prcmu_set_ape_opp(enum prcmu_ape_opp opp); +int prcmu_set_cpu_opp(enum prcmu_cpu_opp opp); +int prcmu_set_ape_cpu_opps(enum prcmu_ape_opp ape_opp, + enum prcmu_cpu_opp cpu_opp); +enum prcmu_ape_opp prcmu_get_ape_opp(void); +int prcmu_get_cpu_opp(void); +bool prcmu_has_arm_maxopp(void); + +#endif /* __MACH_PRCMU_H */ -- cgit v1.2.3 From 3df57bcf5a6ba74572218a811bd0e311414f2aff Mon Sep 17 00:00:00 2001 From: Mattias Nilsson Date: Mon, 16 May 2011 00:15:05 +0200 Subject: mfd: update DB8500 PRCMU driver This updates the DB8500 PRCMU driver to the latest version available internally. Nominally we would update the dependent CPUfreq driver at the same time but since that is being moved around in this patch set we postpone that by simply deactivating it for the time being. This is a snapshot of the current PRCMU firmware API as it looks right now. The PRCMU firmware is still subject to change. This also updates the CPUfreq driver to a newer version that will utilize the new API. Acked-by: Samuel Ortiz Signed-off-by: Mattias Nilsson Signed-off-by: Martin Persson Signed-off-by: Per Fransson Signed-off-by: Jonas Aaberg Signed-off-by: Sebastien Rault Signed-off-by: Bengt Jonsson Signed-off-by: Rickard Andersson Signed-off-by: Linus Walleij --- arch/arm/mach-ux500/Makefile | 2 +- arch/arm/mach-ux500/cpu-db8500.c | 5 + drivers/mfd/db8500-prcmu-regs.h | 194 ++-- drivers/mfd/db8500-prcmu.c | 1879 ++++++++++++++++++++++++++++++++++---- include/linux/mfd/db8500-prcmu.h | 992 +++++++++++++++++++- 5 files changed, 2782 insertions(+), 290 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-ux500/Makefile b/arch/arm/mach-ux500/Makefile index 7a1d43e04f97..1694916e6822 100644 --- a/arch/arm/mach-ux500/Makefile +++ b/arch/arm/mach-ux500/Makefile @@ -17,4 +17,4 @@ obj-$(CONFIG_HOTPLUG_CPU) += hotplug.o obj-$(CONFIG_LOCAL_TIMERS) += localtimer.o obj-$(CONFIG_U5500_MODEM_IRQ) += modem-irq-db5500.o obj-$(CONFIG_U5500_MBOX) += mbox-db5500.o -obj-$(CONFIG_CPU_FREQ) += cpufreq.o + diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c index 3d419fef0470..c3c417656bd9 100644 --- a/arch/arm/mach-ux500/cpu-db8500.c +++ b/arch/arm/mach-ux500/cpu-db8500.c @@ -131,9 +131,14 @@ static struct platform_device db8500_pmu_device = { .dev.platform_data = &db8500_pmu_platdata, }; +static struct platform_device db8500_prcmu_device = { + .name = "db8500-prcmu", +}; + static struct platform_device *platform_devs[] __initdata = { &u8500_dma40_device, &db8500_pmu_device, + &db8500_prcmu_device, }; static resource_size_t __initdata db8500_gpio_base[] = { diff --git a/drivers/mfd/db8500-prcmu-regs.h b/drivers/mfd/db8500-prcmu-regs.h index c1226da19bfb..3bbf04d58043 100644 --- a/drivers/mfd/db8500-prcmu-regs.h +++ b/drivers/mfd/db8500-prcmu-regs.h @@ -9,86 +9,158 @@ * * PRCM Unit registers */ +#ifndef __DB8500_PRCMU_REGS_H +#define __DB8500_PRCMU_REGS_H -#ifndef __MACH_PRCMU_REGS_H -#define __MACH_PRCMU_REGS_H - +#include #include -#define PRCM_ARM_PLLDIVPS (_PRCMU_BASE + 0x118) -#define PRCM_ARM_CHGCLKREQ (_PRCMU_BASE + 0x114) -#define PRCM_PLLARM_ENABLE (_PRCMU_BASE + 0x98) -#define PRCM_ARMCLKFIX_MGT (_PRCMU_BASE + 0x0) -#define PRCM_A9_RESETN_CLR (_PRCMU_BASE + 0x1f4) -#define PRCM_A9_RESETN_SET (_PRCMU_BASE + 0x1f0) -#define PRCM_ARM_LS_CLAMP (_PRCMU_BASE + 0x30c) -#define PRCM_SRAM_A9 (_PRCMU_BASE + 0x308) +#define BITS(_start, _end) ((BIT(_end) - BIT(_start)) + BIT(_end)) + +#define PRCM_ARM_PLLDIVPS 0x118 +#define PRCM_ARM_PLLDIVPS_ARM_BRM_RATE BITS(0, 5) +#define PRCM_ARM_PLLDIVPS_MAX_MASK 0xF + +#define PRCM_PLLARM_LOCKP 0x0A8 +#define PRCM_PLLARM_LOCKP_PRCM_PLLARM_LOCKP3 BIT(1) + +#define PRCM_ARM_CHGCLKREQ 0x114 +#define PRCM_ARM_CHGCLKREQ_PRCM_ARM_CHGCLKREQ BIT(0) + +#define PRCM_PLLARM_ENABLE 0x98 +#define PRCM_PLLARM_ENABLE_PRCM_PLLARM_ENABLE BIT(0) +#define PRCM_PLLARM_ENABLE_PRCM_PLLARM_COUNTON BIT(8) + +#define PRCM_ARMCLKFIX_MGT 0x0 +#define PRCM_A9_RESETN_CLR 0x1f4 +#define PRCM_A9_RESETN_SET 0x1f0 +#define PRCM_ARM_LS_CLAMP 0x30C +#define PRCM_SRAM_A9 0x308 /* ARM WFI Standby signal register */ -#define PRCM_ARM_WFI_STANDBY (_PRCMU_BASE + 0x130) -#define PRCMU_IOCR (_PRCMU_BASE + 0x310) +#define PRCM_ARM_WFI_STANDBY 0x130 +#define PRCM_IOCR 0x310 +#define PRCM_IOCR_IOFORCE BIT(0) /* CPU mailbox registers */ -#define PRCM_MBOX_CPU_VAL (_PRCMU_BASE + 0x0fc) -#define PRCM_MBOX_CPU_SET (_PRCMU_BASE + 0x100) -#define PRCM_MBOX_CPU_CLR (_PRCMU_BASE + 0x104) +#define PRCM_MBOX_CPU_VAL 0x0FC +#define PRCM_MBOX_CPU_SET 0x100 /* Dual A9 core interrupt management unit registers */ -#define PRCM_A9_MASK_REQ (_PRCMU_BASE + 0x328) -#define PRCM_A9_MASK_ACK (_PRCMU_BASE + 0x32c) -#define PRCM_ARMITMSK31TO0 (_PRCMU_BASE + 0x11c) -#define PRCM_ARMITMSK63TO32 (_PRCMU_BASE + 0x120) -#define PRCM_ARMITMSK95TO64 (_PRCMU_BASE + 0x124) -#define PRCM_ARMITMSK127TO96 (_PRCMU_BASE + 0x128) -#define PRCM_POWER_STATE_VAL (_PRCMU_BASE + 0x25C) -#define PRCM_ARMITVAL31TO0 (_PRCMU_BASE + 0x260) -#define PRCM_ARMITVAL63TO32 (_PRCMU_BASE + 0x264) -#define PRCM_ARMITVAL95TO64 (_PRCMU_BASE + 0x268) -#define PRCM_ARMITVAL127TO96 (_PRCMU_BASE + 0x26C) - -#define PRCM_HOSTACCESS_REQ (_PRCMU_BASE + 0x334) -#define ARM_WAKEUP_MODEM 0x1 - -#define PRCM_ARM_IT1_CLEAR (_PRCMU_BASE + 0x48C) -#define PRCM_ARM_IT1_VAL (_PRCMU_BASE + 0x494) -#define PRCM_HOLD_EVT (_PRCMU_BASE + 0x174) - -#define PRCM_ITSTATUS0 (_PRCMU_BASE + 0x148) -#define PRCM_ITSTATUS1 (_PRCMU_BASE + 0x150) -#define PRCM_ITSTATUS2 (_PRCMU_BASE + 0x158) -#define PRCM_ITSTATUS3 (_PRCMU_BASE + 0x160) -#define PRCM_ITSTATUS4 (_PRCMU_BASE + 0x168) -#define PRCM_ITSTATUS5 (_PRCMU_BASE + 0x484) -#define PRCM_ITCLEAR5 (_PRCMU_BASE + 0x488) -#define PRCM_ARMIT_MASKXP70_IT (_PRCMU_BASE + 0x1018) +#define PRCM_A9_MASK_REQ 0x328 +#define PRCM_A9_MASK_REQ_PRCM_A9_MASK_REQ BIT(0) + +#define PRCM_A9_MASK_ACK 0x32C +#define PRCM_ARMITMSK31TO0 0x11C +#define PRCM_ARMITMSK63TO32 0x120 +#define PRCM_ARMITMSK95TO64 0x124 +#define PRCM_ARMITMSK127TO96 0x128 +#define PRCM_POWER_STATE_VAL 0x25C +#define PRCM_ARMITVAL31TO0 0x260 +#define PRCM_ARMITVAL63TO32 0x264 +#define PRCM_ARMITVAL95TO64 0x268 +#define PRCM_ARMITVAL127TO96 0x26C + +#define PRCM_HOSTACCESS_REQ 0x334 +#define PRCM_HOSTACCESS_REQ_HOSTACCESS_REQ BIT(0) + +#define PRCM_ARM_IT1_CLR 0x48C +#define PRCM_ARM_IT1_VAL 0x494 + +#define PRCM_ITSTATUS0 0x148 +#define PRCM_ITSTATUS1 0x150 +#define PRCM_ITSTATUS2 0x158 +#define PRCM_ITSTATUS3 0x160 +#define PRCM_ITSTATUS4 0x168 +#define PRCM_ITSTATUS5 0x484 +#define PRCM_ITCLEAR5 0x488 +#define PRCM_ARMIT_MASKXP70_IT 0x1018 /* System reset register */ -#define PRCM_APE_SOFTRST (_PRCMU_BASE + 0x228) +#define PRCM_APE_SOFTRST 0x228 /* Level shifter and clamp control registers */ -#define PRCM_MMIP_LS_CLAMP_SET (_PRCMU_BASE + 0x420) -#define PRCM_MMIP_LS_CLAMP_CLR (_PRCMU_BASE + 0x424) +#define PRCM_MMIP_LS_CLAMP_SET 0x420 +#define PRCM_MMIP_LS_CLAMP_CLR 0x424 + +/* PRCMU HW semaphore */ +#define PRCM_SEM 0x400 +#define PRCM_SEM_PRCM_SEM BIT(0) /* PRCMU clock/PLL/reset registers */ -#define PRCM_PLLDSI_FREQ (_PRCMU_BASE + 0x500) -#define PRCM_PLLDSI_ENABLE (_PRCMU_BASE + 0x504) -#define PRCM_LCDCLK_MGT (_PRCMU_BASE + 0x044) -#define PRCM_MCDECLK_MGT (_PRCMU_BASE + 0x064) -#define PRCM_HDMICLK_MGT (_PRCMU_BASE + 0x058) -#define PRCM_TVCLK_MGT (_PRCMU_BASE + 0x07c) -#define PRCM_DSI_PLLOUT_SEL (_PRCMU_BASE + 0x530) -#define PRCM_DSITVCLK_DIV (_PRCMU_BASE + 0x52C) -#define PRCM_APE_RESETN_SET (_PRCMU_BASE + 0x1E4) -#define PRCM_APE_RESETN_CLR (_PRCMU_BASE + 0x1E8) +#define PRCM_PLLDSI_FREQ 0x500 +#define PRCM_PLLDSI_ENABLE 0x504 +#define PRCM_PLLDSI_LOCKP 0x508 +#define PRCM_DSI_PLLOUT_SEL 0x530 +#define PRCM_DSITVCLK_DIV 0x52C +#define PRCM_APE_RESETN_SET 0x1E4 +#define PRCM_APE_RESETN_CLR 0x1E8 + +#define PRCM_TCR 0x1C8 +#define PRCM_TCR_TENSEL_MASK BITS(0, 7) +#define PRCM_TCR_STOP_TIMERS BIT(16) +#define PRCM_TCR_DOZE_MODE BIT(17) + +#define PRCM_CLKOCR 0x1CC +#define PRCM_CLKOCR_CLKODIV0_SHIFT 0 +#define PRCM_CLKOCR_CLKODIV0_MASK BITS(0, 5) +#define PRCM_CLKOCR_CLKOSEL0_SHIFT 6 +#define PRCM_CLKOCR_CLKOSEL0_MASK BITS(6, 8) +#define PRCM_CLKOCR_CLKODIV1_SHIFT 16 +#define PRCM_CLKOCR_CLKODIV1_MASK BITS(16, 21) +#define PRCM_CLKOCR_CLKOSEL1_SHIFT 22 +#define PRCM_CLKOCR_CLKOSEL1_MASK BITS(22, 24) +#define PRCM_CLKOCR_CLK1TYPE BIT(28) + +#define PRCM_SGACLK_MGT 0x014 +#define PRCM_UARTCLK_MGT 0x018 +#define PRCM_MSP02CLK_MGT 0x01C +#define PRCM_MSP1CLK_MGT 0x288 +#define PRCM_I2CCLK_MGT 0x020 +#define PRCM_SDMMCCLK_MGT 0x024 +#define PRCM_SLIMCLK_MGT 0x028 +#define PRCM_PER1CLK_MGT 0x02C +#define PRCM_PER2CLK_MGT 0x030 +#define PRCM_PER3CLK_MGT 0x034 +#define PRCM_PER5CLK_MGT 0x038 +#define PRCM_PER6CLK_MGT 0x03C +#define PRCM_PER7CLK_MGT 0x040 +#define PRCM_LCDCLK_MGT 0x044 +#define PRCM_BMLCLK_MGT 0x04C +#define PRCM_HSITXCLK_MGT 0x050 +#define PRCM_HSIRXCLK_MGT 0x054 +#define PRCM_HDMICLK_MGT 0x058 +#define PRCM_APEATCLK_MGT 0x05C +#define PRCM_APETRACECLK_MGT 0x060 +#define PRCM_MCDECLK_MGT 0x064 +#define PRCM_IPI2CCLK_MGT 0x068 +#define PRCM_DSIALTCLK_MGT 0x06C +#define PRCM_DMACLK_MGT 0x074 +#define PRCM_B2R2CLK_MGT 0x078 +#define PRCM_TVCLK_MGT 0x07C +#define PRCM_UNIPROCLK_MGT 0x278 +#define PRCM_SSPCLK_MGT 0x280 +#define PRCM_RNGCLK_MGT 0x284 +#define PRCM_UICCCLK_MGT 0x27C + +#define PRCM_CLK_MGT_CLKPLLDIV_MASK BITS(0, 4) +#define PRCM_CLK_MGT_CLKPLLSW_MASK BITS(5, 7) +#define PRCM_CLK_MGT_CLKEN BIT(8) /* ePOD and memory power signal control registers */ -#define PRCM_EPOD_C_SET (_PRCMU_BASE + 0x410) -#define PRCM_SRAM_LS_SLEEP (_PRCMU_BASE + 0x304) +#define PRCM_EPOD_C_SET 0x410 +#define PRCM_SRAM_LS_SLEEP 0x304 /* Debug power control unit registers */ -#define PRCM_POWER_STATE_SET (_PRCMU_BASE + 0x254) +#define PRCM_POWER_STATE_SET 0x254 /* Miscellaneous unit registers */ -#define PRCM_DSI_SW_RESET (_PRCMU_BASE + 0x324) +#define PRCM_DSI_SW_RESET 0x324 +#define PRCM_GPIOCR 0x138 + +/* GPIOCR register */ +#define PRCM_GPIOCR_SPI2_SELECT BIT(23) + +#define PRCM_DDR_SUBSYS_APE_MINBW 0x438 -#endif /* __MACH_PRCMU_REGS_H */ +#endif /* __DB8500_PRCMU_REGS_H */ diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c index 31f18c8c6bf8..c44725bd8b9a 100644 --- a/drivers/mfd/db8500-prcmu.c +++ b/drivers/mfd/db8500-prcmu.c @@ -10,93 +10,1354 @@ * U8500 PRCM Unit interface driver * */ -#include #include +#include +#include #include #include +#include #include +#include #include #include +#include #include #include -#include +#include +#include +#include +#include #include +#include +#include +#include +#include +#include "db8500-prcmu-regs.h" + +/* Offset for the firmware version within the TCPM */ +#define PRCMU_FW_VERSION_OFFSET 0xA4 + +/* PRCMU project numbers, defined by PRCMU FW */ +#define PRCMU_PROJECT_ID_8500V1_0 1 +#define PRCMU_PROJECT_ID_8500V2_0 2 +#define PRCMU_PROJECT_ID_8400V2_0 3 + +/* Index of different voltages to be used when accessing AVSData */ +#define PRCM_AVS_BASE 0x2FC +#define PRCM_AVS_VBB_RET (PRCM_AVS_BASE + 0x0) +#define PRCM_AVS_VBB_MAX_OPP (PRCM_AVS_BASE + 0x1) +#define PRCM_AVS_VBB_100_OPP (PRCM_AVS_BASE + 0x2) +#define PRCM_AVS_VBB_50_OPP (PRCM_AVS_BASE + 0x3) +#define PRCM_AVS_VARM_MAX_OPP (PRCM_AVS_BASE + 0x4) +#define PRCM_AVS_VARM_100_OPP (PRCM_AVS_BASE + 0x5) +#define PRCM_AVS_VARM_50_OPP (PRCM_AVS_BASE + 0x6) +#define PRCM_AVS_VARM_RET (PRCM_AVS_BASE + 0x7) +#define PRCM_AVS_VAPE_100_OPP (PRCM_AVS_BASE + 0x8) +#define PRCM_AVS_VAPE_50_OPP (PRCM_AVS_BASE + 0x9) +#define PRCM_AVS_VMOD_100_OPP (PRCM_AVS_BASE + 0xA) +#define PRCM_AVS_VMOD_50_OPP (PRCM_AVS_BASE + 0xB) +#define PRCM_AVS_VSAFE (PRCM_AVS_BASE + 0xC) + +#define PRCM_AVS_VOLTAGE 0 +#define PRCM_AVS_VOLTAGE_MASK 0x3f +#define PRCM_AVS_ISSLOWSTARTUP 6 +#define PRCM_AVS_ISSLOWSTARTUP_MASK (1 << PRCM_AVS_ISSLOWSTARTUP) +#define PRCM_AVS_ISMODEENABLE 7 +#define PRCM_AVS_ISMODEENABLE_MASK (1 << PRCM_AVS_ISMODEENABLE) + +#define PRCM_BOOT_STATUS 0xFFF +#define PRCM_ROMCODE_A2P 0xFFE +#define PRCM_ROMCODE_P2A 0xFFD +#define PRCM_XP70_CUR_PWR_STATE 0xFFC /* 4 BYTES */ + +#define PRCM_SW_RST_REASON 0xFF8 /* 2 bytes */ + +#define _PRCM_MBOX_HEADER 0xFE8 /* 16 bytes */ +#define PRCM_MBOX_HEADER_REQ_MB0 (_PRCM_MBOX_HEADER + 0x0) +#define PRCM_MBOX_HEADER_REQ_MB1 (_PRCM_MBOX_HEADER + 0x1) +#define PRCM_MBOX_HEADER_REQ_MB2 (_PRCM_MBOX_HEADER + 0x2) +#define PRCM_MBOX_HEADER_REQ_MB3 (_PRCM_MBOX_HEADER + 0x3) +#define PRCM_MBOX_HEADER_REQ_MB4 (_PRCM_MBOX_HEADER + 0x4) +#define PRCM_MBOX_HEADER_REQ_MB5 (_PRCM_MBOX_HEADER + 0x5) +#define PRCM_MBOX_HEADER_ACK_MB0 (_PRCM_MBOX_HEADER + 0x8) + +/* Req Mailboxes */ +#define PRCM_REQ_MB0 0xFDC /* 12 bytes */ +#define PRCM_REQ_MB1 0xFD0 /* 12 bytes */ +#define PRCM_REQ_MB2 0xFC0 /* 16 bytes */ +#define PRCM_REQ_MB3 0xE4C /* 372 bytes */ +#define PRCM_REQ_MB4 0xE48 /* 4 bytes */ +#define PRCM_REQ_MB5 0xE44 /* 4 bytes */ + +/* Ack Mailboxes */ +#define PRCM_ACK_MB0 0xE08 /* 52 bytes */ +#define PRCM_ACK_MB1 0xE04 /* 4 bytes */ +#define PRCM_ACK_MB2 0xE00 /* 4 bytes */ +#define PRCM_ACK_MB3 0xDFC /* 4 bytes */ +#define PRCM_ACK_MB4 0xDF8 /* 4 bytes */ +#define PRCM_ACK_MB5 0xDF4 /* 4 bytes */ + +/* Mailbox 0 headers */ +#define MB0H_POWER_STATE_TRANS 0 +#define MB0H_CONFIG_WAKEUPS_EXE 1 +#define MB0H_READ_WAKEUP_ACK 3 +#define MB0H_CONFIG_WAKEUPS_SLEEP 4 + +#define MB0H_WAKEUP_EXE 2 +#define MB0H_WAKEUP_SLEEP 5 + +/* Mailbox 0 REQs */ +#define PRCM_REQ_MB0_AP_POWER_STATE (PRCM_REQ_MB0 + 0x0) +#define PRCM_REQ_MB0_AP_PLL_STATE (PRCM_REQ_MB0 + 0x1) +#define PRCM_REQ_MB0_ULP_CLOCK_STATE (PRCM_REQ_MB0 + 0x2) +#define PRCM_REQ_MB0_DO_NOT_WFI (PRCM_REQ_MB0 + 0x3) +#define PRCM_REQ_MB0_WAKEUP_8500 (PRCM_REQ_MB0 + 0x4) +#define PRCM_REQ_MB0_WAKEUP_4500 (PRCM_REQ_MB0 + 0x8) + +/* Mailbox 0 ACKs */ +#define PRCM_ACK_MB0_AP_PWRSTTR_STATUS (PRCM_ACK_MB0 + 0x0) +#define PRCM_ACK_MB0_READ_POINTER (PRCM_ACK_MB0 + 0x1) +#define PRCM_ACK_MB0_WAKEUP_0_8500 (PRCM_ACK_MB0 + 0x4) +#define PRCM_ACK_MB0_WAKEUP_0_4500 (PRCM_ACK_MB0 + 0x8) +#define PRCM_ACK_MB0_WAKEUP_1_8500 (PRCM_ACK_MB0 + 0x1C) +#define PRCM_ACK_MB0_WAKEUP_1_4500 (PRCM_ACK_MB0 + 0x20) +#define PRCM_ACK_MB0_EVENT_4500_NUMBERS 20 + +/* Mailbox 1 headers */ +#define MB1H_ARM_APE_OPP 0x0 +#define MB1H_RESET_MODEM 0x2 +#define MB1H_REQUEST_APE_OPP_100_VOLT 0x3 +#define MB1H_RELEASE_APE_OPP_100_VOLT 0x4 +#define MB1H_RELEASE_USB_WAKEUP 0x5 + +/* Mailbox 1 Requests */ +#define PRCM_REQ_MB1_ARM_OPP (PRCM_REQ_MB1 + 0x0) +#define PRCM_REQ_MB1_APE_OPP (PRCM_REQ_MB1 + 0x1) +#define PRCM_REQ_MB1_APE_OPP_100_RESTORE (PRCM_REQ_MB1 + 0x4) +#define PRCM_REQ_MB1_ARM_OPP_100_RESTORE (PRCM_REQ_MB1 + 0x8) + +/* Mailbox 1 ACKs */ +#define PRCM_ACK_MB1_CURRENT_ARM_OPP (PRCM_ACK_MB1 + 0x0) +#define PRCM_ACK_MB1_CURRENT_APE_OPP (PRCM_ACK_MB1 + 0x1) +#define PRCM_ACK_MB1_APE_VOLTAGE_STATUS (PRCM_ACK_MB1 + 0x2) +#define PRCM_ACK_MB1_DVFS_STATUS (PRCM_ACK_MB1 + 0x3) + +/* Mailbox 2 headers */ +#define MB2H_DPS 0x0 +#define MB2H_AUTO_PWR 0x1 + +/* Mailbox 2 REQs */ +#define PRCM_REQ_MB2_SVA_MMDSP (PRCM_REQ_MB2 + 0x0) +#define PRCM_REQ_MB2_SVA_PIPE (PRCM_REQ_MB2 + 0x1) +#define PRCM_REQ_MB2_SIA_MMDSP (PRCM_REQ_MB2 + 0x2) +#define PRCM_REQ_MB2_SIA_PIPE (PRCM_REQ_MB2 + 0x3) +#define PRCM_REQ_MB2_SGA (PRCM_REQ_MB2 + 0x4) +#define PRCM_REQ_MB2_B2R2_MCDE (PRCM_REQ_MB2 + 0x5) +#define PRCM_REQ_MB2_ESRAM12 (PRCM_REQ_MB2 + 0x6) +#define PRCM_REQ_MB2_ESRAM34 (PRCM_REQ_MB2 + 0x7) +#define PRCM_REQ_MB2_AUTO_PM_SLEEP (PRCM_REQ_MB2 + 0x8) +#define PRCM_REQ_MB2_AUTO_PM_IDLE (PRCM_REQ_MB2 + 0xC) + +/* Mailbox 2 ACKs */ +#define PRCM_ACK_MB2_DPS_STATUS (PRCM_ACK_MB2 + 0x0) +#define HWACC_PWR_ST_OK 0xFE + +/* Mailbox 3 headers */ +#define MB3H_ANC 0x0 +#define MB3H_SIDETONE 0x1 +#define MB3H_SYSCLK 0xE + +/* Mailbox 3 Requests */ +#define PRCM_REQ_MB3_ANC_FIR_COEFF (PRCM_REQ_MB3 + 0x0) +#define PRCM_REQ_MB3_ANC_IIR_COEFF (PRCM_REQ_MB3 + 0x20) +#define PRCM_REQ_MB3_ANC_SHIFTER (PRCM_REQ_MB3 + 0x60) +#define PRCM_REQ_MB3_ANC_WARP (PRCM_REQ_MB3 + 0x64) +#define PRCM_REQ_MB3_SIDETONE_FIR_GAIN (PRCM_REQ_MB3 + 0x68) +#define PRCM_REQ_MB3_SIDETONE_FIR_COEFF (PRCM_REQ_MB3 + 0x6C) +#define PRCM_REQ_MB3_SYSCLK_MGT (PRCM_REQ_MB3 + 0x16C) + +/* Mailbox 4 headers */ +#define MB4H_DDR_INIT 0x0 +#define MB4H_MEM_ST 0x1 +#define MB4H_HOTDOG 0x12 +#define MB4H_HOTMON 0x13 +#define MB4H_HOT_PERIOD 0x14 + +/* Mailbox 4 Requests */ +#define PRCM_REQ_MB4_DDR_ST_AP_SLEEP_IDLE (PRCM_REQ_MB4 + 0x0) +#define PRCM_REQ_MB4_DDR_ST_AP_DEEP_IDLE (PRCM_REQ_MB4 + 0x1) +#define PRCM_REQ_MB4_ESRAM0_ST (PRCM_REQ_MB4 + 0x3) +#define PRCM_REQ_MB4_HOTDOG_THRESHOLD (PRCM_REQ_MB4 + 0x0) +#define PRCM_REQ_MB4_HOTMON_LOW (PRCM_REQ_MB4 + 0x0) +#define PRCM_REQ_MB4_HOTMON_HIGH (PRCM_REQ_MB4 + 0x1) +#define PRCM_REQ_MB4_HOTMON_CONFIG (PRCM_REQ_MB4 + 0x2) +#define PRCM_REQ_MB4_HOT_PERIOD (PRCM_REQ_MB4 + 0x0) +#define HOTMON_CONFIG_LOW BIT(0) +#define HOTMON_CONFIG_HIGH BIT(1) + +/* Mailbox 5 Requests */ +#define PRCM_REQ_MB5_I2C_SLAVE_OP (PRCM_REQ_MB5 + 0x0) +#define PRCM_REQ_MB5_I2C_HW_BITS (PRCM_REQ_MB5 + 0x1) +#define PRCM_REQ_MB5_I2C_REG (PRCM_REQ_MB5 + 0x2) +#define PRCM_REQ_MB5_I2C_VAL (PRCM_REQ_MB5 + 0x3) +#define PRCMU_I2C_WRITE(slave) \ + (((slave) << 1) | (cpu_is_u8500v2() ? BIT(6) : 0)) +#define PRCMU_I2C_READ(slave) \ + (((slave) << 1) | BIT(0) | (cpu_is_u8500v2() ? BIT(6) : 0)) +#define PRCMU_I2C_STOP_EN BIT(3) + +/* Mailbox 5 ACKs */ +#define PRCM_ACK_MB5_I2C_STATUS (PRCM_ACK_MB5 + 0x1) +#define PRCM_ACK_MB5_I2C_VAL (PRCM_ACK_MB5 + 0x3) +#define I2C_WR_OK 0x1 +#define I2C_RD_OK 0x2 + +#define NUM_MB 8 +#define MBOX_BIT BIT +#define ALL_MBOX_BITS (MBOX_BIT(NUM_MB) - 1) + +/* + * Wakeups/IRQs + */ + +#define WAKEUP_BIT_RTC BIT(0) +#define WAKEUP_BIT_RTT0 BIT(1) +#define WAKEUP_BIT_RTT1 BIT(2) +#define WAKEUP_BIT_HSI0 BIT(3) +#define WAKEUP_BIT_HSI1 BIT(4) +#define WAKEUP_BIT_CA_WAKE BIT(5) +#define WAKEUP_BIT_USB BIT(6) +#define WAKEUP_BIT_ABB BIT(7) +#define WAKEUP_BIT_ABB_FIFO BIT(8) +#define WAKEUP_BIT_SYSCLK_OK BIT(9) +#define WAKEUP_BIT_CA_SLEEP BIT(10) +#define WAKEUP_BIT_AC_WAKE_ACK BIT(11) +#define WAKEUP_BIT_SIDE_TONE_OK BIT(12) +#define WAKEUP_BIT_ANC_OK BIT(13) +#define WAKEUP_BIT_SW_ERROR BIT(14) +#define WAKEUP_BIT_AC_SLEEP_ACK BIT(15) +#define WAKEUP_BIT_ARM BIT(17) +#define WAKEUP_BIT_HOTMON_LOW BIT(18) +#define WAKEUP_BIT_HOTMON_HIGH BIT(19) +#define WAKEUP_BIT_MODEM_SW_RESET_REQ BIT(20) +#define WAKEUP_BIT_GPIO0 BIT(23) +#define WAKEUP_BIT_GPIO1 BIT(24) +#define WAKEUP_BIT_GPIO2 BIT(25) +#define WAKEUP_BIT_GPIO3 BIT(26) +#define WAKEUP_BIT_GPIO4 BIT(27) +#define WAKEUP_BIT_GPIO5 BIT(28) +#define WAKEUP_BIT_GPIO6 BIT(29) +#define WAKEUP_BIT_GPIO7 BIT(30) +#define WAKEUP_BIT_GPIO8 BIT(31) + +/* + * This vector maps irq numbers to the bits in the bit field used in + * communication with the PRCMU firmware. + * + * The reason for having this is to keep the irq numbers contiguous even though + * the bits in the bit field are not. (The bits also have a tendency to move + * around, to further complicate matters.) + */ +#define IRQ_INDEX(_name) ((IRQ_PRCMU_##_name) - IRQ_PRCMU_BASE) +#define IRQ_ENTRY(_name)[IRQ_INDEX(_name)] = (WAKEUP_BIT_##_name) +static u32 prcmu_irq_bit[NUM_PRCMU_WAKEUPS] = { + IRQ_ENTRY(RTC), + IRQ_ENTRY(RTT0), + IRQ_ENTRY(RTT1), + IRQ_ENTRY(HSI0), + IRQ_ENTRY(HSI1), + IRQ_ENTRY(CA_WAKE), + IRQ_ENTRY(USB), + IRQ_ENTRY(ABB), + IRQ_ENTRY(ABB_FIFO), + IRQ_ENTRY(CA_SLEEP), + IRQ_ENTRY(ARM), + IRQ_ENTRY(HOTMON_LOW), + IRQ_ENTRY(HOTMON_HIGH), + IRQ_ENTRY(MODEM_SW_RESET_REQ), + IRQ_ENTRY(GPIO0), + IRQ_ENTRY(GPIO1), + IRQ_ENTRY(GPIO2), + IRQ_ENTRY(GPIO3), + IRQ_ENTRY(GPIO4), + IRQ_ENTRY(GPIO5), + IRQ_ENTRY(GPIO6), + IRQ_ENTRY(GPIO7), + IRQ_ENTRY(GPIO8) +}; + +#define VALID_WAKEUPS (BIT(NUM_PRCMU_WAKEUP_INDICES) - 1) +#define WAKEUP_ENTRY(_name)[PRCMU_WAKEUP_INDEX_##_name] = (WAKEUP_BIT_##_name) +static u32 prcmu_wakeup_bit[NUM_PRCMU_WAKEUP_INDICES] = { + WAKEUP_ENTRY(RTC), + WAKEUP_ENTRY(RTT0), + WAKEUP_ENTRY(RTT1), + WAKEUP_ENTRY(HSI0), + WAKEUP_ENTRY(HSI1), + WAKEUP_ENTRY(USB), + WAKEUP_ENTRY(ABB), + WAKEUP_ENTRY(ABB_FIFO), + WAKEUP_ENTRY(ARM) +}; + +/* + * mb0_transfer - state needed for mailbox 0 communication. + * @lock: The transaction lock. + * @dbb_events_lock: A lock used to handle concurrent access to (parts of) + * the request data. + * @mask_work: Work structure used for (un)masking wakeup interrupts. + * @req: Request data that need to persist between requests. + */ +static struct { + spinlock_t lock; + spinlock_t dbb_irqs_lock; + struct work_struct mask_work; + struct mutex ac_wake_lock; + struct completion ac_wake_work; + struct { + u32 dbb_irqs; + u32 dbb_wakeups; + u32 abb_events; + } req; +} mb0_transfer; + +/* + * mb1_transfer - state needed for mailbox 1 communication. + * @lock: The transaction lock. + * @work: The transaction completion structure. + * @ack: Reply ("acknowledge") data. + */ +static struct { + struct mutex lock; + struct completion work; + struct { + u8 header; + u8 arm_opp; + u8 ape_opp; + u8 ape_voltage_status; + } ack; +} mb1_transfer; + +/* + * mb2_transfer - state needed for mailbox 2 communication. + * @lock: The transaction lock. + * @work: The transaction completion structure. + * @auto_pm_lock: The autonomous power management configuration lock. + * @auto_pm_enabled: A flag indicating whether autonomous PM is enabled. + * @req: Request data that need to persist between requests. + * @ack: Reply ("acknowledge") data. + */ +static struct { + struct mutex lock; + struct completion work; + spinlock_t auto_pm_lock; + bool auto_pm_enabled; + struct { + u8 status; + } ack; +} mb2_transfer; + +/* + * mb3_transfer - state needed for mailbox 3 communication. + * @lock: The request lock. + * @sysclk_lock: A lock used to handle concurrent sysclk requests. + * @sysclk_work: Work structure used for sysclk requests. + */ +static struct { + spinlock_t lock; + struct mutex sysclk_lock; + struct completion sysclk_work; +} mb3_transfer; + +/* + * mb4_transfer - state needed for mailbox 4 communication. + * @lock: The transaction lock. + * @work: The transaction completion structure. + */ +static struct { + struct mutex lock; + struct completion work; +} mb4_transfer; + +/* + * mb5_transfer - state needed for mailbox 5 communication. + * @lock: The transaction lock. + * @work: The transaction completion structure. + * @ack: Reply ("acknowledge") data. + */ +static struct { + struct mutex lock; + struct completion work; + struct { + u8 status; + u8 value; + } ack; +} mb5_transfer; + +static atomic_t ac_wake_req_state = ATOMIC_INIT(0); + +/* Spinlocks */ +static DEFINE_SPINLOCK(clkout_lock); +static DEFINE_SPINLOCK(gpiocr_lock); + +/* Global var to runtime determine TCDM base for v2 or v1 */ +static __iomem void *tcdm_base; + +struct clk_mgt { + unsigned int offset; + u32 pllsw; +}; + +static DEFINE_SPINLOCK(clk_mgt_lock); + +#define CLK_MGT_ENTRY(_name)[PRCMU_##_name] = { (PRCM_##_name##_MGT), 0 } +struct clk_mgt clk_mgt[PRCMU_NUM_REG_CLOCKS] = { + CLK_MGT_ENTRY(SGACLK), + CLK_MGT_ENTRY(UARTCLK), + CLK_MGT_ENTRY(MSP02CLK), + CLK_MGT_ENTRY(MSP1CLK), + CLK_MGT_ENTRY(I2CCLK), + CLK_MGT_ENTRY(SDMMCCLK), + CLK_MGT_ENTRY(SLIMCLK), + CLK_MGT_ENTRY(PER1CLK), + CLK_MGT_ENTRY(PER2CLK), + CLK_MGT_ENTRY(PER3CLK), + CLK_MGT_ENTRY(PER5CLK), + CLK_MGT_ENTRY(PER6CLK), + CLK_MGT_ENTRY(PER7CLK), + CLK_MGT_ENTRY(LCDCLK), + CLK_MGT_ENTRY(BMLCLK), + CLK_MGT_ENTRY(HSITXCLK), + CLK_MGT_ENTRY(HSIRXCLK), + CLK_MGT_ENTRY(HDMICLK), + CLK_MGT_ENTRY(APEATCLK), + CLK_MGT_ENTRY(APETRACECLK), + CLK_MGT_ENTRY(MCDECLK), + CLK_MGT_ENTRY(IPI2CCLK), + CLK_MGT_ENTRY(DSIALTCLK), + CLK_MGT_ENTRY(DMACLK), + CLK_MGT_ENTRY(B2R2CLK), + CLK_MGT_ENTRY(TVCLK), + CLK_MGT_ENTRY(SSPCLK), + CLK_MGT_ENTRY(RNGCLK), + CLK_MGT_ENTRY(UICCCLK), +}; + +/* +* Used by MCDE to setup all necessary PRCMU registers +*/ +#define PRCMU_RESET_DSIPLL 0x00004000 +#define PRCMU_UNCLAMP_DSIPLL 0x00400800 + +#define PRCMU_CLK_PLL_DIV_SHIFT 0 +#define PRCMU_CLK_PLL_SW_SHIFT 5 +#define PRCMU_CLK_38 (1 << 9) +#define PRCMU_CLK_38_SRC (1 << 10) +#define PRCMU_CLK_38_DIV (1 << 11) + +/* PLLDIV=12, PLLSW=4 (PLLDDR) */ +#define PRCMU_DSI_CLOCK_SETTING 0x0000008C + +/* PLLDIV=8, PLLSW=4 (PLLDDR) */ +#define PRCMU_DSI_CLOCK_SETTING_U8400 0x00000088 + +/* DPI 50000000 Hz */ +#define PRCMU_DPI_CLOCK_SETTING ((1 << PRCMU_CLK_PLL_SW_SHIFT) | \ + (16 << PRCMU_CLK_PLL_DIV_SHIFT)) +#define PRCMU_DSI_LP_CLOCK_SETTING 0x00000E00 + +/* D=101, N=1, R=4, SELDIV2=0 */ +#define PRCMU_PLLDSI_FREQ_SETTING 0x00040165 + +/* D=70, N=1, R=3, SELDIV2=0 */ +#define PRCMU_PLLDSI_FREQ_SETTING_U8400 0x00030146 + +#define PRCMU_ENABLE_PLLDSI 0x00000001 +#define PRCMU_DISABLE_PLLDSI 0x00000000 +#define PRCMU_RELEASE_RESET_DSS 0x0000400C +#define PRCMU_DSI_PLLOUT_SEL_SETTING 0x00000202 +/* ESC clk, div0=1, div1=1, div2=3 */ +#define PRCMU_ENABLE_ESCAPE_CLOCK_DIV 0x07030101 +#define PRCMU_DISABLE_ESCAPE_CLOCK_DIV 0x00030101 +#define PRCMU_DSI_RESET_SW 0x00000007 + +#define PRCMU_PLLDSI_LOCKP_LOCKED 0x3 + +static struct { + u8 project_number; + u8 api_version; + u8 func_version; + u8 errata; +} prcmu_version; + + +int prcmu_enable_dsipll(void) +{ + int i; + unsigned int plldsifreq; + + /* Clear DSIPLL_RESETN */ + writel(PRCMU_RESET_DSIPLL, (_PRCMU_BASE + PRCM_APE_RESETN_CLR)); + /* Unclamp DSIPLL in/out */ + writel(PRCMU_UNCLAMP_DSIPLL, (_PRCMU_BASE + PRCM_MMIP_LS_CLAMP_CLR)); + + if (prcmu_is_u8400()) + plldsifreq = PRCMU_PLLDSI_FREQ_SETTING_U8400; + else + plldsifreq = PRCMU_PLLDSI_FREQ_SETTING; + /* Set DSI PLL FREQ */ + writel(plldsifreq, (_PRCMU_BASE + PRCM_PLLDSI_FREQ)); + writel(PRCMU_DSI_PLLOUT_SEL_SETTING, + (_PRCMU_BASE + PRCM_DSI_PLLOUT_SEL)); + /* Enable Escape clocks */ + writel(PRCMU_ENABLE_ESCAPE_CLOCK_DIV, + (_PRCMU_BASE + PRCM_DSITVCLK_DIV)); + + /* Start DSI PLL */ + writel(PRCMU_ENABLE_PLLDSI, (_PRCMU_BASE + PRCM_PLLDSI_ENABLE)); + /* Reset DSI PLL */ + writel(PRCMU_DSI_RESET_SW, (_PRCMU_BASE + PRCM_DSI_SW_RESET)); + for (i = 0; i < 10; i++) { + if ((readl(_PRCMU_BASE + PRCM_PLLDSI_LOCKP) & + PRCMU_PLLDSI_LOCKP_LOCKED) + == PRCMU_PLLDSI_LOCKP_LOCKED) + break; + udelay(100); + } + /* Set DSIPLL_RESETN */ + writel(PRCMU_RESET_DSIPLL, (_PRCMU_BASE + PRCM_APE_RESETN_SET)); + return 0; +} + +int prcmu_disable_dsipll(void) +{ + /* Disable dsi pll */ + writel(PRCMU_DISABLE_PLLDSI, (_PRCMU_BASE + PRCM_PLLDSI_ENABLE)); + /* Disable escapeclock */ + writel(PRCMU_DISABLE_ESCAPE_CLOCK_DIV, + (_PRCMU_BASE + PRCM_DSITVCLK_DIV)); + return 0; +} + +int prcmu_set_display_clocks(void) +{ + unsigned long flags; + unsigned int dsiclk; + + if (prcmu_is_u8400()) + dsiclk = PRCMU_DSI_CLOCK_SETTING_U8400; + else + dsiclk = PRCMU_DSI_CLOCK_SETTING; + + spin_lock_irqsave(&clk_mgt_lock, flags); + + /* Grab the HW semaphore. */ + while ((readl(_PRCMU_BASE + PRCM_SEM) & PRCM_SEM_PRCM_SEM) != 0) + cpu_relax(); + + writel(dsiclk, (_PRCMU_BASE + PRCM_HDMICLK_MGT)); + writel(PRCMU_DSI_LP_CLOCK_SETTING, (_PRCMU_BASE + PRCM_TVCLK_MGT)); + writel(PRCMU_DPI_CLOCK_SETTING, (_PRCMU_BASE + PRCM_LCDCLK_MGT)); + + /* Release the HW semaphore. */ + writel(0, (_PRCMU_BASE + PRCM_SEM)); + + spin_unlock_irqrestore(&clk_mgt_lock, flags); + + return 0; +} + +/** + * prcmu_enable_spi2 - Enables pin muxing for SPI2 on OtherAlternateC1. + */ +void prcmu_enable_spi2(void) +{ + u32 reg; + unsigned long flags; + + spin_lock_irqsave(&gpiocr_lock, flags); + reg = readl(_PRCMU_BASE + PRCM_GPIOCR); + writel(reg | PRCM_GPIOCR_SPI2_SELECT, _PRCMU_BASE + PRCM_GPIOCR); + spin_unlock_irqrestore(&gpiocr_lock, flags); +} + +/** + * prcmu_disable_spi2 - Disables pin muxing for SPI2 on OtherAlternateC1. + */ +void prcmu_disable_spi2(void) +{ + u32 reg; + unsigned long flags; + + spin_lock_irqsave(&gpiocr_lock, flags); + reg = readl(_PRCMU_BASE + PRCM_GPIOCR); + writel(reg & ~PRCM_GPIOCR_SPI2_SELECT, _PRCMU_BASE + PRCM_GPIOCR); + spin_unlock_irqrestore(&gpiocr_lock, flags); +} + +bool prcmu_has_arm_maxopp(void) +{ + return (readb(tcdm_base + PRCM_AVS_VARM_MAX_OPP) & + PRCM_AVS_ISMODEENABLE_MASK) == PRCM_AVS_ISMODEENABLE_MASK; +} + +bool prcmu_is_u8400(void) +{ + return prcmu_version.project_number == PRCMU_PROJECT_ID_8400V2_0; +} + +/** + * prcmu_get_boot_status - PRCMU boot status checking + * Returns: the current PRCMU boot status + */ +int prcmu_get_boot_status(void) +{ + return readb(tcdm_base + PRCM_BOOT_STATUS); +} + +/** + * prcmu_set_rc_a2p - This function is used to run few power state sequences + * @val: Value to be set, i.e. transition requested + * Returns: 0 on success, -EINVAL on invalid argument + * + * This function is used to run the following power state sequences - + * any state to ApReset, ApDeepSleep to ApExecute, ApExecute to ApDeepSleep + */ +int prcmu_set_rc_a2p(enum romcode_write val) +{ + if (val < RDY_2_DS || val > RDY_2_XP70_RST) + return -EINVAL; + writeb(val, (tcdm_base + PRCM_ROMCODE_A2P)); + return 0; +} + +/** + * prcmu_get_rc_p2a - This function is used to get power state sequences + * Returns: the power transition that has last happened + * + * This function can return the following transitions- + * any state to ApReset, ApDeepSleep to ApExecute, ApExecute to ApDeepSleep + */ +enum romcode_read prcmu_get_rc_p2a(void) +{ + return readb(tcdm_base + PRCM_ROMCODE_P2A); +} + +/** + * prcmu_get_current_mode - Return the current XP70 power mode + * Returns: Returns the current AP(ARM) power mode: init, + * apBoot, apExecute, apDeepSleep, apSleep, apIdle, apReset + */ +enum ap_pwrst prcmu_get_xp70_current_state(void) +{ + return readb(tcdm_base + PRCM_XP70_CUR_PWR_STATE); +} + +/** + * prcmu_config_clkout - Configure one of the programmable clock outputs. + * @clkout: The CLKOUT number (0 or 1). + * @source: The clock to be used (one of the PRCMU_CLKSRC_*). + * @div: The divider to be applied. + * + * Configures one of the programmable clock outputs (CLKOUTs). + * @div should be in the range [1,63] to request a configuration, or 0 to + * inform that the configuration is no longer requested. + */ +int prcmu_config_clkout(u8 clkout, u8 source, u8 div) +{ + static int requests[2]; + int r = 0; + unsigned long flags; + u32 val; + u32 bits; + u32 mask; + u32 div_mask; + + BUG_ON(clkout > 1); + BUG_ON(div > 63); + BUG_ON((clkout == 0) && (source > PRCMU_CLKSRC_CLK009)); + + if (!div && !requests[clkout]) + return -EINVAL; + + switch (clkout) { + case 0: + div_mask = PRCM_CLKOCR_CLKODIV0_MASK; + mask = (PRCM_CLKOCR_CLKODIV0_MASK | PRCM_CLKOCR_CLKOSEL0_MASK); + bits = ((source << PRCM_CLKOCR_CLKOSEL0_SHIFT) | + (div << PRCM_CLKOCR_CLKODIV0_SHIFT)); + break; + case 1: + div_mask = PRCM_CLKOCR_CLKODIV1_MASK; + mask = (PRCM_CLKOCR_CLKODIV1_MASK | PRCM_CLKOCR_CLKOSEL1_MASK | + PRCM_CLKOCR_CLK1TYPE); + bits = ((source << PRCM_CLKOCR_CLKOSEL1_SHIFT) | + (div << PRCM_CLKOCR_CLKODIV1_SHIFT)); + break; + } + bits &= mask; + + spin_lock_irqsave(&clkout_lock, flags); + + val = readl(_PRCMU_BASE + PRCM_CLKOCR); + if (val & div_mask) { + if (div) { + if ((val & mask) != bits) { + r = -EBUSY; + goto unlock_and_return; + } + } else { + if ((val & mask & ~div_mask) != bits) { + r = -EINVAL; + goto unlock_and_return; + } + } + } + writel((bits | (val & ~mask)), (_PRCMU_BASE + PRCM_CLKOCR)); + requests[clkout] += (div ? 1 : -1); + +unlock_and_return: + spin_unlock_irqrestore(&clkout_lock, flags); + + return r; +} + +int prcmu_set_power_state(u8 state, bool keep_ulp_clk, bool keep_ap_pll) +{ + unsigned long flags; + + BUG_ON((state < PRCMU_AP_SLEEP) || (PRCMU_AP_DEEP_IDLE < state)); + + spin_lock_irqsave(&mb0_transfer.lock, flags); + + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(0)) + cpu_relax(); + + writeb(MB0H_POWER_STATE_TRANS, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB0)); + writeb(state, (tcdm_base + PRCM_REQ_MB0_AP_POWER_STATE)); + writeb((keep_ap_pll ? 1 : 0), (tcdm_base + PRCM_REQ_MB0_AP_PLL_STATE)); + writeb((keep_ulp_clk ? 1 : 0), + (tcdm_base + PRCM_REQ_MB0_ULP_CLOCK_STATE)); + writeb(0, (tcdm_base + PRCM_REQ_MB0_DO_NOT_WFI)); + writel(MBOX_BIT(0), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); + + spin_unlock_irqrestore(&mb0_transfer.lock, flags); + + return 0; +} + +/* This function should only be called while mb0_transfer.lock is held. */ +static void config_wakeups(void) +{ + const u8 header[2] = { + MB0H_CONFIG_WAKEUPS_EXE, + MB0H_CONFIG_WAKEUPS_SLEEP + }; + static u32 last_dbb_events; + static u32 last_abb_events; + u32 dbb_events; + u32 abb_events; + unsigned int i; + + dbb_events = mb0_transfer.req.dbb_irqs | mb0_transfer.req.dbb_wakeups; + dbb_events |= (WAKEUP_BIT_AC_WAKE_ACK | WAKEUP_BIT_AC_SLEEP_ACK); + + abb_events = mb0_transfer.req.abb_events; + + if ((dbb_events == last_dbb_events) && (abb_events == last_abb_events)) + return; + + for (i = 0; i < 2; i++) { + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(0)) + cpu_relax(); + writel(dbb_events, (tcdm_base + PRCM_REQ_MB0_WAKEUP_8500)); + writel(abb_events, (tcdm_base + PRCM_REQ_MB0_WAKEUP_4500)); + writeb(header[i], (tcdm_base + PRCM_MBOX_HEADER_REQ_MB0)); + writel(MBOX_BIT(0), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); + } + last_dbb_events = dbb_events; + last_abb_events = abb_events; +} + +void prcmu_enable_wakeups(u32 wakeups) +{ + unsigned long flags; + u32 bits; + int i; + + BUG_ON(wakeups != (wakeups & VALID_WAKEUPS)); + + for (i = 0, bits = 0; i < NUM_PRCMU_WAKEUP_INDICES; i++) { + if (wakeups & BIT(i)) + bits |= prcmu_wakeup_bit[i]; + } + + spin_lock_irqsave(&mb0_transfer.lock, flags); + + mb0_transfer.req.dbb_wakeups = bits; + config_wakeups(); + + spin_unlock_irqrestore(&mb0_transfer.lock, flags); +} + +void prcmu_config_abb_event_readout(u32 abb_events) +{ + unsigned long flags; + + spin_lock_irqsave(&mb0_transfer.lock, flags); + + mb0_transfer.req.abb_events = abb_events; + config_wakeups(); + + spin_unlock_irqrestore(&mb0_transfer.lock, flags); +} + +void prcmu_get_abb_event_buffer(void __iomem **buf) +{ + if (readb(tcdm_base + PRCM_ACK_MB0_READ_POINTER) & 1) + *buf = (tcdm_base + PRCM_ACK_MB0_WAKEUP_1_4500); + else + *buf = (tcdm_base + PRCM_ACK_MB0_WAKEUP_0_4500); +} + +/** + * prcmu_set_arm_opp - set the appropriate ARM OPP + * @opp: The new ARM operating point to which transition is to be made + * Returns: 0 on success, non-zero on failure + * + * This function sets the the operating point of the ARM. + */ +int prcmu_set_arm_opp(u8 opp) +{ + int r; + + if (opp < ARM_NO_CHANGE || opp > ARM_EXTCLK) + return -EINVAL; + + r = 0; + + mutex_lock(&mb1_transfer.lock); + + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(1)) + cpu_relax(); + + writeb(MB1H_ARM_APE_OPP, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB1)); + writeb(opp, (tcdm_base + PRCM_REQ_MB1_ARM_OPP)); + writeb(APE_NO_CHANGE, (tcdm_base + PRCM_REQ_MB1_APE_OPP)); + + writel(MBOX_BIT(1), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); + wait_for_completion(&mb1_transfer.work); + + if ((mb1_transfer.ack.header != MB1H_ARM_APE_OPP) || + (mb1_transfer.ack.arm_opp != opp)) + r = -EIO; + + mutex_unlock(&mb1_transfer.lock); + + return r; +} + +/** + * prcmu_get_arm_opp - get the current ARM OPP + * + * Returns: the current ARM OPP + */ +int prcmu_get_arm_opp(void) +{ + return readb(tcdm_base + PRCM_ACK_MB1_CURRENT_ARM_OPP); +} + +/** + * prcmu_get_ddr_opp - get the current DDR OPP + * + * Returns: the current DDR OPP + */ +int prcmu_get_ddr_opp(void) +{ + return readb(_PRCMU_BASE + PRCM_DDR_SUBSYS_APE_MINBW); +} + +/** + * set_ddr_opp - set the appropriate DDR OPP + * @opp: The new DDR operating point to which transition is to be made + * Returns: 0 on success, non-zero on failure + * + * This function sets the operating point of the DDR. + */ +int prcmu_set_ddr_opp(u8 opp) +{ + if (opp < DDR_100_OPP || opp > DDR_25_OPP) + return -EINVAL; + /* Changing the DDR OPP can hang the hardware pre-v21 */ + if (cpu_is_u8500v20_or_later() && !cpu_is_u8500v20()) + writeb(opp, (_PRCMU_BASE + PRCM_DDR_SUBSYS_APE_MINBW)); + + return 0; +} +/** + * set_ape_opp - set the appropriate APE OPP + * @opp: The new APE operating point to which transition is to be made + * Returns: 0 on success, non-zero on failure + * + * This function sets the operating point of the APE. + */ +int prcmu_set_ape_opp(u8 opp) +{ + int r = 0; + + mutex_lock(&mb1_transfer.lock); + + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(1)) + cpu_relax(); + + writeb(MB1H_ARM_APE_OPP, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB1)); + writeb(ARM_NO_CHANGE, (tcdm_base + PRCM_REQ_MB1_ARM_OPP)); + writeb(opp, (tcdm_base + PRCM_REQ_MB1_APE_OPP)); + + writel(MBOX_BIT(1), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); + wait_for_completion(&mb1_transfer.work); + + if ((mb1_transfer.ack.header != MB1H_ARM_APE_OPP) || + (mb1_transfer.ack.ape_opp != opp)) + r = -EIO; + + mutex_unlock(&mb1_transfer.lock); + + return r; +} + +/** + * prcmu_get_ape_opp - get the current APE OPP + * + * Returns: the current APE OPP + */ +int prcmu_get_ape_opp(void) +{ + return readb(tcdm_base + PRCM_ACK_MB1_CURRENT_APE_OPP); +} + +/** + * prcmu_request_ape_opp_100_voltage - Request APE OPP 100% voltage + * @enable: true to request the higher voltage, false to drop a request. + * + * Calls to this function to enable and disable requests must be balanced. + */ +int prcmu_request_ape_opp_100_voltage(bool enable) +{ + int r = 0; + u8 header; + static unsigned int requests; + + mutex_lock(&mb1_transfer.lock); + + if (enable) { + if (0 != requests++) + goto unlock_and_return; + header = MB1H_REQUEST_APE_OPP_100_VOLT; + } else { + if (requests == 0) { + r = -EIO; + goto unlock_and_return; + } else if (1 != requests--) { + goto unlock_and_return; + } + header = MB1H_RELEASE_APE_OPP_100_VOLT; + } + + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(1)) + cpu_relax(); + + writeb(header, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB1)); + + writel(MBOX_BIT(1), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); + wait_for_completion(&mb1_transfer.work); + + if ((mb1_transfer.ack.header != header) || + ((mb1_transfer.ack.ape_voltage_status & BIT(0)) != 0)) + r = -EIO; + +unlock_and_return: + mutex_unlock(&mb1_transfer.lock); + + return r; +} + +/** + * prcmu_release_usb_wakeup_state - release the state required by a USB wakeup + * + * This function releases the power state requirements of a USB wakeup. + */ +int prcmu_release_usb_wakeup_state(void) +{ + int r = 0; + + mutex_lock(&mb1_transfer.lock); + + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(1)) + cpu_relax(); + + writeb(MB1H_RELEASE_USB_WAKEUP, + (tcdm_base + PRCM_MBOX_HEADER_REQ_MB1)); + + writel(MBOX_BIT(1), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); + wait_for_completion(&mb1_transfer.work); + + if ((mb1_transfer.ack.header != MB1H_RELEASE_USB_WAKEUP) || + ((mb1_transfer.ack.ape_voltage_status & BIT(0)) != 0)) + r = -EIO; + + mutex_unlock(&mb1_transfer.lock); + + return r; +} + +/** + * prcmu_set_epod - set the state of a EPOD (power domain) + * @epod_id: The EPOD to set + * @epod_state: The new EPOD state + * + * This function sets the state of a EPOD (power domain). It may not be called + * from interrupt context. + */ +int prcmu_set_epod(u16 epod_id, u8 epod_state) +{ + int r = 0; + bool ram_retention = false; + int i; + + /* check argument */ + BUG_ON(epod_id >= NUM_EPOD_ID); + + /* set flag if retention is possible */ + switch (epod_id) { + case EPOD_ID_SVAMMDSP: + case EPOD_ID_SIAMMDSP: + case EPOD_ID_ESRAM12: + case EPOD_ID_ESRAM34: + ram_retention = true; + break; + } + + /* check argument */ + BUG_ON(epod_state > EPOD_STATE_ON); + BUG_ON(epod_state == EPOD_STATE_RAMRET && !ram_retention); + + /* get lock */ + mutex_lock(&mb2_transfer.lock); + + /* wait for mailbox */ + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(2)) + cpu_relax(); + + /* fill in mailbox */ + for (i = 0; i < NUM_EPOD_ID; i++) + writeb(EPOD_STATE_NO_CHANGE, (tcdm_base + PRCM_REQ_MB2 + i)); + writeb(epod_state, (tcdm_base + PRCM_REQ_MB2 + epod_id)); + + writeb(MB2H_DPS, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB2)); + + writel(MBOX_BIT(2), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); + + /* + * The current firmware version does not handle errors correctly, + * and we cannot recover if there is an error. + * This is expected to change when the firmware is updated. + */ + if (!wait_for_completion_timeout(&mb2_transfer.work, + msecs_to_jiffies(20000))) { + pr_err("prcmu: %s timed out (20 s) waiting for a reply.\n", + __func__); + r = -EIO; + goto unlock_and_return; + } + + if (mb2_transfer.ack.status != HWACC_PWR_ST_OK) + r = -EIO; + +unlock_and_return: + mutex_unlock(&mb2_transfer.lock); + return r; +} + +/** + * prcmu_configure_auto_pm - Configure autonomous power management. + * @sleep: Configuration for ApSleep. + * @idle: Configuration for ApIdle. + */ +void prcmu_configure_auto_pm(struct prcmu_auto_pm_config *sleep, + struct prcmu_auto_pm_config *idle) +{ + u32 sleep_cfg; + u32 idle_cfg; + unsigned long flags; -#include + BUG_ON((sleep == NULL) || (idle == NULL)); -#include "db8500-prcmu-regs.h" + sleep_cfg = (sleep->sva_auto_pm_enable & 0xF); + sleep_cfg = ((sleep_cfg << 4) | (sleep->sia_auto_pm_enable & 0xF)); + sleep_cfg = ((sleep_cfg << 8) | (sleep->sva_power_on & 0xFF)); + sleep_cfg = ((sleep_cfg << 8) | (sleep->sia_power_on & 0xFF)); + sleep_cfg = ((sleep_cfg << 4) | (sleep->sva_policy & 0xF)); + sleep_cfg = ((sleep_cfg << 4) | (sleep->sia_policy & 0xF)); -/* Global var to runtime determine TCDM base for v2 or v1 */ -static __iomem void *tcdm_base; + idle_cfg = (idle->sva_auto_pm_enable & 0xF); + idle_cfg = ((idle_cfg << 4) | (idle->sia_auto_pm_enable & 0xF)); + idle_cfg = ((idle_cfg << 8) | (idle->sva_power_on & 0xFF)); + idle_cfg = ((idle_cfg << 8) | (idle->sia_power_on & 0xFF)); + idle_cfg = ((idle_cfg << 4) | (idle->sva_policy & 0xF)); + idle_cfg = ((idle_cfg << 4) | (idle->sia_policy & 0xF)); -#define _MBOX_HEADER (tcdm_base + 0xFE8) -#define MBOX_HEADER_REQ_MB0 (_MBOX_HEADER + 0x0) + spin_lock_irqsave(&mb2_transfer.auto_pm_lock, flags); -#define REQ_MB1 (tcdm_base + 0xFD0) -#define REQ_MB5 (tcdm_base + 0xE44) + /* + * The autonomous power management configuration is done through + * fields in mailbox 2, but these fields are only used as shared + * variables - i.e. there is no need to send a message. + */ + writel(sleep_cfg, (tcdm_base + PRCM_REQ_MB2_AUTO_PM_SLEEP)); + writel(idle_cfg, (tcdm_base + PRCM_REQ_MB2_AUTO_PM_IDLE)); -#define REQ_MB1_ARMOPP (REQ_MB1 + 0x0) -#define REQ_MB1_APEOPP (REQ_MB1 + 0x1) -#define REQ_MB1_BOOSTOPP (REQ_MB1 + 0x2) + mb2_transfer.auto_pm_enabled = + ((sleep->sva_auto_pm_enable == PRCMU_AUTO_PM_ON) || + (sleep->sia_auto_pm_enable == PRCMU_AUTO_PM_ON) || + (idle->sva_auto_pm_enable == PRCMU_AUTO_PM_ON) || + (idle->sia_auto_pm_enable == PRCMU_AUTO_PM_ON)); -#define ACK_MB1 (tcdm_base + 0xE04) -#define ACK_MB5 (tcdm_base + 0xDF4) + spin_unlock_irqrestore(&mb2_transfer.auto_pm_lock, flags); +} +EXPORT_SYMBOL(prcmu_configure_auto_pm); -#define ACK_MB1_CURR_ARMOPP (ACK_MB1 + 0x0) -#define ACK_MB1_CURR_APEOPP (ACK_MB1 + 0x1) +bool prcmu_is_auto_pm_enabled(void) +{ + return mb2_transfer.auto_pm_enabled; +} -#define REQ_MB5_I2C_SLAVE_OP (REQ_MB5) -#define REQ_MB5_I2C_HW_BITS (REQ_MB5 + 1) -#define REQ_MB5_I2C_REG (REQ_MB5 + 2) -#define REQ_MB5_I2C_VAL (REQ_MB5 + 3) +static int request_sysclk(bool enable) +{ + int r; + unsigned long flags; -#define ACK_MB5_I2C_STATUS (ACK_MB5 + 1) -#define ACK_MB5_I2C_VAL (ACK_MB5 + 3) + r = 0; -#define PRCM_AVS_VARM_MAX_OPP (tcdm_base + 0x2E4) -#define PRCM_AVS_ISMODEENABLE 7 -#define PRCM_AVS_ISMODEENABLE_MASK (1 << PRCM_AVS_ISMODEENABLE) + mutex_lock(&mb3_transfer.sysclk_lock); -#define I2C_WRITE(slave) \ - (((slave) << 1) | (cpu_is_u8500v2() ? BIT(6) : 0)) -#define I2C_READ(slave) \ - (((slave) << 1) | (cpu_is_u8500v2() ? BIT(6) : 0) | BIT(0)) -#define I2C_STOP_EN BIT(3) - -enum mb1_h { - MB1H_ARM_OPP = 1, - MB1H_APE_OPP, - MB1H_ARM_APE_OPP, -}; + spin_lock_irqsave(&mb3_transfer.lock, flags); -static struct { - struct mutex lock; - struct completion work; - struct { - u8 arm_opp; - u8 ape_opp; - u8 arm_status; - u8 ape_status; - } ack; -} mb1_transfer; + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(3)) + cpu_relax(); -enum ack_mb5_status { - I2C_WR_OK = 0x01, - I2C_RD_OK = 0x02, -}; + writeb((enable ? ON : OFF), (tcdm_base + PRCM_REQ_MB3_SYSCLK_MGT)); -#define MBOX_BIT BIT -#define NUM_MBOX 8 + writeb(MB3H_SYSCLK, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB3)); + writel(MBOX_BIT(3), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); -static struct { - struct mutex lock; - struct completion work; - bool failed; - struct { - u8 status; - u8 value; - } ack; -} mb5_transfer; + spin_unlock_irqrestore(&mb3_transfer.lock, flags); + + /* + * The firmware only sends an ACK if we want to enable the + * SysClk, and it succeeds. + */ + if (enable && !wait_for_completion_timeout(&mb3_transfer.sysclk_work, + msecs_to_jiffies(20000))) { + pr_err("prcmu: %s timed out (20 s) waiting for a reply.\n", + __func__); + r = -EIO; + } + + mutex_unlock(&mb3_transfer.sysclk_lock); + + return r; +} + +static int request_timclk(bool enable) +{ + u32 val = (PRCM_TCR_DOZE_MODE | PRCM_TCR_TENSEL_MASK); + + if (!enable) + val |= PRCM_TCR_STOP_TIMERS; + writel(val, (_PRCMU_BASE + PRCM_TCR)); + + return 0; +} + +static int request_reg_clock(u8 clock, bool enable) +{ + u32 val; + unsigned long flags; + + spin_lock_irqsave(&clk_mgt_lock, flags); + + /* Grab the HW semaphore. */ + while ((readl(_PRCMU_BASE + PRCM_SEM) & PRCM_SEM_PRCM_SEM) != 0) + cpu_relax(); + + val = readl(_PRCMU_BASE + clk_mgt[clock].offset); + if (enable) { + val |= (PRCM_CLK_MGT_CLKEN | clk_mgt[clock].pllsw); + } else { + clk_mgt[clock].pllsw = (val & PRCM_CLK_MGT_CLKPLLSW_MASK); + val &= ~(PRCM_CLK_MGT_CLKEN | PRCM_CLK_MGT_CLKPLLSW_MASK); + } + writel(val, (_PRCMU_BASE + clk_mgt[clock].offset)); + + /* Release the HW semaphore. */ + writel(0, (_PRCMU_BASE + PRCM_SEM)); + + spin_unlock_irqrestore(&clk_mgt_lock, flags); + + return 0; +} + +/** + * prcmu_request_clock() - Request for a clock to be enabled or disabled. + * @clock: The clock for which the request is made. + * @enable: Whether the clock should be enabled (true) or disabled (false). + * + * This function should only be used by the clock implementation. + * Do not use it from any other place! + */ +int prcmu_request_clock(u8 clock, bool enable) +{ + if (clock < PRCMU_NUM_REG_CLOCKS) + return request_reg_clock(clock, enable); + else if (clock == PRCMU_TIMCLK) + return request_timclk(enable); + else if (clock == PRCMU_SYSCLK) + return request_sysclk(enable); + else + return -EINVAL; +} + +int prcmu_config_esram0_deep_sleep(u8 state) +{ + if ((state > ESRAM0_DEEP_SLEEP_STATE_RET) || + (state < ESRAM0_DEEP_SLEEP_STATE_OFF)) + return -EINVAL; + + mutex_lock(&mb4_transfer.lock); + + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(4)) + cpu_relax(); + + writeb(MB4H_MEM_ST, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB4)); + writeb(((DDR_PWR_STATE_OFFHIGHLAT << 4) | DDR_PWR_STATE_ON), + (tcdm_base + PRCM_REQ_MB4_DDR_ST_AP_SLEEP_IDLE)); + writeb(DDR_PWR_STATE_ON, + (tcdm_base + PRCM_REQ_MB4_DDR_ST_AP_DEEP_IDLE)); + writeb(state, (tcdm_base + PRCM_REQ_MB4_ESRAM0_ST)); + + writel(MBOX_BIT(4), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); + wait_for_completion(&mb4_transfer.work); + + mutex_unlock(&mb4_transfer.lock); + + return 0; +} + +int prcmu_config_hotdog(u8 threshold) +{ + mutex_lock(&mb4_transfer.lock); + + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(4)) + cpu_relax(); + + writeb(threshold, (tcdm_base + PRCM_REQ_MB4_HOTDOG_THRESHOLD)); + writeb(MB4H_HOTDOG, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB4)); + + writel(MBOX_BIT(4), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); + wait_for_completion(&mb4_transfer.work); + + mutex_unlock(&mb4_transfer.lock); + + return 0; +} + +int prcmu_config_hotmon(u8 low, u8 high) +{ + mutex_lock(&mb4_transfer.lock); + + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(4)) + cpu_relax(); + + writeb(low, (tcdm_base + PRCM_REQ_MB4_HOTMON_LOW)); + writeb(high, (tcdm_base + PRCM_REQ_MB4_HOTMON_HIGH)); + writeb((HOTMON_CONFIG_LOW | HOTMON_CONFIG_HIGH), + (tcdm_base + PRCM_REQ_MB4_HOTMON_CONFIG)); + writeb(MB4H_HOTMON, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB4)); + + writel(MBOX_BIT(4), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); + wait_for_completion(&mb4_transfer.work); + + mutex_unlock(&mb4_transfer.lock); + + return 0; +} + +static int config_hot_period(u16 val) +{ + mutex_lock(&mb4_transfer.lock); + + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(4)) + cpu_relax(); + + writew(val, (tcdm_base + PRCM_REQ_MB4_HOT_PERIOD)); + writeb(MB4H_HOT_PERIOD, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB4)); + + writel(MBOX_BIT(4), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); + wait_for_completion(&mb4_transfer.work); + + mutex_unlock(&mb4_transfer.lock); + + return 0; +} + +int prcmu_start_temp_sense(u16 cycles32k) +{ + if (cycles32k == 0xFFFF) + return -EINVAL; + + return config_hot_period(cycles32k); +} + +int prcmu_stop_temp_sense(void) +{ + return config_hot_period(0xFFFF); +} + +/** + * prcmu_set_clock_divider() - Configure the clock divider. + * @clock: The clock for which the request is made. + * @divider: The clock divider. (< 32) + * + * This function should only be used by the clock implementation. + * Do not use it from any other place! + */ +int prcmu_set_clock_divider(u8 clock, u8 divider) +{ + u32 val; + unsigned long flags; + + if ((clock >= PRCMU_NUM_REG_CLOCKS) || (divider < 1) || (31 < divider)) + return -EINVAL; + + spin_lock_irqsave(&clk_mgt_lock, flags); + + /* Grab the HW semaphore. */ + while ((readl(_PRCMU_BASE + PRCM_SEM) & PRCM_SEM_PRCM_SEM) != 0) + cpu_relax(); + + val = readl(_PRCMU_BASE + clk_mgt[clock].offset); + val &= ~(PRCM_CLK_MGT_CLKPLLDIV_MASK); + val |= (u32)divider; + writel(val, (_PRCMU_BASE + clk_mgt[clock].offset)); + + /* Release the HW semaphore. */ + writel(0, (_PRCMU_BASE + PRCM_SEM)); + + spin_unlock_irqrestore(&clk_mgt_lock, flags); + + return 0; +} /** * prcmu_abb_read() - Read register value(s) from the ABB. @@ -115,33 +1376,34 @@ int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size) if (size != 1) return -EINVAL; - r = mutex_lock_interruptible(&mb5_transfer.lock); - if (r) - return r; + mutex_lock(&mb5_transfer.lock); - while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(5)) + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(5)) cpu_relax(); - writeb(I2C_READ(slave), REQ_MB5_I2C_SLAVE_OP); - writeb(I2C_STOP_EN, REQ_MB5_I2C_HW_BITS); - writeb(reg, REQ_MB5_I2C_REG); + writeb(PRCMU_I2C_READ(slave), (tcdm_base + PRCM_REQ_MB5_I2C_SLAVE_OP)); + writeb(PRCMU_I2C_STOP_EN, (tcdm_base + PRCM_REQ_MB5_I2C_HW_BITS)); + writeb(reg, (tcdm_base + PRCM_REQ_MB5_I2C_REG)); + writeb(0, (tcdm_base + PRCM_REQ_MB5_I2C_VAL)); + + writel(MBOX_BIT(5), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); - writel(MBOX_BIT(5), PRCM_MBOX_CPU_SET); if (!wait_for_completion_timeout(&mb5_transfer.work, - msecs_to_jiffies(500))) { - pr_err("prcmu: prcmu_abb_read timed out.\n"); + msecs_to_jiffies(20000))) { + pr_err("prcmu: %s timed out (20 s) waiting for a reply.\n", + __func__); r = -EIO; - goto unlock_and_return; + } else { + r = ((mb5_transfer.ack.status == I2C_RD_OK) ? 0 : -EIO); } - r = ((mb5_transfer.ack.status == I2C_RD_OK) ? 0 : -EIO); + if (!r) *value = mb5_transfer.ack.value; -unlock_and_return: mutex_unlock(&mb5_transfer.lock); + return r; } -EXPORT_SYMBOL(prcmu_abb_read); /** * prcmu_abb_write() - Write register value(s) to the ABB. @@ -160,179 +1422,262 @@ int prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size) if (size != 1) return -EINVAL; - r = mutex_lock_interruptible(&mb5_transfer.lock); - if (r) - return r; + mutex_lock(&mb5_transfer.lock); - - while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(5)) + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(5)) cpu_relax(); - writeb(I2C_WRITE(slave), REQ_MB5_I2C_SLAVE_OP); - writeb(I2C_STOP_EN, REQ_MB5_I2C_HW_BITS); - writeb(reg, REQ_MB5_I2C_REG); - writeb(*value, REQ_MB5_I2C_VAL); + writeb(PRCMU_I2C_WRITE(slave), (tcdm_base + PRCM_REQ_MB5_I2C_SLAVE_OP)); + writeb(PRCMU_I2C_STOP_EN, (tcdm_base + PRCM_REQ_MB5_I2C_HW_BITS)); + writeb(reg, (tcdm_base + PRCM_REQ_MB5_I2C_REG)); + writeb(*value, (tcdm_base + PRCM_REQ_MB5_I2C_VAL)); + + writel(MBOX_BIT(5), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); - writel(MBOX_BIT(5), PRCM_MBOX_CPU_SET); if (!wait_for_completion_timeout(&mb5_transfer.work, - msecs_to_jiffies(500))) { - pr_err("prcmu: prcmu_abb_write timed out.\n"); + msecs_to_jiffies(20000))) { + pr_err("prcmu: %s timed out (20 s) waiting for a reply.\n", + __func__); r = -EIO; - goto unlock_and_return; + } else { + r = ((mb5_transfer.ack.status == I2C_WR_OK) ? 0 : -EIO); } - r = ((mb5_transfer.ack.status == I2C_WR_OK) ? 0 : -EIO); -unlock_and_return: mutex_unlock(&mb5_transfer.lock); + return r; } -EXPORT_SYMBOL(prcmu_abb_write); -static int set_ape_cpu_opps(u8 header, enum prcmu_ape_opp ape_opp, - enum prcmu_cpu_opp cpu_opp) +/** + * prcmu_ac_wake_req - should be called whenever ARM wants to wakeup Modem + */ +void prcmu_ac_wake_req(void) { - bool do_ape; - bool do_arm; - int err = 0; + u32 val; - do_ape = ((header == MB1H_APE_OPP) || (header == MB1H_ARM_APE_OPP)); - do_arm = ((header == MB1H_ARM_OPP) || (header == MB1H_ARM_APE_OPP)); + mutex_lock(&mb0_transfer.ac_wake_lock); - mutex_lock(&mb1_transfer.lock); + val = readl(_PRCMU_BASE + PRCM_HOSTACCESS_REQ); + if (val & PRCM_HOSTACCESS_REQ_HOSTACCESS_REQ) + goto unlock_and_return; - while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(1)) - cpu_relax(); + atomic_set(&ac_wake_req_state, 1); - writeb(0, MBOX_HEADER_REQ_MB0); - writeb(cpu_opp, REQ_MB1_ARMOPP); - writeb(ape_opp, REQ_MB1_APEOPP); - writeb(0, REQ_MB1_BOOSTOPP); - writel(MBOX_BIT(1), PRCM_MBOX_CPU_SET); - wait_for_completion(&mb1_transfer.work); - if ((do_ape) && (mb1_transfer.ack.ape_status != 0)) - err = -EIO; - if ((do_arm) && (mb1_transfer.ack.arm_status != 0)) - err = -EIO; + writel((val | PRCM_HOSTACCESS_REQ_HOSTACCESS_REQ), + (_PRCMU_BASE + PRCM_HOSTACCESS_REQ)); - mutex_unlock(&mb1_transfer.lock); + if (!wait_for_completion_timeout(&mb0_transfer.ac_wake_work, + msecs_to_jiffies(20000))) { + pr_err("prcmu: %s timed out (20 s) waiting for a reply.\n", + __func__); + } - return err; +unlock_and_return: + mutex_unlock(&mb0_transfer.ac_wake_lock); } /** - * prcmu_set_ape_opp() - Set the OPP of the APE. - * @opp: The OPP to set. - * - * This function sets the OPP of the APE. + * prcmu_ac_sleep_req - called when ARM no longer needs to talk to modem */ -int prcmu_set_ape_opp(enum prcmu_ape_opp opp) +void prcmu_ac_sleep_req() { - return set_ape_cpu_opps(MB1H_APE_OPP, opp, APE_OPP_NO_CHANGE); + u32 val; + + mutex_lock(&mb0_transfer.ac_wake_lock); + + val = readl(_PRCMU_BASE + PRCM_HOSTACCESS_REQ); + if (!(val & PRCM_HOSTACCESS_REQ_HOSTACCESS_REQ)) + goto unlock_and_return; + + writel((val & ~PRCM_HOSTACCESS_REQ_HOSTACCESS_REQ), + (_PRCMU_BASE + PRCM_HOSTACCESS_REQ)); + + if (!wait_for_completion_timeout(&mb0_transfer.ac_wake_work, + msecs_to_jiffies(20000))) { + pr_err("prcmu: %s timed out (20 s) waiting for a reply.\n", + __func__); + } + + atomic_set(&ac_wake_req_state, 0); + +unlock_and_return: + mutex_unlock(&mb0_transfer.ac_wake_lock); } -EXPORT_SYMBOL(prcmu_set_ape_opp); -/** - * prcmu_set_cpu_opp() - Set the OPP of the CPU. - * @opp: The OPP to set. - * - * This function sets the OPP of the CPU. - */ -int prcmu_set_cpu_opp(enum prcmu_cpu_opp opp) +bool prcmu_is_ac_wake_requested(void) { - return set_ape_cpu_opps(MB1H_ARM_OPP, CPU_OPP_NO_CHANGE, opp); + return (atomic_read(&ac_wake_req_state) != 0); } -EXPORT_SYMBOL(prcmu_set_cpu_opp); /** - * prcmu_set_ape_cpu_opps() - Set the OPPs of the APE and the CPU. - * @ape_opp: The APE OPP to set. - * @cpu_opp: The CPU OPP to set. + * prcmu_system_reset - System reset * - * This function sets the OPPs of the APE and the CPU. + * Saves the reset reason code and then sets the APE_SOFRST register which + * fires interrupt to fw */ -int prcmu_set_ape_cpu_opps(enum prcmu_ape_opp ape_opp, - enum prcmu_cpu_opp cpu_opp) +void prcmu_system_reset(u16 reset_code) { - return set_ape_cpu_opps(MB1H_ARM_APE_OPP, ape_opp, cpu_opp); + writew(reset_code, (tcdm_base + PRCM_SW_RST_REASON)); + writel(1, (_PRCMU_BASE + PRCM_APE_SOFTRST)); } -EXPORT_SYMBOL(prcmu_set_ape_cpu_opps); /** - * prcmu_get_ape_opp() - Get the OPP of the APE. - * - * This function gets the OPP of the APE. + * prcmu_reset_modem - ask the PRCMU to reset modem */ -enum prcmu_ape_opp prcmu_get_ape_opp(void) +void prcmu_modem_reset(void) { - return readb(ACK_MB1_CURR_APEOPP); + mutex_lock(&mb1_transfer.lock); + + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(1)) + cpu_relax(); + + writeb(MB1H_RESET_MODEM, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB1)); + writel(MBOX_BIT(1), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); + wait_for_completion(&mb1_transfer.work); + + /* + * No need to check return from PRCMU as modem should go in reset state + * This state is already managed by upper layer + */ + + mutex_unlock(&mb1_transfer.lock); } -EXPORT_SYMBOL(prcmu_get_ape_opp); -/** - * prcmu_get_cpu_opp() - Get the OPP of the CPU. - * - * This function gets the OPP of the CPU. The OPP is specified in %%. - * PRCMU_OPP_EXT is a special OPP value, not specified in %%. - */ -int prcmu_get_cpu_opp(void) +static void ack_dbb_wakeup(void) { - return readb(ACK_MB1_CURR_ARMOPP); + unsigned long flags; + + spin_lock_irqsave(&mb0_transfer.lock, flags); + + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(0)) + cpu_relax(); + + writeb(MB0H_READ_WAKEUP_ACK, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB0)); + writel(MBOX_BIT(0), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); + + spin_unlock_irqrestore(&mb0_transfer.lock, flags); } -EXPORT_SYMBOL(prcmu_get_cpu_opp); -bool prcmu_has_arm_maxopp(void) +static inline void print_unknown_header_warning(u8 n, u8 header) { - return (readb(PRCM_AVS_VARM_MAX_OPP) & PRCM_AVS_ISMODEENABLE_MASK) - == PRCM_AVS_ISMODEENABLE_MASK; + pr_warning("prcmu: Unknown message header (%d) in mailbox %d.\n", + header, n); } -static void read_mailbox_0(void) +static bool read_mailbox_0(void) { - writel(MBOX_BIT(0), PRCM_ARM_IT1_CLEAR); + bool r; + u32 ev; + unsigned int n; + u8 header; + + header = readb(tcdm_base + PRCM_MBOX_HEADER_ACK_MB0); + switch (header) { + case MB0H_WAKEUP_EXE: + case MB0H_WAKEUP_SLEEP: + if (readb(tcdm_base + PRCM_ACK_MB0_READ_POINTER) & 1) + ev = readl(tcdm_base + PRCM_ACK_MB0_WAKEUP_1_8500); + else + ev = readl(tcdm_base + PRCM_ACK_MB0_WAKEUP_0_8500); + + if (ev & (WAKEUP_BIT_AC_WAKE_ACK | WAKEUP_BIT_AC_SLEEP_ACK)) + complete(&mb0_transfer.ac_wake_work); + if (ev & WAKEUP_BIT_SYSCLK_OK) + complete(&mb3_transfer.sysclk_work); + + ev &= mb0_transfer.req.dbb_irqs; + + for (n = 0; n < NUM_PRCMU_WAKEUPS; n++) { + if (ev & prcmu_irq_bit[n]) + generic_handle_irq(IRQ_PRCMU_BASE + n); + } + r = true; + break; + default: + print_unknown_header_warning(0, header); + r = false; + break; + } + writel(MBOX_BIT(0), (_PRCMU_BASE + PRCM_ARM_IT1_CLR)); + return r; } -static void read_mailbox_1(void) +static bool read_mailbox_1(void) { - mb1_transfer.ack.arm_opp = readb(ACK_MB1_CURR_ARMOPP); - mb1_transfer.ack.ape_opp = readb(ACK_MB1_CURR_APEOPP); + mb1_transfer.ack.header = readb(tcdm_base + PRCM_MBOX_HEADER_REQ_MB1); + mb1_transfer.ack.arm_opp = readb(tcdm_base + + PRCM_ACK_MB1_CURRENT_ARM_OPP); + mb1_transfer.ack.ape_opp = readb(tcdm_base + + PRCM_ACK_MB1_CURRENT_APE_OPP); + mb1_transfer.ack.ape_voltage_status = readb(tcdm_base + + PRCM_ACK_MB1_APE_VOLTAGE_STATUS); + writel(MBOX_BIT(1), (_PRCMU_BASE + PRCM_ARM_IT1_CLR)); complete(&mb1_transfer.work); - writel(MBOX_BIT(1), PRCM_ARM_IT1_CLEAR); + return false; } -static void read_mailbox_2(void) +static bool read_mailbox_2(void) { - writel(MBOX_BIT(2), PRCM_ARM_IT1_CLEAR); + mb2_transfer.ack.status = readb(tcdm_base + PRCM_ACK_MB2_DPS_STATUS); + writel(MBOX_BIT(2), (_PRCMU_BASE + PRCM_ARM_IT1_CLR)); + complete(&mb2_transfer.work); + return false; } -static void read_mailbox_3(void) +static bool read_mailbox_3(void) { - writel(MBOX_BIT(3), PRCM_ARM_IT1_CLEAR); + writel(MBOX_BIT(3), (_PRCMU_BASE + PRCM_ARM_IT1_CLR)); + return false; } -static void read_mailbox_4(void) +static bool read_mailbox_4(void) { - writel(MBOX_BIT(4), PRCM_ARM_IT1_CLEAR); + u8 header; + bool do_complete = true; + + header = readb(tcdm_base + PRCM_MBOX_HEADER_REQ_MB4); + switch (header) { + case MB4H_MEM_ST: + case MB4H_HOTDOG: + case MB4H_HOTMON: + case MB4H_HOT_PERIOD: + break; + default: + print_unknown_header_warning(4, header); + do_complete = false; + break; + } + + writel(MBOX_BIT(4), (_PRCMU_BASE + PRCM_ARM_IT1_CLR)); + + if (do_complete) + complete(&mb4_transfer.work); + + return false; } -static void read_mailbox_5(void) +static bool read_mailbox_5(void) { - mb5_transfer.ack.status = readb(ACK_MB5_I2C_STATUS); - mb5_transfer.ack.value = readb(ACK_MB5_I2C_VAL); + mb5_transfer.ack.status = readb(tcdm_base + PRCM_ACK_MB5_I2C_STATUS); + mb5_transfer.ack.value = readb(tcdm_base + PRCM_ACK_MB5_I2C_VAL); + writel(MBOX_BIT(5), (_PRCMU_BASE + PRCM_ARM_IT1_CLR)); complete(&mb5_transfer.work); - writel(MBOX_BIT(5), PRCM_ARM_IT1_CLEAR); + return false; } -static void read_mailbox_6(void) +static bool read_mailbox_6(void) { - writel(MBOX_BIT(6), PRCM_ARM_IT1_CLEAR); + writel(MBOX_BIT(6), (_PRCMU_BASE + PRCM_ARM_IT1_CLR)); + return false; } -static void read_mailbox_7(void) +static bool read_mailbox_7(void) { - writel(MBOX_BIT(7), PRCM_ARM_IT1_CLEAR); + writel(MBOX_BIT(7), (_PRCMU_BASE + PRCM_ARM_IT1_CLR)); + return false; } -static void (* const read_mailbox[NUM_MBOX])(void) = { +static bool (* const read_mailbox[NUM_MB])(void) = { read_mailbox_0, read_mailbox_1, read_mailbox_2, @@ -347,49 +1692,199 @@ static irqreturn_t prcmu_irq_handler(int irq, void *data) { u32 bits; u8 n; + irqreturn_t r; - bits = (readl(PRCM_ARM_IT1_VAL) & (MBOX_BIT(NUM_MBOX) - 1)); + bits = (readl(_PRCMU_BASE + PRCM_ARM_IT1_VAL) & ALL_MBOX_BITS); if (unlikely(!bits)) return IRQ_NONE; + r = IRQ_HANDLED; for (n = 0; bits; n++) { if (bits & MBOX_BIT(n)) { bits -= MBOX_BIT(n); - read_mailbox[n](); + if (read_mailbox[n]()) + r = IRQ_WAKE_THREAD; } } + return r; +} + +static irqreturn_t prcmu_irq_thread_fn(int irq, void *data) +{ + ack_dbb_wakeup(); return IRQ_HANDLED; } +static void prcmu_mask_work(struct work_struct *work) +{ + unsigned long flags; + + spin_lock_irqsave(&mb0_transfer.lock, flags); + + config_wakeups(); + + spin_unlock_irqrestore(&mb0_transfer.lock, flags); +} + +static void prcmu_irq_mask(struct irq_data *d) +{ + unsigned long flags; + + spin_lock_irqsave(&mb0_transfer.dbb_irqs_lock, flags); + + mb0_transfer.req.dbb_irqs &= ~prcmu_irq_bit[d->irq - IRQ_PRCMU_BASE]; + + spin_unlock_irqrestore(&mb0_transfer.dbb_irqs_lock, flags); + + if (d->irq != IRQ_PRCMU_CA_SLEEP) + schedule_work(&mb0_transfer.mask_work); +} + +static void prcmu_irq_unmask(struct irq_data *d) +{ + unsigned long flags; + + spin_lock_irqsave(&mb0_transfer.dbb_irqs_lock, flags); + + mb0_transfer.req.dbb_irqs |= prcmu_irq_bit[d->irq - IRQ_PRCMU_BASE]; + + spin_unlock_irqrestore(&mb0_transfer.dbb_irqs_lock, flags); + + if (d->irq != IRQ_PRCMU_CA_SLEEP) + schedule_work(&mb0_transfer.mask_work); +} + +static void noop(struct irq_data *d) +{ +} + +static struct irq_chip prcmu_irq_chip = { + .name = "prcmu", + .irq_disable = prcmu_irq_mask, + .irq_ack = noop, + .irq_mask = prcmu_irq_mask, + .irq_unmask = prcmu_irq_unmask, +}; + void __init prcmu_early_init(void) { - if (cpu_is_u8500v11() || cpu_is_u8500ed()) { + unsigned int i; + + if (cpu_is_u8500v1()) { tcdm_base = __io_address(U8500_PRCMU_TCDM_BASE_V1); } else if (cpu_is_u8500v2()) { + void *tcpm_base = ioremap_nocache(U8500_PRCMU_TCPM_BASE, SZ_4K); + + if (tcpm_base != NULL) { + int version; + version = readl(tcpm_base + PRCMU_FW_VERSION_OFFSET); + prcmu_version.project_number = version & 0xFF; + prcmu_version.api_version = (version >> 8) & 0xFF; + prcmu_version.func_version = (version >> 16) & 0xFF; + prcmu_version.errata = (version >> 24) & 0xFF; + pr_info("PRCMU firmware version %d.%d.%d\n", + (version >> 8) & 0xFF, (version >> 16) & 0xFF, + (version >> 24) & 0xFF); + iounmap(tcpm_base); + } + tcdm_base = __io_address(U8500_PRCMU_TCDM_BASE); } else { pr_err("prcmu: Unsupported chip version\n"); BUG(); } -} - -static int __init prcmu_init(void) -{ - if (cpu_is_u8500ed()) { - pr_err("prcmu: Unsupported chip version\n"); - return 0; - } + spin_lock_init(&mb0_transfer.lock); + spin_lock_init(&mb0_transfer.dbb_irqs_lock); + mutex_init(&mb0_transfer.ac_wake_lock); + init_completion(&mb0_transfer.ac_wake_work); mutex_init(&mb1_transfer.lock); init_completion(&mb1_transfer.work); + mutex_init(&mb2_transfer.lock); + init_completion(&mb2_transfer.work); + spin_lock_init(&mb2_transfer.auto_pm_lock); + spin_lock_init(&mb3_transfer.lock); + mutex_init(&mb3_transfer.sysclk_lock); + init_completion(&mb3_transfer.sysclk_work); + mutex_init(&mb4_transfer.lock); + init_completion(&mb4_transfer.work); mutex_init(&mb5_transfer.lock); init_completion(&mb5_transfer.work); + INIT_WORK(&mb0_transfer.mask_work, prcmu_mask_work); + + /* Initalize irqs. */ + for (i = 0; i < NUM_PRCMU_WAKEUPS; i++) { + unsigned int irq; + + irq = IRQ_PRCMU_BASE + i; + irq_set_chip_and_handler(irq, &prcmu_irq_chip, + handle_simple_irq); + set_irq_flags(irq, IRQF_VALID); + } +} + +static struct mfd_cell db8500_prcmu_devs[] = { + { + .name = "db8500-prcmu-regulators", + }, + { + .name = "cpufreq-u8500", + }, +}; + +/** + * prcmu_fw_init - arch init call for the Linux PRCMU fw init logic + * + */ +static int __init db8500_prcmu_probe(struct platform_device *pdev) +{ + int err = 0; + + if (ux500_is_svp()) + return -ENODEV; + /* Clean up the mailbox interrupts after pre-kernel code. */ - writel((MBOX_BIT(NUM_MBOX) - 1), PRCM_ARM_IT1_CLEAR); + writel(ALL_MBOX_BITS, (_PRCMU_BASE + PRCM_ARM_IT1_CLR)); + + err = request_threaded_irq(IRQ_DB8500_PRCMU1, prcmu_irq_handler, + prcmu_irq_thread_fn, IRQF_NO_SUSPEND, "prcmu", NULL); + if (err < 0) { + pr_err("prcmu: Failed to allocate IRQ_DB8500_PRCMU1.\n"); + err = -EBUSY; + goto no_irq_return; + } + + if (cpu_is_u8500v20_or_later()) + prcmu_config_esram0_deep_sleep(ESRAM0_DEEP_SLEEP_STATE_RET); + + err = mfd_add_devices(&pdev->dev, 0, db8500_prcmu_devs, + ARRAY_SIZE(db8500_prcmu_devs), NULL, + 0); - return request_irq(IRQ_DB8500_PRCMU1, prcmu_irq_handler, 0, - "prcmu", NULL); + if (err) + pr_err("prcmu: Failed to add subdevices\n"); + else + pr_info("DB8500 PRCMU initialized\n"); + +no_irq_return: + return err; +} + +static struct platform_driver db8500_prcmu_driver = { + .driver = { + .name = "db8500-prcmu", + .owner = THIS_MODULE, + }, +}; + +static int __init db8500_prcmu_init(void) +{ + return platform_driver_probe(&db8500_prcmu_driver, db8500_prcmu_probe); } -arch_initcall(prcmu_init); +arch_initcall(db8500_prcmu_init); + +MODULE_AUTHOR("Mattias Nilsson "); +MODULE_DESCRIPTION("DB8500 PRCM Unit driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/mfd/db8500-prcmu.h b/include/linux/mfd/db8500-prcmu.h index d591d79aa6f0..917dbcab701c 100644 --- a/include/linux/mfd/db8500-prcmu.h +++ b/include/linux/mfd/db8500-prcmu.h @@ -2,57 +2,977 @@ * Copyright (C) STMicroelectronics 2009 * Copyright (C) ST-Ericsson SA 2010 * - * Author: Sundar Iyer - * Author: Martin Persson - * * License Terms: GNU General Public License v2 + * Author: Kumar Sanghvi * - * PRCM Unit definitions + * PRCMU f/w APIs */ +#ifndef __MFD_DB8500_PRCMU_H +#define __MFD_DB8500_PRCMU_H + +#include +#include -#ifndef __MACH_PRCMU_DEFS_H -#define __MACH_PRCMU_DEFS_H +/* This portion previously known as */ -enum prcmu_cpu_opp { - CPU_OPP_INIT = 0x00, - CPU_OPP_NO_CHANGE = 0x01, - CPU_OPP_100 = 0x02, - CPU_OPP_50 = 0x03, - CPU_OPP_MAX = 0x04, - CPU_OPP_EXT_CLK = 0x07 +/** + * enum state - ON/OFF state definition + * @OFF: State is ON + * @ON: State is OFF + * + */ +enum state { + OFF = 0x0, + ON = 0x1, }; -enum prcmu_ape_opp { - APE_OPP_NO_CHANGE = 0x00, - APE_OPP_100 = 0x02, - APE_OPP_50 = 0x03, + +/** + * enum ret_state - general purpose On/Off/Retention states + * + */ +enum ret_state { + OFFST = 0, + ONST = 1, + RETST = 2 }; -#endif /* __MACH_PRCMU_DEFS_H */ +/** + * enum clk_arm - ARM Cortex A9 clock schemes + * @A9_OFF: + * @A9_BOOT: + * @A9_OPPT1: + * @A9_OPPT2: + * @A9_EXTCLK: + */ +enum clk_arm { + A9_OFF, + A9_BOOT, + A9_OPPT1, + A9_OPPT2, + A9_EXTCLK +}; -/* - * Copyright (C) STMicroelectronics 2009 - * Copyright (C) ST-Ericsson SA 2010 +/** + * enum clk_gen - GEN#0/GEN#1 clock schemes + * @GEN_OFF: + * @GEN_BOOT: + * @GEN_OPPT1: + */ +enum clk_gen { + GEN_OFF, + GEN_BOOT, + GEN_OPPT1, +}; + +/* some information between arm and xp70 */ + +/** + * enum romcode_write - Romcode message written by A9 AND read by XP70 + * @RDY_2_DS: Value set when ApDeepSleep state can be executed by XP70 + * @RDY_2_XP70_RST: Value set when 0x0F has been successfully polled by the + * romcode. The xp70 will go into self-reset + */ +enum romcode_write { + RDY_2_DS = 0x09, + RDY_2_XP70_RST = 0x10 +}; + +/** + * enum romcode_read - Romcode message written by XP70 and read by A9 + * @INIT: Init value when romcode field is not used + * @FS_2_DS: Value set when power state is going from ApExecute to + * ApDeepSleep + * @END_DS: Value set when ApDeepSleep power state is reached coming from + * ApExecute state + * @DS_TO_FS: Value set when power state is going from ApDeepSleep to + * ApExecute + * @END_FS: Value set when ApExecute power state is reached coming from + * ApDeepSleep state + * @SWR: Value set when power state is going to ApReset + * @END_SWR: Value set when the xp70 finished executing ApReset actions and + * waits for romcode acknowledgment to go to self-reset + */ +enum romcode_read { + INIT = 0x00, + FS_2_DS = 0x0A, + END_DS = 0x0B, + DS_TO_FS = 0x0C, + END_FS = 0x0D, + SWR = 0x0E, + END_SWR = 0x0F +}; + +/** + * enum ap_pwrst - current power states defined in PRCMU firmware + * @NO_PWRST: Current power state init + * @AP_BOOT: Current power state is apBoot + * @AP_EXECUTE: Current power state is apExecute + * @AP_DEEP_SLEEP: Current power state is apDeepSleep + * @AP_SLEEP: Current power state is apSleep + * @AP_IDLE: Current power state is apIdle + * @AP_RESET: Current power state is apReset + */ +enum ap_pwrst { + NO_PWRST = 0x00, + AP_BOOT = 0x01, + AP_EXECUTE = 0x02, + AP_DEEP_SLEEP = 0x03, + AP_SLEEP = 0x04, + AP_IDLE = 0x05, + AP_RESET = 0x06 +}; + +/** + * enum ap_pwrst_trans - Transition states defined in PRCMU firmware + * @NO_TRANSITION: No power state transition + * @APEXECUTE_TO_APSLEEP: Power state transition from ApExecute to ApSleep + * @APIDLE_TO_APSLEEP: Power state transition from ApIdle to ApSleep + * @APBOOT_TO_APEXECUTE: Power state transition from ApBoot to ApExecute + * @APEXECUTE_TO_APDEEPSLEEP: Power state transition from ApExecute to + * ApDeepSleep + * @APEXECUTE_TO_APIDLE: Power state transition from ApExecute to ApIdle + */ +enum ap_pwrst_trans { + NO_TRANSITION = 0x00, + APEXECUTE_TO_APSLEEP = 0x01, + APIDLE_TO_APSLEEP = 0x02, /* To be removed */ + PRCMU_AP_SLEEP = 0x01, + APBOOT_TO_APEXECUTE = 0x03, + APEXECUTE_TO_APDEEPSLEEP = 0x04, /* To be removed */ + PRCMU_AP_DEEP_SLEEP = 0x04, + APEXECUTE_TO_APIDLE = 0x05, /* To be removed */ + PRCMU_AP_IDLE = 0x05, + PRCMU_AP_DEEP_IDLE = 0x07, +}; + +/** + * enum ddr_pwrst - DDR power states definition + * @DDR_PWR_STATE_UNCHANGED: SDRAM and DDR controller state is unchanged + * @DDR_PWR_STATE_ON: + * @DDR_PWR_STATE_OFFLOWLAT: + * @DDR_PWR_STATE_OFFHIGHLAT: + */ +enum ddr_pwrst { + DDR_PWR_STATE_UNCHANGED = 0x00, + DDR_PWR_STATE_ON = 0x01, + DDR_PWR_STATE_OFFLOWLAT = 0x02, + DDR_PWR_STATE_OFFHIGHLAT = 0x03 +}; + +/** + * enum arm_opp - ARM OPP states definition + * @ARM_OPP_INIT: + * @ARM_NO_CHANGE: The ARM operating point is unchanged + * @ARM_100_OPP: The new ARM operating point is arm100opp + * @ARM_50_OPP: The new ARM operating point is arm50opp + * @ARM_MAX_OPP: Operating point is "max" (more than 100) + * @ARM_MAX_FREQ100OPP: Set max opp if available, else 100 + * @ARM_EXTCLK: The new ARM operating point is armExtClk + */ +enum arm_opp { + ARM_OPP_INIT = 0x00, + ARM_NO_CHANGE = 0x01, + ARM_100_OPP = 0x02, + ARM_50_OPP = 0x03, + ARM_MAX_OPP = 0x04, + ARM_MAX_FREQ100OPP = 0x05, + ARM_EXTCLK = 0x07 +}; + +/** + * enum ape_opp - APE OPP states definition + * @APE_OPP_INIT: + * @APE_NO_CHANGE: The APE operating point is unchanged + * @APE_100_OPP: The new APE operating point is ape100opp + * @APE_50_OPP: 50% + */ +enum ape_opp { + APE_OPP_INIT = 0x00, + APE_NO_CHANGE = 0x01, + APE_100_OPP = 0x02, + APE_50_OPP = 0x03 +}; + +/** + * enum hw_acc_state - State definition for hardware accelerator + * @HW_NO_CHANGE: The hardware accelerator state must remain unchanged + * @HW_OFF: The hardware accelerator must be switched off + * @HW_OFF_RAMRET: The hardware accelerator must be switched off with its + * internal RAM in retention + * @HW_ON: The hwa hardware accelerator hwa must be switched on * - * Author: Kumar Sanghvi - * Author: Sundar Iyer - * Author: Mattias Nilsson + * NOTE! Deprecated, to be removed when all users switched over to use the + * regulator API. + */ +enum hw_acc_state { + HW_NO_CHANGE = 0x00, + HW_OFF = 0x01, + HW_OFF_RAMRET = 0x02, + HW_ON = 0x04 +}; + +/** + * enum mbox_2_arm_stat - Status messages definition for mbox_arm + * @BOOT_TO_EXECUTEOK: The apBoot to apExecute state transition has been + * completed + * @DEEPSLEEPOK: The apExecute to apDeepSleep state transition has been + * completed + * @SLEEPOK: The apExecute to apSleep state transition has been completed + * @IDLEOK: The apExecute to apIdle state transition has been completed + * @SOFTRESETOK: The A9 watchdog/ SoftReset state has been completed + * @SOFTRESETGO : The A9 watchdog/SoftReset state is on going + * @BOOT_TO_EXECUTE: The apBoot to apExecute state transition is on going + * @EXECUTE_TO_DEEPSLEEP: The apExecute to apDeepSleep state transition is on + * going + * @DEEPSLEEP_TO_EXECUTE: The apDeepSleep to apExecute state transition is on + * going + * @DEEPSLEEP_TO_EXECUTEOK: The apDeepSleep to apExecute state transition has + * been completed + * @EXECUTE_TO_SLEEP: The apExecute to apSleep state transition is on going + * @SLEEP_TO_EXECUTE: The apSleep to apExecute state transition is on going + * @SLEEP_TO_EXECUTEOK: The apSleep to apExecute state transition has been + * completed + * @EXECUTE_TO_IDLE: The apExecute to apIdle state transition is on going + * @IDLE_TO_EXECUTE: The apIdle to apExecute state transition is on going + * @IDLE_TO_EXECUTEOK: The apIdle to apExecute state transition has been + * completed + * @INIT_STATUS: Status init + */ +enum ap_pwrsttr_status { + BOOT_TO_EXECUTEOK = 0xFF, + DEEPSLEEPOK = 0xFE, + SLEEPOK = 0xFD, + IDLEOK = 0xFC, + SOFTRESETOK = 0xFB, + SOFTRESETGO = 0xFA, + BOOT_TO_EXECUTE = 0xF9, + EXECUTE_TO_DEEPSLEEP = 0xF8, + DEEPSLEEP_TO_EXECUTE = 0xF7, + DEEPSLEEP_TO_EXECUTEOK = 0xF6, + EXECUTE_TO_SLEEP = 0xF5, + SLEEP_TO_EXECUTE = 0xF4, + SLEEP_TO_EXECUTEOK = 0xF3, + EXECUTE_TO_IDLE = 0xF2, + IDLE_TO_EXECUTE = 0xF1, + IDLE_TO_EXECUTEOK = 0xF0, + RDYTODS_RETURNTOEXE = 0xEF, + NORDYTODS_RETURNTOEXE = 0xEE, + EXETOSLEEP_RETURNTOEXE = 0xED, + EXETOIDLE_RETURNTOEXE = 0xEC, + INIT_STATUS = 0xEB, + + /*error messages */ + INITERROR = 0x00, + PLLARMLOCKP_ER = 0x01, + PLLDDRLOCKP_ER = 0x02, + PLLSOCLOCKP_ER = 0x03, + PLLSOCK1LOCKP_ER = 0x04, + ARMWFI_ER = 0x05, + SYSCLKOK_ER = 0x06, + I2C_NACK_DATA_ER = 0x07, + BOOT_ER = 0x08, + I2C_STATUS_ALWAYS_1 = 0x0A, + I2C_NACK_REG_ADDR_ER = 0x0B, + I2C_NACK_DATA0123_ER = 0x1B, + I2C_NACK_ADDR_ER = 0x1F, + CURAPPWRSTISNOT_BOOT = 0x20, + CURAPPWRSTISNOT_EXECUTE = 0x21, + CURAPPWRSTISNOT_SLEEPMODE = 0x22, + CURAPPWRSTISNOT_CORRECTFORIT10 = 0x23, + FIFO4500WUISNOT_WUPEVENT = 0x24, + PLL32KLOCKP_ER = 0x29, + DDRDEEPSLEEPOK_ER = 0x2A, + ROMCODEREADY_ER = 0x50, + WUPBEFOREDS = 0x51, + DDRCONFIG_ER = 0x52, + WUPBEFORESLEEP = 0x53, + WUPBEFOREIDLE = 0x54 +}; /* earlier called as mbox_2_arm_stat */ + +/** + * enum dvfs_stat - DVFS status messages definition + * @DVFS_GO: A state transition DVFS is on going + * @DVFS_ARM100OPPOK: The state transition DVFS has been completed for 100OPP + * @DVFS_ARM50OPPOK: The state transition DVFS has been completed for 50OPP + * @DVFS_ARMEXTCLKOK: The state transition DVFS has been completed for EXTCLK + * @DVFS_NOCHGTCLKOK: The state transition DVFS has been completed for + * NOCHGCLK + * @DVFS_INITSTATUS: Value init + */ +enum dvfs_stat { + DVFS_GO = 0xFF, + DVFS_ARM100OPPOK = 0xFE, + DVFS_ARM50OPPOK = 0xFD, + DVFS_ARMEXTCLKOK = 0xFC, + DVFS_NOCHGTCLKOK = 0xFB, + DVFS_INITSTATUS = 0x00 +}; + +/** + * enum sva_mmdsp_stat - SVA MMDSP status messages + * @SVA_MMDSP_GO: SVAMMDSP interrupt has happened + * @SVA_MMDSP_INIT: Status init + */ +enum sva_mmdsp_stat { + SVA_MMDSP_GO = 0xFF, + SVA_MMDSP_INIT = 0x00 +}; + +/** + * enum sia_mmdsp_stat - SIA MMDSP status messages + * @SIA_MMDSP_GO: SIAMMDSP interrupt has happened + * @SIA_MMDSP_INIT: Status init + */ +enum sia_mmdsp_stat { + SIA_MMDSP_GO = 0xFF, + SIA_MMDSP_INIT = 0x00 +}; + +/** + * enum mbox_to_arm_err - Error messages definition + * @INIT_ERR: Init value + * @PLLARMLOCKP_ERR: PLLARM has not been correctly locked in given time + * @PLLDDRLOCKP_ERR: PLLDDR has not been correctly locked in the given time + * @PLLSOC0LOCKP_ERR: PLLSOC0 has not been correctly locked in the given time + * @PLLSOC1LOCKP_ERR: PLLSOC1 has not been correctly locked in the given time + * @ARMWFI_ERR: The ARM WFI has not been correctly executed in the given time + * @SYSCLKOK_ERR: The SYSCLK is not available in the given time + * @BOOT_ERR: Romcode has not validated the XP70 self reset in the given time + * @ROMCODESAVECONTEXT: The Romcode didn.t correctly save it secure context + * @VARMHIGHSPEEDVALTO_ERR: The ARM high speed supply value transfered + * through I2C has not been correctly executed in the given time + * @VARMHIGHSPEEDACCESS_ERR: The command value of VarmHighSpeedVal transfered + * through I2C has not been correctly executed in the given time + * @VARMLOWSPEEDVALTO_ERR:The ARM low speed supply value transfered through + * I2C has not been correctly executed in the given time + * @VARMLOWSPEEDACCESS_ERR: The command value of VarmLowSpeedVal transfered + * through I2C has not been correctly executed in the given time + * @VARMRETENTIONVALTO_ERR: The ARM retention supply value transfered through + * I2C has not been correctly executed in the given time + * @VARMRETENTIONACCESS_ERR: The command value of VarmRetentionVal transfered + * through I2C has not been correctly executed in the given time + * @VAPEHIGHSPEEDVALTO_ERR: The APE highspeed supply value transfered through + * I2C has not been correctly executed in the given time + * @VSAFEHPVALTO_ERR: The SAFE high power supply value transfered through I2C + * has not been correctly executed in the given time + * @VMODSEL1VALTO_ERR: The MODEM sel1 supply value transfered through I2C has + * not been correctly executed in the given time + * @VMODSEL2VALTO_ERR: The MODEM sel2 supply value transfered through I2C has + * not been correctly executed in the given time + * @VARMOFFACCESS_ERR: The command value of Varm ON/OFF transfered through + * I2C has not been correctly executed in the given time + * @VAPEOFFACCESS_ERR: The command value of Vape ON/OFF transfered through + * I2C has not been correctly executed in the given time + * @VARMRETACCES_ERR: The command value of Varm retention ON/OFF transfered + * through I2C has not been correctly executed in the given time + * @CURAPPWRSTISNOTBOOT:Generated when Arm want to do power state transition + * ApBoot to ApExecute but the power current state is not Apboot + * @CURAPPWRSTISNOTEXECUTE: Generated when Arm want to do power state + * transition from ApExecute to others power state but the + * power current state is not ApExecute + * @CURAPPWRSTISNOTSLEEPMODE: Generated when wake up events are transmitted + * but the power current state is not ApDeepSleep/ApSleep/ApIdle + * @CURAPPWRSTISNOTCORRECTDBG: Generated when wake up events are transmitted + * but the power current state is not correct + * @ARMREGU1VALTO_ERR:The ArmRegu1 value transferred through I2C has not + * been correctly executed in the given time + * @ARMREGU2VALTO_ERR: The ArmRegu2 value transferred through I2C has not + * been correctly executed in the given time + * @VAPEREGUVALTO_ERR: The VApeRegu value transfered through I2C has not + * been correctly executed in the given time + * @VSMPS3REGUVALTO_ERR: The VSmps3Regu value transfered through I2C has not + * been correctly executed in the given time + * @VMODREGUVALTO_ERR: The VModemRegu value transfered through I2C has not + * been correctly executed in the given time + */ +enum mbox_to_arm_err { + INIT_ERR = 0x00, + PLLARMLOCKP_ERR = 0x01, + PLLDDRLOCKP_ERR = 0x02, + PLLSOC0LOCKP_ERR = 0x03, + PLLSOC1LOCKP_ERR = 0x04, + ARMWFI_ERR = 0x05, + SYSCLKOK_ERR = 0x06, + BOOT_ERR = 0x07, + ROMCODESAVECONTEXT = 0x08, + VARMHIGHSPEEDVALTO_ERR = 0x10, + VARMHIGHSPEEDACCESS_ERR = 0x11, + VARMLOWSPEEDVALTO_ERR = 0x12, + VARMLOWSPEEDACCESS_ERR = 0x13, + VARMRETENTIONVALTO_ERR = 0x14, + VARMRETENTIONACCESS_ERR = 0x15, + VAPEHIGHSPEEDVALTO_ERR = 0x16, + VSAFEHPVALTO_ERR = 0x17, + VMODSEL1VALTO_ERR = 0x18, + VMODSEL2VALTO_ERR = 0x19, + VARMOFFACCESS_ERR = 0x1A, + VAPEOFFACCESS_ERR = 0x1B, + VARMRETACCES_ERR = 0x1C, + CURAPPWRSTISNOTBOOT = 0x20, + CURAPPWRSTISNOTEXECUTE = 0x21, + CURAPPWRSTISNOTSLEEPMODE = 0x22, + CURAPPWRSTISNOTCORRECTDBG = 0x23, + ARMREGU1VALTO_ERR = 0x24, + ARMREGU2VALTO_ERR = 0x25, + VAPEREGUVALTO_ERR = 0x26, + VSMPS3REGUVALTO_ERR = 0x27, + VMODREGUVALTO_ERR = 0x28 +}; + +enum hw_acc { + SVAMMDSP = 0, + SVAPIPE = 1, + SIAMMDSP = 2, + SIAPIPE = 3, + SGA = 4, + B2R2MCDE = 5, + ESRAM12 = 6, + ESRAM34 = 7, +}; + +enum cs_pwrmgt { + PWRDNCS0 = 0, + WKUPCS0 = 1, + PWRDNCS1 = 2, + WKUPCS1 = 3 +}; + +/* Defs related to autonomous power management */ + +/** + * enum sia_sva_pwr_policy - Power policy + * @NO_CHGT: No change + * @DSPOFF_HWPOFF: + * @DSPOFFRAMRET_HWPOFF: + * @DSPCLKOFF_HWPOFF: + * @DSPCLKOFF_HWPCLKOFF: * - * License Terms: GNU General Public License v2 + */ +enum sia_sva_pwr_policy { + NO_CHGT = 0x0, + DSPOFF_HWPOFF = 0x1, + DSPOFFRAMRET_HWPOFF = 0x2, + DSPCLKOFF_HWPOFF = 0x3, + DSPCLKOFF_HWPCLKOFF = 0x4, +}; + +/** + * enum auto_enable - Auto Power enable + * @AUTO_OFF: + * @AUTO_ON: + * + */ +enum auto_enable { + AUTO_OFF = 0x0, + AUTO_ON = 0x1, +}; + +/* End of file previously known as prcmu-fw-defs_v1.h */ + +/* PRCMU Wakeup defines */ +enum prcmu_wakeup_index { + PRCMU_WAKEUP_INDEX_RTC, + PRCMU_WAKEUP_INDEX_RTT0, + PRCMU_WAKEUP_INDEX_RTT1, + PRCMU_WAKEUP_INDEX_HSI0, + PRCMU_WAKEUP_INDEX_HSI1, + PRCMU_WAKEUP_INDEX_USB, + PRCMU_WAKEUP_INDEX_ABB, + PRCMU_WAKEUP_INDEX_ABB_FIFO, + PRCMU_WAKEUP_INDEX_ARM, + NUM_PRCMU_WAKEUP_INDICES +}; +#define PRCMU_WAKEUP(_name) (BIT(PRCMU_WAKEUP_INDEX_##_name)) + +/* PRCMU QoS APE OPP class */ +#define PRCMU_QOS_APE_OPP 1 +#define PRCMU_QOS_DDR_OPP 2 +#define PRCMU_QOS_DEFAULT_VALUE -1 + +/** + * enum hw_acc_dev - enum for hw accelerators + * @HW_ACC_SVAMMDSP: for SVAMMDSP + * @HW_ACC_SVAPIPE: for SVAPIPE + * @HW_ACC_SIAMMDSP: for SIAMMDSP + * @HW_ACC_SIAPIPE: for SIAPIPE + * @HW_ACC_SGA: for SGA + * @HW_ACC_B2R2: for B2R2 + * @HW_ACC_MCDE: for MCDE + * @HW_ACC_ESRAM1: for ESRAM1 + * @HW_ACC_ESRAM2: for ESRAM2 + * @HW_ACC_ESRAM3: for ESRAM3 + * @HW_ACC_ESRAM4: for ESRAM4 + * @NUM_HW_ACC: number of hardware accelerators + * + * Different hw accelerators which can be turned ON/ + * OFF or put into retention (MMDSPs and ESRAMs). + * Used with EPOD API. * - * PRCM Unit f/w API + * NOTE! Deprecated, to be removed when all users switched over to use the + * regulator API. + */ +enum hw_acc_dev { + HW_ACC_SVAMMDSP, + HW_ACC_SVAPIPE, + HW_ACC_SIAMMDSP, + HW_ACC_SIAPIPE, + HW_ACC_SGA, + HW_ACC_B2R2, + HW_ACC_MCDE, + HW_ACC_ESRAM1, + HW_ACC_ESRAM2, + HW_ACC_ESRAM3, + HW_ACC_ESRAM4, + NUM_HW_ACC +}; + +/* + * Ids for all EPODs (power domains) + * - EPOD_ID_SVAMMDSP: power domain for SVA MMDSP + * - EPOD_ID_SVAPIPE: power domain for SVA pipe + * - EPOD_ID_SIAMMDSP: power domain for SIA MMDSP + * - EPOD_ID_SIAPIPE: power domain for SIA pipe + * - EPOD_ID_SGA: power domain for SGA + * - EPOD_ID_B2R2_MCDE: power domain for B2R2 and MCDE + * - EPOD_ID_ESRAM12: power domain for ESRAM 1 and 2 + * - EPOD_ID_ESRAM34: power domain for ESRAM 3 and 4 + * - NUM_EPOD_ID: number of power domains + */ +#define EPOD_ID_SVAMMDSP 0 +#define EPOD_ID_SVAPIPE 1 +#define EPOD_ID_SIAMMDSP 2 +#define EPOD_ID_SIAPIPE 3 +#define EPOD_ID_SGA 4 +#define EPOD_ID_B2R2_MCDE 5 +#define EPOD_ID_ESRAM12 6 +#define EPOD_ID_ESRAM34 7 +#define NUM_EPOD_ID 8 + +/* + * state definition for EPOD (power domain) + * - EPOD_STATE_NO_CHANGE: The EPOD should remain unchanged + * - EPOD_STATE_OFF: The EPOD is switched off + * - EPOD_STATE_RAMRET: The EPOD is switched off with its internal RAM in + * retention + * - EPOD_STATE_ON_CLK_OFF: The EPOD is switched on, clock is still off + * - EPOD_STATE_ON: Same as above, but with clock enabled */ -#ifndef __MACH_PRCMU_H -#define __MACH_PRCMU_H +#define EPOD_STATE_NO_CHANGE 0x00 +#define EPOD_STATE_OFF 0x01 +#define EPOD_STATE_RAMRET 0x02 +#define EPOD_STATE_ON_CLK_OFF 0x03 +#define EPOD_STATE_ON 0x04 +/* + * CLKOUT sources + */ +#define PRCMU_CLKSRC_CLK38M 0x00 +#define PRCMU_CLKSRC_ACLK 0x01 +#define PRCMU_CLKSRC_SYSCLK 0x02 +#define PRCMU_CLKSRC_LCDCLK 0x03 +#define PRCMU_CLKSRC_SDMMCCLK 0x04 +#define PRCMU_CLKSRC_TVCLK 0x05 +#define PRCMU_CLKSRC_TIMCLK 0x06 +#define PRCMU_CLKSRC_CLK009 0x07 +/* These are only valid for CLKOUT1: */ +#define PRCMU_CLKSRC_SIAMMDSPCLK 0x40 +#define PRCMU_CLKSRC_I2CCLK 0x41 +#define PRCMU_CLKSRC_MSP02CLK 0x42 +#define PRCMU_CLKSRC_ARMPLL_OBSCLK 0x43 +#define PRCMU_CLKSRC_HSIRXCLK 0x44 +#define PRCMU_CLKSRC_HSITXCLK 0x45 +#define PRCMU_CLKSRC_ARMCLKFIX 0x46 +#define PRCMU_CLKSRC_HDMICLK 0x47 + +/* + * Definitions for autonomous power management configuration. + */ + +#define PRCMU_AUTO_PM_OFF 0 +#define PRCMU_AUTO_PM_ON 1 + +#define PRCMU_AUTO_PM_POWER_ON_HSEM BIT(0) +#define PRCMU_AUTO_PM_POWER_ON_ABB_FIFO_IT BIT(1) + +enum prcmu_auto_pm_policy { + PRCMU_AUTO_PM_POLICY_NO_CHANGE, + PRCMU_AUTO_PM_POLICY_DSP_OFF_HWP_OFF, + PRCMU_AUTO_PM_POLICY_DSP_OFF_RAMRET_HWP_OFF, + PRCMU_AUTO_PM_POLICY_DSP_CLK_OFF_HWP_OFF, + PRCMU_AUTO_PM_POLICY_DSP_CLK_OFF_HWP_CLK_OFF, +}; + +/** + * struct prcmu_auto_pm_config - Autonomous power management configuration. + * @sia_auto_pm_enable: SIA autonomous pm enable. (PRCMU_AUTO_PM_{OFF,ON}) + * @sia_power_on: SIA power ON enable. (PRCMU_AUTO_PM_POWER_ON_* bitmask) + * @sia_policy: SIA power policy. (enum prcmu_auto_pm_policy) + * @sva_auto_pm_enable: SVA autonomous pm enable. (PRCMU_AUTO_PM_{OFF,ON}) + * @sva_power_on: SVA power ON enable. (PRCMU_AUTO_PM_POWER_ON_* bitmask) + * @sva_policy: SVA power policy. (enum prcmu_auto_pm_policy) + */ +struct prcmu_auto_pm_config { + u8 sia_auto_pm_enable; + u8 sia_power_on; + u8 sia_policy; + u8 sva_auto_pm_enable; + u8 sva_power_on; + u8 sva_policy; +}; + +/** + * enum ddr_opp - DDR OPP states definition + * @DDR_100_OPP: The new DDR operating point is ddr100opp + * @DDR_50_OPP: The new DDR operating point is ddr50opp + * @DDR_25_OPP: The new DDR operating point is ddr25opp + */ +enum ddr_opp { + DDR_100_OPP = 0x00, + DDR_50_OPP = 0x01, + DDR_25_OPP = 0x02, +}; + +/* + * Clock identifiers. + */ +enum prcmu_clock { + PRCMU_SGACLK, + PRCMU_UARTCLK, + PRCMU_MSP02CLK, + PRCMU_MSP1CLK, + PRCMU_I2CCLK, + PRCMU_SDMMCCLK, + PRCMU_SLIMCLK, + PRCMU_PER1CLK, + PRCMU_PER2CLK, + PRCMU_PER3CLK, + PRCMU_PER5CLK, + PRCMU_PER6CLK, + PRCMU_PER7CLK, + PRCMU_LCDCLK, + PRCMU_BMLCLK, + PRCMU_HSITXCLK, + PRCMU_HSIRXCLK, + PRCMU_HDMICLK, + PRCMU_APEATCLK, + PRCMU_APETRACECLK, + PRCMU_MCDECLK, + PRCMU_IPI2CCLK, + PRCMU_DSIALTCLK, + PRCMU_DMACLK, + PRCMU_B2R2CLK, + PRCMU_TVCLK, + PRCMU_SSPCLK, + PRCMU_RNGCLK, + PRCMU_UICCCLK, + PRCMU_NUM_REG_CLOCKS, + PRCMU_SYSCLK = PRCMU_NUM_REG_CLOCKS, + PRCMU_TIMCLK, +}; + +/* + * Definitions for controlling ESRAM0 in deep sleep. + */ +#define ESRAM0_DEEP_SLEEP_STATE_OFF 1 +#define ESRAM0_DEEP_SLEEP_STATE_RET 2 + +#ifdef CONFIG_MFD_DB8500_PRCMU void __init prcmu_early_init(void); +int prcmu_set_display_clocks(void); +int prcmu_disable_dsipll(void); +int prcmu_enable_dsipll(void); +#else +static inline void __init prcmu_early_init(void) {} +#endif + +#ifdef CONFIG_MFD_DB8500_PRCMU + +int prcmu_set_rc_a2p(enum romcode_write); +enum romcode_read prcmu_get_rc_p2a(void); +enum ap_pwrst prcmu_get_xp70_current_state(void); +int prcmu_set_power_state(u8 state, bool keep_ulp_clk, bool keep_ap_pll); + +void prcmu_enable_wakeups(u32 wakeups); +static inline void prcmu_disable_wakeups(void) +{ + prcmu_enable_wakeups(0); +} + +void prcmu_config_abb_event_readout(u32 abb_events); +void prcmu_get_abb_event_buffer(void __iomem **buf); +int prcmu_set_arm_opp(u8 opp); +int prcmu_get_arm_opp(void); +bool prcmu_has_arm_maxopp(void); +bool prcmu_is_u8400(void); +int prcmu_set_ape_opp(u8 opp); +int prcmu_get_ape_opp(void); +int prcmu_request_ape_opp_100_voltage(bool enable); +int prcmu_release_usb_wakeup_state(void); +int prcmu_set_ddr_opp(u8 opp); +int prcmu_get_ddr_opp(void); +unsigned long prcmu_qos_get_cpufreq_opp_delay(void); +void prcmu_qos_set_cpufreq_opp_delay(unsigned long); +/* NOTE! Use regulator framework instead */ +int prcmu_set_hwacc(u16 hw_acc_dev, u8 state); +int prcmu_set_epod(u16 epod_id, u8 epod_state); +void prcmu_configure_auto_pm(struct prcmu_auto_pm_config *sleep, + struct prcmu_auto_pm_config *idle); +bool prcmu_is_auto_pm_enabled(void); + +int prcmu_config_clkout(u8 clkout, u8 source, u8 div); +int prcmu_request_clock(u8 clock, bool enable); +int prcmu_set_clock_divider(u8 clock, u8 divider); +int prcmu_config_esram0_deep_sleep(u8 state); +int prcmu_config_hotdog(u8 threshold); +int prcmu_config_hotmon(u8 low, u8 high); +int prcmu_start_temp_sense(u16 cycles32k); +int prcmu_stop_temp_sense(void); int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size); int prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size); -int prcmu_set_ape_opp(enum prcmu_ape_opp opp); -int prcmu_set_cpu_opp(enum prcmu_cpu_opp opp); -int prcmu_set_ape_cpu_opps(enum prcmu_ape_opp ape_opp, - enum prcmu_cpu_opp cpu_opp); -enum prcmu_ape_opp prcmu_get_ape_opp(void); -int prcmu_get_cpu_opp(void); -bool prcmu_has_arm_maxopp(void); -#endif /* __MACH_PRCMU_H */ +void prcmu_ac_wake_req(void); +void prcmu_ac_sleep_req(void); +void prcmu_system_reset(u16 reset_code); +void prcmu_modem_reset(void); +bool prcmu_is_ac_wake_requested(void); +void prcmu_enable_spi2(void); +void prcmu_disable_spi2(void); + +#else /* !CONFIG_MFD_DB8500_PRCMU */ + +static inline int prcmu_set_rc_a2p(enum romcode_write code) +{ + return 0; +} + +static inline enum romcode_read prcmu_get_rc_p2a(void) +{ + return INIT; +} + +static inline enum ap_pwrst prcmu_get_xp70_current_state(void) +{ + return AP_EXECUTE; +} + +static inline int prcmu_set_power_state(u8 state, bool keep_ulp_clk, + bool keep_ap_pll) +{ + return 0; +} + +static inline void prcmu_enable_wakeups(u32 wakeups) {} + +static inline void prcmu_disable_wakeups(void) {} + +static inline void prcmu_config_abb_event_readout(u32 abb_events) {} + +static inline int prcmu_set_arm_opp(u8 opp) +{ + return 0; +} + +static inline int prcmu_get_arm_opp(void) +{ + return ARM_100_OPP; +} + +static bool prcmu_has_arm_maxopp(void) +{ + return false; +} + +static bool prcmu_is_u8400(void) +{ + return false; +} + +static inline int prcmu_set_ape_opp(u8 opp) +{ + return 0; +} + +static inline int prcmu_get_ape_opp(void) +{ + return APE_100_OPP; +} + +static inline int prcmu_request_ape_opp_100_voltage(bool enable) +{ + return 0; +} + +static inline int prcmu_release_usb_wakeup_state(void) +{ + return 0; +} + +static inline int prcmu_set_ddr_opp(u8 opp) +{ + return 0; +} + +static inline int prcmu_get_ddr_opp(void) +{ + return DDR_100_OPP; +} + +static inline unsigned long prcmu_qos_get_cpufreq_opp_delay(void) +{ + return 0; +} + +static inline void prcmu_qos_set_cpufreq_opp_delay(unsigned long n) {} + +static inline int prcmu_set_hwacc(u16 hw_acc_dev, u8 state) +{ + return 0; +} + +static inline void prcmu_configure_auto_pm(struct prcmu_auto_pm_config *sleep, + struct prcmu_auto_pm_config *idle) +{ +} + +static inline bool prcmu_is_auto_pm_enabled(void) +{ + return false; +} + +static inline int prcmu_config_clkout(u8 clkout, u8 source, u8 div) +{ + return 0; +} + +static inline int prcmu_request_clock(u8 clock, bool enable) +{ + return 0; +} + +static inline int prcmu_set_clock_divider(u8 clock, u8 divider) +{ + return 0; +} + +int prcmu_config_esram0_deep_sleep(u8 state) +{ + return 0; +} + +static inline int prcmu_config_hotdog(u8 threshold) +{ + return 0; +} + +static inline int prcmu_config_hotmon(u8 low, u8 high) +{ + return 0; +} + +static inline int prcmu_start_temp_sense(u16 cycles32k) +{ + return 0; +} + +static inline int prcmu_stop_temp_sense(void) +{ + return 0; +} + +static inline int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size) +{ + return -ENOSYS; +} + +static inline int prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size) +{ + return -ENOSYS; +} + +static inline void prcmu_ac_wake_req(void) {} + +static inline void prcmu_ac_sleep_req(void) {} + +static inline void prcmu_system_reset(u16 reset_code) {} + +static inline void prcmu_modem_reset(void) {} + +static inline bool prcmu_is_ac_wake_requested(void) +{ + return false; +} + +#ifndef CONFIG_UX500_SOC_DB5500 +static inline int prcmu_set_display_clocks(void) +{ + return 0; +} + +static inline int prcmu_disable_dsipll(void) +{ + return 0; +} + +static inline int prcmu_enable_dsipll(void) +{ + return 0; +} +#endif + +static inline int prcmu_enable_spi2(void) +{ + return 0; +} + +static inline int prcmu_disable_spi2(void) +{ + return 0; +} + +#endif /* !CONFIG_MFD_DB8500_PRCMU */ + +#ifdef CONFIG_UX500_PRCMU_QOS_POWER +int prcmu_qos_requirement(int pm_qos_class); +int prcmu_qos_add_requirement(int pm_qos_class, char *name, s32 value); +int prcmu_qos_update_requirement(int pm_qos_class, char *name, s32 new_value); +void prcmu_qos_remove_requirement(int pm_qos_class, char *name); +int prcmu_qos_add_notifier(int prcmu_qos_class, + struct notifier_block *notifier); +int prcmu_qos_remove_notifier(int prcmu_qos_class, + struct notifier_block *notifier); +#else +static inline int prcmu_qos_requirement(int prcmu_qos_class) +{ + return 0; +} + +static inline int prcmu_qos_add_requirement(int prcmu_qos_class, + char *name, s32 value) +{ + return 0; +} + +static inline int prcmu_qos_update_requirement(int prcmu_qos_class, + char *name, s32 new_value) +{ + return 0; +} + +static inline void prcmu_qos_remove_requirement(int prcmu_qos_class, char *name) +{ +} + +static inline int prcmu_qos_add_notifier(int prcmu_qos_class, + struct notifier_block *notifier) +{ + return 0; +} +static inline int prcmu_qos_remove_notifier(int prcmu_qos_class, + struct notifier_block *notifier) +{ + return 0; +} + +#endif + +#endif /* __MFD_DB8500_PRCMU_H */ -- cgit v1.2.3 From 8317797ca657081ed81312ea3501f3a3d59d52e9 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 3 May 2011 18:14:48 +0200 Subject: mfd: add DB5500 PRCMU driver This adds the DB5500 PRCMU driver. Right now this one is pretty restricted in functionality, exposing a simple interface to send I2C messages. Acked-by: Samuel Ortiz Signed-off-by: Linus Walleij --- arch/arm/mach-ux500/Kconfig | 1 + arch/arm/mach-ux500/cpu.c | 3 + drivers/mfd/Kconfig | 10 + drivers/mfd/Makefile | 1 + drivers/mfd/db5500-prcmu-regs.h | 115 ++++++++++ drivers/mfd/db5500-prcmu.c | 448 +++++++++++++++++++++++++++++++++++++++ include/linux/mfd/db5500-prcmu.h | 45 ++++ 7 files changed, 623 insertions(+) create mode 100644 drivers/mfd/db5500-prcmu-regs.h create mode 100644 drivers/mfd/db5500-prcmu.c create mode 100644 include/linux/mfd/db5500-prcmu.h (limited to 'include/linux') diff --git a/arch/arm/mach-ux500/Kconfig b/arch/arm/mach-ux500/Kconfig index 8071d2746f70..365c4c35faa7 100644 --- a/arch/arm/mach-ux500/Kconfig +++ b/arch/arm/mach-ux500/Kconfig @@ -12,6 +12,7 @@ menu "Ux500 SoC" config UX500_SOC_DB5500 bool "DB5500" + select MFD_DB5500_PRCMU config UX500_SOC_DB8500 bool "DB8500" diff --git a/arch/arm/mach-ux500/cpu.c b/arch/arm/mach-ux500/cpu.c index 11360f734cec..1da23bb87c16 100644 --- a/arch/arm/mach-ux500/cpu.c +++ b/arch/arm/mach-ux500/cpu.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -49,6 +50,8 @@ void __init ux500_init_irq(void) * Init clocks here so that they are available for system timer * initialization. */ + if (cpu_is_u5500()) + db5500_prcmu_early_init(); if (cpu_is_u8500()) prcmu_early_init(); clk_init(); diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 7eaeb9750793..481770ab2716 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -585,6 +585,16 @@ config MFD_DB8500_PRCMU system controller running an XP70 microprocessor, which is accessed through a register map. +config MFD_DB5500_PRCMU + bool "ST-Ericsson DB5500 Power Reset Control Management Unit" + depends on UX500_SOC_DB5500 + select MFD_CORE + help + Select this option to enable support for the DB5500 Power Reset + and Control Management Unit. This is basically an autonomous + system controller running an XP70 microprocessor, which is accessed + through a register map. + config MFD_CS5535 tristate "Support for CS5535 and CS5536 southbridge core functions" select MFD_CORE diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 814c57a692a9..24aa44448daf 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -79,6 +79,7 @@ obj-$(CONFIG_AB8500_GPADC) += ab8500-gpadc.o obj-$(CONFIG_MFD_DB8500_PRCMU) += db8500-prcmu.o # ab8500-i2c need to come after db8500-prcmu (which provides the channel) obj-$(CONFIG_AB8500_I2C_CORE) += ab8500-i2c.o +obj-$(CONFIG_MFD_DB5500_PRCMU) += db5500-prcmu.o obj-$(CONFIG_MFD_TIMBERDALE) += timberdale.o obj-$(CONFIG_PMIC_ADP5520) += adp5520.o obj-$(CONFIG_LPC_SCH) += lpc_sch.o diff --git a/drivers/mfd/db5500-prcmu-regs.h b/drivers/mfd/db5500-prcmu-regs.h new file mode 100644 index 000000000000..9a8e9e4ddd33 --- /dev/null +++ b/drivers/mfd/db5500-prcmu-regs.h @@ -0,0 +1,115 @@ +/* + * Copyright (C) STMicroelectronics 2009 + * Copyright (C) ST-Ericsson SA 2010 + * + * Author: Kumar Sanghvi + * Author: Sundar Iyer + * + * License Terms: GNU General Public License v2 + * + * PRCM Unit registers + */ + +#ifndef __MACH_PRCMU_REGS_H +#define __MACH_PRCMU_REGS_H + +#include + +#define PRCM_ARM_PLLDIVPS (_PRCMU_BASE + 0x118) +#define PRCM_ARM_PLLDIVPS_ARM_BRM_RATE 0x3f +#define PRCM_ARM_PLLDIVPS_MAX_MASK 0xf + +#define PRCM_PLLARM_LOCKP (_PRCMU_BASE + 0x0a8) +#define PRCM_PLLARM_LOCKP_PRCM_PLLARM_LOCKP3 0x2 + +#define PRCM_ARM_CHGCLKREQ (_PRCMU_BASE + 0x114) +#define PRCM_ARM_CHGCLKREQ_PRCM_ARM_CHGCLKREQ 0x1 + +#define PRCM_PLLARM_ENABLE (_PRCMU_BASE + 0x98) +#define PRCM_PLLARM_ENABLE_PRCM_PLLARM_ENABLE 0x1 +#define PRCM_PLLARM_ENABLE_PRCM_PLLARM_COUNTON 0x100 + +#define PRCM_ARMCLKFIX_MGT (_PRCMU_BASE + 0x0) +#define PRCM_A9_RESETN_CLR (_PRCMU_BASE + 0x1f4) +#define PRCM_A9_RESETN_SET (_PRCMU_BASE + 0x1f0) +#define PRCM_ARM_LS_CLAMP (_PRCMU_BASE + 0x30c) +#define PRCM_SRAM_A9 (_PRCMU_BASE + 0x308) + +/* ARM WFI Standby signal register */ +#define PRCM_ARM_WFI_STANDBY (_PRCMU_BASE + 0x130) +#define PRCM_IOCR (_PRCMU_BASE + 0x310) +#define PRCM_IOCR_IOFORCE 0x1 + +/* CPU mailbox registers */ +#define PRCM_MBOX_CPU_VAL (_PRCMU_BASE + 0x0fc) +#define PRCM_MBOX_CPU_SET (_PRCMU_BASE + 0x100) +#define PRCM_MBOX_CPU_CLR (_PRCMU_BASE + 0x104) + +/* Dual A9 core interrupt management unit registers */ +#define PRCM_A9_MASK_REQ (_PRCMU_BASE + 0x328) +#define PRCM_A9_MASK_REQ_PRCM_A9_MASK_REQ 0x1 + +#define PRCM_A9_MASK_ACK (_PRCMU_BASE + 0x32c) +#define PRCM_ARMITMSK31TO0 (_PRCMU_BASE + 0x11c) +#define PRCM_ARMITMSK63TO32 (_PRCMU_BASE + 0x120) +#define PRCM_ARMITMSK95TO64 (_PRCMU_BASE + 0x124) +#define PRCM_ARMITMSK127TO96 (_PRCMU_BASE + 0x128) +#define PRCM_POWER_STATE_VAL (_PRCMU_BASE + 0x25C) +#define PRCM_ARMITVAL31TO0 (_PRCMU_BASE + 0x260) +#define PRCM_ARMITVAL63TO32 (_PRCMU_BASE + 0x264) +#define PRCM_ARMITVAL95TO64 (_PRCMU_BASE + 0x268) +#define PRCM_ARMITVAL127TO96 (_PRCMU_BASE + 0x26C) + +#define PRCM_HOSTACCESS_REQ (_PRCMU_BASE + 0x334) +#define ARM_WAKEUP_MODEM 0x1 + +#define PRCM_ARM_IT1_CLEAR (_PRCMU_BASE + 0x48C) +#define PRCM_ARM_IT1_VAL (_PRCMU_BASE + 0x494) +#define PRCM_HOLD_EVT (_PRCMU_BASE + 0x174) + +#define PRCM_ITSTATUS0 (_PRCMU_BASE + 0x148) +#define PRCM_ITSTATUS1 (_PRCMU_BASE + 0x150) +#define PRCM_ITSTATUS2 (_PRCMU_BASE + 0x158) +#define PRCM_ITSTATUS3 (_PRCMU_BASE + 0x160) +#define PRCM_ITSTATUS4 (_PRCMU_BASE + 0x168) +#define PRCM_ITSTATUS5 (_PRCMU_BASE + 0x484) +#define PRCM_ITCLEAR5 (_PRCMU_BASE + 0x488) +#define PRCM_ARMIT_MASKXP70_IT (_PRCMU_BASE + 0x1018) + +/* System reset register */ +#define PRCM_APE_SOFTRST (_PRCMU_BASE + 0x228) + +/* Level shifter and clamp control registers */ +#define PRCM_MMIP_LS_CLAMP_SET (_PRCMU_BASE + 0x420) +#define PRCM_MMIP_LS_CLAMP_CLR (_PRCMU_BASE + 0x424) + +/* PRCMU clock/PLL/reset registers */ +#define PRCM_PLLDSI_FREQ (_PRCMU_BASE + 0x500) +#define PRCM_PLLDSI_ENABLE (_PRCMU_BASE + 0x504) +#define PRCM_PLLDSI_LOCKP (_PRCMU_BASE + 0x508) +#define PRCM_LCDCLK_MGT (_PRCMU_BASE + 0x044) +#define PRCM_MCDECLK_MGT (_PRCMU_BASE + 0x064) +#define PRCM_HDMICLK_MGT (_PRCMU_BASE + 0x058) +#define PRCM_TVCLK_MGT (_PRCMU_BASE + 0x07c) +#define PRCM_DSI_PLLOUT_SEL (_PRCMU_BASE + 0x530) +#define PRCM_DSITVCLK_DIV (_PRCMU_BASE + 0x52C) +#define PRCM_PLLDSI_LOCKP (_PRCMU_BASE + 0x508) +#define PRCM_APE_RESETN_SET (_PRCMU_BASE + 0x1E4) +#define PRCM_APE_RESETN_CLR (_PRCMU_BASE + 0x1E8) +#define PRCM_CLKOCR (_PRCMU_BASE + 0x1CC) + +/* ePOD and memory power signal control registers */ +#define PRCM_EPOD_C_SET (_PRCMU_BASE + 0x410) +#define PRCM_SRAM_LS_SLEEP (_PRCMU_BASE + 0x304) + +/* Debug power control unit registers */ +#define PRCM_POWER_STATE_SET (_PRCMU_BASE + 0x254) + +/* Miscellaneous unit registers */ +#define PRCM_DSI_SW_RESET (_PRCMU_BASE + 0x324) +#define PRCM_GPIOCR (_PRCMU_BASE + 0x138) +#define PRCM_GPIOCR_DBG_STM_MOD_CMD1 0x800 +#define PRCM_GPIOCR_DBG_UARTMOD_CMD0 0x1 + + +#endif /* __MACH_PRCMU__REGS_H */ diff --git a/drivers/mfd/db5500-prcmu.c b/drivers/mfd/db5500-prcmu.c new file mode 100644 index 000000000000..9dbb3cab4a6f --- /dev/null +++ b/drivers/mfd/db5500-prcmu.c @@ -0,0 +1,448 @@ +/* + * Copyright (C) ST-Ericsson SA 2010 + * + * License Terms: GNU General Public License v2 + * Author: Mattias Nilsson + * + * U5500 PRCM Unit interface driver + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "db5500-prcmu-regs.h" + +#define _PRCM_MB_HEADER (tcdm_base + 0xFE8) +#define PRCM_REQ_MB0_HEADER (_PRCM_MB_HEADER + 0x0) +#define PRCM_REQ_MB1_HEADER (_PRCM_MB_HEADER + 0x1) +#define PRCM_REQ_MB2_HEADER (_PRCM_MB_HEADER + 0x2) +#define PRCM_REQ_MB3_HEADER (_PRCM_MB_HEADER + 0x3) +#define PRCM_REQ_MB4_HEADER (_PRCM_MB_HEADER + 0x4) +#define PRCM_REQ_MB5_HEADER (_PRCM_MB_HEADER + 0x5) +#define PRCM_REQ_MB6_HEADER (_PRCM_MB_HEADER + 0x6) +#define PRCM_REQ_MB7_HEADER (_PRCM_MB_HEADER + 0x7) +#define PRCM_ACK_MB0_HEADER (_PRCM_MB_HEADER + 0x8) +#define PRCM_ACK_MB1_HEADER (_PRCM_MB_HEADER + 0x9) +#define PRCM_ACK_MB2_HEADER (_PRCM_MB_HEADER + 0xa) +#define PRCM_ACK_MB3_HEADER (_PRCM_MB_HEADER + 0xb) +#define PRCM_ACK_MB4_HEADER (_PRCM_MB_HEADER + 0xc) +#define PRCM_ACK_MB5_HEADER (_PRCM_MB_HEADER + 0xd) +#define PRCM_ACK_MB6_HEADER (_PRCM_MB_HEADER + 0xe) +#define PRCM_ACK_MB7_HEADER (_PRCM_MB_HEADER + 0xf) + +/* Req Mailboxes */ +#define PRCM_REQ_MB0 (tcdm_base + 0xFD8) +#define PRCM_REQ_MB1 (tcdm_base + 0xFCC) +#define PRCM_REQ_MB2 (tcdm_base + 0xFC4) +#define PRCM_REQ_MB3 (tcdm_base + 0xFC0) +#define PRCM_REQ_MB4 (tcdm_base + 0xF98) +#define PRCM_REQ_MB5 (tcdm_base + 0xF90) +#define PRCM_REQ_MB6 (tcdm_base + 0xF8C) +#define PRCM_REQ_MB7 (tcdm_base + 0xF84) + +/* Ack Mailboxes */ +#define PRCM_ACK_MB0 (tcdm_base + 0xF38) +#define PRCM_ACK_MB1 (tcdm_base + 0xF30) +#define PRCM_ACK_MB2 (tcdm_base + 0xF24) +#define PRCM_ACK_MB3 (tcdm_base + 0xF20) +#define PRCM_ACK_MB4 (tcdm_base + 0xF1C) +#define PRCM_ACK_MB5 (tcdm_base + 0xF14) +#define PRCM_ACK_MB6 (tcdm_base + 0xF0C) +#define PRCM_ACK_MB7 (tcdm_base + 0xF08) + +enum mb_return_code { + RC_SUCCESS, + RC_FAIL, +}; + +/* Mailbox 0 headers. */ +enum mb0_header { + /* request */ + RMB0H_PWR_STATE_TRANS = 1, + RMB0H_WAKE_UP_CFG, + RMB0H_RD_WAKE_UP_ACK, + /* acknowledge */ + AMB0H_WAKE_UP = 1, +}; + +/* Mailbox 5 headers. */ +enum mb5_header { + MB5H_I2C_WRITE = 1, + MB5H_I2C_READ, +}; + +/* Request mailbox 5 fields. */ +#define PRCM_REQ_MB5_I2C_SLAVE (PRCM_REQ_MB5 + 0) +#define PRCM_REQ_MB5_I2C_REG (PRCM_REQ_MB5 + 1) +#define PRCM_REQ_MB5_I2C_SIZE (PRCM_REQ_MB5 + 2) +#define PRCM_REQ_MB5_I2C_DATA (PRCM_REQ_MB5 + 4) + +/* Acknowledge mailbox 5 fields. */ +#define PRCM_ACK_MB5_RETURN_CODE (PRCM_ACK_MB5 + 0) +#define PRCM_ACK_MB5_I2C_DATA (PRCM_ACK_MB5 + 4) + +#define NUM_MB 8 +#define MBOX_BIT BIT +#define ALL_MBOX_BITS (MBOX_BIT(NUM_MB) - 1) + +/* +* Used by MCDE to setup all necessary PRCMU registers +*/ +#define PRCMU_RESET_DSIPLL 0x00004000 +#define PRCMU_UNCLAMP_DSIPLL 0x00400800 + +/* HDMI CLK MGT PLLSW=001 (PLLSOC0), PLLDIV=0x8, = 50 Mhz*/ +#define PRCMU_DSI_CLOCK_SETTING 0x00000128 +/* TVCLK_MGT PLLSW=001 (PLLSOC0) PLLDIV=0x13, = 19.05 MHZ */ +#define PRCMU_DSI_LP_CLOCK_SETTING 0x00000135 +#define PRCMU_PLLDSI_FREQ_SETTING 0x0004013C +#define PRCMU_DSI_PLLOUT_SEL_SETTING 0x00000002 +#define PRCMU_ENABLE_ESCAPE_CLOCK_DIV 0x03000101 +#define PRCMU_DISABLE_ESCAPE_CLOCK_DIV 0x00000101 + +#define PRCMU_ENABLE_PLLDSI 0x00000001 +#define PRCMU_DISABLE_PLLDSI 0x00000000 + +#define PRCMU_DSI_RESET_SW 0x00000003 + +#define PRCMU_PLLDSI_LOCKP_LOCKED 0x3 + +/* + * mb0_transfer - state needed for mailbox 0 communication. + * @lock: The transaction lock. + */ +static struct { + spinlock_t lock; +} mb0_transfer; + +/* + * mb5_transfer - state needed for mailbox 5 communication. + * @lock: The transaction lock. + * @work: The transaction completion structure. + * @ack: Reply ("acknowledge") data. + */ +static struct { + struct mutex lock; + struct completion work; + struct { + u8 header; + u8 status; + u8 value[4]; + } ack; +} mb5_transfer; + +/* PRCMU TCDM base IO address. */ +static __iomem void *tcdm_base; + +/** + * db5500_prcmu_abb_read() - Read register value(s) from the ABB. + * @slave: The I2C slave address. + * @reg: The (start) register address. + * @value: The read out value(s). + * @size: The number of registers to read. + * + * Reads register value(s) from the ABB. + * @size has to be <= 4. + */ +int db5500_prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size) +{ + int r; + + if ((size < 1) || (4 < size)) + return -EINVAL; + + mutex_lock(&mb5_transfer.lock); + + while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(5)) + cpu_relax(); + writeb(slave, PRCM_REQ_MB5_I2C_SLAVE); + writeb(reg, PRCM_REQ_MB5_I2C_REG); + writeb(size, PRCM_REQ_MB5_I2C_SIZE); + writeb(MB5H_I2C_READ, PRCM_REQ_MB5_HEADER); + + writel(MBOX_BIT(5), PRCM_MBOX_CPU_SET); + wait_for_completion(&mb5_transfer.work); + + r = 0; + if ((mb5_transfer.ack.header == MB5H_I2C_READ) && + (mb5_transfer.ack.status == RC_SUCCESS)) + memcpy(value, mb5_transfer.ack.value, (size_t)size); + else + r = -EIO; + + mutex_unlock(&mb5_transfer.lock); + + return r; +} + +/** + * db5500_prcmu_abb_write() - Write register value(s) to the ABB. + * @slave: The I2C slave address. + * @reg: The (start) register address. + * @value: The value(s) to write. + * @size: The number of registers to write. + * + * Writes register value(s) to the ABB. + * @size has to be <= 4. + */ +int db5500_prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size) +{ + int r; + + if ((size < 1) || (4 < size)) + return -EINVAL; + + mutex_lock(&mb5_transfer.lock); + + while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(5)) + cpu_relax(); + writeb(slave, PRCM_REQ_MB5_I2C_SLAVE); + writeb(reg, PRCM_REQ_MB5_I2C_REG); + writeb(size, PRCM_REQ_MB5_I2C_SIZE); + memcpy_toio(PRCM_REQ_MB5_I2C_DATA, value, size); + writeb(MB5H_I2C_WRITE, PRCM_REQ_MB5_HEADER); + + writel(MBOX_BIT(5), PRCM_MBOX_CPU_SET); + wait_for_completion(&mb5_transfer.work); + + if ((mb5_transfer.ack.header == MB5H_I2C_WRITE) && + (mb5_transfer.ack.status == RC_SUCCESS)) + r = 0; + else + r = -EIO; + + mutex_unlock(&mb5_transfer.lock); + + return r; +} + +int db5500_prcmu_enable_dsipll(void) +{ + int i; + + /* Enable DSIPLL_RESETN resets */ + writel(PRCMU_RESET_DSIPLL, PRCM_APE_RESETN_CLR); + /* Unclamp DSIPLL in/out */ + writel(PRCMU_UNCLAMP_DSIPLL, PRCM_MMIP_LS_CLAMP_CLR); + /* Set DSI PLL FREQ */ + writel(PRCMU_PLLDSI_FREQ_SETTING, PRCM_PLLDSI_FREQ); + writel(PRCMU_DSI_PLLOUT_SEL_SETTING, + PRCM_DSI_PLLOUT_SEL); + /* Enable Escape clocks */ + writel(PRCMU_ENABLE_ESCAPE_CLOCK_DIV, PRCM_DSITVCLK_DIV); + + /* Start DSI PLL */ + writel(PRCMU_ENABLE_PLLDSI, PRCM_PLLDSI_ENABLE); + /* Reset DSI PLL */ + writel(PRCMU_DSI_RESET_SW, PRCM_DSI_SW_RESET); + for (i = 0; i < 10; i++) { + if ((readl(PRCM_PLLDSI_LOCKP) & + PRCMU_PLLDSI_LOCKP_LOCKED) == PRCMU_PLLDSI_LOCKP_LOCKED) + break; + udelay(100); + } + /* Release DSIPLL_RESETN */ + writel(PRCMU_RESET_DSIPLL, PRCM_APE_RESETN_SET); + return 0; +} + +int db5500_prcmu_disable_dsipll(void) +{ + /* Disable dsi pll */ + writel(PRCMU_DISABLE_PLLDSI, PRCM_PLLDSI_ENABLE); + /* Disable escapeclock */ + writel(PRCMU_DISABLE_ESCAPE_CLOCK_DIV, PRCM_DSITVCLK_DIV); + return 0; +} + +int db5500_prcmu_set_display_clocks(void) +{ + /* HDMI and TVCLK Should be handled somewhere else */ + /* PLLDIV=8, PLLSW=2, CLKEN=1 */ + writel(PRCMU_DSI_CLOCK_SETTING, PRCM_HDMICLK_MGT); + /* PLLDIV=14, PLLSW=2, CLKEN=1 */ + writel(PRCMU_DSI_LP_CLOCK_SETTING, PRCM_TVCLK_MGT); + return 0; +} + +static void ack_dbb_wakeup(void) +{ + unsigned long flags; + + spin_lock_irqsave(&mb0_transfer.lock, flags); + + while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(0)) + cpu_relax(); + + writeb(RMB0H_RD_WAKE_UP_ACK, PRCM_REQ_MB0_HEADER); + writel(MBOX_BIT(0), PRCM_MBOX_CPU_SET); + + spin_unlock_irqrestore(&mb0_transfer.lock, flags); +} + +static inline void print_unknown_header_warning(u8 n, u8 header) +{ + pr_warning("prcmu: Unknown message header (%d) in mailbox %d.\n", + header, n); +} + +static bool read_mailbox_0(void) +{ + bool r; + u8 header; + + header = readb(PRCM_ACK_MB0_HEADER); + switch (header) { + case AMB0H_WAKE_UP: + r = true; + break; + default: + print_unknown_header_warning(0, header); + r = false; + break; + } + writel(MBOX_BIT(0), PRCM_ARM_IT1_CLEAR); + return r; +} + +static bool read_mailbox_1(void) +{ + writel(MBOX_BIT(1), PRCM_ARM_IT1_CLEAR); + return false; +} + +static bool read_mailbox_2(void) +{ + writel(MBOX_BIT(2), PRCM_ARM_IT1_CLEAR); + return false; +} + +static bool read_mailbox_3(void) +{ + writel(MBOX_BIT(3), PRCM_ARM_IT1_CLEAR); + return false; +} + +static bool read_mailbox_4(void) +{ + writel(MBOX_BIT(4), PRCM_ARM_IT1_CLEAR); + return false; +} + +static bool read_mailbox_5(void) +{ + u8 header; + + header = readb(PRCM_ACK_MB5_HEADER); + switch (header) { + case MB5H_I2C_READ: + memcpy_fromio(mb5_transfer.ack.value, PRCM_ACK_MB5_I2C_DATA, 4); + case MB5H_I2C_WRITE: + mb5_transfer.ack.header = header; + mb5_transfer.ack.status = readb(PRCM_ACK_MB5_RETURN_CODE); + complete(&mb5_transfer.work); + break; + default: + print_unknown_header_warning(5, header); + break; + } + writel(MBOX_BIT(5), PRCM_ARM_IT1_CLEAR); + return false; +} + +static bool read_mailbox_6(void) +{ + writel(MBOX_BIT(6), PRCM_ARM_IT1_CLEAR); + return false; +} + +static bool read_mailbox_7(void) +{ + writel(MBOX_BIT(7), PRCM_ARM_IT1_CLEAR); + return false; +} + +static bool (* const read_mailbox[NUM_MB])(void) = { + read_mailbox_0, + read_mailbox_1, + read_mailbox_2, + read_mailbox_3, + read_mailbox_4, + read_mailbox_5, + read_mailbox_6, + read_mailbox_7 +}; + +static irqreturn_t prcmu_irq_handler(int irq, void *data) +{ + u32 bits; + u8 n; + irqreturn_t r; + + bits = (readl(PRCM_ARM_IT1_VAL) & ALL_MBOX_BITS); + if (unlikely(!bits)) + return IRQ_NONE; + + r = IRQ_HANDLED; + for (n = 0; bits; n++) { + if (bits & MBOX_BIT(n)) { + bits -= MBOX_BIT(n); + if (read_mailbox[n]()) + r = IRQ_WAKE_THREAD; + } + } + return r; +} + +static irqreturn_t prcmu_irq_thread_fn(int irq, void *data) +{ + ack_dbb_wakeup(); + return IRQ_HANDLED; +} + +void __init db5500_prcmu_early_init(void) +{ + tcdm_base = __io_address(U5500_PRCMU_TCDM_BASE); + spin_lock_init(&mb0_transfer.lock); + mutex_init(&mb5_transfer.lock); + init_completion(&mb5_transfer.work); +} + +/** + * prcmu_fw_init - arch init call for the Linux PRCMU fw init logic + * + */ +int __init db5500_prcmu_init(void) +{ + int r = 0; + + if (ux500_is_svp() || !cpu_is_u5500()) + return -ENODEV; + + /* Clean up the mailbox interrupts after pre-kernel code. */ + writel(ALL_MBOX_BITS, PRCM_ARM_IT1_CLEAR); + + r = request_threaded_irq(IRQ_DB5500_PRCMU1, prcmu_irq_handler, + prcmu_irq_thread_fn, 0, "prcmu", NULL); + if (r < 0) { + pr_err("prcmu: Failed to allocate IRQ_DB5500_PRCMU1.\n"); + return -EBUSY; + } + return 0; +} + +arch_initcall(db5500_prcmu_init); diff --git a/include/linux/mfd/db5500-prcmu.h b/include/linux/mfd/db5500-prcmu.h new file mode 100644 index 000000000000..f0977986402c --- /dev/null +++ b/include/linux/mfd/db5500-prcmu.h @@ -0,0 +1,45 @@ +/* + * Copyright (C) ST-Ericsson SA 2010 + * + * License Terms: GNU General Public License v2 + * + * U5500 PRCMU API. + */ +#ifndef __MACH_PRCMU_U5500_H +#define __MACH_PRCMU_U5500_H + +#ifdef CONFIG_UX500_SOC_DB5500 + +void db5500_prcmu_early_init(void); + +int db5500_prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size); +int db5500_prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size); + +#else /* !CONFIG_UX500_SOC_DB5500 */ + +static inline void db5500_prcmu_early_init(void) +{ +} + +static inline int db5500_prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size) +{ + return -ENOSYS; +} + +static inline int db5500_prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size) +{ + return -ENOSYS; +} + +#endif /* CONFIG_UX500_SOC_DB5500 */ + +static inline int db5500_prcmu_config_abb_event_readout(u32 abb_events) +{ +#ifdef CONFIG_MACH_U5500_SIMULATOR + return 0; +#else + return -1; +#endif +} + +#endif /* __MACH_PRCMU_U5500_H */ -- cgit v1.2.3 From 1032fbfd792f2b384ac16a63993b8fae5eea9083 Mon Sep 17 00:00:00 2001 From: Bengt Jonsson Date: Fri, 1 Apr 2011 14:43:33 +0200 Subject: mach-ux500: voltage domain regulators for DB8500 The DB8500 has ePOD:s (electronic power domains) which are possible to switch on/off to deactivate silicon blocks on the DB8500 SoC by cutting their power without retention. We model these as simple regulators with one bit on/off settings. Acked-by: Liam Girdwood Acked-by: Mark Brown Signed-off-by: Bengt Jonsson Signed-off-by: Sundar Iyer Signed-off-by: Jonas Aberg Signed-off-by: Virupax Sadashivpetimath Signed-off-by: Martin Persson Signed-off-by: Linus Walleij --- arch/arm/mach-ux500/Kconfig | 1 + drivers/mfd/db8500-prcmu.c | 179 +++++++++++ drivers/regulator/Kconfig | 7 + drivers/regulator/Makefile | 1 + drivers/regulator/db8500-prcmu.c | 558 +++++++++++++++++++++++++++++++++ include/linux/regulator/db8500-prcmu.h | 45 +++ 6 files changed, 791 insertions(+) create mode 100644 drivers/regulator/db8500-prcmu.c create mode 100644 include/linux/regulator/db8500-prcmu.h (limited to 'include/linux') diff --git a/arch/arm/mach-ux500/Kconfig b/arch/arm/mach-ux500/Kconfig index 365c4c35faa7..54429d015954 100644 --- a/arch/arm/mach-ux500/Kconfig +++ b/arch/arm/mach-ux500/Kconfig @@ -17,6 +17,7 @@ config UX500_SOC_DB5500 config UX500_SOC_DB8500 bool "DB8500" select MFD_DB8500_PRCMU + select REGULATOR_DB8500_PRCMU endmenu diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c index c44725bd8b9a..e63782107e2f 100644 --- a/drivers/mfd/db8500-prcmu.c +++ b/drivers/mfd/db8500-prcmu.c @@ -28,6 +28,8 @@ #include #include #include +#include +#include #include #include #include @@ -1824,9 +1826,186 @@ void __init prcmu_early_init(void) } } +/* + * Power domain switches (ePODs) modeled as regulators for the DB8500 SoC + */ +static struct regulator_consumer_supply db8500_vape_consumers[] = { + REGULATOR_SUPPLY("v-ape", NULL), + REGULATOR_SUPPLY("v-i2c", "nmk-i2c.0"), + REGULATOR_SUPPLY("v-i2c", "nmk-i2c.1"), + REGULATOR_SUPPLY("v-i2c", "nmk-i2c.2"), + REGULATOR_SUPPLY("v-i2c", "nmk-i2c.3"), + /* "v-mmc" changed to "vcore" in the mainline kernel */ + REGULATOR_SUPPLY("vcore", "sdi0"), + REGULATOR_SUPPLY("vcore", "sdi1"), + REGULATOR_SUPPLY("vcore", "sdi2"), + REGULATOR_SUPPLY("vcore", "sdi3"), + REGULATOR_SUPPLY("vcore", "sdi4"), + REGULATOR_SUPPLY("v-dma", "dma40.0"), + REGULATOR_SUPPLY("v-ape", "ab8500-usb.0"), + /* "v-uart" changed to "vcore" in the mainline kernel */ + REGULATOR_SUPPLY("vcore", "uart0"), + REGULATOR_SUPPLY("vcore", "uart1"), + REGULATOR_SUPPLY("vcore", "uart2"), + REGULATOR_SUPPLY("v-ape", "nmk-ske-keypad.0"), +}; + +static struct regulator_consumer_supply db8500_vsmps2_consumers[] = { + /* CG2900 and CW1200 power to off-chip peripherals */ + REGULATOR_SUPPLY("gbf_1v8", "cg2900-uart.0"), + REGULATOR_SUPPLY("wlan_1v8", "cw1200.0"), + REGULATOR_SUPPLY("musb_1v8", "ab8500-usb.0"), + /* AV8100 regulator */ + REGULATOR_SUPPLY("hdmi_1v8", "0-0070"), +}; + +static struct regulator_consumer_supply db8500_b2r2_mcde_consumers[] = { + REGULATOR_SUPPLY("vsupply", "b2r2.0"), + REGULATOR_SUPPLY("vsupply", "mcde.0"), +}; + +static struct regulator_init_data db8500_regulators[DB8500_NUM_REGULATORS] = { + [DB8500_REGULATOR_VAPE] = { + .constraints = { + .name = "db8500-vape", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + .consumer_supplies = db8500_vape_consumers, + .num_consumer_supplies = ARRAY_SIZE(db8500_vape_consumers), + }, + [DB8500_REGULATOR_VARM] = { + .constraints = { + .name = "db8500-varm", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_VMODEM] = { + .constraints = { + .name = "db8500-vmodem", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_VPLL] = { + .constraints = { + .name = "db8500-vpll", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_VSMPS1] = { + .constraints = { + .name = "db8500-vsmps1", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_VSMPS2] = { + .constraints = { + .name = "db8500-vsmps2", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + .consumer_supplies = db8500_vsmps2_consumers, + .num_consumer_supplies = ARRAY_SIZE(db8500_vsmps2_consumers), + }, + [DB8500_REGULATOR_VSMPS3] = { + .constraints = { + .name = "db8500-vsmps3", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_VRF1] = { + .constraints = { + .name = "db8500-vrf1", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_SWITCH_SVAMMDSP] = { + .supply_regulator = "db8500-vape", + .constraints = { + .name = "db8500-sva-mmdsp", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_SWITCH_SVAMMDSPRET] = { + .constraints = { + /* "ret" means "retention" */ + .name = "db8500-sva-mmdsp-ret", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_SWITCH_SVAPIPE] = { + .supply_regulator = "db8500-vape", + .constraints = { + .name = "db8500-sva-pipe", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_SWITCH_SIAMMDSP] = { + .supply_regulator = "db8500-vape", + .constraints = { + .name = "db8500-sia-mmdsp", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_SWITCH_SIAMMDSPRET] = { + .constraints = { + .name = "db8500-sia-mmdsp-ret", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_SWITCH_SIAPIPE] = { + .supply_regulator = "db8500-vape", + .constraints = { + .name = "db8500-sia-pipe", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_SWITCH_SGA] = { + .supply_regulator = "db8500-vape", + .constraints = { + .name = "db8500-sga", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_SWITCH_B2R2_MCDE] = { + .supply_regulator = "db8500-vape", + .constraints = { + .name = "db8500-b2r2-mcde", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + .consumer_supplies = db8500_b2r2_mcde_consumers, + .num_consumer_supplies = ARRAY_SIZE(db8500_b2r2_mcde_consumers), + }, + [DB8500_REGULATOR_SWITCH_ESRAM12] = { + .supply_regulator = "db8500-vape", + .constraints = { + .name = "db8500-esram12", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_SWITCH_ESRAM12RET] = { + .constraints = { + .name = "db8500-esram12-ret", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_SWITCH_ESRAM34] = { + .supply_regulator = "db8500-vape", + .constraints = { + .name = "db8500-esram34", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_SWITCH_ESRAM34RET] = { + .constraints = { + .name = "db8500-esram34-ret", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, +}; + static struct mfd_cell db8500_prcmu_devs[] = { { .name = "db8500-prcmu-regulators", + .mfd_data = &db8500_regulators, }, { .name = "cpufreq-u8500", diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index b9f29e0d4295..f0b13a0d1851 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -274,6 +274,13 @@ config REGULATOR_AB8500 This driver supports the regulators found on the ST-Ericsson mixed signal AB8500 PMIC +config REGULATOR_DB8500_PRCMU + bool "ST-Ericsson DB8500 Voltage Domain Regulators" + depends on MFD_DB8500_PRCMU + help + This driver supports the voltage domain regulators controlled by the + DB8500 PRCMU + config REGULATOR_TPS6586X tristate "TI TPS6586X Power regulators" depends on MFD_TPS6586X diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile index d72a42756778..165ff5371e9e 100644 --- a/drivers/regulator/Makefile +++ b/drivers/regulator/Makefile @@ -41,5 +41,6 @@ obj-$(CONFIG_REGULATOR_TPS6524X) += tps6524x-regulator.o obj-$(CONFIG_REGULATOR_88PM8607) += 88pm8607.o obj-$(CONFIG_REGULATOR_ISL6271A) += isl6271a-regulator.o obj-$(CONFIG_REGULATOR_AB8500) += ab8500.o +obj-$(CONFIG_REGULATOR_DB8500_PRCMU) += db8500-prcmu.o ccflags-$(CONFIG_REGULATOR_DEBUG) += -DDEBUG diff --git a/drivers/regulator/db8500-prcmu.c b/drivers/regulator/db8500-prcmu.c new file mode 100644 index 000000000000..1089a961616e --- /dev/null +++ b/drivers/regulator/db8500-prcmu.c @@ -0,0 +1,558 @@ +/* + * Copyright (C) ST-Ericsson SA 2010 + * + * License Terms: GNU General Public License v2 + * Authors: Sundar Iyer for ST-Ericsson + * Bengt Jonsson for ST-Ericsson + * + * Power domain regulators on DB8500 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * power state reference count + */ +static int power_state_active_cnt; /* will initialize to zero */ +static DEFINE_SPINLOCK(power_state_active_lock); + +static void power_state_active_enable(void) +{ + unsigned long flags; + + spin_lock_irqsave(&power_state_active_lock, flags); + power_state_active_cnt++; + spin_unlock_irqrestore(&power_state_active_lock, flags); +} + +static int power_state_active_disable(void) +{ + int ret = 0; + unsigned long flags; + + spin_lock_irqsave(&power_state_active_lock, flags); + if (power_state_active_cnt <= 0) { + pr_err("power state: unbalanced enable/disable calls\n"); + ret = -EINVAL; + goto out; + } + + power_state_active_cnt--; +out: + spin_unlock_irqrestore(&power_state_active_lock, flags); + return ret; +} + +/* + * Exported interface for CPUIdle only. This function is called when interrupts + * are turned off. Hence, no locking. + */ +int power_state_active_is_enabled(void) +{ + return (power_state_active_cnt > 0); +} + +/** + * struct db8500_regulator_info - db8500 regulator information + * @dev: device pointer + * @desc: regulator description + * @rdev: regulator device pointer + * @is_enabled: status of the regulator + * @epod_id: id for EPOD (power domain) + * @is_ramret: RAM retention switch for EPOD (power domain) + * @operating_point: operating point (only for vape, to be removed) + * + */ +struct db8500_regulator_info { + struct device *dev; + struct regulator_desc desc; + struct regulator_dev *rdev; + bool is_enabled; + u16 epod_id; + bool is_ramret; + bool exclude_from_power_state; + unsigned int operating_point; +}; + +static int db8500_regulator_enable(struct regulator_dev *rdev) +{ + struct db8500_regulator_info *info = rdev_get_drvdata(rdev); + + if (info == NULL) + return -EINVAL; + + dev_vdbg(rdev_get_dev(rdev), "regulator-%s-enable\n", + info->desc.name); + + info->is_enabled = true; + if (!info->exclude_from_power_state) + power_state_active_enable(); + + return 0; +} + +static int db8500_regulator_disable(struct regulator_dev *rdev) +{ + struct db8500_regulator_info *info = rdev_get_drvdata(rdev); + int ret = 0; + + if (info == NULL) + return -EINVAL; + + dev_vdbg(rdev_get_dev(rdev), "regulator-%s-disable\n", + info->desc.name); + + info->is_enabled = false; + if (!info->exclude_from_power_state) + ret = power_state_active_disable(); + + return ret; +} + +static int db8500_regulator_is_enabled(struct regulator_dev *rdev) +{ + struct db8500_regulator_info *info = rdev_get_drvdata(rdev); + + if (info == NULL) + return -EINVAL; + + dev_vdbg(rdev_get_dev(rdev), "regulator-%s-is_enabled (is_enabled):" + " %i\n", info->desc.name, info->is_enabled); + + return info->is_enabled; +} + +/* db8500 regulator operations */ +static struct regulator_ops db8500_regulator_ops = { + .enable = db8500_regulator_enable, + .disable = db8500_regulator_disable, + .is_enabled = db8500_regulator_is_enabled, +}; + +/* + * EPOD control + */ +static bool epod_on[NUM_EPOD_ID]; +static bool epod_ramret[NUM_EPOD_ID]; + +static int enable_epod(u16 epod_id, bool ramret) +{ + int ret; + + if (ramret) { + if (!epod_on[epod_id]) { + ret = prcmu_set_epod(epod_id, EPOD_STATE_RAMRET); + if (ret < 0) + return ret; + } + epod_ramret[epod_id] = true; + } else { + ret = prcmu_set_epod(epod_id, EPOD_STATE_ON); + if (ret < 0) + return ret; + epod_on[epod_id] = true; + } + + return 0; +} + +static int disable_epod(u16 epod_id, bool ramret) +{ + int ret; + + if (ramret) { + if (!epod_on[epod_id]) { + ret = prcmu_set_epod(epod_id, EPOD_STATE_OFF); + if (ret < 0) + return ret; + } + epod_ramret[epod_id] = false; + } else { + if (epod_ramret[epod_id]) { + ret = prcmu_set_epod(epod_id, EPOD_STATE_RAMRET); + if (ret < 0) + return ret; + } else { + ret = prcmu_set_epod(epod_id, EPOD_STATE_OFF); + if (ret < 0) + return ret; + } + epod_on[epod_id] = false; + } + + return 0; +} + +/* + * Regulator switch + */ +static int db8500_regulator_switch_enable(struct regulator_dev *rdev) +{ + struct db8500_regulator_info *info = rdev_get_drvdata(rdev); + int ret; + + if (info == NULL) + return -EINVAL; + + dev_vdbg(rdev_get_dev(rdev), "regulator-switch-%s-enable\n", + info->desc.name); + + ret = enable_epod(info->epod_id, info->is_ramret); + if (ret < 0) { + dev_err(rdev_get_dev(rdev), + "regulator-switch-%s-enable: prcmu call failed\n", + info->desc.name); + goto out; + } + + info->is_enabled = true; +out: + return ret; +} + +static int db8500_regulator_switch_disable(struct regulator_dev *rdev) +{ + struct db8500_regulator_info *info = rdev_get_drvdata(rdev); + int ret; + + if (info == NULL) + return -EINVAL; + + dev_vdbg(rdev_get_dev(rdev), "regulator-switch-%s-disable\n", + info->desc.name); + + ret = disable_epod(info->epod_id, info->is_ramret); + if (ret < 0) { + dev_err(rdev_get_dev(rdev), + "regulator_switch-%s-disable: prcmu call failed\n", + info->desc.name); + goto out; + } + + info->is_enabled = 0; +out: + return ret; +} + +static int db8500_regulator_switch_is_enabled(struct regulator_dev *rdev) +{ + struct db8500_regulator_info *info = rdev_get_drvdata(rdev); + + if (info == NULL) + return -EINVAL; + + dev_vdbg(rdev_get_dev(rdev), + "regulator-switch-%s-is_enabled (is_enabled): %i\n", + info->desc.name, info->is_enabled); + + return info->is_enabled; +} + +static struct regulator_ops db8500_regulator_switch_ops = { + .enable = db8500_regulator_switch_enable, + .disable = db8500_regulator_switch_disable, + .is_enabled = db8500_regulator_switch_is_enabled, +}; + +/* + * Regulator information + */ +static struct db8500_regulator_info + db8500_regulator_info[DB8500_NUM_REGULATORS] = { + [DB8500_REGULATOR_VAPE] = { + .desc = { + .name = "db8500-vape", + .id = DB8500_REGULATOR_VAPE, + .ops = &db8500_regulator_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + }, + [DB8500_REGULATOR_VARM] = { + .desc = { + .name = "db8500-varm", + .id = DB8500_REGULATOR_VARM, + .ops = &db8500_regulator_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + }, + [DB8500_REGULATOR_VMODEM] = { + .desc = { + .name = "db8500-vmodem", + .id = DB8500_REGULATOR_VMODEM, + .ops = &db8500_regulator_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + }, + [DB8500_REGULATOR_VPLL] = { + .desc = { + .name = "db8500-vpll", + .id = DB8500_REGULATOR_VPLL, + .ops = &db8500_regulator_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + }, + [DB8500_REGULATOR_VSMPS1] = { + .desc = { + .name = "db8500-vsmps1", + .id = DB8500_REGULATOR_VSMPS1, + .ops = &db8500_regulator_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + }, + [DB8500_REGULATOR_VSMPS2] = { + .desc = { + .name = "db8500-vsmps2", + .id = DB8500_REGULATOR_VSMPS2, + .ops = &db8500_regulator_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + .exclude_from_power_state = true, + }, + [DB8500_REGULATOR_VSMPS3] = { + .desc = { + .name = "db8500-vsmps3", + .id = DB8500_REGULATOR_VSMPS3, + .ops = &db8500_regulator_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + }, + [DB8500_REGULATOR_VRF1] = { + .desc = { + .name = "db8500-vrf1", + .id = DB8500_REGULATOR_VRF1, + .ops = &db8500_regulator_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + }, + [DB8500_REGULATOR_SWITCH_SVAMMDSP] = { + .desc = { + .name = "db8500-sva-mmdsp", + .id = DB8500_REGULATOR_SWITCH_SVAMMDSP, + .ops = &db8500_regulator_switch_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + .epod_id = EPOD_ID_SVAMMDSP, + }, + [DB8500_REGULATOR_SWITCH_SVAMMDSPRET] = { + .desc = { + .name = "db8500-sva-mmdsp-ret", + .id = DB8500_REGULATOR_SWITCH_SVAMMDSPRET, + .ops = &db8500_regulator_switch_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + .epod_id = EPOD_ID_SVAMMDSP, + .is_ramret = true, + }, + [DB8500_REGULATOR_SWITCH_SVAPIPE] = { + .desc = { + .name = "db8500-sva-pipe", + .id = DB8500_REGULATOR_SWITCH_SVAPIPE, + .ops = &db8500_regulator_switch_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + .epod_id = EPOD_ID_SVAPIPE, + }, + [DB8500_REGULATOR_SWITCH_SIAMMDSP] = { + .desc = { + .name = "db8500-sia-mmdsp", + .id = DB8500_REGULATOR_SWITCH_SIAMMDSP, + .ops = &db8500_regulator_switch_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + .epod_id = EPOD_ID_SIAMMDSP, + }, + [DB8500_REGULATOR_SWITCH_SIAMMDSPRET] = { + .desc = { + .name = "db8500-sia-mmdsp-ret", + .id = DB8500_REGULATOR_SWITCH_SIAMMDSPRET, + .ops = &db8500_regulator_switch_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + .epod_id = EPOD_ID_SIAMMDSP, + .is_ramret = true, + }, + [DB8500_REGULATOR_SWITCH_SIAPIPE] = { + .desc = { + .name = "db8500-sia-pipe", + .id = DB8500_REGULATOR_SWITCH_SIAPIPE, + .ops = &db8500_regulator_switch_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + .epod_id = EPOD_ID_SIAPIPE, + }, + [DB8500_REGULATOR_SWITCH_SGA] = { + .desc = { + .name = "db8500-sga", + .id = DB8500_REGULATOR_SWITCH_SGA, + .ops = &db8500_regulator_switch_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + .epod_id = EPOD_ID_SGA, + }, + [DB8500_REGULATOR_SWITCH_B2R2_MCDE] = { + .desc = { + .name = "db8500-b2r2-mcde", + .id = DB8500_REGULATOR_SWITCH_B2R2_MCDE, + .ops = &db8500_regulator_switch_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + .epod_id = EPOD_ID_B2R2_MCDE, + }, + [DB8500_REGULATOR_SWITCH_ESRAM12] = { + .desc = { + .name = "db8500-esram12", + .id = DB8500_REGULATOR_SWITCH_ESRAM12, + .ops = &db8500_regulator_switch_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + .epod_id = EPOD_ID_ESRAM12, + .is_enabled = true, + }, + [DB8500_REGULATOR_SWITCH_ESRAM12RET] = { + .desc = { + .name = "db8500-esram12-ret", + .id = DB8500_REGULATOR_SWITCH_ESRAM12RET, + .ops = &db8500_regulator_switch_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + .epod_id = EPOD_ID_ESRAM12, + .is_ramret = true, + }, + [DB8500_REGULATOR_SWITCH_ESRAM34] = { + .desc = { + .name = "db8500-esram34", + .id = DB8500_REGULATOR_SWITCH_ESRAM34, + .ops = &db8500_regulator_switch_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + .epod_id = EPOD_ID_ESRAM34, + .is_enabled = true, + }, + [DB8500_REGULATOR_SWITCH_ESRAM34RET] = { + .desc = { + .name = "db8500-esram34-ret", + .id = DB8500_REGULATOR_SWITCH_ESRAM34RET, + .ops = &db8500_regulator_switch_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + .epod_id = EPOD_ID_ESRAM34, + .is_ramret = true, + }, +}; + +static int __devinit db8500_regulator_probe(struct platform_device *pdev) +{ + struct regulator_init_data *db8500_init_data = mfd_get_data(pdev); + int i, err; + + /* register all regulators */ + for (i = 0; i < ARRAY_SIZE(db8500_regulator_info); i++) { + struct db8500_regulator_info *info; + struct regulator_init_data *init_data = &db8500_init_data[i]; + + /* assign per-regulator data */ + info = &db8500_regulator_info[i]; + info->dev = &pdev->dev; + + /* register with the regulator framework */ + info->rdev = regulator_register(&info->desc, &pdev->dev, + init_data, info); + if (IS_ERR(info->rdev)) { + err = PTR_ERR(info->rdev); + dev_err(&pdev->dev, "failed to register %s: err %i\n", + info->desc.name, err); + + /* if failing, unregister all earlier regulators */ + i--; + while (i >= 0) { + info = &db8500_regulator_info[i]; + regulator_unregister(info->rdev); + i--; + } + return err; + } + + dev_dbg(rdev_get_dev(info->rdev), + "regulator-%s-probed\n", info->desc.name); + } + + return 0; +} + +static int __exit db8500_regulator_remove(struct platform_device *pdev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(db8500_regulator_info); i++) { + struct db8500_regulator_info *info; + info = &db8500_regulator_info[i]; + + dev_vdbg(rdev_get_dev(info->rdev), + "regulator-%s-remove\n", info->desc.name); + + regulator_unregister(info->rdev); + } + + return 0; +} + +static struct platform_driver db8500_regulator_driver = { + .driver = { + .name = "db8500-prcmu-regulators", + .owner = THIS_MODULE, + }, + .probe = db8500_regulator_probe, + .remove = __exit_p(db8500_regulator_remove), +}; + +static int __init db8500_regulator_init(void) +{ + int ret; + + ret = platform_driver_register(&db8500_regulator_driver); + if (ret < 0) + return -ENODEV; + + return 0; +} + +static void __exit db8500_regulator_exit(void) +{ + platform_driver_unregister(&db8500_regulator_driver); +} + +arch_initcall(db8500_regulator_init); +module_exit(db8500_regulator_exit); + +MODULE_AUTHOR("STMicroelectronics/ST-Ericsson"); +MODULE_DESCRIPTION("DB8500 regulator driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/regulator/db8500-prcmu.h b/include/linux/regulator/db8500-prcmu.h new file mode 100644 index 000000000000..612062313b68 --- /dev/null +++ b/include/linux/regulator/db8500-prcmu.h @@ -0,0 +1,45 @@ +/* + * Copyright (C) ST-Ericsson SA 2010 + * + * License Terms: GNU General Public License v2 + * + * Author: Bengt Jonsson for ST-Ericsson + * + * Interface to power domain regulators on DB8500 + */ + +#ifndef __REGULATOR_H__ +#define __REGULATOR_H__ + +/* Number of DB8500 regulators and regulator enumeration */ +enum db8500_regulator_id { + DB8500_REGULATOR_VAPE, + DB8500_REGULATOR_VARM, + DB8500_REGULATOR_VMODEM, + DB8500_REGULATOR_VPLL, + DB8500_REGULATOR_VSMPS1, + DB8500_REGULATOR_VSMPS2, + DB8500_REGULATOR_VSMPS3, + DB8500_REGULATOR_VRF1, + DB8500_REGULATOR_SWITCH_SVAMMDSP, + DB8500_REGULATOR_SWITCH_SVAMMDSPRET, + DB8500_REGULATOR_SWITCH_SVAPIPE, + DB8500_REGULATOR_SWITCH_SIAMMDSP, + DB8500_REGULATOR_SWITCH_SIAMMDSPRET, + DB8500_REGULATOR_SWITCH_SIAPIPE, + DB8500_REGULATOR_SWITCH_SGA, + DB8500_REGULATOR_SWITCH_B2R2_MCDE, + DB8500_REGULATOR_SWITCH_ESRAM12, + DB8500_REGULATOR_SWITCH_ESRAM12RET, + DB8500_REGULATOR_SWITCH_ESRAM34, + DB8500_REGULATOR_SWITCH_ESRAM34RET, + DB8500_NUM_REGULATORS +}; + +/* + * Exported interface for CPUIdle only. This function is called with all + * interrupts turned off. + */ +int power_state_active_is_enabled(void); + +#endif -- cgit v1.2.3 From 618e724b8d79c6232daac49cb39b0723bf5c4b08 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 11 May 2011 14:06:58 -0700 Subject: ns: Declare sys_setns in syscalls.h Ooops I overlooked this one, and missing it causes compile errors of the powerpc syscall. Signed-off-by: Eric W. Biederman --- include/linux/syscalls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 83ecc1749ef6..b8b380443d05 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -844,4 +844,5 @@ asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name, asmlinkage long sys_open_by_handle_at(int mountdirfd, struct file_handle __user *handle, int flags); +asmlinkage long sys_setns(int fd, int nstype); #endif -- cgit v1.2.3 From 41e2a4893566ced3c46af15df5b727326881e47d Mon Sep 17 00:00:00 2001 From: Philip Rakity Date: Sat, 19 Mar 2011 14:10:33 -0400 Subject: mmc: Ensure linux starts in eMMC user partition uBoot sometimes leaves eMMC pointing to the private boot partition. Ensure we always start looking at the user partition. Signed-off-by: Philip Rakity Signed-off-by: Bruce Clemens Signed-off-by: Mark F. Brown Signed-off-by: Chris Ball --- drivers/mmc/core/mmc.c | 10 ++++++++++ include/linux/mmc/card.h | 1 + include/linux/mmc/mmc.h | 1 + 3 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 772d0d0a541b..5c611a6e0080 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -288,6 +288,7 @@ static int mmc_read_ext_csd(struct mmc_card *card) if (card->ext_csd.rev >= 3) { u8 sa_shift = ext_csd[EXT_CSD_S_A_TIMEOUT]; + card->ext_csd.bootconfig = ext_csd[EXT_CSD_BOOT_CONFIG]; /* Sleep / awake timeout in 100ns units */ if (sa_shift > 0 && sa_shift <= 0x17) @@ -567,6 +568,15 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, } } + /* + * Ensure eMMC user default partition is enabled + */ + if (card->ext_csd.bootconfig & 0x7) { + card->ext_csd.bootconfig &= ~0x7; + mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_BOOT_CONFIG, + card->ext_csd.bootconfig); + } + /* * Activate high speed (if supported) */ diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index adb4888248be..557b73263390 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -45,6 +45,7 @@ struct mmc_ext_csd { u8 rev; u8 erase_group_def; u8 sec_feature_support; + u8 bootconfig; unsigned int sa_timeout; /* Units: 100ns */ unsigned int hs_max_dtr; unsigned int sectors; diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 264ba5451e3b..b5ec88fd1352 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -256,6 +256,7 @@ struct _mmc_csd { #define EXT_CSD_PARTITION_ATTRIBUTE 156 /* R/W */ #define EXT_CSD_PARTITION_SUPPORT 160 /* RO */ #define EXT_CSD_ERASE_GROUP_DEF 175 /* R/W */ +#define EXT_CSD_BOOT_CONFIG 179 /* R/W */ #define EXT_CSD_ERASED_MEM_CONT 181 /* RO */ #define EXT_CSD_BUS_WIDTH 183 /* R/W */ #define EXT_CSD_HS_TIMING 185 /* R/W */ -- cgit v1.2.3 From f4c5522b0a8827f39f83f928961d87e081bfe71c Mon Sep 17 00:00:00 2001 From: Andrei Warkentin Date: Thu, 31 Mar 2011 18:40:00 -0500 Subject: mmc: Reliable write support. Allows reliable writes to be used for MMC writes. Reliable writes are used to service write REQ_FUA/REQ_META requests. Handles both the legacy and the enhanced reliable write support in MMC cards. Signed-off-by: Andrei Warkentin Reviewed-by: Arnd Bergmann Signed-off-by: Chris Ball --- drivers/mmc/card/block.c | 81 +++++++++++++++++++++++++++++++++++++++++++++--- drivers/mmc/core/mmc.c | 5 +++ include/linux/mmc/card.h | 2 ++ include/linux/mmc/mmc.h | 4 +++ 4 files changed, 88 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 61d233a7c118..91a676773608 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -48,6 +48,10 @@ MODULE_ALIAS("mmc:block"); #endif #define MODULE_PARAM_PREFIX "mmcblk." +#define REL_WRITES_SUPPORTED(card) (mmc_card_mmc((card)) && \ + (((card)->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN) || \ + ((card)->ext_csd.rel_sectors))) + static DEFINE_MUTEX(block_mutex); /* @@ -331,6 +335,57 @@ out: return err ? 0 : 1; } +static int mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req) +{ + struct mmc_blk_data *md = mq->data; + + /* + * No-op, only service this because we need REQ_FUA for reliable + * writes. + */ + spin_lock_irq(&md->lock); + __blk_end_request_all(req, 0); + spin_unlock_irq(&md->lock); + + return 1; +} + +/* + * Reformat current write as a reliable write, supporting + * both legacy and the enhanced reliable write MMC cards. + * In each transfer we'll handle only as much as a single + * reliable write can handle, thus finish the request in + * partial completions. + */ +static inline int mmc_apply_rel_rw(struct mmc_blk_request *brq, + struct mmc_card *card, + struct request *req) +{ + int err; + struct mmc_command set_count; + + if (!(card->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN)) { + /* Legacy mode imposes restrictions on transfers. */ + if (!IS_ALIGNED(brq->cmd.arg, card->ext_csd.rel_sectors)) + brq->data.blocks = 1; + + if (brq->data.blocks > card->ext_csd.rel_sectors) + brq->data.blocks = card->ext_csd.rel_sectors; + else if (brq->data.blocks < card->ext_csd.rel_sectors) + brq->data.blocks = 1; + } + + memset(&set_count, 0, sizeof(struct mmc_command)); + set_count.opcode = MMC_SET_BLOCK_COUNT; + set_count.arg = brq->data.blocks | (1 << 31); + set_count.flags = MMC_RSP_R1 | MMC_CMD_AC; + err = mmc_wait_for_cmd(card->host, &set_count, 0); + if (err) + printk(KERN_ERR "%s: error %d SET_BLOCK_COUNT\n", + req->rq_disk->disk_name, err); + return err; +} + static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req) { struct mmc_blk_data *md = mq->data; @@ -338,6 +393,15 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req) struct mmc_blk_request brq; int ret = 1, disable_multi = 0; + /* + * Reliable writes are used to implement Forced Unit Access and + * REQ_META accesses, and are supported only on MMCs. + */ + bool do_rel_wr = ((req->cmd_flags & REQ_FUA) || + (req->cmd_flags & REQ_META)) && + (rq_data_dir(req) == WRITE) && + REL_WRITES_SUPPORTED(card); + mmc_claim_host(card->host); do { @@ -374,12 +438,14 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req) if (disable_multi && brq.data.blocks > 1) brq.data.blocks = 1; - if (brq.data.blocks > 1) { + if (brq.data.blocks > 1 || do_rel_wr) { /* SPI multiblock writes terminate using a special - * token, not a STOP_TRANSMISSION request. + * token, not a STOP_TRANSMISSION request. Reliable + * writes use SET_BLOCK_COUNT and do not use a + * STOP_TRANSMISSION request either. */ - if (!mmc_host_is_spi(card->host) - || rq_data_dir(req) == READ) + if ((!mmc_host_is_spi(card->host) && !do_rel_wr) || + rq_data_dir(req) == READ) brq.mrq.stop = &brq.stop; readcmd = MMC_READ_MULTIPLE_BLOCK; writecmd = MMC_WRITE_MULTIPLE_BLOCK; @@ -396,6 +462,9 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req) brq.data.flags |= MMC_DATA_WRITE; } + if (do_rel_wr && mmc_apply_rel_rw(&brq, card, req)) + goto cmd_err; + mmc_set_data_timeout(&brq.data, card); brq.data.sg = mq->sg; @@ -565,6 +634,8 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) return mmc_blk_issue_secdiscard_rq(mq, req); else return mmc_blk_issue_discard_rq(mq, req); + } else if (req->cmd_flags & REQ_FLUSH) { + return mmc_blk_issue_flush(mq, req); } else { return mmc_blk_issue_rw_rq(mq, req); } @@ -622,6 +693,8 @@ static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card) md->disk->queue = md->queue.queue; md->disk->driverfs_dev = &card->dev; set_disk_ro(md->disk, md->read_only); + if (REL_WRITES_SUPPORTED(card)) + blk_queue_flush(md->queue.queue, REQ_FLUSH | REQ_FUA); /* * As discussed on lkml, GENHD_FL_REMOVABLE should: diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 5c611a6e0080..ae6b8fd38800 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -300,6 +300,8 @@ static int mmc_read_ext_csd(struct mmc_card *card) ext_csd[EXT_CSD_ERASE_TIMEOUT_MULT]; card->ext_csd.hc_erase_size = ext_csd[EXT_CSD_HC_ERASE_GRP_SIZE] << 10; + + card->ext_csd.rel_sectors = ext_csd[EXT_CSD_REL_WR_SEC_C]; } if (card->ext_csd.rev >= 4) { @@ -351,6 +353,9 @@ static int mmc_read_ext_csd(struct mmc_card *card) ext_csd[EXT_CSD_TRIM_MULT]; } + if (card->ext_csd.rev >= 5) + card->ext_csd.rel_param = ext_csd[EXT_CSD_WR_REL_PARAM]; + if (ext_csd[EXT_CSD_ERASED_MEM_CONT]) card->erased_byte = 0xFF; else diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 557b73263390..c4e96fa5fb2b 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -45,6 +45,8 @@ struct mmc_ext_csd { u8 rev; u8 erase_group_def; u8 sec_feature_support; + u8 rel_sectors; + u8 rel_param; u8 bootconfig; unsigned int sa_timeout; /* Units: 100ns */ unsigned int hs_max_dtr; diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index b5ec88fd1352..390aa6eef676 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -255,6 +255,7 @@ struct _mmc_csd { #define EXT_CSD_PARTITION_ATTRIBUTE 156 /* R/W */ #define EXT_CSD_PARTITION_SUPPORT 160 /* RO */ +#define EXT_CSD_WR_REL_PARAM 166 /* RO */ #define EXT_CSD_ERASE_GROUP_DEF 175 /* R/W */ #define EXT_CSD_BOOT_CONFIG 179 /* R/W */ #define EXT_CSD_ERASED_MEM_CONT 181 /* RO */ @@ -265,6 +266,7 @@ struct _mmc_csd { #define EXT_CSD_CARD_TYPE 196 /* RO */ #define EXT_CSD_SEC_CNT 212 /* RO, 4 bytes */ #define EXT_CSD_S_A_TIMEOUT 217 /* RO */ +#define EXT_CSD_REL_WR_SEC_C 222 /* RO */ #define EXT_CSD_HC_WP_GRP_SIZE 221 /* RO */ #define EXT_CSD_ERASE_TIMEOUT_MULT 223 /* RO */ #define EXT_CSD_HC_ERASE_GRP_SIZE 224 /* RO */ @@ -277,6 +279,8 @@ struct _mmc_csd { * EXT_CSD field definitions */ +#define EXT_CSD_WR_REL_PARAM_EN (1<<2) + #define EXT_CSD_CMD_SET_NORMAL (1<<0) #define EXT_CSD_CMD_SET_SECURE (1<<1) #define EXT_CSD_CMD_SET_CPSECURE (1<<2) -- cgit v1.2.3 From a5e9425d2010978c5f85986cc70a9fa0c0d5b912 Mon Sep 17 00:00:00 2001 From: Ohad Ben-Cohen Date: Tue, 5 Apr 2011 17:43:20 +0300 Subject: mmc: mmc_card_keep_power cleanups mmc_card_is_powered_resumed is a mouthful; instead, simply use mmc_card_keep_power, which also better explains the purpose of the macro. Employ mmc_card_keep_power() where possible. Signed-off-by: Ohad Ben-Cohen Signed-off-by: Chris Ball --- drivers/mmc/core/core.c | 4 ++-- drivers/mmc/core/sdio.c | 10 +++++----- include/linux/mmc/host.h | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 1f453acc8682..c2350e474159 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -1746,7 +1746,7 @@ int mmc_suspend_host(struct mmc_host *host) } mmc_bus_put(host); - if (!err && !(host->pm_flags & MMC_PM_KEEP_POWER)) + if (!err && !mmc_card_keep_power(host)) mmc_power_off(host); return err; @@ -1764,7 +1764,7 @@ int mmc_resume_host(struct mmc_host *host) mmc_bus_get(host); if (host->bus_ops && !host->bus_dead) { - if (!(host->pm_flags & MMC_PM_KEEP_POWER)) { + if (!mmc_card_keep_power(host)) { mmc_power_up(host); mmc_select_voltage(host, host->ocr); /* diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index db0f0b44d684..0f7d4362d213 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -625,7 +625,7 @@ static int mmc_sdio_suspend(struct mmc_host *host) } } - if (!err && host->pm_flags & MMC_PM_KEEP_POWER) { + if (!err && mmc_card_keep_power(host)) { mmc_claim_host(host); sdio_disable_wide(host->card); mmc_release_host(host); @@ -645,10 +645,10 @@ static int mmc_sdio_resume(struct mmc_host *host) mmc_claim_host(host); /* No need to reinitialize powered-resumed nonremovable cards */ - if (mmc_card_is_removable(host) || !mmc_card_is_powered_resumed(host)) + if (mmc_card_is_removable(host) || !mmc_card_keep_power(host)) err = mmc_sdio_init_card(host, host->ocr, host->card, - (host->pm_flags & MMC_PM_KEEP_POWER)); - else if (mmc_card_is_powered_resumed(host)) { + mmc_card_keep_power(host)); + else if (mmc_card_keep_power(host)) { /* We may have switched to 1-bit mode during suspend */ err = sdio_enable_4bit_bus(host->card); if (err > 0) { @@ -691,7 +691,7 @@ static int mmc_sdio_power_restore(struct mmc_host *host) mmc_claim_host(host); ret = mmc_sdio_init_card(host, host->ocr, host->card, - (host->pm_flags & MMC_PM_KEEP_POWER)); + mmc_card_keep_power(host)); if (!ret && host->sdio_irqs) mmc_signal_sdio_irq(host); mmc_release_host(host); diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index eb792cb6d745..8ad3a9c6f495 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -320,7 +320,7 @@ static inline int mmc_card_is_removable(struct mmc_host *host) return !(host->caps & MMC_CAP_NONREMOVABLE) && mmc_assume_removable; } -static inline int mmc_card_is_powered_resumed(struct mmc_host *host) +static inline int mmc_card_keep_power(struct mmc_host *host) { return host->pm_flags & MMC_PM_KEEP_POWER; } -- cgit v1.2.3 From 6b93d01fe5971951911a070f51f412d50e9536dc Mon Sep 17 00:00:00 2001 From: Ohad Ben-Cohen Date: Tue, 5 Apr 2011 17:43:21 +0300 Subject: mmc: do not switch to 1-bit mode if not required 6b5eda36 followed SDIO spec part E1 section 8, which states that in case SDIO interrupts are being used to wake up a suspended host, then it is required to switch to 1-bit mode before stopping the clock. Before switching to 1-bit mode (or back to 4-bit mode on resume), make sure that SDIO interrupts are really being used to wake the host. This is helpful for devices which have an external irq line (e.g. wl1271), and do not use SDIO interrupts to wake up the host. In this case, switching to 1-bit mode (and back to 4-bit mode on resume) is not necessary. Reported-by: Eliad Peller Signed-off-by: Ohad Ben-Cohen Signed-off-by: Chris Ball --- drivers/mmc/core/sdio.c | 4 ++-- include/linux/mmc/host.h | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 0f7d4362d213..4221670bf82e 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -625,7 +625,7 @@ static int mmc_sdio_suspend(struct mmc_host *host) } } - if (!err && mmc_card_keep_power(host)) { + if (!err && mmc_card_keep_power(host) && mmc_card_wake_sdio_irq(host)) { mmc_claim_host(host); sdio_disable_wide(host->card); mmc_release_host(host); @@ -648,7 +648,7 @@ static int mmc_sdio_resume(struct mmc_host *host) if (mmc_card_is_removable(host) || !mmc_card_keep_power(host)) err = mmc_sdio_init_card(host, host->ocr, host->card, mmc_card_keep_power(host)); - else if (mmc_card_keep_power(host)) { + else if (mmc_card_keep_power(host) && mmc_card_wake_sdio_irq(host)) { /* We may have switched to 1-bit mode during suspend */ err = sdio_enable_4bit_bus(host->card); if (err > 0) { diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 8ad3a9c6f495..0fffa5cdc183 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -325,5 +325,9 @@ static inline int mmc_card_keep_power(struct mmc_host *host) return host->pm_flags & MMC_PM_KEEP_POWER; } +static inline int mmc_card_wake_sdio_irq(struct mmc_host *host) +{ + return host->pm_flags & MMC_PM_WAKE_SDIO_IRQ; +} #endif -- cgit v1.2.3 From eab4068795d670b065164096805cbf15a19e9690 Mon Sep 17 00:00:00 2001 From: Ohad Ben-Cohen Date: Tue, 5 Apr 2011 17:50:14 +0300 Subject: mmc: add MMC_QUIRK_NONSTD_FUNC_IF Introduce MMC_QUIRK_NONSTD_FUNC_IF to ignore the "SDIO Standard Function interface code" as indicated by the card's FBR, and instead treat all functions as non-standard interfaces. This is required to prevent standard drivers from facing errors when trying to communicate with SDIO cards that erroneously indicate standard function interface codes. Signed-off-by: Ohad Ben-Cohen Signed-off-by: Chris Ball --- drivers/mmc/core/sdio.c | 6 ++++++ include/linux/mmc/card.h | 6 ++++++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 4221670bf82e..c3c2bd0874e8 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "core.h" #include "bus.h" @@ -31,6 +32,11 @@ static int sdio_read_fbr(struct sdio_func *func) int ret; unsigned char data; + if (mmc_card_nonstd_func_interface(func->card)) { + func->class = SDIO_CLASS_NONE; + return 0; + } + ret = mmc_io_rw_direct(func->card, 0, 0, SDIO_FBR_BASE(func->num) + SDIO_FBR_STD_IF, 0, &data); if (ret) diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index c4e96fa5fb2b..317a0029e7d7 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -128,6 +128,7 @@ struct mmc_card { #define MMC_QUIRK_NONSTD_SDIO (1<<2) /* non-standard SDIO card attached */ /* (missing CIA registers) */ #define MMC_QUIRK_BROKEN_CLK_GATING (1<<3) /* clock gating the sdio bus will make card fail */ +#define MMC_QUIRK_NONSTD_FUNC_IF (1<<4) /* SDIO card has nonstd function interfaces */ unsigned int erase_size; /* erase size in sectors */ unsigned int erase_shift; /* if erase unit is power 2 */ @@ -183,6 +184,11 @@ static inline int mmc_blksz_for_byte_mode(const struct mmc_card *c) return c->quirks & MMC_QUIRK_BLKSZ_FOR_BYTE_MODE; } +static inline int mmc_card_nonstd_func_interface(const struct mmc_card *c) +{ + return c->quirks & MMC_QUIRK_NONSTD_FUNC_IF; +} + #define mmc_card_name(c) ((c)->cid.prod_name) #define mmc_card_id(c) (dev_name(&(c)->dev)) -- cgit v1.2.3 From 2059a02dcb84236f9db9197fa9b00418d7b8465b Mon Sep 17 00:00:00 2001 From: Ohad Ben-Cohen Date: Tue, 5 Apr 2011 18:02:25 +0300 Subject: mmc: add MMC_QUIRK_DISABLE_CD 006ebd5d introduced sdio_disable_cd(), which disconnects the pull-up resistor on CD/DAT[3] (pin 1) of the card. Make it possible to start using sdio_disable_cd() by introducing MMC_QUIRK_DISABLE_CD. Signed-off-by: Ohad Ben-Cohen Signed-off-by: Chris Ball --- drivers/mmc/core/sdio.c | 2 +- include/linux/mmc/card.h | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index c3c2bd0874e8..a5840c0de2e4 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -187,7 +187,7 @@ static int sdio_disable_cd(struct mmc_card *card) int ret; u8 ctrl; - if (!card->cccr.disable_cd) + if (!mmc_card_disable_cd(card)) return 0; ret = mmc_io_rw_direct(card, 0, 0, SDIO_CCCR_IF, 0, &ctrl); diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 317a0029e7d7..2a7e54970c93 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -129,6 +129,7 @@ struct mmc_card { /* (missing CIA registers) */ #define MMC_QUIRK_BROKEN_CLK_GATING (1<<3) /* clock gating the sdio bus will make card fail */ #define MMC_QUIRK_NONSTD_FUNC_IF (1<<4) /* SDIO card has nonstd function interfaces */ +#define MMC_QUIRK_DISABLE_CD (1<<5) /* disconnect CD/DAT[3] resistor */ unsigned int erase_size; /* erase size in sectors */ unsigned int erase_shift; /* if erase unit is power 2 */ @@ -184,6 +185,11 @@ static inline int mmc_blksz_for_byte_mode(const struct mmc_card *c) return c->quirks & MMC_QUIRK_BLKSZ_FOR_BYTE_MODE; } +static inline int mmc_card_disable_cd(const struct mmc_card *c) +{ + return c->quirks & MMC_QUIRK_DISABLE_CD; +} + static inline int mmc_card_nonstd_func_interface(const struct mmc_card *c) { return c->quirks & MMC_QUIRK_NONSTD_FUNC_IF; -- cgit v1.2.3 From 32780cd1350e651e68bdf33b7f5b009d21d5b794 Mon Sep 17 00:00:00 2001 From: Andrei Warkentin Date: Mon, 11 Apr 2011 17:02:15 -0500 Subject: mmc: quirks: Extends card quirks with MMC/SD quirks matching the CID. The current mechanism is SDIO-only. This allows us to create function-specific quirks, without creating messy Kconfig dependencies, or polluting core/ with function-specific code. Signed-off-by: Andrei Warkentin Acked-by: Arnd Bergmann Signed-off-by: Chris Ball --- drivers/mmc/core/core.h | 2 - drivers/mmc/core/quirks.c | 93 +++++++++++++++++++++-------------------------- drivers/mmc/core/sdio.c | 2 +- include/linux/mmc/card.h | 91 +++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 133 insertions(+), 55 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h index 20b1c0831eac..ca1fdde29df6 100644 --- a/drivers/mmc/core/core.h +++ b/drivers/mmc/core/core.h @@ -61,8 +61,6 @@ int mmc_attach_mmc(struct mmc_host *host); int mmc_attach_sd(struct mmc_host *host); int mmc_attach_sdio(struct mmc_host *host); -void mmc_fixup_device(struct mmc_card *card); - /* Module parameters */ extern int use_spi_crc; diff --git a/drivers/mmc/core/quirks.c b/drivers/mmc/core/quirks.c index a4c42edc6cb3..3a596217029e 100644 --- a/drivers/mmc/core/quirks.c +++ b/drivers/mmc/core/quirks.c @@ -1,7 +1,8 @@ /* - * This file contains work-arounds for many known sdio hardware - * bugs. + * This file contains work-arounds for many known SD/MMC + * and SDIO hardware bugs. * + * Copyright (c) 2011 Andrei Warkentin * Copyright (c) 2011 Pierre Tardy * Inspired from pci fixup code: * Copyright (c) 1999 Martin Mares @@ -11,34 +12,14 @@ #include #include #include -#include -/* - * The world is not perfect and supplies us with broken mmc/sdio devices. - * For at least a part of these bugs we need a work-around - */ - -struct mmc_fixup { - u16 vendor, device; /* You can use SDIO_ANY_ID here of course */ - void (*vendor_fixup)(struct mmc_card *card, int data); - int data; -}; - -/* - * This hook just adds a quirk unconditionnally - */ -static void __maybe_unused add_quirk(struct mmc_card *card, int data) -{ - card->quirks |= data; -} +#ifndef SDIO_VENDOR_ID_TI +#define SDIO_VENDOR_ID_TI 0x0097 +#endif -/* - * This hook just removes a quirk unconditionnally - */ -static void __maybe_unused remove_quirk(struct mmc_card *card, int data) -{ - card->quirks &= ~data; -} +#ifndef SDIO_DEVICE_ID_TI_WL1271 +#define SDIO_DEVICE_ID_TI_WL1271 0x4076 +#endif /* * This hook just adds a quirk for all sdio devices @@ -49,37 +30,47 @@ static void add_quirk_for_sdio_devices(struct mmc_card *card, int data) card->quirks |= data; } -#ifndef SDIO_VENDOR_ID_TI -#define SDIO_VENDOR_ID_TI 0x0097 -#endif - -#ifndef SDIO_DEVICE_ID_TI_WL1271 -#define SDIO_DEVICE_ID_TI_WL1271 0x4076 -#endif - static const struct mmc_fixup mmc_fixup_methods[] = { /* by default sdio devices are considered CLK_GATING broken */ /* good cards will be whitelisted as they are tested */ - { SDIO_ANY_ID, SDIO_ANY_ID, - add_quirk_for_sdio_devices, MMC_QUIRK_BROKEN_CLK_GATING }, - { SDIO_VENDOR_ID_TI, SDIO_DEVICE_ID_TI_WL1271, - remove_quirk, MMC_QUIRK_BROKEN_CLK_GATING }, - { SDIO_VENDOR_ID_TI, SDIO_DEVICE_ID_TI_WL1271, - add_quirk, MMC_QUIRK_NONSTD_FUNC_IF }, - { SDIO_VENDOR_ID_TI, SDIO_DEVICE_ID_TI_WL1271, - add_quirk, MMC_QUIRK_DISABLE_CD }, - { 0 } + SDIO_FIXUP(SDIO_ANY_ID, SDIO_ANY_ID, + add_quirk_for_sdio_devices, + MMC_QUIRK_BROKEN_CLK_GATING), + + SDIO_FIXUP(SDIO_VENDOR_ID_TI, SDIO_DEVICE_ID_TI_WL1271, + remove_quirk, MMC_QUIRK_BROKEN_CLK_GATING), + + SDIO_FIXUP(SDIO_VENDOR_ID_TI, SDIO_DEVICE_ID_TI_WL1271, + add_quirk, MMC_QUIRK_NONSTD_FUNC_IF), + + SDIO_FIXUP(SDIO_VENDOR_ID_TI, SDIO_DEVICE_ID_TI_WL1271, + add_quirk, MMC_QUIRK_DISABLE_CD), + + END_FIXUP }; -void mmc_fixup_device(struct mmc_card *card) +void mmc_fixup_device(struct mmc_card *card, const struct mmc_fixup *table) { const struct mmc_fixup *f; + u64 rev = cid_rev_card(card); + + /* Non-core specific workarounds. */ + if (!table) + table = mmc_fixup_methods; - for (f = mmc_fixup_methods; f->vendor_fixup; f++) { - if ((f->vendor == card->cis.vendor - || f->vendor == (u16) SDIO_ANY_ID) && - (f->device == card->cis.device - || f->device == (u16) SDIO_ANY_ID)) { + for (f = table; f->vendor_fixup; f++) { + if ((f->manfid == CID_MANFID_ANY || + f->manfid == card->cid.manfid) && + (f->oemid == CID_OEMID_ANY || + f->oemid == card->cid.oemid) && + (f->name == CID_NAME_ANY || + !strncmp(f->name, card->cid.prod_name, + sizeof(card->cid.prod_name))) && + (f->cis_vendor == card->cis.vendor || + f->cis_vendor == (u16) SDIO_ANY_ID) && + (f->cis_device == card->cis.device || + f->cis_device == (u16) SDIO_ANY_ID) && + rev >= f->rev_start && rev <= f->rev_end) { dev_dbg(&card->dev, "calling %pF\n", f->vendor_fixup); f->vendor_fixup(card, f->data); } diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index a5840c0de2e4..1e6095961500 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -472,7 +472,7 @@ static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr, card = oldcard; } - mmc_fixup_device(card); + mmc_fixup_device(card, NULL); if (card->type == MMC_TYPE_SD_COMBO) { err = mmc_sd_setup_card(host, card, oldcard != NULL); diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 2a7e54970c93..c6513175f7f1 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -11,6 +11,7 @@ #define LINUX_MMC_CARD_H #include +#include struct mmc_cid { unsigned int manfid; @@ -157,7 +158,92 @@ struct mmc_card { struct dentry *debugfs_root; }; -void mmc_fixup_device(struct mmc_card *dev); +/* + * The world is not perfect and supplies us with broken mmc/sdio devices. + * For at least some of these bugs we need a work-around. + */ + +struct mmc_fixup { + /* CID-specific fields. */ + const char *name; + + /* Valid revision range */ + u64 rev_start, rev_end; + + unsigned int manfid; + unsigned short oemid; + + /* SDIO-specfic fields. You can use SDIO_ANY_ID here of course */ + u16 cis_vendor, cis_device; + + void (*vendor_fixup)(struct mmc_card *card, int data); + int data; +}; + +#define CID_MANFID_ANY (-1ul) +#define CID_OEMID_ANY ((unsigned short) -1) +#define CID_NAME_ANY (NULL) + +#define END_FIXUP { 0 } + +#define _FIXUP_EXT(_name, _manfid, _oemid, _rev_start, _rev_end, \ + _cis_vendor, _cis_device, \ + _fixup, _data) \ + { \ + .name = (_name), \ + .manfid = (_manfid), \ + .oemid = (_oemid), \ + .rev_start = (_rev_start), \ + .rev_end = (_rev_end), \ + .cis_vendor = (_cis_vendor), \ + .cis_device = (_cis_device), \ + .vendor_fixup = (_fixup), \ + .data = (_data), \ + } + +#define MMC_FIXUP_REV(_name, _manfid, _oemid, _rev_start, _rev_end, \ + _fixup, _data) \ + _FIXUP_EXT(_name, _manfid, \ + _oemid, _rev_start, _rev_end, \ + SDIO_ANY_ID, SDIO_ANY_ID, \ + _fixup, _data) \ + +#define MMC_FIXUP(_name, _manfid, _oemid, _fixup, _data) \ + MMC_FIXUP_REV(_name, _manfid, _oemid, 0, -1ull, _fixup, _data) + +#define SDIO_FIXUP(_vendor, _device, _fixup, _data) \ + _FIXUP_EXT(CID_NAME_ANY, CID_MANFID_ANY, \ + CID_OEMID_ANY, 0, -1ull, \ + _vendor, _device, \ + _fixup, _data) \ + +#define cid_rev(hwrev, fwrev, year, month) \ + (((u64) hwrev) << 40 | \ + ((u64) fwrev) << 32 | \ + ((u64) year) << 16 | \ + ((u64) month)) + +#define cid_rev_card(card) \ + cid_rev(card->cid.hwrev, \ + card->cid.fwrev, \ + card->cid.year, \ + card->cid.month) + +/* + * This hook just adds a quirk unconditionally. + */ +static inline void __maybe_unused add_quirk(struct mmc_card *card, int data) +{ + card->quirks |= data; +} + +/* + * This hook just removes a quirk unconditionally. + */ +static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data) +{ + card->quirks &= ~data; +} #define mmc_card_mmc(c) ((c)->type == MMC_TYPE_MMC) #define mmc_card_sd(c) ((c)->type == MMC_TYPE_SD) @@ -218,4 +304,7 @@ struct mmc_driver { extern int mmc_register_driver(struct mmc_driver *); extern void mmc_unregister_driver(struct mmc_driver *); +extern void mmc_fixup_device(struct mmc_card *card, + const struct mmc_fixup *table); + #endif -- cgit v1.2.3 From 853c6cac0dc0d9d330deb5b48c19eebafaed1841 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 12 Apr 2011 12:59:09 -0400 Subject: mmc: quirks: fix truncation warnings Fix data truncation warnings: .manfid is not unsigned long: drivers/mmc/core/quirks.c:36: warning: large integer implicitly truncated to unsigned type drivers/mmc/core/quirks.c:40: warning: large integer implicitly truncated to unsigned type drivers/mmc/core/quirks.c:43: warning: large integer implicitly truncated to unsigned type drivers/mmc/core/quirks.c:46: warning: large integer implicitly truncated to unsigned type Signed-off-by: Randy Dunlap Signed-off-by: Chris Ball --- include/linux/mmc/card.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index c6513175f7f1..937f852cf01e 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -180,7 +180,7 @@ struct mmc_fixup { int data; }; -#define CID_MANFID_ANY (-1ul) +#define CID_MANFID_ANY (-1u) #define CID_OEMID_ANY ((unsigned short) -1) #define CID_NAME_ANY (NULL) -- cgit v1.2.3 From eaa02f751ff4f8abfc2e55a15c20a5a274244418 Mon Sep 17 00:00:00 2001 From: Andrei Warkentin Date: Mon, 11 Apr 2011 16:13:41 -0500 Subject: mmc: core: Rename erase_timeout to cmd_timeout_ms. Renames erase_timeout to cmd_timeout_ms inside struct mmc_command. First step to making host honor timeouts for non-data-transfer commands. Cleans up erase timeout code. Signed-off-by: Andrei Warkentin Signed-off-by: Chris Ball --- drivers/mmc/core/core.c | 39 +++++++++++++++++++++------------------ include/linux/mmc/core.h | 2 +- 2 files changed, 22 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index c2350e474159..5178d5daa5f4 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -1187,9 +1187,8 @@ void mmc_init_erase(struct mmc_card *card) } } -static void mmc_set_mmc_erase_timeout(struct mmc_card *card, - struct mmc_command *cmd, - unsigned int arg, unsigned int qty) +static unsigned int mmc_mmc_erase_timeout(struct mmc_card *card, + unsigned int arg, unsigned int qty) { unsigned int erase_timeout; @@ -1246,38 +1245,42 @@ static void mmc_set_mmc_erase_timeout(struct mmc_card *card, if (mmc_host_is_spi(card->host) && erase_timeout < 1000) erase_timeout = 1000; - cmd->erase_timeout = erase_timeout; + return erase_timeout; } -static void mmc_set_sd_erase_timeout(struct mmc_card *card, - struct mmc_command *cmd, unsigned int arg, - unsigned int qty) +static unsigned int mmc_sd_erase_timeout(struct mmc_card *card, + unsigned int arg, + unsigned int qty) { + unsigned int erase_timeout; + if (card->ssr.erase_timeout) { /* Erase timeout specified in SD Status Register (SSR) */ - cmd->erase_timeout = card->ssr.erase_timeout * qty + - card->ssr.erase_offset; + erase_timeout = card->ssr.erase_timeout * qty + + card->ssr.erase_offset; } else { /* * Erase timeout not specified in SD Status Register (SSR) so * use 250ms per write block. */ - cmd->erase_timeout = 250 * qty; + erase_timeout = 250 * qty; } /* Must not be less than 1 second */ - if (cmd->erase_timeout < 1000) - cmd->erase_timeout = 1000; + if (erase_timeout < 1000) + erase_timeout = 1000; + + return erase_timeout; } -static void mmc_set_erase_timeout(struct mmc_card *card, - struct mmc_command *cmd, unsigned int arg, - unsigned int qty) +static unsigned int mmc_erase_timeout(struct mmc_card *card, + unsigned int arg, + unsigned int qty) { if (mmc_card_sd(card)) - mmc_set_sd_erase_timeout(card, cmd, arg, qty); + return mmc_sd_erase_timeout(card, arg, qty); else - mmc_set_mmc_erase_timeout(card, cmd, arg, qty); + return mmc_mmc_erase_timeout(card, arg, qty); } static int mmc_do_erase(struct mmc_card *card, unsigned int from, @@ -1351,7 +1354,7 @@ static int mmc_do_erase(struct mmc_card *card, unsigned int from, cmd.opcode = MMC_ERASE; cmd.arg = arg; cmd.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC; - mmc_set_erase_timeout(card, &cmd, arg, qty); + cmd.cmd_timeout_ms = mmc_erase_timeout(card, arg, qty); err = mmc_wait_for_cmd(card->host, &cmd, 0); if (err) { printk(KERN_ERR "mmc_erase: erase error %d, status %#x\n", diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 07f27af4dba5..811e96e8d1fe 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -92,7 +92,7 @@ struct mmc_command { * actively failing requests */ - unsigned int erase_timeout; /* in milliseconds */ + unsigned int cmd_timeout_ms; /* in milliseconds */ struct mmc_data *data; /* data segment associated with cmd */ struct mmc_request *mrq; /* associated request */ -- cgit v1.2.3 From d3a8d95dcbb726b9cf0bbc166b2473bdd236c88c Mon Sep 17 00:00:00 2001 From: Andrei Warkentin Date: Mon, 11 Apr 2011 16:13:43 -0500 Subject: mmc: core: Allow setting CMD timeout for CMD6 (SWITCH). CMD6 is an R1B-type command, where DAT is used as busy. Depending on register written using CMD6, timeout value can be different as per spec. Signed-off-by: Andrei Warkentin Signed-off-by: Chris Ball --- drivers/mmc/core/mmc.c | 14 ++++++++------ drivers/mmc/core/mmc_ops.c | 16 +++++++++++++++- drivers/mmc/core/mmc_ops.h | 1 - include/linux/mmc/core.h | 1 + 4 files changed, 24 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index ae6b8fd38800..396cb23625d2 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -548,7 +548,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, */ if (card->ext_csd.enhanced_area_en) { err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, - EXT_CSD_ERASE_GROUP_DEF, 1); + EXT_CSD_ERASE_GROUP_DEF, 1, 0); if (err && err != -EBADMSG) goto free_card; @@ -579,7 +579,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, if (card->ext_csd.bootconfig & 0x7) { card->ext_csd.bootconfig &= ~0x7; mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_BOOT_CONFIG, - card->ext_csd.bootconfig); + card->ext_csd.bootconfig, 0); } /* @@ -588,7 +588,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, if ((card->ext_csd.hs_max_dtr != 0) && (host->caps & MMC_CAP_MMC_HIGHSPEED)) { err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, - EXT_CSD_HS_TIMING, 1); + EXT_CSD_HS_TIMING, 1, 0); if (err && err != -EBADMSG) goto free_card; @@ -655,7 +655,8 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, ddr = 0; /* no DDR for 1-bit width */ err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_BUS_WIDTH, - ext_csd_bits[idx][0]); + ext_csd_bits[idx][0], + 0); if (!err) { mmc_set_bus_width_ddr(card->host, bus_width, MMC_SDR_MODE); @@ -674,8 +675,9 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, if (!err && ddr) { err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, - EXT_CSD_BUS_WIDTH, - ext_csd_bits[idx][1]); + EXT_CSD_BUS_WIDTH, + ext_csd_bits[idx][1], + 0); } if (err) { printk(KERN_WARNING "%s: switch to bus width %d ddr %d " diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c index f3b22bf89cc9..a2bae625332a 100644 --- a/drivers/mmc/core/mmc_ops.c +++ b/drivers/mmc/core/mmc_ops.c @@ -387,7 +387,19 @@ int mmc_spi_set_crc(struct mmc_host *host, int use_crc) return err; } -int mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value) +/** + * mmc_switch - modify EXT_CSD register + * @card: the MMC card associated with the data transfer + * @set: cmd set values + * @index: EXT_CSD register index + * @value: value to program into EXT_CSD register + * @timeout_ms: timeout (ms) for operation performed by register write, + * timeout of zero implies maximum possible timeout + * + * Modifies the EXT_CSD register for selected card. + */ +int mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value, + unsigned int timeout_ms) { int err; struct mmc_command cmd; @@ -404,6 +416,7 @@ int mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value) (value << 8) | set; cmd.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC; + cmd.cmd_timeout_ms = timeout_ms; err = mmc_wait_for_cmd(card->host, &cmd, MMC_CMD_RETRIES); if (err) @@ -433,6 +446,7 @@ int mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value) return 0; } +EXPORT_SYMBOL_GPL(mmc_switch); int mmc_send_status(struct mmc_card *card, u32 *status) { diff --git a/drivers/mmc/core/mmc_ops.h b/drivers/mmc/core/mmc_ops.h index e6d44b8a18db..9276946fa5b7 100644 --- a/drivers/mmc/core/mmc_ops.h +++ b/drivers/mmc/core/mmc_ops.h @@ -20,7 +20,6 @@ int mmc_all_send_cid(struct mmc_host *host, u32 *cid); int mmc_set_relative_addr(struct mmc_card *card); int mmc_send_csd(struct mmc_card *card, u32 *csd); int mmc_send_ext_csd(struct mmc_card *card, u8 *ext_csd); -int mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value); int mmc_send_status(struct mmc_card *card, u32 *status); int mmc_send_cid(struct mmc_host *host, u32 *cid); int mmc_spi_read_ocr(struct mmc_host *host, int highcap, u32 *ocrp); diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 811e96e8d1fe..f8e4bcbd2846 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -135,6 +135,7 @@ extern void mmc_wait_for_req(struct mmc_host *, struct mmc_request *); extern int mmc_wait_for_cmd(struct mmc_host *, struct mmc_command *, int); extern int mmc_wait_for_app_cmd(struct mmc_host *, struct mmc_card *, struct mmc_command *, int); +extern int mmc_switch(struct mmc_card *, u8, u8, u8, unsigned int); #define MMC_ERASE_ARG 0x00000000 #define MMC_SECURE_ERASE_ARG 0x80000000 -- cgit v1.2.3 From 371a689f64b0da140c3bcd3f55305ffa1c3a58ef Mon Sep 17 00:00:00 2001 From: Andrei Warkentin Date: Mon, 11 Apr 2011 18:10:25 -0500 Subject: mmc: MMC boot partitions support. Allows device MMC boot partitions to be accessed. MMC partitions are treated effectively as separate block devices on the same MMC card. Signed-off-by: Andrei Warkentin Acked-by: Arnd Bergmann Signed-off-by: Chris Ball --- Documentation/mmc/00-INDEX | 2 + Documentation/mmc/mmc-dev-attrs.txt | 10 ++ Documentation/mmc/mmc-dev-parts.txt | 27 ++++ drivers/mmc/card/block.c | 274 +++++++++++++++++++++++++++++++----- drivers/mmc/core/mmc.c | 22 ++- include/linux/mmc/card.h | 4 +- include/linux/mmc/mmc.h | 8 +- 7 files changed, 305 insertions(+), 42 deletions(-) create mode 100644 Documentation/mmc/mmc-dev-parts.txt (limited to 'include/linux') diff --git a/Documentation/mmc/00-INDEX b/Documentation/mmc/00-INDEX index fca586f5b853..93dd7a714075 100644 --- a/Documentation/mmc/00-INDEX +++ b/Documentation/mmc/00-INDEX @@ -2,3 +2,5 @@ - this file mmc-dev-attrs.txt - info on SD and MMC device attributes +mmc-dev-parts.txt + - info on SD and MMC device partitions diff --git a/Documentation/mmc/mmc-dev-attrs.txt b/Documentation/mmc/mmc-dev-attrs.txt index ff2bd685bced..8898a95b41e5 100644 --- a/Documentation/mmc/mmc-dev-attrs.txt +++ b/Documentation/mmc/mmc-dev-attrs.txt @@ -1,3 +1,13 @@ +SD and MMC Block Device Attributes +================================== + +These attributes are defined for the block devices associated with the +SD or MMC device. + +The following attributes are read/write. + + force_ro Enforce read-only access even if write protect switch is off. + SD and MMC Device Attributes ============================ diff --git a/Documentation/mmc/mmc-dev-parts.txt b/Documentation/mmc/mmc-dev-parts.txt new file mode 100644 index 000000000000..2db28b8e662f --- /dev/null +++ b/Documentation/mmc/mmc-dev-parts.txt @@ -0,0 +1,27 @@ +SD and MMC Device Partitions +============================ + +Device partitions are additional logical block devices present on the +SD/MMC device. + +As of this writing, MMC boot partitions as supported and exposed as +/dev/mmcblkXboot0 and /dev/mmcblkXboot1, where X is the index of the +parent /dev/mmcblkX. + +MMC Boot Partitions +=================== + +Read and write access is provided to the two MMC boot partitions. Due to +the sensitive nature of the boot partition contents, which often store +a bootloader or bootloader configuration tables crucial to booting the +platform, write access is disabled by default to reduce the chance of +accidental bricking. + +To enable write access to /dev/mmcblkXbootY, disable the forced read-only +access with: + +echo 0 > /sys/block/mmcblkXbootY/force_ro + +To re-enable read-only access: + +echo 1 > /sys/block/mmcblkXbootY/force_ro diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 3e9082b729ff..1e6bd910e79e 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -76,9 +76,19 @@ struct mmc_blk_data { spinlock_t lock; struct gendisk *disk; struct mmc_queue queue; + struct list_head part; unsigned int usage; unsigned int read_only; + unsigned int part_type; + + /* + * Only set in main mmc_blk_data associated + * with mmc_card with mmc_set_drvdata, and keeps + * track of the current selected device partition. + */ + unsigned int part_curr; + struct device_attribute force_ro; }; static DEFINE_MUTEX(open_lock); @@ -101,17 +111,22 @@ static struct mmc_blk_data *mmc_blk_get(struct gendisk *disk) return md; } +static inline int mmc_get_devidx(struct gendisk *disk) +{ + int devmaj = MAJOR(disk_devt(disk)); + int devidx = MINOR(disk_devt(disk)) / perdev_minors; + + if (!devmaj) + devidx = disk->first_minor / perdev_minors; + return devidx; +} + static void mmc_blk_put(struct mmc_blk_data *md) { mutex_lock(&open_lock); md->usage--; if (md->usage == 0) { - int devmaj = MAJOR(disk_devt(md->disk)); - int devidx = MINOR(disk_devt(md->disk)) / perdev_minors; - - if (!devmaj) - devidx = md->disk->first_minor / perdev_minors; - + int devidx = mmc_get_devidx(md->disk); blk_cleanup_queue(md->queue.queue); __clear_bit(devidx, dev_use); @@ -122,6 +137,38 @@ static void mmc_blk_put(struct mmc_blk_data *md) mutex_unlock(&open_lock); } +static ssize_t force_ro_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + int ret; + struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev)); + + ret = snprintf(buf, PAGE_SIZE, "%d", + get_disk_ro(dev_to_disk(dev)) ^ + md->read_only); + mmc_blk_put(md); + return ret; +} + +static ssize_t force_ro_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + int ret; + char *end; + struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev)); + unsigned long set = simple_strtoul(buf, &end, 0); + if (end == buf) { + ret = -EINVAL; + goto out; + } + + set_disk_ro(dev_to_disk(dev), set || md->read_only); + ret = count; +out: + mmc_blk_put(md); + return ret; +} + static int mmc_blk_open(struct block_device *bdev, fmode_t mode) { struct mmc_blk_data *md = mmc_blk_get(bdev->bd_disk); @@ -176,6 +223,29 @@ struct mmc_blk_request { struct mmc_data data; }; +static inline int mmc_blk_part_switch(struct mmc_card *card, + struct mmc_blk_data *md) +{ + int ret; + struct mmc_blk_data *main_md = mmc_get_drvdata(card); + if (main_md->part_curr == md->part_type) + return 0; + + if (mmc_card_mmc(card)) { + card->ext_csd.part_config &= ~EXT_CSD_PART_CONFIG_ACC_MASK; + card->ext_csd.part_config |= md->part_type; + + ret = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, + EXT_CSD_PART_CONFIG, card->ext_csd.part_config, + card->ext_csd.part_time); + if (ret) + return ret; +} + + main_md->part_curr = md->part_type; + return 0; +} + static u32 mmc_sd_num_wr_blocks(struct mmc_card *card) { int err; @@ -620,6 +690,11 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) struct mmc_card *card = md->queue.card; mmc_claim_host(card->host); + ret = mmc_blk_part_switch(card, md); + if (ret) { + ret = 0; + goto out; + } if (req->cmd_flags & REQ_DISCARD) { if (req->cmd_flags & REQ_SECURE) @@ -632,6 +707,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) ret = mmc_blk_issue_rw_rq(mq, req); } +out: mmc_release_host(card->host); return ret; } @@ -642,7 +718,11 @@ static inline int mmc_blk_readonly(struct mmc_card *card) !(card->csd.cmdclass & CCC_BLOCK_WRITE); } -static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card) +static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, + struct device *parent, + sector_t size, + bool default_ro, + const char *subname) { struct mmc_blk_data *md; int devidx, ret; @@ -658,7 +738,6 @@ static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card) goto out; } - /* * Set the read-only status based on the supported commands * and the write protect switch. @@ -672,6 +751,7 @@ static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card) } spin_lock_init(&md->lock); + INIT_LIST_HEAD(&md->part); md->usage = 1; ret = mmc_init_queue(&md->queue, card, &md->lock); @@ -686,8 +766,8 @@ static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card) md->disk->fops = &mmc_bdops; md->disk->private_data = md; md->disk->queue = md->queue.queue; - md->disk->driverfs_dev = &card->dev; - set_disk_ro(md->disk, md->read_only); + md->disk->driverfs_dev = parent; + set_disk_ro(md->disk, md->read_only || default_ro); if (REL_WRITES_SUPPORTED(card)) blk_queue_flush(md->queue.queue, REQ_FLUSH | REQ_FUA); @@ -703,33 +783,97 @@ static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card) * messages to tell when the card is present. */ - snprintf(md->disk->disk_name, sizeof(md->disk->disk_name), - "mmcblk%d", devidx); + if (subname) + snprintf(md->disk->disk_name, sizeof(md->disk->disk_name), + "mmcblk%d%s", + mmc_get_devidx(dev_to_disk(parent)), subname); + else + snprintf(md->disk->disk_name, sizeof(md->disk->disk_name), + "mmcblk%d", devidx); blk_queue_logical_block_size(md->queue.queue, 512); + set_capacity(md->disk, size); + return md; + + err_putdisk: + put_disk(md->disk); + err_kfree: + kfree(md); + out: + return ERR_PTR(ret); +} + +static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card) +{ + sector_t size; + struct mmc_blk_data *md; if (!mmc_card_sd(card) && mmc_card_blockaddr(card)) { /* * The EXT_CSD sector count is in number or 512 byte * sectors. */ - set_capacity(md->disk, card->ext_csd.sectors); + size = card->ext_csd.sectors; } else { /* * The CSD capacity field is in units of read_blkbits. * set_capacity takes units of 512 bytes. */ - set_capacity(md->disk, - card->csd.capacity << (card->csd.read_blkbits - 9)); + size = card->csd.capacity << (card->csd.read_blkbits - 9); } + + md = mmc_blk_alloc_req(card, &card->dev, size, false, NULL); return md; +} - err_putdisk: - put_disk(md->disk); - err_kfree: - kfree(md); - out: - return ERR_PTR(ret); +static int mmc_blk_alloc_part(struct mmc_card *card, + struct mmc_blk_data *md, + unsigned int part_type, + sector_t size, + bool default_ro, + const char *subname) +{ + char cap_str[10]; + struct mmc_blk_data *part_md; + + part_md = mmc_blk_alloc_req(card, disk_to_dev(md->disk), size, default_ro, + subname); + if (IS_ERR(part_md)) + return PTR_ERR(part_md); + part_md->part_type = part_type; + list_add(&part_md->part, &md->part); + + string_get_size((u64)get_capacity(part_md->disk) << 9, STRING_UNITS_2, + cap_str, sizeof(cap_str)); + printk(KERN_INFO "%s: %s %s partition %u %s\n", + part_md->disk->disk_name, mmc_card_id(card), + mmc_card_name(card), part_md->part_type, cap_str); + return 0; +} + +static int mmc_blk_alloc_parts(struct mmc_card *card, struct mmc_blk_data *md) +{ + int ret = 0; + + if (!mmc_card_mmc(card)) + return 0; + + if (card->ext_csd.boot_size) { + ret = mmc_blk_alloc_part(card, md, EXT_CSD_PART_CONFIG_ACC_BOOT0, + card->ext_csd.boot_size >> 9, + true, + "boot0"); + if (ret) + return ret; + ret = mmc_blk_alloc_part(card, md, EXT_CSD_PART_CONFIG_ACC_BOOT1, + card->ext_csd.boot_size >> 9, + true, + "boot1"); + if (ret) + return ret; + } + + return ret; } static int @@ -750,9 +894,54 @@ mmc_blk_set_blksize(struct mmc_blk_data *md, struct mmc_card *card) return 0; } +static void mmc_blk_remove_req(struct mmc_blk_data *md) +{ + if (md) { + if (md->disk->flags & GENHD_FL_UP) { + device_remove_file(disk_to_dev(md->disk), &md->force_ro); + + /* Stop new requests from getting into the queue */ + del_gendisk(md->disk); + } + + /* Then flush out any already in there */ + mmc_cleanup_queue(&md->queue); + mmc_blk_put(md); + } +} + +static void mmc_blk_remove_parts(struct mmc_card *card, + struct mmc_blk_data *md) +{ + struct list_head *pos, *q; + struct mmc_blk_data *part_md; + + list_for_each_safe(pos, q, &md->part) { + part_md = list_entry(pos, struct mmc_blk_data, part); + list_del(pos); + mmc_blk_remove_req(part_md); + } +} + +static int mmc_add_disk(struct mmc_blk_data *md) +{ + int ret; + + add_disk(md->disk); + md->force_ro.show = force_ro_show; + md->force_ro.store = force_ro_store; + md->force_ro.attr.name = "force_ro"; + md->force_ro.attr.mode = S_IRUGO | S_IWUSR; + ret = device_create_file(disk_to_dev(md->disk), &md->force_ro); + if (ret) + del_gendisk(md->disk); + + return ret; +} + static int mmc_blk_probe(struct mmc_card *card) { - struct mmc_blk_data *md; + struct mmc_blk_data *md, *part_md; int err; char cap_str[10]; @@ -776,14 +965,22 @@ static int mmc_blk_probe(struct mmc_card *card) md->disk->disk_name, mmc_card_id(card), mmc_card_name(card), cap_str, md->read_only ? "(ro)" : ""); + if (mmc_blk_alloc_parts(card, md)) + goto out; + mmc_set_drvdata(card, md); - add_disk(md->disk); + if (mmc_add_disk(md)) + goto out; + + list_for_each_entry(part_md, &md->part, part) { + if (mmc_add_disk(part_md)) + goto out; + } return 0; out: - mmc_cleanup_queue(&md->queue); - mmc_blk_put(md); - + mmc_blk_remove_parts(card, md); + mmc_blk_remove_req(md); return err; } @@ -791,36 +988,43 @@ static void mmc_blk_remove(struct mmc_card *card) { struct mmc_blk_data *md = mmc_get_drvdata(card); - if (md) { - /* Stop new requests from getting into the queue */ - del_gendisk(md->disk); - - /* Then flush out any already in there */ - mmc_cleanup_queue(&md->queue); - - mmc_blk_put(md); - } + mmc_blk_remove_parts(card, md); + mmc_blk_remove_req(md); mmc_set_drvdata(card, NULL); } #ifdef CONFIG_PM static int mmc_blk_suspend(struct mmc_card *card, pm_message_t state) { + struct mmc_blk_data *part_md; struct mmc_blk_data *md = mmc_get_drvdata(card); if (md) { mmc_queue_suspend(&md->queue); + list_for_each_entry(part_md, &md->part, part) { + mmc_queue_suspend(&part_md->queue); + } } return 0; } static int mmc_blk_resume(struct mmc_card *card) { + struct mmc_blk_data *part_md; struct mmc_blk_data *md = mmc_get_drvdata(card); if (md) { mmc_blk_set_blksize(md, card); + + /* + * Resume involves the card going into idle state, + * so current partition is always the main one. + */ + md->part_curr = md->part_type; mmc_queue_resume(&md->queue); + list_for_each_entry(part_md, &md->part, part) { + mmc_queue_resume(&part_md->queue); + } } return 0; } diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 396cb23625d2..a2c795e8f9dd 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -288,7 +288,10 @@ static int mmc_read_ext_csd(struct mmc_card *card) if (card->ext_csd.rev >= 3) { u8 sa_shift = ext_csd[EXT_CSD_S_A_TIMEOUT]; - card->ext_csd.bootconfig = ext_csd[EXT_CSD_BOOT_CONFIG]; + card->ext_csd.part_config = ext_csd[EXT_CSD_PART_CONFIG]; + + /* EXT_CSD value is in units of 10ms, but we store in ms */ + card->ext_csd.part_time = 10 * ext_csd[EXT_CSD_PART_SWITCH_TIME]; /* Sleep / awake timeout in 100ns units */ if (sa_shift > 0 && sa_shift <= 0x17) @@ -302,6 +305,12 @@ static int mmc_read_ext_csd(struct mmc_card *card) ext_csd[EXT_CSD_HC_ERASE_GRP_SIZE] << 10; card->ext_csd.rel_sectors = ext_csd[EXT_CSD_REL_WR_SEC_C]; + + /* + * There are two boot regions of equal size, defined in + * multiples of 128K. + */ + card->ext_csd.boot_size = ext_csd[EXT_CSD_BOOT_MULT] << 17; } if (card->ext_csd.rev >= 4) { @@ -576,10 +585,13 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, /* * Ensure eMMC user default partition is enabled */ - if (card->ext_csd.bootconfig & 0x7) { - card->ext_csd.bootconfig &= ~0x7; - mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_BOOT_CONFIG, - card->ext_csd.bootconfig, 0); + if (card->ext_csd.part_config & EXT_CSD_PART_CONFIG_ACC_MASK) { + card->ext_csd.part_config &= ~EXT_CSD_PART_CONFIG_ACC_MASK; + err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_PART_CONFIG, + card->ext_csd.part_config, + card->ext_csd.part_time); + if (err && err != -EBADMSG) + goto free_card; } /* diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 937f852cf01e..0c7a58b14343 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -48,7 +48,8 @@ struct mmc_ext_csd { u8 sec_feature_support; u8 rel_sectors; u8 rel_param; - u8 bootconfig; + u8 part_config; + unsigned int part_time; /* Units: ms */ unsigned int sa_timeout; /* Units: 100ns */ unsigned int hs_max_dtr; unsigned int sectors; @@ -61,6 +62,7 @@ struct mmc_ext_csd { bool enhanced_area_en; /* enable bit */ unsigned long long enhanced_area_offset; /* Units: Byte */ unsigned int enhanced_area_size; /* Units: KB */ + unsigned int boot_size; /* in bytes */ }; struct sd_scr { diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 390aa6eef676..373b2bf5e5b5 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -257,19 +257,21 @@ struct _mmc_csd { #define EXT_CSD_PARTITION_SUPPORT 160 /* RO */ #define EXT_CSD_WR_REL_PARAM 166 /* RO */ #define EXT_CSD_ERASE_GROUP_DEF 175 /* R/W */ -#define EXT_CSD_BOOT_CONFIG 179 /* R/W */ +#define EXT_CSD_PART_CONFIG 179 /* R/W */ #define EXT_CSD_ERASED_MEM_CONT 181 /* RO */ #define EXT_CSD_BUS_WIDTH 183 /* R/W */ #define EXT_CSD_HS_TIMING 185 /* R/W */ #define EXT_CSD_REV 192 /* RO */ #define EXT_CSD_STRUCTURE 194 /* RO */ #define EXT_CSD_CARD_TYPE 196 /* RO */ +#define EXT_CSD_PART_SWITCH_TIME 199 /* RO */ #define EXT_CSD_SEC_CNT 212 /* RO, 4 bytes */ #define EXT_CSD_S_A_TIMEOUT 217 /* RO */ #define EXT_CSD_REL_WR_SEC_C 222 /* RO */ #define EXT_CSD_HC_WP_GRP_SIZE 221 /* RO */ #define EXT_CSD_ERASE_TIMEOUT_MULT 223 /* RO */ #define EXT_CSD_HC_ERASE_GRP_SIZE 224 /* RO */ +#define EXT_CSD_BOOT_MULT 226 /* RO */ #define EXT_CSD_SEC_TRIM_MULT 229 /* RO */ #define EXT_CSD_SEC_ERASE_MULT 230 /* RO */ #define EXT_CSD_SEC_FEATURE_SUPPORT 231 /* RO */ @@ -281,6 +283,10 @@ struct _mmc_csd { #define EXT_CSD_WR_REL_PARAM_EN (1<<2) +#define EXT_CSD_PART_CONFIG_ACC_MASK (0x7) +#define EXT_CSD_PART_CONFIG_ACC_BOOT0 (0x1) +#define EXT_CSD_PART_CONFIG_ACC_BOOT1 (0x2) + #define EXT_CSD_CMD_SET_NORMAL (1<<0) #define EXT_CSD_CMD_SET_SECURE (1<<1) #define EXT_CSD_CMD_SET_CPSECURE (1<<2) -- cgit v1.2.3 From 6a7a6b45f454686a1549729bfbae31f0b3b595d6 Mon Sep 17 00:00:00 2001 From: Andrei Warkentin Date: Tue, 12 Apr 2011 15:06:53 -0500 Subject: mmc: quirks: Fix erase/trim for certain SanDisk cards. CMD38 argument is passed through EXT_CSD[113]. Signed-off-by: Andrei Warkentin Signed-off-by: Chris Ball --- drivers/mmc/card/block.c | 43 ++++++++++++++++++++++++++++++++++++++++++- include/linux/mmc/card.h | 1 + 2 files changed, 43 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 288d27394ef9..c20995323348 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -48,6 +48,13 @@ MODULE_ALIAS("mmc:block"); #endif #define MODULE_PARAM_PREFIX "mmcblk." +#define INAND_CMD38_ARG_EXT_CSD 113 +#define INAND_CMD38_ARG_ERASE 0x00 +#define INAND_CMD38_ARG_TRIM 0x01 +#define INAND_CMD38_ARG_SECERASE 0x80 +#define INAND_CMD38_ARG_SECTRIM1 0x81 +#define INAND_CMD38_ARG_SECTRIM2 0x88 + #define REL_WRITES_SUPPORTED(card) (mmc_card_mmc((card)) && \ (((card)->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN) || \ ((card)->ext_csd.rel_sectors))) @@ -356,6 +363,16 @@ static int mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req) else arg = MMC_ERASE_ARG; + if (card->quirks & MMC_QUIRK_INAND_CMD38) { + err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, + INAND_CMD38_ARG_EXT_CSD, + arg == MMC_TRIM_ARG ? + INAND_CMD38_ARG_TRIM : + INAND_CMD38_ARG_ERASE, + 0); + if (err) + goto out; + } err = mmc_erase(card, from, nr, arg); out: spin_lock_irq(&md->lock); @@ -386,9 +403,28 @@ static int mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq, else arg = MMC_SECURE_ERASE_ARG; + if (card->quirks & MMC_QUIRK_INAND_CMD38) { + err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, + INAND_CMD38_ARG_EXT_CSD, + arg == MMC_SECURE_TRIM1_ARG ? + INAND_CMD38_ARG_SECTRIM1 : + INAND_CMD38_ARG_SECERASE, + 0); + if (err) + goto out; + } err = mmc_erase(card, from, nr, arg); - if (!err && arg == MMC_SECURE_TRIM1_ARG) + if (!err && arg == MMC_SECURE_TRIM1_ARG) { + if (card->quirks & MMC_QUIRK_INAND_CMD38) { + err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, + INAND_CMD38_ARG_EXT_CSD, + INAND_CMD38_ARG_SECTRIM2, + 0); + if (err) + goto out; + } err = mmc_erase(card, from, nr, MMC_SECURE_TRIM2_ARG); + } out: spin_lock_irq(&md->lock); __blk_end_request(req, err, blk_rq_bytes(req)); @@ -941,6 +977,11 @@ static int mmc_add_disk(struct mmc_blk_data *md) static const struct mmc_fixup blk_fixups[] = { + MMC_FIXUP("SEM02G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38), + MMC_FIXUP("SEM04G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38), + MMC_FIXUP("SEM08G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38), + MMC_FIXUP("SEM16G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38), + MMC_FIXUP("SEM32G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38), END_FIXUP }; diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 0c7a58b14343..72a98681ef47 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -133,6 +133,7 @@ struct mmc_card { #define MMC_QUIRK_BROKEN_CLK_GATING (1<<3) /* clock gating the sdio bus will make card fail */ #define MMC_QUIRK_NONSTD_FUNC_IF (1<<4) /* SDIO card has nonstd function interfaces */ #define MMC_QUIRK_DISABLE_CD (1<<5) /* disconnect CD/DAT[3] resistor */ +#define MMC_QUIRK_INAND_CMD38 (1<<6) /* iNAND devices have broken CMD38 */ unsigned int erase_size; /* erase size in sectors */ unsigned int erase_shift; /* if erase unit is power 2 */ -- cgit v1.2.3 From 82b0e23a295cc58d1290017ee97a40956ad68d94 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 21 Apr 2011 20:26:38 +0200 Subject: mmc: sdhci: Fix read-only detection with JMicron 388 chip On HP laptops with JMicron 388 chip, the write-locked SD card isn't detected correctly as read-only in many cases. This is because the PRESENT_STATE register becomes unsable just after plugging, and it returns the WRITE_PROTECT bit wrongly at the first read. This patch fixes the read-only detection by adding a new sdhci quirk indicating to check the register more intensively with a relatively long delay. The patch is tested with 2.6.39-rc4 kernel. Cc: Aries Lee Signed-off-by: Takashi Iwai Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci-pci.c | 5 +++++ drivers/mmc/host/sdhci.c | 28 ++++++++++++++++++++++++---- include/linux/mmc/sdhci.h | 2 ++ 3 files changed, 31 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c index 82c5dc412679..936bbca19c0a 100644 --- a/drivers/mmc/host/sdhci-pci.c +++ b/drivers/mmc/host/sdhci-pci.c @@ -327,6 +327,11 @@ static int jmicron_probe(struct sdhci_pci_chip *chip) return ret; } + /* quirk for unsable RO-detection on JM388 chips */ + if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB388_SD || + chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB388_ESD) + chip->quirks |= SDHCI_QUIRK_UNSTABLE_RO_DETECT; + return 0; } diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index a70a3d1ef35a..e5cfe70dc23b 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1256,14 +1256,11 @@ out: spin_unlock_irqrestore(&host->lock, flags); } -static int sdhci_get_ro(struct mmc_host *mmc) +static int check_ro(struct sdhci_host *host) { - struct sdhci_host *host; unsigned long flags; int is_readonly; - host = mmc_priv(mmc); - spin_lock_irqsave(&host->lock, flags); if (host->flags & SDHCI_DEVICE_DEAD) @@ -1281,6 +1278,29 @@ static int sdhci_get_ro(struct mmc_host *mmc) !is_readonly : is_readonly; } +#define SAMPLE_COUNT 5 + +static int sdhci_get_ro(struct mmc_host *mmc) +{ + struct sdhci_host *host; + int i, ro_count; + + host = mmc_priv(mmc); + + if (!(host->quirks & SDHCI_QUIRK_UNSTABLE_RO_DETECT)) + return check_ro(host); + + ro_count = 0; + for (i = 0; i < SAMPLE_COUNT; i++) { + if (check_ro(host)) { + if (++ro_count > SAMPLE_COUNT / 2) + return 1; + } + msleep(30); + } + return 0; +} + static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable) { struct sdhci_host *host; diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index 83bd9f76709a..92e1c9ad126c 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -85,6 +85,8 @@ struct sdhci_host { #define SDHCI_QUIRK_NO_HISPD_BIT (1<<29) /* Controller treats ADMA descriptors with length 0000h incorrectly */ #define SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC (1<<30) +/* The read-only detection via SDHCI_PRESENT_STATE register is unstable */ +#define SDHCI_QUIRK_UNSTABLE_RO_DETECT (1<<31) int irq; /* Device IRQ */ void __iomem *ioaddr; /* Mapped address */ -- cgit v1.2.3 From cb87ea28ed9e75a41eb456bfcb547b4e6f10e750 Mon Sep 17 00:00:00 2001 From: John Calixto Date: Tue, 26 Apr 2011 18:56:29 -0400 Subject: mmc: core: Add mmc CMD+ACMD passthrough ioctl Allows appropriately-privileged applications to send CMD (normal) and ACMD (application-specific; preceded with CMD55) commands to cards/devices on the mmc bus. This is primarily useful for enabling the security functionality built in to every SD card. It can also be used as a generic passthrough (e.g. to enable virtual machines to control mmc bus devices directly). However, this use case has not been tested rigorously. Generic passthrough testing was only conducted for a few non-security opcodes to prove the feasibility of the passthrough. Since any opcode can be sent using this passthrough, it is very possible to render the card/device unusable. Applications that use this ioctl must have CAP_SYS_RAWIO. Security commands tested on TI PCIxx12 (SDHCI), Sigma Designs SMP8652 SoC, TI OMAP3621/OMAP3630 SoC, Samsung S5PC110 SoC, Qualcomm MSM7200A SoC. Signed-off-by: John Calixto Reviewed-by: Andrei Warkentin Reviewed-by: Arnd Bergmann Signed-off-by: Chris Ball --- Documentation/ioctl/ioctl-number.txt | 1 + drivers/mmc/card/block.c | 201 +++++++++++++++++++++++++++++++++++ drivers/mmc/core/sd_ops.c | 3 +- include/linux/Kbuild | 1 + include/linux/mmc/Kbuild | 1 + include/linux/mmc/core.h | 1 + include/linux/mmc/ioctl.h | 54 ++++++++++ 7 files changed, 261 insertions(+), 1 deletion(-) create mode 100644 include/linux/mmc/Kbuild create mode 100644 include/linux/mmc/ioctl.h (limited to 'include/linux') diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index a0a5d82b6b0b..2a34d822e6d6 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -304,6 +304,7 @@ Code Seq#(hex) Include File Comments 0xB0 all RATIO devices in development: 0xB1 00-1F PPPoX +0xB3 00 linux/mmc/ioctl.h 0xC0 00-0F linux/usb/iowarrior.h 0xCB 00-1F CBM serial IEC bus in development: diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index e7efd6faeaf9..407836d55712 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -31,7 +31,11 @@ #include #include #include +#include +#include +#include +#include #include #include #include @@ -218,11 +222,208 @@ mmc_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo) return 0; } +struct mmc_blk_ioc_data { + struct mmc_ioc_cmd ic; + unsigned char *buf; + u64 buf_bytes; +}; + +static struct mmc_blk_ioc_data *mmc_blk_ioctl_copy_from_user( + struct mmc_ioc_cmd __user *user) +{ + struct mmc_blk_ioc_data *idata; + int err; + + idata = kzalloc(sizeof(*idata), GFP_KERNEL); + if (!idata) { + err = -ENOMEM; + goto copy_err; + } + + if (copy_from_user(&idata->ic, user, sizeof(idata->ic))) { + err = -EFAULT; + goto copy_err; + } + + idata->buf_bytes = (u64) idata->ic.blksz * idata->ic.blocks; + if (idata->buf_bytes > MMC_IOC_MAX_BYTES) { + err = -EOVERFLOW; + goto copy_err; + } + + idata->buf = kzalloc(idata->buf_bytes, GFP_KERNEL); + if (!idata->buf) { + err = -ENOMEM; + goto copy_err; + } + + if (copy_from_user(idata->buf, (void __user *)(unsigned long) + idata->ic.data_ptr, idata->buf_bytes)) { + err = -EFAULT; + goto copy_err; + } + + return idata; + +copy_err: + kfree(idata->buf); + kfree(idata); + return ERR_PTR(err); + +} + +static int mmc_blk_ioctl_cmd(struct block_device *bdev, + struct mmc_ioc_cmd __user *ic_ptr) +{ + struct mmc_blk_ioc_data *idata; + struct mmc_blk_data *md; + struct mmc_card *card; + struct mmc_command cmd = {0}; + struct mmc_data data = {0}; + struct mmc_request mrq = {0}; + struct scatterlist sg; + int err; + + /* + * The caller must have CAP_SYS_RAWIO, and must be calling this on the + * whole block device, not on a partition. This prevents overspray + * between sibling partitions. + */ + if ((!capable(CAP_SYS_RAWIO)) || (bdev != bdev->bd_contains)) + return -EPERM; + + idata = mmc_blk_ioctl_copy_from_user(ic_ptr); + if (IS_ERR(idata)) + return PTR_ERR(idata); + + cmd.opcode = idata->ic.opcode; + cmd.arg = idata->ic.arg; + cmd.flags = idata->ic.flags; + + data.sg = &sg; + data.sg_len = 1; + data.blksz = idata->ic.blksz; + data.blocks = idata->ic.blocks; + + sg_init_one(data.sg, idata->buf, idata->buf_bytes); + + if (idata->ic.write_flag) + data.flags = MMC_DATA_WRITE; + else + data.flags = MMC_DATA_READ; + + mrq.cmd = &cmd; + mrq.data = &data; + + md = mmc_blk_get(bdev->bd_disk); + if (!md) { + err = -EINVAL; + goto cmd_done; + } + + card = md->queue.card; + if (IS_ERR(card)) { + err = PTR_ERR(card); + goto cmd_done; + } + + mmc_claim_host(card->host); + + if (idata->ic.is_acmd) { + err = mmc_app_cmd(card->host, card); + if (err) + goto cmd_rel_host; + } + + /* data.flags must already be set before doing this. */ + mmc_set_data_timeout(&data, card); + /* Allow overriding the timeout_ns for empirical tuning. */ + if (idata->ic.data_timeout_ns) + data.timeout_ns = idata->ic.data_timeout_ns; + + if ((cmd.flags & MMC_RSP_R1B) == MMC_RSP_R1B) { + /* + * Pretend this is a data transfer and rely on the host driver + * to compute timeout. When all host drivers support + * cmd.cmd_timeout for R1B, this can be changed to: + * + * mrq.data = NULL; + * cmd.cmd_timeout = idata->ic.cmd_timeout_ms; + */ + data.timeout_ns = idata->ic.cmd_timeout_ms * 1000000; + } + + mmc_wait_for_req(card->host, &mrq); + + if (cmd.error) { + dev_err(mmc_dev(card->host), "%s: cmd error %d\n", + __func__, cmd.error); + err = cmd.error; + goto cmd_rel_host; + } + if (data.error) { + dev_err(mmc_dev(card->host), "%s: data error %d\n", + __func__, data.error); + err = data.error; + goto cmd_rel_host; + } + + /* + * According to the SD specs, some commands require a delay after + * issuing the command. + */ + if (idata->ic.postsleep_min_us) + usleep_range(idata->ic.postsleep_min_us, idata->ic.postsleep_max_us); + + if (copy_to_user(&(ic_ptr->response), cmd.resp, sizeof(cmd.resp))) { + err = -EFAULT; + goto cmd_rel_host; + } + + if (!idata->ic.write_flag) { + if (copy_to_user((void __user *)(unsigned long) idata->ic.data_ptr, + idata->buf, idata->buf_bytes)) { + err = -EFAULT; + goto cmd_rel_host; + } + } + +cmd_rel_host: + mmc_release_host(card->host); + +cmd_done: + mmc_blk_put(md); + kfree(idata->buf); + kfree(idata); + return err; +} + +static int mmc_blk_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + int ret = -EINVAL; + if (cmd == MMC_IOC_CMD) + ret = mmc_blk_ioctl_cmd(bdev, (struct mmc_ioc_cmd __user *)arg); + return ret; +} + +#ifdef CONFIG_COMPAT +static int mmc_blk_compat_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + return mmc_blk_ioctl(bdev, mode, cmd, (unsigned long) compat_ptr(arg)); +} +#endif + static const struct block_device_operations mmc_bdops = { .open = mmc_blk_open, .release = mmc_blk_release, .getgeo = mmc_blk_getgeo, .owner = THIS_MODULE, + .ioctl = mmc_blk_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = mmc_blk_compat_ioctl, +#endif }; struct mmc_blk_request { diff --git a/drivers/mmc/core/sd_ops.c b/drivers/mmc/core/sd_ops.c index a206aea5360d..021fed153804 100644 --- a/drivers/mmc/core/sd_ops.c +++ b/drivers/mmc/core/sd_ops.c @@ -21,7 +21,7 @@ #include "core.h" #include "sd_ops.h" -static int mmc_app_cmd(struct mmc_host *host, struct mmc_card *card) +int mmc_app_cmd(struct mmc_host *host, struct mmc_card *card) { int err; struct mmc_command cmd = {0}; @@ -49,6 +49,7 @@ static int mmc_app_cmd(struct mmc_host *host, struct mmc_card *card) return 0; } +EXPORT_SYMBOL_GPL(mmc_app_cmd); /** * mmc_wait_for_app_cmd - start an application command and wait for diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 75cf611641e6..ed38527599ee 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -4,6 +4,7 @@ header-y += caif/ header-y += dvb/ header-y += hdlc/ header-y += isdn/ +header-y += mmc/ header-y += nfsd/ header-y += raid/ header-y += spi/ diff --git a/include/linux/mmc/Kbuild b/include/linux/mmc/Kbuild new file mode 100644 index 000000000000..1fb26448faa9 --- /dev/null +++ b/include/linux/mmc/Kbuild @@ -0,0 +1 @@ +header-y += ioctl.h diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index f8e4bcbd2846..cbe8d55f64c4 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -133,6 +133,7 @@ struct mmc_card; extern void mmc_wait_for_req(struct mmc_host *, struct mmc_request *); extern int mmc_wait_for_cmd(struct mmc_host *, struct mmc_command *, int); +extern int mmc_app_cmd(struct mmc_host *, struct mmc_card *); extern int mmc_wait_for_app_cmd(struct mmc_host *, struct mmc_card *, struct mmc_command *, int); extern int mmc_switch(struct mmc_card *, u8, u8, u8, unsigned int); diff --git a/include/linux/mmc/ioctl.h b/include/linux/mmc/ioctl.h new file mode 100644 index 000000000000..5baf2983a12f --- /dev/null +++ b/include/linux/mmc/ioctl.h @@ -0,0 +1,54 @@ +#ifndef LINUX_MMC_IOCTL_H +#define LINUX_MMC_IOCTL_H +struct mmc_ioc_cmd { + /* Implies direction of data. true = write, false = read */ + int write_flag; + + /* Application-specific command. true = precede with CMD55 */ + int is_acmd; + + __u32 opcode; + __u32 arg; + __u32 response[4]; /* CMD response */ + unsigned int flags; + unsigned int blksz; + unsigned int blocks; + + /* + * Sleep at least postsleep_min_us useconds, and at most + * postsleep_max_us useconds *after* issuing command. Needed for + * some read commands for which cards have no other way of indicating + * they're ready for the next command (i.e. there is no equivalent of + * a "busy" indicator for read operations). + */ + unsigned int postsleep_min_us; + unsigned int postsleep_max_us; + + /* + * Override driver-computed timeouts. Note the difference in units! + */ + unsigned int data_timeout_ns; + unsigned int cmd_timeout_ms; + + /* + * For 64-bit machines, the next member, ``__u64 data_ptr``, wants to + * be 8-byte aligned. Make sure this struct is the same size when + * built for 32-bit. + */ + __u32 __pad; + + /* DAT buffer */ + __u64 data_ptr; +}; +#define mmc_ioc_cmd_set_data(ic, ptr) ic.data_ptr = (__u64)(unsigned long) ptr + +#define MMC_IOC_CMD _IOWR(MMC_BLOCK_MAJOR, 0, struct mmc_ioc_cmd) + +/* + * Since this ioctl is only meant to enhance (and not replace) normal access + * to the mmc bus device, an upper data transfer limit of MMC_IOC_MAX_BYTES + * is enforced per ioctl call. For larger data transfers, use the normal + * block device operations. + */ +#define MMC_IOC_MAX_BYTES (512L * 256) +#endif /* LINUX_MMC_IOCTL_H */ -- cgit v1.2.3 From f2119df6b764609af4baceb68caf1e848c1c8aa7 Mon Sep 17 00:00:00 2001 From: Arindam Nath Date: Thu, 5 May 2011 12:18:57 +0530 Subject: mmc: sd: add support for signal voltage switch procedure Host Controller v3.00 adds another Capabilities register. Apart from other things, this new register indicates whether the Host Controller supports SDR50, SDR104, and DDR50 UHS-I modes. The spec doesn't mention about explicit support for SDR12 and SDR25 UHS-I modes, so the Host Controller v3.00 should support them by default. Also if the controller supports SDR104 mode, it will also support SDR50 mode as well. So depending on the host support, we set the corresponding MMC_CAP_* flags. One more new register. Host Control2 is added in v3.00, which is used during Signal Voltage Switch procedure described below. Since as per v3.00 spec, UHS-I supported hosts should set S18R to 1, we set S18R (bit 24) of OCR before sending ACMD41. We also need to set XPC (bit 28) of OCR in case the host can supply >150mA. This support is indicated by the Maximum Current Capabilities register of the Host Controller. If the response of ACMD41 has both CCS and S18A set, we start the signal voltage switch procedure, which if successfull, will switch the card from 3.3V signalling to 1.8V signalling. Signal voltage switch procedure adds support for a new command CMD11 in the Physical Layer Spec v3.01. As part of this procedure, we need to set 1.8V Signalling Enable (bit 3) of Host Control2 register, which if remains set after 5ms, means the switch to 1.8V signalling is successfull. Otherwise, we clear bit 24 of OCR and retry the initialization sequence. When we remove the card, and insert the same or another card, we need to make sure that we start with 3.3V signalling voltage. So we call mmc_set_signal_voltage() with MMC_SIGNAL_VOLTAGE_330 set so that we are back to 3.3V signalling voltage before we actually start initializing the card. Tested by Zhangfei Gao with a Toshiba uhs card and general hs card, on mmp2 in SDMA mode. Signed-off-by: Arindam Nath Reviewed-by: Philip Rakity Tested-by: Philip Rakity Acked-by: Zhangfei Gao Signed-off-by: Chris Ball --- drivers/mmc/core/core.c | 32 ++++++++ drivers/mmc/core/core.h | 1 + drivers/mmc/core/sd.c | 36 ++++++++- drivers/mmc/host/sdhci.c | 192 +++++++++++++++++++++++++++++++++++++++++++---- drivers/mmc/host/sdhci.h | 18 ++++- include/linux/mmc/host.h | 15 ++++ include/linux/mmc/sd.h | 7 ++ 7 files changed, 284 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 1dbc18576219..5005a6323165 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -942,6 +942,38 @@ u32 mmc_select_voltage(struct mmc_host *host, u32 ocr) return ocr; } +int mmc_set_signal_voltage(struct mmc_host *host, int signal_voltage) +{ + struct mmc_command cmd = {0}; + int err = 0; + + BUG_ON(!host); + + /* + * Send CMD11 only if the request is to switch the card to + * 1.8V signalling. + */ + if (signal_voltage == MMC_SIGNAL_VOLTAGE_180) { + cmd.opcode = SD_SWITCH_VOLTAGE; + cmd.arg = 0; + cmd.flags = MMC_RSP_R1 | MMC_CMD_AC; + + err = mmc_wait_for_cmd(host, &cmd, 0); + if (err) + return err; + + if (!mmc_host_is_spi(host) && (cmd.resp[0] & R1_ERROR)) + return -EIO; + } + + host->ios.signal_voltage = signal_voltage; + + if (host->ops->start_signal_voltage_switch) + err = host->ops->start_signal_voltage_switch(host, &host->ios); + + return err; +} + /* * Select timing parameters for host. */ diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h index ca1fdde29df6..7745dea430b8 100644 --- a/drivers/mmc/core/core.h +++ b/drivers/mmc/core/core.h @@ -41,6 +41,7 @@ void mmc_set_bus_width(struct mmc_host *host, unsigned int width); void mmc_set_bus_width_ddr(struct mmc_host *host, unsigned int width, unsigned int ddr); u32 mmc_select_voltage(struct mmc_host *host, u32 ocr); +int mmc_set_signal_voltage(struct mmc_host *host, int signal_voltage); void mmc_set_timing(struct mmc_host *host, unsigned int timing); static inline void mmc_delay(unsigned int ms) diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 6dac89fe0535..b0cd285d272a 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -403,6 +403,7 @@ struct device_type sd_type = { int mmc_sd_get_cid(struct mmc_host *host, u32 ocr, u32 *cid) { int err; + u32 rocr; /* * Since we're changing the OCR value, we seem to @@ -420,12 +421,38 @@ int mmc_sd_get_cid(struct mmc_host *host, u32 ocr, u32 *cid) */ err = mmc_send_if_cond(host, ocr); if (!err) - ocr |= 1 << 30; + ocr |= SD_OCR_CCS; - err = mmc_send_app_op_cond(host, ocr, NULL); + /* + * If the host supports one of UHS-I modes, request the card + * to switch to 1.8V signaling level. + */ + if (host->caps & (MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 | + MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 | MMC_CAP_UHS_DDR50)) + ocr |= SD_OCR_S18R; + + /* If the host can supply more than 150mA, XPC should be set to 1. */ + if (host->caps & (MMC_CAP_SET_XPC_330 | MMC_CAP_SET_XPC_300 | + MMC_CAP_SET_XPC_180)) + ocr |= SD_OCR_XPC; + +try_again: + err = mmc_send_app_op_cond(host, ocr, &rocr); if (err) return err; + /* + * In case CCS and S18A in the response is set, start Signal Voltage + * Switch procedure. SPI mode doesn't support CMD11. + */ + if (!mmc_host_is_spi(host) && ((rocr & 0x41000000) == 0x41000000)) { + err = mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180); + if (err) { + ocr &= ~SD_OCR_S18R; + goto try_again; + } + } + if (mmc_host_is_spi(host)) err = mmc_send_cid(host, cid); else @@ -773,6 +800,11 @@ int mmc_attach_sd(struct mmc_host *host) BUG_ON(!host); WARN_ON(!host->claimed); + /* Make sure we are at 3.3V signalling voltage */ + err = mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_330); + if (err) + return err; + err = mmc_send_app_op_cond(host, 0, &ocr); if (err) return err; diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index e5cfe70dc23b..4e2031449a23 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -83,6 +83,8 @@ static void sdhci_dumpregs(struct sdhci_host *host) printk(KERN_DEBUG DRIVER_NAME ": Cmd: 0x%08x | Max curr: 0x%08x\n", sdhci_readw(host, SDHCI_COMMAND), sdhci_readl(host, SDHCI_MAX_CURRENT)); + printk(KERN_DEBUG DRIVER_NAME ": Host ctl2: 0x%08x\n", + sdhci_readw(host, SDHCI_HOST_CONTROL2)); if (host->flags & SDHCI_USE_ADMA) printk(KERN_DEBUG DRIVER_NAME ": ADMA Err: 0x%08x | ADMA Ptr: 0x%08x\n", @@ -1323,11 +1325,114 @@ out: spin_unlock_irqrestore(&host->lock, flags); } +static int sdhci_start_signal_voltage_switch(struct mmc_host *mmc, + struct mmc_ios *ios) +{ + struct sdhci_host *host; + u8 pwr; + u16 clk, ctrl; + u32 present_state; + + host = mmc_priv(mmc); + + /* + * Signal Voltage Switching is only applicable for Host Controllers + * v3.00 and above. + */ + if (host->version < SDHCI_SPEC_300) + return 0; + + /* + * We first check whether the request is to set signalling voltage + * to 3.3V. If so, we change the voltage to 3.3V and return quickly. + */ + ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2); + if (ios->signal_voltage == MMC_SIGNAL_VOLTAGE_330) { + /* Set 1.8V Signal Enable in the Host Control2 register to 0 */ + ctrl &= ~SDHCI_CTRL_VDD_180; + sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2); + + /* Wait for 5ms */ + usleep_range(5000, 5500); + + /* 3.3V regulator output should be stable within 5 ms */ + ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2); + if (!(ctrl & SDHCI_CTRL_VDD_180)) + return 0; + else { + printk(KERN_INFO DRIVER_NAME ": Switching to 3.3V " + "signalling voltage failed\n"); + return -EIO; + } + } else if (!(ctrl & SDHCI_CTRL_VDD_180) && + (ios->signal_voltage == MMC_SIGNAL_VOLTAGE_180)) { + /* Stop SDCLK */ + clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL); + clk &= ~SDHCI_CLOCK_CARD_EN; + sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL); + + /* Check whether DAT[3:0] is 0000 */ + present_state = sdhci_readl(host, SDHCI_PRESENT_STATE); + if (!((present_state & SDHCI_DATA_LVL_MASK) >> + SDHCI_DATA_LVL_SHIFT)) { + /* + * Enable 1.8V Signal Enable in the Host Control2 + * register + */ + ctrl |= SDHCI_CTRL_VDD_180; + sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2); + + /* Wait for 5ms */ + usleep_range(5000, 5500); + + ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2); + if (ctrl & SDHCI_CTRL_VDD_180) { + /* Provide SDCLK again and wait for 1ms*/ + clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL); + clk |= SDHCI_CLOCK_CARD_EN; + sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL); + usleep_range(1000, 1500); + + /* + * If DAT[3:0] level is 1111b, then the card + * was successfully switched to 1.8V signaling. + */ + present_state = sdhci_readl(host, + SDHCI_PRESENT_STATE); + if ((present_state & SDHCI_DATA_LVL_MASK) == + SDHCI_DATA_LVL_MASK) + return 0; + } + } + + /* + * If we are here, that means the switch to 1.8V signaling + * failed. We power cycle the card, and retry initialization + * sequence by setting S18R to 0. + */ + pwr = sdhci_readb(host, SDHCI_POWER_CONTROL); + pwr &= ~SDHCI_POWER_ON; + sdhci_writeb(host, pwr, SDHCI_POWER_CONTROL); + + /* Wait for 1ms as per the spec */ + usleep_range(1000, 1500); + pwr |= SDHCI_POWER_ON; + sdhci_writeb(host, pwr, SDHCI_POWER_CONTROL); + + printk(KERN_INFO DRIVER_NAME ": Switching to 1.8V signalling " + "voltage failed, retrying with S18R set to 0\n"); + return -EAGAIN; + } else + /* No signal voltage switch required */ + return 0; +} + static const struct mmc_host_ops sdhci_ops = { .request = sdhci_request, .set_ios = sdhci_set_ios, .get_ro = sdhci_get_ro, .enable_sdio_irq = sdhci_enable_sdio_irq, + .start_signal_voltage_switch = sdhci_start_signal_voltage_switch, }; /*****************************************************************************\ @@ -1808,7 +1913,9 @@ EXPORT_SYMBOL_GPL(sdhci_alloc_host); int sdhci_add_host(struct sdhci_host *host) { struct mmc_host *mmc; - unsigned int caps, ocr_avail; + u32 caps[2]; + u32 max_current_caps; + unsigned int ocr_avail; int ret; WARN_ON(host == NULL); @@ -1831,12 +1938,15 @@ int sdhci_add_host(struct sdhci_host *host) host->version); } - caps = (host->quirks & SDHCI_QUIRK_MISSING_CAPS) ? host->caps : + caps[0] = (host->quirks & SDHCI_QUIRK_MISSING_CAPS) ? host->caps : sdhci_readl(host, SDHCI_CAPABILITIES); + caps[1] = (host->version >= SDHCI_SPEC_300) ? + sdhci_readl(host, SDHCI_CAPABILITIES_1) : 0; + if (host->quirks & SDHCI_QUIRK_FORCE_DMA) host->flags |= SDHCI_USE_SDMA; - else if (!(caps & SDHCI_CAN_DO_SDMA)) + else if (!(caps[0] & SDHCI_CAN_DO_SDMA)) DBG("Controller doesn't have SDMA capability\n"); else host->flags |= SDHCI_USE_SDMA; @@ -1847,7 +1957,8 @@ int sdhci_add_host(struct sdhci_host *host) host->flags &= ~SDHCI_USE_SDMA; } - if ((host->version >= SDHCI_SPEC_200) && (caps & SDHCI_CAN_DO_ADMA2)) + if ((host->version >= SDHCI_SPEC_200) && + (caps[0] & SDHCI_CAN_DO_ADMA2)) host->flags |= SDHCI_USE_ADMA; if ((host->quirks & SDHCI_QUIRK_BROKEN_ADMA) && @@ -1897,10 +2008,10 @@ int sdhci_add_host(struct sdhci_host *host) } if (host->version >= SDHCI_SPEC_300) - host->max_clk = (caps & SDHCI_CLOCK_V3_BASE_MASK) + host->max_clk = (caps[0] & SDHCI_CLOCK_V3_BASE_MASK) >> SDHCI_CLOCK_BASE_SHIFT; else - host->max_clk = (caps & SDHCI_CLOCK_BASE_MASK) + host->max_clk = (caps[0] & SDHCI_CLOCK_BASE_MASK) >> SDHCI_CLOCK_BASE_SHIFT; host->max_clk *= 1000000; @@ -1916,7 +2027,7 @@ int sdhci_add_host(struct sdhci_host *host) } host->timeout_clk = - (caps & SDHCI_TIMEOUT_CLK_MASK) >> SDHCI_TIMEOUT_CLK_SHIFT; + (caps[0] & SDHCI_TIMEOUT_CLK_MASK) >> SDHCI_TIMEOUT_CLK_SHIFT; if (host->timeout_clk == 0) { if (host->ops->get_timeout_clock) { host->timeout_clk = host->ops->get_timeout_clock(host); @@ -1928,7 +2039,7 @@ int sdhci_add_host(struct sdhci_host *host) return -ENODEV; } } - if (caps & SDHCI_TIMEOUT_CLK_UNIT) + if (caps[0] & SDHCI_TIMEOUT_CLK_UNIT) host->timeout_clk *= 1000; /* @@ -1955,21 +2066,76 @@ int sdhci_add_host(struct sdhci_host *host) if (!(host->quirks & SDHCI_QUIRK_FORCE_1_BIT_DATA)) mmc->caps |= MMC_CAP_4_BIT_DATA; - if (caps & SDHCI_CAN_DO_HISPD) + if (caps[0] & SDHCI_CAN_DO_HISPD) mmc->caps |= MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED; if ((host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION) && mmc_card_is_removable(mmc)) mmc->caps |= MMC_CAP_NEEDS_POLL; + /* UHS-I mode(s) supported by the host controller. */ + if (host->version >= SDHCI_SPEC_300) + mmc->caps |= MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25; + + /* SDR104 supports also implies SDR50 support */ + if (caps[1] & SDHCI_SUPPORT_SDR104) + mmc->caps |= MMC_CAP_UHS_SDR104 | MMC_CAP_UHS_SDR50; + else if (caps[1] & SDHCI_SUPPORT_SDR50) + mmc->caps |= MMC_CAP_UHS_SDR50; + + if (caps[1] & SDHCI_SUPPORT_DDR50) + mmc->caps |= MMC_CAP_UHS_DDR50; + ocr_avail = 0; - if (caps & SDHCI_CAN_VDD_330) + /* + * According to SD Host Controller spec v3.00, if the Host System + * can afford more than 150mA, Host Driver should set XPC to 1. Also + * the value is meaningful only if Voltage Support in the Capabilities + * register is set. The actual current value is 4 times the register + * value. + */ + max_current_caps = sdhci_readl(host, SDHCI_MAX_CURRENT); + + if (caps[0] & SDHCI_CAN_VDD_330) { + int max_current_330; + ocr_avail |= MMC_VDD_32_33 | MMC_VDD_33_34; - if (caps & SDHCI_CAN_VDD_300) + + max_current_330 = ((max_current_caps & + SDHCI_MAX_CURRENT_330_MASK) >> + SDHCI_MAX_CURRENT_330_SHIFT) * + SDHCI_MAX_CURRENT_MULTIPLIER; + + if (max_current_330 > 150) + mmc->caps |= MMC_CAP_SET_XPC_330; + } + if (caps[0] & SDHCI_CAN_VDD_300) { + int max_current_300; + ocr_avail |= MMC_VDD_29_30 | MMC_VDD_30_31; - if (caps & SDHCI_CAN_VDD_180) + + max_current_300 = ((max_current_caps & + SDHCI_MAX_CURRENT_300_MASK) >> + SDHCI_MAX_CURRENT_300_SHIFT) * + SDHCI_MAX_CURRENT_MULTIPLIER; + + if (max_current_300 > 150) + mmc->caps |= MMC_CAP_SET_XPC_300; + } + if (caps[0] & SDHCI_CAN_VDD_180) { + int max_current_180; + ocr_avail |= MMC_VDD_165_195; + max_current_180 = ((max_current_caps & + SDHCI_MAX_CURRENT_180_MASK) >> + SDHCI_MAX_CURRENT_180_SHIFT) * + SDHCI_MAX_CURRENT_MULTIPLIER; + + if (max_current_180 > 150) + mmc->caps |= MMC_CAP_SET_XPC_180; + } + mmc->ocr_avail = ocr_avail; mmc->ocr_avail_sdio = ocr_avail; if (host->ocr_avail_sdio) @@ -2029,7 +2195,7 @@ int sdhci_add_host(struct sdhci_host *host) if (host->quirks & SDHCI_QUIRK_FORCE_BLK_SZ_2048) { mmc->max_blk_size = 2; } else { - mmc->max_blk_size = (caps & SDHCI_MAX_BLOCK_MASK) >> + mmc->max_blk_size = (caps[0] & SDHCI_MAX_BLOCK_MASK) >> SDHCI_MAX_BLOCK_SHIFT; if (mmc->max_blk_size >= 3) { printk(KERN_WARNING "%s: Invalid maximum block size, " diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index c6e25a76d269..5cba2fea46e0 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -68,6 +68,8 @@ #define SDHCI_DATA_AVAILABLE 0x00000800 #define SDHCI_CARD_PRESENT 0x00010000 #define SDHCI_WRITE_PROTECT 0x00080000 +#define SDHCI_DATA_LVL_MASK 0x00F00000 +#define SDHCI_DATA_LVL_SHIFT 20 #define SDHCI_HOST_CONTROL 0x28 #define SDHCI_CTRL_LED 0x01 @@ -146,7 +148,8 @@ #define SDHCI_ACMD12_ERR 0x3C -/* 3E-3F reserved */ +#define SDHCI_HOST_CONTROL2 0x3E +#define SDHCI_CTRL_VDD_180 0x0008 #define SDHCI_CAPABILITIES 0x40 #define SDHCI_TIMEOUT_CLK_MASK 0x0000003F @@ -167,9 +170,20 @@ #define SDHCI_CAN_VDD_180 0x04000000 #define SDHCI_CAN_64BIT 0x10000000 +#define SDHCI_SUPPORT_SDR50 0x00000001 +#define SDHCI_SUPPORT_SDR104 0x00000002 +#define SDHCI_SUPPORT_DDR50 0x00000004 + #define SDHCI_CAPABILITIES_1 0x44 -#define SDHCI_MAX_CURRENT 0x48 +#define SDHCI_MAX_CURRENT 0x48 +#define SDHCI_MAX_CURRENT_330_MASK 0x0000FF +#define SDHCI_MAX_CURRENT_330_SHIFT 0 +#define SDHCI_MAX_CURRENT_300_MASK 0x00FF00 +#define SDHCI_MAX_CURRENT_300_SHIFT 8 +#define SDHCI_MAX_CURRENT_180_MASK 0xFF0000 +#define SDHCI_MAX_CURRENT_180_SHIFT 16 +#define SDHCI_MAX_CURRENT_MULTIPLIER 4 /* 4C-4F reserved for more max current */ diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 0fffa5cdc183..bde5a0b1c47e 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -56,6 +56,11 @@ struct mmc_ios { #define MMC_SDR_MODE 0 #define MMC_1_2V_DDR_MODE 1 #define MMC_1_8V_DDR_MODE 2 + + unsigned char signal_voltage; /* signalling voltage (1.8V or 3.3V) */ + +#define MMC_SIGNAL_VOLTAGE_330 0 +#define MMC_SIGNAL_VOLTAGE_180 1 }; struct mmc_host_ops { @@ -117,6 +122,8 @@ struct mmc_host_ops { /* optional callback for HC quirks */ void (*init_card)(struct mmc_host *host, struct mmc_card *card); + + int (*start_signal_voltage_switch)(struct mmc_host *host, struct mmc_ios *ios); }; struct mmc_card; @@ -173,6 +180,14 @@ struct mmc_host { /* DDR mode at 1.2V */ #define MMC_CAP_POWER_OFF_CARD (1 << 13) /* Can power off after boot */ #define MMC_CAP_BUS_WIDTH_TEST (1 << 14) /* CMD14/CMD19 bus width ok */ +#define MMC_CAP_UHS_SDR12 (1 << 15) /* Host supports UHS SDR12 mode */ +#define MMC_CAP_UHS_SDR25 (1 << 16) /* Host supports UHS SDR25 mode */ +#define MMC_CAP_UHS_SDR50 (1 << 17) /* Host supports UHS SDR50 mode */ +#define MMC_CAP_UHS_SDR104 (1 << 18) /* Host supports UHS SDR104 mode */ +#define MMC_CAP_UHS_DDR50 (1 << 19) /* Host supports UHS DDR50 mode */ +#define MMC_CAP_SET_XPC_330 (1 << 20) /* Host supports >150mA current at 3.3V */ +#define MMC_CAP_SET_XPC_300 (1 << 21) /* Host supports >150mA current at 3.0V */ +#define MMC_CAP_SET_XPC_180 (1 << 22) /* Host supports >150mA current at 1.8V */ mmc_pm_flag_t pm_caps; /* supported pm features */ diff --git a/include/linux/mmc/sd.h b/include/linux/mmc/sd.h index 3fd85e088cc3..c648878f6734 100644 --- a/include/linux/mmc/sd.h +++ b/include/linux/mmc/sd.h @@ -17,6 +17,7 @@ /* This is basically the same command as for MMC with some quirks. */ #define SD_SEND_RELATIVE_ADDR 3 /* bcr R6 */ #define SD_SEND_IF_COND 8 /* bcr [11:0] See below R7 */ +#define SD_SWITCH_VOLTAGE 11 /* ac R1 */ /* class 10 */ #define SD_SWITCH 6 /* adtc [31:0] See below R1 */ @@ -32,6 +33,12 @@ #define SD_APP_OP_COND 41 /* bcr [31:0] OCR R3 */ #define SD_APP_SEND_SCR 51 /* adtc R1 */ +/* OCR bit definitions */ +#define SD_OCR_S18R (1 << 24) /* 1.8V switching request */ +#define SD_ROCR_S18A SD_OCR_S18R /* 1.8V switching accepted by card */ +#define SD_OCR_XPC (1 << 28) /* SDXC power control */ +#define SD_OCR_CCS (1 << 30) /* Card Capacity Status */ + /* * SD_SWITCH argument format: * -- cgit v1.2.3 From 013909c4ffd16ded4895528b856fd8782df04dc6 Mon Sep 17 00:00:00 2001 From: Arindam Nath Date: Thu, 5 May 2011 12:18:58 +0530 Subject: mmc: sd: query function modes for uhs cards SD cards which conform to Physical Layer Spec v3.01 can support additional Bus Speed Modes, Driver Strength, and Current Limit other than the default values. We use CMD6 mode 0 to read these additional card functions. The values read here will be used during UHS-I initialization steps. Tested by Zhangfei Gao with a Toshiba uhs card and general hs card, on mmp2 in SDMA mode. Signed-off-by: Arindam Nath Reviewed-by: Philip Rakity Tested-by: Philip Rakity Acked-by: Zhangfei Gao Signed-off-by: Chris Ball --- drivers/mmc/core/sd.c | 68 +++++++++++++++++++++++++++++++++++++++++------- include/linux/mmc/card.h | 4 +++ 2 files changed, 62 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index b0cd285d272a..8285842f19e9 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -189,6 +189,9 @@ static int mmc_decode_scr(struct mmc_card *card) scr->sda_vsn = UNSTUFF_BITS(resp, 56, 4); scr->bus_widths = UNSTUFF_BITS(resp, 48, 4); + if (scr->sda_vsn == SCR_SPEC_VER_2) + /* Check if Physical Layer Spec v3.0 is supported */ + scr->sda_spec3 = UNSTUFF_BITS(resp, 47, 1); if (UNSTUFF_BITS(resp, 55, 1)) card->erased_byte = 0xFF; @@ -274,29 +277,74 @@ static int mmc_read_switch(struct mmc_card *card) status = kmalloc(64, GFP_KERNEL); if (!status) { printk(KERN_ERR "%s: could not allocate a buffer for " - "switch capabilities.\n", mmc_hostname(card->host)); + "switch capabilities.\n", + mmc_hostname(card->host)); return -ENOMEM; } + /* Find out the supported Bus Speed Modes. */ err = mmc_sd_switch(card, 0, 0, 1, status); if (err) { - /* If the host or the card can't do the switch, - * fail more gracefully. */ - if ((err != -EINVAL) - && (err != -ENOSYS) - && (err != -EFAULT)) + /* + * If the host or the card can't do the switch, + * fail more gracefully. + */ + if (err != -EINVAL && err != -ENOSYS && err != -EFAULT) goto out; - printk(KERN_WARNING "%s: problem reading switch " - "capabilities, performance might suffer.\n", + printk(KERN_WARNING "%s: problem reading Bus Speed modes.\n", mmc_hostname(card->host)); err = 0; goto out; } - if (status[13] & 0x02) - card->sw_caps.hs_max_dtr = 50000000; + if (card->scr.sda_spec3) { + card->sw_caps.sd3_bus_mode = status[13]; + + /* Find out Driver Strengths supported by the card */ + err = mmc_sd_switch(card, 0, 2, 1, status); + if (err) { + /* + * If the host or the card can't do the switch, + * fail more gracefully. + */ + if (err != -EINVAL && err != -ENOSYS && err != -EFAULT) + goto out; + + printk(KERN_WARNING "%s: problem reading " + "Driver Strength.\n", + mmc_hostname(card->host)); + err = 0; + + goto out; + } + + card->sw_caps.sd3_drv_type = status[9]; + + /* Find out Current Limits supported by the card */ + err = mmc_sd_switch(card, 0, 3, 1, status); + if (err) { + /* + * If the host or the card can't do the switch, + * fail more gracefully. + */ + if (err != -EINVAL && err != -ENOSYS && err != -EFAULT) + goto out; + + printk(KERN_WARNING "%s: problem reading " + "Current Limit.\n", + mmc_hostname(card->host)); + err = 0; + + goto out; + } + + card->sw_caps.sd3_curr_limit = status[7]; + } else { + if (status[13] & 0x02) + card->sw_caps.hs_max_dtr = 50000000; + } out: kfree(status); diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 72a98681ef47..56f4d9234a66 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -67,6 +67,7 @@ struct mmc_ext_csd { struct sd_scr { unsigned char sda_vsn; + unsigned char sda_spec3; unsigned char bus_widths; #define SD_SCR_BUS_WIDTH_1 (1<<0) #define SD_SCR_BUS_WIDTH_4 (1<<2) @@ -80,6 +81,9 @@ struct sd_ssr { struct sd_switch_caps { unsigned int hs_max_dtr; + unsigned int sd3_bus_mode; + unsigned int sd3_drv_type; + unsigned int sd3_curr_limit; }; struct sdio_cccr { -- cgit v1.2.3 From d6d50a15a2897d4133d536dd4343b5cf21163db3 Mon Sep 17 00:00:00 2001 From: Arindam Nath Date: Thu, 5 May 2011 12:18:59 +0530 Subject: mmc: sd: add support for driver type selection This patch adds support for setting driver strength during UHS-I initialization procedure. Since UHS-I cards set S18A (bit 24) in response to ACMD41, we use this as a base for UHS-I initialization. We modify the parameter list of mmc_sd_get_cid() so that we can save the ROCR from ACMD41 to check whether bit 24 is set. We decide whether the Host Controller supports A, C, or D driver type depending on the Capabilities register. Driver type B is suported by default. We then set the appropriate driver type for the card using CMD6 mode 1. As per Host Controller spec v3.00, we set driver type for the host only if Preset Value Enable in the Host Control2 register is not set. SDHCI_HOST_CONTROL has been renamed to SDHCI_HOST_CONTROL1 to conform to the spec. Tested by Zhangfei Gao with a Toshiba uhs card and general hs card, on mmp2 in SDMA mode. Signed-off-by: Arindam Nath Reviewed-by: Philip Rakity Tested-by: Philip Rakity Acked-by: Zhangfei Gao Signed-off-by: Chris Ball --- drivers/mmc/core/core.c | 9 +++ drivers/mmc/core/core.h | 1 + drivers/mmc/core/sd.c | 152 +++++++++++++++++++++++++++++++++++++++-------- drivers/mmc/core/sd.h | 2 +- drivers/mmc/core/sdio.c | 4 +- drivers/mmc/host/sdhci.c | 27 +++++++++ drivers/mmc/host/sdhci.h | 11 +++- include/linux/mmc/card.h | 4 ++ include/linux/mmc/host.h | 10 ++++ 9 files changed, 190 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 5005a6323165..61c6c0b8f0e0 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -983,6 +983,15 @@ void mmc_set_timing(struct mmc_host *host, unsigned int timing) mmc_set_ios(host); } +/* + * Select appropriate driver type for host. + */ +void mmc_set_driver_type(struct mmc_host *host, unsigned int drv_type) +{ + host->ios.drv_type = drv_type; + mmc_set_ios(host); +} + /* * Apply power to the MMC stack. This is a two-stage process. * First, we enable power to the card without the clock running. diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h index 7745dea430b8..93f33973de39 100644 --- a/drivers/mmc/core/core.h +++ b/drivers/mmc/core/core.h @@ -43,6 +43,7 @@ void mmc_set_bus_width_ddr(struct mmc_host *host, unsigned int width, u32 mmc_select_voltage(struct mmc_host *host, u32 ocr); int mmc_set_signal_voltage(struct mmc_host *host, int signal_voltage); void mmc_set_timing(struct mmc_host *host, unsigned int timing); +void mmc_set_driver_type(struct mmc_host *host, unsigned int drv_type); static inline void mmc_delay(unsigned int ms) { diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 8285842f19e9..5b7c99855635 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -400,6 +400,98 @@ out: return err; } +static int sd_select_driver_type(struct mmc_card *card, u8 *status) +{ + int host_drv_type = 0, card_drv_type = 0; + int err; + + /* + * If the host doesn't support any of the Driver Types A,C or D, + * default Driver Type B is used. + */ + if (!(card->host->caps & (MMC_CAP_DRIVER_TYPE_A | MMC_CAP_DRIVER_TYPE_C + | MMC_CAP_DRIVER_TYPE_D))) + return 0; + + if (card->host->caps & MMC_CAP_DRIVER_TYPE_A) { + host_drv_type = MMC_SET_DRIVER_TYPE_A; + if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_A) + card_drv_type = MMC_SET_DRIVER_TYPE_A; + else if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_B) + card_drv_type = MMC_SET_DRIVER_TYPE_B; + else if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C) + card_drv_type = MMC_SET_DRIVER_TYPE_C; + } else if (card->host->caps & MMC_CAP_DRIVER_TYPE_C) { + host_drv_type = MMC_SET_DRIVER_TYPE_C; + if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C) + card_drv_type = MMC_SET_DRIVER_TYPE_C; + } else if (!(card->host->caps & MMC_CAP_DRIVER_TYPE_D)) { + /* + * If we are here, that means only the default driver type + * B is supported by the host. + */ + host_drv_type = MMC_SET_DRIVER_TYPE_B; + if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_B) + card_drv_type = MMC_SET_DRIVER_TYPE_B; + else if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C) + card_drv_type = MMC_SET_DRIVER_TYPE_C; + } + + err = mmc_sd_switch(card, 1, 2, card_drv_type, status); + if (err) + return err; + + if ((status[15] & 0xF) != card_drv_type) { + printk(KERN_WARNING "%s: Problem setting driver strength!\n", + mmc_hostname(card->host)); + return 0; + } + + mmc_set_driver_type(card->host, host_drv_type); + + return 0; +} + +/* + * UHS-I specific initialization procedure + */ +static int mmc_sd_init_uhs_card(struct mmc_card *card) +{ + int err; + u8 *status; + + if (!card->scr.sda_spec3) + return 0; + + if (!(card->csd.cmdclass & CCC_SWITCH)) + return 0; + + status = kmalloc(64, GFP_KERNEL); + if (!status) { + printk(KERN_ERR "%s: could not allocate a buffer for " + "switch capabilities.\n", mmc_hostname(card->host)); + return -ENOMEM; + } + + /* Set 4-bit bus width */ + if ((card->host->caps & MMC_CAP_4_BIT_DATA) && + (card->scr.bus_widths & SD_SCR_BUS_WIDTH_4)) { + err = mmc_app_set_bus_width(card, MMC_BUS_WIDTH_4); + if (err) + goto out; + + mmc_set_bus_width(card->host, MMC_BUS_WIDTH_4); + } + + /* Set the driver strength for the card */ + err = sd_select_driver_type(card, status); + +out: + kfree(status); + + return err; +} + MMC_DEV_ATTR(cid, "%08x%08x%08x%08x\n", card->raw_cid[0], card->raw_cid[1], card->raw_cid[2], card->raw_cid[3]); MMC_DEV_ATTR(csd, "%08x%08x%08x%08x\n", card->raw_csd[0], card->raw_csd[1], @@ -448,10 +540,9 @@ struct device_type sd_type = { /* * Fetch CID from card. */ -int mmc_sd_get_cid(struct mmc_host *host, u32 ocr, u32 *cid) +int mmc_sd_get_cid(struct mmc_host *host, u32 ocr, u32 *cid, u32 *rocr) { int err; - u32 rocr; /* * Since we're changing the OCR value, we seem to @@ -485,7 +576,7 @@ int mmc_sd_get_cid(struct mmc_host *host, u32 ocr, u32 *cid) ocr |= SD_OCR_XPC; try_again: - err = mmc_send_app_op_cond(host, ocr, &rocr); + err = mmc_send_app_op_cond(host, ocr, rocr); if (err) return err; @@ -493,7 +584,8 @@ try_again: * In case CCS and S18A in the response is set, start Signal Voltage * Switch procedure. SPI mode doesn't support CMD11. */ - if (!mmc_host_is_spi(host) && ((rocr & 0x41000000) == 0x41000000)) { + if (!mmc_host_is_spi(host) && rocr && + ((*rocr & 0x41000000) == 0x41000000)) { err = mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180); if (err) { ocr &= ~SD_OCR_S18R; @@ -628,11 +720,12 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr, struct mmc_card *card; int err; u32 cid[4]; + u32 rocr = 0; BUG_ON(!host); WARN_ON(!host->claimed); - err = mmc_sd_get_cid(host, ocr, cid); + err = mmc_sd_get_cid(host, ocr, cid, &rocr); if (err) return err; @@ -685,30 +778,37 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr, if (err) goto free_card; - /* - * Attempt to change to high-speed (if supported) - */ - err = mmc_sd_switch_hs(card); - if (err > 0) - mmc_sd_go_highspeed(card); - else if (err) - goto free_card; - - /* - * Set bus speed. - */ - mmc_set_clock(host, mmc_sd_get_max_clock(card)); - - /* - * Switch to wider bus (if supported). - */ - if ((host->caps & MMC_CAP_4_BIT_DATA) && - (card->scr.bus_widths & SD_SCR_BUS_WIDTH_4)) { - err = mmc_app_set_bus_width(card, MMC_BUS_WIDTH_4); + /* Initialization sequence for UHS-I cards */ + if (rocr & SD_ROCR_S18A) { + err = mmc_sd_init_uhs_card(card); if (err) goto free_card; + } else { + /* + * Attempt to change to high-speed (if supported) + */ + err = mmc_sd_switch_hs(card); + if (err > 0) + mmc_sd_go_highspeed(card); + else if (err) + goto free_card; + + /* + * Set bus speed. + */ + mmc_set_clock(host, mmc_sd_get_max_clock(card)); - mmc_set_bus_width(host, MMC_BUS_WIDTH_4); + /* + * Switch to wider bus (if supported). + */ + if ((host->caps & MMC_CAP_4_BIT_DATA) && + (card->scr.bus_widths & SD_SCR_BUS_WIDTH_4)) { + err = mmc_app_set_bus_width(card, MMC_BUS_WIDTH_4); + if (err) + goto free_card; + + mmc_set_bus_width(host, MMC_BUS_WIDTH_4); + } } host->card = card; diff --git a/drivers/mmc/core/sd.h b/drivers/mmc/core/sd.h index 3d8800fa7600..4b34b24f3f76 100644 --- a/drivers/mmc/core/sd.h +++ b/drivers/mmc/core/sd.h @@ -5,7 +5,7 @@ extern struct device_type sd_type; -int mmc_sd_get_cid(struct mmc_host *host, u32 ocr, u32 *cid); +int mmc_sd_get_cid(struct mmc_host *host, u32 ocr, u32 *cid, u32 *rocr); int mmc_sd_get_csd(struct mmc_host *host, struct mmc_card *card); void mmc_decode_cid(struct mmc_card *card); int mmc_sd_setup_card(struct mmc_host *host, struct mmc_card *card, diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 1e6095961500..4d0c15bfa514 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -369,8 +369,8 @@ static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr, goto err; } - if (ocr & R4_MEMORY_PRESENT - && mmc_sd_get_cid(host, host->ocr & ocr, card->raw_cid) == 0) { + if ((ocr & R4_MEMORY_PRESENT) && + mmc_sd_get_cid(host, host->ocr & ocr, card->raw_cid, NULL) == 0) { card->type = MMC_TYPE_SD_COMBO; if (oldcard && (oldcard->type != MMC_TYPE_SD_COMBO || diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 4e2031449a23..8072e16d6d46 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1245,6 +1245,25 @@ static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL); + if (host->version >= SDHCI_SPEC_300) { + u16 ctrl_2; + + ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2); + if (!(ctrl_2 & SDHCI_CTRL_PRESET_VAL_ENABLE)) { + /* + * We only need to set Driver Strength if the + * preset value enable is not set. + */ + ctrl_2 &= ~SDHCI_CTRL_DRV_TYPE_MASK; + if (ios->drv_type == MMC_SET_DRIVER_TYPE_A) + ctrl_2 |= SDHCI_CTRL_DRV_TYPE_A; + else if (ios->drv_type == MMC_SET_DRIVER_TYPE_C) + ctrl_2 |= SDHCI_CTRL_DRV_TYPE_C; + + sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2); + } + } + /* * Some (ENE) controllers go apeshit on some ios operation, * signalling timeout and CRC errors even on CMD0. Resetting @@ -2086,6 +2105,14 @@ int sdhci_add_host(struct sdhci_host *host) if (caps[1] & SDHCI_SUPPORT_DDR50) mmc->caps |= MMC_CAP_UHS_DDR50; + /* Driver Type(s) (A, C, D) supported by the host */ + if (caps[1] & SDHCI_DRIVER_TYPE_A) + mmc->caps |= MMC_CAP_DRIVER_TYPE_A; + if (caps[1] & SDHCI_DRIVER_TYPE_C) + mmc->caps |= MMC_CAP_DRIVER_TYPE_C; + if (caps[1] & SDHCI_DRIVER_TYPE_D) + mmc->caps |= MMC_CAP_DRIVER_TYPE_D; + ocr_avail = 0; /* * According to SD Host Controller spec v3.00, if the Host System diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 5cba2fea46e0..667bf8874be7 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -71,7 +71,7 @@ #define SDHCI_DATA_LVL_MASK 0x00F00000 #define SDHCI_DATA_LVL_SHIFT 20 -#define SDHCI_HOST_CONTROL 0x28 +#define SDHCI_HOST_CONTROL 0x28 #define SDHCI_CTRL_LED 0x01 #define SDHCI_CTRL_4BITBUS 0x02 #define SDHCI_CTRL_HISPD 0x04 @@ -150,6 +150,12 @@ #define SDHCI_HOST_CONTROL2 0x3E #define SDHCI_CTRL_VDD_180 0x0008 +#define SDHCI_CTRL_DRV_TYPE_MASK 0x0030 +#define SDHCI_CTRL_DRV_TYPE_B 0x0000 +#define SDHCI_CTRL_DRV_TYPE_A 0x0010 +#define SDHCI_CTRL_DRV_TYPE_C 0x0020 +#define SDHCI_CTRL_DRV_TYPE_D 0x0030 +#define SDHCI_CTRL_PRESET_VAL_ENABLE 0x8000 #define SDHCI_CAPABILITIES 0x40 #define SDHCI_TIMEOUT_CLK_MASK 0x0000003F @@ -173,6 +179,9 @@ #define SDHCI_SUPPORT_SDR50 0x00000001 #define SDHCI_SUPPORT_SDR104 0x00000002 #define SDHCI_SUPPORT_DDR50 0x00000004 +#define SDHCI_DRIVER_TYPE_A 0x00000010 +#define SDHCI_DRIVER_TYPE_C 0x00000020 +#define SDHCI_DRIVER_TYPE_D 0x00000040 #define SDHCI_CAPABILITIES_1 0x44 diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 56f4d9234a66..539327260dc1 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -83,6 +83,10 @@ struct sd_switch_caps { unsigned int hs_max_dtr; unsigned int sd3_bus_mode; unsigned int sd3_drv_type; +#define SD_DRIVER_TYPE_B 0x01 +#define SD_DRIVER_TYPE_A 0x02 +#define SD_DRIVER_TYPE_C 0x04 +#define SD_DRIVER_TYPE_D 0x08 unsigned int sd3_curr_limit; }; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index bde5a0b1c47e..949e4d525989 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -61,6 +61,13 @@ struct mmc_ios { #define MMC_SIGNAL_VOLTAGE_330 0 #define MMC_SIGNAL_VOLTAGE_180 1 + + unsigned char drv_type; /* driver type (A, B, C, D) */ + +#define MMC_SET_DRIVER_TYPE_B 0 +#define MMC_SET_DRIVER_TYPE_A 1 +#define MMC_SET_DRIVER_TYPE_C 2 +#define MMC_SET_DRIVER_TYPE_D 3 }; struct mmc_host_ops { @@ -188,6 +195,9 @@ struct mmc_host { #define MMC_CAP_SET_XPC_330 (1 << 20) /* Host supports >150mA current at 3.3V */ #define MMC_CAP_SET_XPC_300 (1 << 21) /* Host supports >150mA current at 3.0V */ #define MMC_CAP_SET_XPC_180 (1 << 22) /* Host supports >150mA current at 1.8V */ +#define MMC_CAP_DRIVER_TYPE_A (1 << 23) /* Host supports Driver Type A */ +#define MMC_CAP_DRIVER_TYPE_C (1 << 24) /* Host supports Driver Type C */ +#define MMC_CAP_DRIVER_TYPE_D (1 << 25) /* Host supports Driver Type D */ mmc_pm_flag_t pm_caps; /* supported pm features */ -- cgit v1.2.3 From 49c468fcf878d2c86e31920cf54aa90c88418a66 Mon Sep 17 00:00:00 2001 From: Arindam Nath Date: Thu, 5 May 2011 12:19:01 +0530 Subject: mmc: sd: add support for uhs bus speed mode selection This patch adds support for setting UHS-I bus speed mode during UHS-I initialization procedure. Since both the host and card can support more than one bus speed, we select the highest speed based on both of their capabilities. First we set the bus speed mode for the card using CMD6 mode 1, and then we program the host controller to support the required speed mode. We also set High Speed Enable in case one of the UHS-I modes is selected. We take care to reset SD clock before setting UHS mode in the Host Control2 register, and then re-enable it as per the Host Controller spec v3.00. We then set the clock frequency for the UHS-I mode selected. Tested by Zhangfei Gao with a Toshiba uhs card and general hs card, on mmp2 in SDMA mode. Signed-off-by: Arindam Nath Reviewed-by: Philip Rakity Tested-by: Philip Rakity Acked-by: Zhangfei Gao Signed-off-by: Chris Ball --- drivers/mmc/core/sd.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++ drivers/mmc/host/sdhci.c | 40 ++++++++++++++++++++++++++--- drivers/mmc/host/sdhci.h | 6 +++++ include/linux/mmc/card.h | 19 ++++++++++++++ include/linux/mmc/host.h | 5 ++++ 5 files changed, 132 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 5b7c99855635..6970b82171f7 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -452,6 +452,66 @@ static int sd_select_driver_type(struct mmc_card *card, u8 *status) return 0; } +static int sd_set_bus_speed_mode(struct mmc_card *card, u8 *status) +{ + unsigned int bus_speed = 0, timing = 0; + int err; + + /* + * If the host doesn't support any of the UHS-I modes, fallback on + * default speed. + */ + if (!(card->host->caps & (MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 | + MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 | MMC_CAP_UHS_DDR50))) + return 0; + + if ((card->host->caps & MMC_CAP_UHS_SDR104) && + (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR104)) { + bus_speed = UHS_SDR104_BUS_SPEED; + timing = MMC_TIMING_UHS_SDR104; + card->sw_caps.uhs_max_dtr = UHS_SDR104_MAX_DTR; + } else if ((card->host->caps & MMC_CAP_UHS_DDR50) && + (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_DDR50)) { + bus_speed = UHS_DDR50_BUS_SPEED; + timing = MMC_TIMING_UHS_DDR50; + card->sw_caps.uhs_max_dtr = UHS_DDR50_MAX_DTR; + } else if ((card->host->caps & (MMC_CAP_UHS_SDR104 | + MMC_CAP_UHS_SDR50)) && (card->sw_caps.sd3_bus_mode & + SD_MODE_UHS_SDR50)) { + bus_speed = UHS_SDR50_BUS_SPEED; + timing = MMC_TIMING_UHS_SDR50; + card->sw_caps.uhs_max_dtr = UHS_SDR50_MAX_DTR; + } else if ((card->host->caps & (MMC_CAP_UHS_SDR104 | + MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR25)) && + (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR25)) { + bus_speed = UHS_SDR25_BUS_SPEED; + timing = MMC_TIMING_UHS_SDR25; + card->sw_caps.uhs_max_dtr = UHS_SDR25_MAX_DTR; + } else if ((card->host->caps & (MMC_CAP_UHS_SDR104 | + MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR25 | + MMC_CAP_UHS_SDR12)) && (card->sw_caps.sd3_bus_mode & + SD_MODE_UHS_SDR12)) { + bus_speed = UHS_SDR12_BUS_SPEED; + timing = MMC_TIMING_UHS_SDR12; + card->sw_caps.uhs_max_dtr = UHS_SDR12_MAX_DTR; + } + + card->sd_bus_speed = bus_speed; + err = mmc_sd_switch(card, 1, 0, bus_speed, status); + if (err) + return err; + + if ((status[16] & 0xF) != bus_speed) + printk(KERN_WARNING "%s: Problem setting bus speed mode!\n", + mmc_hostname(card->host)); + else { + mmc_set_timing(card->host, timing); + mmc_set_clock(card->host, card->sw_caps.uhs_max_dtr); + } + + return 0; +} + /* * UHS-I specific initialization procedure */ @@ -485,6 +545,11 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card) /* Set the driver strength for the card */ err = sd_select_driver_type(card, status); + if (err) + goto out; + + /* Set bus speed mode of the card */ + err = sd_set_bus_speed_mode(card, status); out: kfree(status); diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 409cde5970ae..8994493dd940 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1244,7 +1244,16 @@ static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) ctrl &= ~SDHCI_CTRL_HISPD; if (host->version >= SDHCI_SPEC_300) { - u16 ctrl_2; + u16 clk, ctrl_2; + unsigned int clock; + + /* In case of UHS-I modes, set High Speed Enable */ + if ((ios->timing == MMC_TIMING_UHS_SDR50) || + (ios->timing == MMC_TIMING_UHS_SDR104) || + (ios->timing == MMC_TIMING_UHS_DDR50) || + (ios->timing == MMC_TIMING_UHS_SDR25) || + (ios->timing == MMC_TIMING_UHS_SDR12)) + ctrl |= SDHCI_CTRL_HISPD; ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2); if (!(ctrl_2 & SDHCI_CTRL_PRESET_VAL_ENABLE)) { @@ -1267,8 +1276,6 @@ static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) * need to reset SD Clock Enable before changing High * Speed Enable to avoid generating clock gliches. */ - u16 clk; - unsigned int clock; /* Reset SD Clock Enable */ clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL); @@ -1282,6 +1289,33 @@ static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) host->clock = 0; sdhci_set_clock(host, clock); } + + ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2); + + /* Select Bus Speed Mode for host */ + ctrl_2 &= ~SDHCI_CTRL_UHS_MASK; + if (ios->timing == MMC_TIMING_UHS_SDR12) + ctrl_2 |= SDHCI_CTRL_UHS_SDR12; + else if (ios->timing == MMC_TIMING_UHS_SDR25) + ctrl_2 |= SDHCI_CTRL_UHS_SDR25; + else if (ios->timing == MMC_TIMING_UHS_SDR50) + ctrl_2 |= SDHCI_CTRL_UHS_SDR50; + else if (ios->timing == MMC_TIMING_UHS_SDR104) + ctrl_2 |= SDHCI_CTRL_UHS_SDR104; + else if (ios->timing == MMC_TIMING_UHS_DDR50) + ctrl_2 |= SDHCI_CTRL_UHS_DDR50; + + /* Reset SD Clock Enable */ + clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL); + clk &= ~SDHCI_CLOCK_CARD_EN; + sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL); + + sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2); + + /* Re-enable SD Clock */ + clock = host->clock; + host->clock = 0; + sdhci_set_clock(host, clock); } else sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL); diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 667bf8874be7..d96f6afcca1f 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -149,6 +149,12 @@ #define SDHCI_ACMD12_ERR 0x3C #define SDHCI_HOST_CONTROL2 0x3E +#define SDHCI_CTRL_UHS_MASK 0x0007 +#define SDHCI_CTRL_UHS_SDR12 0x0000 +#define SDHCI_CTRL_UHS_SDR25 0x0001 +#define SDHCI_CTRL_UHS_SDR50 0x0002 +#define SDHCI_CTRL_UHS_SDR104 0x0003 +#define SDHCI_CTRL_UHS_DDR50 0x0004 #define SDHCI_CTRL_VDD_180 0x0008 #define SDHCI_CTRL_DRV_TYPE_MASK 0x0030 #define SDHCI_CTRL_DRV_TYPE_B 0x0000 diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 539327260dc1..4ef6ded6347d 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -81,7 +81,24 @@ struct sd_ssr { struct sd_switch_caps { unsigned int hs_max_dtr; + unsigned int uhs_max_dtr; +#define UHS_SDR104_MAX_DTR 208000000 +#define UHS_SDR50_MAX_DTR 100000000 +#define UHS_DDR50_MAX_DTR 50000000 +#define UHS_SDR25_MAX_DTR UHS_DDR50_MAX_DTR +#define UHS_SDR12_MAX_DTR 25000000 unsigned int sd3_bus_mode; +#define UHS_SDR12_BUS_SPEED 0 +#define UHS_SDR25_BUS_SPEED 1 +#define UHS_SDR50_BUS_SPEED 2 +#define UHS_SDR104_BUS_SPEED 3 +#define UHS_DDR50_BUS_SPEED 4 + +#define SD_MODE_UHS_SDR12 (1 << UHS_SDR12_BUS_SPEED) +#define SD_MODE_UHS_SDR25 (1 << UHS_SDR25_BUS_SPEED) +#define SD_MODE_UHS_SDR50 (1 << UHS_SDR50_BUS_SPEED) +#define SD_MODE_UHS_SDR104 (1 << UHS_SDR104_BUS_SPEED) +#define SD_MODE_UHS_DDR50 (1 << UHS_DDR50_BUS_SPEED) unsigned int sd3_drv_type; #define SD_DRIVER_TYPE_B 0x01 #define SD_DRIVER_TYPE_A 0x02 @@ -166,6 +183,8 @@ struct mmc_card { const char **info; /* info strings */ struct sdio_func_tuple *tuples; /* unknown common tuples */ + unsigned int sd_bus_speed; /* Bus Speed Mode set for the card */ + struct dentry *debugfs_root; }; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 949e4d525989..62375992bdd6 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -50,6 +50,11 @@ struct mmc_ios { #define MMC_TIMING_LEGACY 0 #define MMC_TIMING_MMC_HS 1 #define MMC_TIMING_SD_HS 2 +#define MMC_TIMING_UHS_SDR12 MMC_TIMING_LEGACY +#define MMC_TIMING_UHS_SDR25 MMC_TIMING_SD_HS +#define MMC_TIMING_UHS_SDR50 3 +#define MMC_TIMING_UHS_SDR104 4 +#define MMC_TIMING_UHS_DDR50 5 unsigned char ddr; /* dual data rate used */ -- cgit v1.2.3 From 5371c927bcd06a5c9dd6785bab2d452b87d9abc6 Mon Sep 17 00:00:00 2001 From: Arindam Nath Date: Thu, 5 May 2011 12:19:02 +0530 Subject: mmc: sd: set current limit for uhs cards We decide on the current limit to be set for the card based on the Capability of Host Controller to provide current at 1.8V signalling, and the maximum current limit of the card as indicated by CMD6 mode 0. We then set the current limit for the card using CMD6 mode 1. As per the Physical Layer Spec v3.01, the current limit switch is only applicable for SDR50, SDR104, and DDR50 bus speed modes. For other UHS-I modes, we set the default current limit of 200mA. Tested by Zhangfei Gao with a Toshiba uhs card and general hs card, on mmp2 in SDMA mode. Signed-off-by: Arindam Nath Reviewed-by: Philip Rakity Tested-by: Philip Rakity Acked-by: Zhangfei Gao Signed-off-by: Chris Ball --- drivers/mmc/core/sd.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++ drivers/mmc/host/sdhci.c | 10 ++++++++ include/linux/mmc/card.h | 9 +++++++ include/linux/mmc/host.h | 4 +++ 4 files changed, 86 insertions(+) (limited to 'include/linux') diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 6970b82171f7..8e2d8012e4cb 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -512,6 +512,64 @@ static int sd_set_bus_speed_mode(struct mmc_card *card, u8 *status) return 0; } +static int sd_set_current_limit(struct mmc_card *card, u8 *status) +{ + int current_limit = 0; + int err; + + /* + * Current limit switch is only defined for SDR50, SDR104, and DDR50 + * bus speed modes. For other bus speed modes, we set the default + * current limit of 200mA. + */ + if ((card->sd_bus_speed == UHS_SDR50_BUS_SPEED) || + (card->sd_bus_speed == UHS_SDR104_BUS_SPEED) || + (card->sd_bus_speed == UHS_DDR50_BUS_SPEED)) { + if (card->host->caps & MMC_CAP_MAX_CURRENT_800) { + if (card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_800) + current_limit = SD_SET_CURRENT_LIMIT_800; + else if (card->sw_caps.sd3_curr_limit & + SD_MAX_CURRENT_600) + current_limit = SD_SET_CURRENT_LIMIT_600; + else if (card->sw_caps.sd3_curr_limit & + SD_MAX_CURRENT_400) + current_limit = SD_SET_CURRENT_LIMIT_400; + else if (card->sw_caps.sd3_curr_limit & + SD_MAX_CURRENT_200) + current_limit = SD_SET_CURRENT_LIMIT_200; + } else if (card->host->caps & MMC_CAP_MAX_CURRENT_600) { + if (card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_600) + current_limit = SD_SET_CURRENT_LIMIT_600; + else if (card->sw_caps.sd3_curr_limit & + SD_MAX_CURRENT_400) + current_limit = SD_SET_CURRENT_LIMIT_400; + else if (card->sw_caps.sd3_curr_limit & + SD_MAX_CURRENT_200) + current_limit = SD_SET_CURRENT_LIMIT_200; + } else if (card->host->caps & MMC_CAP_MAX_CURRENT_400) { + if (card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_400) + current_limit = SD_SET_CURRENT_LIMIT_400; + else if (card->sw_caps.sd3_curr_limit & + SD_MAX_CURRENT_200) + current_limit = SD_SET_CURRENT_LIMIT_200; + } else if (card->host->caps & MMC_CAP_MAX_CURRENT_200) { + if (card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_200) + current_limit = SD_SET_CURRENT_LIMIT_200; + } + } else + current_limit = SD_SET_CURRENT_LIMIT_200; + + err = mmc_sd_switch(card, 1, 3, current_limit, status); + if (err) + return err; + + if (((status[15] >> 4) & 0x0F) != current_limit) + printk(KERN_WARNING "%s: Problem setting current limit!\n", + mmc_hostname(card->host)); + + return 0; +} + /* * UHS-I specific initialization procedure */ @@ -550,6 +608,11 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card) /* Set bus speed mode of the card */ err = sd_set_bus_speed_mode(card, status); + if (err) + goto out; + + /* Set current limit for the card */ + err = sd_set_current_limit(card, status); out: kfree(status); diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 8994493dd940..2a15aad2eba5 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2216,6 +2216,16 @@ int sdhci_add_host(struct sdhci_host *host) if (max_current_180 > 150) mmc->caps |= MMC_CAP_SET_XPC_180; + + /* Maximum current capabilities of the host at 1.8V */ + if (max_current_180 >= 800) + mmc->caps |= MMC_CAP_MAX_CURRENT_800; + else if (max_current_180 >= 600) + mmc->caps |= MMC_CAP_MAX_CURRENT_600; + else if (max_current_180 >= 400) + mmc->caps |= MMC_CAP_MAX_CURRENT_400; + else + mmc->caps |= MMC_CAP_MAX_CURRENT_200; } mmc->ocr_avail = ocr_avail; diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 4ef6ded6347d..47b5ad3960b7 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -105,6 +105,15 @@ struct sd_switch_caps { #define SD_DRIVER_TYPE_C 0x04 #define SD_DRIVER_TYPE_D 0x08 unsigned int sd3_curr_limit; +#define SD_SET_CURRENT_LIMIT_200 0 +#define SD_SET_CURRENT_LIMIT_400 1 +#define SD_SET_CURRENT_LIMIT_600 2 +#define SD_SET_CURRENT_LIMIT_800 3 + +#define SD_MAX_CURRENT_200 (1 << SD_SET_CURRENT_LIMIT_200) +#define SD_MAX_CURRENT_400 (1 << SD_SET_CURRENT_LIMIT_400) +#define SD_MAX_CURRENT_600 (1 << SD_SET_CURRENT_LIMIT_600) +#define SD_MAX_CURRENT_800 (1 << SD_SET_CURRENT_LIMIT_800) }; struct sdio_cccr { diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 62375992bdd6..52b5dc914a8c 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -203,6 +203,10 @@ struct mmc_host { #define MMC_CAP_DRIVER_TYPE_A (1 << 23) /* Host supports Driver Type A */ #define MMC_CAP_DRIVER_TYPE_C (1 << 24) /* Host supports Driver Type C */ #define MMC_CAP_DRIVER_TYPE_D (1 << 25) /* Host supports Driver Type D */ +#define MMC_CAP_MAX_CURRENT_200 (1 << 26) /* Host max current limit is 200mA */ +#define MMC_CAP_MAX_CURRENT_400 (1 << 27) /* Host max current limit is 400mA */ +#define MMC_CAP_MAX_CURRENT_600 (1 << 28) /* Host max current limit is 600mA */ +#define MMC_CAP_MAX_CURRENT_800 (1 << 29) /* Host max current limit is 800mA */ mmc_pm_flag_t pm_caps; /* supported pm features */ -- cgit v1.2.3 From 3a3035114307cd55e024662bb295a87b849f0bd4 Mon Sep 17 00:00:00 2001 From: Arindam Nath Date: Thu, 5 May 2011 12:19:03 +0530 Subject: mmc: sd: report correct speed and capacity of uhs cards Since only UHS-I cards respond with S18A set in response to ACMD41, we set the card as ultra-high-speed after successfull initialization. We need to decide whether a card is SDXC based on the C_SIZE field of CSDv2.0 register. According to Physical Layer spec v3.01, the minimum value of C_SIZE for SDXC card is 00FFFFh. Tested by Zhangfei Gao with a Toshiba uhs card and general hs card, on mmp2 in SDMA mode. Signed-off-by: Arindam Nath Reviewed-by: Philip Rakity Tested-by: Philip Rakity Acked-by: Zhangfei Gao Signed-off-by: Chris Ball --- drivers/mmc/core/bus.c | 11 ++++++++--- drivers/mmc/core/sd.c | 10 +++++++++- include/linux/mmc/card.h | 7 +++++++ 3 files changed, 24 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c index d6d62fd07ee9..393d817ed040 100644 --- a/drivers/mmc/core/bus.c +++ b/drivers/mmc/core/bus.c @@ -274,8 +274,12 @@ int mmc_add_card(struct mmc_card *card) break; case MMC_TYPE_SD: type = "SD"; - if (mmc_card_blockaddr(card)) - type = "SDHC"; + if (mmc_card_blockaddr(card)) { + if (mmc_card_ext_capacity(card)) + type = "SDXC"; + else + type = "SDHC"; + } break; case MMC_TYPE_SDIO: type = "SDIO"; @@ -299,7 +303,8 @@ int mmc_add_card(struct mmc_card *card) } else { printk(KERN_INFO "%s: new %s%s%s card at address %04x\n", mmc_hostname(card->host), - mmc_card_highspeed(card) ? "high speed " : "", + mmc_sd_card_uhs(card) ? "ultra high speed " : + (mmc_card_highspeed(card) ? "high speed " : ""), mmc_card_ddr_mode(card) ? "DDR " : "", type, card->rca); } diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 8e2d8012e4cb..732c3171ceca 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -130,7 +130,7 @@ static int mmc_decode_csd(struct mmc_card *card) break; case 1: /* - * This is a block-addressed SDHC card. Most + * This is a block-addressed SDHC or SDXC card. Most * interesting fields are unused and have fixed * values. To avoid getting tripped by buggy cards, * we assume those fixed values ourselves. @@ -144,6 +144,11 @@ static int mmc_decode_csd(struct mmc_card *card) e = UNSTUFF_BITS(resp, 96, 3); csd->max_dtr = tran_exp[e] * tran_mant[m]; csd->cmdclass = UNSTUFF_BITS(resp, 84, 12); + csd->c_size = UNSTUFF_BITS(resp, 48, 22); + + /* SDXC cards have a minimum C_SIZE of 0x00FFFF */ + if (csd->c_size >= 0xFFFF) + mmc_card_set_ext_capacity(card); m = UNSTUFF_BITS(resp, 48, 22); csd->capacity = (1 + m) << 10; @@ -911,6 +916,9 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr, err = mmc_sd_init_uhs_card(card); if (err) goto free_card; + + /* Card is an ultra-high-speed card */ + mmc_sd_card_set_uhs(card); } else { /* * Attempt to change to high-speed (if supported) diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 47b5ad3960b7..d8dffc992ce2 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -30,6 +30,7 @@ struct mmc_csd { unsigned short cmdclass; unsigned short tacc_clks; unsigned int tacc_ns; + unsigned int c_size; unsigned int r2w_factor; unsigned int max_dtr; unsigned int erase_size; /* In sectors */ @@ -158,6 +159,8 @@ struct mmc_card { #define MMC_STATE_HIGHSPEED (1<<2) /* card is in high speed mode */ #define MMC_STATE_BLOCKADDR (1<<3) /* card uses block-addressing */ #define MMC_STATE_HIGHSPEED_DDR (1<<4) /* card is in high speed mode */ +#define MMC_STATE_ULTRAHIGHSPEED (1<<5) /* card is in ultra high speed mode */ +#define MMC_CARD_SDXC (1<<6) /* card is SDXC */ unsigned int quirks; /* card quirks */ #define MMC_QUIRK_LENIENT_FN0 (1<<0) /* allow SDIO FN0 writes outside of the VS CCCR range */ #define MMC_QUIRK_BLKSZ_FOR_BYTE_MODE (1<<1) /* use func->cur_blksize */ @@ -293,12 +296,16 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data) #define mmc_card_highspeed(c) ((c)->state & MMC_STATE_HIGHSPEED) #define mmc_card_blockaddr(c) ((c)->state & MMC_STATE_BLOCKADDR) #define mmc_card_ddr_mode(c) ((c)->state & MMC_STATE_HIGHSPEED_DDR) +#define mmc_sd_card_uhs(c) ((c)->state & MMC_STATE_ULTRAHIGHSPEED) +#define mmc_card_ext_capacity(c) ((c)->state & MMC_CARD_SDXC) #define mmc_card_set_present(c) ((c)->state |= MMC_STATE_PRESENT) #define mmc_card_set_readonly(c) ((c)->state |= MMC_STATE_READONLY) #define mmc_card_set_highspeed(c) ((c)->state |= MMC_STATE_HIGHSPEED) #define mmc_card_set_blockaddr(c) ((c)->state |= MMC_STATE_BLOCKADDR) #define mmc_card_set_ddr_mode(c) ((c)->state |= MMC_STATE_HIGHSPEED_DDR) +#define mmc_sd_card_set_uhs(c) ((c)->state |= MMC_STATE_ULTRAHIGHSPEED) +#define mmc_card_set_ext_capacity(c) ((c)->state |= MMC_CARD_SDXC) static inline int mmc_card_lenient_fn0(const struct mmc_card *c) { -- cgit v1.2.3 From b513ea250eb7c36a8afb3df938d632ca6b4df7cd Mon Sep 17 00:00:00 2001 From: Arindam Nath Date: Thu, 5 May 2011 12:19:04 +0530 Subject: mmc: sd: add support for tuning during uhs initialization Host Controller needs tuning during initialization to operate SDR50 and SDR104 UHS-I cards. Whether SDR50 mode actually needs tuning is indicated by bit 45 of the Host Controller Capabilities register. A new command CMD19 has been defined in the Physical Layer spec v3.01 to request the card to send tuning pattern. We enable Buffer Read Ready interrupt at the very begining of tuning procedure, because that is the only interrupt generated by the Host Controller during tuning. We program the block size to 64 in the Block Size register. We make sure that DMA Enable and Multi Block Select in the Transfer Mode register are set to 0 before actually sending CMD19. The tuning block is sent by the card to the Host Controller using DAT lines, so we set Data Present Select (bit 5) in the Command register. The Host Controller is responsible for doing the verfication of tuning block sent by the card at the hardware level. After sending CMD19, we wait for Buffer Read Ready interrupt. In case we don't receive an interrupt after the specified timeout value, we fall back on fixed sampling clock by setting Execute Tuning (bit 6) and Sampling Clock Select (bit 7) of Host Control2 register to 0. Before exiting the tuning procedure, we disable Buffer Read Ready interrupt and re-enable other interrupts. Tested by Zhangfei Gao with a Toshiba uhs card and general hs card, on mmp2 in SDMA mode. Signed-off-by: Arindam Nath Reviewed-by: Philip Rakity Tested-by: Philip Rakity Acked-by: Zhangfei Gao Signed-off-by: Chris Ball --- drivers/mmc/core/sd.c | 6 ++ drivers/mmc/host/sdhci.c | 168 +++++++++++++++++++++++++++++++++++++++++++++- drivers/mmc/host/sdhci.h | 3 + include/linux/mmc/host.h | 1 + include/linux/mmc/mmc.h | 1 + include/linux/mmc/sdhci.h | 4 ++ 6 files changed, 182 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 732c3171ceca..fc65475a26ee 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -618,6 +618,12 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card) /* Set current limit for the card */ err = sd_set_current_limit(card, status); + if (err) + goto out; + + /* SPI mode doesn't define CMD19 */ + if (!mmc_host_is_spi(card->host) && card->host->ops->execute_tuning) + err = card->host->ops->execute_tuning(card->host); out: kfree(status); diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 2a15aad2eba5..8a56eacea34d 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -38,6 +38,8 @@ #define SDHCI_USE_LEDS_CLASS #endif +#define MAX_TUNING_LOOP 40 + static unsigned int debug_quirks = 0; static void sdhci_finish_data(struct sdhci_host *); @@ -968,7 +970,9 @@ static void sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd) flags |= SDHCI_CMD_CRC; if (cmd->flags & MMC_RSP_OPCODE) flags |= SDHCI_CMD_INDEX; - if (cmd->data) + + /* CMD19 is special in that the Data Present Select should be set */ + if (cmd->data || (cmd->opcode == MMC_SEND_TUNING_BLOCK)) flags |= SDHCI_CMD_DATA; sdhci_writew(host, SDHCI_MAKE_CMD(cmd->opcode, flags), SDHCI_COMMAND); @@ -1501,12 +1505,157 @@ static int sdhci_start_signal_voltage_switch(struct mmc_host *mmc, return 0; } +static int sdhci_execute_tuning(struct mmc_host *mmc) +{ + struct sdhci_host *host; + u16 ctrl; + u32 ier; + int tuning_loop_counter = MAX_TUNING_LOOP; + unsigned long timeout; + int err = 0; + + host = mmc_priv(mmc); + + disable_irq(host->irq); + spin_lock(&host->lock); + + ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2); + + /* + * Host Controller needs tuning only in case of SDR104 mode + * and for SDR50 mode when Use Tuning for SDR50 is set in + * Capabilities register. + */ + if (((ctrl & SDHCI_CTRL_UHS_MASK) == SDHCI_CTRL_UHS_SDR104) || + (((ctrl & SDHCI_CTRL_UHS_MASK) == SDHCI_CTRL_UHS_SDR50) && + (host->flags & SDHCI_SDR50_NEEDS_TUNING))) + ctrl |= SDHCI_CTRL_EXEC_TUNING; + else { + spin_unlock(&host->lock); + enable_irq(host->irq); + return 0; + } + + sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2); + + /* + * As per the Host Controller spec v3.00, tuning command + * generates Buffer Read Ready interrupt, so enable that. + * + * Note: The spec clearly says that when tuning sequence + * is being performed, the controller does not generate + * interrupts other than Buffer Read Ready interrupt. But + * to make sure we don't hit a controller bug, we _only_ + * enable Buffer Read Ready interrupt here. + */ + ier = sdhci_readl(host, SDHCI_INT_ENABLE); + sdhci_clear_set_irqs(host, ier, SDHCI_INT_DATA_AVAIL); + + /* + * Issue CMD19 repeatedly till Execute Tuning is set to 0 or the number + * of loops reaches 40 times or a timeout of 150ms occurs. + */ + timeout = 150; + do { + struct mmc_command cmd = {0}; + struct mmc_request mrq = {0}; + + if (!tuning_loop_counter && !timeout) + break; + + cmd.opcode = MMC_SEND_TUNING_BLOCK; + cmd.arg = 0; + cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC; + cmd.retries = 0; + cmd.data = NULL; + cmd.error = 0; + + mrq.cmd = &cmd; + host->mrq = &mrq; + + /* + * In response to CMD19, the card sends 64 bytes of tuning + * block to the Host Controller. So we set the block size + * to 64 here. + */ + sdhci_writew(host, SDHCI_MAKE_BLKSZ(7, 64), SDHCI_BLOCK_SIZE); + + /* + * The tuning block is sent by the card to the host controller. + * So we set the TRNS_READ bit in the Transfer Mode register. + * This also takes care of setting DMA Enable and Multi Block + * Select in the same register to 0. + */ + sdhci_writew(host, SDHCI_TRNS_READ, SDHCI_TRANSFER_MODE); + + sdhci_send_command(host, &cmd); + + host->cmd = NULL; + host->mrq = NULL; + + spin_unlock(&host->lock); + enable_irq(host->irq); + + /* Wait for Buffer Read Ready interrupt */ + wait_event_interruptible_timeout(host->buf_ready_int, + (host->tuning_done == 1), + msecs_to_jiffies(50)); + disable_irq(host->irq); + spin_lock(&host->lock); + + if (!host->tuning_done) { + printk(KERN_INFO DRIVER_NAME ": Timeout waiting for " + "Buffer Read Ready interrupt during tuning " + "procedure, falling back to fixed sampling " + "clock\n"); + ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2); + ctrl &= ~SDHCI_CTRL_TUNED_CLK; + ctrl &= ~SDHCI_CTRL_EXEC_TUNING; + sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2); + + err = -EIO; + goto out; + } + + host->tuning_done = 0; + + ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2); + tuning_loop_counter--; + timeout--; + mdelay(1); + } while (ctrl & SDHCI_CTRL_EXEC_TUNING); + + /* + * The Host Driver has exhausted the maximum number of loops allowed, + * so use fixed sampling frequency. + */ + if (!tuning_loop_counter || !timeout) { + ctrl &= ~SDHCI_CTRL_TUNED_CLK; + sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2); + } else { + if (!(ctrl & SDHCI_CTRL_TUNED_CLK)) { + printk(KERN_INFO DRIVER_NAME ": Tuning procedure" + " failed, falling back to fixed sampling" + " clock\n"); + err = -EIO; + } + } + +out: + sdhci_clear_set_irqs(host, SDHCI_INT_DATA_AVAIL, ier); + spin_unlock(&host->lock); + enable_irq(host->irq); + + return err; +} + static const struct mmc_host_ops sdhci_ops = { .request = sdhci_request, .set_ios = sdhci_set_ios, .get_ro = sdhci_get_ro, .enable_sdio_irq = sdhci_enable_sdio_irq, .start_signal_voltage_switch = sdhci_start_signal_voltage_switch, + .execute_tuning = sdhci_execute_tuning, }; /*****************************************************************************\ @@ -1724,6 +1873,16 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask) { BUG_ON(intmask == 0); + /* CMD19 generates _only_ Buffer Read Ready interrupt */ + if (intmask & SDHCI_INT_DATA_AVAIL) { + if (SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND)) == + MMC_SEND_TUNING_BLOCK) { + host->tuning_done = 1; + wake_up(&host->buf_ready_int); + return; + } + } + if (!host->data) { /* * The "data complete" interrupt is also used to @@ -2160,6 +2319,10 @@ int sdhci_add_host(struct sdhci_host *host) if (caps[1] & SDHCI_SUPPORT_DDR50) mmc->caps |= MMC_CAP_UHS_DDR50; + /* Does the host needs tuning for SDR50? */ + if (caps[1] & SDHCI_USE_SDR50_TUNING) + host->flags |= SDHCI_SDR50_NEEDS_TUNING; + /* Driver Type(s) (A, C, D) supported by the host */ if (caps[1] & SDHCI_DRIVER_TYPE_A) mmc->caps |= MMC_CAP_DRIVER_TYPE_A; @@ -2313,6 +2476,9 @@ int sdhci_add_host(struct sdhci_host *host) setup_timer(&host->timer, sdhci_timeout_timer, (unsigned long)host); + if (host->version >= SDHCI_SPEC_300) + init_waitqueue_head(&host->buf_ready_int); + ret = request_irq(host->irq, sdhci_irq, IRQF_SHARED, mmc_hostname(mmc), host); if (ret) diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index d96f6afcca1f..e62367491eee 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -161,6 +161,8 @@ #define SDHCI_CTRL_DRV_TYPE_A 0x0010 #define SDHCI_CTRL_DRV_TYPE_C 0x0020 #define SDHCI_CTRL_DRV_TYPE_D 0x0030 +#define SDHCI_CTRL_EXEC_TUNING 0x0040 +#define SDHCI_CTRL_TUNED_CLK 0x0080 #define SDHCI_CTRL_PRESET_VAL_ENABLE 0x8000 #define SDHCI_CAPABILITIES 0x40 @@ -188,6 +190,7 @@ #define SDHCI_DRIVER_TYPE_A 0x00000010 #define SDHCI_DRIVER_TYPE_C 0x00000020 #define SDHCI_DRIVER_TYPE_D 0x00000040 +#define SDHCI_USE_SDR50_TUNING 0x00002000 #define SDHCI_CAPABILITIES_1 0x44 diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 52b5dc914a8c..ca7007fdb399 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -136,6 +136,7 @@ struct mmc_host_ops { void (*init_card)(struct mmc_host *host, struct mmc_card *card); int (*start_signal_voltage_switch)(struct mmc_host *host, struct mmc_ios *ios); + int (*execute_tuning)(struct mmc_host *host); }; struct mmc_card; diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 373b2bf5e5b5..9fa5a73f393d 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -50,6 +50,7 @@ #define MMC_SET_BLOCKLEN 16 /* ac [31:0] block len R1 */ #define MMC_READ_SINGLE_BLOCK 17 /* adtc [31:0] data addr R1 */ #define MMC_READ_MULTIPLE_BLOCK 18 /* adtc [31:0] data addr R1 */ +#define MMC_SEND_TUNING_BLOCK 19 /* adtc R1 */ /* class 3 */ #define MMC_WRITE_DAT_UNTIL_STOP 20 /* adtc [31:0] data addr R1 */ diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index 92e1c9ad126c..b74c8530e959 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -111,6 +111,7 @@ struct sdhci_host { #define SDHCI_USE_ADMA (1<<1) /* Host is ADMA capable */ #define SDHCI_REQ_USE_DMA (1<<2) /* Use DMA for this req. */ #define SDHCI_DEVICE_DEAD (1<<3) /* Device unresponsive */ +#define SDHCI_SDR50_NEEDS_TUNING (1<<4) /* SDR50 needs tuning */ unsigned int version; /* SDHCI spec. version */ @@ -147,6 +148,9 @@ struct sdhci_host { unsigned int ocr_avail_sd; unsigned int ocr_avail_mmc; + wait_queue_head_t buf_ready_int; /* Waitqueue for Buffer Read Ready interrupt */ + unsigned int tuning_done; /* Condition flag set when CMD19 succeeds */ + unsigned long private[0] ____cacheline_aligned; }; #endif /* __SDHCI_H */ -- cgit v1.2.3 From 4d55c5a13a189a80d40383f02c8026f9a87d7c87 Mon Sep 17 00:00:00 2001 From: Arindam Nath Date: Thu, 5 May 2011 12:19:05 +0530 Subject: mmc: sdhci: enable preset value after uhs initialization According to the Host Controller spec v3.00, setting Preset Value Enable in the Host Control2 register lets SDCLK Frequency Select, Clock Generator Select and Driver Strength Select to be set automatically by the Host Controller based on the UHS-I mode set. This patch enables this feature. Since Preset Value Enable makes sense only for UHS-I cards, we enable this feature after successfull UHS-I initialization. We also reset Preset Value Enable next time before initialization. Tested by Zhangfei Gao with a Toshiba uhs card and general hs card, on mmp2 in SDMA mode. Signed-off-by: Arindam Nath Reviewed-by: Philip Rakity Tested-by: Philip Rakity Acked-by: Zhangfei Gao Signed-off-by: Chris Ball --- drivers/mmc/core/sd.c | 11 +++++++++++ drivers/mmc/host/sdhci.c | 32 ++++++++++++++++++++++++++++++++ include/linux/mmc/host.h | 1 + 3 files changed, 44 insertions(+) (limited to 'include/linux') diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index fc65475a26ee..b461b290ce25 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -925,6 +925,13 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr, /* Card is an ultra-high-speed card */ mmc_sd_card_set_uhs(card); + + /* + * Since initialization is now complete, enable preset + * value registers for UHS-I cards. + */ + if (host->ops->enable_preset_value) + host->ops->enable_preset_value(host, true); } else { /* * Attempt to change to high-speed (if supported) @@ -1095,6 +1102,10 @@ int mmc_attach_sd(struct mmc_host *host) if (err) return err; + /* Disable preset value enable if already set since last time */ + if (host->ops->enable_preset_value) + host->ops->enable_preset_value(host, false); + err = mmc_send_app_op_cond(host, 0, &ocr); if (err) return err; diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 8a56eacea34d..a1ab22415883 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1649,6 +1649,37 @@ out: return err; } +static void sdhci_enable_preset_value(struct mmc_host *mmc, bool enable) +{ + struct sdhci_host *host; + u16 ctrl; + unsigned long flags; + + host = mmc_priv(mmc); + + /* Host Controller v3.00 defines preset value registers */ + if (host->version < SDHCI_SPEC_300) + return; + + spin_lock_irqsave(&host->lock, flags); + + ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2); + + /* + * We only enable or disable Preset Value if they are not already + * enabled or disabled respectively. Otherwise, we bail out. + */ + if (enable && !(ctrl & SDHCI_CTRL_PRESET_VAL_ENABLE)) { + ctrl |= SDHCI_CTRL_PRESET_VAL_ENABLE; + sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2); + } else if (!enable && (ctrl & SDHCI_CTRL_PRESET_VAL_ENABLE)) { + ctrl &= ~SDHCI_CTRL_PRESET_VAL_ENABLE; + sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2); + } + + spin_unlock_irqrestore(&host->lock, flags); +} + static const struct mmc_host_ops sdhci_ops = { .request = sdhci_request, .set_ios = sdhci_set_ios, @@ -1656,6 +1687,7 @@ static const struct mmc_host_ops sdhci_ops = { .enable_sdio_irq = sdhci_enable_sdio_irq, .start_signal_voltage_switch = sdhci_start_signal_voltage_switch, .execute_tuning = sdhci_execute_tuning, + .enable_preset_value = sdhci_enable_preset_value, }; /*****************************************************************************\ diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index ca7007fdb399..6716bd12eccd 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -137,6 +137,7 @@ struct mmc_host_ops { int (*start_signal_voltage_switch)(struct mmc_host *host, struct mmc_ios *ios); int (*execute_tuning)(struct mmc_host *host); + void (*enable_preset_value)(struct mmc_host *host, bool enable); }; struct mmc_card; -- cgit v1.2.3 From c3ed3877625f10d600b0eca2ca48a68c46aed660 Mon Sep 17 00:00:00 2001 From: Arindam Nath Date: Thu, 5 May 2011 12:19:06 +0530 Subject: mmc: sdhci: add support for programmable clock mode Host Controller v3.00 supports programmable clock mode as an optional feature. The support for this mode is indicated by non-zero value in bits 48-55 of the Capabilities register. If supported, the actual value of Clock Multiplier is one more than the value provided in the bit fields. We only set Clock Generator Select (bit 5) and SDCLK Frequency Select (bits 8-15) of the Clock Control register in case Preset Value Enable is not set, otherwise these fields are automatically set by the Host Controller based on the UHS mode selected. Also, since the maximum and minimum clock frequency in this mode can be (Base Clock * Clock Mul) and (Base Clock * Clock Mul)/1024 respectively, f_max and f_min have been recalculated to reflect this change. Tested by Zhangfei Gao with a Toshiba uhs card and general hs card, on mmp2 in SDMA mode. Signed-off-by: Arindam Nath Reviewed-by: Philip Rakity Tested-by: Philip Rakity Acked-by: Zhangfei Gao Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci.c | 81 ++++++++++++++++++++++++++++++++++++++--------- drivers/mmc/host/sdhci.h | 3 ++ include/linux/mmc/sdhci.h | 1 + 3 files changed, 70 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index a1ab22415883..07cca557c4b5 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1013,8 +1013,8 @@ static void sdhci_finish_command(struct sdhci_host *host) static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock) { - int div; - u16 clk; + int div = 0; /* Initialized for compiler warning */ + u16 clk = 0; unsigned long timeout; if (clock == host->clock) @@ -1032,14 +1032,45 @@ static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock) goto out; if (host->version >= SDHCI_SPEC_300) { - /* Version 3.00 divisors must be a multiple of 2. */ - if (host->max_clk <= clock) - div = 1; - else { - for (div = 2; div < SDHCI_MAX_DIV_SPEC_300; div += 2) { - if ((host->max_clk / div) <= clock) - break; + /* + * Check if the Host Controller supports Programmable Clock + * Mode. + */ + if (host->clk_mul) { + u16 ctrl; + + /* + * We need to figure out whether the Host Driver needs + * to select Programmable Clock Mode, or the value can + * be set automatically by the Host Controller based on + * the Preset Value registers. + */ + ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2); + if (!(ctrl & SDHCI_CTRL_PRESET_VAL_ENABLE)) { + for (div = 1; div <= 1024; div++) { + if (((host->max_clk * host->clk_mul) / + div) <= clock) + break; + } + /* + * Set Programmable Clock Mode in the Clock + * Control register. + */ + clk = SDHCI_PROG_CLOCK_MODE; + div--; + } + } else { + /* Version 3.00 divisors must be a multiple of 2. */ + if (host->max_clk <= clock) + div = 1; + else { + for (div = 2; div < SDHCI_MAX_DIV_SPEC_300; + div += 2) { + if ((host->max_clk / div) <= clock) + break; + } } + div >>= 1; } } else { /* Version 2.00 divisors must be a power of 2. */ @@ -1047,10 +1078,10 @@ static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock) if ((host->max_clk / div) <= clock) break; } + div >>= 1; } - div >>= 1; - clk = (div & SDHCI_DIV_MASK) << SDHCI_DIVIDER_SHIFT; + clk |= (div & SDHCI_DIV_MASK) << SDHCI_DIVIDER_SHIFT; clk |= ((div & SDHCI_DIV_HI_MASK) >> SDHCI_DIV_MASK_LEN) << SDHCI_DIVIDER_HI_SHIFT; clk |= SDHCI_CLOCK_INT_EN; @@ -2307,18 +2338,38 @@ int sdhci_add_host(struct sdhci_host *host) if (caps[0] & SDHCI_TIMEOUT_CLK_UNIT) host->timeout_clk *= 1000; + /* + * In case of Host Controller v3.00, find out whether clock + * multiplier is supported. + */ + host->clk_mul = (caps[1] & SDHCI_CLOCK_MUL_MASK) >> + SDHCI_CLOCK_MUL_SHIFT; + + /* + * In case the value in Clock Multiplier is 0, then programmable + * clock mode is not supported, otherwise the actual clock + * multiplier is one more than the value of Clock Multiplier + * in the Capabilities Register. + */ + if (host->clk_mul) + host->clk_mul += 1; + /* * Set host parameters. */ mmc->ops = &sdhci_ops; + mmc->f_max = host->max_clk; if (host->ops->get_min_clock) mmc->f_min = host->ops->get_min_clock(host); - else if (host->version >= SDHCI_SPEC_300) - mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_300; - else + else if (host->version >= SDHCI_SPEC_300) { + if (host->clk_mul) { + mmc->f_min = (host->max_clk * host->clk_mul) / 1024; + mmc->f_max = host->max_clk * host->clk_mul; + } else + mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_300; + } else mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_200; - mmc->f_max = host->max_clk; mmc->caps |= MMC_CAP_SDIO_IRQ | MMC_CAP_ERASE; /* diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index e62367491eee..6b0a0ee9ac6e 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -101,6 +101,7 @@ #define SDHCI_DIV_MASK 0xFF #define SDHCI_DIV_MASK_LEN 8 #define SDHCI_DIV_HI_MASK 0x300 +#define SDHCI_PROG_CLOCK_MODE 0x0020 #define SDHCI_CLOCK_CARD_EN 0x0004 #define SDHCI_CLOCK_INT_STABLE 0x0002 #define SDHCI_CLOCK_INT_EN 0x0001 @@ -191,6 +192,8 @@ #define SDHCI_DRIVER_TYPE_C 0x00000020 #define SDHCI_DRIVER_TYPE_D 0x00000040 #define SDHCI_USE_SDR50_TUNING 0x00002000 +#define SDHCI_CLOCK_MUL_MASK 0x00FF0000 +#define SDHCI_CLOCK_MUL_SHIFT 16 #define SDHCI_CAPABILITIES_1 0x44 diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index b74c8530e959..a88a799ed7c3 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -117,6 +117,7 @@ struct sdhci_host { unsigned int max_clk; /* Max possible freq (MHz) */ unsigned int timeout_clk; /* Timeout freq (KHz) */ + unsigned int clk_mul; /* Clock Muliplier value */ unsigned int clock; /* Current clock (MHz) */ u8 pwr; /* Current voltage */ -- cgit v1.2.3 From cf2b5eea1ea0ff9b3184bc6771bcb93a9fdcd1d9 Mon Sep 17 00:00:00 2001 From: Arindam Nath Date: Thu, 5 May 2011 12:19:07 +0530 Subject: mmc: sdhci: add support for retuning mode 1 Host Controller v3.00 can support retuning modes 1,2 or 3 depending on the bits 46-47 of the Capabilities register. Also, the timer count for retuning is indicated by bits 40-43 of the same register. We initialize timer_list for retuning the first time we execute tuning procedure. This condition is indicated by SDHCI_NEEDS_RETUNING not being set. Since retuning mode 1 sets a limit of 4MB on the maximum data length, we set max_blk_count appropriately. Once the tuning timer expires, we set SDHCI_NEEDS_RETUNING flag, and if the flag is set, we execute tuning procedure before sending the next command. We need to restore mmc_request structure after executing retuning procedure since host->mrq is used inside the procedure to send CMD19. We also disable and re-enable this flag during suspend and resume respectively, as per the spec v3.00. Tested by Zhangfei Gao with a Toshiba uhs card and general hs card, on mmp2 in SDMA mode. Signed-off-by: Arindam Nath Reviewed-by: Philip Rakity Tested-by: Philip Rakity Acked-by: Zhangfei Gao Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci.c | 109 +++++++++++++++++++++++++++++++++++++++++++++- drivers/mmc/host/sdhci.h | 6 ++- include/linux/mmc/sdhci.h | 6 +++ 3 files changed, 118 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 07cca557c4b5..fa3301644b15 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -46,6 +46,8 @@ static void sdhci_finish_data(struct sdhci_host *); static void sdhci_send_command(struct sdhci_host *, struct mmc_command *); static void sdhci_finish_command(struct sdhci_host *); +static int sdhci_execute_tuning(struct mmc_host *mmc); +static void sdhci_tuning_timer(unsigned long data); static void sdhci_dumpregs(struct sdhci_host *host) { @@ -1206,8 +1208,27 @@ static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq) if (!present || host->flags & SDHCI_DEVICE_DEAD) { host->mrq->cmd->error = -ENOMEDIUM; tasklet_schedule(&host->finish_tasklet); - } else + } else { + u32 present_state; + + present_state = sdhci_readl(host, SDHCI_PRESENT_STATE); + /* + * Check if the re-tuning timer has already expired and there + * is no on-going data transfer. If so, we need to execute + * tuning procedure before sending command. + */ + if ((host->flags & SDHCI_NEEDS_RETUNING) && + !(present_state & (SDHCI_DOING_WRITE | SDHCI_DOING_READ))) { + spin_unlock_irqrestore(&host->lock, flags); + sdhci_execute_tuning(mmc); + spin_lock_irqsave(&host->lock, flags); + + /* Restore original mmc_request structure */ + host->mrq = mrq; + } + sdhci_send_command(host, mrq->cmd); + } mmiowb(); spin_unlock_irqrestore(&host->lock, flags); @@ -1673,6 +1694,37 @@ static int sdhci_execute_tuning(struct mmc_host *mmc) } out: + /* + * If this is the very first time we are here, we start the retuning + * timer. Since only during the first time, SDHCI_NEEDS_RETUNING + * flag won't be set, we check this condition before actually starting + * the timer. + */ + if (!(host->flags & SDHCI_NEEDS_RETUNING) && host->tuning_count && + (host->tuning_mode == SDHCI_TUNING_MODE_1)) { + mod_timer(&host->tuning_timer, jiffies + + host->tuning_count * HZ); + /* Tuning mode 1 limits the maximum data length to 4MB */ + mmc->max_blk_count = (4 * 1024 * 1024) / mmc->max_blk_size; + } else { + host->flags &= ~SDHCI_NEEDS_RETUNING; + /* Reload the new initial value for timer */ + if (host->tuning_mode == SDHCI_TUNING_MODE_1) + mod_timer(&host->tuning_timer, jiffies + + host->tuning_count * HZ); + } + + /* + * In case tuning fails, host controllers which support re-tuning can + * try tuning again at a later time, when the re-tuning timer expires. + * So for these controllers, we return 0. Since there might be other + * controllers who do not have this capability, we return error for + * them. + */ + if (err && host->tuning_count && + host->tuning_mode == SDHCI_TUNING_MODE_1) + err = 0; + sdhci_clear_set_irqs(host, SDHCI_INT_DATA_AVAIL, ier); spin_unlock(&host->lock); enable_irq(host->irq); @@ -1775,6 +1827,9 @@ static void sdhci_tasklet_finish(unsigned long param) del_timer(&host->timer); + if (host->version >= SDHCI_SPEC_300) + del_timer(&host->tuning_timer); + mrq = host->mrq; /* @@ -1848,6 +1903,20 @@ static void sdhci_timeout_timer(unsigned long data) spin_unlock_irqrestore(&host->lock, flags); } +static void sdhci_tuning_timer(unsigned long data) +{ + struct sdhci_host *host; + unsigned long flags; + + host = (struct sdhci_host *)data; + + spin_lock_irqsave(&host->lock, flags); + + host->flags |= SDHCI_NEEDS_RETUNING; + + spin_unlock_irqrestore(&host->lock, flags); +} + /*****************************************************************************\ * * * Interrupt handling * @@ -2122,6 +2191,14 @@ int sdhci_suspend_host(struct sdhci_host *host, pm_message_t state) sdhci_disable_card_detection(host); + /* Disable tuning since we are suspending */ + if (host->version >= SDHCI_SPEC_300 && host->tuning_count && + host->tuning_mode == SDHCI_TUNING_MODE_1) { + host->flags &= ~SDHCI_NEEDS_RETUNING; + mod_timer(&host->tuning_timer, jiffies + + host->tuning_count * HZ); + } + ret = mmc_suspend_host(host->mmc); if (ret) return ret; @@ -2163,6 +2240,11 @@ int sdhci_resume_host(struct sdhci_host *host) ret = mmc_resume_host(host->mmc); sdhci_enable_card_detection(host); + /* Set the re-tuning expiration flag */ + if ((host->version >= SDHCI_SPEC_300) && host->tuning_count && + (host->tuning_mode == SDHCI_TUNING_MODE_1)) + host->flags |= SDHCI_NEEDS_RETUNING; + return ret; } @@ -2414,6 +2496,21 @@ int sdhci_add_host(struct sdhci_host *host) if (caps[1] & SDHCI_DRIVER_TYPE_D) mmc->caps |= MMC_CAP_DRIVER_TYPE_D; + /* Initial value for re-tuning timer count */ + host->tuning_count = (caps[1] & SDHCI_RETUNING_TIMER_COUNT_MASK) >> + SDHCI_RETUNING_TIMER_COUNT_SHIFT; + + /* + * In case Re-tuning Timer is not disabled, the actual value of + * re-tuning timer will be 2 ^ (n - 1). + */ + if (host->tuning_count) + host->tuning_count = 1 << (host->tuning_count - 1); + + /* Re-tuning mode supported by the Host Controller */ + host->tuning_mode = (caps[1] & SDHCI_RETUNING_MODE_MASK) >> + SDHCI_RETUNING_MODE_SHIFT; + ocr_avail = 0; /* * According to SD Host Controller spec v3.00, if the Host System @@ -2559,9 +2656,15 @@ int sdhci_add_host(struct sdhci_host *host) setup_timer(&host->timer, sdhci_timeout_timer, (unsigned long)host); - if (host->version >= SDHCI_SPEC_300) + if (host->version >= SDHCI_SPEC_300) { init_waitqueue_head(&host->buf_ready_int); + /* Initialize re-tuning timer */ + init_timer(&host->tuning_timer); + host->tuning_timer.data = (unsigned long)host; + host->tuning_timer.function = sdhci_tuning_timer; + } + ret = request_irq(host->irq, sdhci_irq, IRQF_SHARED, mmc_hostname(mmc), host); if (ret) @@ -2655,6 +2758,8 @@ void sdhci_remove_host(struct sdhci_host *host, int dead) free_irq(host->irq, host); del_timer_sync(&host->timer); + if (host->version >= SDHCI_SPEC_300) + del_timer_sync(&host->tuning_timer); tasklet_kill(&host->card_tasklet); tasklet_kill(&host->finish_tasklet); diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 6b0a0ee9ac6e..8ea11b7cf89a 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -191,7 +191,11 @@ #define SDHCI_DRIVER_TYPE_A 0x00000010 #define SDHCI_DRIVER_TYPE_C 0x00000020 #define SDHCI_DRIVER_TYPE_D 0x00000040 -#define SDHCI_USE_SDR50_TUNING 0x00002000 +#define SDHCI_RETUNING_TIMER_COUNT_MASK 0x00000F00 +#define SDHCI_RETUNING_TIMER_COUNT_SHIFT 8 +#define SDHCI_USE_SDR50_TUNING 0x00002000 +#define SDHCI_RETUNING_MODE_MASK 0x0000C000 +#define SDHCI_RETUNING_MODE_SHIFT 14 #define SDHCI_CLOCK_MUL_MASK 0x00FF0000 #define SDHCI_CLOCK_MUL_SHIFT 16 diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index a88a799ed7c3..e902618d68f4 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -112,6 +112,7 @@ struct sdhci_host { #define SDHCI_REQ_USE_DMA (1<<2) /* Use DMA for this req. */ #define SDHCI_DEVICE_DEAD (1<<3) /* Device unresponsive */ #define SDHCI_SDR50_NEEDS_TUNING (1<<4) /* SDR50 needs tuning */ +#define SDHCI_NEEDS_RETUNING (1<<5) /* Host needs retuning */ unsigned int version; /* SDHCI spec. version */ @@ -152,6 +153,11 @@ struct sdhci_host { wait_queue_head_t buf_ready_int; /* Waitqueue for Buffer Read Ready interrupt */ unsigned int tuning_done; /* Condition flag set when CMD19 succeeds */ + unsigned int tuning_count; /* Timer count for re-tuning */ + unsigned int tuning_mode; /* Re-tuning mode supported by host */ +#define SDHCI_TUNING_MODE_1 0 + struct timer_list tuning_timer; /* Timer for tuning */ + unsigned long private[0] ____cacheline_aligned; }; #endif /* __SDHCI_H */ -- cgit v1.2.3 From 06e8935febe687e2a561707d4c7ca4245d261dbe Mon Sep 17 00:00:00 2001 From: Stefan Nilsson XK Date: Wed, 11 May 2011 17:48:05 +0200 Subject: mmc: sdio: optimized SDIO IRQ handling for single irq If there is only 1 function interrupt registered it is possible to improve performance by directly calling the irq handler and avoiding the overhead of reading the CCCR registers. Signed-off-by: Per Forlin Acked-by: Ulf Hansson Reviewed-by: Nicolas Pitre Signed-off-by: Chris Ball --- drivers/mmc/core/sdio_irq.c | 33 ++++++++++++++++++++++++++++++++- include/linux/mmc/card.h | 1 + 2 files changed, 33 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/sdio_irq.c b/drivers/mmc/core/sdio_irq.c index b3001617e67d..03ead028d2ce 100644 --- a/drivers/mmc/core/sdio_irq.c +++ b/drivers/mmc/core/sdio_irq.c @@ -31,6 +31,17 @@ static int process_sdio_pending_irqs(struct mmc_card *card) { int i, ret, count; unsigned char pending; + struct sdio_func *func; + + /* + * Optimization, if there is only 1 function interrupt registered + * call irq handler directly + */ + func = card->sdio_single_irq; + if (func) { + func->irq_handler(func); + return 1; + } ret = mmc_io_rw_direct(card, 0, 0, SDIO_CCCR_INTx, 0, &pending); if (ret) { @@ -42,7 +53,7 @@ static int process_sdio_pending_irqs(struct mmc_card *card) count = 0; for (i = 1; i <= 7; i++) { if (pending & (1 << i)) { - struct sdio_func *func = card->sdio_func[i - 1]; + func = card->sdio_func[i - 1]; if (!func) { printk(KERN_WARNING "%s: pending IRQ for " "non-existent function\n", @@ -186,6 +197,24 @@ static int sdio_card_irq_put(struct mmc_card *card) return 0; } +/* If there is only 1 function registered set sdio_single_irq */ +static void sdio_single_irq_set(struct mmc_card *card) +{ + struct sdio_func *func; + int i; + + card->sdio_single_irq = NULL; + if ((card->host->caps & MMC_CAP_SDIO_IRQ) && + card->host->sdio_irqs == 1) + for (i = 0; i < card->sdio_funcs; i++) { + func = card->sdio_func[i]; + if (func && func->irq_handler) { + card->sdio_single_irq = func; + break; + } + } +} + /** * sdio_claim_irq - claim the IRQ for a SDIO function * @func: SDIO function @@ -227,6 +256,7 @@ int sdio_claim_irq(struct sdio_func *func, sdio_irq_handler_t *handler) ret = sdio_card_irq_get(func->card); if (ret) func->irq_handler = NULL; + sdio_single_irq_set(func->card); return ret; } @@ -251,6 +281,7 @@ int sdio_release_irq(struct sdio_func *func) if (func->irq_handler) { func->irq_handler = NULL; sdio_card_irq_put(func->card); + sdio_single_irq_set(func->card); } ret = mmc_io_rw_direct(func->card, 0, 0, SDIO_CCCR_IENx, 0, ®); diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index d8dffc992ce2..4910dec47bb7 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -191,6 +191,7 @@ struct mmc_card { struct sdio_cccr cccr; /* common card info */ struct sdio_cis cis; /* common tuple info */ struct sdio_func *sdio_func[SDIO_MAX_FUNCS]; /* SDIO functions (devices) */ + struct sdio_func *sdio_single_irq; /* SDIO function when only one IRQ active */ unsigned num_info; /* number of info strings */ const char **info; /* info strings */ struct sdio_func_tuple *tuples; /* unknown common tuples */ -- cgit v1.2.3 From 7311bef0697bcfbbcb898c3c22e61e23f203ae9d Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Wed, 11 May 2011 16:51:11 +0000 Subject: mmc: tmio: runtime suspend the controller, where possible The TMIO MMC controller cannot be powered off to save power, when no card is plugged in, because then it will not be able to detect a new card-insertion event. On some implementations, however, it is possible to switch to using another source to detect card insertion. This patch adds support for such implementations. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Chris Ball --- drivers/mmc/host/tmio_mmc.h | 3 ++ drivers/mmc/host/tmio_mmc_pio.c | 64 +++++++++++++++++++++++++++++++++++++---- include/linux/mfd/tmio.h | 17 +++++++++++ 3 files changed, 78 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h index c6bf726c8f44..8260bc2c34e3 100644 --- a/drivers/mmc/host/tmio_mmc.h +++ b/drivers/mmc/host/tmio_mmc.h @@ -131,4 +131,7 @@ int tmio_mmc_host_resume(struct device *dev); #define tmio_mmc_host_resume NULL #endif +int tmio_mmc_host_runtime_suspend(struct device *dev); +int tmio_mmc_host_runtime_resume(struct device *dev); + #endif diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c index af5d4f6d2233..ad6347bb02dd 100644 --- a/drivers/mmc/host/tmio_mmc_pio.c +++ b/drivers/mmc/host/tmio_mmc_pio.c @@ -746,6 +746,7 @@ fail: static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) { struct tmio_mmc_host *host = mmc_priv(mmc); + struct tmio_mmc_data *pdata = host->pdata; unsigned long flags; spin_lock_irqsave(&host->lock, flags); @@ -775,13 +776,24 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) /* Power sequence - OFF -> UP -> ON */ if (ios->power_mode == MMC_POWER_UP) { + if ((pdata->flags & TMIO_MMC_HAS_COLD_CD) && !pdata->power) { + pm_runtime_get_sync(&host->pdev->dev); + pdata->power = true; + } /* power up SD bus */ if (host->set_pwr) host->set_pwr(host->pdev, 1); } else if (ios->power_mode == MMC_POWER_OFF || !ios->clock) { /* power down SD bus */ - if (ios->power_mode == MMC_POWER_OFF && host->set_pwr) - host->set_pwr(host->pdev, 0); + if (ios->power_mode == MMC_POWER_OFF) { + if (host->set_pwr) + host->set_pwr(host->pdev, 0); + if ((pdata->flags & TMIO_MMC_HAS_COLD_CD) && + pdata->power) { + pdata->power = false; + pm_runtime_put(&host->pdev->dev); + } + } tmio_mmc_clk_stop(host); } else { /* start bus clock */ @@ -853,6 +865,7 @@ int __devinit tmio_mmc_host_probe(struct tmio_mmc_host **host, if (!mmc) return -ENOMEM; + pdata->dev = &pdev->dev; _host = mmc_priv(mmc); _host->pdata = pdata; _host->mmc = mmc; @@ -886,6 +899,7 @@ int __devinit tmio_mmc_host_probe(struct tmio_mmc_host **host, else mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; + pdata->power = false; pm_runtime_enable(&pdev->dev); ret = pm_runtime_resume(&pdev->dev); if (ret < 0) @@ -907,7 +921,8 @@ int __devinit tmio_mmc_host_probe(struct tmio_mmc_host **host, tmio_mmc_request_dma(_host, pdata); /* We have to keep the device powered for its card detection to work */ - pm_runtime_get_noresume(&pdev->dev); + if (!(pdata->flags & TMIO_MMC_HAS_COLD_CD)) + pm_runtime_get_noresume(&pdev->dev); mmc_add_host(mmc); @@ -937,15 +952,25 @@ void tmio_mmc_host_remove(struct tmio_mmc_host *host) { struct platform_device *pdev = host->pdev; + /* + * We don't have to manipulate pdata->power here: if there is a card in + * the slot, the runtime PM is active and our .runtime_resume() will not + * be run. If there is no card in the slot and the platform can suspend + * the controller, the runtime PM is suspended and pdata->power == false, + * so, our .runtime_resume() will not try to detect a card in the slot. + */ + if (host->pdata->flags & TMIO_MMC_HAS_COLD_CD) + pm_runtime_get_sync(&pdev->dev); + mmc_remove_host(host->mmc); cancel_delayed_work_sync(&host->delayed_reset_work); tmio_mmc_release_dma(host); - iounmap(host->ctl); - mmc_free_host(host->mmc); - /* Compensate for pm_runtime_get_sync() in probe() above */ pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); + + iounmap(host->ctl); + mmc_free_host(host->mmc); } EXPORT_SYMBOL(tmio_mmc_host_remove); @@ -970,6 +995,9 @@ int tmio_mmc_host_resume(struct device *dev) struct mmc_host *mmc = dev_get_drvdata(dev); struct tmio_mmc_host *host = mmc_priv(mmc); + /* The MMC core will perform the complete set up */ + host->pdata->power = false; + if (!host->pm_error) pm_runtime_get_sync(dev); @@ -982,4 +1010,28 @@ EXPORT_SYMBOL(tmio_mmc_host_resume); #endif /* CONFIG_PM */ +int tmio_mmc_host_runtime_suspend(struct device *dev) +{ + return 0; +} +EXPORT_SYMBOL(tmio_mmc_host_runtime_suspend); + +int tmio_mmc_host_runtime_resume(struct device *dev) +{ + struct mmc_host *mmc = dev_get_drvdata(dev); + struct tmio_mmc_host *host = mmc_priv(mmc); + struct tmio_mmc_data *pdata = host->pdata; + + tmio_mmc_reset(host); + + if (pdata->power) { + /* Only entered after a card-insert interrupt */ + tmio_mmc_set_ios(mmc, &mmc->ios); + mmc_detect_change(mmc, msecs_to_jiffies(100)); + } + + return 0; +} +EXPORT_SYMBOL(tmio_mmc_host_runtime_resume); + MODULE_LICENSE("GPL v2"); diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 8e70310ee945..5a90266c3a5a 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -4,6 +4,7 @@ #include #include #include +#include #define tmio_ioread8(addr) readb(addr) #define tmio_ioread16(addr) readw(addr) @@ -61,6 +62,12 @@ * Some controllers can support SDIO IRQ signalling. */ #define TMIO_MMC_SDIO_IRQ (1 << 2) +/* + * Some platforms can detect card insertion events with controller powered + * down, in which case they have to call tmio_mmc_cd_wakeup() to power up the + * controller and report the event to the driver. + */ +#define TMIO_MMC_HAS_COLD_CD (1 << 3) int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base); int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base); @@ -82,11 +89,21 @@ struct tmio_mmc_data { unsigned long flags; u32 ocr_mask; /* available voltages */ struct tmio_mmc_dma *dma; + struct device *dev; + bool power; void (*set_pwr)(struct platform_device *host, int state); void (*set_clk_div)(struct platform_device *host, int state); int (*get_cd)(struct platform_device *host); }; +static inline void tmio_mmc_cd_wakeup(struct tmio_mmc_data *pdata) +{ + if (pdata && !pdata->power) { + pdata->power = true; + pm_runtime_get(pdata->dev); + } +} + /* * data for the NAND controller */ -- cgit v1.2.3 From 2595880481ac894d390365092de9aaf92b44e147 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Wed, 11 May 2011 16:51:15 +0000 Subject: mmc: sdhi: allow powering down controller with no card inserted Supply a link to TMIO private data for platforms to implement their own card detection. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Chris Ball --- drivers/mmc/host/sh_mobile_sdhi.c | 6 ++++++ include/linux/mmc/sh_mobile_sdhi.h | 4 ++++ 2 files changed, 10 insertions(+) (limited to 'include/linux') diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c index d264bbeb529b..b3654293017b 100644 --- a/drivers/mmc/host/sh_mobile_sdhi.c +++ b/drivers/mmc/host/sh_mobile_sdhi.c @@ -71,6 +71,7 @@ static int __devinit sh_mobile_sdhi_probe(struct platform_device *pdev) } mmc_data = &priv->mmc_data; + p->pdata = mmc_data; snprintf(clk_name, sizeof(clk_name), "sdhi%d", pdev->id); priv->clk = clk_get(&pdev->dev, clk_name); @@ -159,8 +160,11 @@ static int sh_mobile_sdhi_remove(struct platform_device *pdev) struct mmc_host *mmc = platform_get_drvdata(pdev); struct tmio_mmc_host *host = mmc_priv(mmc); struct sh_mobile_sdhi *priv = container_of(host->pdata, struct sh_mobile_sdhi, mmc_data); + struct sh_mobile_sdhi_info *p = pdev->dev.platform_data; int i, irq; + p->pdata = NULL; + for (i = 0; i < 3; i++) { irq = platform_get_irq(pdev, i); if (irq >= 0) @@ -178,6 +182,8 @@ static int sh_mobile_sdhi_remove(struct platform_device *pdev) static const struct dev_pm_ops tmio_mmc_dev_pm_ops = { .suspend = tmio_mmc_host_suspend, .resume = tmio_mmc_host_resume, + .runtime_suspend = tmio_mmc_host_runtime_suspend, + .runtime_resume = tmio_mmc_host_runtime_resume, }; static struct platform_driver sh_mobile_sdhi_driver = { diff --git a/include/linux/mmc/sh_mobile_sdhi.h b/include/linux/mmc/sh_mobile_sdhi.h index c981b959760f..faf32b6ec185 100644 --- a/include/linux/mmc/sh_mobile_sdhi.h +++ b/include/linux/mmc/sh_mobile_sdhi.h @@ -3,12 +3,16 @@ #include +struct platform_device; +struct tmio_mmc_data; + struct sh_mobile_sdhi_info { int dma_slave_tx; int dma_slave_rx; unsigned long tmio_flags; unsigned long tmio_caps; u32 tmio_ocr_mask; /* available MMC voltages */ + struct tmio_mmc_data *pdata; void (*set_pwr)(struct platform_device *pdev, int state); int (*get_cd)(struct platform_device *pdev); }; -- cgit v1.2.3 From 4c4cb171054230c2e58ed6574d7faa1871c75bbe Mon Sep 17 00:00:00 2001 From: Philip Rakity Date: Fri, 13 May 2011 11:17:18 +0530 Subject: mmc: core: add support for eMMC Dual Data Rate eMMC voltage change not required for 1.8V. 3.3V and 1.8V vcc are capable of doing DDR. vccq of 1.8v is not required. Signed-off-by: Philip Rakity Reviewed-by: Arindam Nath Signed-off-by: Chris Ball --- drivers/mmc/core/core.c | 14 ++------------ drivers/mmc/core/core.h | 2 -- drivers/mmc/core/mmc.c | 35 ++++++++++++++++++++++++++++++----- include/linux/mmc/host.h | 1 + 4 files changed, 33 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 94c8d5a9ecae..7863eedf3d9a 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -717,23 +717,13 @@ void mmc_set_bus_mode(struct mmc_host *host, unsigned int mode) mmc_set_ios(host); } -/* - * Change data bus width and DDR mode of a host. - */ -void mmc_set_bus_width_ddr(struct mmc_host *host, unsigned int width, - unsigned int ddr) -{ - host->ios.bus_width = width; - host->ios.ddr = ddr; - mmc_set_ios(host); -} - /* * Change data bus width of a host. */ void mmc_set_bus_width(struct mmc_host *host, unsigned int width) { - mmc_set_bus_width_ddr(host, width, MMC_SDR_MODE); + host->ios.bus_width = width; + mmc_set_ios(host); } /** diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h index 53d23c240324..d9411ed2a39b 100644 --- a/drivers/mmc/core/core.h +++ b/drivers/mmc/core/core.h @@ -38,8 +38,6 @@ void mmc_ungate_clock(struct mmc_host *host); void mmc_set_ungated(struct mmc_host *host); void mmc_set_bus_mode(struct mmc_host *host, unsigned int mode); void mmc_set_bus_width(struct mmc_host *host, unsigned int width); -void mmc_set_bus_width_ddr(struct mmc_host *host, unsigned int width, - unsigned int ddr); u32 mmc_select_voltage(struct mmc_host *host, u32 ocr); int mmc_set_signal_voltage(struct mmc_host *host, int signal_voltage, bool cmd11); diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index a2c795e8f9dd..0433fe66cbad 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -20,6 +20,7 @@ #include "core.h" #include "bus.h" #include "mmc_ops.h" +#include "sd_ops.h" static const unsigned int tran_exp[] = { 10000, 100000, 1000000, 10000000, @@ -633,10 +634,14 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, */ if (mmc_card_highspeed(card)) { if ((card->ext_csd.card_type & EXT_CSD_CARD_TYPE_DDR_1_8V) - && (host->caps & (MMC_CAP_1_8V_DDR))) + && ((host->caps & (MMC_CAP_1_8V_DDR | + MMC_CAP_UHS_DDR50)) + == (MMC_CAP_1_8V_DDR | MMC_CAP_UHS_DDR50))) ddr = MMC_1_8V_DDR_MODE; else if ((card->ext_csd.card_type & EXT_CSD_CARD_TYPE_DDR_1_2V) - && (host->caps & (MMC_CAP_1_2V_DDR))) + && ((host->caps & (MMC_CAP_1_2V_DDR | + MMC_CAP_UHS_DDR50)) + == (MMC_CAP_1_2V_DDR | MMC_CAP_UHS_DDR50))) ddr = MMC_1_2V_DDR_MODE; } @@ -670,8 +675,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, ext_csd_bits[idx][0], 0); if (!err) { - mmc_set_bus_width_ddr(card->host, - bus_width, MMC_SDR_MODE); + mmc_set_bus_width(card->host, bus_width); /* * If controller can't handle bus width test, * use the highest bus width to maintain @@ -697,8 +701,29 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, 1 << bus_width, ddr); goto free_card; } else if (ddr) { + /* + * eMMC cards can support 3.3V to 1.2V i/o (vccq) + * signaling. + * + * EXT_CSD_CARD_TYPE_DDR_1_8V means 3.3V or 1.8V vccq. + * + * 1.8V vccq at 3.3V core voltage (vcc) is not required + * in the JEDEC spec for DDR. + * + * Do not force change in vccq since we are obviously + * working and no change to vccq is needed. + * + * WARNING: eMMC rules are NOT the same as SD DDR + */ + if (ddr == EXT_CSD_CARD_TYPE_DDR_1_2V) { + err = mmc_set_signal_voltage(host, + MMC_SIGNAL_VOLTAGE_120, 0); + if (err) + goto err; + } mmc_card_set_ddr_mode(card); - mmc_set_bus_width_ddr(card->host, bus_width, ddr); + mmc_set_timing(card->host, MMC_TIMING_UHS_DDR50); + mmc_set_bus_width(card->host, bus_width); } } diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 6716bd12eccd..de32e6aa018a 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -66,6 +66,7 @@ struct mmc_ios { #define MMC_SIGNAL_VOLTAGE_330 0 #define MMC_SIGNAL_VOLTAGE_180 1 +#define MMC_SIGNAL_VOLTAGE_120 2 unsigned char drv_type; /* driver type (A, B, C, D) */ -- cgit v1.2.3 From c59de9287993b5c36f9005f745a3ce0b1008131d Mon Sep 17 00:00:00 2001 From: Andrei Warkentin Date: Mon, 23 May 2011 15:06:35 -0500 Subject: mmc: quirks: Add/remove quirks conditional support. Conditional add/remove quirks for MMC and SD. Signed-off-by: Andrei Warkentin Signed-off-by: Chris Ball --- include/linux/mmc/card.h | 40 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 4910dec47bb7..7190aa2096f7 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -273,16 +273,14 @@ struct mmc_fixup { card->cid.month) /* - * This hook just adds a quirk unconditionally. + * Unconditionally quirk add/remove. */ + static inline void __maybe_unused add_quirk(struct mmc_card *card, int data) { card->quirks |= data; } -/* - * This hook just removes a quirk unconditionally. - */ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data) { card->quirks &= ~data; @@ -308,6 +306,40 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data) #define mmc_sd_card_set_uhs(c) ((c)->state |= MMC_STATE_ULTRAHIGHSPEED) #define mmc_card_set_ext_capacity(c) ((c)->state |= MMC_CARD_SDXC) +/* + * Quirk add/remove for MMC products. + */ + +static inline void __maybe_unused add_quirk_mmc(struct mmc_card *card, int data) +{ + if (mmc_card_mmc(card)) + card->quirks |= data; +} + +static inline void __maybe_unused remove_quirk_mmc(struct mmc_card *card, + int data) +{ + if (mmc_card_mmc(card)) + card->quirks &= ~data; +} + +/* + * Quirk add/remove for SD products. + */ + +static inline void __maybe_unused add_quirk_sd(struct mmc_card *card, int data) +{ + if (mmc_card_sd(card)) + card->quirks |= data; +} + +static inline void __maybe_unused remove_quirk_sd(struct mmc_card *card, + int data) +{ + if (mmc_card_sd(card)) + card->quirks &= ~data; +} + static inline int mmc_card_lenient_fn0(const struct mmc_card *c) { return c->quirks & MMC_QUIRK_LENIENT_FN0; -- cgit v1.2.3 From 7bf02ea22c6cdd09e2d3f1d3c3fe366b834ae9af Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 24 May 2011 17:11:16 -0700 Subject: arch, mm: filter disallowed nodes from arch specific show_mem functions Architectures that implement their own show_mem() function did not pass the filter argument to show_free_areas() to appropriately avoid emitting the state of nodes that are disallowed in the current context. This patch now passes the filter argument to show_free_areas() so those nodes are now avoided. This patch also removes the show_free_areas() wrapper around __show_free_areas() and converts existing callers to pass an empty filter. ia64 emits additional information for each node, so skip_free_areas_zone() must be made global to filter disallowed nodes and it is converted to use a nid argument rather than a zone for this use case. Signed-off-by: David Rientjes Cc: Russell King Cc: Tony Luck Cc: Fenghua Yu Cc: Kyle McMartin Cc: Helge Deller Cc: James Bottomley Cc: "David S. Miller" Cc: Guan Xuetao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/init.c | 2 +- arch/ia64/mm/contig.c | 10 ++++++---- arch/ia64/mm/discontig.c | 10 ++++++---- arch/parisc/mm/init.c | 2 +- arch/sparc/kernel/setup_32.c | 2 +- arch/sparc/mm/init_32.c | 2 +- arch/unicore32/mm/init.c | 2 +- drivers/net/ioc3-eth.c | 2 +- drivers/tty/serial/68328serial.c | 2 +- include/linux/mm.h | 6 +++--- lib/show_mem.c | 2 +- mm/nommu.c | 6 +++--- mm/page_alloc.c | 22 ++++++++-------------- 13 files changed, 34 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 76f82ae44efb..3f17ea146f0e 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -85,7 +85,7 @@ void show_mem(unsigned int filter) struct meminfo * mi = &meminfo; printk("Mem-info:\n"); - show_free_areas(); + show_free_areas(filter); for_each_bank (i, mi) { struct membank *bank = &mi->bank[i]; diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index 9a018cde5d84..f114a3b14c6a 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -44,13 +44,16 @@ void show_mem(unsigned int filter) pg_data_t *pgdat; printk(KERN_INFO "Mem-info:\n"); - show_free_areas(); + show_free_areas(filter); printk(KERN_INFO "Node memory in pages:\n"); for_each_online_pgdat(pgdat) { unsigned long present; unsigned long flags; int shared = 0, cached = 0, reserved = 0; + int nid = pgdat->node_id; + if (skip_free_areas_node(filter, nid)) + continue; pgdat_resize_lock(pgdat, &flags); present = pgdat->node_present_pages; for(i = 0; i < pgdat->node_spanned_pages; i++) { @@ -64,8 +67,7 @@ void show_mem(unsigned int filter) if (max_gap < LARGE_GAP) continue; #endif - i = vmemmap_find_next_valid_pfn(pgdat->node_id, - i) - 1; + i = vmemmap_find_next_valid_pfn(nid, i) - 1; continue; } if (PageReserved(page)) @@ -81,7 +83,7 @@ void show_mem(unsigned int filter) total_cached += cached; total_shared += shared; printk(KERN_INFO "Node %4d: RAM: %11ld, rsvd: %8d, " - "shrd: %10d, swpd: %10d\n", pgdat->node_id, + "shrd: %10d, swpd: %10d\n", nid, present, reserved, shared, cached); } printk(KERN_INFO "%ld pages of RAM\n", total_present); diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 82ab1bc6afb1..c641333cd997 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -622,13 +622,16 @@ void show_mem(unsigned int filter) pg_data_t *pgdat; printk(KERN_INFO "Mem-info:\n"); - show_free_areas(); + show_free_areas(filter); printk(KERN_INFO "Node memory in pages:\n"); for_each_online_pgdat(pgdat) { unsigned long present; unsigned long flags; int shared = 0, cached = 0, reserved = 0; + int nid = pgdat->node_id; + if (skip_free_areas_node(filter, nid)) + continue; pgdat_resize_lock(pgdat, &flags); present = pgdat->node_present_pages; for(i = 0; i < pgdat->node_spanned_pages; i++) { @@ -638,8 +641,7 @@ void show_mem(unsigned int filter) if (pfn_valid(pgdat->node_start_pfn + i)) page = pfn_to_page(pgdat->node_start_pfn + i); else { - i = vmemmap_find_next_valid_pfn(pgdat->node_id, - i) - 1; + i = vmemmap_find_next_valid_pfn(nid, i) - 1; continue; } if (PageReserved(page)) @@ -655,7 +657,7 @@ void show_mem(unsigned int filter) total_cached += cached; total_shared += shared; printk(KERN_INFO "Node %4d: RAM: %11ld, rsvd: %8d, " - "shrd: %10d, swpd: %10d\n", pgdat->node_id, + "shrd: %10d, swpd: %10d\n", nid, present, reserved, shared, cached); } printk(KERN_INFO "%ld pages of RAM\n", total_present); diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 5fa1e273006e..c5c9c65e502d 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -686,7 +686,7 @@ void show_mem(unsigned int filter) int shared = 0, cached = 0; printk(KERN_INFO "Mem-info:\n"); - show_free_areas(); + show_free_areas(filter); #ifndef CONFIG_DISCONTIGMEM i = max_mapnr; while (i-- > 0) { diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c index 3609bdee9ed2..3249d3f3234d 100644 --- a/arch/sparc/kernel/setup_32.c +++ b/arch/sparc/kernel/setup_32.c @@ -82,7 +82,7 @@ static void prom_sync_me(void) "nop\n\t" : : "r" (&trapbase)); prom_printf("PROM SYNC COMMAND...\n"); - show_free_areas(); + show_free_areas(0); if(current->pid != 0) { local_irq_enable(); sys_sync(); diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index 4c31e2b6e71b..28c2cc81c9a9 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -78,7 +78,7 @@ void __init kmap_init(void) void show_mem(unsigned int filter) { printk("Mem-info:\n"); - show_free_areas(); + show_free_areas(filter); printk("Free swap: %6ldkB\n", nr_swap_pages << (PAGE_SHIFT-10)); printk("%ld pages of RAM\n", totalram_pages); diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c index 1fc02633f700..2d3e7112d2a3 100644 --- a/arch/unicore32/mm/init.c +++ b/arch/unicore32/mm/init.c @@ -62,7 +62,7 @@ void show_mem(unsigned int filter) struct meminfo *mi = &meminfo; printk(KERN_DEFAULT "Mem-info:\n"); - show_free_areas(); + show_free_areas(filter); for_each_bank(i, mi) { struct membank *bank = &mi->bank[i]; diff --git a/drivers/net/ioc3-eth.c b/drivers/net/ioc3-eth.c index 96c95617195f..32f07f868d89 100644 --- a/drivers/net/ioc3-eth.c +++ b/drivers/net/ioc3-eth.c @@ -915,7 +915,7 @@ static void ioc3_alloc_rings(struct net_device *dev) skb = ioc3_alloc_skb(RX_BUF_ALLOC_SIZE, GFP_ATOMIC); if (!skb) { - show_free_areas(); + show_free_areas(0); continue; } diff --git a/drivers/tty/serial/68328serial.c b/drivers/tty/serial/68328serial.c index d5bfd41707e7..e0a77540b8ca 100644 --- a/drivers/tty/serial/68328serial.c +++ b/drivers/tty/serial/68328serial.c @@ -281,7 +281,7 @@ static void receive_chars(struct m68k_serial *info, unsigned short rx) #ifdef CONFIG_MAGIC_SYSRQ } else if (ch == 0x10) { /* ^P */ show_state(); - show_free_areas(); + show_free_areas(0); show_buffers(); /* show_net_buffers(); */ return; diff --git a/include/linux/mm.h b/include/linux/mm.h index 6507dde38b16..1746f67c33de 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -862,13 +862,13 @@ extern void pagefault_out_of_memory(void); #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) /* - * Flags passed to show_mem() and __show_free_areas() to suppress output in + * Flags passed to show_mem() and show_free_areas() to suppress output in * various contexts. */ #define SHOW_MEM_FILTER_NODES (0x0001u) /* filter disallowed nodes */ -extern void show_free_areas(void); -extern void __show_free_areas(unsigned int flags); +extern void show_free_areas(unsigned int flags); +extern bool skip_free_areas_node(unsigned int flags, int nid); int shmem_lock(struct file *file, int lock, struct user_struct *user); struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags); diff --git a/lib/show_mem.c b/lib/show_mem.c index 90cbe4bb5960..4407f8c9b1f7 100644 --- a/lib/show_mem.c +++ b/lib/show_mem.c @@ -16,7 +16,7 @@ void show_mem(unsigned int filter) nonshared = 0, highmem = 0; printk("Mem-Info:\n"); - __show_free_areas(filter); + show_free_areas(filter); for_each_online_pgdat(pgdat) { unsigned long i, flags; diff --git a/mm/nommu.c b/mm/nommu.c index c4c542c736a9..5afce48ce339 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1235,7 +1235,7 @@ error_free: enomem: printk("Allocation of length %lu from process %d (%s) failed\n", len, current->pid, current->comm); - show_free_areas(); + show_free_areas(0); return -ENOMEM; } @@ -1468,14 +1468,14 @@ error_getting_vma: printk(KERN_WARNING "Allocation of vma for %lu byte allocation" " from process %d failed\n", len, current->pid); - show_free_areas(); + show_free_areas(0); return -ENOMEM; error_getting_region: printk(KERN_WARNING "Allocation of vm region for %lu byte allocation" " from process %d failed\n", len, current->pid); - show_free_areas(); + show_free_areas(0); return -ENOMEM; } EXPORT_SYMBOL(do_mmap_pgoff); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9d5498e2d0f5..b1447522d346 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2473,10 +2473,10 @@ void si_meminfo_node(struct sysinfo *val, int nid) #endif /* - * Determine whether the zone's node should be displayed or not, depending on - * whether SHOW_MEM_FILTER_NODES was passed to __show_free_areas(). + * Determine whether the node should be displayed or not, depending on whether + * SHOW_MEM_FILTER_NODES was passed to show_free_areas(). */ -static bool skip_free_areas_zone(unsigned int flags, const struct zone *zone) +bool skip_free_areas_node(unsigned int flags, int nid) { bool ret = false; @@ -2484,8 +2484,7 @@ static bool skip_free_areas_zone(unsigned int flags, const struct zone *zone) goto out; get_mems_allowed(); - ret = !node_isset(zone->zone_pgdat->node_id, - cpuset_current_mems_allowed); + ret = !node_isset(nid, cpuset_current_mems_allowed); put_mems_allowed(); out: return ret; @@ -2500,13 +2499,13 @@ out: * Suppresses nodes that are not allowed by current's cpuset if * SHOW_MEM_FILTER_NODES is passed. */ -void __show_free_areas(unsigned int filter) +void show_free_areas(unsigned int filter) { int cpu; struct zone *zone; for_each_populated_zone(zone) { - if (skip_free_areas_zone(filter, zone)) + if (skip_free_areas_node(filter, zone_to_nid(zone))) continue; show_node(zone); printk("%s per-cpu:\n", zone->name); @@ -2549,7 +2548,7 @@ void __show_free_areas(unsigned int filter) for_each_populated_zone(zone) { int i; - if (skip_free_areas_zone(filter, zone)) + if (skip_free_areas_node(filter, zone_to_nid(zone))) continue; show_node(zone); printk("%s" @@ -2618,7 +2617,7 @@ void __show_free_areas(unsigned int filter) for_each_populated_zone(zone) { unsigned long nr[MAX_ORDER], flags, order, total = 0; - if (skip_free_areas_zone(filter, zone)) + if (skip_free_areas_node(filter, zone_to_nid(zone))) continue; show_node(zone); printk("%s: ", zone->name); @@ -2639,11 +2638,6 @@ void __show_free_areas(unsigned int filter) show_swap_cache_info(); } -void show_free_areas(void) -{ - __show_free_areas(0); -} - static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref) { zoneref->zone = zone; -- cgit v1.2.3 From fa25c503dfa203b921199ea42c0046c89f2ed49f Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 24 May 2011 17:11:28 -0700 Subject: mm: per-node vmstat: show proper vmstats commit 2ac390370a ("writeback: add /sys/devices/system/node//vmstat") added vmstat entry. But strangely it only show nr_written and nr_dirtied. # cat /sys/devices/system/node/node20/vmstat nr_written 0 nr_dirtied 0 Of course, It's not adequate. With this patch, the vmstat show all vm stastics as /proc/vmstat. # cat /sys/devices/system/node/node0/vmstat nr_free_pages 899224 nr_inactive_anon 201 nr_active_anon 17380 nr_inactive_file 31572 nr_active_file 28277 nr_unevictable 0 nr_mlock 0 nr_anon_pages 17321 nr_mapped 8640 nr_file_pages 60107 nr_dirty 33 nr_writeback 0 nr_slab_reclaimable 6850 nr_slab_unreclaimable 7604 nr_page_table_pages 3105 nr_kernel_stack 175 nr_unstable 0 nr_bounce 0 nr_vmscan_write 0 nr_writeback_temp 0 nr_isolated_anon 0 nr_isolated_file 0 nr_shmem 260 nr_dirtied 1050 nr_written 938 numa_hit 962872 numa_miss 0 numa_foreign 0 numa_interleave 8617 numa_local 962872 numa_other 0 nr_anon_transparent_hugepages 0 [akpm@linux-foundation.org: no externs in .c files] Signed-off-by: KOSAKI Motohiro Cc: Michael Rubin Cc: Wu Fengguang Acked-by: David Rientjes Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 14 ++- include/linux/vmstat.h | 4 +- mm/vmstat.c | 261 +++++++++++++++++++++++++------------------------ 3 files changed, 144 insertions(+), 135 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/node.c b/drivers/base/node.c index b3b72d64e805..793f796c4da3 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -179,11 +180,14 @@ static ssize_t node_read_vmstat(struct sys_device *dev, struct sysdev_attribute *attr, char *buf) { int nid = dev->id; - return sprintf(buf, - "nr_written %lu\n" - "nr_dirtied %lu\n", - node_page_state(nid, NR_WRITTEN), - node_page_state(nid, NR_DIRTIED)); + int i; + int n = 0; + + for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) + n += sprintf(buf+n, "%s %lu\n", vmstat_text[i], + node_page_state(nid, i)); + + return n; } static SYSDEV_ATTR(vmstat, S_IRUGO, node_read_vmstat, NULL); diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 2b3831b58aa4..e73d1030f2f7 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -313,6 +313,8 @@ static inline void __dec_zone_page_state(struct page *page, #define set_pgdat_percpu_threshold(pgdat, callback) { } static inline void refresh_cpu_vm_stats(int cpu) { } -#endif +#endif /* CONFIG_SMP */ + +extern const char * const vmstat_text[]; #endif /* _LINUX_VMSTAT_H */ diff --git a/mm/vmstat.c b/mm/vmstat.c index 897ea9e88238..209546a8bdd5 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -659,6 +659,138 @@ static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat, } #endif +#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) +#ifdef CONFIG_ZONE_DMA +#define TEXT_FOR_DMA(xx) xx "_dma", +#else +#define TEXT_FOR_DMA(xx) +#endif + +#ifdef CONFIG_ZONE_DMA32 +#define TEXT_FOR_DMA32(xx) xx "_dma32", +#else +#define TEXT_FOR_DMA32(xx) +#endif + +#ifdef CONFIG_HIGHMEM +#define TEXT_FOR_HIGHMEM(xx) xx "_high", +#else +#define TEXT_FOR_HIGHMEM(xx) +#endif + +#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \ + TEXT_FOR_HIGHMEM(xx) xx "_movable", + +const char * const vmstat_text[] = { + /* Zoned VM counters */ + "nr_free_pages", + "nr_inactive_anon", + "nr_active_anon", + "nr_inactive_file", + "nr_active_file", + "nr_unevictable", + "nr_mlock", + "nr_anon_pages", + "nr_mapped", + "nr_file_pages", + "nr_dirty", + "nr_writeback", + "nr_slab_reclaimable", + "nr_slab_unreclaimable", + "nr_page_table_pages", + "nr_kernel_stack", + "nr_unstable", + "nr_bounce", + "nr_vmscan_write", + "nr_writeback_temp", + "nr_isolated_anon", + "nr_isolated_file", + "nr_shmem", + "nr_dirtied", + "nr_written", + +#ifdef CONFIG_NUMA + "numa_hit", + "numa_miss", + "numa_foreign", + "numa_interleave", + "numa_local", + "numa_other", +#endif + "nr_anon_transparent_hugepages", + "nr_dirty_threshold", + "nr_dirty_background_threshold", + +#ifdef CONFIG_VM_EVENT_COUNTERS + "pgpgin", + "pgpgout", + "pswpin", + "pswpout", + + TEXTS_FOR_ZONES("pgalloc") + + "pgfree", + "pgactivate", + "pgdeactivate", + + "pgfault", + "pgmajfault", + + TEXTS_FOR_ZONES("pgrefill") + TEXTS_FOR_ZONES("pgsteal") + TEXTS_FOR_ZONES("pgscan_kswapd") + TEXTS_FOR_ZONES("pgscan_direct") + +#ifdef CONFIG_NUMA + "zone_reclaim_failed", +#endif + "pginodesteal", + "slabs_scanned", + "kswapd_steal", + "kswapd_inodesteal", + "kswapd_low_wmark_hit_quickly", + "kswapd_high_wmark_hit_quickly", + "kswapd_skip_congestion_wait", + "pageoutrun", + "allocstall", + + "pgrotated", + +#ifdef CONFIG_COMPACTION + "compact_blocks_moved", + "compact_pages_moved", + "compact_pagemigrate_failed", + "compact_stall", + "compact_fail", + "compact_success", +#endif + +#ifdef CONFIG_HUGETLB_PAGE + "htlb_buddy_alloc_success", + "htlb_buddy_alloc_fail", +#endif + "unevictable_pgs_culled", + "unevictable_pgs_scanned", + "unevictable_pgs_rescued", + "unevictable_pgs_mlocked", + "unevictable_pgs_munlocked", + "unevictable_pgs_cleared", + "unevictable_pgs_stranded", + "unevictable_pgs_mlockfreed", + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + "thp_fault_alloc", + "thp_fault_fallback", + "thp_collapse_alloc", + "thp_collapse_alloc_failed", + "thp_split", +#endif + +#endif /* CONFIG_VM_EVENTS_COUNTERS */ +}; +#endif /* CONFIG_PROC_FS || CONFIG_SYSFS */ + + #ifdef CONFIG_PROC_FS static void frag_show_print(struct seq_file *m, pg_data_t *pgdat, struct zone *zone) @@ -831,135 +963,6 @@ static const struct file_operations pagetypeinfo_file_ops = { .release = seq_release, }; -#ifdef CONFIG_ZONE_DMA -#define TEXT_FOR_DMA(xx) xx "_dma", -#else -#define TEXT_FOR_DMA(xx) -#endif - -#ifdef CONFIG_ZONE_DMA32 -#define TEXT_FOR_DMA32(xx) xx "_dma32", -#else -#define TEXT_FOR_DMA32(xx) -#endif - -#ifdef CONFIG_HIGHMEM -#define TEXT_FOR_HIGHMEM(xx) xx "_high", -#else -#define TEXT_FOR_HIGHMEM(xx) -#endif - -#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \ - TEXT_FOR_HIGHMEM(xx) xx "_movable", - -static const char * const vmstat_text[] = { - /* Zoned VM counters */ - "nr_free_pages", - "nr_inactive_anon", - "nr_active_anon", - "nr_inactive_file", - "nr_active_file", - "nr_unevictable", - "nr_mlock", - "nr_anon_pages", - "nr_mapped", - "nr_file_pages", - "nr_dirty", - "nr_writeback", - "nr_slab_reclaimable", - "nr_slab_unreclaimable", - "nr_page_table_pages", - "nr_kernel_stack", - "nr_unstable", - "nr_bounce", - "nr_vmscan_write", - "nr_writeback_temp", - "nr_isolated_anon", - "nr_isolated_file", - "nr_shmem", - "nr_dirtied", - "nr_written", - -#ifdef CONFIG_NUMA - "numa_hit", - "numa_miss", - "numa_foreign", - "numa_interleave", - "numa_local", - "numa_other", -#endif - "nr_anon_transparent_hugepages", - "nr_dirty_threshold", - "nr_dirty_background_threshold", - -#ifdef CONFIG_VM_EVENT_COUNTERS - "pgpgin", - "pgpgout", - "pswpin", - "pswpout", - - TEXTS_FOR_ZONES("pgalloc") - - "pgfree", - "pgactivate", - "pgdeactivate", - - "pgfault", - "pgmajfault", - - TEXTS_FOR_ZONES("pgrefill") - TEXTS_FOR_ZONES("pgsteal") - TEXTS_FOR_ZONES("pgscan_kswapd") - TEXTS_FOR_ZONES("pgscan_direct") - -#ifdef CONFIG_NUMA - "zone_reclaim_failed", -#endif - "pginodesteal", - "slabs_scanned", - "kswapd_steal", - "kswapd_inodesteal", - "kswapd_low_wmark_hit_quickly", - "kswapd_high_wmark_hit_quickly", - "kswapd_skip_congestion_wait", - "pageoutrun", - "allocstall", - - "pgrotated", - -#ifdef CONFIG_COMPACTION - "compact_blocks_moved", - "compact_pages_moved", - "compact_pagemigrate_failed", - "compact_stall", - "compact_fail", - "compact_success", -#endif - -#ifdef CONFIG_HUGETLB_PAGE - "htlb_buddy_alloc_success", - "htlb_buddy_alloc_fail", -#endif - "unevictable_pgs_culled", - "unevictable_pgs_scanned", - "unevictable_pgs_rescued", - "unevictable_pgs_mlocked", - "unevictable_pgs_munlocked", - "unevictable_pgs_cleared", - "unevictable_pgs_stranded", - "unevictable_pgs_mlockfreed", - -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - "thp_fault_alloc", - "thp_fault_fallback", - "thp_collapse_alloc", - "thp_collapse_alloc_failed", - "thp_split", -#endif - -#endif /* CONFIG_VM_EVENTS_COUNTERS */ -}; - static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, struct zone *zone) { -- cgit v1.2.3 From f62e00cc3a00bfbd394a79fc22b334c31f91bd5f Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 24 May 2011 17:11:29 -0700 Subject: mm: introduce wait_on_page_locked_killable() commit 2687a356 ("Add lock_page_killable") introduced killable lock_page(). Similarly this patch introdues killable wait_on_page_locked(). Signed-off-by: KOSAKI Motohiro Acked-by: KAMEZAWA Hiroyuki Reviewed-by: Minchan Kim Cc: Matthew Wilcox Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 9 +++++++++ mm/filemap.c | 11 +++++++++++ 2 files changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index c11950652646..ea268080380d 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -357,6 +357,15 @@ static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm, */ extern void wait_on_page_bit(struct page *page, int bit_nr); +extern int wait_on_page_bit_killable(struct page *page, int bit_nr); + +static inline int wait_on_page_locked_killable(struct page *page) +{ + if (PageLocked(page)) + return wait_on_page_bit_killable(page, PG_locked); + return 0; +} + /* * Wait for a page to be unlocked. * diff --git a/mm/filemap.c b/mm/filemap.c index c641edf553a9..dea8a38bb2bb 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -562,6 +562,17 @@ void wait_on_page_bit(struct page *page, int bit_nr) } EXPORT_SYMBOL(wait_on_page_bit); +int wait_on_page_bit_killable(struct page *page, int bit_nr) +{ + DEFINE_WAIT_BIT(wait, &page->flags, bit_nr); + + if (!test_bit(bit_nr, &page->flags)) + return 0; + + return __wait_on_bit(page_waitqueue(page), &wait, + sleep_on_page_killable, TASK_KILLABLE); +} + /** * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue * @page: Page defining the wait queue of interest -- cgit v1.2.3 From 37b23e0525d393d48a7d59f870b3bc061a30ccdb Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 24 May 2011 17:11:30 -0700 Subject: x86,mm: make pagefault killable When an oom killing occurs, almost all processes are getting stuck at the following two points. 1) __alloc_pages_nodemask 2) __lock_page_or_retry 1) is not very problematic because TIF_MEMDIE leads to an allocation failure and getting out from page allocator. 2) is more problematic. In an OOM situation, zones typically don't have page cache at all and memory starvation might lead to greatly reduced IO performance. When a fork bomb occurs, TIF_MEMDIE tasks don't die quickly, meaning that a fork bomb may create new process quickly rather than the oom-killer killing it. Then, the system may become livelocked. This patch makes the pagefault interruptible by SIGKILL. Signed-off-by: KOSAKI Motohiro Reviewed-by: KAMEZAWA Hiroyuki Cc: Minchan Kim Cc: Matthew Wilcox Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/fault.c | 12 +++++++++++- include/linux/mm.h | 1 + mm/filemap.c | 31 ++++++++++++++++++++++++------- 3 files changed, 36 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index bcb394dfbb35..f7a2a054a3c0 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -965,7 +965,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) struct mm_struct *mm; int fault; int write = error_code & PF_WRITE; - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | (write ? FAULT_FLAG_WRITE : 0); tsk = current; @@ -1138,6 +1138,16 @@ good_area: return; } + /* + * Pagefault was interrupted by SIGKILL. We have no reason to + * continue pagefault. + */ + if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) { + if (!(error_code & PF_USER)) + no_context(regs, error_code, address); + return; + } + /* * Major/minor page fault accounting is only done on the * initial attempt. If we go through a retry, it is extremely diff --git a/include/linux/mm.h b/include/linux/mm.h index 1746f67c33de..57d3d5fade16 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -153,6 +153,7 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_MKWRITE 0x04 /* Fault was mkwrite of existing pte */ #define FAULT_FLAG_ALLOW_RETRY 0x08 /* Retry fault if blocking */ #define FAULT_FLAG_RETRY_NOWAIT 0x10 /* Don't drop mmap_sem and wait when retrying */ +#define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */ /* * This interface is used by x86 PAT code to identify a pfn mapping that is diff --git a/mm/filemap.c b/mm/filemap.c index dea8a38bb2bb..8144f87dcbb4 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -654,15 +654,32 @@ EXPORT_SYMBOL_GPL(__lock_page_killable); int __lock_page_or_retry(struct page *page, struct mm_struct *mm, unsigned int flags) { - if (!(flags & FAULT_FLAG_ALLOW_RETRY)) { - __lock_page(page); - return 1; - } else { - if (!(flags & FAULT_FLAG_RETRY_NOWAIT)) { - up_read(&mm->mmap_sem); + if (flags & FAULT_FLAG_ALLOW_RETRY) { + /* + * CAUTION! In this case, mmap_sem is not released + * even though return 0. + */ + if (flags & FAULT_FLAG_RETRY_NOWAIT) + return 0; + + up_read(&mm->mmap_sem); + if (flags & FAULT_FLAG_KILLABLE) + wait_on_page_locked_killable(page); + else wait_on_page_locked(page); - } return 0; + } else { + if (flags & FAULT_FLAG_KILLABLE) { + int ret; + + ret = __lock_page_killable(page); + if (ret) { + up_read(&mm->mmap_sem); + return 0; + } + } else + __lock_page(page); + return 1; } } -- cgit v1.2.3 From 1b79acc91115ba47e744b70bb166b77bd94f5855 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 24 May 2011 17:11:32 -0700 Subject: mm, mem-hotplug: recalculate lowmem_reserve when memory hotplug occurs Currently, memory hotplug calls setup_per_zone_wmarks() and calculate_zone_inactive_ratio(), but doesn't call setup_per_zone_lowmem_reserve(). It means the number of reserved pages aren't updated even if memory hot plug occur. This patch fixes it. Signed-off-by: KOSAKI Motohiro Reviewed-by: KAMEZAWA Hiroyuki Acked-by: Mel Gorman Reviewed-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- mm/memory_hotplug.c | 9 +++++---- mm/page_alloc.c | 4 ++-- 3 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 57d3d5fade16..e173cd297d88 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1381,7 +1381,7 @@ extern void set_dma_reserve(unsigned long new_dma_reserve); extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long, enum memmap_context); extern void setup_per_zone_wmarks(void); -extern void calculate_zone_inactive_ratio(struct zone *zone); +extern int __meminit init_per_zone_wmark_min(void); extern void mem_init(void); extern void __init mmap_init(void); extern void show_mem(unsigned int flags); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 2c4edc459fb0..59ac18fefd65 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -459,8 +459,9 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages) zone_pcp_update(zone); mutex_unlock(&zonelists_mutex); - setup_per_zone_wmarks(); - calculate_zone_inactive_ratio(zone); + + init_per_zone_wmark_min(); + if (onlined_pages) { kswapd_run(zone_to_nid(zone)); node_set_state(zone_to_nid(zone), N_HIGH_MEMORY); @@ -893,8 +894,8 @@ repeat: zone->zone_pgdat->node_present_pages -= offlined_pages; totalram_pages -= offlined_pages; - setup_per_zone_wmarks(); - calculate_zone_inactive_ratio(zone); + init_per_zone_wmark_min(); + if (!node_present_pages(node)) { node_clear_state(node, N_HIGH_MEMORY); kswapd_stop(node); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 56d0be36be9d..e133cea36932 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5094,7 +5094,7 @@ void setup_per_zone_wmarks(void) * 1TB 101 10GB * 10TB 320 32GB */ -void __meminit calculate_zone_inactive_ratio(struct zone *zone) +static void __meminit calculate_zone_inactive_ratio(struct zone *zone) { unsigned int gb, ratio; @@ -5140,7 +5140,7 @@ static void __meminit setup_per_zone_inactive_ratio(void) * 8192MB: 11584k * 16384MB: 16384k */ -static int __init init_per_zone_wmark_min(void) +int __meminit init_per_zone_wmark_min(void) { unsigned long lowmem_kbytes; -- cgit v1.2.3 From a6cccdc36c966e51fd969560d870cfd37afbfa9c Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 24 May 2011 17:11:33 -0700 Subject: mm, mem-hotplug: update pcp->stat_threshold when memory hotplug occur Currently, cpu hotplug updates pcp->stat_threshold, but memory hotplug doesn't. There is no reason for this. [akpm@linux-foundation.org: fix CONFIG_SMP=n build] Signed-off-by: KOSAKI Motohiro Reviewed-by: KAMEZAWA Hiroyuki Acked-by: Mel Gorman Acked-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmstat.h | 3 +++ mm/page_alloc.c | 2 ++ mm/vmstat.c | 3 +-- 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index e73d1030f2f7..51359837511a 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -261,6 +261,7 @@ extern void dec_zone_state(struct zone *, enum zone_stat_item); extern void __dec_zone_state(struct zone *, enum zone_stat_item); void refresh_cpu_vm_stats(int); +void refresh_zone_stat_thresholds(void); int calculate_pressure_threshold(struct zone *zone); int calculate_normal_threshold(struct zone *zone); @@ -313,6 +314,8 @@ static inline void __dec_zone_page_state(struct page *page, #define set_pgdat_percpu_threshold(pgdat, callback) { } static inline void refresh_cpu_vm_stats(int cpu) { } +static inline void refresh_zone_stat_thresholds(void) { } + #endif /* CONFIG_SMP */ extern const char * const vmstat_text[]; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e133cea36932..77773329aa72 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -5152,6 +5153,7 @@ int __meminit init_per_zone_wmark_min(void) if (min_free_kbytes > 65536) min_free_kbytes = 65536; setup_per_zone_wmarks(); + refresh_zone_stat_thresholds(); setup_per_zone_lowmem_reserve(); setup_per_zone_inactive_ratio(); return 0; diff --git a/mm/vmstat.c b/mm/vmstat.c index 209546a8bdd5..20c18b7694b2 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -157,7 +157,7 @@ int calculate_normal_threshold(struct zone *zone) /* * Refresh the thresholds for each zone. */ -static void refresh_zone_stat_thresholds(void) +void refresh_zone_stat_thresholds(void) { struct zone *zone; int cpu; @@ -1201,7 +1201,6 @@ static int __init setup_vmstat(void) #ifdef CONFIG_SMP int cpu; - refresh_zone_stat_thresholds(); register_cpu_notifier(&vmstat_notifier); for_each_online_cpu(cpu) -- cgit v1.2.3 From 72788c385604523422592249c19cba0187021e9b Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 24 May 2011 17:11:40 -0700 Subject: oom: replace PF_OOM_ORIGIN with toggling oom_score_adj There's a kernel-wide shortage of per-process flags, so it's always helpful to trim one when possible without incurring a significant penalty. It's even more important when you're planning on adding a per- process flag yourself, which I plan to do shortly for transparent hugepages. PF_OOM_ORIGIN is used by ksm and swapoff to prefer current since it has a tendency to allocate large amounts of memory and should be preferred for killing over other tasks. We'd rather immediately kill the task making the errant syscall rather than penalizing an innocent task. This patch removes PF_OOM_ORIGIN since its behavior is equivalent to setting the process's oom_score_adj to OOM_SCORE_ADJ_MAX. The process's old oom_score_adj is stored and then set to OOM_SCORE_ADJ_MAX during the time it used to have PF_OOM_ORIGIN. The old value is then reinstated when the process should no longer be considered a high priority for oom killing. Signed-off-by: David Rientjes Reviewed-by: KOSAKI Motohiro Reviewed-by: Minchan Kim Cc: Hugh Dickins Cc: Izik Eidus Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/oom.h | 2 ++ include/linux/sched.h | 1 - mm/ksm.c | 7 +++++-- mm/oom_kill.c | 36 +++++++++++++++++++++++++++--------- mm/swapfile.c | 6 ++++-- 5 files changed, 38 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/oom.h b/include/linux/oom.h index 5e3aa8311c5e..4952fb874ad3 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -40,6 +40,8 @@ enum oom_constraint { CONSTRAINT_MEMCG, }; +extern int test_set_oom_score_adj(int new_val); + extern unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem, const nodemask_t *nodemask, unsigned long totalpages); extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags); diff --git a/include/linux/sched.h b/include/linux/sched.h index aaf71e08222c..44b8faaac7c0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1753,7 +1753,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #define PF_FROZEN 0x00010000 /* frozen for system suspend */ #define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ #define PF_KSWAPD 0x00040000 /* I am kswapd */ -#define PF_OOM_ORIGIN 0x00080000 /* Allocating much memory to others */ #define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */ diff --git a/mm/ksm.c b/mm/ksm.c index 942dfc73a2ff..d708b3ef2260 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include "internal.h" @@ -1894,9 +1895,11 @@ static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr, if (ksm_run != flags) { ksm_run = flags; if (flags & KSM_RUN_UNMERGE) { - current->flags |= PF_OOM_ORIGIN; + int oom_score_adj; + + oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX); err = unmerge_and_remove_all_rmap_items(); - current->flags &= ~PF_OOM_ORIGIN; + test_set_oom_score_adj(oom_score_adj); if (err) { ksm_run = KSM_RUN_STOP; count = err; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index f52e85c80e8d..e4b0991ca351 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -38,6 +38,33 @@ int sysctl_oom_kill_allocating_task; int sysctl_oom_dump_tasks = 1; static DEFINE_SPINLOCK(zone_scan_lock); +/** + * test_set_oom_score_adj() - set current's oom_score_adj and return old value + * @new_val: new oom_score_adj value + * + * Sets the oom_score_adj value for current to @new_val with proper + * synchronization and returns the old value. Usually used to temporarily + * set a value, save the old value in the caller, and then reinstate it later. + */ +int test_set_oom_score_adj(int new_val) +{ + struct sighand_struct *sighand = current->sighand; + int old_val; + + spin_lock_irq(&sighand->siglock); + old_val = current->signal->oom_score_adj; + if (new_val != old_val) { + if (new_val == OOM_SCORE_ADJ_MIN) + atomic_inc(¤t->mm->oom_disable_count); + else if (old_val == OOM_SCORE_ADJ_MIN) + atomic_dec(¤t->mm->oom_disable_count); + current->signal->oom_score_adj = new_val; + } + spin_unlock_irq(&sighand->siglock); + + return old_val; +} + #ifdef CONFIG_NUMA /** * has_intersects_mems_allowed() - check task eligiblity for kill @@ -154,15 +181,6 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem, return 0; } - /* - * When the PF_OOM_ORIGIN bit is set, it indicates the task should have - * priority for oom killing. - */ - if (p->flags & PF_OOM_ORIGIN) { - task_unlock(p); - return 1000; - } - /* * The memory controller may have a limit of 0 bytes, so avoid a divide * by zero, if necessary. diff --git a/mm/swapfile.c b/mm/swapfile.c index 8c6b3ce38f09..d537d29e9b7b 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -1555,6 +1556,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) struct address_space *mapping; struct inode *inode; char *pathname; + int oom_score_adj; int i, type, prev; int err; @@ -1613,9 +1615,9 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) p->flags &= ~SWP_WRITEOK; spin_unlock(&swap_lock); - current->flags |= PF_OOM_ORIGIN; + oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX); err = try_to_unuse(type); - current->flags &= ~PF_OOM_ORIGIN; + test_set_oom_score_adj(oom_score_adj); if (err) { /* -- cgit v1.2.3 From 15fa8f425557a0d698f933627771f520ef4ae34b Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 24 May 2011 17:11:41 -0700 Subject: include/linux/gfp.h: work around apparent sparse confusion Running sparse on page_alloc.c today, it errors out: include/linux/gfp.h:254:17: error: bad constant expression include/linux/gfp.h:254:17: error: cannot size expression which is a line in gfp_zone(): BUILD_BUG_ON((GFP_ZONE_BAD >> bit) & 1); That's really unfortunate, because it ends up hiding all of the other legitimate sparse messages like this: mm/page_alloc.c:5315:59: warning: incorrect type in argument 1 (different base types) mm/page_alloc.c:5315:59: expected unsigned long [unsigned] [usertype] size mm/page_alloc.c:5315:59: got restricted gfp_t [usertype] ... Having sparse be able to catch these very oopsable bugs is a lot more important than keeping a BUILD_BUG_ON(). Kill the BUILD_BUG_ON(). Compiles on x86_64 with and without CONFIG_DEBUG_VM=y. defconfig boots fine for me. Signed-off-by: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 56d8fc87fbbc..7e8f5d863c76 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -249,14 +249,9 @@ static inline enum zone_type gfp_zone(gfp_t flags) z = (GFP_ZONE_TABLE >> (bit * ZONES_SHIFT)) & ((1 << ZONES_SHIFT) - 1); - - if (__builtin_constant_p(bit)) - BUILD_BUG_ON((GFP_ZONE_BAD >> bit) & 1); - else { #ifdef CONFIG_DEBUG_VM - BUG_ON((GFP_ZONE_BAD >> bit) & 1); + BUG_ON((GFP_ZONE_BAD >> bit) & 1); #endif - } return z; } -- cgit v1.2.3 From 82d4b5779a75887750748609f3415f01c1bb9f81 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 24 May 2011 17:11:42 -0700 Subject: include/linux/gfp.h: convert BUG_ON() into VM_BUG_ON() VM_BUG_ON() if effectively a BUG_ON() undef #ifdef CONFIG_DEBUG_VM. That is exactly what we have here now, and two different folks have suggested doing it this way. Signed-off-by: Dave Hansen Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 7e8f5d863c76..cb4089254f01 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -249,9 +249,7 @@ static inline enum zone_type gfp_zone(gfp_t flags) z = (GFP_ZONE_TABLE >> (bit * ZONES_SHIFT)) & ((1 << ZONES_SHIFT) - 1); -#ifdef CONFIG_DEBUG_VM - BUG_ON((GFP_ZONE_BAD >> bit) & 1); -#endif + VM_BUG_ON((GFP_ZONE_BAD >> bit) & 1); return z; } -- cgit v1.2.3 From d05f3169c0fbca16132ec7c2be71685c6de638b5 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 24 May 2011 17:11:44 -0700 Subject: mm: make expand_downwards() symmetrical with expand_upwards() Currently we have expand_upwards exported while expand_downwards is accessible only via expand_stack or expand_stack_downwards. check_stack_guard_page is a nice example of the asymmetry. It uses expand_stack for VM_GROWSDOWN while expand_upwards is called for VM_GROWSUP case. Let's clean this up by exporting both functions and make those names consistent. Let's use expand_{upwards,downwards} because expanding doesn't always involve stack manipulation (an example is ia64_do_page_fault which uses expand_upwards for registers backing store expansion). expand_downwards has to be defined for both CONFIG_STACK_GROWS{UP,DOWN} because get_arg_page calls the downwards version in the early process initialization phase for growsup configuration. Signed-off-by: Michal Hocko Acked-by: Hugh Dickins Cc: James Bottomley Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- include/linux/mm.h | 8 +++++--- mm/memory.c | 2 +- mm/mmap.c | 7 +------ 4 files changed, 8 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index c1cf372f17a7..e276d5e0abb9 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -200,7 +200,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, #ifdef CONFIG_STACK_GROWSUP if (write) { - ret = expand_stack_downwards(bprm->vma, pos); + ret = expand_downwards(bprm->vma, pos); if (ret < 0) return NULL; } diff --git a/include/linux/mm.h b/include/linux/mm.h index e173cd297d88..d2948af126ca 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1518,15 +1518,17 @@ unsigned long ra_submit(struct file_ra_state *ra, struct address_space *mapping, struct file *filp); -/* Do stack extension */ +/* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */ extern int expand_stack(struct vm_area_struct *vma, unsigned long address); + +/* CONFIG_STACK_GROWSUP still needs to to grow downwards at some places */ +extern int expand_downwards(struct vm_area_struct *vma, + unsigned long address); #if VM_GROWSUP extern int expand_upwards(struct vm_area_struct *vma, unsigned long address); #else #define expand_upwards(vma, address) do { } while (0) #endif -extern int expand_stack_downwards(struct vm_area_struct *vma, - unsigned long address); /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr); diff --git a/mm/memory.c b/mm/memory.c index 61e66f026563..4c6ea10f3d18 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2966,7 +2966,7 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo if (prev && prev->vm_end == address) return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM; - expand_stack(vma, address - PAGE_SIZE); + expand_downwards(vma, address - PAGE_SIZE); } if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) { struct vm_area_struct *next = vma->vm_next; diff --git a/mm/mmap.c b/mm/mmap.c index e76f8d752884..adb12527fd0e 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1774,7 +1774,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) /* * vma is the first one with address < vma->vm_start. Have to extend vma. */ -static int expand_downwards(struct vm_area_struct *vma, +int expand_downwards(struct vm_area_struct *vma, unsigned long address) { int error; @@ -1821,11 +1821,6 @@ static int expand_downwards(struct vm_area_struct *vma, return error; } -int expand_stack_downwards(struct vm_area_struct *vma, unsigned long address) -{ - return expand_downwards(vma, address); -} - #ifdef CONFIG_STACK_GROWSUP int expand_stack(struct vm_area_struct *vma, unsigned long address) { -- cgit v1.2.3 From d16dfc550f5326a4000f3322582a7c05dec91d7a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 24 May 2011 17:11:45 -0700 Subject: mm: mmu_gather rework Rework the existing mmu_gather infrastructure. The direct purpose of these patches was to allow preemptible mmu_gather, but even without that I think these patches provide an improvement to the status quo. The first 9 patches rework the mmu_gather infrastructure. For review purpose I've split them into generic and per-arch patches with the last of those a generic cleanup. The next patch provides generic RCU page-table freeing, and the followup is a patch converting s390 to use this. I've also got 4 patches from DaveM lined up (not included in this series) that uses this to implement gup_fast() for sparc64. Then there is one patch that extends the generic mmu_gather batching. After that follow the mm preemptibility patches, these make part of the mm a lot more preemptible. It converts i_mmap_lock and anon_vma->lock to mutexes which together with the mmu_gather rework makes mmu_gather preemptible as well. Making i_mmap_lock a mutex also enables a clean-up of the truncate code. This also allows for preemptible mmu_notifiers, something that XPMEM I think wants. Furthermore, it removes the new and universially detested unmap_mutex. This patch: Remove the first obstacle towards a fully preemptible mmu_gather. The current scheme assumes mmu_gather is always done with preemption disabled and uses per-cpu storage for the page batches. Change this to try and allocate a page for batching and in case of failure, use a small on-stack array to make some progress. Preemptible mmu_gather is desired in general and usable once i_mmap_lock becomes a mutex. Doing it before the mutex conversion saves us from having to rework the code by moving the mmu_gather bits inside the pte_lock. Also avoid flushing the tlb batches from under the pte lock, this is useful even without the i_mmap_lock conversion as it significantly reduces pte lock hold times. [akpm@linux-foundation.org: fix comment tpyo] Signed-off-by: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: David Miller Cc: Martin Schwidefsky Cc: Russell King Cc: Paul Mundt Cc: Jeff Dike Cc: Richard Weinberger Cc: Tony Luck Reviewed-by: KAMEZAWA Hiroyuki Acked-by: Hugh Dickins Acked-by: Mel Gorman Cc: KOSAKI Motohiro Cc: Nick Piggin Cc: Namhyung Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 10 ++--- include/asm-generic/tlb.h | 96 ++++++++++++++++++++++++++++++++++------------- include/linux/mm.h | 2 +- mm/memory.c | 46 +++++++++++------------ mm/mmap.c | 18 ++++----- 5 files changed, 107 insertions(+), 65 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index e276d5e0abb9..936f5776655c 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -600,7 +600,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) unsigned long length = old_end - old_start; unsigned long new_start = old_start - shift; unsigned long new_end = old_end - shift; - struct mmu_gather *tlb; + struct mmu_gather tlb; BUG_ON(new_start > new_end); @@ -626,12 +626,12 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) return -ENOMEM; lru_add_drain(); - tlb = tlb_gather_mmu(mm, 0); + tlb_gather_mmu(&tlb, mm, 0); if (new_end > old_start) { /* * when the old and new regions overlap clear from new_end. */ - free_pgd_range(tlb, new_end, old_end, new_end, + free_pgd_range(&tlb, new_end, old_end, new_end, vma->vm_next ? vma->vm_next->vm_start : 0); } else { /* @@ -640,10 +640,10 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) * have constraints on va-space that make this illegal (IA64) - * for the others its just a little faster. */ - free_pgd_range(tlb, old_start, old_end, new_end, + free_pgd_range(&tlb, old_start, old_end, new_end, vma->vm_next ? vma->vm_next->vm_start : 0); } - tlb_finish_mmu(tlb, new_end, old_end); + tlb_finish_mmu(&tlb, new_end, old_end); /* * Shrink the vma to just the new range. Always succeeds. diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index e43f9766259f..2d3547c84235 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -5,6 +5,8 @@ * Copyright 2001 Red Hat, Inc. * Based on code from mm/memory.c Copyright Linus Torvalds and others. * + * Copyright 2011 Red Hat, Inc., Peter Zijlstra + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -22,51 +24,71 @@ * and page free order so much.. */ #ifdef CONFIG_SMP - #ifdef ARCH_FREE_PTR_NR - #define FREE_PTR_NR ARCH_FREE_PTR_NR - #else - #define FREE_PTE_NR 506 - #endif #define tlb_fast_mode(tlb) ((tlb)->nr == ~0U) #else - #define FREE_PTE_NR 1 #define tlb_fast_mode(tlb) 1 #endif +/* + * If we can't allocate a page to make a big batch of page pointers + * to work on, then just handle a few from the on-stack structure. + */ +#define MMU_GATHER_BUNDLE 8 + /* struct mmu_gather is an opaque type used by the mm code for passing around * any data needed by arch specific code for tlb_remove_page. */ struct mmu_gather { struct mm_struct *mm; unsigned int nr; /* set to ~0U means fast mode */ + unsigned int max; /* nr < max */ unsigned int need_flush;/* Really unmapped some ptes? */ unsigned int fullmm; /* non-zero means full mm flush */ - struct page * pages[FREE_PTE_NR]; +#ifdef HAVE_ARCH_MMU_GATHER + struct arch_mmu_gather arch; +#endif + struct page **pages; + struct page *local[MMU_GATHER_BUNDLE]; }; -/* Users of the generic TLB shootdown code must declare this storage space. */ -DECLARE_PER_CPU(struct mmu_gather, mmu_gathers); +static inline void __tlb_alloc_page(struct mmu_gather *tlb) +{ + unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0); + + if (addr) { + tlb->pages = (void *)addr; + tlb->max = PAGE_SIZE / sizeof(struct page *); + } +} /* tlb_gather_mmu - * Return a pointer to an initialized struct mmu_gather. + * Called to initialize an (on-stack) mmu_gather structure for page-table + * tear-down from @mm. The @fullmm argument is used when @mm is without + * users and we're going to destroy the full address space (exit/execve). */ -static inline struct mmu_gather * -tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush) +static inline void +tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm) { - struct mmu_gather *tlb = &get_cpu_var(mmu_gathers); - tlb->mm = mm; - /* Use fast mode if only one CPU is online */ - tlb->nr = num_online_cpus() > 1 ? 0U : ~0U; + tlb->max = ARRAY_SIZE(tlb->local); + tlb->pages = tlb->local; + + if (num_online_cpus() > 1) { + tlb->nr = 0; + __tlb_alloc_page(tlb); + } else /* Use fast mode if only one CPU is online */ + tlb->nr = ~0U; - tlb->fullmm = full_mm_flush; + tlb->fullmm = fullmm; - return tlb; +#ifdef HAVE_ARCH_MMU_GATHER + tlb->arch = ARCH_MMU_GATHER_INIT; +#endif } static inline void -tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +tlb_flush_mmu(struct mmu_gather *tlb) { if (!tlb->need_flush) return; @@ -75,6 +97,13 @@ tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) if (!tlb_fast_mode(tlb)) { free_pages_and_swap_cache(tlb->pages, tlb->nr); tlb->nr = 0; + /* + * If we are using the local on-stack array of pages for MMU + * gather, try allocating an off-stack array again as we have + * recently freed pages. + */ + if (tlb->pages == tlb->local) + __tlb_alloc_page(tlb); } } @@ -85,29 +114,42 @@ tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) static inline void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) { - tlb_flush_mmu(tlb, start, end); + tlb_flush_mmu(tlb); /* keep the page table cache within bounds */ check_pgt_cache(); - put_cpu_var(mmu_gathers); + if (tlb->pages != tlb->local) + free_pages((unsigned long)tlb->pages, 0); } -/* tlb_remove_page +/* __tlb_remove_page * Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)), while * handling the additional races in SMP caused by other CPUs caching valid - * mappings in their TLBs. + * mappings in their TLBs. Returns the number of free page slots left. + * When out of page slots we must call tlb_flush_mmu(). */ -static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) +static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) { tlb->need_flush = 1; if (tlb_fast_mode(tlb)) { free_page_and_swap_cache(page); - return; + return 1; /* avoid calling tlb_flush_mmu() */ } tlb->pages[tlb->nr++] = page; - if (tlb->nr >= FREE_PTE_NR) - tlb_flush_mmu(tlb, 0, 0); + VM_BUG_ON(tlb->nr > tlb->max); + + return tlb->max - tlb->nr; +} + +/* tlb_remove_page + * Similar to __tlb_remove_page but will call tlb_flush_mmu() itself when + * required. + */ +static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) +{ + if (!__tlb_remove_page(tlb, page)) + tlb_flush_mmu(tlb); } /** diff --git a/include/linux/mm.h b/include/linux/mm.h index d2948af126ca..ffcce9bf2b54 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -906,7 +906,7 @@ int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size); unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned long size, struct zap_details *); -unsigned long unmap_vmas(struct mmu_gather **tlb, +unsigned long unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma, unsigned long start_addr, unsigned long end_addr, unsigned long *nr_accounted, struct zap_details *); diff --git a/mm/memory.c b/mm/memory.c index 4c6ea10f3d18..19b2d44de9f0 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -912,12 +912,13 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, long *zap_work, struct zap_details *details) { struct mm_struct *mm = tlb->mm; + int force_flush = 0; pte_t *pte; spinlock_t *ptl; int rss[NR_MM_COUNTERS]; init_rss_vec(rss); - +again: pte = pte_offset_map_lock(mm, pmd, addr, &ptl); arch_enter_lazy_mmu_mode(); do { @@ -974,7 +975,9 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, page_remove_rmap(page); if (unlikely(page_mapcount(page) < 0)) print_bad_pte(vma, addr, ptent, page); - tlb_remove_page(tlb, page); + force_flush = !__tlb_remove_page(tlb, page); + if (force_flush) + break; continue; } /* @@ -1001,6 +1004,18 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, arch_leave_lazy_mmu_mode(); pte_unmap_unlock(pte - 1, ptl); + /* + * mmu_gather ran out of room to batch pages, we break out of + * the PTE lock to avoid doing the potential expensive TLB invalidate + * and page-free while holding it. + */ + if (force_flush) { + force_flush = 0; + tlb_flush_mmu(tlb); + if (addr != end) + goto again; + } + return addr; } @@ -1121,17 +1136,14 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb, * ensure that any thus-far unmapped pages are flushed before unmap_vmas() * drops the lock and schedules. */ -unsigned long unmap_vmas(struct mmu_gather **tlbp, +unsigned long unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start_addr, unsigned long end_addr, unsigned long *nr_accounted, struct zap_details *details) { long zap_work = ZAP_BLOCK_SIZE; - unsigned long tlb_start = 0; /* For tlb_finish_mmu */ - int tlb_start_valid = 0; unsigned long start = start_addr; spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL; - int fullmm = (*tlbp)->fullmm; struct mm_struct *mm = vma->vm_mm; mmu_notifier_invalidate_range_start(mm, start_addr, end_addr); @@ -1152,11 +1164,6 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, untrack_pfn_vma(vma, 0, 0); while (start != end) { - if (!tlb_start_valid) { - tlb_start = start; - tlb_start_valid = 1; - } - if (unlikely(is_vm_hugetlb_page(vma))) { /* * It is undesirable to test vma->vm_file as it @@ -1177,7 +1184,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, start = end; } else - start = unmap_page_range(*tlbp, vma, + start = unmap_page_range(tlb, vma, start, end, &zap_work, details); if (zap_work > 0) { @@ -1185,19 +1192,13 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, break; } - tlb_finish_mmu(*tlbp, tlb_start, start); - if (need_resched() || (i_mmap_lock && spin_needbreak(i_mmap_lock))) { - if (i_mmap_lock) { - *tlbp = NULL; + if (i_mmap_lock) goto out; - } cond_resched(); } - *tlbp = tlb_gather_mmu(vma->vm_mm, fullmm); - tlb_start_valid = 0; zap_work = ZAP_BLOCK_SIZE; } } @@ -1217,16 +1218,15 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned long size, struct zap_details *details) { struct mm_struct *mm = vma->vm_mm; - struct mmu_gather *tlb; + struct mmu_gather tlb; unsigned long end = address + size; unsigned long nr_accounted = 0; lru_add_drain(); - tlb = tlb_gather_mmu(mm, 0); + tlb_gather_mmu(&tlb, mm, 0); update_hiwater_rss(mm); end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details); - if (tlb) - tlb_finish_mmu(tlb, address, end); + tlb_finish_mmu(&tlb, address, end); return end; } diff --git a/mm/mmap.c b/mm/mmap.c index adb12527fd0e..40d49986e714 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1903,17 +1903,17 @@ static void unmap_region(struct mm_struct *mm, unsigned long start, unsigned long end) { struct vm_area_struct *next = prev? prev->vm_next: mm->mmap; - struct mmu_gather *tlb; + struct mmu_gather tlb; unsigned long nr_accounted = 0; lru_add_drain(); - tlb = tlb_gather_mmu(mm, 0); + tlb_gather_mmu(&tlb, mm, 0); update_hiwater_rss(mm); unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL); vm_unacct_memory(nr_accounted); - free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS, - next? next->vm_start: 0); - tlb_finish_mmu(tlb, start, end); + free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS, + next ? next->vm_start : 0); + tlb_finish_mmu(&tlb, start, end); } /* @@ -2255,7 +2255,7 @@ EXPORT_SYMBOL(do_brk); /* Release all mmaps. */ void exit_mmap(struct mm_struct *mm) { - struct mmu_gather *tlb; + struct mmu_gather tlb; struct vm_area_struct *vma; unsigned long nr_accounted = 0; unsigned long end; @@ -2280,14 +2280,14 @@ void exit_mmap(struct mm_struct *mm) lru_add_drain(); flush_cache_mm(mm); - tlb = tlb_gather_mmu(mm, 1); + tlb_gather_mmu(&tlb, mm, 1); /* update_hiwater_rss(mm) here? but nobody should be looking */ /* Use -1 here to ensure all VMAs in the mm are unmapped */ end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL); vm_unacct_memory(nr_accounted); - free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0); - tlb_finish_mmu(tlb, 0, end); + free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0); + tlb_finish_mmu(&tlb, 0, end); /* * Walk the list again, actually closing and freeing it, -- cgit v1.2.3 From e4c70a6629f9c74c4b0de258a3951890e9047c82 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 24 May 2011 17:12:03 -0700 Subject: lockdep, mutex: provide mutex_lock_nest_lock In order to convert i_mmap_lock to a mutex we need a mutex equivalent to spin_lock_nest_lock(), thus provide the mutex_lock_nest_lock() annotation. As with spin_lock_nest_lock(), mutex_lock_nest_lock() allows annotation of the locking pattern where an outer lock serializes the acquisition order of nested locks. That is, if every time you lock multiple locks A, say A1 and A2 you first acquire N, the order of acquiring A1 and A2 is irrelevant. Signed-off-by: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: David Miller Cc: Martin Schwidefsky Cc: Russell King Cc: Paul Mundt Cc: Jeff Dike Cc: Richard Weinberger Cc: Tony Luck Cc: KAMEZAWA Hiroyuki Cc: Hugh Dickins Cc: Mel Gorman Cc: KOSAKI Motohiro Cc: Nick Piggin Cc: Namhyung Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lockdep.h | 3 +++ include/linux/mutex.h | 9 +++++++++ kernel/mutex.c | 25 +++++++++++++++++-------- 3 files changed, 29 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 4aef1dda6406..ef820a3c378b 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -487,12 +487,15 @@ static inline void print_irqtrace_events(struct task_struct *curr) #ifdef CONFIG_DEBUG_LOCK_ALLOC # ifdef CONFIG_PROVE_LOCKING # define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, NULL, i) +# define mutex_acquire_nest(l, s, t, n, i) lock_acquire(l, s, t, 0, 2, n, i) # else # define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, NULL, i) +# define mutex_acquire_nest(l, s, t, n, i) lock_acquire(l, s, t, 0, 1, n, i) # endif # define mutex_release(l, n, i) lock_release(l, n, i) #else # define mutex_acquire(l, s, t, i) do { } while (0) +# define mutex_acquire_nest(l, s, t, n, i) do { } while (0) # define mutex_release(l, n, i) do { } while (0) #endif diff --git a/include/linux/mutex.h b/include/linux/mutex.h index c75471db576e..a940fe435aca 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -132,6 +132,7 @@ static inline int mutex_is_locked(struct mutex *lock) */ #ifdef CONFIG_DEBUG_LOCK_ALLOC extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass); +extern void _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock); extern int __must_check mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass); extern int __must_check mutex_lock_killable_nested(struct mutex *lock, @@ -140,6 +141,13 @@ extern int __must_check mutex_lock_killable_nested(struct mutex *lock, #define mutex_lock(lock) mutex_lock_nested(lock, 0) #define mutex_lock_interruptible(lock) mutex_lock_interruptible_nested(lock, 0) #define mutex_lock_killable(lock) mutex_lock_killable_nested(lock, 0) + +#define mutex_lock_nest_lock(lock, nest_lock) \ +do { \ + typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \ + _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \ +} while (0) + #else extern void mutex_lock(struct mutex *lock); extern int __must_check mutex_lock_interruptible(struct mutex *lock); @@ -148,6 +156,7 @@ extern int __must_check mutex_lock_killable(struct mutex *lock); # define mutex_lock_nested(lock, subclass) mutex_lock(lock) # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock) # define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock) +# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock) #endif /* diff --git a/kernel/mutex.c b/kernel/mutex.c index 2c938e2337cd..d607ed5dd441 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -131,14 +131,14 @@ EXPORT_SYMBOL(mutex_unlock); */ static inline int __sched __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, - unsigned long ip) + struct lockdep_map *nest_lock, unsigned long ip) { struct task_struct *task = current; struct mutex_waiter waiter; unsigned long flags; preempt_disable(); - mutex_acquire(&lock->dep_map, subclass, 0, ip); + mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip); #ifdef CONFIG_MUTEX_SPIN_ON_OWNER /* @@ -269,16 +269,25 @@ void __sched mutex_lock_nested(struct mutex *lock, unsigned int subclass) { might_sleep(); - __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, _RET_IP_); + __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_); } EXPORT_SYMBOL_GPL(mutex_lock_nested); +void __sched +_mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) +{ + might_sleep(); + __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, nest, _RET_IP_); +} + +EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock); + int __sched mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass) { might_sleep(); - return __mutex_lock_common(lock, TASK_KILLABLE, subclass, _RET_IP_); + return __mutex_lock_common(lock, TASK_KILLABLE, subclass, NULL, _RET_IP_); } EXPORT_SYMBOL_GPL(mutex_lock_killable_nested); @@ -287,7 +296,7 @@ mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) { might_sleep(); return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, - subclass, _RET_IP_); + subclass, NULL, _RET_IP_); } EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); @@ -393,7 +402,7 @@ __mutex_lock_slowpath(atomic_t *lock_count) { struct mutex *lock = container_of(lock_count, struct mutex, count); - __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, _RET_IP_); + __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_); } static noinline int __sched @@ -401,7 +410,7 @@ __mutex_lock_killable_slowpath(atomic_t *lock_count) { struct mutex *lock = container_of(lock_count, struct mutex, count); - return __mutex_lock_common(lock, TASK_KILLABLE, 0, _RET_IP_); + return __mutex_lock_common(lock, TASK_KILLABLE, 0, NULL, _RET_IP_); } static noinline int __sched @@ -409,7 +418,7 @@ __mutex_lock_interruptible_slowpath(atomic_t *lock_count) { struct mutex *lock = container_of(lock_count, struct mutex, count); - return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, _RET_IP_); + return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_); } #endif -- cgit v1.2.3 From 97a894136f29802da19a15541de3c019e1ca147e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 24 May 2011 17:12:04 -0700 Subject: mm: Remove i_mmap_lock lockbreak Hugh says: "The only significant loser, I think, would be page reclaim (when concurrent with truncation): could spin for a long time waiting for the i_mmap_mutex it expects would soon be dropped? " Counter points: - cpu contention makes the spin stop (need_resched()) - zap pages should be freeing pages at a higher rate than reclaim ever can I think the simplification of the truncate code is definitely worth it. Effectively reverts: 2aa15890f3c ("mm: prevent concurrent unmap_mapping_range() on the same inode") and takes out the code that caused its problem. Signed-off-by: Peter Zijlstra Reviewed-by: KAMEZAWA Hiroyuki Cc: Hugh Dickins Cc: Benjamin Herrenschmidt Cc: David Miller Cc: Martin Schwidefsky Cc: Russell King Cc: Paul Mundt Cc: Jeff Dike Cc: Richard Weinberger Cc: Tony Luck Cc: Mel Gorman Cc: KOSAKI Motohiro Cc: Nick Piggin Cc: Namhyung Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inode.c | 1 - include/linux/fs.h | 2 - include/linux/mm.h | 2 - include/linux/mm_types.h | 1 - kernel/fork.c | 1 - mm/memory.c | 195 +++++++---------------------------------------- mm/mmap.c | 13 +--- mm/mremap.c | 1 - 8 files changed, 28 insertions(+), 188 deletions(-) (limited to 'include/linux') diff --git a/fs/inode.c b/fs/inode.c index 05f4fa521325..7a7284c71abd 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -331,7 +331,6 @@ void address_space_init_once(struct address_space *mapping) spin_lock_init(&mapping->private_lock); INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap); INIT_LIST_HEAD(&mapping->i_mmap_nonlinear); - mutex_init(&mapping->unmap_mutex); } EXPORT_SYMBOL(address_space_init_once); diff --git a/include/linux/fs.h b/include/linux/fs.h index cdf9495df204..5d2c86bdf5ba 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -635,7 +635,6 @@ struct address_space { struct prio_tree_root i_mmap; /* tree of private and shared mappings */ struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ spinlock_t i_mmap_lock; /* protect tree, count, list */ - unsigned int truncate_count; /* Cover race condition with truncate */ unsigned long nrpages; /* number of total pages */ pgoff_t writeback_index;/* writeback starts here */ const struct address_space_operations *a_ops; /* methods */ @@ -644,7 +643,6 @@ struct address_space { spinlock_t private_lock; /* for use by the address_space */ struct list_head private_list; /* ditto */ struct address_space *assoc_mapping; /* ditto */ - struct mutex unmap_mutex; /* to protect unmapping */ } __attribute__((aligned(sizeof(long)))); /* * On most architectures that alignment is already the case; but diff --git a/include/linux/mm.h b/include/linux/mm.h index ffcce9bf2b54..2ad0ac8c3f32 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -895,8 +895,6 @@ struct zap_details { struct address_space *check_mapping; /* Check page->mapping if set */ pgoff_t first_index; /* Lowest page->index to unmap */ pgoff_t last_index; /* Highest page->index to unmap */ - spinlock_t *i_mmap_lock; /* For unmap_mapping_range: */ - unsigned long truncate_count; /* Compare vm_truncate_count */ }; struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 02aa5619709b..201998e5b530 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -175,7 +175,6 @@ struct vm_area_struct { units, *not* PAGE_CACHE_SIZE */ struct file * vm_file; /* File we map to (can be NULL). */ void * vm_private_data; /* was vm_pte (shared mem) */ - unsigned long vm_truncate_count;/* truncate_count or restart_addr */ #ifndef CONFIG_MMU struct vm_region *vm_region; /* NOMMU mapping region */ diff --git a/kernel/fork.c b/kernel/fork.c index 2b44d82b8237..4eef925477fc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -386,7 +386,6 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) spin_lock(&mapping->i_mmap_lock); if (tmp->vm_flags & VM_SHARED) mapping->i_mmap_writable++; - tmp->vm_truncate_count = mpnt->vm_truncate_count; flush_dcache_mmap_lock(mapping); /* insert tmp into the share list, just after mpnt */ vma_prio_tree_add(tmp, mpnt); diff --git a/mm/memory.c b/mm/memory.c index 17193d74f302..18655878b9f8 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -986,13 +986,13 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, static unsigned long zap_pte_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, - long *zap_work, struct zap_details *details) + struct zap_details *details) { struct mm_struct *mm = tlb->mm; int force_flush = 0; - pte_t *pte; - spinlock_t *ptl; int rss[NR_MM_COUNTERS]; + spinlock_t *ptl; + pte_t *pte; again: init_rss_vec(rss); @@ -1001,12 +1001,9 @@ again: do { pte_t ptent = *pte; if (pte_none(ptent)) { - (*zap_work)--; continue; } - (*zap_work) -= PAGE_SIZE; - if (pte_present(ptent)) { struct page *page; @@ -1075,7 +1072,7 @@ again: print_bad_pte(vma, addr, ptent, NULL); } pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); - } while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0)); + } while (pte++, addr += PAGE_SIZE, addr != end); add_mm_rss_vec(mm, rss); arch_leave_lazy_mmu_mode(); @@ -1099,7 +1096,7 @@ again: static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pud_t *pud, unsigned long addr, unsigned long end, - long *zap_work, struct zap_details *details) + struct zap_details *details) { pmd_t *pmd; unsigned long next; @@ -1111,19 +1108,15 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, if (next-addr != HPAGE_PMD_SIZE) { VM_BUG_ON(!rwsem_is_locked(&tlb->mm->mmap_sem)); split_huge_page_pmd(vma->vm_mm, pmd); - } else if (zap_huge_pmd(tlb, vma, pmd)) { - (*zap_work)--; + } else if (zap_huge_pmd(tlb, vma, pmd)) continue; - } /* fall through */ } - if (pmd_none_or_clear_bad(pmd)) { - (*zap_work)--; + if (pmd_none_or_clear_bad(pmd)) continue; - } - next = zap_pte_range(tlb, vma, pmd, addr, next, - zap_work, details); - } while (pmd++, addr = next, (addr != end && *zap_work > 0)); + next = zap_pte_range(tlb, vma, pmd, addr, next, details); + cond_resched(); + } while (pmd++, addr = next, addr != end); return addr; } @@ -1131,7 +1124,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, static inline unsigned long zap_pud_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pgd_t *pgd, unsigned long addr, unsigned long end, - long *zap_work, struct zap_details *details) + struct zap_details *details) { pud_t *pud; unsigned long next; @@ -1139,13 +1132,10 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb, pud = pud_offset(pgd, addr); do { next = pud_addr_end(addr, end); - if (pud_none_or_clear_bad(pud)) { - (*zap_work)--; + if (pud_none_or_clear_bad(pud)) continue; - } - next = zap_pmd_range(tlb, vma, pud, addr, next, - zap_work, details); - } while (pud++, addr = next, (addr != end && *zap_work > 0)); + next = zap_pmd_range(tlb, vma, pud, addr, next, details); + } while (pud++, addr = next, addr != end); return addr; } @@ -1153,7 +1143,7 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb, static unsigned long unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr, unsigned long end, - long *zap_work, struct zap_details *details) + struct zap_details *details) { pgd_t *pgd; unsigned long next; @@ -1167,13 +1157,10 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb, pgd = pgd_offset(vma->vm_mm, addr); do { next = pgd_addr_end(addr, end); - if (pgd_none_or_clear_bad(pgd)) { - (*zap_work)--; + if (pgd_none_or_clear_bad(pgd)) continue; - } - next = zap_pud_range(tlb, vma, pgd, addr, next, - zap_work, details); - } while (pgd++, addr = next, (addr != end && *zap_work > 0)); + next = zap_pud_range(tlb, vma, pgd, addr, next, details); + } while (pgd++, addr = next, addr != end); tlb_end_vma(tlb, vma); mem_cgroup_uncharge_end(); @@ -1218,9 +1205,7 @@ unsigned long unmap_vmas(struct mmu_gather *tlb, unsigned long end_addr, unsigned long *nr_accounted, struct zap_details *details) { - long zap_work = ZAP_BLOCK_SIZE; unsigned long start = start_addr; - spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL; struct mm_struct *mm = vma->vm_mm; mmu_notifier_invalidate_range_start(mm, start_addr, end_addr); @@ -1253,33 +1238,15 @@ unsigned long unmap_vmas(struct mmu_gather *tlb, * Since no pte has actually been setup, it is * safe to do nothing in this case. */ - if (vma->vm_file) { + if (vma->vm_file) unmap_hugepage_range(vma, start, end, NULL); - zap_work -= (end - start) / - pages_per_huge_page(hstate_vma(vma)); - } start = end; } else - start = unmap_page_range(tlb, vma, - start, end, &zap_work, details); - - if (zap_work > 0) { - BUG_ON(start != end); - break; - } - - if (need_resched() || - (i_mmap_lock && spin_needbreak(i_mmap_lock))) { - if (i_mmap_lock) - goto out; - cond_resched(); - } - - zap_work = ZAP_BLOCK_SIZE; + start = unmap_page_range(tlb, vma, start, end, details); } } -out: + mmu_notifier_invalidate_range_end(mm, start_addr, end_addr); return start; /* which is now the end (or restart) address */ } @@ -2612,96 +2579,11 @@ unwritable_page: return ret; } -/* - * Helper functions for unmap_mapping_range(). - * - * __ Notes on dropping i_mmap_lock to reduce latency while unmapping __ - * - * We have to restart searching the prio_tree whenever we drop the lock, - * since the iterator is only valid while the lock is held, and anyway - * a later vma might be split and reinserted earlier while lock dropped. - * - * The list of nonlinear vmas could be handled more efficiently, using - * a placeholder, but handle it in the same way until a need is shown. - * It is important to search the prio_tree before nonlinear list: a vma - * may become nonlinear and be shifted from prio_tree to nonlinear list - * while the lock is dropped; but never shifted from list to prio_tree. - * - * In order to make forward progress despite restarting the search, - * vm_truncate_count is used to mark a vma as now dealt with, so we can - * quickly skip it next time around. Since the prio_tree search only - * shows us those vmas affected by unmapping the range in question, we - * can't efficiently keep all vmas in step with mapping->truncate_count: - * so instead reset them all whenever it wraps back to 0 (then go to 1). - * mapping->truncate_count and vma->vm_truncate_count are protected by - * i_mmap_lock. - * - * In order to make forward progress despite repeatedly restarting some - * large vma, note the restart_addr from unmap_vmas when it breaks out: - * and restart from that address when we reach that vma again. It might - * have been split or merged, shrunk or extended, but never shifted: so - * restart_addr remains valid so long as it remains in the vma's range. - * unmap_mapping_range forces truncate_count to leap over page-aligned - * values so we can save vma's restart_addr in its truncate_count field. - */ -#define is_restart_addr(truncate_count) (!((truncate_count) & ~PAGE_MASK)) - -static void reset_vma_truncate_counts(struct address_space *mapping) -{ - struct vm_area_struct *vma; - struct prio_tree_iter iter; - - vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX) - vma->vm_truncate_count = 0; - list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list) - vma->vm_truncate_count = 0; -} - -static int unmap_mapping_range_vma(struct vm_area_struct *vma, +static void unmap_mapping_range_vma(struct vm_area_struct *vma, unsigned long start_addr, unsigned long end_addr, struct zap_details *details) { - unsigned long restart_addr; - int need_break; - - /* - * files that support invalidating or truncating portions of the - * file from under mmaped areas must have their ->fault function - * return a locked page (and set VM_FAULT_LOCKED in the return). - * This provides synchronisation against concurrent unmapping here. - */ - -again: - restart_addr = vma->vm_truncate_count; - if (is_restart_addr(restart_addr) && start_addr < restart_addr) { - start_addr = restart_addr; - if (start_addr >= end_addr) { - /* Top of vma has been split off since last time */ - vma->vm_truncate_count = details->truncate_count; - return 0; - } - } - - restart_addr = zap_page_range(vma, start_addr, - end_addr - start_addr, details); - need_break = need_resched() || spin_needbreak(details->i_mmap_lock); - - if (restart_addr >= end_addr) { - /* We have now completed this vma: mark it so */ - vma->vm_truncate_count = details->truncate_count; - if (!need_break) - return 0; - } else { - /* Note restart_addr in vma's truncate_count field */ - vma->vm_truncate_count = restart_addr; - if (!need_break) - goto again; - } - - spin_unlock(details->i_mmap_lock); - cond_resched(); - spin_lock(details->i_mmap_lock); - return -EINTR; + zap_page_range(vma, start_addr, end_addr - start_addr, details); } static inline void unmap_mapping_range_tree(struct prio_tree_root *root, @@ -2711,12 +2593,8 @@ static inline void unmap_mapping_range_tree(struct prio_tree_root *root, struct prio_tree_iter iter; pgoff_t vba, vea, zba, zea; -restart: vma_prio_tree_foreach(vma, &iter, root, details->first_index, details->last_index) { - /* Skip quickly over those we have already dealt with */ - if (vma->vm_truncate_count == details->truncate_count) - continue; vba = vma->vm_pgoff; vea = vba + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) - 1; @@ -2728,11 +2606,10 @@ restart: if (zea > vea) zea = vea; - if (unmap_mapping_range_vma(vma, + unmap_mapping_range_vma(vma, ((zba - vba) << PAGE_SHIFT) + vma->vm_start, ((zea - vba + 1) << PAGE_SHIFT) + vma->vm_start, - details) < 0) - goto restart; + details); } } @@ -2747,15 +2624,9 @@ static inline void unmap_mapping_range_list(struct list_head *head, * across *all* the pages in each nonlinear VMA, not just the pages * whose virtual address lies outside the file truncation point. */ -restart: list_for_each_entry(vma, head, shared.vm_set.list) { - /* Skip quickly over those we have already dealt with */ - if (vma->vm_truncate_count == details->truncate_count) - continue; details->nonlinear_vma = vma; - if (unmap_mapping_range_vma(vma, vma->vm_start, - vma->vm_end, details) < 0) - goto restart; + unmap_mapping_range_vma(vma, vma->vm_start, vma->vm_end, details); } } @@ -2794,26 +2665,14 @@ void unmap_mapping_range(struct address_space *mapping, details.last_index = hba + hlen - 1; if (details.last_index < details.first_index) details.last_index = ULONG_MAX; - details.i_mmap_lock = &mapping->i_mmap_lock; - mutex_lock(&mapping->unmap_mutex); - spin_lock(&mapping->i_mmap_lock); - - /* Protect against endless unmapping loops */ - mapping->truncate_count++; - if (unlikely(is_restart_addr(mapping->truncate_count))) { - if (mapping->truncate_count == 0) - reset_vma_truncate_counts(mapping); - mapping->truncate_count++; - } - details.truncate_count = mapping->truncate_count; + spin_lock(&mapping->i_mmap_lock); if (unlikely(!prio_tree_empty(&mapping->i_mmap))) unmap_mapping_range_tree(&mapping->i_mmap, &details); if (unlikely(!list_empty(&mapping->i_mmap_nonlinear))) unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details); spin_unlock(&mapping->i_mmap_lock); - mutex_unlock(&mapping->unmap_mutex); } EXPORT_SYMBOL(unmap_mapping_range); diff --git a/mm/mmap.c b/mm/mmap.c index 40d49986e714..50cb04bb56bf 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -445,10 +445,8 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma, if (vma->vm_file) mapping = vma->vm_file->f_mapping; - if (mapping) { + if (mapping) spin_lock(&mapping->i_mmap_lock); - vma->vm_truncate_count = mapping->truncate_count; - } __vma_link(mm, vma, prev, rb_link, rb_parent); __vma_link_file(vma); @@ -558,16 +556,7 @@ again: remove_next = 1 + (end > next->vm_end); if (!(vma->vm_flags & VM_NONLINEAR)) root = &mapping->i_mmap; spin_lock(&mapping->i_mmap_lock); - if (importer && - vma->vm_truncate_count != next->vm_truncate_count) { - /* - * unmap_mapping_range might be in progress: - * ensure that the expanding vma is rescanned. - */ - importer->vm_truncate_count = 0; - } if (insert) { - insert->vm_truncate_count = vma->vm_truncate_count; /* * Put into prio_tree now, so instantiated pages * are visible to arm/parisc __flush_dcache_page diff --git a/mm/mremap.c b/mm/mremap.c index a7c1f9f9b941..909e1e1e99b1 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -94,7 +94,6 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, */ mapping = vma->vm_file->f_mapping; spin_lock(&mapping->i_mmap_lock); - new_vma->vm_truncate_count = 0; } /* -- cgit v1.2.3 From 3d48ae45e72390ddf8cc5256ac32ed6f7a19cbea Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 24 May 2011 17:12:06 -0700 Subject: mm: Convert i_mmap_lock to a mutex Straightforward conversion of i_mmap_lock to a mutex. Signed-off-by: Peter Zijlstra Acked-by: Hugh Dickins Cc: Benjamin Herrenschmidt Cc: David Miller Cc: Martin Schwidefsky Cc: Russell King Cc: Paul Mundt Cc: Jeff Dike Cc: Richard Weinberger Cc: Tony Luck Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: KOSAKI Motohiro Cc: Nick Piggin Cc: Namhyung Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/lockstat.txt | 2 +- Documentation/vm/locking | 2 +- arch/x86/mm/hugetlbpage.c | 4 ++-- fs/hugetlbfs/inode.c | 4 ++-- fs/inode.c | 2 +- include/linux/fs.h | 2 +- include/linux/mmu_notifier.h | 2 +- kernel/fork.c | 4 ++-- mm/filemap.c | 10 +++++----- mm/filemap_xip.c | 4 ++-- mm/fremap.c | 4 ++-- mm/hugetlb.c | 14 +++++++------- mm/memory-failure.c | 4 ++-- mm/memory.c | 4 ++-- mm/mmap.c | 22 +++++++++++----------- mm/mremap.c | 4 ++-- mm/rmap.c | 28 ++++++++++++++-------------- 17 files changed, 58 insertions(+), 58 deletions(-) (limited to 'include/linux') diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt index 65f4c795015d..9c0a80d17a23 100644 --- a/Documentation/lockstat.txt +++ b/Documentation/lockstat.txt @@ -136,7 +136,7 @@ View the top contending locks: dcache_lock: 1037 1161 0.38 45.32 774.51 6611 243371 0.15 306.48 77387.24 &inode->i_mutex: 161 286 18446744073709 62882.54 1244614.55 3653 20598 18446744073709 62318.60 1693822.74 &zone->lru_lock: 94 94 0.53 7.33 92.10 4366 32690 0.29 59.81 16350.06 - &inode->i_data.i_mmap_lock: 79 79 0.40 3.77 53.03 11779 87755 0.28 116.93 29898.44 + &inode->i_data.i_mmap_mutex: 79 79 0.40 3.77 53.03 11779 87755 0.28 116.93 29898.44 &q->__queue_lock: 48 50 0.52 31.62 86.31 774 13131 0.17 113.08 12277.52 &rq->rq_lock_key: 43 47 0.74 68.50 170.63 3706 33929 0.22 107.99 17460.62 &rq->rq_lock_key#2: 39 46 0.75 6.68 49.03 2979 32292 0.17 125.17 17137.63 diff --git a/Documentation/vm/locking b/Documentation/vm/locking index 25fadb448760..f61228bd6395 100644 --- a/Documentation/vm/locking +++ b/Documentation/vm/locking @@ -66,7 +66,7 @@ in some cases it is not really needed. Eg, vm_start is modified by expand_stack(), it is hard to come up with a destructive scenario without having the vmlist protection in this case. -The page_table_lock nests with the inode i_mmap_lock and the kmem cache +The page_table_lock nests with the inode i_mmap_mutex and the kmem cache c_spinlock spinlocks. This is okay, since the kmem code asks for pages after dropping c_spinlock. The page_table_lock also nests with pagecache_lock and pagemap_lru_lock spinlocks, and no code asks for memory with these locks diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index d4203988504a..f581a18c0d4d 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -72,7 +72,7 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) if (!vma_shareable(vma, addr)) return; - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) { if (svma == vma) continue; @@ -97,7 +97,7 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) put_page(virt_to_page(spte)); spin_unlock(&mm->page_table_lock); out: - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); } /* diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index b9eeb1cd03ff..e7a035781b7d 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -412,10 +412,10 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) pgoff = offset >> PAGE_SHIFT; i_size_write(inode, offset); - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); if (!prio_tree_empty(&mapping->i_mmap)) hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); truncate_hugepages(inode, offset); return 0; } diff --git a/fs/inode.c b/fs/inode.c index 7a7284c71abd..88aa3bcc7681 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -326,7 +326,7 @@ void address_space_init_once(struct address_space *mapping) memset(mapping, 0, sizeof(*mapping)); INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC); spin_lock_init(&mapping->tree_lock); - spin_lock_init(&mapping->i_mmap_lock); + mutex_init(&mapping->i_mmap_mutex); INIT_LIST_HEAD(&mapping->private_list); spin_lock_init(&mapping->private_lock); INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap); diff --git a/include/linux/fs.h b/include/linux/fs.h index 5d2c86bdf5ba..5bb9e826019b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -634,7 +634,7 @@ struct address_space { unsigned int i_mmap_writable;/* count VM_SHARED mappings */ struct prio_tree_root i_mmap; /* tree of private and shared mappings */ struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ - spinlock_t i_mmap_lock; /* protect tree, count, list */ + struct mutex i_mmap_mutex; /* protect tree, count, list */ unsigned long nrpages; /* number of total pages */ pgoff_t writeback_index;/* writeback starts here */ const struct address_space_operations *a_ops; /* methods */ diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index cc2e7dfea9d7..a877dfc243eb 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -150,7 +150,7 @@ struct mmu_notifier_ops { * Therefore notifier chains can only be traversed when either * * 1. mmap_sem is held. - * 2. One of the reverse map locks is held (i_mmap_lock or anon_vma->lock). + * 2. One of the reverse map locks is held (i_mmap_mutex or anon_vma->lock). * 3. No other concurrent thread can access the list (release) */ struct mmu_notifier { diff --git a/kernel/fork.c b/kernel/fork.c index 4eef925477fc..927692734bcf 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -383,14 +383,14 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) get_file(file); if (tmp->vm_flags & VM_DENYWRITE) atomic_dec(&inode->i_writecount); - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); if (tmp->vm_flags & VM_SHARED) mapping->i_mmap_writable++; flush_dcache_mmap_lock(mapping); /* insert tmp into the share list, just after mpnt */ vma_prio_tree_add(tmp, mpnt); flush_dcache_mmap_unlock(mapping); - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); } /* diff --git a/mm/filemap.c b/mm/filemap.c index 8144f87dcbb4..88354ae0b1fd 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -58,16 +58,16 @@ /* * Lock ordering: * - * ->i_mmap_lock (truncate_pagecache) + * ->i_mmap_mutex (truncate_pagecache) * ->private_lock (__free_pte->__set_page_dirty_buffers) * ->swap_lock (exclusive_swap_page, others) * ->mapping->tree_lock * * ->i_mutex - * ->i_mmap_lock (truncate->unmap_mapping_range) + * ->i_mmap_mutex (truncate->unmap_mapping_range) * * ->mmap_sem - * ->i_mmap_lock + * ->i_mmap_mutex * ->page_table_lock or pte_lock (various, mainly in memory.c) * ->mapping->tree_lock (arch-dependent flush_dcache_mmap_lock) * @@ -84,7 +84,7 @@ * sb_lock (fs/fs-writeback.c) * ->mapping->tree_lock (__sync_single_inode) * - * ->i_mmap_lock + * ->i_mmap_mutex * ->anon_vma.lock (vma_adjust) * * ->anon_vma.lock @@ -106,7 +106,7 @@ * * (code doesn't rely on that order, so you could switch it around) * ->tasklist_lock (memory_failure, collect_procs_ao) - * ->i_mmap_lock + * ->i_mmap_mutex */ /* diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index 83364df74a33..93356cd12828 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -183,7 +183,7 @@ __xip_unmap (struct address_space * mapping, return; retry: - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { mm = vma->vm_mm; address = vma->vm_start + @@ -201,7 +201,7 @@ retry: page_cache_release(page); } } - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); if (locked) { mutex_unlock(&xip_sparse_mutex); diff --git a/mm/fremap.c b/mm/fremap.c index ec520c7b28df..7f4123056e06 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -211,13 +211,13 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, } goto out; } - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); flush_dcache_mmap_lock(mapping); vma->vm_flags |= VM_NONLINEAR; vma_prio_tree_remove(vma, &mapping->i_mmap); vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear); flush_dcache_mmap_unlock(mapping); - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); } if (vma->vm_flags & VM_LOCKED) { diff --git a/mm/hugetlb.c b/mm/hugetlb.c index bbb4a5bbb958..5fd68b95c671 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2205,7 +2205,7 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long sz = huge_page_size(h); /* - * A page gathering list, protected by per file i_mmap_lock. The + * A page gathering list, protected by per file i_mmap_mutex. The * lock is used to avoid list corruption from multiple unmapping * of the same page since we are using page->lru. */ @@ -2274,9 +2274,9 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, struct page *ref_page) { - spin_lock(&vma->vm_file->f_mapping->i_mmap_lock); + mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex); __unmap_hugepage_range(vma, start, end, ref_page); - spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock); + mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex); } /* @@ -2308,7 +2308,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, * this mapping should be shared between all the VMAs, * __unmap_hugepage_range() is called as the lock is already held */ - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); vma_prio_tree_foreach(iter_vma, &iter, &mapping->i_mmap, pgoff, pgoff) { /* Do not unmap the current VMA */ if (iter_vma == vma) @@ -2326,7 +2326,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, address, address + huge_page_size(h), page); } - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); return 1; } @@ -2810,7 +2810,7 @@ void hugetlb_change_protection(struct vm_area_struct *vma, BUG_ON(address >= end); flush_cache_range(vma, address, end); - spin_lock(&vma->vm_file->f_mapping->i_mmap_lock); + mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex); spin_lock(&mm->page_table_lock); for (; address < end; address += huge_page_size(h)) { ptep = huge_pte_offset(mm, address); @@ -2825,7 +2825,7 @@ void hugetlb_change_protection(struct vm_area_struct *vma, } } spin_unlock(&mm->page_table_lock); - spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock); + mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex); flush_tlb_range(vma, start, end); } diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 2b9a5eef39e0..12178ec32ab5 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -429,7 +429,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, */ read_lock(&tasklist_lock); - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); for_each_process(tsk) { pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); @@ -449,7 +449,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, add_to_kill(tsk, page, vma, to_kill, tkc); } } - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); read_unlock(&tasklist_lock); } diff --git a/mm/memory.c b/mm/memory.c index 18655878b9f8..7bbe4d3df756 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2667,12 +2667,12 @@ void unmap_mapping_range(struct address_space *mapping, details.last_index = ULONG_MAX; - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); if (unlikely(!prio_tree_empty(&mapping->i_mmap))) unmap_mapping_range_tree(&mapping->i_mmap, &details); if (unlikely(!list_empty(&mapping->i_mmap_nonlinear))) unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details); - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); } EXPORT_SYMBOL(unmap_mapping_range); diff --git a/mm/mmap.c b/mm/mmap.c index 50cb04bb56bf..26efbfca0b20 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -194,7 +194,7 @@ error: } /* - * Requires inode->i_mapping->i_mmap_lock + * Requires inode->i_mapping->i_mmap_mutex */ static void __remove_shared_vm_struct(struct vm_area_struct *vma, struct file *file, struct address_space *mapping) @@ -222,9 +222,9 @@ void unlink_file_vma(struct vm_area_struct *vma) if (file) { struct address_space *mapping = file->f_mapping; - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); __remove_shared_vm_struct(vma, file, mapping); - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); } } @@ -446,13 +446,13 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma, mapping = vma->vm_file->f_mapping; if (mapping) - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); __vma_link(mm, vma, prev, rb_link, rb_parent); __vma_link_file(vma); if (mapping) - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); mm->map_count++; validate_mm(mm); @@ -555,7 +555,7 @@ again: remove_next = 1 + (end > next->vm_end); mapping = file->f_mapping; if (!(vma->vm_flags & VM_NONLINEAR)) root = &mapping->i_mmap; - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); if (insert) { /* * Put into prio_tree now, so instantiated pages @@ -622,7 +622,7 @@ again: remove_next = 1 + (end > next->vm_end); if (anon_vma) anon_vma_unlock(anon_vma); if (mapping) - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); if (remove_next) { if (file) { @@ -2290,7 +2290,7 @@ void exit_mmap(struct mm_struct *mm) /* Insert vm structure into process list sorted by address * and into the inode's i_mmap tree. If vm_file is non-NULL - * then i_mmap_lock is taken here. + * then i_mmap_mutex is taken here. */ int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma) { @@ -2532,7 +2532,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping) */ if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags)) BUG(); - spin_lock_nest_lock(&mapping->i_mmap_lock, &mm->mmap_sem); + mutex_lock_nest_lock(&mapping->i_mmap_mutex, &mm->mmap_sem); } } @@ -2559,7 +2559,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping) * vma in this mm is backed by the same anon_vma or address_space. * * We can take all the locks in random order because the VM code - * taking i_mmap_lock or anon_vma->lock outside the mmap_sem never + * taking i_mmap_mutex or anon_vma->lock outside the mmap_sem never * takes more than one of them in a row. Secondly we're protected * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex. * @@ -2631,7 +2631,7 @@ static void vm_unlock_mapping(struct address_space *mapping) * AS_MM_ALL_LOCKS can't change to 0 from under us * because we hold the mm_all_locks_mutex. */ - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); if (!test_and_clear_bit(AS_MM_ALL_LOCKS, &mapping->flags)) BUG(); diff --git a/mm/mremap.c b/mm/mremap.c index 909e1e1e99b1..506fa44403df 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -93,7 +93,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, * and we propagate stale pages into the dst afterward. */ mapping = vma->vm_file->f_mapping; - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); } /* @@ -122,7 +122,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, pte_unmap(new_pte - 1); pte_unmap_unlock(old_pte - 1, old_ptl); if (mapping) - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); mmu_notifier_invalidate_range_end(vma->vm_mm, old_start, old_end); } diff --git a/mm/rmap.c b/mm/rmap.c index 522e4a93cadd..f0ef7ea5423a 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -24,7 +24,7 @@ * inode->i_alloc_sem (vmtruncate_range) * mm->mmap_sem * page->flags PG_locked (lock_page) - * mapping->i_mmap_lock + * mapping->i_mmap_mutex * anon_vma->lock * mm->page_table_lock or pte_lock * zone->lru_lock (in mark_page_accessed, isolate_lru_page) @@ -646,14 +646,14 @@ static int page_referenced_file(struct page *page, * The page lock not only makes sure that page->mapping cannot * suddenly be NULLified by truncation, it makes sure that the * structure at mapping cannot be freed and reused yet, - * so we can safely take mapping->i_mmap_lock. + * so we can safely take mapping->i_mmap_mutex. */ BUG_ON(!PageLocked(page)); - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); /* - * i_mmap_lock does not stabilize mapcount at all, but mapcount + * i_mmap_mutex does not stabilize mapcount at all, but mapcount * is more likely to be accurate if we note it after spinning. */ mapcount = page_mapcount(page); @@ -675,7 +675,7 @@ static int page_referenced_file(struct page *page, break; } - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); return referenced; } @@ -762,7 +762,7 @@ static int page_mkclean_file(struct address_space *mapping, struct page *page) BUG_ON(PageAnon(page)); - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { if (vma->vm_flags & VM_SHARED) { unsigned long address = vma_address(page, vma); @@ -771,7 +771,7 @@ static int page_mkclean_file(struct address_space *mapping, struct page *page) ret += page_mkclean_one(page, vma, address); } } - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); return ret; } @@ -1119,7 +1119,7 @@ out_mlock: /* * We need mmap_sem locking, Otherwise VM_LOCKED check makes * unstable result and race. Plus, We can't wait here because - * we now hold anon_vma->lock or mapping->i_mmap_lock. + * we now hold anon_vma->lock or mapping->i_mmap_mutex. * if trylock failed, the page remain in evictable lru and later * vmscan could retry to move the page to unevictable lru if the * page is actually mlocked. @@ -1345,7 +1345,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags) unsigned long max_nl_size = 0; unsigned int mapcount; - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { unsigned long address = vma_address(page, vma); if (address == -EFAULT) @@ -1391,7 +1391,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags) mapcount = page_mapcount(page); if (!mapcount) goto out; - cond_resched_lock(&mapping->i_mmap_lock); + cond_resched(); max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK; if (max_nl_cursor == 0) @@ -1413,7 +1413,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags) } vma->vm_private_data = (void *) max_nl_cursor; } - cond_resched_lock(&mapping->i_mmap_lock); + cond_resched(); max_nl_cursor += CLUSTER_SIZE; } while (max_nl_cursor <= max_nl_size); @@ -1425,7 +1425,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags) list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list) vma->vm_private_data = NULL; out: - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); return ret; } @@ -1544,7 +1544,7 @@ static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *, if (!mapping) return ret; - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { unsigned long address = vma_address(page, vma); if (address == -EFAULT) @@ -1558,7 +1558,7 @@ static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *, * never contain migration ptes. Decide what to do about this * limitation to linear when we need rmap_walk() on nonlinear. */ - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); return ret; } -- cgit v1.2.3 From 25aeeb046e695c3093a86aa9386128ffb3b1bc32 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 24 May 2011 17:12:07 -0700 Subject: mm: revert page_lock_anon_vma() lock annotation Its beyond ugly and gets in the way. Signed-off-by: Peter Zijlstra Acked-by: Hugh Dickins Cc: Benjamin Herrenschmidt Cc: David Miller Cc: Martin Schwidefsky Cc: Russell King Cc: Paul Mundt Cc: Jeff Dike Cc: Richard Weinberger Cc: Tony Luck Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: Namhyung Kim Cc: KOSAKI Motohiro Cc: Nick Piggin Cc: Namhyung Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 15 +-------------- mm/rmap.c | 4 +--- 2 files changed, 2 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 830e65dc01ee..590c291a8cd9 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -218,20 +218,7 @@ int try_to_munlock(struct page *); /* * Called by memory-failure.c to kill processes. */ -struct anon_vma *__page_lock_anon_vma(struct page *page); - -static inline struct anon_vma *page_lock_anon_vma(struct page *page) -{ - struct anon_vma *anon_vma; - - __cond_lock(RCU, anon_vma = __page_lock_anon_vma(page)); - - /* (void) is needed to make gcc happy */ - (void) __cond_lock(&anon_vma->root->lock, anon_vma); - - return anon_vma; -} - +struct anon_vma *page_lock_anon_vma(struct page *page); void page_unlock_anon_vma(struct anon_vma *anon_vma); int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma); diff --git a/mm/rmap.c b/mm/rmap.c index f0ef7ea5423a..c6044761617e 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -323,7 +323,7 @@ void __init anon_vma_init(void) * Getting a lock on a stable anon_vma from a page off the LRU is * tricky: page_lock_anon_vma rely on RCU to guard against the races. */ -struct anon_vma *__page_lock_anon_vma(struct page *page) +struct anon_vma *page_lock_anon_vma(struct page *page) { struct anon_vma *anon_vma, *root_anon_vma; unsigned long anon_mapping; @@ -357,8 +357,6 @@ out: } void page_unlock_anon_vma(struct anon_vma *anon_vma) - __releases(&anon_vma->root->lock) - __releases(RCU) { anon_vma_unlock(anon_vma); rcu_read_unlock(); -- cgit v1.2.3 From 2b575eb64f7a9c701fb4bfdb12388ac547f6c2b6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 24 May 2011 17:12:11 -0700 Subject: mm: convert anon_vma->lock to a mutex Straightforward conversion of anon_vma->lock to a mutex. Signed-off-by: Peter Zijlstra Acked-by: Hugh Dickins Reviewed-by: KOSAKI Motohiro Cc: Benjamin Herrenschmidt Cc: David Miller Cc: Martin Schwidefsky Cc: Russell King Cc: Paul Mundt Cc: Jeff Dike Cc: Richard Weinberger Cc: Tony Luck Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: Nick Piggin Cc: Namhyung Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 8 ++------ include/linux/mmu_notifier.h | 2 +- include/linux/rmap.h | 14 +++++++------- mm/huge_memory.c | 4 ++-- mm/mmap.c | 10 +++++----- mm/rmap.c | 8 ++++---- 6 files changed, 21 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 8847c8c29791..48c32ebf65a7 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -92,12 +92,8 @@ extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd); #define wait_split_huge_page(__anon_vma, __pmd) \ do { \ pmd_t *____pmd = (__pmd); \ - spin_unlock_wait(&(__anon_vma)->root->lock); \ - /* \ - * spin_unlock_wait() is just a loop in C and so the \ - * CPU can reorder anything around it. \ - */ \ - smp_mb(); \ + anon_vma_lock(__anon_vma); \ + anon_vma_unlock(__anon_vma); \ BUG_ON(pmd_trans_splitting(*____pmd) || \ pmd_trans_huge(*____pmd)); \ } while (0) diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index a877dfc243eb..1d1b1e13f79f 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -150,7 +150,7 @@ struct mmu_notifier_ops { * Therefore notifier chains can only be traversed when either * * 1. mmap_sem is held. - * 2. One of the reverse map locks is held (i_mmap_mutex or anon_vma->lock). + * 2. One of the reverse map locks is held (i_mmap_mutex or anon_vma->mutex). * 3. No other concurrent thread can access the list (release) */ struct mmu_notifier { diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 590c291a8cd9..2148b122779b 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include /* @@ -26,7 +26,7 @@ */ struct anon_vma { struct anon_vma *root; /* Root of this anon_vma tree */ - spinlock_t lock; /* Serialize access to vma list */ + struct mutex mutex; /* Serialize access to vma list */ /* * The refcount is taken on an anon_vma when there is no * guarantee that the vma of page tables will exist for @@ -64,7 +64,7 @@ struct anon_vma_chain { struct vm_area_struct *vma; struct anon_vma *anon_vma; struct list_head same_vma; /* locked by mmap_sem & page_table_lock */ - struct list_head same_anon_vma; /* locked by anon_vma->lock */ + struct list_head same_anon_vma; /* locked by anon_vma->mutex */ }; #ifdef CONFIG_MMU @@ -93,24 +93,24 @@ static inline void vma_lock_anon_vma(struct vm_area_struct *vma) { struct anon_vma *anon_vma = vma->anon_vma; if (anon_vma) - spin_lock(&anon_vma->root->lock); + mutex_lock(&anon_vma->root->mutex); } static inline void vma_unlock_anon_vma(struct vm_area_struct *vma) { struct anon_vma *anon_vma = vma->anon_vma; if (anon_vma) - spin_unlock(&anon_vma->root->lock); + mutex_unlock(&anon_vma->root->mutex); } static inline void anon_vma_lock(struct anon_vma *anon_vma) { - spin_lock(&anon_vma->root->lock); + mutex_lock(&anon_vma->root->mutex); } static inline void anon_vma_unlock(struct anon_vma *anon_vma) { - spin_unlock(&anon_vma->root->lock); + mutex_unlock(&anon_vma->root->mutex); } /* diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 83326ad66d9b..90eef404ec2e 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1139,7 +1139,7 @@ static int __split_huge_page_splitting(struct page *page, * We can't temporarily set the pmd to null in order * to split it, the pmd must remain marked huge at all * times or the VM won't take the pmd_trans_huge paths - * and it won't wait on the anon_vma->root->lock to + * and it won't wait on the anon_vma->root->mutex to * serialize against split_huge_page*. */ pmdp_splitting_flush_notify(vma, address, pmd); @@ -1333,7 +1333,7 @@ static int __split_huge_page_map(struct page *page, return ret; } -/* must be called with anon_vma->root->lock hold */ +/* must be called with anon_vma->root->mutex hold */ static void __split_huge_page(struct page *page, struct anon_vma *anon_vma) { diff --git a/mm/mmap.c b/mm/mmap.c index 26efbfca0b20..ac2631b7477f 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2502,15 +2502,15 @@ static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma) * The LSB of head.next can't change from under us * because we hold the mm_all_locks_mutex. */ - spin_lock_nest_lock(&anon_vma->root->lock, &mm->mmap_sem); + mutex_lock_nest_lock(&anon_vma->root->mutex, &mm->mmap_sem); /* * We can safely modify head.next after taking the - * anon_vma->root->lock. If some other vma in this mm shares + * anon_vma->root->mutex. If some other vma in this mm shares * the same anon_vma we won't take it again. * * No need of atomic instructions here, head.next * can't change from under us thanks to the - * anon_vma->root->lock. + * anon_vma->root->mutex. */ if (__test_and_set_bit(0, (unsigned long *) &anon_vma->root->head.next)) @@ -2559,7 +2559,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping) * vma in this mm is backed by the same anon_vma or address_space. * * We can take all the locks in random order because the VM code - * taking i_mmap_mutex or anon_vma->lock outside the mmap_sem never + * taking i_mmap_mutex or anon_vma->mutex outside the mmap_sem never * takes more than one of them in a row. Secondly we're protected * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex. * @@ -2615,7 +2615,7 @@ static void vm_unlock_anon_vma(struct anon_vma *anon_vma) * * No need of atomic instructions here, head.next * can't change from under us until we release the - * anon_vma->root->lock. + * anon_vma->root->mutex. */ if (!__test_and_clear_bit(0, (unsigned long *) &anon_vma->root->head.next)) diff --git a/mm/rmap.c b/mm/rmap.c index d271845d7d15..ce29d405d093 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -25,7 +25,7 @@ * mm->mmap_sem * page->flags PG_locked (lock_page) * mapping->i_mmap_mutex - * anon_vma->lock + * anon_vma->mutex * mm->page_table_lock or pte_lock * zone->lru_lock (in mark_page_accessed, isolate_lru_page) * swap_lock (in swap_duplicate, swap_info_get) @@ -40,7 +40,7 @@ * * (code doesn't rely on that order so it could be switched around) * ->tasklist_lock - * anon_vma->lock (memory_failure, collect_procs_anon) + * anon_vma->mutex (memory_failure, collect_procs_anon) * pte map lock */ @@ -307,7 +307,7 @@ static void anon_vma_ctor(void *data) { struct anon_vma *anon_vma = data; - spin_lock_init(&anon_vma->lock); + mutex_init(&anon_vma->mutex); atomic_set(&anon_vma->refcount, 0); INIT_LIST_HEAD(&anon_vma->head); } @@ -1143,7 +1143,7 @@ out_mlock: /* * We need mmap_sem locking, Otherwise VM_LOCKED check makes * unstable result and race. Plus, We can't wait here because - * we now hold anon_vma->lock or mapping->i_mmap_mutex. + * we now hold anon_vma->mutex or mapping->i_mmap_mutex. * if trylock failed, the page remain in evictable lru and later * vmscan could retry to move the page to unevictable lru if the * page is actually mlocked. -- cgit v1.2.3 From de03c72cfce5b263a674d04348b58475ec50163c Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 24 May 2011 17:12:15 -0700 Subject: mm: convert mm->cpu_vm_cpumask into cpumask_var_t cpumask_t is very big struct and cpu_vm_mask is placed wrong position. It might lead to reduce cache hit ratio. This patch has two change. 1) Move the place of cpumask into last of mm_struct. Because usually cpumask is accessed only front bits when the system has cpu-hotplug capability 2) Convert cpu_vm_mask into cpumask_var_t. It may help to reduce memory footprint if cpumask_size() will use nr_cpumask_bits properly in future. In addition, this patch change the name of cpu_vm_mask with cpu_vm_mask_var. It may help to detect out of tree cpu_vm_mask users. This patch has no functional change. [akpm@linux-foundation.org: build fix] [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: KOSAKI Motohiro Cc: David Howells Cc: Koichi Yasutake Cc: Hugh Dickins Cc: Chris Metcalf Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cachetlb.txt | 2 +- arch/x86/kernel/tboot.c | 1 - include/linux/mm_types.h | 9 ++++++--- include/linux/sched.h | 1 + init/main.c | 2 ++ kernel/fork.c | 37 ++++++++++++++++++++++++++++++++++--- mm/init-mm.c | 1 - 7 files changed, 44 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/Documentation/cachetlb.txt b/Documentation/cachetlb.txt index 9164ae3b83bc..9b728dc17535 100644 --- a/Documentation/cachetlb.txt +++ b/Documentation/cachetlb.txt @@ -16,7 +16,7 @@ on all processors in the system. Don't let this scare you into thinking SMP cache/tlb flushing must be so inefficient, this is in fact an area where many optimizations are possible. For example, if it can be proven that a user address space has never executed -on a cpu (see vma->cpu_vm_mask), one need not perform a flush +on a cpu (see mm_cpumask()), one need not perform a flush for this address space on that cpu. First, the TLB flushing interfaces, since they are the simplest. The diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 998e972f3b1a..30ac65df7d4e 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -110,7 +110,6 @@ static struct mm_struct tboot_mm = { .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem), .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), .mmlist = LIST_HEAD_INIT(init_mm.mmlist), - .cpu_vm_mask = CPU_MASK_ALL, }; static inline void switch_to_tboot_pt(void) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 201998e5b530..c2f9ea7922f4 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -265,8 +265,6 @@ struct mm_struct { struct linux_binfmt *binfmt; - cpumask_t cpu_vm_mask; - /* Architecture-specific MM context */ mm_context_t context; @@ -316,9 +314,14 @@ struct mm_struct { #ifdef CONFIG_TRANSPARENT_HUGEPAGE pgtable_t pmd_huge_pte; /* protected by page_table_lock */ #endif + + cpumask_var_t cpu_vm_mask_var; }; /* Future-safe accessor for struct mm_struct's cpu_vm_mask. */ -#define mm_cpumask(mm) (&(mm)->cpu_vm_mask) +static inline cpumask_t *mm_cpumask(struct mm_struct *mm) +{ + return mm->cpu_vm_mask_var; +} #endif /* _LINUX_MM_TYPES_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 44b8faaac7c0..f18300eddfcb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2176,6 +2176,7 @@ static inline void mmdrop(struct mm_struct * mm) if (unlikely(atomic_dec_and_test(&mm->mm_count))) __mmdrop(mm); } +extern int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm); /* mmput gets rid of the mappings and all user-space */ extern void mmput(struct mm_struct *); diff --git a/init/main.c b/init/main.c index 48df882d51d2..22da33918aef 100644 --- a/init/main.c +++ b/init/main.c @@ -509,6 +509,8 @@ asmlinkage void __init start_kernel(void) sort_main_extable(); trap_init(); mm_init(); + BUG_ON(mm_init_cpumask(&init_mm, 0)); + /* * Set up the scheduler prior starting any interrupts (such as the * timer interrupt). Full topology setup happens at smp_init() diff --git a/kernel/fork.c b/kernel/fork.c index 927692734bcf..8e7e135d0817 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -485,6 +485,20 @@ static void mm_init_aio(struct mm_struct *mm) #endif } +int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm) +{ +#ifdef CONFIG_CPUMASK_OFFSTACK + if (!alloc_cpumask_var(&mm->cpu_vm_mask_var, GFP_KERNEL)) + return -ENOMEM; + + if (oldmm) + cpumask_copy(mm_cpumask(mm), mm_cpumask(oldmm)); + else + memset(mm_cpumask(mm), 0, cpumask_size()); +#endif + return 0; +} + static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) { atomic_set(&mm->mm_users, 1); @@ -521,10 +535,20 @@ struct mm_struct * mm_alloc(void) struct mm_struct * mm; mm = allocate_mm(); - if (mm) { - memset(mm, 0, sizeof(*mm)); - mm = mm_init(mm, current); + if (!mm) + return NULL; + + memset(mm, 0, sizeof(*mm)); + mm = mm_init(mm, current); + if (!mm) + return NULL; + + if (mm_init_cpumask(mm, NULL)) { + mm_free_pgd(mm); + free_mm(mm); + return NULL; } + return mm; } @@ -536,6 +560,7 @@ struct mm_struct * mm_alloc(void) void __mmdrop(struct mm_struct *mm) { BUG_ON(mm == &init_mm); + free_cpumask_var(mm->cpu_vm_mask_var); mm_free_pgd(mm); destroy_context(mm); mmu_notifier_mm_destroy(mm); @@ -690,6 +715,9 @@ struct mm_struct *dup_mm(struct task_struct *tsk) if (!mm_init(mm, tsk)) goto fail_nomem; + if (mm_init_cpumask(mm, oldmm)) + goto fail_nocpumask; + if (init_new_context(tsk, mm)) goto fail_nocontext; @@ -716,6 +744,9 @@ fail_nomem: return NULL; fail_nocontext: + free_cpumask_var(mm->cpu_vm_mask_var); + +fail_nocpumask: /* * If init_new_context() failed, we cannot use mmput() to free the mm * because it calls destroy_context() diff --git a/mm/init-mm.c b/mm/init-mm.c index 1d29cdfe8ebb..4019979b2637 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -21,6 +21,5 @@ struct mm_struct init_mm = { .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem), .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), .mmlist = LIST_HEAD_INIT(init_mm.mmlist), - .cpu_vm_mask = CPU_MASK_ALL, INIT_MM_CONTEXT(init_mm) }; -- cgit v1.2.3 From a238ab5b0239575c179f4976064192c3f7409dad Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 24 May 2011 17:12:16 -0700 Subject: mm: break out page allocation warning code This originally started as a simple patch to give vmalloc() some more verbose output on failure on top of the plain page allocator messages. Johannes suggested that it might be nicer to lead with the vmalloc() info _before_ the page allocator messages. But, I do think there's a lot of value in what __alloc_pages_slowpath() does with its filtering and so forth. This patch creates a new function which other allocators can call instead of relying on the internal page allocator warnings. It also gives this function private rate-limiting which separates it from other printk_ratelimit() users. Signed-off-by: Dave Hansen Cc: Johannes Weiner Cc: David Rientjes Cc: Michal Nazarewicz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 ++ mm/page_alloc.c | 62 ++++++++++++++++++++++++++++++++++++------------------ 2 files changed, 43 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2ad0ac8c3f32..8e2f2cd821f7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1387,6 +1387,8 @@ extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); extern int after_bootmem; +extern void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...); + extern void setup_per_cpu_pageset(void); extern void zone_pcp_update(struct zone *zone); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 83eaa2eb72f8..44019da9632e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -1736,6 +1737,45 @@ static inline bool should_suppress_show_mem(void) return ret; } +static DEFINE_RATELIMIT_STATE(nopage_rs, + DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + +void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...) +{ + va_list args; + unsigned int filter = SHOW_MEM_FILTER_NODES; + + if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs)) + return; + + /* + * This documents exceptions given to allocations in certain + * contexts that are allowed to allocate outside current's set + * of allowed nodes. + */ + if (!(gfp_mask & __GFP_NOMEMALLOC)) + if (test_thread_flag(TIF_MEMDIE) || + (current->flags & (PF_MEMALLOC | PF_EXITING))) + filter &= ~SHOW_MEM_FILTER_NODES; + if (in_interrupt() || !(gfp_mask & __GFP_WAIT)) + filter &= ~SHOW_MEM_FILTER_NODES; + + if (fmt) { + printk(KERN_WARNING); + va_start(args, fmt); + vprintk(fmt, args); + va_end(args); + } + + pr_warning("%s: page allocation failure: order:%d, mode:0x%x\n", + current->comm, order, gfp_mask); + + dump_stack(); + if (!should_suppress_show_mem()) + show_mem(filter); +} + static inline int should_alloc_retry(gfp_t gfp_mask, unsigned int order, unsigned long pages_reclaimed) @@ -2178,27 +2218,7 @@ rebalance: } nopage: - if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) { - unsigned int filter = SHOW_MEM_FILTER_NODES; - - /* - * This documents exceptions given to allocations in certain - * contexts that are allowed to allocate outside current's set - * of allowed nodes. - */ - if (!(gfp_mask & __GFP_NOMEMALLOC)) - if (test_thread_flag(TIF_MEMDIE) || - (current->flags & (PF_MEMALLOC | PF_EXITING))) - filter &= ~SHOW_MEM_FILTER_NODES; - if (in_interrupt() || !wait) - filter &= ~SHOW_MEM_FILTER_NODES; - - pr_warning("%s: page allocation failure. order:%d, mode:0x%x\n", - current->comm, order, gfp_mask); - dump_stack(); - if (!should_suppress_show_mem()) - show_mem(filter); - } + warn_alloc_failed(gfp_mask, order, NULL); return page; got_pg: if (kmemcheck_enabled) -- cgit v1.2.3 From 7b1de5868b124d8f399d8791ed30a9b679d64d4d Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Tue, 24 May 2011 17:12:25 -0700 Subject: readahead: readahead page allocations are OK to fail Pass __GFP_NORETRY|__GFP_NOWARN for readahead page allocations. readahead page allocations are completely optional. They are OK to fail and in particular shall not trigger OOM on themselves. Reported-by: Dave Young Reviewed-by: KOSAKI Motohiro Signed-off-by: Wu Fengguang Reviewed-by: Minchan Kim Reviewed-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 6 ++++++ mm/readahead.c | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index ea268080380d..716875e53520 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -219,6 +219,12 @@ static inline struct page *page_cache_alloc_cold(struct address_space *x) return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD); } +static inline struct page *page_cache_alloc_readahead(struct address_space *x) +{ + return __page_cache_alloc(mapping_gfp_mask(x) | + __GFP_COLD | __GFP_NORETRY | __GFP_NOWARN); +} + typedef int filler_t(void *, struct page *); extern struct page * find_get_page(struct address_space *mapping, diff --git a/mm/readahead.c b/mm/readahead.c index 2c0cc489e288..867f9dd82dcd 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -180,7 +180,7 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp, if (page) continue; - page = page_cache_alloc_cold(mapping); + page = page_cache_alloc_readahead(mapping); if (!page) break; page->index = page_offset; -- cgit v1.2.3 From a09ed5e00084448453c8bada4dcd31e5fbfc2f21 Mon Sep 17 00:00:00 2001 From: Ying Han Date: Tue, 24 May 2011 17:12:26 -0700 Subject: vmscan: change shrink_slab() interfaces by passing shrink_control Consolidate the existing parameters to shrink_slab() into a new shrink_control struct. This is needed later to pass the same struct to shrinkers. Signed-off-by: Ying Han Cc: KOSAKI Motohiro Cc: Minchan Kim Acked-by: Pavel Emelyanov Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Acked-by: Rik van Riel Cc: Johannes Weiner Cc: Hugh Dickins Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/drop_caches.c | 6 +++++- include/linux/mm.h | 13 +++++++++++-- mm/memory-failure.c | 7 ++++++- mm/vmscan.c | 46 +++++++++++++++++++++++++++++++++------------- 4 files changed, 55 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 98b77c89494c..440999c24353 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -40,9 +40,13 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) static void drop_slab(void) { int nr_objects; + struct shrink_control shrink = { + .gfp_mask = GFP_KERNEL, + .nr_scanned = 1000, + }; do { - nr_objects = shrink_slab(1000, GFP_KERNEL, 1000); + nr_objects = shrink_slab(&shrink, 1000); } while (nr_objects > 10); } diff --git a/include/linux/mm.h b/include/linux/mm.h index 8e2f2cd821f7..32cfa9602d00 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1161,6 +1161,15 @@ static inline void sync_mm_rss(struct task_struct *task, struct mm_struct *mm) } #endif +/* + * This struct is used to pass information from page reclaim to the shrinkers. + * We consolidate the values for easier extention later. + */ +struct shrink_control { + unsigned long nr_scanned; + gfp_t gfp_mask; +}; + /* * A callback you can register to apply pressure to ageable caches. * @@ -1630,8 +1639,8 @@ int in_gate_area_no_mm(unsigned long addr); int drop_caches_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); -unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, - unsigned long lru_pages); +unsigned long shrink_slab(struct shrink_control *shrink, + unsigned long lru_pages); #ifndef CONFIG_MMU #define randomize_va_space 0 diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 369b80e81416..341341b2b47b 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -239,7 +239,12 @@ void shake_page(struct page *p, int access) if (access) { int nr; do { - nr = shrink_slab(1000, GFP_KERNEL, 1000); + struct shrink_control shrink = { + .gfp_mask = GFP_KERNEL, + .nr_scanned = 1000, + }; + + nr = shrink_slab(&shrink, 1000); if (page_count(p) == 1) break; } while (nr > 10); diff --git a/mm/vmscan.c b/mm/vmscan.c index 890f54184d9a..e4e245ed1a5b 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -222,11 +222,13 @@ EXPORT_SYMBOL(unregister_shrinker); * * Returns the number of slab objects which we shrunk. */ -unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, - unsigned long lru_pages) +unsigned long shrink_slab(struct shrink_control *shrink, + unsigned long lru_pages) { struct shrinker *shrinker; unsigned long ret = 0; + unsigned long scanned = shrink->nr_scanned; + gfp_t gfp_mask = shrink->gfp_mask; if (scanned == 0) scanned = SWAP_CLUSTER_MAX; @@ -2035,7 +2037,8 @@ static bool all_unreclaimable(struct zonelist *zonelist, * else, the number of pages reclaimed */ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, - struct scan_control *sc) + struct scan_control *sc, + struct shrink_control *shrink) { int priority; unsigned long total_scanned = 0; @@ -2069,7 +2072,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, lru_pages += zone_reclaimable_pages(zone); } - shrink_slab(sc->nr_scanned, sc->gfp_mask, lru_pages); + shrink->nr_scanned = sc->nr_scanned; + shrink_slab(shrink, lru_pages); if (reclaim_state) { sc->nr_reclaimed += reclaim_state->reclaimed_slab; reclaim_state->reclaimed_slab = 0; @@ -2141,12 +2145,15 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, .mem_cgroup = NULL, .nodemask = nodemask, }; + struct shrink_control shrink = { + .gfp_mask = sc.gfp_mask, + }; trace_mm_vmscan_direct_reclaim_begin(order, sc.may_writepage, gfp_mask); - nr_reclaimed = do_try_to_free_pages(zonelist, &sc); + nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink); trace_mm_vmscan_direct_reclaim_end(nr_reclaimed); @@ -2206,17 +2213,20 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, .order = 0, .mem_cgroup = mem_cont, .nodemask = NULL, /* we don't care the placement */ + .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | + (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK), + }; + struct shrink_control shrink = { + .gfp_mask = sc.gfp_mask, }; - sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | - (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); zonelist = NODE_DATA(numa_node_id())->node_zonelists; trace_mm_vmscan_memcg_reclaim_begin(0, sc.may_writepage, sc.gfp_mask); - nr_reclaimed = do_try_to_free_pages(zonelist, &sc); + nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink); trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed); @@ -2344,6 +2354,9 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, .order = order, .mem_cgroup = NULL, }; + struct shrink_control shrink = { + .gfp_mask = sc.gfp_mask, + }; loop_again: total_scanned = 0; sc.nr_reclaimed = 0; @@ -2443,8 +2456,8 @@ loop_again: end_zone, 0)) shrink_zone(priority, zone, &sc); reclaim_state->reclaimed_slab = 0; - nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL, - lru_pages); + shrink.nr_scanned = sc.nr_scanned; + nr_slab = shrink_slab(&shrink, lru_pages); sc.nr_reclaimed += reclaim_state->reclaimed_slab; total_scanned += sc.nr_scanned; @@ -2796,7 +2809,10 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) .swappiness = vm_swappiness, .order = 0, }; - struct zonelist * zonelist = node_zonelist(numa_node_id(), sc.gfp_mask); + struct shrink_control shrink = { + .gfp_mask = sc.gfp_mask, + }; + struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask); struct task_struct *p = current; unsigned long nr_reclaimed; @@ -2805,7 +2821,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) reclaim_state.reclaimed_slab = 0; p->reclaim_state = &reclaim_state; - nr_reclaimed = do_try_to_free_pages(zonelist, &sc); + nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink); p->reclaim_state = NULL; lockdep_clear_current_reclaim_state(); @@ -2980,6 +2996,9 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) .swappiness = vm_swappiness, .order = order, }; + struct shrink_control shrink = { + .gfp_mask = sc.gfp_mask, + }; unsigned long nr_slab_pages0, nr_slab_pages1; cond_resched(); @@ -3006,6 +3025,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) } nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE); + shrink.nr_scanned = sc.nr_scanned; if (nr_slab_pages0 > zone->min_slab_pages) { /* * shrink_slab() does not currently allow us to determine how @@ -3021,7 +3041,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) unsigned long lru_pages = zone_reclaimable_pages(zone); /* No reclaimable slab or very low memory pressure */ - if (!shrink_slab(sc.nr_scanned, gfp_mask, lru_pages)) + if (!shrink_slab(&shrink, lru_pages)) break; /* Freed enough memory */ -- cgit v1.2.3 From 1495f230fa7750479c79e3656286b9183d662077 Mon Sep 17 00:00:00 2001 From: Ying Han Date: Tue, 24 May 2011 17:12:27 -0700 Subject: vmscan: change shrinker API by passing shrink_control struct Change each shrinker's API by consolidating the existing parameters into shrink_control struct. This will simplify any further features added w/o touching each file of shrinker. [akpm@linux-foundation.org: fix build] [akpm@linux-foundation.org: fix warning] [kosaki.motohiro@jp.fujitsu.com: fix up new shrinker API] [akpm@linux-foundation.org: fix xfs warning] [akpm@linux-foundation.org: update gfs2] Signed-off-by: Ying Han Cc: KOSAKI Motohiro Cc: Minchan Kim Acked-by: Pavel Emelyanov Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Acked-by: Rik van Riel Cc: Johannes Weiner Cc: Hugh Dickins Cc: Dave Hansen Cc: Steven Whitehouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kvm/mmu.c | 3 ++- drivers/gpu/drm/i915/i915_gem.c | 9 +++------ drivers/gpu/drm/ttm/ttm_page_alloc.c | 4 +++- drivers/staging/zcache/zcache.c | 5 ++++- fs/dcache.c | 8 ++++++-- fs/drop_caches.c | 3 +-- fs/gfs2/glock.c | 5 ++++- fs/gfs2/quota.c | 12 +++++++----- fs/gfs2/quota.h | 4 +++- fs/inode.c | 6 +++++- fs/mbcache.c | 10 ++++++---- fs/nfs/dir.c | 5 ++++- fs/nfs/internal.h | 2 +- fs/quota/dquot.c | 5 ++++- fs/xfs/linux-2.6/xfs_buf.c | 4 ++-- fs/xfs/linux-2.6/xfs_sync.c | 5 +++-- fs/xfs/quota/xfs_qm.c | 6 +++--- include/linux/mm.h | 19 +++++++++++-------- mm/memory-failure.c | 3 +-- mm/vmscan.c | 34 +++++++++++++++++++--------------- net/sunrpc/auth.c | 4 +++- 21 files changed, 95 insertions(+), 61 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 28418054b880..bd14bb4c8594 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3545,10 +3545,11 @@ static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm, return kvm_mmu_prepare_zap_page(kvm, page, invalid_list); } -static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) +static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) { struct kvm *kvm; struct kvm *kvm_freed = NULL; + int nr_to_scan = sc->nr_to_scan; if (nr_to_scan == 0) goto out; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c6289034e29a..0b2e167d2bce 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -56,9 +56,7 @@ static int i915_gem_phys_pwrite(struct drm_device *dev, static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj); static int i915_gem_inactive_shrink(struct shrinker *shrinker, - int nr_to_scan, - gfp_t gfp_mask); - + struct shrink_control *sc); /* some bookkeeping */ static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, @@ -4092,9 +4090,7 @@ i915_gpu_is_active(struct drm_device *dev) } static int -i915_gem_inactive_shrink(struct shrinker *shrinker, - int nr_to_scan, - gfp_t gfp_mask) +i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc) { struct drm_i915_private *dev_priv = container_of(shrinker, @@ -4102,6 +4098,7 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, mm.inactive_shrinker); struct drm_device *dev = dev_priv->dev; struct drm_i915_gem_object *obj, *next; + int nr_to_scan = sc->nr_to_scan; int cnt; if (!mutex_trylock(&dev->struct_mutex)) diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index 9d9d92945f8c..d948575717bf 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -395,12 +395,14 @@ static int ttm_pool_get_num_unused_pages(void) /** * Callback for mm to request pool to reduce number of page held. */ -static int ttm_pool_mm_shrink(struct shrinker *shrink, int shrink_pages, gfp_t gfp_mask) +static int ttm_pool_mm_shrink(struct shrinker *shrink, + struct shrink_control *sc) { static atomic_t start_pool = ATOMIC_INIT(0); unsigned i; unsigned pool_offset = atomic_add_return(1, &start_pool); struct ttm_page_pool *pool; + int shrink_pages = sc->nr_to_scan; pool_offset = pool_offset % NUM_POOLS; /* select start pool in round robin fashion */ diff --git a/drivers/staging/zcache/zcache.c b/drivers/staging/zcache/zcache.c index b8a2b30a1572..77ac2d4d3ef1 100644 --- a/drivers/staging/zcache/zcache.c +++ b/drivers/staging/zcache/zcache.c @@ -1181,9 +1181,12 @@ static bool zcache_freeze; /* * zcache shrinker interface (only useful for ephemeral pages, so zbud only) */ -static int shrink_zcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) +static int shrink_zcache_memory(struct shrinker *shrink, + struct shrink_control *sc) { int ret = -1; + int nr = sc->nr_to_scan; + gfp_t gfp_mask = sc->gfp_mask; if (nr >= 0) { if (!(gfp_mask & __GFP_FS)) diff --git a/fs/dcache.c b/fs/dcache.c index 18b2a1f10ed8..37f72ee5bf7c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1220,7 +1220,7 @@ void shrink_dcache_parent(struct dentry * parent) EXPORT_SYMBOL(shrink_dcache_parent); /* - * Scan `nr' dentries and return the number which remain. + * Scan `sc->nr_slab_to_reclaim' dentries and return the number which remain. * * We need to avoid reentering the filesystem if the caller is performing a * GFP_NOFS allocation attempt. One example deadlock is: @@ -1231,8 +1231,12 @@ EXPORT_SYMBOL(shrink_dcache_parent); * * In this case we return -1 to tell the caller that we baled. */ -static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) +static int shrink_dcache_memory(struct shrinker *shrink, + struct shrink_control *sc) { + int nr = sc->nr_to_scan; + gfp_t gfp_mask = sc->gfp_mask; + if (nr) { if (!(gfp_mask & __GFP_FS)) return -1; diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 440999c24353..c00e055b6282 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -42,11 +42,10 @@ static void drop_slab(void) int nr_objects; struct shrink_control shrink = { .gfp_mask = GFP_KERNEL, - .nr_scanned = 1000, }; do { - nr_objects = shrink_slab(&shrink, 1000); + nr_objects = shrink_slab(&shrink, 1000, 1000); } while (nr_objects > 10); } diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index a2a6abbccc07..2792a790e50b 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1346,11 +1346,14 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret) } -static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) +static int gfs2_shrink_glock_memory(struct shrinker *shrink, + struct shrink_control *sc) { struct gfs2_glock *gl; int may_demote; int nr_skipped = 0; + int nr = sc->nr_to_scan; + gfp_t gfp_mask = sc->gfp_mask; LIST_HEAD(skipped); if (nr == 0) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index e23d9864c418..42e8d23bc047 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -38,6 +38,7 @@ #include #include +#include #include #include #include @@ -77,19 +78,20 @@ static LIST_HEAD(qd_lru_list); static atomic_t qd_lru_count = ATOMIC_INIT(0); static DEFINE_SPINLOCK(qd_lru_lock); -int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) +int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc) { struct gfs2_quota_data *qd; struct gfs2_sbd *sdp; + int nr_to_scan = sc->nr_to_scan; - if (nr == 0) + if (nr_to_scan == 0) goto out; - if (!(gfp_mask & __GFP_FS)) + if (!(sc->gfp_mask & __GFP_FS)) return -1; spin_lock(&qd_lru_lock); - while (nr && !list_empty(&qd_lru_list)) { + while (nr_to_scan && !list_empty(&qd_lru_list)) { qd = list_entry(qd_lru_list.next, struct gfs2_quota_data, qd_reclaim); sdp = qd->qd_gl->gl_sbd; @@ -110,7 +112,7 @@ int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) spin_unlock(&qd_lru_lock); kmem_cache_free(gfs2_quotad_cachep, qd); spin_lock(&qd_lru_lock); - nr--; + nr_to_scan--; } spin_unlock(&qd_lru_lock); diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index e7d236ca48bd..90bf1c302a98 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -12,6 +12,7 @@ struct gfs2_inode; struct gfs2_sbd; +struct shrink_control; #define NO_QUOTA_CHANGE ((u32)-1) @@ -51,7 +52,8 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) return ret; } -extern int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask); +extern int gfs2_shrink_qd_memory(struct shrinker *shrink, + struct shrink_control *sc); extern const struct quotactl_ops gfs2_quotactl_ops; #endif /* __QUOTA_DOT_H__ */ diff --git a/fs/inode.c b/fs/inode.c index 88aa3bcc7681..990d284877a1 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -751,8 +751,12 @@ static void prune_icache(int nr_to_scan) * This function is passed the number of inodes to scan, and it returns the * total number of remaining possibly-reclaimable inodes. */ -static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) +static int shrink_icache_memory(struct shrinker *shrink, + struct shrink_control *sc) { + int nr = sc->nr_to_scan; + gfp_t gfp_mask = sc->gfp_mask; + if (nr) { /* * Nasty deadlock avoidance. We may hold various FS locks, diff --git a/fs/mbcache.c b/fs/mbcache.c index 2f174be06555..8c32ef3ba88e 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -90,7 +90,8 @@ static DEFINE_SPINLOCK(mb_cache_spinlock); * What the mbcache registers as to get shrunk dynamically. */ -static int mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask); +static int mb_cache_shrink_fn(struct shrinker *shrink, + struct shrink_control *sc); static struct shrinker mb_cache_shrinker = { .shrink = mb_cache_shrink_fn, @@ -156,18 +157,19 @@ forget: * gets low. * * @shrink: (ignored) - * @nr_to_scan: Number of objects to scan - * @gfp_mask: (ignored) + * @sc: shrink_control passed from reclaim * * Returns the number of objects which are present in the cache. */ static int -mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) +mb_cache_shrink_fn(struct shrinker *shrink, struct shrink_control *sc) { LIST_HEAD(free_list); struct mb_cache *cache; struct mb_cache_entry *entry, *tmp; int count = 0; + int nr_to_scan = sc->nr_to_scan; + gfp_t gfp_mask = sc->gfp_mask; mb_debug("trying to free %d entries", nr_to_scan); spin_lock(&mb_cache_spinlock); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 7237672216c8..424e47773a84 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2042,11 +2042,14 @@ static void nfs_access_free_list(struct list_head *head) } } -int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) +int nfs_access_cache_shrinker(struct shrinker *shrink, + struct shrink_control *sc) { LIST_HEAD(head); struct nfs_inode *nfsi, *next; struct nfs_access_entry *cache; + int nr_to_scan = sc->nr_to_scan; + gfp_t gfp_mask = sc->gfp_mask; if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) return (nr_to_scan == 0) ? 0 : -1; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index ce118ce885dd..2df6ca7b5898 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -234,7 +234,7 @@ extern int nfs_init_client(struct nfs_client *clp, /* dir.c */ extern int nfs_access_cache_shrinker(struct shrinker *shrink, - int nr_to_scan, gfp_t gfp_mask); + struct shrink_control *sc); /* inode.c */ extern struct workqueue_struct *nfsiod_workqueue; diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index d3c032f5fa0a..5b572c89e6c4 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -691,8 +691,11 @@ static void prune_dqcache(int count) * This is called from kswapd when we think we need some * more memory */ -static int shrink_dqcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) +static int shrink_dqcache_memory(struct shrinker *shrink, + struct shrink_control *sc) { + int nr = sc->nr_to_scan; + if (nr) { spin_lock(&dq_list_lock); prune_dqcache(nr); diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 52b2b5da566e..5e68099db2a5 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1422,12 +1422,12 @@ restart: int xfs_buftarg_shrink( struct shrinker *shrink, - int nr_to_scan, - gfp_t mask) + struct shrink_control *sc) { struct xfs_buftarg *btp = container_of(shrink, struct xfs_buftarg, bt_shrinker); struct xfs_buf *bp; + int nr_to_scan = sc->nr_to_scan; LIST_HEAD(dispose); if (!nr_to_scan) diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index cb1bb2080e44..8ecad5ff9f9b 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -1032,13 +1032,14 @@ xfs_reclaim_inodes( static int xfs_reclaim_inode_shrink( struct shrinker *shrink, - int nr_to_scan, - gfp_t gfp_mask) + struct shrink_control *sc) { struct xfs_mount *mp; struct xfs_perag *pag; xfs_agnumber_t ag; int reclaimable; + int nr_to_scan = sc->nr_to_scan; + gfp_t gfp_mask = sc->gfp_mask; mp = container_of(shrink, struct xfs_mount, m_inode_shrink); if (nr_to_scan) { diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 69228aa8605a..b94dace4e785 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -60,7 +60,7 @@ STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); STATIC int xfs_qm_init_quotainos(xfs_mount_t *); STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); -STATIC int xfs_qm_shake(struct shrinker *, int, gfp_t); +STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *); static struct shrinker xfs_qm_shaker = { .shrink = xfs_qm_shake, @@ -2009,10 +2009,10 @@ xfs_qm_shake_freelist( STATIC int xfs_qm_shake( struct shrinker *shrink, - int nr_to_scan, - gfp_t gfp_mask) + struct shrink_control *sc) { int ndqused, nfree, n; + gfp_t gfp_mask = sc->gfp_mask; if (!kmem_shake_allow(gfp_mask)) return 0; diff --git a/include/linux/mm.h b/include/linux/mm.h index 32cfa9602d00..5cbbf78eaac7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1166,18 +1166,20 @@ static inline void sync_mm_rss(struct task_struct *task, struct mm_struct *mm) * We consolidate the values for easier extention later. */ struct shrink_control { - unsigned long nr_scanned; gfp_t gfp_mask; + + /* How many slab objects shrinker() should scan and try to reclaim */ + unsigned long nr_to_scan; }; /* * A callback you can register to apply pressure to ageable caches. * - * 'shrink' is passed a count 'nr_to_scan' and a 'gfpmask'. It should - * look through the least-recently-used 'nr_to_scan' entries and - * attempt to free them up. It should return the number of objects - * which remain in the cache. If it returns -1, it means it cannot do - * any scanning at this time (eg. there is a risk of deadlock). + * 'sc' is passed shrink_control which includes a count 'nr_to_scan' + * and a 'gfpmask'. It should look through the least-recently-used + * 'nr_to_scan' entries and attempt to free them up. It should return + * the number of objects which remain in the cache. If it returns -1, it means + * it cannot do any scanning at this time (eg. there is a risk of deadlock). * * The 'gfpmask' refers to the allocation we are currently trying to * fulfil. @@ -1186,7 +1188,7 @@ struct shrink_control { * querying the cache size, so a fastpath for that case is appropriate. */ struct shrinker { - int (*shrink)(struct shrinker *, int nr_to_scan, gfp_t gfp_mask); + int (*shrink)(struct shrinker *, struct shrink_control *sc); int seeks; /* seeks to recreate an obj */ /* These are for internal use */ @@ -1640,7 +1642,8 @@ int in_gate_area_no_mm(unsigned long addr); int drop_caches_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); unsigned long shrink_slab(struct shrink_control *shrink, - unsigned long lru_pages); + unsigned long nr_pages_scanned, + unsigned long lru_pages); #ifndef CONFIG_MMU #define randomize_va_space 0 diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 341341b2b47b..5c8f7e08928d 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -241,10 +241,9 @@ void shake_page(struct page *p, int access) do { struct shrink_control shrink = { .gfp_mask = GFP_KERNEL, - .nr_scanned = 1000, }; - nr = shrink_slab(&shrink, 1000); + nr = shrink_slab(&shrink, 1000, 1000); if (page_count(p) == 1) break; } while (nr > 10); diff --git a/mm/vmscan.c b/mm/vmscan.c index e4e245ed1a5b..7e0116150dc7 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -202,6 +202,14 @@ void unregister_shrinker(struct shrinker *shrinker) } EXPORT_SYMBOL(unregister_shrinker); +static inline int do_shrinker_shrink(struct shrinker *shrinker, + struct shrink_control *sc, + unsigned long nr_to_scan) +{ + sc->nr_to_scan = nr_to_scan; + return (*shrinker->shrink)(shrinker, sc); +} + #define SHRINK_BATCH 128 /* * Call the shrink functions to age shrinkable caches @@ -223,15 +231,14 @@ EXPORT_SYMBOL(unregister_shrinker); * Returns the number of slab objects which we shrunk. */ unsigned long shrink_slab(struct shrink_control *shrink, + unsigned long nr_pages_scanned, unsigned long lru_pages) { struct shrinker *shrinker; unsigned long ret = 0; - unsigned long scanned = shrink->nr_scanned; - gfp_t gfp_mask = shrink->gfp_mask; - if (scanned == 0) - scanned = SWAP_CLUSTER_MAX; + if (nr_pages_scanned == 0) + nr_pages_scanned = SWAP_CLUSTER_MAX; if (!down_read_trylock(&shrinker_rwsem)) { /* Assume we'll be able to shrink next time */ @@ -244,8 +251,8 @@ unsigned long shrink_slab(struct shrink_control *shrink, unsigned long total_scan; unsigned long max_pass; - max_pass = (*shrinker->shrink)(shrinker, 0, gfp_mask); - delta = (4 * scanned) / shrinker->seeks; + max_pass = do_shrinker_shrink(shrinker, shrink, 0); + delta = (4 * nr_pages_scanned) / shrinker->seeks; delta *= max_pass; do_div(delta, lru_pages + 1); shrinker->nr += delta; @@ -272,9 +279,9 @@ unsigned long shrink_slab(struct shrink_control *shrink, int shrink_ret; int nr_before; - nr_before = (*shrinker->shrink)(shrinker, 0, gfp_mask); - shrink_ret = (*shrinker->shrink)(shrinker, this_scan, - gfp_mask); + nr_before = do_shrinker_shrink(shrinker, shrink, 0); + shrink_ret = do_shrinker_shrink(shrinker, shrink, + this_scan); if (shrink_ret == -1) break; if (shrink_ret < nr_before) @@ -2072,8 +2079,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, lru_pages += zone_reclaimable_pages(zone); } - shrink->nr_scanned = sc->nr_scanned; - shrink_slab(shrink, lru_pages); + shrink_slab(shrink, sc->nr_scanned, lru_pages); if (reclaim_state) { sc->nr_reclaimed += reclaim_state->reclaimed_slab; reclaim_state->reclaimed_slab = 0; @@ -2456,8 +2462,7 @@ loop_again: end_zone, 0)) shrink_zone(priority, zone, &sc); reclaim_state->reclaimed_slab = 0; - shrink.nr_scanned = sc.nr_scanned; - nr_slab = shrink_slab(&shrink, lru_pages); + nr_slab = shrink_slab(&shrink, sc.nr_scanned, lru_pages); sc.nr_reclaimed += reclaim_state->reclaimed_slab; total_scanned += sc.nr_scanned; @@ -3025,7 +3030,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) } nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE); - shrink.nr_scanned = sc.nr_scanned; if (nr_slab_pages0 > zone->min_slab_pages) { /* * shrink_slab() does not currently allow us to determine how @@ -3041,7 +3045,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) unsigned long lru_pages = zone_reclaimable_pages(zone); /* No reclaimable slab or very low memory pressure */ - if (!shrink_slab(&shrink, lru_pages)) + if (!shrink_slab(&shrink, sc.nr_scanned, lru_pages)) break; /* Freed enough memory */ diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 67e31276682a..cd6e4aa19dbf 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -326,10 +326,12 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) * Run memory cache shrinker. */ static int -rpcauth_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) +rpcauth_cache_shrinker(struct shrinker *shrink, struct shrink_control *sc) { LIST_HEAD(free); int res; + int nr_to_scan = sc->nr_to_scan; + gfp_t gfp_mask = sc->gfp_mask; if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) return (nr_to_scan == 0) ? 0 : -1; -- cgit v1.2.3 From bf4e8902ee5080f5d2c810b639e7e778c8082b52 Mon Sep 17 00:00:00 2001 From: Daniel Kiper Date: Tue, 24 May 2011 17:12:32 -0700 Subject: mm: enable set_page_section() only if CONFIG_SPARSEMEM and !CONFIG_SPARSEMEM_VMEMMAP set_page_section() is meaningful only in CONFIG_SPARSEMEM and !CONFIG_SPARSEMEM_VMEMMAP context. Move it to proper place and amend accordingly functions which are using it. Signed-off-by: Daniel Kiper Acked-by: Dave Hansen Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 5cbbf78eaac7..6702171f8d0a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -682,6 +682,12 @@ static inline struct zone *page_zone(struct page *page) } #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) +static inline void set_page_section(struct page *page, unsigned long section) +{ + page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT); + page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT; +} + static inline unsigned long page_to_section(struct page *page) { return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK; @@ -700,18 +706,14 @@ static inline void set_page_node(struct page *page, unsigned long node) page->flags |= (node & NODES_MASK) << NODES_PGSHIFT; } -static inline void set_page_section(struct page *page, unsigned long section) -{ - page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT); - page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT; -} - static inline void set_page_links(struct page *page, enum zone_type zone, unsigned long node, unsigned long pfn) { set_page_zone(page, zone); set_page_node(page, node); +#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) set_page_section(page, pfn_to_section_nr(pfn)); +#endif } /* -- cgit v1.2.3 From e3c40f379a144f35e53864a2cd970e238071afd7 Mon Sep 17 00:00:00 2001 From: Daniel Kiper Date: Tue, 24 May 2011 17:12:33 -0700 Subject: mm: pfn_to_section_nr()/section_nr_to_pfn() is valid only in CONFIG_SPARSEMEM context pfn_to_section_nr()/section_nr_to_pfn() is valid only in CONFIG_SPARSEMEM context. Move it to proper place. Signed-off-by: Daniel Kiper Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index e56f835274c9..d7152002e18d 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -928,9 +928,6 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn) #define pfn_to_nid(pfn) (0) #endif -#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT) -#define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT) - #ifdef CONFIG_SPARSEMEM /* @@ -956,6 +953,9 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn) #error Allocator MAX_ORDER exceeds SECTION_SIZE #endif +#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT) +#define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT) + struct page; struct page_cgroup; struct mem_section { -- cgit v1.2.3 From ba93fa81b5f2bf0076407a3a777fff122ce16220 Mon Sep 17 00:00:00 2001 From: Daniel Kiper Date: Tue, 24 May 2011 17:12:34 -0700 Subject: mm: do not define PFN_SECTION_SHIFT if !CONFIG_SPARSEMEM Do not define PFN_SECTION_SHIFT if !CONFIG_SPARSEMEM. Signed-off-by: Daniel Kiper Acked-by: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 6702171f8d0a..32309f6542e8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -605,10 +605,6 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) #define NODE_NOT_IN_PAGE_FLAGS #endif -#ifndef PFN_SECTION_SHIFT -#define PFN_SECTION_SHIFT 0 -#endif - /* * Define the bit shifts to access each section. For non-existent * sections we define the shift as 0; that plus a 0 mask ensures -- cgit v1.2.3 From 172703b08cd05e2d5196ac13e94cc186f629d58b Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 24 May 2011 17:12:36 -0700 Subject: mm: delete non-atomic mm counter implementation The problem with having two different types of counters is that developers adding new code need to keep in mind whether it's safe to use both the atomic and non-atomic implementations. For example, when adding new callers of the *_mm_counter() functions a developer needs to ensure that those paths are always executed with page_table_lock held, in case we're using the non-atomic implementation of mm counters. Hugh Dickins introduced the atomic mm counters in commit f412ac08c986 ("[PATCH] mm: fix rss and mmlist locking"). When asked why he left the non-atomic counters around he said, | The only reason was to avoid adding costly atomic operations into a | configuration that had no need for them there: the page_table_lock | sufficed. | | Certainly it would be simpler just to delete the non-atomic variant. | | And I think it's fair to say that any configuration on which we're | measuring performance to that degree (rather than "does it boot fast?" | type measurements), would already be going the split ptlocks route. Removing the non-atomic counters eases the maintenance burden because developers no longer have to mindful of the two implementations when using *_mm_counter(). Note that all architectures provide a means of atomically updating atomic_long_t variables, even if they have to revert to the generic spinlock implementation because they don't support 64-bit atomic instructions (see lib/atomic64.c). Signed-off-by: Matt Fleming Acked-by: Dave Hansen Acked-by: KAMEZAWA Hiroyuki Cc: Hugh Dickins Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 44 +++++++------------------------------------- include/linux/mm_types.h | 9 +++------ 2 files changed, 10 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 32309f6542e8..48e458190d88 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1053,65 +1053,35 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, /* * per-process(per-mm_struct) statistics. */ -#if defined(SPLIT_RSS_COUNTING) -/* - * The mm counters are not protected by its page_table_lock, - * so must be incremented atomically. - */ static inline void set_mm_counter(struct mm_struct *mm, int member, long value) { atomic_long_set(&mm->rss_stat.count[member], value); } +#if defined(SPLIT_RSS_COUNTING) unsigned long get_mm_counter(struct mm_struct *mm, int member); - -static inline void add_mm_counter(struct mm_struct *mm, int member, long value) -{ - atomic_long_add(value, &mm->rss_stat.count[member]); -} - -static inline void inc_mm_counter(struct mm_struct *mm, int member) -{ - atomic_long_inc(&mm->rss_stat.count[member]); -} - -static inline void dec_mm_counter(struct mm_struct *mm, int member) -{ - atomic_long_dec(&mm->rss_stat.count[member]); -} - -#else /* !USE_SPLIT_PTLOCKS */ -/* - * The mm counters are protected by its page_table_lock, - * so can be incremented directly. - */ -static inline void set_mm_counter(struct mm_struct *mm, int member, long value) -{ - mm->rss_stat.count[member] = value; -} - +#else static inline unsigned long get_mm_counter(struct mm_struct *mm, int member) { - return mm->rss_stat.count[member]; + return atomic_long_read(&mm->rss_stat.count[member]); } +#endif static inline void add_mm_counter(struct mm_struct *mm, int member, long value) { - mm->rss_stat.count[member] += value; + atomic_long_add(value, &mm->rss_stat.count[member]); } static inline void inc_mm_counter(struct mm_struct *mm, int member) { - mm->rss_stat.count[member]++; + atomic_long_inc(&mm->rss_stat.count[member]); } static inline void dec_mm_counter(struct mm_struct *mm, int member) { - mm->rss_stat.count[member]--; + atomic_long_dec(&mm->rss_stat.count[member]); } -#endif /* !USE_SPLIT_PTLOCKS */ - static inline unsigned long get_mm_rss(struct mm_struct *mm) { return get_mm_counter(mm, MM_FILEPAGES) + diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index c2f9ea7922f4..071d459e866b 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -204,19 +204,16 @@ enum { #if USE_SPLIT_PTLOCKS && defined(CONFIG_MMU) #define SPLIT_RSS_COUNTING -struct mm_rss_stat { - atomic_long_t count[NR_MM_COUNTERS]; -}; /* per-thread cached information, */ struct task_rss_stat { int events; /* for synchronization threshold */ int count[NR_MM_COUNTERS]; }; -#else /* !USE_SPLIT_PTLOCKS */ +#endif /* USE_SPLIT_PTLOCKS */ + struct mm_rss_stat { - unsigned long count[NR_MM_COUNTERS]; + atomic_long_t count[NR_MM_COUNTERS]; }; -#endif /* !USE_SPLIT_PTLOCKS */ struct mm_struct { struct vm_area_struct * mmap; /* list of VMAs */ -- cgit v1.2.3 From 8bba154ef29e16331e578029e9050c74b87b7ff9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 24 May 2011 17:12:37 -0700 Subject: memblock/nobootmem: allow alloc_bootmem() to take 0 as low limit The bootmem wrapper with memblock supports top-down now, so we do not need to set the low limit to __pa(MAX_DMA_ADDRESS). The logic should be: good to allocate above __pa(MAX_DMA_ADDRESS), but it is ok if we can not find memory above 16M on system that has a small amount of RAM. Signed-off-by: Yinghai LU Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Olaf Hering Cc: Tejun Heo Cc: Lucas De Marchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 01eca1794e14..ab344a521105 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -99,24 +99,31 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long align, unsigned long goal); +#ifdef CONFIG_NO_BOOTMEM +/* We are using top down, so it is safe to use 0 here */ +#define BOOTMEM_LOW_LIMIT 0 +#else +#define BOOTMEM_LOW_LIMIT __pa(MAX_DMA_ADDRESS) +#endif + #define alloc_bootmem(x) \ - __alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT) #define alloc_bootmem_align(x, align) \ - __alloc_bootmem(x, align, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem(x, align, BOOTMEM_LOW_LIMIT) #define alloc_bootmem_nopanic(x) \ - __alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT) #define alloc_bootmem_pages(x) \ - __alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem(x, PAGE_SIZE, BOOTMEM_LOW_LIMIT) #define alloc_bootmem_pages_nopanic(x) \ - __alloc_bootmem_nopanic(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem_nopanic(x, PAGE_SIZE, BOOTMEM_LOW_LIMIT) #define alloc_bootmem_node(pgdat, x) \ - __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT) #define alloc_bootmem_node_nopanic(pgdat, x) \ - __alloc_bootmem_node_nopanic(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem_node_nopanic(pgdat, x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT) #define alloc_bootmem_pages_node(pgdat, x) \ - __alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem_node(pgdat, x, PAGE_SIZE, BOOTMEM_LOW_LIMIT) #define alloc_bootmem_pages_node_nopanic(pgdat, x) \ - __alloc_bootmem_node_nopanic(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem_node_nopanic(pgdat, x, PAGE_SIZE, BOOTMEM_LOW_LIMIT) #define alloc_bootmem_low(x) \ __alloc_bootmem_low(x, SMP_CACHE_BYTES, 0) -- cgit v1.2.3 From b09e0fa4b4ea66266058eead43350bd7d55fec67 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 24 May 2011 17:12:39 -0700 Subject: tmpfs: implement generic xattr support Implement generic xattrs for tmpfs filesystems. The Feodra project, while trying to replace suid apps with file capabilities, realized that tmpfs, which is used on the build systems, does not support file capabilities and thus cannot be used to build packages which use file capabilities. Xattrs are also needed for overlayfs. The xattr interface is a bit odd. If a filesystem does not implement any {get,set,list}xattr functions the VFS will call into some random LSM hooks and the running LSM can then implement some method for handling xattrs. SELinux for example provides a method to support security.selinux but no other security.* xattrs. As it stands today when one enables CONFIG_TMPFS_POSIX_ACL tmpfs will have xattr handler routines specifically to handle acls. Because of this tmpfs would loose the VFS/LSM helpers to support the running LSM. To make up for that tmpfs had stub functions that did nothing but call into the LSM hooks which implement the helpers. This new patch does not use the LSM fallback functions and instead just implements a native get/set/list xattr feature for the full security.* and trusted.* namespace like a normal filesystem. This means that tmpfs can now support both security.selinux and security.capability, which was not previously possible. The basic implementation is that I attach a: struct shmem_xattr { struct list_head list; /* anchored by shmem_inode_info->xattr_list */ char *name; size_t size; char value[0]; }; Into the struct shmem_inode_info for each xattr that is set. This implementation could easily support the user.* namespace as well, except some care needs to be taken to prevent large amounts of unswappable memory being allocated for unprivileged users. [mszeredi@suse.cz: new config option, suport trusted.*, support symlinks] Signed-off-by: Eric Paris Signed-off-by: Miklos Szeredi Acked-by: Serge Hallyn Tested-by: Serge Hallyn Cc: Kyle McMartin Acked-by: Hugh Dickins Tested-by: Jordi Pujol Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/Kconfig | 18 ++- include/linux/shmem_fs.h | 8 +- mm/shmem.c | 320 +++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 290 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/fs/Kconfig b/fs/Kconfig index f3aa9b08b228..979992dcb386 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -121,9 +121,25 @@ config TMPFS See for details. +config TMPFS_XATTR + bool "Tmpfs extended attributes" + depends on TMPFS + default n + help + Extended attributes are name:value pairs associated with inodes by + the kernel or by users (see the attr(5) manual page, or visit + for details). + + Currently this enables support for the trusted.* and + security.* namespaces. + + If unsure, say N. + + You need this for POSIX ACL support on tmpfs. + config TMPFS_POSIX_ACL bool "Tmpfs POSIX Access Control Lists" - depends on TMPFS + depends on TMPFS_XATTR select GENERIC_ACL help POSIX Access Control Lists (ACLs) support permissions for users and diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 399be5ad2f99..2b7fec840517 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -9,6 +9,8 @@ #define SHMEM_NR_DIRECT 16 +#define SHMEM_SYMLINK_INLINE_LEN (SHMEM_NR_DIRECT * sizeof(swp_entry_t)) + struct shmem_inode_info { spinlock_t lock; unsigned long flags; @@ -17,8 +19,12 @@ struct shmem_inode_info { unsigned long next_index; /* highest alloced index + 1 */ struct shared_policy policy; /* NUMA memory alloc policy */ struct page *i_indirect; /* top indirect blocks page */ - swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* first blocks */ + union { + swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* first blocks */ + char inline_symlink[SHMEM_SYMLINK_INLINE_LEN]; + }; struct list_head swaplist; /* chain of maybes on swap */ + struct list_head xattr_list; /* list of shmem_xattr */ struct inode vfs_inode; }; diff --git a/mm/shmem.c b/mm/shmem.c index ba4ad28b7db6..69edb45a9f28 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -99,6 +99,13 @@ static struct vfsmount *shm_mnt; /* Pretend that each entry is of this size in directory's i_size */ #define BOGO_DIRENT_SIZE 20 +struct shmem_xattr { + struct list_head list; /* anchored by shmem_inode_info->xattr_list */ + char *name; /* xattr name */ + size_t size; + char value[0]; +}; + /* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */ enum sgp_type { SGP_READ, /* don't exceed i_size, don't allocate page */ @@ -822,6 +829,7 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) static void shmem_evict_inode(struct inode *inode) { struct shmem_inode_info *info = SHMEM_I(inode); + struct shmem_xattr *xattr, *nxattr; if (inode->i_mapping->a_ops == &shmem_aops) { truncate_inode_pages(inode->i_mapping, 0); @@ -834,6 +842,11 @@ static void shmem_evict_inode(struct inode *inode) mutex_unlock(&shmem_swaplist_mutex); } } + + list_for_each_entry_safe(xattr, nxattr, &info->xattr_list, list) { + kfree(xattr->name); + kfree(xattr); + } BUG_ON(inode->i_blocks); shmem_free_inode(inode->i_sb); end_writeback(inode); @@ -1615,6 +1628,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode spin_lock_init(&info->lock); info->flags = flags & VM_NORESERVE; INIT_LIST_HEAD(&info->swaplist); + INIT_LIST_HEAD(&info->xattr_list); cache_no_acl(inode); switch (mode & S_IFMT) { @@ -2014,9 +2028,9 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s info = SHMEM_I(inode); inode->i_size = len-1; - if (len <= (char *)inode - (char *)info) { + if (len <= SHMEM_SYMLINK_INLINE_LEN) { /* do it inline */ - memcpy(info, symname, len); + memcpy(info->inline_symlink, symname, len); inode->i_op = &shmem_symlink_inline_operations; } else { error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL); @@ -2042,7 +2056,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s static void *shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd) { - nd_set_link(nd, (char *)SHMEM_I(dentry->d_inode)); + nd_set_link(nd, SHMEM_I(dentry->d_inode)->inline_symlink); return NULL; } @@ -2066,63 +2080,253 @@ static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *co } } -static const struct inode_operations shmem_symlink_inline_operations = { - .readlink = generic_readlink, - .follow_link = shmem_follow_link_inline, -}; - -static const struct inode_operations shmem_symlink_inode_operations = { - .readlink = generic_readlink, - .follow_link = shmem_follow_link, - .put_link = shmem_put_link, -}; - -#ifdef CONFIG_TMPFS_POSIX_ACL +#ifdef CONFIG_TMPFS_XATTR /* - * Superblocks without xattr inode operations will get security.* xattr - * support from the VFS "for free". As soon as we have any other xattrs + * Superblocks without xattr inode operations may get some security.* xattr + * support from the LSM "for free". As soon as we have any other xattrs * like ACLs, we also need to implement the security.* handlers at * filesystem level, though. */ -static size_t shmem_xattr_security_list(struct dentry *dentry, char *list, - size_t list_len, const char *name, - size_t name_len, int handler_flags) +static int shmem_xattr_get(struct dentry *dentry, const char *name, + void *buffer, size_t size) { - return security_inode_listsecurity(dentry->d_inode, list, list_len); -} + struct shmem_inode_info *info; + struct shmem_xattr *xattr; + int ret = -ENODATA; -static int shmem_xattr_security_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int handler_flags) -{ - if (strcmp(name, "") == 0) - return -EINVAL; - return xattr_getsecurity(dentry->d_inode, name, buffer, size); + info = SHMEM_I(dentry->d_inode); + + spin_lock(&info->lock); + list_for_each_entry(xattr, &info->xattr_list, list) { + if (strcmp(name, xattr->name)) + continue; + + ret = xattr->size; + if (buffer) { + if (size < xattr->size) + ret = -ERANGE; + else + memcpy(buffer, xattr->value, xattr->size); + } + break; + } + spin_unlock(&info->lock); + return ret; } -static int shmem_xattr_security_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int handler_flags) +static int shmem_xattr_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { - if (strcmp(name, "") == 0) - return -EINVAL; - return security_inode_setsecurity(dentry->d_inode, name, value, - size, flags); + struct inode *inode = dentry->d_inode; + struct shmem_inode_info *info = SHMEM_I(inode); + struct shmem_xattr *xattr; + struct shmem_xattr *new_xattr = NULL; + size_t len; + int err = 0; + + /* value == NULL means remove */ + if (value) { + /* wrap around? */ + len = sizeof(*new_xattr) + size; + if (len <= sizeof(*new_xattr)) + return -ENOMEM; + + new_xattr = kmalloc(len, GFP_KERNEL); + if (!new_xattr) + return -ENOMEM; + + new_xattr->name = kstrdup(name, GFP_KERNEL); + if (!new_xattr->name) { + kfree(new_xattr); + return -ENOMEM; + } + + new_xattr->size = size; + memcpy(new_xattr->value, value, size); + } + + spin_lock(&info->lock); + list_for_each_entry(xattr, &info->xattr_list, list) { + if (!strcmp(name, xattr->name)) { + if (flags & XATTR_CREATE) { + xattr = new_xattr; + err = -EEXIST; + } else if (new_xattr) { + list_replace(&xattr->list, &new_xattr->list); + } else { + list_del(&xattr->list); + } + goto out; + } + } + if (flags & XATTR_REPLACE) { + xattr = new_xattr; + err = -ENODATA; + } else { + list_add(&new_xattr->list, &info->xattr_list); + xattr = NULL; + } +out: + spin_unlock(&info->lock); + if (xattr) + kfree(xattr->name); + kfree(xattr); + return err; } -static const struct xattr_handler shmem_xattr_security_handler = { - .prefix = XATTR_SECURITY_PREFIX, - .list = shmem_xattr_security_list, - .get = shmem_xattr_security_get, - .set = shmem_xattr_security_set, -}; static const struct xattr_handler *shmem_xattr_handlers[] = { +#ifdef CONFIG_TMPFS_POSIX_ACL &generic_acl_access_handler, &generic_acl_default_handler, - &shmem_xattr_security_handler, +#endif NULL }; + +static int shmem_xattr_validate(const char *name) +{ + struct { const char *prefix; size_t len; } arr[] = { + { XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN }, + { XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN } + }; + int i; + + for (i = 0; i < ARRAY_SIZE(arr); i++) { + size_t preflen = arr[i].len; + if (strncmp(name, arr[i].prefix, preflen) == 0) { + if (!name[preflen]) + return -EINVAL; + return 0; + } + } + return -EOPNOTSUPP; +} + +static ssize_t shmem_getxattr(struct dentry *dentry, const char *name, + void *buffer, size_t size) +{ + int err; + + /* + * If this is a request for a synthetic attribute in the system.* + * namespace use the generic infrastructure to resolve a handler + * for it via sb->s_xattr. + */ + if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) + return generic_getxattr(dentry, name, buffer, size); + + err = shmem_xattr_validate(name); + if (err) + return err; + + return shmem_xattr_get(dentry, name, buffer, size); +} + +static int shmem_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + int err; + + /* + * If this is a request for a synthetic attribute in the system.* + * namespace use the generic infrastructure to resolve a handler + * for it via sb->s_xattr. + */ + if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) + return generic_setxattr(dentry, name, value, size, flags); + + err = shmem_xattr_validate(name); + if (err) + return err; + + if (size == 0) + value = ""; /* empty EA, do not remove */ + + return shmem_xattr_set(dentry, name, value, size, flags); + +} + +static int shmem_removexattr(struct dentry *dentry, const char *name) +{ + int err; + + /* + * If this is a request for a synthetic attribute in the system.* + * namespace use the generic infrastructure to resolve a handler + * for it via sb->s_xattr. + */ + if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) + return generic_removexattr(dentry, name); + + err = shmem_xattr_validate(name); + if (err) + return err; + + return shmem_xattr_set(dentry, name, NULL, 0, XATTR_REPLACE); +} + +static bool xattr_is_trusted(const char *name) +{ + return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN); +} + +static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size) +{ + bool trusted = capable(CAP_SYS_ADMIN); + struct shmem_xattr *xattr; + struct shmem_inode_info *info; + size_t used = 0; + + info = SHMEM_I(dentry->d_inode); + + spin_lock(&info->lock); + list_for_each_entry(xattr, &info->xattr_list, list) { + size_t len; + + /* skip "trusted." attributes for unprivileged callers */ + if (!trusted && xattr_is_trusted(xattr->name)) + continue; + + len = strlen(xattr->name) + 1; + used += len; + if (buffer) { + if (size < used) { + used = -ERANGE; + break; + } + memcpy(buffer, xattr->name, len); + buffer += len; + } + } + spin_unlock(&info->lock); + + return used; +} +#endif /* CONFIG_TMPFS_XATTR */ + +static const struct inode_operations shmem_symlink_inline_operations = { + .readlink = generic_readlink, + .follow_link = shmem_follow_link_inline, +#ifdef CONFIG_TMPFS_XATTR + .setxattr = shmem_setxattr, + .getxattr = shmem_getxattr, + .listxattr = shmem_listxattr, + .removexattr = shmem_removexattr, +#endif +}; + +static const struct inode_operations shmem_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = shmem_follow_link, + .put_link = shmem_put_link, +#ifdef CONFIG_TMPFS_XATTR + .setxattr = shmem_setxattr, + .getxattr = shmem_getxattr, + .listxattr = shmem_listxattr, + .removexattr = shmem_removexattr, #endif +}; static struct dentry *shmem_get_parent(struct dentry *child) { @@ -2402,8 +2606,10 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) sb->s_magic = TMPFS_MAGIC; sb->s_op = &shmem_ops; sb->s_time_gran = 1; -#ifdef CONFIG_TMPFS_POSIX_ACL +#ifdef CONFIG_TMPFS_XATTR sb->s_xattr = shmem_xattr_handlers; +#endif +#ifdef CONFIG_TMPFS_POSIX_ACL sb->s_flags |= MS_POSIXACL; #endif @@ -2501,11 +2707,13 @@ static const struct file_operations shmem_file_operations = { static const struct inode_operations shmem_inode_operations = { .setattr = shmem_notify_change, .truncate_range = shmem_truncate_range, +#ifdef CONFIG_TMPFS_XATTR + .setxattr = shmem_setxattr, + .getxattr = shmem_getxattr, + .listxattr = shmem_listxattr, + .removexattr = shmem_removexattr, +#endif #ifdef CONFIG_TMPFS_POSIX_ACL - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .listxattr = generic_listxattr, - .removexattr = generic_removexattr, .check_acl = generic_check_acl, #endif @@ -2523,23 +2731,27 @@ static const struct inode_operations shmem_dir_inode_operations = { .mknod = shmem_mknod, .rename = shmem_rename, #endif +#ifdef CONFIG_TMPFS_XATTR + .setxattr = shmem_setxattr, + .getxattr = shmem_getxattr, + .listxattr = shmem_listxattr, + .removexattr = shmem_removexattr, +#endif #ifdef CONFIG_TMPFS_POSIX_ACL .setattr = shmem_notify_change, - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .listxattr = generic_listxattr, - .removexattr = generic_removexattr, .check_acl = generic_check_acl, #endif }; static const struct inode_operations shmem_special_inode_operations = { +#ifdef CONFIG_TMPFS_XATTR + .setxattr = shmem_setxattr, + .getxattr = shmem_getxattr, + .listxattr = shmem_listxattr, + .removexattr = shmem_removexattr, +#endif #ifdef CONFIG_TMPFS_POSIX_ACL .setattr = shmem_notify_change, - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .listxattr = generic_listxattr, - .removexattr = generic_removexattr, .check_acl = generic_check_acl, #endif }; -- cgit v1.2.3 From c856507f2b2b47a49d8587afb58930b463f6bff4 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 24 May 2011 17:12:40 -0700 Subject: mm: remove last trace of shmem_get_unmapped_area Remove noMMU declaration of shmem_get_unmapped_area() from mm.h: it fell out of use in 2.6.21 and ceased to exist in 2.6.29. Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 48e458190d88..8eb969ebf904 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -873,14 +873,6 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user); struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags); int shmem_zero_setup(struct vm_area_struct *); -#ifndef CONFIG_MMU -extern unsigned long shmem_get_unmapped_area(struct file *file, - unsigned long addr, - unsigned long len, - unsigned long pgoff, - unsigned long flags); -#endif - extern int can_do_mlock(void); extern int user_shm_lock(size_t, struct user_struct *); extern void user_shm_unlock(size_t, struct user_struct *); -- cgit v1.2.3 From d98f6cb67fb5b9376d4957d7ba9f32eac35c2e08 Mon Sep 17 00:00:00 2001 From: Stephen Wilson Date: Tue, 24 May 2011 17:12:41 -0700 Subject: mm: export get_vma_policy() In commit 48fce3429d ("mempolicies: unexport get_vma_policy()") get_vma_policy() was marked static as all clients were local to mempolicy.c. However, the decision to generate /proc/pid/numa_maps in the numa memory policy code and outside the procfs subsystem introduces an artificial interdependency between the two systems. Exporting get_vma_policy() once again is the first step to clean up this interdependency. Signed-off-by: Stephen Wilson Reviewed-by: KOSAKI Motohiro Cc: Hugh Dickins Cc: David Rientjes Cc: Lee Schermerhorn Cc: Alexey Dobriyan Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 3 +++ mm/mempolicy.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 31ac26ca4acf..c2f603218fbc 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -199,6 +199,9 @@ void mpol_free_shared_policy(struct shared_policy *p); struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx); +struct mempolicy *get_vma_policy(struct task_struct *tsk, + struct vm_area_struct *vma, unsigned long addr); + extern void numa_default_policy(void); extern void numa_policy_init(void); extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new, diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 959a8b8c7350..5bfb03ef3cb0 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1489,7 +1489,7 @@ asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len, * freeing by another task. It is the caller's responsibility to free the * extra reference for shared policies. */ -static struct mempolicy *get_vma_policy(struct task_struct *task, +struct mempolicy *get_vma_policy(struct task_struct *task, struct vm_area_struct *vma, unsigned long addr) { struct mempolicy *pol = task->mempolicy; -- cgit v1.2.3 From 13057efb0a0063eb8042d99093ec88a52c4f1593 Mon Sep 17 00:00:00 2001 From: Stephen Wilson Date: Tue, 24 May 2011 17:12:46 -0700 Subject: mm: declare mpol_to_str() when CONFIG_TMPFS=n When CONFIG_TMPFS=n mpol_to_str() is not declared in mempolicy.h. However, in the NUMA case, the definition is always compiled. Since it is not strictly true that tmpfs is the only client, and since the symbol was always lurking around anyways, export mpol_to_str() unconditionally. Furthermore, this will allow us to move show_numa_map() out of mempolicy.c and into the procfs subsystem. Signed-off-by: Stephen Wilson Cc: KOSAKI Motohiro Cc: Hugh Dickins Cc: David Rientjes Cc: Lee Schermerhorn Cc: Alexey Dobriyan Cc: Christoph Lameter Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index c2f603218fbc..7978eec1b7d9 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -231,10 +231,10 @@ int do_migrate_pages(struct mm_struct *mm, #ifdef CONFIG_TMPFS extern int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context); +#endif extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context); -#endif /* Check if a vma is migratable */ static inline int vma_migratable(struct vm_area_struct *vma) @@ -371,13 +371,13 @@ static inline int mpol_parse_str(char *str, struct mempolicy **mpol, { return 1; /* error */ } +#endif static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context) { return 0; } -#endif #endif /* CONFIG_NUMA */ #endif /* __KERNEL__ */ -- cgit v1.2.3 From f2beb7983613ecca20a61604f01ab50cc7a797e6 Mon Sep 17 00:00:00 2001 From: Stephen Wilson Date: Tue, 24 May 2011 17:12:48 -0700 Subject: proc: make struct proc_maps_private truly private Now that mm/mempolicy.c is no longer implementing /proc/pid/numa_maps there is no need to export struct proc_maps_private to the world. Move it to fs/proc/internal.h instead. Signed-off-by: Stephen Wilson Reviewed-by: KOSAKI Motohiro Cc: Hugh Dickins Cc: David Rientjes Cc: Lee Schermerhorn Cc: Alexey Dobriyan Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/internal.h | 8 ++++++++ include/linux/proc_fs.h | 8 -------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/internal.h b/fs/proc/internal.h index c03e8d3a3a5b..3763b436e69d 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -61,6 +61,14 @@ extern const struct file_operations proc_pagemap_operations; extern const struct file_operations proc_net_operations; extern const struct inode_operations proc_net_inode_operations; +struct proc_maps_private { + struct pid *pid; + struct task_struct *task; +#ifdef CONFIG_MMU + struct vm_area_struct *tail_vma; +#endif +}; + void proc_init_inodecache(void); static inline struct pid *proc_pid(struct inode *inode) diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index eaf4350c0f90..3686cd6c9aca 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -288,12 +288,4 @@ static inline struct net *PDE_NET(struct proc_dir_entry *pde) return pde->parent->data; } -struct proc_maps_private { - struct pid *pid; - struct task_struct *task; -#ifdef CONFIG_MMU - struct vm_area_struct *tail_vma; -#endif -}; - #endif /* _LINUX_PROC_FS_H */ -- cgit v1.2.3 From a539f3533b78e39a22723d6d3e1e11b6c14454d9 Mon Sep 17 00:00:00 2001 From: Daniel Kiper Date: Tue, 24 May 2011 17:12:51 -0700 Subject: mm: add SECTION_ALIGN_UP() and SECTION_ALIGN_DOWN() macro Add SECTION_ALIGN_UP() and SECTION_ALIGN_DOWN() macro which aligns given pfn to upper section and lower section boundary accordingly. Required for the latest memory hotplug support for the Xen balloon driver. Signed-off-by: Daniel Kiper Reviewed-by: Konrad Rzeszutek Wilk David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index d7152002e18d..217bcf6bca77 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -956,6 +956,9 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn) #define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT) #define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT) +#define SECTION_ALIGN_UP(pfn) (((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK) +#define SECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SECTION_MASK) + struct page; struct page_cgroup; struct mem_section { -- cgit v1.2.3 From 0ac1ee0bfec2a4ad118f907ce586d0dfd8db7641 Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Tue, 24 May 2011 17:13:05 -0700 Subject: ulimit: raise default hard ulimit on number of files to 4096 Apps are increasingly using more than 1024 file descriptors. See discussion in several distro bug trackers, e.g. BugLink: http://bugs.launchpad.net/bugs/663090 https://issues.rpath.com/browse/RPL-2054 You don't want to raise the default soft limit, since that might break apps that use select(), but it's safe to raise the default hard limit; that way, apps that know they need lots of file descriptors can raise their soft limit without needing root, and without user intervention. Ubuntu is doing this with a kernel change because they have a policy of not changing kernel defaults in userland. While 4096 might not be enough for *all* apps, it seems to be plenty for the apps I've seen lately that are unhappy with 1024. Signed-off-by: Tim Gardner Cc: Dan Kegel Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/resource.h | 2 +- include/linux/fs.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/resource.h b/include/asm-generic/resource.h index 587566f95f6c..61fa862fe08d 100644 --- a/include/asm-generic/resource.h +++ b/include/asm-generic/resource.h @@ -78,7 +78,7 @@ [RLIMIT_CORE] = { 0, RLIM_INFINITY }, \ [RLIMIT_RSS] = { RLIM_INFINITY, RLIM_INFINITY }, \ [RLIMIT_NPROC] = { 0, 0 }, \ - [RLIMIT_NOFILE] = { INR_OPEN, INR_OPEN }, \ + [RLIMIT_NOFILE] = { INR_OPEN_CUR, INR_OPEN_MAX }, \ [RLIMIT_MEMLOCK] = { MLOCK_LIMIT, MLOCK_LIMIT }, \ [RLIMIT_AS] = { RLIM_INFINITY, RLIM_INFINITY }, \ [RLIMIT_LOCKS] = { RLIM_INFINITY, RLIM_INFINITY }, \ diff --git a/include/linux/fs.h b/include/linux/fs.h index 5bb9e826019b..3f9d3251790d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -23,7 +23,8 @@ /* Fixed constants first: */ #undef NR_OPEN -#define INR_OPEN 1024 /* Initial setting for nfile rlimits */ +#define INR_OPEN_CUR 1024 /* Initial setting for nfile rlimits */ +#define INR_OPEN_MAX 4096 /* Hard limit for nfile rlimits */ #define BLOCK_SIZE_BITS 10 #define BLOCK_SIZE (1< Date: Tue, 24 May 2011 17:13:06 -0700 Subject: include/linux/c2port.h: remove wrong and never used macros The macro to_class_dev() uses the deprecated structure class_device, and the c2port_device has no member named class in the definition of the macro to_c2port_device. Signed-off-by: Wanlong Gao Cc: Rodolfo Giometti Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/c2port.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/c2port.h b/include/linux/c2port.h index 2a5cd867c365..a2f7d7413f30 100644 --- a/include/linux/c2port.h +++ b/include/linux/c2port.h @@ -60,9 +60,6 @@ struct c2port_ops { * Exported functions */ -#define to_class_dev(obj) container_of((obj), struct class_device, kobj) -#define to_c2port_device(obj) container_of((obj), struct c2port_device, class) - extern struct c2port_device *c2port_device_register(char *name, struct c2port_ops *ops, void *devdata); extern void c2port_device_unregister(struct c2port_device *dev); -- cgit v1.2.3 From e50c1f609c63223adaa38f5a79b18759a00adf72 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Tue, 24 May 2011 17:13:11 -0700 Subject: fscache: remove dead code under CONFIG_WORKQUEUE_DEBUGFS There is no CONFIG_WORKQUEUE_DEBUGFS any more, so this code is dead. Signed-off-by: WANG Cong Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fscache/operation.c | 10 ---------- fs/fscache/page.c | 13 ------------- include/linux/fscache-cache.h | 12 ------------ 3 files changed, 35 deletions(-) (limited to 'include/linux') diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 48a18f184d50..30afdfa7aec7 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -33,8 +33,6 @@ void fscache_enqueue_operation(struct fscache_operation *op) _enter("{OBJ%x OP%x,%u}", op->object->debug_id, op->debug_id, atomic_read(&op->usage)); - fscache_set_op_state(op, "EnQ"); - ASSERT(list_empty(&op->pend_link)); ASSERT(op->processor != NULL); ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE); @@ -66,8 +64,6 @@ EXPORT_SYMBOL(fscache_enqueue_operation); static void fscache_run_op(struct fscache_object *object, struct fscache_operation *op) { - fscache_set_op_state(op, "Run"); - object->n_in_progress++; if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) wake_up_bit(&op->flags, FSCACHE_OP_WAITING); @@ -88,8 +84,6 @@ int fscache_submit_exclusive_op(struct fscache_object *object, _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id); - fscache_set_op_state(op, "SubmitX"); - spin_lock(&object->lock); ASSERTCMP(object->n_ops, >=, object->n_in_progress); ASSERTCMP(object->n_ops, >=, object->n_exclusive); @@ -194,8 +188,6 @@ int fscache_submit_op(struct fscache_object *object, ASSERTCMP(atomic_read(&op->usage), >, 0); - fscache_set_op_state(op, "Submit"); - spin_lock(&object->lock); ASSERTCMP(object->n_ops, >=, object->n_in_progress); ASSERTCMP(object->n_ops, >=, object->n_exclusive); @@ -335,8 +327,6 @@ void fscache_put_operation(struct fscache_operation *op) if (!atomic_dec_and_test(&op->usage)) return; - fscache_set_op_state(op, "Put"); - _debug("PUT OP"); if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags)) BUG(); diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 41c441c2058d..a2a5d19ece6a 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -155,11 +155,9 @@ static void fscache_attr_changed_op(struct fscache_operation *op) fscache_stat(&fscache_n_attr_changed_calls); if (fscache_object_is_active(object)) { - fscache_set_op_state(op, "CallFS"); fscache_stat(&fscache_n_cop_attr_changed); ret = object->cache->ops->attr_changed(object); fscache_stat_d(&fscache_n_cop_attr_changed); - fscache_set_op_state(op, "Done"); if (ret < 0) fscache_abort_object(object); } @@ -190,7 +188,6 @@ int __fscache_attr_changed(struct fscache_cookie *cookie) fscache_operation_init(op, fscache_attr_changed_op, NULL); op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE); - fscache_set_op_name(op, "Attr"); spin_lock(&cookie->lock); @@ -257,7 +254,6 @@ static struct fscache_retrieval *fscache_alloc_retrieval( op->context = context; op->start_time = jiffies; INIT_LIST_HEAD(&op->to_do); - fscache_set_op_name(&op->op, "Retr"); return op; } @@ -368,7 +364,6 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, _leave(" = -ENOMEM"); return -ENOMEM; } - fscache_set_op_name(&op->op, "RetrRA1"); spin_lock(&cookie->lock); @@ -487,7 +482,6 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, op = fscache_alloc_retrieval(mapping, end_io_func, context); if (!op) return -ENOMEM; - fscache_set_op_name(&op->op, "RetrRAN"); spin_lock(&cookie->lock); @@ -589,7 +583,6 @@ int __fscache_alloc_page(struct fscache_cookie *cookie, op = fscache_alloc_retrieval(page->mapping, NULL, NULL); if (!op) return -ENOMEM; - fscache_set_op_name(&op->op, "RetrAL1"); spin_lock(&cookie->lock); @@ -662,8 +655,6 @@ static void fscache_write_op(struct fscache_operation *_op) _enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage)); - fscache_set_op_state(&op->op, "GetPage"); - spin_lock(&object->lock); cookie = object->cookie; @@ -698,15 +689,12 @@ static void fscache_write_op(struct fscache_operation *_op) spin_unlock(&cookie->stores_lock); spin_unlock(&object->lock); - fscache_set_op_state(&op->op, "Store"); fscache_stat(&fscache_n_store_pages); fscache_stat(&fscache_n_cop_write_page); ret = object->cache->ops->write_page(op, page); fscache_stat_d(&fscache_n_cop_write_page); - fscache_set_op_state(&op->op, "EndWrite"); fscache_end_page_write(object, page); if (ret < 0) { - fscache_set_op_state(&op->op, "Abort"); fscache_abort_object(object); } else { fscache_enqueue_operation(&op->op); @@ -778,7 +766,6 @@ int __fscache_write_page(struct fscache_cookie *cookie, fscache_operation_init(&op->op, fscache_write_op, fscache_release_write_op); op->op.flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_WAITING); - fscache_set_op_name(&op->op, "Write1"); ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM); if (ret < 0) diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 76427e688d15..af095b54502e 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -100,17 +100,6 @@ struct fscache_operation { /* operation releaser */ fscache_operation_release_t release; - -#ifdef CONFIG_WORKQUEUE_DEBUGFS - struct work_struct put_work; /* work to delay operation put */ - const char *name; /* operation name */ - const char *state; /* operation state */ -#define fscache_set_op_name(OP, N) do { (OP)->name = (N); } while(0) -#define fscache_set_op_state(OP, S) do { (OP)->state = (S); } while(0) -#else -#define fscache_set_op_name(OP, N) do { } while(0) -#define fscache_set_op_state(OP, S) do { } while(0) -#endif }; extern atomic_t fscache_op_debug_id; @@ -137,7 +126,6 @@ static inline void fscache_operation_init(struct fscache_operation *op, op->processor = processor; op->release = release; INIT_LIST_HEAD(&op->pend_link); - fscache_set_op_state(op, "Init"); } /* -- cgit v1.2.3 From 4b060420a596095869a6d7849caa798d23839cd1 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 24 May 2011 17:13:12 -0700 Subject: bitmap, irq: add smp_affinity_list interface to /proc/irq Manually adjusting the smp_affinity for IRQ's becomes unwieldy when the cpu count is large. Setting smp affinity to cpus 256 to 263 would be: echo 000000ff,00000000,00000000,00000000,00000000,00000000,00000000,00000000 > smp_affinity instead of: echo 256-263 > smp_affinity_list Think about what it looks like for cpus around say, 4088 to 4095. We already have many alternate "list" interfaces: /sys/devices/system/cpu/cpuX/indexY/shared_cpu_list /sys/devices/system/cpu/cpuX/topology/thread_siblings_list /sys/devices/system/cpu/cpuX/topology/core_siblings_list /sys/devices/system/node/nodeX/cpulist /sys/devices/pci***/***/local_cpulist Add a companion interface, smp_affinity_list to use cpu lists instead of cpu maps. This conforms to other companion interfaces where both a map and a list interface exists. This required adding a bitmap_parselist_user() function in a manner similar to the bitmap_parse_user() function. [akpm@linux-foundation.org: make __bitmap_parselist() static] Signed-off-by: Mike Travis Cc: Thomas Gleixner Cc: Jack Steiner Cc: Lee Schermerhorn Cc: Andy Shevchenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/IRQ-affinity.txt | 17 ++++-- Documentation/filesystems/proc.txt | 11 +++- include/linux/bitmap.h | 5 +- include/linux/cpumask.h | 15 +++++ kernel/irq/proc.c | 54 ++++++++++++++++-- lib/bitmap.c | 109 +++++++++++++++++++++++++++++++++---- 6 files changed, 188 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/Documentation/IRQ-affinity.txt b/Documentation/IRQ-affinity.txt index b4a615b78403..7890fae18529 100644 --- a/Documentation/IRQ-affinity.txt +++ b/Documentation/IRQ-affinity.txt @@ -4,10 +4,11 @@ ChangeLog: SMP IRQ affinity -/proc/irq/IRQ#/smp_affinity specifies which target CPUs are permitted -for a given IRQ source. It's a bitmask of allowed CPUs. It's not allowed -to turn off all CPUs, and if an IRQ controller does not support IRQ -affinity then the value will not change from the default 0xffffffff. +/proc/irq/IRQ#/smp_affinity and /proc/irq/IRQ#/smp_affinity_list specify +which target CPUs are permitted for a given IRQ source. It's a bitmask +(smp_affinity) or cpu list (smp_affinity_list) of allowed CPUs. It's not +allowed to turn off all CPUs, and if an IRQ controller does not support +IRQ affinity then the value will not change from the default of all cpus. /proc/irq/default_smp_affinity specifies default affinity mask that applies to all non-active IRQs. Once IRQ is allocated/activated its affinity bitmask @@ -54,3 +55,11 @@ round-trip min/avg/max = 0.1/0.5/585.4 ms This time around IRQ44 was delivered only to the last four processors. i.e counters for the CPU0-3 did not change. +Here is an example of limiting that same irq (44) to cpus 1024 to 1031: + +[root@moon 44]# echo 1024-1031 > smp_affinity +[root@moon 44]# cat smp_affinity +1024-1031 + +Note that to do this with a bitmask would require 32 bitmasks of zero +to follow the pertinent one. diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 60740e8ecb37..f48178024067 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -574,6 +574,12 @@ The contents of each smp_affinity file is the same by default: > cat /proc/irq/0/smp_affinity ffffffff +There is an alternate interface, smp_affinity_list which allows specifying +a cpu range instead of a bitmask: + + > cat /proc/irq/0/smp_affinity_list + 1024-1031 + The default_smp_affinity mask applies to all non-active IRQs, which are the IRQs which have not yet been allocated/activated, and hence which lack a /proc/irq/[0-9]* directory. @@ -583,12 +589,13 @@ reports itself as being attached. This hardware locality information does not include information about any possible driver locality preference. prof_cpu_mask specifies which CPUs are to be profiled by the system wide -profiler. Default value is ffffffff (all cpus). +profiler. Default value is ffffffff (all cpus if there are only 32 of them). The way IRQs are routed is handled by the IO-APIC, and it's Round Robin between all the CPUs which are allowed to handle it. As usual the kernel has more info than you and does a better job than you, so the defaults are the -best choice for almost everyone. +best choice for almost everyone. [Note this applies only to those IO-APIC's +that support "Round Robin" interrupt distribution.] There are three more important subdirectories in /proc: net, scsi, and sys. The general rule is that the contents, or even the existence of these diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index daf8c480c786..dcafe0bf0005 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -55,7 +55,8 @@ * bitmap_parse(buf, buflen, dst, nbits) Parse bitmap dst from kernel buf * bitmap_parse_user(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf * bitmap_scnlistprintf(buf, len, src, nbits) Print bitmap src as list to buf - * bitmap_parselist(buf, dst, nbits) Parse bitmap dst from list + * bitmap_parselist(buf, dst, nbits) Parse bitmap dst from kernel buf + * bitmap_parselist_user(buf, dst, nbits) Parse bitmap dst from user buf * bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region * bitmap_release_region(bitmap, pos, order) Free specified bit region * bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region @@ -129,6 +130,8 @@ extern int bitmap_scnlistprintf(char *buf, unsigned int len, const unsigned long *src, int nbits); extern int bitmap_parselist(const char *buf, unsigned long *maskp, int nmaskbits); +extern int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen, + unsigned long *dst, int nbits); extern void bitmap_remap(unsigned long *dst, const unsigned long *src, const unsigned long *old, const unsigned long *new, int bits); extern int bitmap_bitremap(int oldbit, diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index bae6fe24d1f9..b24ac56477b4 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -546,6 +546,21 @@ static inline int cpumask_parse_user(const char __user *buf, int len, return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits); } +/** + * cpumask_parselist_user - extract a cpumask from a user string + * @buf: the buffer to extract from + * @len: the length of the buffer + * @dstp: the cpumask to set. + * + * Returns -errno, or 0 for success. + */ +static inline int cpumask_parselist_user(const char __user *buf, int len, + struct cpumask *dstp) +{ + return bitmap_parselist_user(buf, len, cpumask_bits(dstp), + nr_cpumask_bits); +} + /** * cpulist_scnprintf - print a cpumask into a string as comma-separated list * @buf: the buffer to sprintf into diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 834899f2500f..64e3df6ab1ef 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -19,7 +19,7 @@ static struct proc_dir_entry *root_irq_dir; #ifdef CONFIG_SMP -static int irq_affinity_proc_show(struct seq_file *m, void *v) +static int show_irq_affinity(int type, struct seq_file *m, void *v) { struct irq_desc *desc = irq_to_desc((long)m->private); const struct cpumask *mask = desc->irq_data.affinity; @@ -28,7 +28,10 @@ static int irq_affinity_proc_show(struct seq_file *m, void *v) if (irqd_is_setaffinity_pending(&desc->irq_data)) mask = desc->pending_mask; #endif - seq_cpumask(m, mask); + if (type) + seq_cpumask_list(m, mask); + else + seq_cpumask(m, mask); seq_putc(m, '\n'); return 0; } @@ -59,7 +62,18 @@ static int irq_affinity_hint_proc_show(struct seq_file *m, void *v) #endif int no_irq_affinity; -static ssize_t irq_affinity_proc_write(struct file *file, +static int irq_affinity_proc_show(struct seq_file *m, void *v) +{ + return show_irq_affinity(0, m, v); +} + +static int irq_affinity_list_proc_show(struct seq_file *m, void *v) +{ + return show_irq_affinity(1, m, v); +} + + +static ssize_t write_irq_affinity(int type, struct file *file, const char __user *buffer, size_t count, loff_t *pos) { unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data; @@ -72,7 +86,10 @@ static ssize_t irq_affinity_proc_write(struct file *file, if (!alloc_cpumask_var(&new_value, GFP_KERNEL)) return -ENOMEM; - err = cpumask_parse_user(buffer, count, new_value); + if (type) + err = cpumask_parselist_user(buffer, count, new_value); + else + err = cpumask_parse_user(buffer, count, new_value); if (err) goto free_cpumask; @@ -100,11 +117,28 @@ free_cpumask: return err; } +static ssize_t irq_affinity_proc_write(struct file *file, + const char __user *buffer, size_t count, loff_t *pos) +{ + return write_irq_affinity(0, file, buffer, count, pos); +} + +static ssize_t irq_affinity_list_proc_write(struct file *file, + const char __user *buffer, size_t count, loff_t *pos) +{ + return write_irq_affinity(1, file, buffer, count, pos); +} + static int irq_affinity_proc_open(struct inode *inode, struct file *file) { return single_open(file, irq_affinity_proc_show, PDE(inode)->data); } +static int irq_affinity_list_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, irq_affinity_list_proc_show, PDE(inode)->data); +} + static int irq_affinity_hint_proc_open(struct inode *inode, struct file *file) { return single_open(file, irq_affinity_hint_proc_show, PDE(inode)->data); @@ -125,6 +159,14 @@ static const struct file_operations irq_affinity_hint_proc_fops = { .release = single_release, }; +static const struct file_operations irq_affinity_list_proc_fops = { + .open = irq_affinity_list_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = irq_affinity_list_proc_write, +}; + static int default_affinity_show(struct seq_file *m, void *v) { seq_cpumask(m, irq_default_affinity); @@ -289,6 +331,10 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) proc_create_data("affinity_hint", 0400, desc->dir, &irq_affinity_hint_proc_fops, (void *)(long)irq); + /* create /proc/irq//smp_affinity_list */ + proc_create_data("smp_affinity_list", 0600, desc->dir, + &irq_affinity_list_proc_fops, (void *)(long)irq); + proc_create_data("node", 0444, desc->dir, &irq_node_proc_fops, (void *)(long)irq); #endif diff --git a/lib/bitmap.c b/lib/bitmap.c index 91e0ccfdb424..41baf02924e6 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -571,8 +571,11 @@ int bitmap_scnlistprintf(char *buf, unsigned int buflen, EXPORT_SYMBOL(bitmap_scnlistprintf); /** - * bitmap_parselist - convert list format ASCII string to bitmap + * __bitmap_parselist - convert list format ASCII string to bitmap * @bp: read nul-terminated user string from this buffer + * @buflen: buffer size in bytes. If string is smaller than this + * then it must be terminated with a \0. + * @is_user: location of buffer, 0 indicates kernel space * @maskp: write resulting mask here * @nmaskbits: number of bits in mask to be written * @@ -587,20 +590,63 @@ EXPORT_SYMBOL(bitmap_scnlistprintf); * %-EINVAL: invalid character in string * %-ERANGE: bit number specified too large for mask */ -int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits) +static int __bitmap_parselist(const char *buf, unsigned int buflen, + int is_user, unsigned long *maskp, + int nmaskbits) { unsigned a, b; + int c, old_c, totaldigits; + const char __user *ubuf = buf; + int exp_digit, in_range; + totaldigits = c = 0; bitmap_zero(maskp, nmaskbits); do { - if (!isdigit(*bp)) - return -EINVAL; - b = a = simple_strtoul(bp, (char **)&bp, BASEDEC); - if (*bp == '-') { - bp++; - if (!isdigit(*bp)) + exp_digit = 1; + in_range = 0; + a = b = 0; + + /* Get the next cpu# or a range of cpu#'s */ + while (buflen) { + old_c = c; + if (is_user) { + if (__get_user(c, ubuf++)) + return -EFAULT; + } else + c = *buf++; + buflen--; + if (isspace(c)) + continue; + + /* + * If the last character was a space and the current + * character isn't '\0', we've got embedded whitespace. + * This is a no-no, so throw an error. + */ + if (totaldigits && c && isspace(old_c)) + return -EINVAL; + + /* A '\0' or a ',' signal the end of a cpu# or range */ + if (c == '\0' || c == ',') + break; + + if (c == '-') { + if (exp_digit || in_range) + return -EINVAL; + b = 0; + in_range = 1; + exp_digit = 1; + continue; + } + + if (!isdigit(c)) return -EINVAL; - b = simple_strtoul(bp, (char **)&bp, BASEDEC); + + b = b * 10 + (c - '0'); + if (!in_range) + a = b; + exp_digit = 0; + totaldigits++; } if (!(a <= b)) return -EINVAL; @@ -610,13 +656,52 @@ int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits) set_bit(a, maskp); a++; } - if (*bp == ',') - bp++; - } while (*bp != '\0' && *bp != '\n'); + } while (buflen && c == ','); return 0; } + +int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits) +{ + char *nl = strchr(bp, '\n'); + int len; + + if (nl) + len = nl - bp; + else + len = strlen(bp); + + return __bitmap_parselist(bp, len, 0, maskp, nmaskbits); +} EXPORT_SYMBOL(bitmap_parselist); + +/** + * bitmap_parselist_user() + * + * @ubuf: pointer to user buffer containing string. + * @ulen: buffer size in bytes. If string is smaller than this + * then it must be terminated with a \0. + * @maskp: pointer to bitmap array that will contain result. + * @nmaskbits: size of bitmap, in bits. + * + * Wrapper for bitmap_parselist(), providing it with user buffer. + * + * We cannot have this as an inline function in bitmap.h because it needs + * linux/uaccess.h to get the access_ok() declaration and this causes + * cyclic dependencies. + */ +int bitmap_parselist_user(const char __user *ubuf, + unsigned int ulen, unsigned long *maskp, + int nmaskbits) +{ + if (!access_ok(VERIFY_READ, ubuf, ulen)) + return -EFAULT; + return __bitmap_parselist((const char *)ubuf, + ulen, 1, maskp, nmaskbits); +} +EXPORT_SYMBOL(bitmap_parselist_user); + + /** * bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap * @buf: pointer to a bitmap -- cgit v1.2.3 From 1dbe39424a43e56a6c9aed12661192af51dcdb9f Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 24 May 2011 17:13:13 -0700 Subject: xattr.h: expose string defines to userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit af4f136056c9 ("security: move LSM xattrnames to xattr.h") moved the XATTR_CAPS_SUFFIX define from capability.h to xattr.h. This makes sense except it was previously exports to userspace but xattr.h does not export it to userspace. This patch exports these headers to userspace to fix the ABI regression. There is some slight possibility that this will cause problems in other applications which used these #defines differently (wrongly) and I could JUST export the capabilities xattr name that we broke. Does anyonehave an idea how exposing these headers could cause a problem? Below is what is being exposed to userspace, included here since it isn't clear exactly what is going to be made available from the patch. /* Namespaces */ #define XATTR_OS2_PREFIX "os2." #define XATTR_OS2_PREFIX_LEN (sizeof (XATTR_OS2_PREFIX) - 1) #define XATTR_SECURITY_PREFIX "security." #define XATTR_SECURITY_PREFIX_LEN (sizeof (XATTR_SECURITY_PREFIX) - 1) #define XATTR_SYSTEM_PREFIX "system." #define XATTR_SYSTEM_PREFIX_LEN (sizeof (XATTR_SYSTEM_PREFIX) - 1) #define XATTR_TRUSTED_PREFIX "trusted." #define XATTR_TRUSTED_PREFIX_LEN (sizeof (XATTR_TRUSTED_PREFIX) - 1) #define XATTR_USER_PREFIX "user." #define XATTR_USER_PREFIX_LEN (sizeof (XATTR_USER_PREFIX) - 1) /* Security namespace */ #define XATTR_SELINUX_SUFFIX "selinux" #define XATTR_NAME_SELINUX XATTR_SECURITY_PREFIX XATTR_SELINUX_SUFFIX #define XATTR_SMACK_SUFFIX "SMACK64" #define XATTR_SMACK_IPIN "SMACK64IPIN" #define XATTR_SMACK_IPOUT "SMACK64IPOUT" #define XATTR_NAME_SMACK XATTR_SECURITY_PREFIX XATTR_SMACK_SUFFIX #define XATTR_NAME_SMACKIPIN XATTR_SECURITY_PREFIX XATTR_SMACK_IPIN #define XATTR_NAME_SMACKIPOUT XATTR_SECURITY_PREFIX XATTR_SMACK_IPOUT #define XATTR_CAPS_SUFFIX "capability" #define XATTR_NAME_CAPS XATTR_SECURITY_PREFIX XATTR_CAPS_SUFFIX Reported-by: Ozan Çaglayan Signed-off-by: Eric Paris Cc: Mimi Zohar Cc: Serge Hallyn Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/xattr.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 6050783005bd..aed54c50aa66 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -13,10 +13,6 @@ #define XATTR_CREATE 0x1 /* set value, fail if attr already exists */ #define XATTR_REPLACE 0x2 /* set value, fail if attr does not exist */ -#ifdef __KERNEL__ - -#include - /* Namespaces */ #define XATTR_OS2_PREFIX "os2." #define XATTR_OS2_PREFIX_LEN (sizeof (XATTR_OS2_PREFIX) - 1) @@ -53,6 +49,10 @@ #define XATTR_CAPS_SUFFIX "capability" #define XATTR_NAME_CAPS XATTR_SECURITY_PREFIX XATTR_CAPS_SUFFIX +#ifdef __KERNEL__ + +#include + struct inode; struct dentry; -- cgit v1.2.3 From 903c0c7cdc21f2ccb7562a7bbc70289c0c2b16ad Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 24 May 2011 17:13:16 -0700 Subject: sparse: define dummy BUILD_BUG_ON definition for sparse BUILD_BUG_ON() causes a syntax error to detect coding errors. So it causes sparse to detect an error too. This reduces sparse's usefulness. This patch makes a dummy BUILD_BUG_ON() definition for sparse. Signed-off-by: KOSAKI Motohiro Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index f37ba716ef8b..22295244de18 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -638,6 +638,13 @@ struct sysinfo { char _f[20-2*sizeof(long)-sizeof(int)]; /* Padding: libc5 uses this.. */ }; +#ifdef __CHECKER__ +#define BUILD_BUG_ON_NOT_POWER_OF_2(n) +#define BUILD_BUG_ON_ZERO(e) +#define BUILD_BUG_ON_NULL(e) +#define BUILD_BUG_ON(condition) +#else /* __CHECKER__ */ + /* Force a compilation error if a constant expression is not a power of 2 */ #define BUILD_BUG_ON_NOT_POWER_OF_2(n) \ BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0)) @@ -674,6 +681,7 @@ extern int __build_bug_on_failed; if (condition) __build_bug_on_failed = 1; \ } while(0) #endif +#endif /* __CHECKER__ */ /* Trap pasters of __FUNCTION__ at compile-time */ #define __FUNCTION__ (__func__) -- cgit v1.2.3 From 5bd7e6a30e1eb38f58f0046c0cd42dfc38e90d64 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 24 May 2011 17:13:17 -0700 Subject: sparse: define __must_be_array() for __CHECKER__ commit c5e631cf65f ("ARRAY_SIZE: check for type") added __must_be_array(). But sparse can't parse this gcc extention. Now make C=2 makes following sparse errors a lot. kernel/futex.c:2699:25: error: No right hand side of '+'-expression Because __must_be_array() is used for ARRAY_SIZE() macro and it is used very widely. This patch fixes it. Signed-off-by: KOSAKI Motohiro Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index cb4c1eb7778e..59e4028e833d 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -34,8 +34,12 @@ __asm__ ("" : "=r"(__ptr) : "0"(ptr)); \ (typeof(ptr)) (__ptr + (off)); }) +#ifdef __CHECKER__ +#define __must_be_array(arr) 0 +#else /* &a[0] degrades to a pointer: a different type from an array */ #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) +#endif /* * Force always-inline if the user requests it so via the .config, -- cgit v1.2.3 From 746a2a838deec3ef86ef6b7c3edd4207b9a351aa Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 24 May 2011 17:13:17 -0700 Subject: sparse: Undef __compiletime_{warning,error} if __CHECKER__ is defined sparse can't parse warning and error attribute. then they should be hidden from sparse. Signed-off-by: KOSAKI Motohiro Cc: Arjan van de Ven Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 64b7c003fd7a..dfadc96e9d63 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -51,7 +51,7 @@ #if __GNUC_MINOR__ > 0 #define __compiletime_object_size(obj) __builtin_object_size(obj, 0) #endif -#if __GNUC_MINOR__ >= 4 +#if __GNUC_MINOR__ >= 4 && !defined(__CHECKER__) #define __compiletime_warning(message) __attribute__((warning(message))) #define __compiletime_error(message) __attribute__((error(message))) #endif -- cgit v1.2.3 From 95dde501907b06e7203c74f8435acfdab9eb2659 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 24 May 2011 17:13:19 -0700 Subject: memblock: add error return when CONFIG_HAVE_MEMBLOCK is not set On larger systems, information in the kernel log is lost because there is so much early text printed, that it overflows the static log buffer before the log_buf_len kernel parameter can be processed, and a bigger log buffer allocated. Distros are relunctant to increase memory usage by increasing the size of the static log buffer, so minimize the problem by allocating the new log buffer as early as possible. This patch: Add an error return if CONFIG_HAVE_MEMBLOCK is not set instead of having to add #ifdef CONFIG_HAVE_MEMBLOCK around blocks of code calling that function. Signed-off-by: Mike Travis Cc: Yinghai Lu Cc: "H. Peter Anvin" Cc: Jack Steiner Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 62a10c2a11f2..7525e38c434d 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -2,6 +2,8 @@ #define _LINUX_MEMBLOCK_H #ifdef __KERNEL__ +#define MEMBLOCK_ERROR 0 + #ifdef CONFIG_HAVE_MEMBLOCK /* * Logical memory blocks. @@ -20,7 +22,6 @@ #include #define INIT_MEMBLOCK_REGIONS 128 -#define MEMBLOCK_ERROR 0 struct memblock_region { phys_addr_t base; @@ -160,6 +161,12 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo #define __initdata_memblock #endif +#else +static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align) +{ + return MEMBLOCK_ERROR; +} + #endif /* CONFIG_HAVE_MEMBLOCK */ #endif /* __KERNEL__ */ -- cgit v1.2.3 From 162a7e7500f9664636e649ba59defe541b7c2c60 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 24 May 2011 17:13:20 -0700 Subject: printk: allocate kernel log buffer earlier On larger systems, because of the numerous ACPI, Bootmem and EFI messages, the static log buffer overflows before the larger one specified by the log_buf_len param is allocated. Minimize the overflow by allocating the new log buffer as soon as possible. On kernels without memblock, a later call to setup_log_buf from kernel/init.c is the fallback. [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: fix CONFIG_PRINTK=n build] Signed-off-by: Mike Travis Cc: Yinghai Lu Cc: "H. Peter Anvin" Cc: Jack Steiner Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/setup.c | 2 ++ include/linux/printk.h | 7 ++++ init/main.c | 1 + kernel/printk.c | 87 ++++++++++++++++++++++++++++++++----------------- 4 files changed, 68 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 605e5ae19c7f..a3e5948670c2 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -946,6 +946,8 @@ void __init setup_arch(char **cmdline_p) if (init_ohci1394_dma_early) init_ohci1394_dma_on_all_controllers(); #endif + /* Allocate bigger log buffer */ + setup_log_buf(1); reserve_initrd(); diff --git a/include/linux/printk.h b/include/linux/printk.h index ee048e77e1ae..0101d55d9651 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -1,6 +1,8 @@ #ifndef __KERNEL_PRINTK__ #define __KERNEL_PRINTK__ +#include + extern const char linux_banner[]; extern const char linux_proc_banner[]; @@ -113,6 +115,7 @@ extern int dmesg_restrict; extern int kptr_restrict; void log_buf_kexec_setup(void); +void __init setup_log_buf(int early); #else static inline __attribute__ ((format (printf, 1, 0))) int vprintk(const char *s, va_list args) @@ -137,6 +140,10 @@ static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, static inline void log_buf_kexec_setup(void) { } + +static inline void setup_log_buf(int early) +{ +} #endif extern void dump_stack(void) __cold; diff --git a/init/main.c b/init/main.c index 22da33918aef..d2f1e086bf33 100644 --- a/init/main.c +++ b/init/main.c @@ -504,6 +504,7 @@ asmlinkage void __init start_kernel(void) * These use large bootmem allocations and must precede * kmem_cache_init() */ + setup_log_buf(0); pidhash_init(); vfs_caches_init_early(); sort_main_extable(); diff --git a/kernel/printk.c b/kernel/printk.c index da8ca817eae3..35185392173f 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -167,46 +168,74 @@ void log_buf_kexec_setup(void) } #endif +/* requested log_buf_len from kernel cmdline */ +static unsigned long __initdata new_log_buf_len; + +/* save requested log_buf_len since it's too early to process it */ static int __init log_buf_len_setup(char *str) { unsigned size = memparse(str, &str); - unsigned long flags; if (size) size = roundup_pow_of_two(size); - if (size > log_buf_len) { - unsigned start, dest_idx, offset; - char *new_log_buf; + if (size > log_buf_len) + new_log_buf_len = size; - new_log_buf = alloc_bootmem(size); - if (!new_log_buf) { - printk(KERN_WARNING "log_buf_len: allocation failed\n"); - goto out; - } + return 0; +} +early_param("log_buf_len", log_buf_len_setup); - spin_lock_irqsave(&logbuf_lock, flags); - log_buf_len = size; - log_buf = new_log_buf; - - offset = start = min(con_start, log_start); - dest_idx = 0; - while (start != log_end) { - log_buf[dest_idx] = __log_buf[start & (__LOG_BUF_LEN - 1)]; - start++; - dest_idx++; - } - log_start -= offset; - con_start -= offset; - log_end -= offset; - spin_unlock_irqrestore(&logbuf_lock, flags); +void __init setup_log_buf(int early) +{ + unsigned long flags; + unsigned start, dest_idx, offset; + char *new_log_buf; + int free; + + if (!new_log_buf_len) + return; + + if (early) { + unsigned long mem; - printk(KERN_NOTICE "log_buf_len: %d\n", log_buf_len); + mem = memblock_alloc(new_log_buf_len, PAGE_SIZE); + if (mem == MEMBLOCK_ERROR) + return; + new_log_buf = __va(mem); + } else { + new_log_buf = alloc_bootmem_nopanic(new_log_buf_len); } -out: - return 1; -} -__setup("log_buf_len=", log_buf_len_setup); + if (unlikely(!new_log_buf)) { + pr_err("log_buf_len: %ld bytes not available\n", + new_log_buf_len); + return; + } + + spin_lock_irqsave(&logbuf_lock, flags); + log_buf_len = new_log_buf_len; + log_buf = new_log_buf; + new_log_buf_len = 0; + free = __LOG_BUF_LEN - log_end; + + offset = start = min(con_start, log_start); + dest_idx = 0; + while (start != log_end) { + unsigned log_idx_mask = start & (__LOG_BUF_LEN - 1); + + log_buf[dest_idx] = __log_buf[log_idx_mask]; + start++; + dest_idx++; + } + log_start -= offset; + con_start -= offset; + log_end -= offset; + spin_unlock_irqrestore(&logbuf_lock, flags); + + pr_info("log_buf_len: %d\n", log_buf_len); + pr_info("early log buf free: %d(%d%%)\n", + free, (free * 100) / __LOG_BUF_LEN); +} #ifdef CONFIG_BOOT_PRINTK_DELAY -- cgit v1.2.3 From 3c1ab50d0a31b27bb4e55168f4901dd91e6e5ea4 Mon Sep 17 00:00:00 2001 From: Joachim Eastwood Date: Tue, 24 May 2011 17:13:23 -0700 Subject: drivers/leds/leds-pca9532.c: add gpio capability Allow unused leds on pca9532 to be used as gpio. The board I am working on now has no less than 6 pca9532 chips. One chips is used for only leds, one has 14 leds and 2 gpio and the rest of the chips are gpio only. There is also one board in mainline which could use this capabilty; arch/arm/mach-iop32x/n2100.c 232 { .type = PCA9532_TYPE_NONE }, /* power OFF gpio */ 233 { .type = PCA9532_TYPE_NONE }, /* reset gpio */ This patch defines a new pin type, PCA9532_TYPE_GPIO, and registers a gpiochip if any pin has this type set. The gpio will registers all chip pins but will filter on gpio_request. [randy.dunlap@oracle.com: fix build when GPIOLIB is not enabled] Signed-off-by: Joachim Eastwood Reviewed-by: Wolfram Sang Reviewed-by: H Hartley Sweeten Cc: Richard Purdie Cc: Grant Likely Signed-off-by: Randy Dunlap Cc: Jan Weitzel Cc: Juergen Kilb Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/leds/Kconfig | 10 ++++ drivers/leds/leds-pca9532.c | 113 +++++++++++++++++++++++++++++++++++++++++-- include/linux/leds-pca9532.h | 3 +- 3 files changed, 122 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index 9bec8699b8a3..09de8dac8a73 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -162,6 +162,16 @@ config LEDS_PCA9532 LED controller. It is generally only useful as a platform driver +config LEDS_PCA9532_GPIO + bool "Enable GPIO support for PCA9532" + depends on LEDS_PCA9532 + depends on GPIOLIB + help + Allow unused pins on PCA9532 to be used as gpio. + + To use a pin as gpio pca9532_type in pca9532_platform data needs to + set to PCA9532_TYPE_GPIO. + config LEDS_GPIO tristate "LED Support for GPIO connected LEDs" depends on LEDS_CLASS diff --git a/drivers/leds/leds-pca9532.c b/drivers/leds/leds-pca9532.c index 5bf63af09ddf..ebea85603f43 100644 --- a/drivers/leds/leds-pca9532.c +++ b/drivers/leds/leds-pca9532.c @@ -19,7 +19,9 @@ #include #include #include +#include +#define PCA9532_REG_INPUT(i) ((i)/8) #define PCA9532_REG_PSC(i) (0x2+(i)*2) #define PCA9532_REG_PWM(i) (0x3+(i)*2) #define PCA9532_REG_LS0 0x6 @@ -34,6 +36,9 @@ struct pca9532_data { struct mutex update_lock; struct input_dev *idev; struct work_struct work; +#ifdef CONFIG_LEDS_PCA9532_GPIO + struct gpio_chip gpio; +#endif u8 pwm[2]; u8 psc[2]; }; @@ -200,16 +205,68 @@ static void pca9532_led_work(struct work_struct *work) pca9532_setled(led); } -static void pca9532_destroy_devices(struct pca9532_data *data, int n_devs) +#ifdef CONFIG_LEDS_PCA9532_GPIO +static int pca9532_gpio_request_pin(struct gpio_chip *gc, unsigned offset) +{ + struct pca9532_data *data = container_of(gc, struct pca9532_data, gpio); + struct pca9532_led *led = &data->leds[offset]; + + if (led->type == PCA9532_TYPE_GPIO) + return 0; + + return -EBUSY; +} + +static void pca9532_gpio_set_value(struct gpio_chip *gc, unsigned offset, int val) +{ + struct pca9532_data *data = container_of(gc, struct pca9532_data, gpio); + struct pca9532_led *led = &data->leds[offset]; + + if (val) + led->state = PCA9532_ON; + else + led->state = PCA9532_OFF; + + pca9532_setled(led); +} + +static int pca9532_gpio_get_value(struct gpio_chip *gc, unsigned offset) +{ + struct pca9532_data *data = container_of(gc, struct pca9532_data, gpio); + unsigned char reg; + + reg = i2c_smbus_read_byte_data(data->client, PCA9532_REG_INPUT(offset)); + + return !!(reg & (1 << (offset % 8))); +} + +static int pca9532_gpio_direction_input(struct gpio_chip *gc, unsigned offset) +{ + /* To use as input ensure pin is not driven */ + pca9532_gpio_set_value(gc, offset, 0); + + return 0; +} + +static int pca9532_gpio_direction_output(struct gpio_chip *gc, unsigned offset, int val) +{ + pca9532_gpio_set_value(gc, offset, val); + + return 0; +} +#endif /* CONFIG_LEDS_PCA9532_GPIO */ + +static int pca9532_destroy_devices(struct pca9532_data *data, int n_devs) { int i = n_devs; if (!data) - return; + return -EINVAL; while (--i >= 0) { switch (data->leds[i].type) { case PCA9532_TYPE_NONE: + case PCA9532_TYPE_GPIO: break; case PCA9532_TYPE_LED: led_classdev_unregister(&data->leds[i].ldev); @@ -224,12 +281,26 @@ static void pca9532_destroy_devices(struct pca9532_data *data, int n_devs) break; } } + +#ifdef CONFIG_LEDS_PCA9532_GPIO + if (data->gpio.dev) { + int err = gpiochip_remove(&data->gpio); + if (err) { + dev_err(&data->client->dev, "%s failed, %d\n", + "gpiochip_remove()", err); + return err; + } + } +#endif + + return 0; } static int pca9532_configure(struct i2c_client *client, struct pca9532_data *data, struct pca9532_platform_data *pdata) { int i, err = 0; + int gpios = 0; for (i = 0; i < 2; i++) { data->pwm[i] = pdata->pwm[i]; @@ -249,6 +320,9 @@ static int pca9532_configure(struct i2c_client *client, switch (led->type) { case PCA9532_TYPE_NONE: break; + case PCA9532_TYPE_GPIO: + gpios++; + break; case PCA9532_TYPE_LED: led->state = pled->state; led->name = pled->name; @@ -297,6 +371,34 @@ static int pca9532_configure(struct i2c_client *client, break; } } + +#ifdef CONFIG_LEDS_PCA9532_GPIO + if (gpios) { + data->gpio.label = "gpio-pca9532"; + data->gpio.direction_input = pca9532_gpio_direction_input; + data->gpio.direction_output = pca9532_gpio_direction_output; + data->gpio.set = pca9532_gpio_set_value; + data->gpio.get = pca9532_gpio_get_value; + data->gpio.request = pca9532_gpio_request_pin; + data->gpio.can_sleep = 1; + data->gpio.base = pdata->gpio_base; + data->gpio.ngpio = 16; + data->gpio.dev = &client->dev; + data->gpio.owner = THIS_MODULE; + + err = gpiochip_add(&data->gpio); + if (err) { + /* Use data->gpio.dev as a flag for freeing gpiochip */ + data->gpio.dev = NULL; + dev_warn(&client->dev, "could not add gpiochip\n"); + } else { + dev_info(&client->dev, "gpios %i...%i\n", + data->gpio.base, data->gpio.base + + data->gpio.ngpio - 1); + } + } +#endif + return 0; exit: @@ -337,7 +439,12 @@ static int pca9532_probe(struct i2c_client *client, static int pca9532_remove(struct i2c_client *client) { struct pca9532_data *data = i2c_get_clientdata(client); - pca9532_destroy_devices(data, 16); + int err; + + err = pca9532_destroy_devices(data, 16); + if (err) + return err; + kfree(data); return 0; } diff --git a/include/linux/leds-pca9532.h b/include/linux/leds-pca9532.h index f158eb1149aa..b8d6fffed4d8 100644 --- a/include/linux/leds-pca9532.h +++ b/include/linux/leds-pca9532.h @@ -25,7 +25,7 @@ enum pca9532_state { }; enum pca9532_type { PCA9532_TYPE_NONE, PCA9532_TYPE_LED, - PCA9532_TYPE_N2100_BEEP }; + PCA9532_TYPE_N2100_BEEP, PCA9532_TYPE_GPIO }; struct pca9532_led { u8 id; @@ -41,6 +41,7 @@ struct pca9532_platform_data { struct pca9532_led leds[16]; u8 pwm[2]; u8 psc[2]; + int gpio_base; }; #endif /* __LINUX_PCA9532_H */ -- cgit v1.2.3 From 4440673a95e63ad888a41db596edaa0c55d3a332 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Tue, 24 May 2011 17:13:29 -0700 Subject: leds: provide helper to register "leds-gpio" devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function makes a deep copy of the platform data to allow it to live in init memory. For a kernel that supports several machines and so includes the definition for several leds-gpio devices this saves quite some memory because all but one definition can be free'd after boot. As the function is used by arch code it must be builtin and so cannot go into leds-gpio.c. [akpm@linux-foundation.org: s/CONFIG_LED_REGISTER_GPIO/CONFIG_LEDS_REGISTER_GPIO/] Signed-off-by: Uwe Kleine-König Cc: Russell King Acked-by: Richard Purdie Cc: Fabio Estevam Cc: Sascha Hauer Tested-by: H Hartley Sweeten Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/Makefile | 2 +- drivers/leds/Kconfig | 7 +++++++ drivers/leds/Makefile | 1 + drivers/leds/leds-gpio-register.c | 42 +++++++++++++++++++++++++++++++++++++++ include/linux/leds.h | 2 ++ 5 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 drivers/leds/leds-gpio-register.c (limited to 'include/linux') diff --git a/drivers/Makefile b/drivers/Makefile index 145aeadb6c03..bceb60c85c01 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -94,7 +94,7 @@ obj-$(CONFIG_CPU_IDLE) += cpuidle/ obj-$(CONFIG_DMA_ENGINE) += dma/ obj-$(CONFIG_MMC) += mmc/ obj-$(CONFIG_MEMSTICK) += memstick/ -obj-$(CONFIG_NEW_LEDS) += leds/ +obj-y += leds/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_SGI_SN) += sn/ obj-y += firmware/ diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index a4a77734ab6e..1d027b475b22 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -14,6 +14,13 @@ config LEDS_CLASS This option enables the led sysfs class in /sys/class/leds. You'll need this to do anything useful with LEDs. If unsure, say N. +config LEDS_GPIO_REGISTER + bool + help + This option provides the function gpio_led_register_device. + As this function is used by arch code it must not be compiled as a + module. + if NEW_LEDS comment "LED drivers" diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile index 0b6ad375bfdb..bccb96c9bb45 100644 --- a/drivers/leds/Makefile +++ b/drivers/leds/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_LEDS_COBALT_QUBE) += leds-cobalt-qube.o obj-$(CONFIG_LEDS_COBALT_RAQ) += leds-cobalt-raq.o obj-$(CONFIG_LEDS_SUNFIRE) += leds-sunfire.o obj-$(CONFIG_LEDS_PCA9532) += leds-pca9532.o +obj-$(CONFIG_LEDS_GPIO_REGISTER) += leds-gpio-register.o obj-$(CONFIG_LEDS_GPIO) += leds-gpio.o obj-$(CONFIG_LEDS_LP3944) += leds-lp3944.o obj-$(CONFIG_LEDS_LP5521) += leds-lp5521.o diff --git a/drivers/leds/leds-gpio-register.c b/drivers/leds/leds-gpio-register.c new file mode 100644 index 000000000000..1c4ed5510f35 --- /dev/null +++ b/drivers/leds/leds-gpio-register.c @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2011 Pengutronix + * Uwe Kleine-Koenig + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. + */ +#include +#include +#include +#include + +/** + * gpio_led_register_device - register a gpio-led device + * @pdata: the platform data used for the new device + * + * Makes a copy of pdata and pdata->leds and registers a new leds-gpio device + * with the result. This allows to have pdata and pdata-leds in .init.rodata + * and so saves some bytes compared to a static struct platform_device with + * static platform data. + * + * Returns the registered device or an error pointer. + */ +struct platform_device *__init gpio_led_register_device( + int id, const struct gpio_led_platform_data *pdata) +{ + struct platform_device *ret; + struct gpio_led_platform_data _pdata = *pdata; + + _pdata.leds = kmemdup(pdata->leds, + pdata->num_leds * sizeof(*pdata->leds), GFP_KERNEL); + if (!_pdata.leds) + return ERR_PTR(-ENOMEM); + + ret = platform_device_register_resndata(NULL, "leds-gpio", id, + NULL, 0, &_pdata, sizeof(_pdata)); + if (IS_ERR(ret)) + kfree(_pdata.leds); + + return ret; +} diff --git a/include/linux/leds.h b/include/linux/leds.h index 61e0340a4b77..5884def15a24 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -207,5 +207,7 @@ struct gpio_led_platform_data { unsigned long *delay_off); }; +struct platform_device *gpio_led_register_device( + int id, const struct gpio_led_platform_data *pdata); #endif /* __LINUX_LEDS_H_INCLUDED */ -- cgit v1.2.3 From c196e32a111b0ee356d67acceb938ae0b5e63ef0 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 24 May 2011 17:13:31 -0700 Subject: lib: add kstrto*_from_user() There is quite a lot of code which does copy_from_user() + strict_strto*() or simple_strto*() combo in slightly different ways. Before doing conversions all over tree, let's get final API correct. Enter kstrtoull_from_user() and friends. Typical code which uses them looks very simple: TYPE val; int rv; rv = kstrtoTYPE_from_user(buf, count, 0, &val); if (rv < 0) return rv; [use val] return count; There is a tiny semantic difference from the plain kstrto*() API -- the latter allows any amount of leading zeroes, while the former copies data into buffer on stack and thus allows leading zeroes as long as it fits into buffer. This shouldn't be a problem for typical usecase "echo 42 > /proc/x". The point is to make reading one integer from userspace _very_ simple and very bug free. Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 31 +++++++++++++++++++++++++++++++ lib/kstrtox.c | 26 ++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 22295244de18..fb0e7329fee1 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -248,6 +248,37 @@ int __must_check kstrtos16(const char *s, unsigned int base, s16 *res); int __must_check kstrtou8(const char *s, unsigned int base, u8 *res); int __must_check kstrtos8(const char *s, unsigned int base, s8 *res); +int __must_check kstrtoull_from_user(const char __user *s, size_t count, unsigned int base, unsigned long long *res); +int __must_check kstrtoll_from_user(const char __user *s, size_t count, unsigned int base, long long *res); +int __must_check kstrtoul_from_user(const char __user *s, size_t count, unsigned int base, unsigned long *res); +int __must_check kstrtol_from_user(const char __user *s, size_t count, unsigned int base, long *res); +int __must_check kstrtouint_from_user(const char __user *s, size_t count, unsigned int base, unsigned int *res); +int __must_check kstrtoint_from_user(const char __user *s, size_t count, unsigned int base, int *res); +int __must_check kstrtou16_from_user(const char __user *s, size_t count, unsigned int base, u16 *res); +int __must_check kstrtos16_from_user(const char __user *s, size_t count, unsigned int base, s16 *res); +int __must_check kstrtou8_from_user(const char __user *s, size_t count, unsigned int base, u8 *res); +int __must_check kstrtos8_from_user(const char __user *s, size_t count, unsigned int base, s8 *res); + +static inline int __must_check kstrtou64_from_user(const char __user *s, size_t count, unsigned int base, u64 *res) +{ + return kstrtoull_from_user(s, count, base, res); +} + +static inline int __must_check kstrtos64_from_user(const char __user *s, size_t count, unsigned int base, s64 *res) +{ + return kstrtoll_from_user(s, count, base, res); +} + +static inline int __must_check kstrtou32_from_user(const char __user *s, size_t count, unsigned int base, u32 *res) +{ + return kstrtouint_from_user(s, count, base, res); +} + +static inline int __must_check kstrtos32_from_user(const char __user *s, size_t count, unsigned int base, s32 *res) +{ + return kstrtoint_from_user(s, count, base, res); +} + extern unsigned long simple_strtoul(const char *,char **,unsigned int); extern long simple_strtol(const char *,char **,unsigned int); extern unsigned long long simple_strtoull(const char *,char **,unsigned int); diff --git a/lib/kstrtox.c b/lib/kstrtox.c index a235f3cc471c..2dbae88090ac 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c @@ -17,6 +17,7 @@ #include #include #include +#include static inline char _tolower(const char c) { @@ -222,3 +223,28 @@ int kstrtos8(const char *s, unsigned int base, s8 *res) return 0; } EXPORT_SYMBOL(kstrtos8); + +#define kstrto_from_user(f, g, type) \ +int f(const char __user *s, size_t count, unsigned int base, type *res) \ +{ \ + /* sign, base 2 representation, newline, terminator */ \ + char buf[1 + sizeof(type) * 8 + 1 + 1]; \ + \ + count = min(count, sizeof(buf) - 1); \ + if (copy_from_user(buf, s, count)) \ + return -EFAULT; \ + buf[count] = '\0'; \ + return g(buf, base, res); \ +} \ +EXPORT_SYMBOL(f) + +kstrto_from_user(kstrtoull_from_user, kstrtoull, unsigned long long); +kstrto_from_user(kstrtoll_from_user, kstrtoll, long long); +kstrto_from_user(kstrtoul_from_user, kstrtoul, unsigned long); +kstrto_from_user(kstrtol_from_user, kstrtol, long); +kstrto_from_user(kstrtouint_from_user, kstrtouint, unsigned int); +kstrto_from_user(kstrtoint_from_user, kstrtoint, int); +kstrto_from_user(kstrtou16_from_user, kstrtou16, u16); +kstrto_from_user(kstrtos16_from_user, kstrtos16, s16); +kstrto_from_user(kstrtou8_from_user, kstrtou8, u8); +kstrto_from_user(kstrtos8_from_user, kstrtos8, s8); -- cgit v1.2.3 From 6aae6e0304d33e537298867dafb2703ec58c2e4f Mon Sep 17 00:00:00 2001 From: Jean-Christophe PLAGNIOL-VILLARD Date: Tue, 24 May 2011 17:13:33 -0700 Subject: include/linux/genalloc.h: add multiple-inclusion guards Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD Cc: Nicolas Ferre Cc: Patrice VILCHEZ Cc: Jes Sorensen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/genalloc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index 9869ef3674ac..b1c70f10c42b 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -9,6 +9,8 @@ */ +#ifndef __GENALLOC_H__ +#define __GENALLOC_H__ /* * General purpose special memory pool descriptor. */ @@ -34,3 +36,4 @@ extern int gen_pool_add(struct gen_pool *, unsigned long, size_t, int); extern void gen_pool_destroy(struct gen_pool *); extern unsigned long gen_pool_alloc(struct gen_pool *, size_t); extern void gen_pool_free(struct gen_pool *, unsigned long, size_t); +#endif /* __GENALLOC_H__ */ -- cgit v1.2.3 From 3c8f370ded3483b27f1218ff0051fcf0c7a2facd Mon Sep 17 00:00:00 2001 From: Jean-Christophe PLAGNIOL-VILLARD Date: Tue, 24 May 2011 17:13:34 -0700 Subject: lib/genalloc.c: add support for specifying the physical address So we can specify the virtual address as the base of the pool chunk and then get physical addresses for hardware IP. For example on at91 we will use this on spi, uart or macb Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD Cc: Nicolas Ferre Cc: Patrice VILCHEZ Cc: Jes Sorensen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/genalloc.h | 22 +++++++++++++++++++++- lib/genalloc.c | 45 +++++++++++++++++++++++++++++++++++++-------- 2 files changed, 58 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index b1c70f10c42b..5bbebda78b02 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -26,13 +26,33 @@ struct gen_pool { struct gen_pool_chunk { spinlock_t lock; struct list_head next_chunk; /* next chunk in pool */ + phys_addr_t phys_addr; /* physical starting address of memory chunk */ unsigned long start_addr; /* starting address of memory chunk */ unsigned long end_addr; /* ending address of memory chunk */ unsigned long bits[0]; /* bitmap for allocating memory chunk */ }; extern struct gen_pool *gen_pool_create(int, int); -extern int gen_pool_add(struct gen_pool *, unsigned long, size_t, int); +extern phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long); +extern int gen_pool_add_virt(struct gen_pool *, unsigned long, phys_addr_t, + size_t, int); +/** + * gen_pool_add - add a new chunk of special memory to the pool + * @pool: pool to add new memory chunk to + * @addr: starting address of memory chunk to add to pool + * @size: size in bytes of the memory chunk to add to pool + * @nid: node id of the node the chunk structure and bitmap should be + * allocated on, or -1 + * + * Add a new chunk of special memory to the specified pool. + * + * Returns 0 on success or a -ve errno on failure. + */ +static inline int gen_pool_add(struct gen_pool *pool, unsigned long addr, + size_t size, int nid) +{ + return gen_pool_add_virt(pool, addr, -1, size, nid); +} extern void gen_pool_destroy(struct gen_pool *); extern unsigned long gen_pool_alloc(struct gen_pool *, size_t); extern void gen_pool_free(struct gen_pool *, unsigned long, size_t); diff --git a/lib/genalloc.c b/lib/genalloc.c index 1923f1490e72..577ddf805975 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -39,17 +39,20 @@ struct gen_pool *gen_pool_create(int min_alloc_order, int nid) EXPORT_SYMBOL(gen_pool_create); /** - * gen_pool_add - add a new chunk of special memory to the pool + * gen_pool_add_virt - add a new chunk of special memory to the pool * @pool: pool to add new memory chunk to - * @addr: starting address of memory chunk to add to pool + * @virt: virtual starting address of memory chunk to add to pool + * @phys: physical starting address of memory chunk to add to pool * @size: size in bytes of the memory chunk to add to pool * @nid: node id of the node the chunk structure and bitmap should be * allocated on, or -1 * * Add a new chunk of special memory to the specified pool. + * + * Returns 0 on success or a -ve errno on failure. */ -int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, - int nid) +int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phys, + size_t size, int nid) { struct gen_pool_chunk *chunk; int nbits = size >> pool->min_alloc_order; @@ -58,11 +61,12 @@ int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, chunk = kmalloc_node(nbytes, GFP_KERNEL | __GFP_ZERO, nid); if (unlikely(chunk == NULL)) - return -1; + return -ENOMEM; spin_lock_init(&chunk->lock); - chunk->start_addr = addr; - chunk->end_addr = addr + size; + chunk->phys_addr = phys; + chunk->start_addr = virt; + chunk->end_addr = virt + size; write_lock(&pool->lock); list_add(&chunk->next_chunk, &pool->chunks); @@ -70,7 +74,32 @@ int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, return 0; } -EXPORT_SYMBOL(gen_pool_add); +EXPORT_SYMBOL(gen_pool_add_virt); + +/** + * gen_pool_virt_to_phys - return the physical address of memory + * @pool: pool to allocate from + * @addr: starting address of memory + * + * Returns the physical address on success, or -1 on error. + */ +phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long addr) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + if (addr >= chunk->start_addr && addr < chunk->end_addr) + return chunk->phys_addr + addr - chunk->start_addr; + } + read_unlock(&pool->lock); + + return -1; +} +EXPORT_SYMBOL(gen_pool_virt_to_phys); /** * gen_pool_destroy - destroy a special memory pool -- cgit v1.2.3 From c84598bbfa756b7d042da31aa4e198ae866a6c7d Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 24 May 2011 17:13:35 -0700 Subject: percpu_counter: change return value and add comments The percpu_counter_*_positive() API in UP case doesn't check if return value is positive. Add comments to explain why we don't. Also if count < 0, returns 0 instead of 1 for *read_positive(). [akpm@linux-foundation.org: tweak comment] Signed-off-by: Shaohua Li Acked-by: Eric Dumazet Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/percpu_counter.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 46f6ba56fa91..5edc9014263a 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -75,7 +75,7 @@ static inline s64 percpu_counter_read_positive(struct percpu_counter *fbc) barrier(); /* Prevent reloads of fbc->count */ if (ret >= 0) return ret; - return 1; + return 0; } static inline int percpu_counter_initialized(struct percpu_counter *fbc) @@ -133,6 +133,10 @@ static inline s64 percpu_counter_read(struct percpu_counter *fbc) return fbc->count; } +/* + * percpu_counter is intended to track positive numbers. In the UP case the + * number should never be negative. + */ static inline s64 percpu_counter_read_positive(struct percpu_counter *fbc) { return fbc->count; -- cgit v1.2.3 From 774466add7c810fd7e4c8bcf41995b6799608880 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 25 May 2011 20:43:32 +0200 Subject: hwmon: (jc42) Change detection class While the JC42-compatible chips are temperature sensors, I2C_CLASS_SPD makes more sense because these chips always live on memory modules. Signed-off-by: Jean Delvare Cc: Guenter Roeck --- drivers/hwmon/jc42.c | 2 +- include/linux/i2c.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/hwmon/jc42.c b/drivers/hwmon/jc42.c index 934991237061..02cebb74e206 100644 --- a/drivers/hwmon/jc42.c +++ b/drivers/hwmon/jc42.c @@ -213,7 +213,7 @@ static const struct dev_pm_ops jc42_dev_pm_ops = { /* This is the driver that will be inserted */ static struct i2c_driver jc42_driver = { - .class = I2C_CLASS_HWMON, + .class = I2C_CLASS_SPD, .driver = { .name = "jc42", .pm = JC42_DEV_PM_OPS, diff --git a/include/linux/i2c.h b/include/linux/i2c.h index f1e3ff5880a9..a6c652ef516d 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -409,7 +409,7 @@ void i2c_unlock_adapter(struct i2c_adapter *); /* i2c adapter classes (bitmask) */ #define I2C_CLASS_HWMON (1<<0) /* lm_sensors, ... */ #define I2C_CLASS_DDC (1<<3) /* DDC bus on graphics adapters */ -#define I2C_CLASS_SPD (1<<7) /* SPD EEPROMs and similar */ +#define I2C_CLASS_SPD (1<<7) /* Memory modules */ /* Internal numbers to terminate lists */ #define I2C_CLIENT_END 0xfffeU -- cgit v1.2.3 From d0c97cfb81ebc5b416c0f92fa2fc18d2773e3023 Mon Sep 17 00:00:00 2001 From: Andrei Warkentin Date: Mon, 23 May 2011 15:06:36 -0500 Subject: mmc: core: Use CMD23 for multiblock transfers when we can. CMD23-prefixed instead of open-ended multiblock transfers have a performance advantage on some MMC cards. Signed-off-by: Andrei Warkentin Signed-off-by: Chris Ball --- drivers/mmc/card/block.c | 108 ++++++++++++++++++++++++++++++++++------------- include/linux/mmc/card.h | 1 + include/linux/mmc/core.h | 1 + include/linux/mmc/host.h | 6 +++ include/linux/mmc/mmc.h | 6 +++ 5 files changed, 93 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 126c7f41c5a3..a380accaba9a 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -59,10 +59,6 @@ MODULE_ALIAS("mmc:block"); #define INAND_CMD38_ARG_SECTRIM1 0x81 #define INAND_CMD38_ARG_SECTRIM2 0x88 -#define REL_WRITES_SUPPORTED(card) (mmc_card_mmc((card)) && \ - (((card)->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN) || \ - ((card)->ext_csd.rel_sectors))) - static DEFINE_MUTEX(block_mutex); /* @@ -90,6 +86,10 @@ struct mmc_blk_data { struct mmc_queue queue; struct list_head part; + unsigned int flags; +#define MMC_BLK_CMD23 (1 << 0) /* Can do SET_BLOCK_COUNT for multiblock */ +#define MMC_BLK_REL_WR (1 << 1) /* MMC Reliable write support */ + unsigned int usage; unsigned int read_only; unsigned int part_type; @@ -429,6 +429,7 @@ static const struct block_device_operations mmc_bdops = { struct mmc_blk_request { struct mmc_request mrq; + struct mmc_command sbc; struct mmc_command cmd; struct mmc_command stop; struct mmc_data data; @@ -652,13 +653,10 @@ static int mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req) * reliable write can handle, thus finish the request in * partial completions. */ -static inline int mmc_apply_rel_rw(struct mmc_blk_request *brq, - struct mmc_card *card, - struct request *req) +static inline void mmc_apply_rel_rw(struct mmc_blk_request *brq, + struct mmc_card *card, + struct request *req) { - int err; - struct mmc_command set_count = {0}; - if (!(card->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN)) { /* Legacy mode imposes restrictions on transfers. */ if (!IS_ALIGNED(brq->cmd.arg, card->ext_csd.rel_sectors)) @@ -669,15 +667,6 @@ static inline int mmc_apply_rel_rw(struct mmc_blk_request *brq, else if (brq->data.blocks < card->ext_csd.rel_sectors) brq->data.blocks = 1; } - - set_count.opcode = MMC_SET_BLOCK_COUNT; - set_count.arg = brq->data.blocks | (1 << 31); - set_count.flags = MMC_RSP_R1 | MMC_CMD_AC; - err = mmc_wait_for_cmd(card->host, &set_count, 0); - if (err) - printk(KERN_ERR "%s: error %d SET_BLOCK_COUNT\n", - req->rq_disk->disk_name, err); - return err; } static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req) @@ -694,7 +683,7 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req) bool do_rel_wr = ((req->cmd_flags & REQ_FUA) || (req->cmd_flags & REQ_META)) && (rq_data_dir(req) == WRITE) && - REL_WRITES_SUPPORTED(card); + (md->flags & MMC_BLK_REL_WR); do { struct mmc_command cmd = {0}; @@ -732,11 +721,9 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req) if (brq.data.blocks > 1 || do_rel_wr) { /* SPI multiblock writes terminate using a special - * token, not a STOP_TRANSMISSION request. Reliable - * writes use SET_BLOCK_COUNT and do not use a - * STOP_TRANSMISSION request either. + * token, not a STOP_TRANSMISSION request. */ - if ((!mmc_host_is_spi(card->host) && !do_rel_wr) || + if (!mmc_host_is_spi(card->host) || rq_data_dir(req) == READ) brq.mrq.stop = &brq.stop; readcmd = MMC_READ_MULTIPLE_BLOCK; @@ -754,8 +741,37 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req) brq.data.flags |= MMC_DATA_WRITE; } - if (do_rel_wr && mmc_apply_rel_rw(&brq, card, req)) - goto cmd_err; + if (do_rel_wr) + mmc_apply_rel_rw(&brq, card, req); + + /* + * Pre-defined multi-block transfers are preferable to + * open ended-ones (and necessary for reliable writes). + * However, it is not sufficient to just send CMD23, + * and avoid the final CMD12, as on an error condition + * CMD12 (stop) needs to be sent anyway. This, coupled + * with Auto-CMD23 enhancements provided by some + * hosts, means that the complexity of dealing + * with this is best left to the host. If CMD23 is + * supported by card and host, we'll fill sbc in and let + * the host deal with handling it correctly. This means + * that for hosts that don't expose MMC_CAP_CMD23, no + * change of behavior will be observed. + * + * N.B: Some MMC cards experience perf degradation. + * We'll avoid using CMD23-bounded multiblock writes for + * these, while retaining features like reliable writes. + */ + + if ((md->flags & MMC_BLK_CMD23) && + mmc_op_multi(brq.cmd.opcode) && + (do_rel_wr || !(card->quirks & MMC_QUIRK_BLK_NO_CMD23))) { + brq.sbc.opcode = MMC_SET_BLOCK_COUNT; + brq.sbc.arg = brq.data.blocks | + (do_rel_wr ? (1 << 31) : 0); + brq.sbc.flags = MMC_RSP_R1 | MMC_CMD_AC; + brq.mrq.sbc = &brq.sbc; + } mmc_set_data_timeout(&brq.data, card); @@ -792,7 +808,8 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req) * until later as we need to wait for the card to leave * programming mode even when things go wrong. */ - if (brq.cmd.error || brq.data.error || brq.stop.error) { + if (brq.sbc.error || brq.cmd.error || + brq.data.error || brq.stop.error) { if (brq.data.blocks > 1 && rq_data_dir(req) == READ) { /* Redo read one sector at a time */ printk(KERN_WARNING "%s: retrying using single " @@ -803,6 +820,13 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req) status = get_card_status(card, req); } + if (brq.sbc.error) { + printk(KERN_ERR "%s: error %d sending SET_BLOCK_COUNT " + "command, response %#x, card status %#x\n", + req->rq_disk->disk_name, brq.sbc.error, + brq.sbc.resp[0], status); + } + if (brq.cmd.error) { printk(KERN_ERR "%s: error %d sending read/write " "command, response %#x, card status %#x\n", @@ -1014,8 +1038,6 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, md->disk->queue = md->queue.queue; md->disk->driverfs_dev = parent; set_disk_ro(md->disk, md->read_only || default_ro); - if (REL_WRITES_SUPPORTED(card)) - blk_queue_flush(md->queue.queue, REQ_FLUSH | REQ_FUA); /* * As discussed on lkml, GENHD_FL_REMOVABLE should: @@ -1034,6 +1056,19 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, blk_queue_logical_block_size(md->queue.queue, 512); set_capacity(md->disk, size); + + if (mmc_host_cmd23(card->host) && + mmc_card_mmc(card)) + md->flags |= MMC_BLK_CMD23; + + if (mmc_card_mmc(card) && + md->flags & MMC_BLK_CMD23 && + ((card->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN) || + card->ext_csd.rel_sectors)) { + md->flags |= MMC_BLK_REL_WR; + blk_queue_flush(md->queue.queue, REQ_FLUSH | REQ_FUA); + } + return md; err_putdisk: @@ -1189,6 +1224,21 @@ static const struct mmc_fixup blk_fixups[] = MMC_FIXUP("SEM08G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38), MMC_FIXUP("SEM16G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38), MMC_FIXUP("SEM32G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38), + + /* + * Some MMC cards experience performance degradation with CMD23 + * instead of CMD12-bounded multiblock transfers. For now we'll + * black list what's bad... + * - Certain Toshiba cards. + * + * N.B. This doesn't affect SD cards. + */ + MMC_FIXUP("MMC08G", 0x11, CID_OEMID_ANY, add_quirk_mmc, + MMC_QUIRK_BLK_NO_CMD23), + MMC_FIXUP("MMC16G", 0x11, CID_OEMID_ANY, add_quirk_mmc, + MMC_QUIRK_BLK_NO_CMD23), + MMC_FIXUP("MMC32G", 0x11, CID_OEMID_ANY, add_quirk_mmc, + MMC_QUIRK_BLK_NO_CMD23), END_FIXUP }; diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 7190aa2096f7..4a0e27baaea0 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -171,6 +171,7 @@ struct mmc_card { #define MMC_QUIRK_NONSTD_FUNC_IF (1<<4) /* SDIO card has nonstd function interfaces */ #define MMC_QUIRK_DISABLE_CD (1<<5) /* disconnect CD/DAT[3] resistor */ #define MMC_QUIRK_INAND_CMD38 (1<<6) /* iNAND devices have broken CMD38 */ +#define MMC_QUIRK_BLK_NO_CMD23 (1<<7) /* Avoid CMD23 for regular multiblock */ unsigned int erase_size; /* erase size in sectors */ unsigned int erase_shift; /* if erase unit is power 2 */ diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index cbe8d55f64c4..b6718e549a51 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -120,6 +120,7 @@ struct mmc_data { }; struct mmc_request { + struct mmc_command *sbc; /* SET_BLOCK_COUNT for multiblock */ struct mmc_command *cmd; struct mmc_data *data; struct mmc_command *stop; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index de32e6aa018a..e946bd10fe3f 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -210,6 +210,7 @@ struct mmc_host { #define MMC_CAP_MAX_CURRENT_400 (1 << 27) /* Host max current limit is 400mA */ #define MMC_CAP_MAX_CURRENT_600 (1 << 28) /* Host max current limit is 600mA */ #define MMC_CAP_MAX_CURRENT_800 (1 << 29) /* Host max current limit is 800mA */ +#define MMC_CAP_CMD23 (1 << 30) /* CMD23 supported. */ mmc_pm_flag_t pm_caps; /* supported pm features */ @@ -366,5 +367,10 @@ static inline int mmc_card_wake_sdio_irq(struct mmc_host *host) { return host->pm_flags & MMC_PM_WAKE_SDIO_IRQ; } + +static inline int mmc_host_cmd23(struct mmc_host *host) +{ + return host->caps & MMC_CAP_CMD23; +} #endif diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 9fa5a73f393d..ac26a685cca8 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -83,6 +83,12 @@ #define MMC_APP_CMD 55 /* ac [31:16] RCA R1 */ #define MMC_GEN_CMD 56 /* adtc [0] RD/WR R1 */ +static inline bool mmc_op_multi(u32 opcode) +{ + return opcode == MMC_WRITE_MULTIPLE_BLOCK || + opcode == MMC_READ_MULTIPLE_BLOCK; +} + /* * MMC_SWITCH argument format: * -- cgit v1.2.3 From e89d456fcdde2df008c032bf928e69e628e07a28 Mon Sep 17 00:00:00 2001 From: Andrei Warkentin Date: Mon, 23 May 2011 15:06:37 -0500 Subject: mmc: sdhci: Implement MMC_CAP_CMD23 for SDHCI. Implements support for multiblock transfers bounded by SET_BLOCK_COUNT (CMD23). Signed-off-by: Andrei Warkentin Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci.c | 63 +++++++++++++++++++++++++++++++++++------------ drivers/mmc/host/sdhci.h | 2 +- include/linux/mmc/sdhci.h | 1 + 3 files changed, 49 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index cc63f5ed2310..b9ed66307ac3 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -838,9 +838,10 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command *cmd) } static void sdhci_set_transfer_mode(struct sdhci_host *host, - struct mmc_data *data) + struct mmc_command *cmd) { u16 mode; + struct mmc_data *data = cmd->data; if (data == NULL) return; @@ -848,11 +849,14 @@ static void sdhci_set_transfer_mode(struct sdhci_host *host, WARN_ON(!host->data); mode = SDHCI_TRNS_BLK_CNT_EN; - if (data->blocks > 1) { - if (host->quirks & SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12) - mode |= SDHCI_TRNS_MULTI | SDHCI_TRNS_ACMD12; - else - mode |= SDHCI_TRNS_MULTI; + if (mmc_op_multi(cmd->opcode) || data->blocks > 1) { + mode |= SDHCI_TRNS_MULTI; + /* + * If we are sending CMD23, CMD12 never gets sent + * on successful completion (so no Auto-CMD12). + */ + if (!host->mrq->sbc && (host->flags & SDHCI_AUTO_CMD12)) + mode |= SDHCI_TRNS_AUTO_CMD12; } if (data->flags & MMC_DATA_READ) mode |= SDHCI_TRNS_READ; @@ -893,7 +897,15 @@ static void sdhci_finish_data(struct sdhci_host *host) else data->bytes_xfered = data->blksz * data->blocks; - if (data->stop) { + /* + * Need to send CMD12 if - + * a) open-ended multiblock transfer (no CMD23) + * b) error in multiblock transfer + */ + if (data->stop && + (data->error || + !host->mrq->sbc)) { + /* * The controller needs a reset of internal state machines * upon error conditions. @@ -949,7 +961,7 @@ static void sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd) sdhci_writel(host, cmd->arg, SDHCI_ARGUMENT); - sdhci_set_transfer_mode(host, cmd->data); + sdhci_set_transfer_mode(host, cmd); if ((cmd->flags & MMC_RSP_136) && (cmd->flags & MMC_RSP_BUSY)) { printk(KERN_ERR "%s: Unsupported response type!\n", @@ -1004,13 +1016,21 @@ static void sdhci_finish_command(struct sdhci_host *host) host->cmd->error = 0; - if (host->data && host->data_early) - sdhci_finish_data(host); + /* Finished CMD23, now send actual command. */ + if (host->cmd == host->mrq->sbc) { + host->cmd = NULL; + sdhci_send_command(host, host->mrq->cmd); + } else { - if (!host->cmd->data) - tasklet_schedule(&host->finish_tasklet); + /* Processed actual command. */ + if (host->data && host->data_early) + sdhci_finish_data(host); - host->cmd = NULL; + if (!host->cmd->data) + tasklet_schedule(&host->finish_tasklet); + + host->cmd = NULL; + } } static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock) @@ -1189,7 +1209,12 @@ static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq) #ifndef SDHCI_USE_LEDS_CLASS sdhci_activate_led(host); #endif - if (host->quirks & SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12) { + + /* + * Ensure we don't send the STOP for non-SET_BLOCK_COUNTED + * requests if Auto-CMD12 is enabled. + */ + if (!mrq->sbc && (host->flags & SDHCI_AUTO_CMD12)) { if (mrq->stop) { mrq->data->stop = NULL; mrq->stop = NULL; @@ -1227,7 +1252,10 @@ static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq) host->mrq = mrq; } - sdhci_send_command(host, mrq->cmd); + if (mrq->sbc) + sdhci_send_command(host, mrq->sbc); + else + sdhci_send_command(host, mrq->cmd); } mmiowb(); @@ -2455,7 +2483,10 @@ int sdhci_add_host(struct sdhci_host *host) } else mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_200; - mmc->caps |= MMC_CAP_SDIO_IRQ | MMC_CAP_ERASE; + mmc->caps |= MMC_CAP_SDIO_IRQ | MMC_CAP_ERASE | MMC_CAP_CMD23; + + if (host->quirks & SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12) + host->flags |= SDHCI_AUTO_CMD12; /* * A controller may support 8-bit width, but the board itself diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 7e28eec97f04..2c3fbc5a4c07 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -36,7 +36,7 @@ #define SDHCI_TRANSFER_MODE 0x0C #define SDHCI_TRNS_DMA 0x01 #define SDHCI_TRNS_BLK_CNT_EN 0x02 -#define SDHCI_TRNS_ACMD12 0x04 +#define SDHCI_TRNS_AUTO_CMD12 0x04 #define SDHCI_TRNS_READ 0x10 #define SDHCI_TRNS_MULTI 0x20 diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index e902618d68f4..73e27ba51e99 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -113,6 +113,7 @@ struct sdhci_host { #define SDHCI_DEVICE_DEAD (1<<3) /* Device unresponsive */ #define SDHCI_SDR50_NEEDS_TUNING (1<<4) /* SDR50 needs tuning */ #define SDHCI_NEEDS_RETUNING (1<<5) /* Host needs retuning */ +#define SDHCI_AUTO_CMD12 (1<<6) /* Auto CMD12 support */ unsigned int version; /* SDHCI spec. version */ -- cgit v1.2.3 From f0d89972b01798cf9d245dfa1cacfa0ee78a3593 Mon Sep 17 00:00:00 2001 From: Andrei Warkentin Date: Mon, 23 May 2011 15:06:38 -0500 Subject: mmc: core: Block CMD23 support for UHS104/SDXC cards. SD cards operating at UHS104 or better support SET_BLOCK_COUNT. Signed-off-by: Andrei Warkentin Reviewed-by: Arindam Nath Signed-off-by: Chris Ball --- drivers/mmc/card/block.c | 9 ++++++--- drivers/mmc/core/sd.c | 2 ++ include/linux/mmc/card.h | 3 +++ include/linux/mmc/sd.h | 2 +- 4 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index a380accaba9a..71da5641e258 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -1057,9 +1057,12 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, blk_queue_logical_block_size(md->queue.queue, 512); set_capacity(md->disk, size); - if (mmc_host_cmd23(card->host) && - mmc_card_mmc(card)) - md->flags |= MMC_BLK_CMD23; + if (mmc_host_cmd23(card->host)) { + if (mmc_card_mmc(card) || + (mmc_card_sd(card) && + card->scr.cmds & SD_SCR_CMD23_SUPPORT)) + md->flags |= MMC_BLK_CMD23; + } if (mmc_card_mmc(card) && md->flags & MMC_BLK_CMD23 && diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 596d0b9d30b8..ff2774128aa9 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -203,6 +203,8 @@ static int mmc_decode_scr(struct mmc_card *card) else card->erased_byte = 0x0; + if (scr->sda_spec3) + scr->cmds = UNSTUFF_BITS(resp, 32, 2); return 0; } diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 4a0e27baaea0..c6927a4d157f 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -72,6 +72,9 @@ struct sd_scr { unsigned char bus_widths; #define SD_SCR_BUS_WIDTH_1 (1<<0) #define SD_SCR_BUS_WIDTH_4 (1<<2) + unsigned char cmds; +#define SD_SCR_CMD20_SUPPORT (1<<0) +#define SD_SCR_CMD23_SUPPORT (1<<1) }; struct sd_ssr { diff --git a/include/linux/mmc/sd.h b/include/linux/mmc/sd.h index c648878f6734..7d35d52c3df3 100644 --- a/include/linux/mmc/sd.h +++ b/include/linux/mmc/sd.h @@ -66,7 +66,7 @@ #define SCR_SPEC_VER_0 0 /* Implements system specification 1.0 - 1.01 */ #define SCR_SPEC_VER_1 1 /* Implements system specification 1.10 */ -#define SCR_SPEC_VER_2 2 /* Implements system specification 2.00 */ +#define SCR_SPEC_VER_2 2 /* Implements system specification 2.00-3.0X */ /* * SD bus widths -- cgit v1.2.3 From 8edf63710bd43e62d59bfe017df542fa0713bbb3 Mon Sep 17 00:00:00 2001 From: Andrei Warkentin Date: Mon, 23 May 2011 15:06:39 -0500 Subject: mmc: sdhci: Auto-CMD23 support. Enables Auto-CMD23 support where available (SDHCI 3.0 controllers) Signed-off-by: Andrei Warkentin Tested-by: Arindam Nath Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci.c | 17 ++++++++++++++++- drivers/mmc/host/sdhci.h | 2 ++ include/linux/mmc/sdhci.h | 1 + 3 files changed, 19 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index b9ed66307ac3..f845fd6a64ae 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -857,7 +857,12 @@ static void sdhci_set_transfer_mode(struct sdhci_host *host, */ if (!host->mrq->sbc && (host->flags & SDHCI_AUTO_CMD12)) mode |= SDHCI_TRNS_AUTO_CMD12; + else if (host->mrq->sbc && (host->flags & SDHCI_AUTO_CMD23)) { + mode |= SDHCI_TRNS_AUTO_CMD23; + sdhci_writel(host, host->mrq->sbc->arg, SDHCI_ARGUMENT2); + } } + if (data->flags & MMC_DATA_READ) mode |= SDHCI_TRNS_READ; if (host->flags & SDHCI_REQ_USE_DMA) @@ -1252,7 +1257,7 @@ static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq) host->mrq = mrq; } - if (mrq->sbc) + if (mrq->sbc && !(host->flags & SDHCI_AUTO_CMD23)) sdhci_send_command(host, mrq->sbc); else sdhci_send_command(host, mrq->cmd); @@ -2488,6 +2493,16 @@ int sdhci_add_host(struct sdhci_host *host) if (host->quirks & SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12) host->flags |= SDHCI_AUTO_CMD12; + /* Auto-CMD23 stuff only works in ADMA or PIO. */ + if ((host->version == SDHCI_SPEC_300) && + ((host->flags & SDHCI_USE_ADMA) || + !(host->flags & SDHCI_REQ_USE_DMA))) { + host->flags |= SDHCI_AUTO_CMD23; + DBG("%s: Auto-CMD23 available\n", mmc_hostname(mmc)); + } else { + DBG("%s: Auto-CMD23 unavailable\n", mmc_hostname(mmc)); + } + /* * A controller may support 8-bit width, but the board itself * might not have the pins brought out. Boards that support diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 2c3fbc5a4c07..745c42fa41ed 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -25,6 +25,7 @@ */ #define SDHCI_DMA_ADDRESS 0x00 +#define SDHCI_ARGUMENT2 SDHCI_DMA_ADDRESS #define SDHCI_BLOCK_SIZE 0x04 #define SDHCI_MAKE_BLKSZ(dma, blksz) (((dma & 0x7) << 12) | (blksz & 0xFFF)) @@ -37,6 +38,7 @@ #define SDHCI_TRNS_DMA 0x01 #define SDHCI_TRNS_BLK_CNT_EN 0x02 #define SDHCI_TRNS_AUTO_CMD12 0x04 +#define SDHCI_TRNS_AUTO_CMD23 0x08 #define SDHCI_TRNS_READ 0x10 #define SDHCI_TRNS_MULTI 0x20 diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index 73e27ba51e99..6a68c4eb4e44 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -114,6 +114,7 @@ struct sdhci_host { #define SDHCI_SDR50_NEEDS_TUNING (1<<4) /* SDR50 needs tuning */ #define SDHCI_NEEDS_RETUNING (1<<5) /* Host needs retuning */ #define SDHCI_AUTO_CMD12 (1<<6) /* Auto CMD12 support */ +#define SDHCI_AUTO_CMD23 (1<<7) /* Auto CMD23 support */ unsigned int version; /* SDHCI spec. version */ -- cgit v1.2.3 From 6dcbbe25dcc9bd2bdeb4f685f8fb874ffc10e6be Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 24 May 2011 08:31:08 +0000 Subject: net: move is_vlan_dev into public header file (v2) Migrate is_vlan_dev() to if_vlan.h so that core networkig can use it Signed-off-by: Neil Horman CC: davem@davemloft.net CC: bhutchings@solarflare.com Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 5 +++++ net/8021q/vlan.h | 5 ----- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 290bd8ac94cf..dc01681fbb42 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -110,6 +110,11 @@ static inline void vlan_group_set_device(struct vlan_group *vg, array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] = dev; } +static inline int is_vlan_dev(struct net_device *dev) +{ + return dev->priv_flags & IFF_802_1Q_VLAN; +} + #define vlan_tx_tag_present(__skb) ((__skb)->vlan_tci & VLAN_TAG_PRESENT) #define vlan_tx_tag_get(__skb) ((__skb)->vlan_tci & ~VLAN_TAG_PRESENT) diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index c3408def8a19..9da07e30d1a2 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -118,11 +118,6 @@ extern void vlan_netlink_fini(void); extern struct rtnl_link_ops vlan_link_ops; -static inline int is_vlan_dev(struct net_device *dev) -{ - return dev->priv_flags & IFF_802_1Q_VLAN; -} - extern int vlan_net_id; struct proc_dir_entry; -- cgit v1.2.3