From 606576ce816603d9fe1fb453a88bc6eea16ca709 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 6 Oct 2008 19:06:12 -0400 Subject: ftrace: rename FTRACE to FUNCTION_TRACER Due to confusion between the ftrace infrastructure and the gcc profiling tracer "ftrace", this patch renames the config options from FTRACE to FUNCTION_TRACER. The other two names that are offspring from FTRACE DYNAMIC_FTRACE and FTRACE_MCOUNT_RECORD will stay the same. This patch was generated mostly by script, and partially by hand. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index a3d46151be19..0e9529589151 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -8,7 +8,7 @@ #include #include -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER extern int ftrace_enabled; extern int @@ -36,12 +36,12 @@ void clear_ftrace_function(void); extern void ftrace_stub(unsigned long a0, unsigned long a1); -#else /* !CONFIG_FTRACE */ +#else /* !CONFIG_FUNCTION_TRACER */ # define register_ftrace_function(ops) do { } while (0) # define unregister_ftrace_function(ops) do { } while (0) # define clear_ftrace_function(ops) do { } while (0) static inline void ftrace_kill_atomic(void) { } -#endif /* CONFIG_FTRACE */ +#endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_DYNAMIC_FTRACE # define FTRACE_HASHBITS 10 @@ -101,7 +101,7 @@ void ftrace_kill_atomic(void); static inline void tracer_disable(void) { -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER ftrace_enabled = 0; #endif } @@ -113,7 +113,7 @@ static inline void tracer_disable(void) */ static inline int __ftrace_enabled_save(void) { -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER int saved_ftrace_enabled = ftrace_enabled; ftrace_enabled = 0; return saved_ftrace_enabled; @@ -124,7 +124,7 @@ static inline int __ftrace_enabled_save(void) static inline void __ftrace_enabled_restore(int enabled) { -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER ftrace_enabled = enabled; #endif } -- cgit v1.2.3 From 4ce72a2c063a7fa8e42a9435440ae3364115a58d Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 22 Oct 2008 15:25:26 +0800 Subject: sched: add CONFIG_SMP consistency a patch from Henrik Austad did this: >> Do not declare select_task_rq as part of sched_class when CONFIG_SMP is >> not set. Peter observed: > While a proper cleanup, could you do it by re-arranging the methods so > as to not create an additional ifdef? Do not declare select_task_rq and some other methods as part of sched_class when CONFIG_SMP is not set. Also gather those methods to avoid CONFIG_SMP mess. Idea-by: Henrik Austad Signed-off-by: Li Zefan Acked-by: Peter Zijlstra Acked-by: Henrik Austad Signed-off-by: Ingo Molnar --- include/linux/sched.h | 12 +++++++----- kernel/sched_fair.c | 5 ++--- kernel/sched_idletask.c | 5 ++--- kernel/sched_rt.c | 5 ++--- 4 files changed, 13 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4f59c8e8597d..c05b45faef18 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -897,7 +897,6 @@ struct sched_class { void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup); void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep); void (*yield_task) (struct rq *rq); - int (*select_task_rq)(struct task_struct *p, int sync); void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int sync); @@ -905,6 +904,8 @@ struct sched_class { void (*put_prev_task) (struct rq *rq, struct task_struct *p); #ifdef CONFIG_SMP + int (*select_task_rq)(struct task_struct *p, int sync); + unsigned long (*load_balance) (struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, @@ -916,16 +917,17 @@ struct sched_class { void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); void (*post_schedule) (struct rq *this_rq); void (*task_wake_up) (struct rq *this_rq, struct task_struct *task); -#endif - void (*set_curr_task) (struct rq *rq); - void (*task_tick) (struct rq *rq, struct task_struct *p, int queued); - void (*task_new) (struct rq *rq, struct task_struct *p); void (*set_cpus_allowed)(struct task_struct *p, const cpumask_t *newmask); void (*rq_online)(struct rq *rq); void (*rq_offline)(struct rq *rq); +#endif + + void (*set_curr_task) (struct rq *rq); + void (*task_tick) (struct rq *rq, struct task_struct *p, int queued); + void (*task_new) (struct rq *rq, struct task_struct *p); void (*switched_from) (struct rq *this_rq, struct task_struct *task, int running); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index a0aa38b10fdd..8de48a5da354 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1593,9 +1593,6 @@ static const struct sched_class fair_sched_class = { .enqueue_task = enqueue_task_fair, .dequeue_task = dequeue_task_fair, .yield_task = yield_task_fair, -#ifdef CONFIG_SMP - .select_task_rq = select_task_rq_fair, -#endif /* CONFIG_SMP */ .check_preempt_curr = check_preempt_wakeup, @@ -1603,6 +1600,8 @@ static const struct sched_class fair_sched_class = { .put_prev_task = put_prev_task_fair, #ifdef CONFIG_SMP + .select_task_rq = select_task_rq_fair, + .load_balance = load_balance_fair, .move_one_task = move_one_task_fair, #endif diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index dec4ccabe2f5..8a21a2e28c13 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c @@ -105,9 +105,6 @@ static const struct sched_class idle_sched_class = { /* dequeue is not valid, we print a debug message there: */ .dequeue_task = dequeue_task_idle, -#ifdef CONFIG_SMP - .select_task_rq = select_task_rq_idle, -#endif /* CONFIG_SMP */ .check_preempt_curr = check_preempt_curr_idle, @@ -115,6 +112,8 @@ static const struct sched_class idle_sched_class = { .put_prev_task = put_prev_task_idle, #ifdef CONFIG_SMP + .select_task_rq = select_task_rq_idle, + .load_balance = load_balance_idle, .move_one_task = move_one_task_idle, #endif diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index cdf5740ab03e..c9aa5bede226 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1502,9 +1502,6 @@ static const struct sched_class rt_sched_class = { .enqueue_task = enqueue_task_rt, .dequeue_task = dequeue_task_rt, .yield_task = yield_task_rt, -#ifdef CONFIG_SMP - .select_task_rq = select_task_rq_rt, -#endif /* CONFIG_SMP */ .check_preempt_curr = check_preempt_curr_rt, @@ -1512,6 +1509,8 @@ static const struct sched_class rt_sched_class = { .put_prev_task = put_prev_task_rt, #ifdef CONFIG_SMP + .select_task_rq = select_task_rq_rt, + .load_balance = load_balance_rt, .move_one_task = move_one_task_rt, .set_cpus_allowed = set_cpus_allowed_rt, -- cgit v1.2.3 From 593eb8a2d63e95772a5f22d746f18a997c5ee463 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Oct 2008 09:32:59 -0400 Subject: ftrace: return error on failed modified text. Have the ftrace_modify_code return error values: -EFAULT on error of reading the address -EINVAL if what is read does not match what it expected -EPERM if the write fails to update after a successful match. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 14 +++++++------- include/linux/ftrace.h | 24 ++++++++++++++++++++++-- kernel/trace/ftrace.c | 21 +++++++++++++++------ 3 files changed, 44 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 8821ceabf51d..428291581cb2 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -62,7 +62,6 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code) { unsigned char replaced[MCOUNT_INSN_SIZE]; - int ret; /* * Note: Due to modules and __init, code can @@ -72,15 +71,16 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, * No real locking needed, this code is run through * kstop_machine, or before SMP starts. */ - if (__copy_from_user_inatomic(replaced, (char __user *)ip, MCOUNT_INSN_SIZE)) - return 1; + if (__copy_from_user_inatomic(replaced, (char __user *)ip, + MCOUNT_INSN_SIZE)) + return -EFAULT; if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) - return 2; + return -EINVAL; - ret = __copy_to_user_inatomic((char __user *)ip, new_code, - MCOUNT_INSN_SIZE); - WARN_ON_ONCE(ret); + if (__copy_to_user_inatomic((char __user *)ip, new_code, + MCOUNT_INSN_SIZE)) + return -EPERM; sync_core(); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0e9529589151..79fa10cbdcfb 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -72,13 +72,33 @@ extern unsigned char *ftrace_nop_replace(void); extern unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr); extern int ftrace_dyn_arch_init(void *data); extern int ftrace_mcount_set(unsigned long *data); -extern int ftrace_modify_code(unsigned long ip, unsigned char *old_code, - unsigned char *new_code); extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); +/** + * ftrace_modify_code - modify code segment + * @ip: the address of the code segment + * @old_code: the contents of what is expected to be there + * @new_code: the code to patch in + * + * This is a very sensitive operation and great care needs + * to be taken by the arch. The operation should carefully + * read the location, check to see if what is read is indeed + * what we expect it to be, and then on success of the compare, + * it should write to the location. + * + * Return must be: + * 0 on success + * -EFAULT on error reading the location + * -EINVAL on a failed compare of the contents + * -EPERM on error writing to the location + * Any other value will be considered a failure. + */ +extern int ftrace_modify_code(unsigned long ip, unsigned char *old_code, + unsigned char *new_code); + extern int skip_trace(unsigned long ip); extern void ftrace_release(void *start, unsigned long size); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1f54a94189fe..b2de8de77356 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -596,22 +596,22 @@ ftrace_code_disable(struct dyn_ftrace *rec) { unsigned long ip; unsigned char *nop, *call; - int failed; + int ret; ip = rec->ip; nop = ftrace_nop_replace(); call = ftrace_call_replace(ip, mcount_addr); - failed = ftrace_modify_code(ip, call, nop); - if (failed) { - switch (failed) { - case 1: + ret = ftrace_modify_code(ip, call, nop); + if (ret) { + switch (ret) { + case -EFAULT: WARN_ON_ONCE(1); pr_info("ftrace faulted on modifying "); print_ip_sym(ip); break; - case 2: + case -EINVAL: WARN_ON_ONCE(1); pr_info("ftrace failed to modify "); print_ip_sym(ip); @@ -620,6 +620,15 @@ ftrace_code_disable(struct dyn_ftrace *rec) print_ip_ins(" replace: ", nop); printk(KERN_CONT "\n"); break; + case -EPERM: + WARN_ON_ONCE(1); + pr_info("ftrace faulted on writing "); + print_ip_sym(ip); + break; + default: + WARN_ON_ONCE(1); + pr_info("ftrace faulted on unknown error "); + print_ip_sym(ip); } rec->flags |= FTRACE_FL_FAILED; -- cgit v1.2.3 From 81adbdc029ecc416d56563e7f159100181dd711d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Oct 2008 09:33:02 -0400 Subject: ftrace: only have ftrace_kill atomic When an anomaly is detected, we need a way to completely disable ftrace. Right now we have two functions: ftrace_kill and ftrace_kill_atomic. The ftrace_kill tries to do it in a "nice" way by converting everything back to a nop. The "nice" way is dangerous itself, so this patch removes it and only has the "atomic" version, which is all that is needed. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 3 +-- kernel/trace/ftrace.c | 42 ++---------------------------------------- kernel/trace/trace.c | 2 +- 3 files changed, 4 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 79fa10cbdcfb..ac58e94668b7 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -40,7 +40,7 @@ extern void ftrace_stub(unsigned long a0, unsigned long a1); # define register_ftrace_function(ops) do { } while (0) # define unregister_ftrace_function(ops) do { } while (0) # define clear_ftrace_function(ops) do { } while (0) -static inline void ftrace_kill_atomic(void) { } +static inline void ftrace_kill(void) { } #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_DYNAMIC_FTRACE @@ -117,7 +117,6 @@ static inline void ftrace_release(void *start, unsigned long size) { } /* totally disable ftrace - can not re-enable after this */ void ftrace_kill(void); -void ftrace_kill_atomic(void); static inline void tracer_disable(void) { diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b2de8de77356..93245ae046e1 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1549,22 +1549,6 @@ int ftrace_force_update(void) return ret; } -static void ftrace_force_shutdown(void) -{ - struct task_struct *task; - int command = FTRACE_DISABLE_CALLS | FTRACE_UPDATE_TRACE_FUNC; - - mutex_lock(&ftraced_lock); - task = ftraced_task; - ftraced_task = NULL; - ftraced_suspend = -1; - ftrace_run_update_code(command); - mutex_unlock(&ftraced_lock); - - if (task) - kthread_stop(task); -} - static __init int ftrace_init_debugfs(void) { struct dentry *d_tracer; @@ -1795,17 +1779,16 @@ core_initcall(ftrace_dynamic_init); # define ftrace_shutdown() do { } while (0) # define ftrace_startup_sysctl() do { } while (0) # define ftrace_shutdown_sysctl() do { } while (0) -# define ftrace_force_shutdown() do { } while (0) #endif /* CONFIG_DYNAMIC_FTRACE */ /** - * ftrace_kill_atomic - kill ftrace from critical sections + * ftrace_kill - kill ftrace * * This function should be used by panic code. It stops ftrace * but in a not so nice way. If you need to simply kill ftrace * from a non-atomic section, use ftrace_kill. */ -void ftrace_kill_atomic(void) +void ftrace_kill(void) { ftrace_disabled = 1; ftrace_enabled = 0; @@ -1815,27 +1798,6 @@ void ftrace_kill_atomic(void) clear_ftrace_function(); } -/** - * ftrace_kill - totally shutdown ftrace - * - * This is a safety measure. If something was detected that seems - * wrong, calling this function will keep ftrace from doing - * any more modifications, and updates. - * used when something went wrong. - */ -void ftrace_kill(void) -{ - mutex_lock(&ftrace_sysctl_lock); - ftrace_disabled = 1; - ftrace_enabled = 0; - - clear_ftrace_function(); - mutex_unlock(&ftrace_sysctl_lock); - - /* Try to totally disable ftrace */ - ftrace_force_shutdown(); -} - /** * register_ftrace_function - register a function for profiling * @ops - ops structure that holds the function for profiling. diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index aeb2f2505bc5..333a5162149b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3097,7 +3097,7 @@ void ftrace_dump(void) dump_ran = 1; /* No turning back! */ - ftrace_kill_atomic(); + ftrace_kill(); for_each_tracing_cpu(cpu) { atomic_inc(&global_trace.data[cpu]->disabled); -- cgit v1.2.3 From 4d296c24326783bff1282ac72f310d8bac8df413 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Oct 2008 09:33:06 -0400 Subject: ftrace: remove mcount set The arch dependent function ftrace_mcount_set was only used by the daemon start up code. This patch removes it. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/arm/kernel/ftrace.c | 13 ------------- arch/powerpc/kernel/ftrace.c | 17 ----------------- arch/sparc64/kernel/ftrace.c | 18 ------------------ arch/x86/kernel/ftrace.c | 7 ------- include/linux/ftrace.h | 1 - kernel/trace/ftrace.c | 9 --------- 6 files changed, 65 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 76d50e6091bc..6c90479e8974 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -95,19 +95,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func) return ret; } -int ftrace_mcount_set(unsigned long *data) -{ - unsigned long pc, old; - unsigned long *addr = data; - unsigned char *new; - - pc = (unsigned long)&mcount_call; - memcpy(&old, &mcount_call, MCOUNT_INSN_SIZE); - new = ftrace_call_replace(pc, *addr); - *addr = ftrace_modify_code(pc, (unsigned char *)&old, new); - return 0; -} - /* run from kstop_machine */ int __init ftrace_dyn_arch_init(void *data) { diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 3855ceb937b0..6b75522e8b34 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -126,23 +126,6 @@ notrace int ftrace_update_ftrace_func(ftrace_func_t func) return ret; } -notrace int ftrace_mcount_set(unsigned long *data) -{ - unsigned long ip = (long)(&mcount_call); - unsigned long *addr = data; - unsigned char old[MCOUNT_INSN_SIZE], *new; - - /* - * Replace the mcount stub with a pointer to the - * ip recorder function. - */ - memcpy(old, &mcount_call, MCOUNT_INSN_SIZE); - new = ftrace_call_replace(ip, *addr); - *addr = ftrace_modify_code(ip, old, new); - - return 0; -} - int __init ftrace_dyn_arch_init(void *data) { /* This is running in kstop_machine */ diff --git a/arch/sparc64/kernel/ftrace.c b/arch/sparc64/kernel/ftrace.c index 4298d0aee713..447942041a7c 100644 --- a/arch/sparc64/kernel/ftrace.c +++ b/arch/sparc64/kernel/ftrace.c @@ -69,24 +69,6 @@ notrace int ftrace_update_ftrace_func(ftrace_func_t func) return ftrace_modify_code(ip, old, new); } -notrace int ftrace_mcount_set(unsigned long *data) -{ - unsigned long ip = (long)(&mcount_call); - unsigned long *addr = data; - unsigned char old[MCOUNT_INSN_SIZE], *new; - - /* - * Replace the mcount stub with a pointer to the - * ip recorder function. - */ - memcpy(old, &mcount_call, MCOUNT_INSN_SIZE); - new = ftrace_call_replace(ip, *addr); - *addr = ftrace_modify_code(ip, old, new); - - return 0; -} - - int __init ftrace_dyn_arch_init(void *data) { ftrace_mcount_set(data); diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index da4fb0deecf7..b399eed23538 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -103,13 +103,6 @@ notrace int ftrace_update_ftrace_func(ftrace_func_t func) return ret; } -notrace int ftrace_mcount_set(unsigned long *data) -{ - /* mcount is initialized as a nop */ - *data = 0; - return 0; -} - int __init ftrace_dyn_arch_init(void *data) { extern const unsigned char ftrace_test_p6nop[]; diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ac58e94668b7..1c4835f86911 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -71,7 +71,6 @@ extern int ftrace_ip_converted(unsigned long ip); extern unsigned char *ftrace_nop_replace(void); extern unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr); extern int ftrace_dyn_arch_init(void *data); -extern int ftrace_mcount_set(unsigned long *data); extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index e758cab0836f..226fd9132d53 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -620,7 +620,6 @@ static int ftrace_update_code(void *ignore); static int __ftrace_modify_code(void *data) { - unsigned long addr; int *command = data; if (*command & FTRACE_ENABLE_CALLS) { @@ -639,14 +638,6 @@ static int __ftrace_modify_code(void *data) if (*command & FTRACE_UPDATE_TRACE_FUNC) ftrace_update_ftrace_func(ftrace_trace_function); - if (*command & FTRACE_ENABLE_MCOUNT) { - addr = (unsigned long)ftrace_record_ip; - ftrace_mcount_set(&addr); - } else if (*command & FTRACE_DISABLE_MCOUNT) { - addr = (unsigned long)ftrace_stub; - ftrace_mcount_set(&addr); - } - return 0; } -- cgit v1.2.3 From 08f5ac906d2c0faf96d608c54a0b03177376da8d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Oct 2008 09:33:07 -0400 Subject: ftrace: remove ftrace hash The ftrace hash was used by the ftrace_daemon code. The record ip function would place the calling address (ip) into the hash. The daemon would later read the hash and modify that code. The hash complicates the code. This patch removes it. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 8 +- kernel/trace/ftrace.c | 243 +++++++------------------------------------------ kernel/trace/trace.c | 3 - 3 files changed, 38 insertions(+), 216 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 1c4835f86911..703eb53cfa2b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -44,8 +44,6 @@ static inline void ftrace_kill(void) { } #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_DYNAMIC_FTRACE -# define FTRACE_HASHBITS 10 -# define FTRACE_HASHSIZE (1< #include #include -#include #include #include @@ -189,9 +188,7 @@ static int ftrace_filtered; static int tracing_on; static int frozen_record_count; -static struct hlist_head ftrace_hash[FTRACE_HASHSIZE]; - -static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu); +static LIST_HEAD(ftrace_new_addrs); static DEFINE_MUTEX(ftrace_regex_lock); @@ -210,8 +207,6 @@ struct ftrace_page { static struct ftrace_page *ftrace_pages_start; static struct ftrace_page *ftrace_pages; -static int ftrace_record_suspend; - static struct dyn_ftrace *ftrace_free_records; @@ -242,72 +237,6 @@ static inline int record_frozen(struct dyn_ftrace *rec) # define record_frozen(rec) ({ 0; }) #endif /* CONFIG_KPROBES */ -int skip_trace(unsigned long ip) -{ - unsigned long fl; - struct dyn_ftrace *rec; - struct hlist_node *t; - struct hlist_head *head; - - if (frozen_record_count == 0) - return 0; - - head = &ftrace_hash[hash_long(ip, FTRACE_HASHBITS)]; - hlist_for_each_entry_rcu(rec, t, head, node) { - if (rec->ip == ip) { - if (record_frozen(rec)) { - if (rec->flags & FTRACE_FL_FAILED) - return 1; - - if (!(rec->flags & FTRACE_FL_CONVERTED)) - return 1; - - if (!tracing_on || !ftrace_enabled) - return 1; - - if (ftrace_filtered) { - fl = rec->flags & (FTRACE_FL_FILTER | - FTRACE_FL_NOTRACE); - if (!fl || (fl & FTRACE_FL_NOTRACE)) - return 1; - } - } - break; - } - } - - return 0; -} - -static inline int -ftrace_ip_in_hash(unsigned long ip, unsigned long key) -{ - struct dyn_ftrace *p; - struct hlist_node *t; - int found = 0; - - hlist_for_each_entry_rcu(p, t, &ftrace_hash[key], node) { - if (p->ip == ip) { - found = 1; - break; - } - } - - return found; -} - -static inline void -ftrace_add_hash(struct dyn_ftrace *node, unsigned long key) -{ - hlist_add_head_rcu(&node->node, &ftrace_hash[key]); -} - -/* called from kstop_machine */ -static inline void ftrace_del_hash(struct dyn_ftrace *node) -{ - hlist_del(&node->node); -} - static void ftrace_free_rec(struct dyn_ftrace *rec) { rec->ip = (unsigned long)ftrace_free_records; @@ -362,69 +291,36 @@ static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) } if (ftrace_pages->index == ENTRIES_PER_PAGE) { - if (!ftrace_pages->next) - return NULL; + if (!ftrace_pages->next) { + /* allocate another page */ + ftrace_pages->next = + (void *)get_zeroed_page(GFP_KERNEL); + if (!ftrace_pages->next) + return NULL; + } ftrace_pages = ftrace_pages->next; } return &ftrace_pages->records[ftrace_pages->index++]; } -static void +static struct dyn_ftrace * ftrace_record_ip(unsigned long ip) { - struct dyn_ftrace *node; - unsigned long key; - int resched; - int cpu; + struct dyn_ftrace *rec; if (!ftrace_enabled || ftrace_disabled) - return; - - resched = need_resched(); - preempt_disable_notrace(); - - /* - * We simply need to protect against recursion. - * Use the the raw version of smp_processor_id and not - * __get_cpu_var which can call debug hooks that can - * cause a recursive crash here. - */ - cpu = raw_smp_processor_id(); - per_cpu(ftrace_shutdown_disable_cpu, cpu)++; - if (per_cpu(ftrace_shutdown_disable_cpu, cpu) != 1) - goto out; - - if (unlikely(ftrace_record_suspend)) - goto out; - - key = hash_long(ip, FTRACE_HASHBITS); - - FTRACE_WARN_ON_ONCE(key >= FTRACE_HASHSIZE); - - if (ftrace_ip_in_hash(ip, key)) - goto out; - - /* This ip may have hit the hash before the lock */ - if (ftrace_ip_in_hash(ip, key)) - goto out; - - node = ftrace_alloc_dyn_node(ip); - if (!node) - goto out; + return NULL; - node->ip = ip; + rec = ftrace_alloc_dyn_node(ip); + if (!rec) + return NULL; - ftrace_add_hash(node, key); + rec->ip = ip; - out: - per_cpu(ftrace_shutdown_disable_cpu, cpu)--; + list_add(&rec->list, &ftrace_new_addrs); - /* prevent recursion with scheduler */ - if (resched) - preempt_enable_no_resched_notrace(); - else - preempt_enable_notrace(); + return rec; } #define FTRACE_ADDR ((long)(ftrace_caller)) @@ -543,7 +439,6 @@ static void ftrace_replace_code(int enable) rec->flags |= FTRACE_FL_FAILED; if ((system_state == SYSTEM_BOOTING) || !core_kernel_text(rec->ip)) { - ftrace_del_hash(rec); ftrace_free_rec(rec); } } @@ -551,15 +446,6 @@ static void ftrace_replace_code(int enable) } } -static void ftrace_shutdown_replenish(void) -{ - if (ftrace_pages->next) - return; - - /* allocate another page */ - ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL); -} - static void print_ip_ins(const char *fmt, unsigned char *p) { int i; @@ -616,18 +502,11 @@ ftrace_code_disable(struct dyn_ftrace *rec) return 1; } -static int ftrace_update_code(void *ignore); - static int __ftrace_modify_code(void *data) { int *command = data; if (*command & FTRACE_ENABLE_CALLS) { - /* - * Update any recorded ips now that we have the - * machine stopped - */ - ftrace_update_code(NULL); ftrace_replace_code(1); tracing_on = 1; } else if (*command & FTRACE_DISABLE_CALLS) { @@ -738,84 +617,34 @@ static cycle_t ftrace_update_time; static unsigned long ftrace_update_cnt; unsigned long ftrace_update_tot_cnt; -static int ftrace_update_code(void *ignore) +static int ftrace_update_code(void) { - int i, save_ftrace_enabled; + struct dyn_ftrace *p, *t; cycle_t start, stop; - struct dyn_ftrace *p; - struct hlist_node *t, *n; - struct hlist_head *head, temp_list; - - /* Don't be recording funcs now */ - ftrace_record_suspend++; - save_ftrace_enabled = ftrace_enabled; - ftrace_enabled = 0; start = ftrace_now(raw_smp_processor_id()); ftrace_update_cnt = 0; - /* No locks needed, the machine is stopped! */ - for (i = 0; i < FTRACE_HASHSIZE; i++) { - INIT_HLIST_HEAD(&temp_list); - head = &ftrace_hash[i]; + list_for_each_entry_safe(p, t, &ftrace_new_addrs, list) { - /* all CPUS are stopped, we are safe to modify code */ - hlist_for_each_entry_safe(p, t, n, head, node) { - /* Skip over failed records which have not been - * freed. */ - if (p->flags & FTRACE_FL_FAILED) - continue; + /* If something went wrong, bail without enabling anything */ + if (unlikely(ftrace_disabled)) + return -1; - /* Unconverted records are always at the head of the - * hash bucket. Once we encounter a converted record, - * simply skip over to the next bucket. Saves ftraced - * some processor cycles (ftrace does its bid for - * global warming :-p ). */ - if (p->flags & (FTRACE_FL_CONVERTED)) - break; + list_del_init(&p->list); - /* Ignore updates to this record's mcount site. - * Reintroduce this record at the head of this - * bucket to attempt to "convert" it again if - * the kprobe on it is unregistered before the - * next run. */ - if (get_kprobe((void *)p->ip)) { - ftrace_del_hash(p); - INIT_HLIST_NODE(&p->node); - hlist_add_head(&p->node, &temp_list); - freeze_record(p); - continue; - } else { - unfreeze_record(p); - } - - /* convert record (i.e, patch mcount-call with NOP) */ - if (ftrace_code_disable(p)) { - p->flags |= FTRACE_FL_CONVERTED; - ftrace_update_cnt++; - } else { - if ((system_state == SYSTEM_BOOTING) || - !core_kernel_text(p->ip)) { - ftrace_del_hash(p); - ftrace_free_rec(p); - } - } - } - - hlist_for_each_entry_safe(p, t, n, &temp_list, node) { - hlist_del(&p->node); - INIT_HLIST_NODE(&p->node); - hlist_add_head(&p->node, head); - } + /* convert record (i.e, patch mcount-call with NOP) */ + if (ftrace_code_disable(p)) { + p->flags |= FTRACE_FL_CONVERTED; + ftrace_update_cnt++; + } else + ftrace_free_rec(p); } stop = ftrace_now(raw_smp_processor_id()); ftrace_update_time = stop - start; ftrace_update_tot_cnt += ftrace_update_cnt; - ftrace_enabled = save_ftrace_enabled; - ftrace_record_suspend--; - return 0; } @@ -847,7 +676,7 @@ static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) pg = ftrace_pages = ftrace_pages_start; cnt = num_to_init / ENTRIES_PER_PAGE; - pr_info("ftrace: allocating %ld hash entries in %d pages\n", + pr_info("ftrace: allocating %ld entries in %d pages\n", num_to_init, cnt); for (i = 0; i < cnt; i++) { @@ -1451,20 +1280,18 @@ static int ftrace_convert_nops(unsigned long *start, unsigned long addr; unsigned long flags; + mutex_lock(&ftrace_start_lock); p = start; while (p < end) { addr = ftrace_call_adjust(*p++); - /* should not be called from interrupt context */ - spin_lock(&ftrace_lock); ftrace_record_ip(addr); - spin_unlock(&ftrace_lock); - ftrace_shutdown_replenish(); } - /* p is ignored */ + /* disable interrupts to prevent kstop machine */ local_irq_save(flags); - ftrace_update_code(p); + ftrace_update_code(); local_irq_restore(flags); + mutex_unlock(&ftrace_start_lock); return 0; } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 333a5162149b..06951e229443 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -865,9 +865,6 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) if (unlikely(!ftrace_function_enabled)) return; - if (skip_trace(ip)) - return; - pc = preempt_count(); resched = need_resched(); preempt_disable_notrace(); -- cgit v1.2.3 From c3a90c788b743303c4d824780a3a7271693fb64a Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Sun, 26 Oct 2008 23:07:25 -0700 Subject: Phonet: do not reply to indication reset packets This fixes a potential error packet loop. Signed-off-by: Remi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/phonet.h | 1 + net/phonet/af_phonet.c | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phonet.h b/include/linux/phonet.h index c9609f9aedac..4157faa857b6 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -72,6 +72,7 @@ struct phonetmsg { } pn_msg_u; }; #define PN_COMMON_MESSAGE 0xF0 +#define PN_COMMGR 0x10 #define PN_PREFIX 0xE0 /* resource for extended messages */ #define pn_submsg_id pn_msg_u.base.pn_submsg_id #define pn_e_submsg_id pn_msg_u.ext.pn_e_submsg_id diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index b9d97effebe3..defeb7a0d502 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -261,6 +261,8 @@ static inline int can_respond(struct sk_buff *skb) return 0; /* we are not the destination */ if (ph->pn_res == PN_PREFIX && !pskb_may_pull(skb, 5)) return 0; + if (ph->pn_res == PN_COMMGR) /* indications */ + return 0; ph = pn_hdr(skb); /* re-acquires the pointer */ pm = pn_msg(skb); @@ -309,7 +311,8 @@ static int send_reset_indications(struct sk_buff *rskb) return pn_raw_send(data, sizeof(data), rskb->dev, pn_object(oph->pn_sdev, 0x00), - pn_object(oph->pn_rdev, oph->pn_robj), 0x10); + pn_object(oph->pn_rdev, oph->pn_robj), + PN_COMMGR); } -- cgit v1.2.3 From 3d5afd324a4bf9f64f59599bf1e93cd7dd1dc97a Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 27 Oct 2008 12:16:15 +0100 Subject: HID: fix oops during suspend of unbound HID devices Usbhid structure is allocated on start invoked only from probe of some driver. When there is no driver, the structure is null and causes null-dereference oopses. Fix it by allocating the structure on probe and disconnect of the device itself. Also make sure we won't race between start and resume or stop and suspend respectively. References: http://bugzilla.kernel.org/show_bug.cgi?id=11827 Signed-off-by: Jiri Slaby Cc: Johannes Berg Cc: Andreas Schwab Signed-off-by: Jiri Kosina --- drivers/hid/usbhid/hid-core.c | 58 ++++++++++++++++++++++++++++++------------- drivers/hid/usbhid/usbhid.h | 2 ++ include/linux/hid.h | 1 + 3 files changed, 44 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index 42bdd83444c1..3b1c489998c3 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -776,21 +777,10 @@ static int usbhid_start(struct hid_device *hid) struct usb_interface *intf = to_usb_interface(hid->dev.parent); struct usb_host_interface *interface = intf->cur_altsetting; struct usb_device *dev = interface_to_usbdev(intf); - struct usbhid_device *usbhid; + struct usbhid_device *usbhid = hid->driver_data; unsigned int n, insize = 0; int ret; - WARN_ON(hid->driver_data); - - usbhid = kzalloc(sizeof(struct usbhid_device), GFP_KERNEL); - if (usbhid == NULL) { - ret = -ENOMEM; - goto err; - } - - hid->driver_data = usbhid; - usbhid->hid = hid; - usbhid->bufsize = HID_MIN_BUFFER_SIZE; hid_find_max_report(hid, HID_INPUT_REPORT, &usbhid->bufsize); hid_find_max_report(hid, HID_OUTPUT_REPORT, &usbhid->bufsize); @@ -804,6 +794,7 @@ static int usbhid_start(struct hid_device *hid) if (insize > HID_MAX_BUFFER_SIZE) insize = HID_MAX_BUFFER_SIZE; + mutex_lock(&usbhid->setup); if (hid_alloc_buffers(dev, hid)) { ret = -ENOMEM; goto fail; @@ -888,6 +879,9 @@ static int usbhid_start(struct hid_device *hid) usbhid_init_reports(hid); hid_dump_device(hid); + set_bit(HID_STARTED, &usbhid->iofl); + mutex_unlock(&usbhid->setup); + return 0; fail: @@ -895,8 +889,7 @@ fail: usb_free_urb(usbhid->urbout); usb_free_urb(usbhid->urbctrl); hid_free_buffers(dev, hid); - kfree(usbhid); -err: + mutex_unlock(&usbhid->setup); return ret; } @@ -907,6 +900,8 @@ static void usbhid_stop(struct hid_device *hid) if (WARN_ON(!usbhid)) return; + mutex_lock(&usbhid->setup); + clear_bit(HID_STARTED, &usbhid->iofl); spin_lock_irq(&usbhid->inlock); /* Sync with error handler */ set_bit(HID_DISCONNECTED, &usbhid->iofl); spin_unlock_irq(&usbhid->inlock); @@ -931,8 +926,7 @@ static void usbhid_stop(struct hid_device *hid) usb_free_urb(usbhid->urbout); hid_free_buffers(hid_to_usb_dev(hid), hid); - kfree(usbhid); - hid->driver_data = NULL; + mutex_unlock(&usbhid->setup); } static struct hid_ll_driver usb_hid_driver = { @@ -947,6 +941,7 @@ static struct hid_ll_driver usb_hid_driver = { static int hid_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *dev = interface_to_usbdev(intf); + struct usbhid_device *usbhid; struct hid_device *hid; size_t len; int ret; @@ -1000,14 +995,26 @@ static int hid_probe(struct usb_interface *intf, const struct usb_device_id *id) if (usb_string(dev, dev->descriptor.iSerialNumber, hid->uniq, 64) <= 0) hid->uniq[0] = 0; + usbhid = kzalloc(sizeof(*usbhid), GFP_KERNEL); + if (usbhid == NULL) { + ret = -ENOMEM; + goto err; + } + + hid->driver_data = usbhid; + usbhid->hid = hid; + mutex_init(&usbhid->setup); /* needed on suspend/resume */ + ret = hid_add_device(hid); if (ret) { if (ret != -ENODEV) dev_err(&intf->dev, "can't add hid device: %d\n", ret); - goto err; + goto err_free; } return 0; +err_free: + kfree(usbhid); err: hid_destroy_device(hid); return ret; @@ -1016,11 +1023,14 @@ err: static void hid_disconnect(struct usb_interface *intf) { struct hid_device *hid = usb_get_intfdata(intf); + struct usbhid_device *usbhid; if (WARN_ON(!hid)) return; + usbhid = hid->driver_data; hid_destroy_device(hid); + kfree(usbhid); } static int hid_suspend(struct usb_interface *intf, pm_message_t message) @@ -1028,11 +1038,18 @@ static int hid_suspend(struct usb_interface *intf, pm_message_t message) struct hid_device *hid = usb_get_intfdata (intf); struct usbhid_device *usbhid = hid->driver_data; + mutex_lock(&usbhid->setup); + if (!test_bit(HID_STARTED, &usbhid->iofl)) { + mutex_unlock(&usbhid->setup); + return 0; + } + spin_lock_irq(&usbhid->inlock); /* Sync with error handler */ set_bit(HID_SUSPENDED, &usbhid->iofl); spin_unlock_irq(&usbhid->inlock); del_timer(&usbhid->io_retry); usb_kill_urb(usbhid->urbin); + mutex_unlock(&usbhid->setup); dev_dbg(&intf->dev, "suspend\n"); return 0; } @@ -1043,9 +1060,16 @@ static int hid_resume(struct usb_interface *intf) struct usbhid_device *usbhid = hid->driver_data; int status; + mutex_lock(&usbhid->setup); + if (!test_bit(HID_STARTED, &usbhid->iofl)) { + mutex_unlock(&usbhid->setup); + return 0; + } + clear_bit(HID_SUSPENDED, &usbhid->iofl); usbhid->retry_delay = 0; status = hid_start_in(hid); + mutex_unlock(&usbhid->setup); dev_dbg(&intf->dev, "resume status %d\n", status); return status; } diff --git a/drivers/hid/usbhid/usbhid.h b/drivers/hid/usbhid/usbhid.h index abedb13c623e..55973ff54008 100644 --- a/drivers/hid/usbhid/usbhid.h +++ b/drivers/hid/usbhid/usbhid.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -73,6 +74,7 @@ struct usbhid_device { dma_addr_t outbuf_dma; /* Output buffer dma */ spinlock_t outlock; /* Output fifo spinlock */ + struct mutex setup; unsigned long iofl; /* I/O flags (CTRL_RUNNING, OUT_RUNNING) */ struct timer_list io_retry; /* Retry timer */ unsigned long stop_retry; /* Time to give up, in jiffies */ diff --git a/include/linux/hid.h b/include/linux/hid.h index 5355ca4b939e..e5780f8c934a 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -410,6 +410,7 @@ struct hid_output_fifo { #define HID_SUSPENDED 5 #define HID_CLEAR_HALT 6 #define HID_DISCONNECTED 7 +#define HID_STARTED 8 struct hid_input { struct list_head list; -- cgit v1.2.3 From 5550af4df179e52753d3a43a788a113ad8cd95cd Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 15 Oct 2008 20:15:06 +0800 Subject: KVM: Fix guest shared interrupt with in-kernel irqchip Every call of kvm_set_irq() should offer an irq_source_id, which is allocated by kvm_request_irq_source_id(). Based on irq_source_id, we identify the irq source and implement logical OR for shared level interrupts. The allocated irq_source_id can be freed by kvm_free_irq_source_id(). Currently, we support at most sizeof(unsigned long) different irq sources. [Amit: - rebase to kvm.git HEAD - move definition of KVM_USERSPACE_IRQ_SOURCE_ID to common file - move kvm_request_irq_source_id to the update_irq ioctl] [Xiantao: - Add kvm/ia64 stuff and make it work for kvm/ia64 guests] Signed-off-by: Sheng Yang Signed-off-by: Amit Shah Signed-off-by: Xiantao Zhang Signed-off-by: Avi Kivity --- arch/ia64/include/asm/kvm_host.h | 3 +++ arch/ia64/kvm/kvm-ia64.c | 8 +++++--- arch/x86/include/asm/kvm_host.h | 3 +++ arch/x86/kvm/i8254.c | 11 +++++++++-- arch/x86/kvm/i8254.h | 1 + arch/x86/kvm/x86.c | 6 +++++- include/linux/kvm_host.h | 7 ++++++- virt/kvm/irq_comm.c | 42 +++++++++++++++++++++++++++++++++++++--- virt/kvm/kvm_main.c | 12 ++++++++---- 9 files changed, 79 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index 85db124d37f6..04c0b88f7b3a 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -417,6 +417,9 @@ struct kvm_arch { struct list_head assigned_dev_head; struct dmar_domain *intel_iommu_domain; struct hlist_head irq_ack_notifier_list; + + unsigned long irq_sources_bitmap; + unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; }; union cpuid3_t { diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index a312c9e9b9ef..8a2b13ff0aff 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -778,6 +778,9 @@ static void kvm_init_vm(struct kvm *kvm) kvm_build_io_pmt(kvm); INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); + + /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ + set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); } struct kvm *kvm_arch_create_vm(void) @@ -941,9 +944,8 @@ long kvm_arch_vm_ioctl(struct file *filp, goto out; if (irqchip_in_kernel(kvm)) { mutex_lock(&kvm->lock); - kvm_ioapic_set_irq(kvm->arch.vioapic, - irq_event.irq, - irq_event.level); + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, + irq_event.irq, irq_event.level); mutex_unlock(&kvm->lock); r = 0; } diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 65679d006337..8346be87cfa1 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -364,6 +364,9 @@ struct kvm_arch{ struct page *ept_identity_pagetable; bool ept_identity_pagetable_done; + + unsigned long irq_sources_bitmap; + unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; }; struct kvm_vm_stat { diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 11c6725fb798..8772dc946823 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -545,6 +545,12 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm) if (!pit) return NULL; + mutex_lock(&kvm->lock); + pit->irq_source_id = kvm_request_irq_source_id(kvm); + mutex_unlock(&kvm->lock); + if (pit->irq_source_id < 0) + return NULL; + mutex_init(&pit->pit_state.lock); mutex_lock(&pit->pit_state.lock); spin_lock_init(&pit->pit_state.inject_lock); @@ -587,6 +593,7 @@ void kvm_free_pit(struct kvm *kvm) mutex_lock(&kvm->arch.vpit->pit_state.lock); timer = &kvm->arch.vpit->pit_state.pit_timer.timer; hrtimer_cancel(timer); + kvm_free_irq_source_id(kvm, kvm->arch.vpit->irq_source_id); mutex_unlock(&kvm->arch.vpit->pit_state.lock); kfree(kvm->arch.vpit); } @@ -595,8 +602,8 @@ void kvm_free_pit(struct kvm *kvm) static void __inject_pit_timer_intr(struct kvm *kvm) { mutex_lock(&kvm->lock); - kvm_set_irq(kvm, 0, 1); - kvm_set_irq(kvm, 0, 0); + kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); + kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); mutex_unlock(&kvm->lock); } diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index e436d4983aa1..4178022b97aa 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h @@ -44,6 +44,7 @@ struct kvm_pit { struct kvm_io_device speaker_dev; struct kvm *kvm; struct kvm_kpit_state pit_state; + int irq_source_id; }; #define KVM_PIT_BASE_ADDRESS 0x40 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4f0677d1eae8..f1f8ff2f1fa2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1742,7 +1742,8 @@ long kvm_arch_vm_ioctl(struct file *filp, goto out; if (irqchip_in_kernel(kvm)) { mutex_lock(&kvm->lock); - kvm_set_irq(kvm, irq_event.irq, irq_event.level); + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, + irq_event.irq, irq_event.level); mutex_unlock(&kvm->lock); r = 0; } @@ -4013,6 +4014,9 @@ struct kvm *kvm_arch_create_vm(void) INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); + /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ + set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); + return kvm; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3833c48fae3a..bb92be2153bc 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -37,6 +37,8 @@ #define KVM_REQ_UNHALT 6 #define KVM_REQ_MMU_SYNC 7 +#define KVM_USERSPACE_IRQ_SOURCE_ID 0 + struct kvm_vcpu; extern struct kmem_cache *kvm_vcpu_cache; @@ -306,15 +308,18 @@ struct kvm_assigned_dev_kernel { int host_irq; int guest_irq; int irq_requested; + int irq_source_id; struct pci_dev *dev; struct kvm *kvm; }; -void kvm_set_irq(struct kvm *kvm, int irq, int level); +void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level); void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi); void kvm_register_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); void kvm_unregister_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); +int kvm_request_irq_source_id(struct kvm *kvm); +void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); #ifdef CONFIG_DMAR int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index d0169f5e6047..55ad76ee2d09 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -25,15 +25,23 @@ #include "ioapic.h" /* This should be called with the kvm->lock mutex held */ -void kvm_set_irq(struct kvm *kvm, int irq, int level) +void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) { + unsigned long *irq_state = (unsigned long *)&kvm->arch.irq_states[irq]; + + /* Logical OR for level trig interrupt */ + if (level) + set_bit(irq_source_id, irq_state); + else + clear_bit(irq_source_id, irq_state); + /* Not possible to detect if the guest uses the PIC or the * IOAPIC. So set the bit in both. The guest will ignore * writes to the unused one. */ - kvm_ioapic_set_irq(kvm->arch.vioapic, irq, level); + kvm_ioapic_set_irq(kvm->arch.vioapic, irq, !!(*irq_state)); #ifdef CONFIG_X86 - kvm_pic_set_irq(pic_irqchip(kvm), irq, level); + kvm_pic_set_irq(pic_irqchip(kvm), irq, !!(*irq_state)); #endif } @@ -58,3 +66,31 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm, { hlist_del(&kian->link); } + +/* The caller must hold kvm->lock mutex */ +int kvm_request_irq_source_id(struct kvm *kvm) +{ + unsigned long *bitmap = &kvm->arch.irq_sources_bitmap; + int irq_source_id = find_first_zero_bit(bitmap, + sizeof(kvm->arch.irq_sources_bitmap)); + if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { + printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n"); + irq_source_id = -EFAULT; + } else + set_bit(irq_source_id, bitmap); + return irq_source_id; +} + +void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) +{ + int i; + + if (irq_source_id <= 0 || + irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { + printk(KERN_ERR "kvm: IRQ source ID out of range!\n"); + return; + } + for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) + clear_bit(irq_source_id, &kvm->arch.irq_states[i]); + clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); +} diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index cf0ab8ed3845..a87f45edfae8 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -105,14 +105,12 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) */ mutex_lock(&assigned_dev->kvm->lock); kvm_set_irq(assigned_dev->kvm, + assigned_dev->irq_source_id, assigned_dev->guest_irq, 1); mutex_unlock(&assigned_dev->kvm->lock); kvm_put_kvm(assigned_dev->kvm); } -/* FIXME: Implement the OR logic needed to make shared interrupts on - * this line behave properly - */ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) { struct kvm_assigned_dev_kernel *assigned_dev = @@ -134,7 +132,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) dev = container_of(kian, struct kvm_assigned_dev_kernel, ack_notifier); - kvm_set_irq(dev->kvm, dev->guest_irq, 0); + kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); enable_irq(dev->host_irq); } @@ -146,6 +144,7 @@ static void kvm_free_assigned_device(struct kvm *kvm, free_irq(assigned_dev->host_irq, (void *)assigned_dev); kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); + kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); if (cancel_work_sync(&assigned_dev->interrupt_work)) /* We had pending work. That means we will have to take @@ -215,6 +214,11 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, match->ack_notifier.gsi = assigned_irq->guest_irq; match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; kvm_register_irq_ack_notifier(kvm, &match->ack_notifier); + r = kvm_request_irq_source_id(kvm); + if (r < 0) + goto out_release; + else + match->irq_source_id = r; /* Even though this is PCI, we don't want to use shared * interrupts. Sharing host devices with guest-assigned devices -- cgit v1.2.3 From bb45e202e695dea8657bb03a01d1522c37558672 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 19 Oct 2008 16:39:45 +0200 Subject: KVM: Future-proof device assignment ABI Reserve some space so we can add more data. Signed-off-by: Avi Kivity --- include/linux/kvm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 797fcd781242..f18b86fa8655 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -489,6 +489,9 @@ struct kvm_assigned_pci_dev { __u32 busnr; __u32 devfn; __u32 flags; + union { + __u32 reserved[12]; + }; }; struct kvm_assigned_irq { @@ -496,6 +499,9 @@ struct kvm_assigned_irq { __u32 host_irq; __u32 guest_irq; __u32 flags; + union { + __u32 reserved[12]; + }; }; #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) -- cgit v1.2.3 From 8175fe2dda1c93a9c596921c8ed4a0b4baccdefe Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Sun, 26 Oct 2008 00:30:18 +0200 Subject: HID: fix hid_device_id for cross compiling struct hid_device_id contains hidden padding which is bad for cross compiling. Make the padding explicit and consistent across architectures. Signed-off-by: Andreas Schwab Signed-off-by: Jiri Kosina --- include/linux/mod_devicetable.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index eb71b45fdf5a..97b91d1abb43 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -135,6 +135,7 @@ struct usb_device_id { struct hid_device_id { __u16 bus; + __u16 pad1; __u32 vendor; __u32 product; kernel_ulong_t driver_data -- cgit v1.2.3 From 3f5e26cee443eb4d3900cd3085664c3e51b72135 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Sat, 25 Oct 2008 15:02:51 -0700 Subject: adjust init section definitions Add rodata equivalents for assembly use, and fix the section attributes used by __REFCONST. Signed-off-by: Jan Beulich Signed-off-by: Andrew Morton Signed-off-by: Sam Ravnborg --- include/linux/init.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index 0c1264668be0..68cb0265d009 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -112,21 +112,25 @@ #define __FINIT .previous #define __INITDATA .section ".init.data","aw" +#define __INITRODATA .section ".init.rodata","a" #define __FINITDATA .previous #define __DEVINIT .section ".devinit.text", "ax" #define __DEVINITDATA .section ".devinit.data", "aw" +#define __DEVINITRODATA .section ".devinit.rodata", "a" #define __CPUINIT .section ".cpuinit.text", "ax" #define __CPUINITDATA .section ".cpuinit.data", "aw" +#define __CPUINITRODATA .section ".cpuinit.rodata", "a" #define __MEMINIT .section ".meminit.text", "ax" #define __MEMINITDATA .section ".meminit.data", "aw" +#define __MEMINITRODATA .section ".meminit.rodata", "a" /* silence warnings when references are OK */ #define __REF .section ".ref.text", "ax" #define __REFDATA .section ".ref.data", "aw" -#define __REFCONST .section ".ref.rodata", "aw" +#define __REFCONST .section ".ref.rodata", "a" #ifndef __ASSEMBLY__ /* -- cgit v1.2.3 From 0833422274ff00729a603b020fac297e69a03e40 Mon Sep 17 00:00:00 2001 From: Kurt Garloff Date: Wed, 29 Oct 2008 14:00:48 -0700 Subject: mm: increase the default mlock limit from 32k to 64k By default, non-privileged tasks can only mlock() a small amount of memory to avoid a DoS attack by ordinary users. The Linux kernel defaulted to 32k (on a 4k page size system) to accommodate the needs of gpg. However, newer gpg2 needs 64k in various circumstances and otherwise fails miserably, see bnc#329675. Change the default to 64k, and make it more agnostic to PAGE_SIZE. Signed-off-by: Kurt Garloff Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/resource.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/resource.h b/include/linux/resource.h index aaa423a6f3d9..40fc7e626082 100644 --- a/include/linux/resource.h +++ b/include/linux/resource.h @@ -59,10 +59,10 @@ struct rlimit { #define _STK_LIM (8*1024*1024) /* - * GPG wants 32kB of mlocked memory, to make sure pass phrases + * GPG2 wants 64kB of mlocked memory, to make sure pass phrases * and other sensitive information are never written to disk. */ -#define MLOCK_LIMIT (8 * PAGE_SIZE) +#define MLOCK_LIMIT ((PAGE_SIZE > 64*1024) ? PAGE_SIZE : 64*1024) /* * Due to binary compatibility, the actual resource numbers -- cgit v1.2.3 From 00c2e63c31d0f431952ff2a671c5c6997dd4f8b2 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 29 Oct 2008 14:00:53 -0700 Subject: freezer_cg: use thaw_process() in unfreeze_cgroup() Don't duplicate the implementation of thaw_process(). [akpm@linux-foundation.org: make __thaw_process() static] Signed-off-by: Li Zefan Cc: Cedric Le Goater Acked-by: Matt Helsley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/freezer.h | 5 ----- kernel/cgroup_freezer.c | 15 ++++----------- kernel/freezer.c | 20 ++++++++++---------- 3 files changed, 14 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 8f225339eee9..5a361f85cfec 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -44,11 +44,6 @@ static inline bool should_send_signal(struct task_struct *p) return !(p->flags & PF_FREEZER_NOSIG); } -/* - * Wake up a frozen process - */ -extern int __thaw_process(struct task_struct *p); - /* Takes and releases task alloc lock using task_lock() */ extern int thaw_process(struct task_struct *p); diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index e9c856a265c9..5e6d26b66e88 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -275,25 +275,18 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) return num_cant_freeze_now ? -EBUSY : 0; } -static int unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) +static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) { struct cgroup_iter it; struct task_struct *task; cgroup_iter_start(cgroup, &it); while ((task = cgroup_iter_next(cgroup, &it))) { - int do_wake; - - task_lock(task); - do_wake = __thaw_process(task); - task_unlock(task); - if (do_wake) - wake_up_process(task); + thaw_process(task); } cgroup_iter_end(cgroup, &it); - freezer->state = CGROUP_THAWED; - return 0; + freezer->state = CGROUP_THAWED; } static int freezer_change_state(struct cgroup *cgroup, @@ -320,7 +313,7 @@ static int freezer_change_state(struct cgroup *cgroup, } /* state == FREEZING and goal_state == THAWED, so unfreeze */ case CGROUP_FROZEN: - retval = unfreeze_cgroup(cgroup, freezer); + unfreeze_cgroup(cgroup, freezer); break; default: break; diff --git a/kernel/freezer.c b/kernel/freezer.c index ba6248b323ef..2f4936cf7083 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -121,16 +121,7 @@ void cancel_freezing(struct task_struct *p) } } -/* - * Wake up a frozen process - * - * task_lock() is needed to prevent the race with refrigerator() which may - * occur if the freezing of tasks fails. Namely, without the lock, if the - * freezing of tasks failed, thaw_tasks() might have run before a task in - * refrigerator() could call frozen_process(), in which case the task would be - * frozen and no one would thaw it. - */ -int __thaw_process(struct task_struct *p) +static int __thaw_process(struct task_struct *p) { if (frozen(p)) { p->flags &= ~PF_FROZEN; @@ -140,6 +131,15 @@ int __thaw_process(struct task_struct *p) return 0; } +/* + * Wake up a frozen process + * + * task_lock() is needed to prevent the race with refrigerator() which may + * occur if the freezing of tasks fails. Namely, without the lock, if the + * freezing of tasks failed, thaw_tasks() might have run before a task in + * refrigerator() could call frozen_process(), in which case the task would be + * frozen and no one would thaw it. + */ int thaw_process(struct task_struct *p) { task_lock(p); -- cgit v1.2.3 From 9b913735e53ab0da4a792bac0de8e178cc13dcfb Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 29 Oct 2008 14:00:54 -0700 Subject: cgroups: tiny cleanups - remove 'private' field from struct subsys - remove cgroup_init_smp() Signed-off-by: Li Zefan Acked-by: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 8b00f6643e93..1164963c3a85 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -25,7 +25,6 @@ struct cgroup; extern int cgroup_init_early(void); extern int cgroup_init(void); -extern void cgroup_init_smp(void); extern void cgroup_lock(void); extern bool cgroup_lock_live_group(struct cgroup *cgrp); extern void cgroup_unlock(void); @@ -348,8 +347,6 @@ struct cgroup_subsys { struct cgroupfs_root *root; struct list_head sibling; - - void *private; }; #define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys; @@ -410,7 +407,6 @@ void cgroup_mm_owner_callbacks(struct task_struct *old, static inline int cgroup_init_early(void) { return 0; } static inline int cgroup_init(void) { return 0; } -static inline void cgroup_init_smp(void) {} static inline void cgroup_fork(struct task_struct *p) {} static inline void cgroup_fork_callbacks(struct task_struct *p) {} static inline void cgroup_post_fork(struct task_struct *p) {} -- cgit v1.2.3 From 4e02ed4b4a2fae34aae766a5bb93ae235f60adb8 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 29 Oct 2008 14:00:55 -0700 Subject: fs: remove prepare_write/commit_write Nothing uses prepare_write or commit_write. Remove them from the tree completely. [akpm@linux-foundation.org: schedule simple_prepare_write() for unexporting] Signed-off-by: Nick Piggin Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/Locking | 12 +- Documentation/filesystems/vfs.txt | 39 +----- drivers/block/loop.c | 5 +- fs/fat/inode.c | 2 +- fs/libfs.c | 2 +- fs/ocfs2/file.c | 3 +- fs/splice.c | 4 +- include/linux/fs.h | 7 -- mm/filemap.c | 242 +------------------------------------- 9 files changed, 23 insertions(+), 293 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 8362860e21a7..23d2f4460deb 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -161,8 +161,12 @@ prototypes: int (*set_page_dirty)(struct page *page); int (*readpages)(struct file *filp, struct address_space *mapping, struct list_head *pages, unsigned nr_pages); - int (*prepare_write)(struct file *, struct page *, unsigned, unsigned); - int (*commit_write)(struct file *, struct page *, unsigned, unsigned); + int (*write_begin)(struct file *, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata); + int (*write_end)(struct file *, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata); sector_t (*bmap)(struct address_space *, sector_t); int (*invalidatepage) (struct page *, unsigned long); int (*releasepage) (struct page *, int); @@ -180,8 +184,6 @@ sync_page: no maybe writepages: no set_page_dirty no no readpages: no -prepare_write: no yes yes -commit_write: no yes yes write_begin: no locks the page yes write_end: no yes, unlocks yes perform_write: no n/a yes @@ -191,7 +193,7 @@ releasepage: no yes direct_IO: no launder_page: no yes - ->prepare_write(), ->commit_write(), ->sync_page() and ->readpage() + ->write_begin(), ->write_end(), ->sync_page() and ->readpage() may be called from the request handler (/dev/loop). ->readpage() unlocks the page, either synchronously or via I/O diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index c4d348dabe94..5579bda58a6d 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -492,7 +492,7 @@ written-back to storage typically in whole pages, however the address_space has finer control of write sizes. The read process essentially only requires 'readpage'. The write -process is more complicated and uses prepare_write/commit_write or +process is more complicated and uses write_begin/write_end or set_page_dirty to write data into the address_space, and writepage, sync_page, and writepages to writeback data to storage. @@ -521,8 +521,6 @@ struct address_space_operations { int (*set_page_dirty)(struct page *page); int (*readpages)(struct file *filp, struct address_space *mapping, struct list_head *pages, unsigned nr_pages); - int (*prepare_write)(struct file *, struct page *, unsigned, unsigned); - int (*commit_write)(struct file *, struct page *, unsigned, unsigned); int (*write_begin)(struct file *, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata); @@ -598,37 +596,7 @@ struct address_space_operations { readpages is only used for read-ahead, so read errors are ignored. If anything goes wrong, feel free to give up. - prepare_write: called by the generic write path in VM to set up a write - request for a page. This indicates to the address space that - the given range of bytes is about to be written. The - address_space should check that the write will be able to - complete, by allocating space if necessary and doing any other - internal housekeeping. If the write will update parts of - any basic-blocks on storage, then those blocks should be - pre-read (if they haven't been read already) so that the - updated blocks can be written out properly. - The page will be locked. - - Note: the page _must not_ be marked uptodate in this function - (or anywhere else) unless it actually is uptodate right now. As - soon as a page is marked uptodate, it is possible for a concurrent - read(2) to copy it to userspace. - - commit_write: If prepare_write succeeds, new data will be copied - into the page and then commit_write will be called. It will - typically update the size of the file (if appropriate) and - mark the inode as dirty, and do any other related housekeeping - operations. It should avoid returning an error if possible - - errors should have been handled by prepare_write. - - write_begin: This is intended as a replacement for prepare_write. The - key differences being that: - - it returns a locked page (in *pagep) rather than being - given a pre locked page; - - it must be able to cope with short writes (where the - length passed to write_begin is greater than the number - of bytes copied into the page). - + write_begin: Called by the generic buffered write code to ask the filesystem to prepare to write len bytes at the given offset in the file. The address_space should check that the write will be able to complete, @@ -640,6 +608,9 @@ struct address_space_operations { The filesystem must return the locked pagecache page for the specified offset, in *pagep, for the caller to write into. + It must be able to cope with short writes (where the length passed to + write_begin is greater than the number of bytes copied into the page). + flags is a field for AOP_FLAG_xxx flags, described in include/linux/fs.h. diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 3f09cd8bcc38..5c4ee70d5cf3 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -40,8 +40,7 @@ * Heinz Mauelshagen , Feb 2002 * * Support for falling back on the write file operation when the address space - * operations prepare_write and/or commit_write are not available on the - * backing filesystem. + * operations write_begin is not available on the backing filesystem. * Anton Altaparmakov, 16 Feb 2005 * * Still To Fix: @@ -765,7 +764,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, */ if (!file->f_op->splice_read) goto out_putf; - if (aops->prepare_write || aops->write_begin) + if (aops->write_begin) lo_flags |= LO_FLAGS_USE_AOPS; if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write) lo_flags |= LO_FLAGS_READ_ONLY; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 19eafbe3c379..2b2eec1283bf 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -175,7 +175,7 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, if (rw == WRITE) { /* - * FIXME: blockdev_direct_IO() doesn't use ->prepare_write(), + * FIXME: blockdev_direct_IO() doesn't use ->write_begin(), * so we need to update the ->mmu_private to block boundary. * * But we must fill the remaining area or hole by nul for diff --git a/fs/libfs.c b/fs/libfs.c index 74688598bcf7..e960a8321902 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -814,7 +814,7 @@ EXPORT_SYMBOL(simple_getattr); EXPORT_SYMBOL(simple_link); EXPORT_SYMBOL(simple_lookup); EXPORT_SYMBOL(simple_pin_fs); -EXPORT_SYMBOL(simple_prepare_write); +EXPORT_UNUSED_SYMBOL(simple_prepare_write); EXPORT_SYMBOL(simple_readpage); EXPORT_SYMBOL(simple_release_fs); EXPORT_SYMBOL(simple_rename); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8d3225a78073..7efe937a415f 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -679,8 +679,7 @@ leave: /* Some parts of this taken from generic_cont_expand, which turned out * to be too fragile to do exactly what we need without us having to - * worry about recursive locking in ->prepare_write() and - * ->commit_write(). */ + * worry about recursive locking in ->write_begin() and ->write_end(). */ static int ocfs2_write_zero_page(struct inode *inode, u64 size) { diff --git a/fs/splice.c b/fs/splice.c index a1e701c27156..1abab5cee4ba 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -731,8 +731,8 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, }; /* - * The actor worker might be calling ->prepare_write and - * ->commit_write. Most of the time, these expect i_mutex to + * The actor worker might be calling ->write_begin and + * ->write_end. Most of the time, these expect i_mutex to * be held. Since this may result in an ABBA deadlock with * pipe->inode, we have to order lock acquiry here. */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 5b248d61430c..0dcdd9458f4b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -489,13 +489,6 @@ struct address_space_operations { int (*readpages)(struct file *filp, struct address_space *mapping, struct list_head *pages, unsigned nr_pages); - /* - * ext3 requires that a successful prepare_write() call be followed - * by a commit_write() call - they must be balanced - */ - int (*prepare_write)(struct file *, struct page *, unsigned, unsigned); - int (*commit_write)(struct file *, struct page *, unsigned, unsigned); - int (*write_begin)(struct file *, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata); diff --git a/mm/filemap.c b/mm/filemap.c index ab8553658af3..f3e5f8944d17 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2029,48 +2029,8 @@ int pagecache_write_begin(struct file *file, struct address_space *mapping, { const struct address_space_operations *aops = mapping->a_ops; - if (aops->write_begin) { - return aops->write_begin(file, mapping, pos, len, flags, + return aops->write_begin(file, mapping, pos, len, flags, pagep, fsdata); - } else { - int ret; - pgoff_t index = pos >> PAGE_CACHE_SHIFT; - unsigned offset = pos & (PAGE_CACHE_SIZE - 1); - struct inode *inode = mapping->host; - struct page *page; -again: - page = __grab_cache_page(mapping, index); - *pagep = page; - if (!page) - return -ENOMEM; - - if (flags & AOP_FLAG_UNINTERRUPTIBLE && !PageUptodate(page)) { - /* - * There is no way to resolve a short write situation - * for a !Uptodate page (except by double copying in - * the caller done by generic_perform_write_2copy). - * - * Instead, we have to bring it uptodate here. - */ - ret = aops->readpage(file, page); - page_cache_release(page); - if (ret) { - if (ret == AOP_TRUNCATED_PAGE) - goto again; - return ret; - } - goto again; - } - - ret = aops->prepare_write(file, page, offset, offset+len); - if (ret) { - unlock_page(page); - page_cache_release(page); - if (pos + len > inode->i_size) - vmtruncate(inode, inode->i_size); - } - return ret; - } } EXPORT_SYMBOL(pagecache_write_begin); @@ -2079,32 +2039,9 @@ int pagecache_write_end(struct file *file, struct address_space *mapping, struct page *page, void *fsdata) { const struct address_space_operations *aops = mapping->a_ops; - int ret; - - if (aops->write_end) { - mark_page_accessed(page); - ret = aops->write_end(file, mapping, pos, len, copied, - page, fsdata); - } else { - unsigned offset = pos & (PAGE_CACHE_SIZE - 1); - struct inode *inode = mapping->host; - - flush_dcache_page(page); - ret = aops->commit_write(file, page, offset, offset+len); - unlock_page(page); - mark_page_accessed(page); - page_cache_release(page); - - if (ret < 0) { - if (pos + len > inode->i_size) - vmtruncate(inode, inode->i_size); - } else if (ret > 0) - ret = min_t(size_t, copied, ret); - else - ret = copied; - } - return ret; + mark_page_accessed(page); + return aops->write_end(file, mapping, pos, len, copied, page, fsdata); } EXPORT_SYMBOL(pagecache_write_end); @@ -2226,174 +2163,6 @@ repeat: } EXPORT_SYMBOL(__grab_cache_page); -static ssize_t generic_perform_write_2copy(struct file *file, - struct iov_iter *i, loff_t pos) -{ - struct address_space *mapping = file->f_mapping; - const struct address_space_operations *a_ops = mapping->a_ops; - struct inode *inode = mapping->host; - long status = 0; - ssize_t written = 0; - - do { - struct page *src_page; - struct page *page; - pgoff_t index; /* Pagecache index for current page */ - unsigned long offset; /* Offset into pagecache page */ - unsigned long bytes; /* Bytes to write to page */ - size_t copied; /* Bytes copied from user */ - - offset = (pos & (PAGE_CACHE_SIZE - 1)); - index = pos >> PAGE_CACHE_SHIFT; - bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset, - iov_iter_count(i)); - - /* - * a non-NULL src_page indicates that we're doing the - * copy via get_user_pages and kmap. - */ - src_page = NULL; - - /* - * Bring in the user page that we will copy from _first_. - * Otherwise there's a nasty deadlock on copying from the - * same page as we're writing to, without it being marked - * up-to-date. - * - * Not only is this an optimisation, but it is also required - * to check that the address is actually valid, when atomic - * usercopies are used, below. - */ - if (unlikely(iov_iter_fault_in_readable(i, bytes))) { - status = -EFAULT; - break; - } - - page = __grab_cache_page(mapping, index); - if (!page) { - status = -ENOMEM; - break; - } - - /* - * non-uptodate pages cannot cope with short copies, and we - * cannot take a pagefault with the destination page locked. - * So pin the source page to copy it. - */ - if (!PageUptodate(page) && !segment_eq(get_fs(), KERNEL_DS)) { - unlock_page(page); - - src_page = alloc_page(GFP_KERNEL); - if (!src_page) { - page_cache_release(page); - status = -ENOMEM; - break; - } - - /* - * Cannot get_user_pages with a page locked for the - * same reason as we can't take a page fault with a - * page locked (as explained below). - */ - copied = iov_iter_copy_from_user(src_page, i, - offset, bytes); - if (unlikely(copied == 0)) { - status = -EFAULT; - page_cache_release(page); - page_cache_release(src_page); - break; - } - bytes = copied; - - lock_page(page); - /* - * Can't handle the page going uptodate here, because - * that means we would use non-atomic usercopies, which - * zero out the tail of the page, which can cause - * zeroes to become transiently visible. We could just - * use a non-zeroing copy, but the APIs aren't too - * consistent. - */ - if (unlikely(!page->mapping || PageUptodate(page))) { - unlock_page(page); - page_cache_release(page); - page_cache_release(src_page); - continue; - } - } - - status = a_ops->prepare_write(file, page, offset, offset+bytes); - if (unlikely(status)) - goto fs_write_aop_error; - - if (!src_page) { - /* - * Must not enter the pagefault handler here, because - * we hold the page lock, so we might recursively - * deadlock on the same lock, or get an ABBA deadlock - * against a different lock, or against the mmap_sem - * (which nests outside the page lock). So increment - * preempt count, and use _atomic usercopies. - * - * The page is uptodate so we are OK to encounter a - * short copy: if unmodified parts of the page are - * marked dirty and written out to disk, it doesn't - * really matter. - */ - pagefault_disable(); - copied = iov_iter_copy_from_user_atomic(page, i, - offset, bytes); - pagefault_enable(); - } else { - void *src, *dst; - src = kmap_atomic(src_page, KM_USER0); - dst = kmap_atomic(page, KM_USER1); - memcpy(dst + offset, src + offset, bytes); - kunmap_atomic(dst, KM_USER1); - kunmap_atomic(src, KM_USER0); - copied = bytes; - } - flush_dcache_page(page); - - status = a_ops->commit_write(file, page, offset, offset+bytes); - if (unlikely(status < 0)) - goto fs_write_aop_error; - if (unlikely(status > 0)) /* filesystem did partial write */ - copied = min_t(size_t, copied, status); - - unlock_page(page); - mark_page_accessed(page); - page_cache_release(page); - if (src_page) - page_cache_release(src_page); - - iov_iter_advance(i, copied); - pos += copied; - written += copied; - - balance_dirty_pages_ratelimited(mapping); - cond_resched(); - continue; - -fs_write_aop_error: - unlock_page(page); - page_cache_release(page); - if (src_page) - page_cache_release(src_page); - - /* - * prepare_write() may have instantiated a few blocks - * outside i_size. Trim these off again. Don't need - * i_size_read because we hold i_mutex. - */ - if (pos + bytes > inode->i_size) - vmtruncate(inode, inode->i_size); - break; - } while (iov_iter_count(i)); - - return written ? written : status; -} - static ssize_t generic_perform_write(struct file *file, struct iov_iter *i, loff_t pos) { @@ -2494,10 +2263,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, struct iov_iter i; iov_iter_init(&i, iov, nr_segs, count, written); - if (a_ops->write_begin) - status = generic_perform_write(file, &i, pos); - else - status = generic_perform_write_2copy(file, &i, pos); + status = generic_perform_write(file, &i, pos); if (likely(status >= 0)) { written += status; -- cgit v1.2.3 From 7106a27b52940085c2c3f6e42742d3a2a84d872a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 29 Oct 2008 14:01:15 -0700 Subject: kernel.h: fix might_sleep kernel-doc Put the kernel-doc for might_sleep() _immediately_ before the macro (no intervening lines). Otherwise kernel-doc complains like so: Warning(linux-2.6.27-rc3-git2//include/linux/kernel.h:129): No description found for parameter 'file' Warning(linux-2.6.27-rc3-git2//include/linux/kernel.h:129): No description found for parameter 'line' because kernel-doc is looking at the wrong function prototype (i.e., __might_sleep). [Yes, I have a todo note to myself to check/warn for that inconsistency in scripts/kernel-doc.] Signed-off-by: Randy Dunlap Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 396a350b87a6..fba141d3ca07 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -116,6 +116,8 @@ extern int _cond_resched(void); # define might_resched() do { } while (0) #endif +#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP + void __might_sleep(char *file, int line); /** * might_sleep - annotation for functions that can sleep * @@ -126,8 +128,6 @@ extern int _cond_resched(void); * be bitten later when the calling function happens to sleep when it is not * supposed to. */ -#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP - void __might_sleep(char *file, int line); # define might_sleep() \ do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0) #else -- cgit v1.2.3 From 731572d39fcd3498702eda4600db4c43d51e0b26 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 29 Oct 2008 14:01:20 -0700 Subject: nfsd: fix vm overcommit crash Junjiro R. Okajima reported a problem where knfsd crashes if you are using it to export shmemfs objects and run strict overcommit. In this situation the current->mm based modifier to the overcommit goes through a NULL pointer. We could simply check for NULL and skip the modifier but we've caught other real bugs in the past from mm being NULL here - cases where we did need a valid mm set up (eg the exec bug about a year ago). To preserve the checks and get the logic we want shuffle the checking around and add a new helper to the vm_ security wrappers Also fix a current->mm reference in nommu that should use the passed mm [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: fix build] Reported-by: Junjiro R. Okajima Acked-by: James Morris Signed-off-by: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/security.h | 6 ++++++ mm/mmap.c | 3 ++- mm/nommu.c | 3 ++- mm/shmem.c | 8 ++++---- security/security.c | 9 +++++++++ 5 files changed, 23 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index f5c4a51eb42e..c13f1cec9abb 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1585,6 +1585,7 @@ int security_syslog(int type); int security_settime(struct timespec *ts, struct timezone *tz); int security_vm_enough_memory(long pages); int security_vm_enough_memory_mm(struct mm_struct *mm, long pages); +int security_vm_enough_memory_kern(long pages); int security_bprm_alloc(struct linux_binprm *bprm); void security_bprm_free(struct linux_binprm *bprm); void security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); @@ -1820,6 +1821,11 @@ static inline int security_vm_enough_memory(long pages) return cap_vm_enough_memory(current->mm, pages); } +static inline int security_vm_enough_memory_kern(long pages) +{ + return cap_vm_enough_memory(current->mm, pages); +} + static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) { return cap_vm_enough_memory(mm, pages); diff --git a/mm/mmap.c b/mm/mmap.c index 74f4d158022e..de14ac21e5b5 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -175,7 +175,8 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) /* Don't let a single process grow too big: leave 3% of the size of this process for other processes */ - allowed -= mm->total_vm / 32; + if (mm) + allowed -= mm->total_vm / 32; /* * cast `allowed' as a signed long because vm_committed_space diff --git a/mm/nommu.c b/mm/nommu.c index 2696b24f2bb3..7695dc850785 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1454,7 +1454,8 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) /* Don't let a single process grow too big: leave 3% of the size of this process for other processes */ - allowed -= current->mm->total_vm / 32; + if (mm) + allowed -= mm->total_vm / 32; /* * cast `allowed' as a signed long because vm_committed_space diff --git a/mm/shmem.c b/mm/shmem.c index d38d7e61fcd0..0ed075215e5f 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -161,8 +161,8 @@ static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb) */ static inline int shmem_acct_size(unsigned long flags, loff_t size) { - return (flags & VM_ACCOUNT)? - security_vm_enough_memory(VM_ACCT(size)): 0; + return (flags & VM_ACCOUNT) ? + security_vm_enough_memory_kern(VM_ACCT(size)) : 0; } static inline void shmem_unacct_size(unsigned long flags, loff_t size) @@ -179,8 +179,8 @@ static inline void shmem_unacct_size(unsigned long flags, loff_t size) */ static inline int shmem_acct_block(unsigned long flags) { - return (flags & VM_ACCOUNT)? - 0: security_vm_enough_memory(VM_ACCT(PAGE_CACHE_SIZE)); + return (flags & VM_ACCOUNT) ? + 0 : security_vm_enough_memory_kern(VM_ACCT(PAGE_CACHE_SIZE)); } static inline void shmem_unacct_blocks(unsigned long flags, long pages) diff --git a/security/security.c b/security/security.c index 255b08559b2b..c0acfa7177e5 100644 --- a/security/security.c +++ b/security/security.c @@ -198,14 +198,23 @@ int security_settime(struct timespec *ts, struct timezone *tz) int security_vm_enough_memory(long pages) { + WARN_ON(current->mm == NULL); return security_ops->vm_enough_memory(current->mm, pages); } int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) { + WARN_ON(mm == NULL); return security_ops->vm_enough_memory(mm, pages); } +int security_vm_enough_memory_kern(long pages) +{ + /* If current->mm is a kernel thread then we will pass NULL, + for this specific case that is fine */ + return security_ops->vm_enough_memory(current->mm, pages); +} + int security_bprm_alloc(struct linux_binprm *bprm) { return security_ops->bprm_alloc_security(bprm); -- cgit v1.2.3 From effdb9492de01a51f8123e62e87e3330688f9bf1 Mon Sep 17 00:00:00 2001 From: Fernando Luis Vazquez Cao Date: Wed, 29 Oct 2008 14:01:21 -0700 Subject: spi: fix compile error Fix compile error below: LD drivers/spi/built-in.o CC [M] drivers/spi/spi_gpio.o In file included from drivers/spi/spi_gpio.c:26: include/linux/spi/spi_bitbang.h:23: error: field `work' has incomplete type make[2]: *** [drivers/spi/spi_gpio.o] Error 1 make[1]: *** [drivers/spi] Error 2 make: *** [drivers] Error 2 Signed-off-by: Fernando Luis Vazquez Cao Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/spi/spi_bitbang.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h index b8db32cea1de..bf8de281b4ed 100644 --- a/include/linux/spi/spi_bitbang.h +++ b/include/linux/spi/spi_bitbang.h @@ -18,6 +18,9 @@ * duplex (MicroWire) controllers. Provide chipslect() and txrx_bufs(), * and custom setup()/cleanup() methods. */ + +#include + struct spi_bitbang { struct workqueue_struct *workqueue; struct work_struct work; -- cgit v1.2.3 From c132419e560a2ecd3c8cf77f9c37e103e74b3754 Mon Sep 17 00:00:00 2001 From: Trent Piepho Date: Thu, 30 Oct 2008 18:17:06 -0700 Subject: gianfar: Fix race in TBI/SerDes configuration The init_phy() function attaches to the PHY, then configures the SerDes<->TBI link (in SGMII mode). The TBI is on the MDIO bus with the PHY (sort of) and is accessed via the gianfar's MDIO registers, using the functions gfar_local_mdio_read/write(), which don't do any locking. The previously attached PHY will start a work-queue on a timer, and probably an irq handler as well, which will talk to the PHY and thus use the MDIO bus. This uses phy_read/write(), which have locking, but not against the gfar_local_mdio versions. The result is that PHY code will try to use the MDIO bus at the same time as the SerDes setup code, corrupting the transfers. Setting up the SerDes before attaching to the PHY will insure that there is no race between the SerDes code and *our* PHY, but doesn't fix everything. Typically the PHYs for all gianfar devices are on the same MDIO bus, which is associated with the first gianfar device. This means that the first gianfar's SerDes code could corrupt the MDIO transfers for a different gianfar's PHY. The lock used by phy_read/write() is contained in the mii_bus structure, which is pointed to by the PHY. This is difficult to access from the gianfar drivers, as there is no link between a gianfar device and the mii_bus which shares the same MDIO registers. As far as the device layer and drivers are concerned they are two unrelated devices (which happen to share registers). Generally all gianfar devices' PHYs will be on the bus associated with the first gianfar. But this might not be the case, so simply locking the gianfar's PHY's mii bus might not lock the mii bus that the SerDes setup code is going to use. We solve this by having the code that creates the gianfar platform device look in the device tree for an mdio device that shares the gianfar's registers. If one is found the ID of its platform device is saved in the gianfar's platform data. A new function in the gianfar mii code, gfar_get_miibus(), can use the bus ID to search through the platform devices for a gianfar_mdio device with the right ID. The platform device's driver data is the mii_bus structure, which the SerDes setup code can use to lock the current bus. Signed-off-by: Trent Piepho CC: Andy Fleming Signed-off-by: Jeff Garzik --- arch/powerpc/sysdev/fsl_soc.c | 26 ++++++++++++++++++++++++++ drivers/net/gianfar.c | 7 +++++++ drivers/net/gianfar_mii.c | 21 +++++++++++++++++++++ drivers/net/gianfar_mii.h | 3 +++ include/linux/fsl_devices.h | 3 ++- 5 files changed, 59 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index 01b884b25696..26ecb96f9731 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -223,6 +223,8 @@ static int gfar_mdio_of_init_one(struct device_node *np) if (ret) return ret; + /* The gianfar device will try to use the same ID created below to find + * this bus, to coordinate register access (since they share). */ mdio_dev = platform_device_register_simple("fsl-gianfar_mdio", res.start&0xfffff, &res, 1); if (IS_ERR(mdio_dev)) @@ -394,6 +396,30 @@ static int __init gfar_of_init(void) of_node_put(mdio); } + /* Get MDIO bus controlled by this eTSEC, if any. Normally only + * eTSEC 1 will control an MDIO bus, not necessarily the same + * bus that its PHY is on ('mdio' above), so we can't just use + * that. What we do is look for a gianfar mdio device that has + * overlapping registers with this device. That's really the + * whole point, to find the device sharing our registers to + * coordinate access with it. + */ + for_each_compatible_node(mdio, NULL, "fsl,gianfar-mdio") { + if (of_address_to_resource(mdio, 0, &res)) + continue; + + if (res.start >= r[0].start && res.end <= r[0].end) { + /* Get the ID the mdio bus platform device was + * registered with. gfar_data.bus_id is + * different because it's for finding a PHY, + * while this is for finding a MII bus. + */ + gfar_data.mdio_bus = res.start&0xfffff; + of_node_put(mdio); + break; + } + } + ret = platform_device_add_data(gfar_dev, &gfar_data, sizeof(struct diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index 64b201134fdb..249541a1814b 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -586,6 +586,10 @@ static void gfar_configure_serdes(struct net_device *dev) struct gfar_mii __iomem *regs = (void __iomem *)&priv->regs->gfar_mii_regs; int tbipa = gfar_read(&priv->regs->tbipa); + struct mii_bus *bus = gfar_get_miibus(priv); + + if (bus) + mutex_lock(&bus->mdio_lock); /* Single clk mode, mii mode off(for serdes communication) */ gfar_local_mdio_write(regs, tbipa, MII_TBICON, TBICON_CLK_SELECT); @@ -596,6 +600,9 @@ static void gfar_configure_serdes(struct net_device *dev) gfar_local_mdio_write(regs, tbipa, MII_BMCR, BMCR_ANENABLE | BMCR_ANRESTART | BMCR_FULLDPLX | BMCR_SPEED1000); + + if (bus) + mutex_unlock(&bus->mdio_lock); } static void init_registers(struct net_device *dev) diff --git a/drivers/net/gianfar_mii.c b/drivers/net/gianfar_mii.c index bf73eea98010..0e2595d24933 100644 --- a/drivers/net/gianfar_mii.c +++ b/drivers/net/gianfar_mii.c @@ -269,6 +269,27 @@ static struct device_driver gianfar_mdio_driver = { .remove = gfar_mdio_remove, }; +static int match_mdio_bus(struct device *dev, void *data) +{ + const struct gfar_private *priv = data; + const struct platform_device *pdev = to_platform_device(dev); + + return !strcmp(pdev->name, gianfar_mdio_driver.name) && + pdev->id == priv->einfo->mdio_bus; +} + +/* Given a gfar_priv structure, find the mii_bus controlled by this device (not + * necessarily the same as the bus the gfar's PHY is on), if one exists. + * Normally only the first gianfar controls a mii_bus. */ +struct mii_bus *gfar_get_miibus(const struct gfar_private *priv) +{ + /*const*/ struct device *d; + + d = bus_find_device(gianfar_mdio_driver.bus, NULL, (void *)priv, + match_mdio_bus); + return d ? dev_get_drvdata(d) : NULL; +} + int __init gfar_mdio_init(void) { return driver_register(&gianfar_mdio_driver); diff --git a/drivers/net/gianfar_mii.h b/drivers/net/gianfar_mii.h index 2af28b16a0e2..02dc970ca1ff 100644 --- a/drivers/net/gianfar_mii.h +++ b/drivers/net/gianfar_mii.h @@ -18,6 +18,8 @@ #ifndef __GIANFAR_MII_H #define __GIANFAR_MII_H +struct gfar_private; /* forward ref */ + #define MIIMIND_BUSY 0x00000001 #define MIIMIND_NOTVALID 0x00000004 @@ -44,6 +46,7 @@ int gfar_mdio_write(struct mii_bus *bus, int mii_id, int regnum, u16 value); int gfar_local_mdio_write(struct gfar_mii __iomem *regs, int mii_id, int regnum, u16 value); int gfar_local_mdio_read(struct gfar_mii __iomem *regs, int mii_id, int regnum); +struct mii_bus *gfar_get_miibus(const struct gfar_private *priv); int __init gfar_mdio_init(void); void gfar_mdio_exit(void); #endif /* GIANFAR_PHY_H */ diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index 4e625e0094c8..708bab58d8d0 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -49,7 +49,8 @@ struct gianfar_platform_data { u32 device_flags; /* board specific information */ u32 board_flags; - char bus_id[MII_BUS_ID_SIZE]; + int mdio_bus; /* Bus controlled by us */ + char bus_id[MII_BUS_ID_SIZE]; /* Bus PHY is on */ u32 phy_id; u8 mac_addr[6]; phy_interface_t interface; -- cgit v1.2.3 From 9ce8e3073d9cfd6f859c22a25441db41b85cbf6e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 27 Aug 2008 15:23:18 +0200 Subject: libata: add whitelist for devices with known good pata-sata bridges libata currently imposes a UDMA5 max transfer rate and 200 sector max transfer size for SATA devices that sit behind a pata-sata bridge. Lots of devices have known good bridges that don't need this limit applied. The MTRON SSD disks are such devices. Transfer rates are increased by 20-30% with the restriction removed. So add a "blacklist" entry for the MTRON devices, with a flag indicating that the bridge is known good. Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 7 +++++++ include/linux/libata.h | 1 + 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 8824c8da3f2f..82af7011f2dd 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2158,6 +2158,10 @@ retry: static inline u8 ata_dev_knobble(struct ata_device *dev) { struct ata_port *ap = dev->link->ap; + + if (ata_dev_blacklisted(dev) & ATA_HORKAGE_BRIDGE_OK) + return 0; + return ((ap->cbl == ATA_CBL_SATA) && (!ata_id_is_sata(dev->id))); } @@ -4062,6 +4066,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "TSSTcorp CDDVDW SH-S202N", "SB00", ATA_HORKAGE_IVB, }, { "TSSTcorp CDDVDW SH-S202N", "SB01", ATA_HORKAGE_IVB, }, + /* Devices that do not need bridging limits applied */ + { "MTRON MSP-SATA*", NULL, ATA_HORKAGE_BRIDGE_OK, }, + /* End Marker */ { } }; diff --git a/include/linux/libata.h b/include/linux/libata.h index 507f53ef8038..f5441edee55f 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -372,6 +372,7 @@ enum { ATA_HORKAGE_IPM = (1 << 7), /* Link PM problems */ ATA_HORKAGE_IVB = (1 << 8), /* cbl det validity bit bugs */ ATA_HORKAGE_STUCK_ERR = (1 << 9), /* stuck ERR on next PACKET */ + ATA_HORKAGE_BRIDGE_OK = (1 << 10), /* no bridge limits */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ -- cgit v1.2.3 From ad1d967c88e349c7e822ad75dd3247a2a50d2ea3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 30 Oct 2008 23:54:35 -0700 Subject: net: delete excess kernel-doc notation Remove excess kernel-doc function parameters from networking header & driver files: Warning(include/net/sock.h:946): Excess function parameter or struct member 'sk' description in 'sk_filter_release' Warning(include/linux/netdevice.h:1545): Excess function parameter or struct member 'cpu' description in 'netif_tx_lock' Warning(drivers/net/wan/z85230.c:712): Excess function parameter or struct member 'regs' description in 'z8530_interrupt' Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- drivers/net/wan/z85230.c | 1 - include/linux/netdevice.h | 1 - include/net/sock.h | 1 - 3 files changed, 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wan/z85230.c b/drivers/net/wan/z85230.c index ccd9cd35ecbe..5bf7e01ef0e9 100644 --- a/drivers/net/wan/z85230.c +++ b/drivers/net/wan/z85230.c @@ -695,7 +695,6 @@ EXPORT_SYMBOL(z8530_nop); * z8530_interrupt - Handle an interrupt from a Z8530 * @irq: Interrupt number * @dev_id: The Z8530 device that is interrupting. - * @regs: unused * * A Z85[2]30 device has stuck its hand in the air for attention. * We scan both the channels on the chip for events and then call diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c8bcb59adfdf..9d77b1d7dca8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1537,7 +1537,6 @@ static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) /** * netif_tx_lock - grab network device transmit lock * @dev: network device - * @cpu: cpu number of lock owner * * Get network device transmit lock */ diff --git a/include/net/sock.h b/include/net/sock.h index ada50c04d09f..c04f9e18ea22 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -936,7 +936,6 @@ extern void sock_init_data(struct socket *sock, struct sock *sk); /** * sk_filter_release: Release a socket filter - * @sk: socket * @fp: filter to remove * * Remove a filter from a socket and release its resources. -- cgit v1.2.3 From 9663f2e6a6cf3f82b06d8fb699b11b80f92553ba Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Thu, 30 Oct 2008 19:38:18 -0700 Subject: resources: add io-mapping functions to dynamically map large device apertures Impact: add new generic io_map_*() APIs Graphics devices have large PCI apertures which would consume a significant fraction of a 32-bit address space if mapped during driver initialization. Using ioremap at runtime is impractical as it is too slow. This new set of interfaces uses atomic mappings on 32-bit processors and a large static mapping on 64-bit processors to provide reasonable 32-bit performance and optimal 64-bit performance. The current implementation sits atop the io_map_atomic fixmap-based mechanism for 32-bit processors. This includes some editorial suggestions from Randy Dunlap for Documentation/io-mapping.txt Signed-off-by: Keith Packard Signed-off-by: Eric Anholt Signed-off-by: Ingo Molnar --- Documentation/io-mapping.txt | 76 ++++++++++++++++++++++++++++ include/linux/io-mapping.h | 118 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 194 insertions(+) create mode 100644 Documentation/io-mapping.txt create mode 100644 include/linux/io-mapping.h (limited to 'include/linux') diff --git a/Documentation/io-mapping.txt b/Documentation/io-mapping.txt new file mode 100644 index 000000000000..cd2f726becc8 --- /dev/null +++ b/Documentation/io-mapping.txt @@ -0,0 +1,76 @@ +The io_mapping functions in linux/io-mapping.h provide an abstraction for +efficiently mapping small regions of an I/O device to the CPU. The initial +usage is to support the large graphics aperture on 32-bit processors where +ioremap_wc cannot be used to statically map the entire aperture to the CPU +as it would consume too much of the kernel address space. + +A mapping object is created during driver initialization using + + struct io_mapping *io_mapping_create_wc(unsigned long base, + unsigned long size) + + 'base' is the bus address of the region to be made + mappable, while 'size' indicates how large a mapping region to + enable. Both are in bytes. + + This _wc variant provides a mapping which may only be used + with the io_mapping_map_atomic_wc or io_mapping_map_wc. + +With this mapping object, individual pages can be mapped either atomically +or not, depending on the necessary scheduling environment. Of course, atomic +maps are more efficient: + + void *io_mapping_map_atomic_wc(struct io_mapping *mapping, + unsigned long offset) + + 'offset' is the offset within the defined mapping region. + Accessing addresses beyond the region specified in the + creation function yields undefined results. Using an offset + which is not page aligned yields an undefined result. The + return value points to a single page in CPU address space. + + This _wc variant returns a write-combining map to the + page and may only be used with mappings created by + io_mapping_create_wc + + Note that the task may not sleep while holding this page + mapped. + + void io_mapping_unmap_atomic(void *vaddr) + + 'vaddr' must be the the value returned by the last + io_mapping_map_atomic_wc call. This unmaps the specified + page and allows the task to sleep once again. + +If you need to sleep while holding the lock, you can use the non-atomic +variant, although they may be significantly slower. + + void *io_mapping_map_wc(struct io_mapping *mapping, + unsigned long offset) + + This works like io_mapping_map_atomic_wc except it allows + the task to sleep while holding the page mapped. + + void io_mapping_unmap(void *vaddr) + + This works like io_mapping_unmap_atomic, except it is used + for pages mapped with io_mapping_map_wc. + +At driver close time, the io_mapping object must be freed: + + void io_mapping_free(struct io_mapping *mapping) + +Current Implementation: + +The initial implementation of these functions uses existing mapping +mechanisms and so provides only an abstraction layer and no new +functionality. + +On 64-bit processors, io_mapping_create_wc calls ioremap_wc for the whole +range, creating a permanent kernel-visible mapping to the resource. The +map_atomic and map functions add the requested offset to the base of the +virtual address returned by ioremap_wc. + +On 32-bit processors, io_mapping_map_atomic_wc uses io_map_atomic_prot_pfn, +which uses the fixmaps to get us a mapping to a page using an atomic fashion. +For io_mapping_map_wc, ioremap_wc() is used to get a mapping of the region. diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h new file mode 100644 index 000000000000..1b566993db6e --- /dev/null +++ b/include/linux/io-mapping.h @@ -0,0 +1,118 @@ +/* + * Copyright © 2008 Keith Packard + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef _LINUX_IO_MAPPING_H +#define _LINUX_IO_MAPPING_H + +#include +#include +#include +#include + +/* + * The io_mapping mechanism provides an abstraction for mapping + * individual pages from an io device to the CPU in an efficient fashion. + * + * See Documentation/io_mapping.txt + */ + +/* this struct isn't actually defined anywhere */ +struct io_mapping; + +#ifdef CONFIG_X86_64 + +/* Create the io_mapping object*/ +static inline struct io_mapping * +io_mapping_create_wc(unsigned long base, unsigned long size) +{ + return (struct io_mapping *) ioremap_wc(base, size); +} + +static inline void +io_mapping_free(struct io_mapping *mapping) +{ + iounmap(mapping); +} + +/* Atomic map/unmap */ +static inline void * +io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset) +{ + return ((char *) mapping) + offset; +} + +static inline void +io_mapping_unmap_atomic(void *vaddr) +{ +} + +/* Non-atomic map/unmap */ +static inline void * +io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) +{ + return ((char *) mapping) + offset; +} + +static inline void +io_mapping_unmap(void *vaddr) +{ +} + +#endif /* CONFIG_X86_64 */ + +#ifdef CONFIG_X86_32 +static inline struct io_mapping * +io_mapping_create_wc(unsigned long base, unsigned long size) +{ + return (struct io_mapping *) base; +} + +static inline void +io_mapping_free(struct io_mapping *mapping) +{ +} + +/* Atomic map/unmap */ +static inline void * +io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset) +{ + offset += (unsigned long) mapping; + return iomap_atomic_prot_pfn(offset >> PAGE_SHIFT, KM_USER0, + __pgprot(__PAGE_KERNEL_WC)); +} + +static inline void +io_mapping_unmap_atomic(void *vaddr) +{ + iounmap_atomic(vaddr, KM_USER0); +} + +static inline void * +io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) +{ + offset += (unsigned long) mapping; + return ioremap_wc(offset, PAGE_SIZE); +} + +static inline void +io_mapping_unmap(void *vaddr) +{ + iounmap(vaddr); +} +#endif /* CONFIG_X86_32 */ + +#endif /* _LINUX_IO_MAPPING_H */ -- cgit v1.2.3 From 4ac96572f1f6abe44b5e02e80fdfb5a990129613 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Sun, 2 Nov 2008 09:51:27 -0500 Subject: linux/string.h: fix comment typo s/user/used/ Signed-off-by: Jeff Garzik Signed-off-by: Linus Torvalds --- include/linux/string.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index 810d80df0a1d..d18fc198aa2f 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -1,7 +1,7 @@ #ifndef _LINUX_STRING_H_ #define _LINUX_STRING_H_ -/* We don't want strings.h stuff being user by user stuff by accident */ +/* We don't want strings.h stuff being used by user stuff by accident */ #ifndef __KERNEL__ #include -- cgit v1.2.3 From e5beae16901795223d677f15aa2fe192976278ee Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Mon, 3 Nov 2008 18:21:45 +0100 Subject: io mapping: clean up #ifdefs Impact: cleanup clean up ifdefs: change #ifdef CONFIG_X86_32/64 to CONFIG_HAVE_ATOMIC_IOMAP. flip around the #ifdef sections to clean up the structure. Signed-off-by: Keith Packard Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 4 ++++ include/linux/io-mapping.h | 43 +++++++++++++++++++++++++------------------ 2 files changed, 29 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6f20718d3156..e60c59b81bdd 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1894,6 +1894,10 @@ config SYSVIPC_COMPAT endmenu +config HAVE_ATOMIC_IOMAP + def_bool y + depends on X86_32 + source "net/Kconfig" source "drivers/Kconfig" diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 1b566993db6e..82df31726a54 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -33,86 +33,93 @@ /* this struct isn't actually defined anywhere */ struct io_mapping; -#ifdef CONFIG_X86_64 +#ifdef CONFIG_HAVE_ATOMIC_IOMAP + +/* + * For small address space machines, mapping large objects + * into the kernel virtual space isn't practical. Where + * available, use fixmap support to dynamically map pages + * of the object at run time. + */ -/* Create the io_mapping object*/ static inline struct io_mapping * io_mapping_create_wc(unsigned long base, unsigned long size) { - return (struct io_mapping *) ioremap_wc(base, size); + return (struct io_mapping *) base; } static inline void io_mapping_free(struct io_mapping *mapping) { - iounmap(mapping); } /* Atomic map/unmap */ static inline void * io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset) { - return ((char *) mapping) + offset; + offset += (unsigned long) mapping; + return iomap_atomic_prot_pfn(offset >> PAGE_SHIFT, KM_USER0, + __pgprot(__PAGE_KERNEL_WC)); } static inline void io_mapping_unmap_atomic(void *vaddr) { + iounmap_atomic(vaddr, KM_USER0); } -/* Non-atomic map/unmap */ static inline void * io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) { - return ((char *) mapping) + offset; + offset += (unsigned long) mapping; + return ioremap_wc(offset, PAGE_SIZE); } static inline void io_mapping_unmap(void *vaddr) { + iounmap(vaddr); } -#endif /* CONFIG_X86_64 */ +#else -#ifdef CONFIG_X86_32 +/* Create the io_mapping object*/ static inline struct io_mapping * io_mapping_create_wc(unsigned long base, unsigned long size) { - return (struct io_mapping *) base; + return (struct io_mapping *) ioremap_wc(base, size); } static inline void io_mapping_free(struct io_mapping *mapping) { + iounmap(mapping); } /* Atomic map/unmap */ static inline void * io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset) { - offset += (unsigned long) mapping; - return iomap_atomic_prot_pfn(offset >> PAGE_SHIFT, KM_USER0, - __pgprot(__PAGE_KERNEL_WC)); + return ((char *) mapping) + offset; } static inline void io_mapping_unmap_atomic(void *vaddr) { - iounmap_atomic(vaddr, KM_USER0); } +/* Non-atomic map/unmap */ static inline void * io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) { - offset += (unsigned long) mapping; - return ioremap_wc(offset, PAGE_SIZE); + return ((char *) mapping) + offset; } static inline void io_mapping_unmap(void *vaddr) { - iounmap(vaddr); } -#endif /* CONFIG_X86_32 */ + +#endif /* HAVE_ATOMIC_IOMAP */ #endif /* _LINUX_IO_MAPPING_H */ -- cgit v1.2.3 From a7b930cdf8ec790c85f81416c87f7c066679d373 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Sun, 2 Nov 2008 13:32:43 -0800 Subject: PCI: annotate return value of pci_ioremap_bar with __iomem Was missing from the initial patch. Acked-by: Arjan van de Ven Signed-off-by: Harvey Harrison Signed-off-by: Jesse Barnes --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index c75b82bda327..feb4657bb043 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1136,7 +1136,7 @@ static inline void pci_mmcfg_late_init(void) { } #endif #ifdef CONFIG_HAS_IOMEM -static inline void * pci_ioremap_bar(struct pci_dev *pdev, int bar) +static inline void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar) { /* * Make sure the BAR is actually a memory resource, not an IO resource -- cgit v1.2.3 From 6a87e42e955ff27e07a77f65f8f077dc7c4171e1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Nov 2008 19:01:09 +0900 Subject: libata: implement ATA_HORKAGE_ATAPI_MOD16_DMA and apply it libata always uses PIO for ATAPI commands when the number of bytes to transfer isn't multiple of 16 but quantum DAT72 chokes on odd bytes PIO transfers. Implement a horkage to skip the mod16 check and apply it to the quantum device. This is reported by John Clark in the following thread. http://thread.gmane.org/gmane.linux.ide/34748 Signed-off-by: Tejun Heo Cc: John Clark Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 4 +++- include/linux/libata.h | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 82af7011f2dd..91b478f20557 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4024,6 +4024,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { /* Weird ATAPI devices */ { "TORiSAN DVD-ROM DRD-N216", NULL, ATA_HORKAGE_MAX_SEC_128 }, + { "QUANTUM DAT DAT72-000", NULL, ATA_HORKAGE_ATAPI_MOD16_DMA }, /* Devices we expect to fail diagnostics */ @@ -4444,7 +4445,8 @@ int atapi_check_dma(struct ata_queued_cmd *qc) /* Don't allow DMA if it isn't multiple of 16 bytes. Quite a * few ATAPI devices choke on such DMA requests. */ - if (unlikely(qc->nbytes & 15)) + if (!(qc->dev->horkage & ATA_HORKAGE_ATAPI_MOD16_DMA) && + unlikely(qc->nbytes & 15)) return 1; if (ap->ops->check_atapi_dma) diff --git a/include/linux/libata.h b/include/linux/libata.h index f5441edee55f..c7665a4134c5 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -373,6 +373,8 @@ enum { ATA_HORKAGE_IVB = (1 << 8), /* cbl det validity bit bugs */ ATA_HORKAGE_STUCK_ERR = (1 << 9), /* stuck ERR on next PACKET */ ATA_HORKAGE_BRIDGE_OK = (1 << 10), /* no bridge limits */ + ATA_HORKAGE_ATAPI_MOD16_DMA = (1 << 11), /* use ATAPI DMA for commands + not multiple of 16 bytes */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ -- cgit v1.2.3 From 9b22ea560957de1484e6b3e8538f7eef202e3596 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 4 Nov 2008 14:49:57 -0800 Subject: net: fix packet socket delivery in rx irq handler The changes to deliver hardware accelerated VLAN packets to packet sockets (commit bc1d0411) caused a warning for non-NAPI drivers. The __vlan_hwaccel_rx() function is called directly from the drivers RX function, for non-NAPI drivers that means its still in RX IRQ context: [ 27.779463] ------------[ cut here ]------------ [ 27.779509] WARNING: at kernel/softirq.c:136 local_bh_enable+0x37/0x81() ... [ 27.782520] [] netif_nit_deliver+0x5b/0x75 [ 27.782590] [] __vlan_hwaccel_rx+0x79/0x162 [ 27.782664] [] atl1_intr+0x9a9/0xa7c [atl1] [ 27.782738] [] handle_IRQ_event+0x23/0x51 [ 27.782808] [] handle_edge_irq+0xc2/0x102 [ 27.782878] [] do_IRQ+0x4d/0x64 Split hardware accelerated VLAN reception into two parts to fix this: - __vlan_hwaccel_rx just stores the VLAN TCI and performs the VLAN device lookup, then calls netif_receive_skb()/netif_rx() - vlan_hwaccel_do_receive(), which is invoked by netif_receive_skb() in softirq context, performs the real reception and delivery to packet sockets. Reported-and-tested-by: Ramon Casellas Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 7 +++++++ net/8021q/vlan_core.c | 46 +++++++++++++++++++++++++++++++++------------- net/core/dev.c | 3 +++ 3 files changed, 43 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 9e7b49b8062d..a5cb0c3f6dcf 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -114,6 +114,8 @@ extern u16 vlan_dev_vlan_id(const struct net_device *dev); extern int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, u16 vlan_tci, int polling); +extern int vlan_hwaccel_do_receive(struct sk_buff *skb); + #else static inline struct net_device *vlan_dev_real_dev(const struct net_device *dev) { @@ -133,6 +135,11 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, BUG(); return NET_XMIT_SUCCESS; } + +static inline int vlan_hwaccel_do_receive(struct sk_buff *skb) +{ + return 0; +} #endif /** diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 916061f681b6..68ced4bf158c 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -3,11 +3,20 @@ #include #include "vlan.h" +struct vlan_hwaccel_cb { + struct net_device *dev; +}; + +static inline struct vlan_hwaccel_cb *vlan_hwaccel_cb(struct sk_buff *skb) +{ + return (struct vlan_hwaccel_cb *)skb->cb; +} + /* VLAN rx hw acceleration helper. This acts like netif_{rx,receive_skb}(). */ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, u16 vlan_tci, int polling) { - struct net_device_stats *stats; + struct vlan_hwaccel_cb *cb = vlan_hwaccel_cb(skb); if (skb_bond_should_drop(skb)) { dev_kfree_skb_any(skb); @@ -15,23 +24,35 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, } skb->vlan_tci = vlan_tci; + cb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK); + + return (polling ? netif_receive_skb(skb) : netif_rx(skb)); +} +EXPORT_SYMBOL(__vlan_hwaccel_rx); + +int vlan_hwaccel_do_receive(struct sk_buff *skb) +{ + struct vlan_hwaccel_cb *cb = vlan_hwaccel_cb(skb); + struct net_device *dev = cb->dev; + struct net_device_stats *stats; + netif_nit_deliver(skb); - skb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK); - if (skb->dev == NULL) { - dev_kfree_skb_any(skb); - /* Not NET_RX_DROP, this is not being dropped - * due to congestion. */ - return NET_RX_SUCCESS; + if (dev == NULL) { + kfree_skb(skb); + return -1; } - skb->dev->last_rx = jiffies; + + skb->dev = dev; + skb->priority = vlan_get_ingress_priority(dev, skb->vlan_tci); skb->vlan_tci = 0; - stats = &skb->dev->stats; + dev->last_rx = jiffies; + + stats = &dev->stats; stats->rx_packets++; stats->rx_bytes += skb->len; - skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci); switch (skb->pkt_type) { case PACKET_BROADCAST: break; @@ -43,13 +64,12 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, * This allows the VLAN to have a different MAC than the * underlying device, and still route correctly. */ if (!compare_ether_addr(eth_hdr(skb)->h_dest, - skb->dev->dev_addr)) + dev->dev_addr)) skb->pkt_type = PACKET_HOST; break; }; - return (polling ? netif_receive_skb(skb) : netif_rx(skb)); + return 0; } -EXPORT_SYMBOL(__vlan_hwaccel_rx); struct net_device *vlan_dev_real_dev(const struct net_device *dev) { diff --git a/net/core/dev.c b/net/core/dev.c index d9038e328cc1..9174c77d3112 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2218,6 +2218,9 @@ int netif_receive_skb(struct sk_buff *skb) int ret = NET_RX_DROP; __be16 type; + if (skb->vlan_tci && vlan_hwaccel_do_receive(skb)) + return NET_RX_SUCCESS; + /* if we've gotten here through NAPI, check netpoll */ if (netpoll_receive_skb(skb)) return NET_RX_DROP; -- cgit v1.2.3 From 467622ef2acb01986eab37ef96c3632b3ea35999 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 1 Nov 2008 04:19:11 -0700 Subject: [MTD] [NOR] Fix cfi_send_gen_cmd handling of x16 devices in x8 mode (v4) For "unlock" cycles to 16bit devices in 8bit compatibility mode we need to use the byte addresses 0xaaa and 0x555. These effectively match the word address 0x555 and 0x2aa, except the latter has its low bit set. Most chips don't care about the value of the 'A-1' pin in x8 mode, but some -- like the ST M29W320D -- do. So we need to be careful to set it where appropriate. cfi_send_gen_cmd is only ever passed addresses where the low byte is 0x00, 0x55 or 0xaa. Of those, only addresses ending 0xaa are affected by this patch, by masking in the extra low bit when the device is known to be in compatibility mode. [dwmw2: Do it only when (cmd_ofs & 0xff) == 0xaa] v4: Fix stupid typo in cfi_build_cmd_addr that failed to compile I'm writing this patch way to late at night. v3: Bring all of the work back into cfi_build_cmd_addr including calling of map_bankwidth(map) and cfi_interleave(cfi) So every caller doesn't need to. v2: Only modified the address if we our device_type is larger than our bus width. Cc: stable@kernel.org Signed-off-by: Eric W. Biederman Signed-off-by: David Woodhouse --- drivers/mtd/chips/cfi_cmdset_0002.c | 13 ------------- drivers/mtd/chips/jedec_probe.c | 10 ++++------ include/linux/mtd/cfi.h | 22 +++++++++++++++++++--- 3 files changed, 23 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index 3e6f5d8609e8..d74ec46aa032 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -406,19 +406,6 @@ struct mtd_info *cfi_cmdset_0002(struct map_info *map, int primary) /* Set the default CFI lock/unlock addresses */ cfi->addr_unlock1 = 0x555; cfi->addr_unlock2 = 0x2aa; - /* Modify the unlock address if we are in compatibility mode */ - if ( /* x16 in x8 mode */ - ((cfi->device_type == CFI_DEVICETYPE_X8) && - (cfi->cfiq->InterfaceDesc == - CFI_INTERFACE_X8_BY_X16_ASYNC)) || - /* x32 in x16 mode */ - ((cfi->device_type == CFI_DEVICETYPE_X16) && - (cfi->cfiq->InterfaceDesc == - CFI_INTERFACE_X16_BY_X32_ASYNC))) - { - cfi->addr_unlock1 = 0xaaa; - cfi->addr_unlock2 = 0x555; - } } /* CFI mode */ else if (cfi->cfi_mode == CFI_MODE_JEDEC) { diff --git a/drivers/mtd/chips/jedec_probe.c b/drivers/mtd/chips/jedec_probe.c index f84ab6182148..2f3f2f719ba4 100644 --- a/drivers/mtd/chips/jedec_probe.c +++ b/drivers/mtd/chips/jedec_probe.c @@ -1808,9 +1808,7 @@ static inline u32 jedec_read_mfr(struct map_info *map, uint32_t base, * several first banks can contain 0x7f instead of actual ID */ do { - uint32_t ofs = cfi_build_cmd_addr(0 + (bank << 8), - cfi_interleave(cfi), - cfi->device_type); + uint32_t ofs = cfi_build_cmd_addr(0 + (bank << 8), map, cfi); mask = (1 << (cfi->device_type * 8)) - 1; result = map_read(map, base + ofs); bank++; @@ -1824,7 +1822,7 @@ static inline u32 jedec_read_id(struct map_info *map, uint32_t base, { map_word result; unsigned long mask; - u32 ofs = cfi_build_cmd_addr(1, cfi_interleave(cfi), cfi->device_type); + u32 ofs = cfi_build_cmd_addr(1, map, cfi); mask = (1 << (cfi->device_type * 8)) -1; result = map_read(map, base + ofs); return result.x[0] & mask; @@ -2067,8 +2065,8 @@ static int jedec_probe_chip(struct map_info *map, __u32 base, } /* Ensure the unlock addresses we try stay inside the map */ - probe_offset1 = cfi_build_cmd_addr(cfi->addr_unlock1, cfi_interleave(cfi), cfi->device_type); - probe_offset2 = cfi_build_cmd_addr(cfi->addr_unlock2, cfi_interleave(cfi), cfi->device_type); + probe_offset1 = cfi_build_cmd_addr(cfi->addr_unlock1, map, cfi); + probe_offset2 = cfi_build_cmd_addr(cfi->addr_unlock2, map, cfi); if ( ((base + probe_offset1 + map_bankwidth(map)) >= map->size) || ((base + probe_offset2 + map_bankwidth(map)) >= map->size)) goto retry; diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index ee5124ec319e..00e2b575021f 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -282,9 +282,25 @@ struct cfi_private { /* * Returns the command address according to the given geometry. */ -static inline uint32_t cfi_build_cmd_addr(uint32_t cmd_ofs, int interleave, int type) +static inline uint32_t cfi_build_cmd_addr(uint32_t cmd_ofs, + struct map_info *map, struct cfi_private *cfi) { - return (cmd_ofs * type) * interleave; + unsigned bankwidth = map_bankwidth(map); + unsigned interleave = cfi_interleave(cfi); + unsigned type = cfi->device_type; + uint32_t addr; + + addr = (cmd_ofs * type) * interleave; + + /* Modify the unlock address if we are in compatiblity mode. + * For 16bit devices on 8 bit busses + * and 32bit devices on 16 bit busses + * set the low bit of the alternating bit sequence of the address. + */ + if (((type * interleave) > bankwidth) && ((uint8_t)cmd_ofs == 0xaa)) + addr |= (type >> 1)*interleave; + + return addr; } /* @@ -430,7 +446,7 @@ static inline uint32_t cfi_send_gen_cmd(u_char cmd, uint32_t cmd_addr, uint32_t int type, map_word *prev_val) { map_word val; - uint32_t addr = base + cfi_build_cmd_addr(cmd_addr, cfi_interleave(cfi), type); + uint32_t addr = base + cfi_build_cmd_addr(cmd_addr, map, cfi); val = cfi_build_cmd(cmd, map, cfi); if (prev_val) -- cgit v1.2.3 From 9fcd18c9e63e325dbd2b4c726623f760788d5aa8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 5 Nov 2008 16:52:08 +0100 Subject: sched: re-tune balancing Impact: improve wakeup affinity on NUMA systems, tweak SMP systems Given the fixes+tweaks to the wakeup-buddy code, re-tweak the domain balancing defaults on NUMA and SMP systems. Turn on SD_WAKE_AFFINE which was off on x86 NUMA - there's no reason why we would not want to have wakeup affinity across nodes as well. (we already do this in the standard NUMA template.) lat_ctx on a NUMA box is particularly happy about this change: before: | phoenix:~/l> ./lat_ctx -s 0 2 | "size=0k ovr=2.60 | 2 5.70 after: | phoenix:~/l> ./lat_ctx -s 0 2 | "size=0k ovr=2.65 | 2 2.07 a 2.75x speedup. pipe-test is similarly happy about it too: | phoenix:~/sched-tests> ./pipe-test | 18.26 usecs/loop. | 14.70 usecs/loop. | 14.38 usecs/loop. | 10.55 usecs/loop. # +WAKE_AFFINE on domain0+domain1 | 8.63 usecs/loop. | 8.59 usecs/loop. | 9.03 usecs/loop. | 8.94 usecs/loop. | 8.96 usecs/loop. | 8.63 usecs/loop. Also: - disable SD_BALANCE_NEWIDLE on NUMA and SMP domains (keep it for siblings) - enable SD_WAKE_BALANCE on SMP domains Sysbench+postgresql improves all around the board, quite significantly: .28-rc3-11474e2c .28-rc3-11474e2c-tune ------------------------------------------------- 1: 571 688 +17.08% 2: 1236 1206 -2.55% 4: 2381 2642 +9.89% 8: 4958 5164 +3.99% 16: 9580 9574 -0.07% 32: 7128 8118 +12.20% 64: 7342 8266 +11.18% 128: 7342 8064 +8.95% 256: 7519 7884 +4.62% 512: 7350 7731 +4.93% ------------------------------------------------- SUM: 55412 59341 +6.62% So it's a win both for the runup portion, the peak area and the tail. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/topology.h | 7 ++++--- include/linux/topology.h | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 90ac7718469a..4850e4b02b61 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -154,7 +154,7 @@ extern unsigned long node_remap_size[]; #endif -/* sched_domains SD_NODE_INIT for NUMAQ machines */ +/* sched_domains SD_NODE_INIT for NUMA machines */ #define SD_NODE_INIT (struct sched_domain) { \ .min_interval = 8, \ .max_interval = 32, \ @@ -169,8 +169,9 @@ extern unsigned long node_remap_size[]; .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ | SD_BALANCE_FORK \ - | SD_SERIALIZE \ - | SD_WAKE_BALANCE, \ + | SD_WAKE_AFFINE \ + | SD_WAKE_BALANCE \ + | SD_SERIALIZE, \ .last_balance = jiffies, \ .balance_interval = 1, \ } diff --git a/include/linux/topology.h b/include/linux/topology.h index 2158fc0d5a56..34a7ee0ebed2 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -146,10 +146,10 @@ void arch_update_cpu_topology(void); .wake_idx = 1, \ .forkexec_idx = 1, \ .flags = SD_LOAD_BALANCE \ - | SD_BALANCE_NEWIDLE \ - | SD_BALANCE_FORK \ | SD_BALANCE_EXEC \ + | SD_BALANCE_FORK \ | SD_WAKE_AFFINE \ + | SD_WAKE_BALANCE \ | BALANCE_FOR_PKG_POWER,\ .last_balance = jiffies, \ .balance_interval = 1, \ -- cgit v1.2.3 From f92131c3dd567fc6df18ce3f46fcf57ecbdefbe0 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 29 Oct 2008 14:10:51 +0100 Subject: bio: define __BIOVEC_PHYS_MERGEABLE Define __BIOVEC_PHYS_MERGEABLE as the default implementation of BIOVEC_PHYS_MERGEABLE, so that its available for reuse within an arch-specific definition of BIOVEC_PHYS_MERGEABLE. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Jens Axboe --- include/linux/bio.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 1c91a176b9ae..6a642098e5c3 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -236,12 +236,16 @@ static inline void *bio_data(struct bio *bio) #define __BVEC_END(bio) bio_iovec_idx((bio), (bio)->bi_vcnt - 1) #define __BVEC_START(bio) bio_iovec_idx((bio), (bio)->bi_idx) +/* Default implementation of BIOVEC_PHYS_MERGEABLE */ +#define __BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ + ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) + /* * allow arch override, for eg virtualized architectures (put in asm/io.h) */ #ifndef BIOVEC_PHYS_MERGEABLE #define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ - ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) + __BIOVEC_PHYS_MERGEABLE(vec1, vec2) #endif #define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \ -- cgit v1.2.3 From 9c133c469d38043d5aadaa03f2fb840d88d1cf4f Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 6 Nov 2008 08:42:48 +0100 Subject: Add round_jiffies_up and related routines This patch (as1158b) adds round_jiffies_up() and friends. These routines work like the analogous round_jiffies() functions, except that they will never round down. The new routines will be useful for timeouts where we don't care exactly when the timer expires, provided it doesn't expire too soon. Signed-off-by: Alan Stern Signed-off-by: Jens Axboe --- include/linux/timer.h | 5 ++ kernel/timer.c | 129 ++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 104 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index d4ba79248a27..daf9685b861c 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -186,4 +186,9 @@ unsigned long __round_jiffies_relative(unsigned long j, int cpu); unsigned long round_jiffies(unsigned long j); unsigned long round_jiffies_relative(unsigned long j); +unsigned long __round_jiffies_up(unsigned long j, int cpu); +unsigned long __round_jiffies_up_relative(unsigned long j, int cpu); +unsigned long round_jiffies_up(unsigned long j); +unsigned long round_jiffies_up_relative(unsigned long j); + #endif diff --git a/kernel/timer.c b/kernel/timer.c index 56becf373c58..dbd50fabe4c7 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -112,27 +112,8 @@ timer_set_base(struct timer_list *timer, struct tvec_base *new_base) tbase_get_deferrable(timer->base)); } -/** - * __round_jiffies - function to round jiffies to a full second - * @j: the time in (absolute) jiffies that should be rounded - * @cpu: the processor number on which the timeout will happen - * - * __round_jiffies() rounds an absolute time in the future (in jiffies) - * up or down to (approximately) full seconds. This is useful for timers - * for which the exact time they fire does not matter too much, as long as - * they fire approximately every X seconds. - * - * By rounding these timers to whole seconds, all such timers will fire - * at the same time, rather than at various times spread out. The goal - * of this is to have the CPU wake up less, which saves power. - * - * The exact rounding is skewed for each processor to avoid all - * processors firing at the exact same time, which could lead - * to lock contention or spurious cache line bouncing. - * - * The return value is the rounded version of the @j parameter. - */ -unsigned long __round_jiffies(unsigned long j, int cpu) +static unsigned long round_jiffies_common(unsigned long j, int cpu, + bool force_up) { int rem; unsigned long original = j; @@ -154,8 +135,9 @@ unsigned long __round_jiffies(unsigned long j, int cpu) * due to delays of the timer irq, long irq off times etc etc) then * we should round down to the whole second, not up. Use 1/4th second * as cutoff for this rounding as an extreme upper bound for this. + * But never round down if @force_up is set. */ - if (rem < HZ/4) /* round down */ + if (rem < HZ/4 && !force_up) /* round down */ j = j - rem; else /* round up */ j = j - rem + HZ; @@ -167,6 +149,31 @@ unsigned long __round_jiffies(unsigned long j, int cpu) return original; return j; } + +/** + * __round_jiffies - function to round jiffies to a full second + * @j: the time in (absolute) jiffies that should be rounded + * @cpu: the processor number on which the timeout will happen + * + * __round_jiffies() rounds an absolute time in the future (in jiffies) + * up or down to (approximately) full seconds. This is useful for timers + * for which the exact time they fire does not matter too much, as long as + * they fire approximately every X seconds. + * + * By rounding these timers to whole seconds, all such timers will fire + * at the same time, rather than at various times spread out. The goal + * of this is to have the CPU wake up less, which saves power. + * + * The exact rounding is skewed for each processor to avoid all + * processors firing at the exact same time, which could lead + * to lock contention or spurious cache line bouncing. + * + * The return value is the rounded version of the @j parameter. + */ +unsigned long __round_jiffies(unsigned long j, int cpu) +{ + return round_jiffies_common(j, cpu, false); +} EXPORT_SYMBOL_GPL(__round_jiffies); /** @@ -191,13 +198,10 @@ EXPORT_SYMBOL_GPL(__round_jiffies); */ unsigned long __round_jiffies_relative(unsigned long j, int cpu) { - /* - * In theory the following code can skip a jiffy in case jiffies - * increments right between the addition and the later subtraction. - * However since the entire point of this function is to use approximate - * timeouts, it's entirely ok to not handle that. - */ - return __round_jiffies(j + jiffies, cpu) - jiffies; + unsigned long j0 = jiffies; + + /* Use j0 because jiffies might change while we run */ + return round_jiffies_common(j + j0, cpu, false) - j0; } EXPORT_SYMBOL_GPL(__round_jiffies_relative); @@ -218,7 +222,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies_relative); */ unsigned long round_jiffies(unsigned long j) { - return __round_jiffies(j, raw_smp_processor_id()); + return round_jiffies_common(j, raw_smp_processor_id(), false); } EXPORT_SYMBOL_GPL(round_jiffies); @@ -243,6 +247,71 @@ unsigned long round_jiffies_relative(unsigned long j) } EXPORT_SYMBOL_GPL(round_jiffies_relative); +/** + * __round_jiffies_up - function to round jiffies up to a full second + * @j: the time in (absolute) jiffies that should be rounded + * @cpu: the processor number on which the timeout will happen + * + * This is the same as __round_jiffies() except that it will never + * round down. This is useful for timeouts for which the exact time + * of firing does not matter too much, as long as they don't fire too + * early. + */ +unsigned long __round_jiffies_up(unsigned long j, int cpu) +{ + return round_jiffies_common(j, cpu, true); +} +EXPORT_SYMBOL_GPL(__round_jiffies_up); + +/** + * __round_jiffies_up_relative - function to round jiffies up to a full second + * @j: the time in (relative) jiffies that should be rounded + * @cpu: the processor number on which the timeout will happen + * + * This is the same as __round_jiffies_relative() except that it will never + * round down. This is useful for timeouts for which the exact time + * of firing does not matter too much, as long as they don't fire too + * early. + */ +unsigned long __round_jiffies_up_relative(unsigned long j, int cpu) +{ + unsigned long j0 = jiffies; + + /* Use j0 because jiffies might change while we run */ + return round_jiffies_common(j + j0, cpu, true) - j0; +} +EXPORT_SYMBOL_GPL(__round_jiffies_up_relative); + +/** + * round_jiffies_up - function to round jiffies up to a full second + * @j: the time in (absolute) jiffies that should be rounded + * + * This is the same as round_jiffies() except that it will never + * round down. This is useful for timeouts for which the exact time + * of firing does not matter too much, as long as they don't fire too + * early. + */ +unsigned long round_jiffies_up(unsigned long j) +{ + return round_jiffies_common(j, raw_smp_processor_id(), true); +} +EXPORT_SYMBOL_GPL(round_jiffies_up); + +/** + * round_jiffies_up_relative - function to round jiffies up to a full second + * @j: the time in (relative) jiffies that should be rounded + * + * This is the same as round_jiffies_relative() except that it will never + * round down. This is useful for timeouts for which the exact time + * of firing does not matter too much, as long as they don't fire too + * early. + */ +unsigned long round_jiffies_up_relative(unsigned long j) +{ + return __round_jiffies_up_relative(j, raw_smp_processor_id()); +} +EXPORT_SYMBOL_GPL(round_jiffies_up_relative); + static inline void set_running_timer(struct tvec_base *base, struct timer_list *timer) -- cgit v1.2.3 From 2d3854a37e8b767a51aba38ed6d22817b0631e33 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 5 Nov 2008 13:39:10 +1100 Subject: cpumask: introduce new API, without changing anything Impact: introduce new APIs We want to deprecate cpumasks on the stack, as we are headed for gynormous numbers of CPUs. Eventually, we want to head towards an undefined 'struct cpumask' so they can never be declared on stack. 1) New cpumask functions which take pointers instead of copies. (cpus_* -> cpumask_*) 2) Several new helpers to reduce requirements for temporary cpumasks (cpumask_first_and, cpumask_next_and, cpumask_any_and) 3) Helpers for declaring cpumasks on or offstack for large NR_CPUS (cpumask_var_t, alloc_cpumask_var and free_cpumask_var) 4) 'struct cpumask' for explicitness and to mark new-style code. 5) Make iterator functions stop at nr_cpu_ids (a runtime constant), not NR_CPUS for time efficiency and for smaller dynamic allocations in future. 6) cpumask_copy() so we can allocate less than a full cpumask eventually (for alloc_cpumask_var), and so we can eliminate the 'struct cpumask' definition eventually. 7) work_on_cpu() helper for doing task on a CPU, rather than saving old cpumask for current thread and manipulating it. 8) smp_call_function_many() which is smp_call_function_mask() except taking a cpumask pointer. Note that this patch simply introduces the new functions and leaves the obsolescent ones in place. This is to simplify the transition patches. Signed-off-by: Rusty Russell Signed-off-by: Ingo Molnar --- include/linux/cpumask.h | 502 +++++++++++++++++++++++++++++++++++++++++++++- include/linux/smp.h | 9 + include/linux/workqueue.h | 8 + kernel/cpu.c | 3 + kernel/workqueue.c | 45 +++++ lib/cpumask.c | 73 +++++++ 6 files changed, 638 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index d3219d73f8e6..c8e66619097b 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -5,6 +5,9 @@ * Cpumasks provide a bitmap suitable for representing the * set of CPU's in a system, one bit position per CPU number. * + * The new cpumask_ ops take a "struct cpumask *"; the old ones + * use cpumask_t. + * * See detailed comments in the file linux/bitmap.h describing the * data type on which these cpumasks are based. * @@ -31,7 +34,7 @@ * will span the entire range of NR_CPUS. * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . * - * The available cpumask operations are: + * The obsolescent cpumask operations are: * * void cpu_set(cpu, mask) turn on bit 'cpu' in mask * void cpu_clear(cpu, mask) turn off bit 'cpu' in mask @@ -138,7 +141,7 @@ #include #include -typedef struct { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; +typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; extern cpumask_t _unused_cpumask_arg_; #define cpu_set(cpu, dst) __cpu_set((cpu), &(dst)) @@ -527,4 +530,499 @@ extern cpumask_t cpu_active_map; #define for_each_online_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_online_map) #define for_each_present_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_present_map) +/* These are the new versions of the cpumask operators: passed by pointer. + * The older versions will be implemented in terms of these, then deleted. */ +#define cpumask_bits(maskp) ((maskp)->bits) + +#if NR_CPUS <= BITS_PER_LONG +#define CPU_BITS_ALL \ +{ \ + [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ +} + +/* This produces more efficient code. */ +#define nr_cpumask_bits NR_CPUS + +#else /* NR_CPUS > BITS_PER_LONG */ + +#define CPU_BITS_ALL \ +{ \ + [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \ + [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ +} + +#define nr_cpumask_bits nr_cpu_ids +#endif /* NR_CPUS > BITS_PER_LONG */ + +/* verify cpu argument to cpumask_* operators */ +static inline unsigned int cpumask_check(unsigned int cpu) +{ +#ifdef CONFIG_DEBUG_PER_CPU_MAPS + WARN_ON_ONCE(cpu >= nr_cpumask_bits); +#endif /* CONFIG_DEBUG_PER_CPU_MAPS */ + return cpu; +} + +#if NR_CPUS == 1 +/* Uniprocesor. */ +#define cpumask_first(src) ({ (void)(src); 0; }) +#define cpumask_next(n, src) ({ (void)(src); 1; }) +#define cpumask_next_zero(n, src) ({ (void)(src); 1; }) +#define cpumask_next_and(n, srcp, andp) ({ (void)(srcp), (void)(andp); 1; }) +#define cpumask_any_but(mask, cpu) ({ (void)(mask); (void)(cpu); 0; }) + +#define for_each_cpu(cpu, mask) \ + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) +#define for_each_cpu_and(cpu, mask, and) \ + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)and) +#else +/** + * cpumask_first - get the first cpu in a cpumask + * @srcp: the cpumask pointer + * + * Returns >= nr_cpu_ids if no cpus set. + */ +static inline unsigned int cpumask_first(const struct cpumask *srcp) +{ + return find_first_bit(cpumask_bits(srcp), nr_cpumask_bits); +} + +/** + * cpumask_next - get the next cpu in a cpumask + * @n: the cpu prior to the place to search (ie. return will be > @n) + * @srcp: the cpumask pointer + * + * Returns >= nr_cpu_ids if no further cpus set. + */ +static inline unsigned int cpumask_next(int n, const struct cpumask *srcp) +{ + /* -1 is a legal arg here. */ + if (n != -1) + cpumask_check(n); + return find_next_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1); +} + +/** + * cpumask_next_zero - get the next unset cpu in a cpumask + * @n: the cpu prior to the place to search (ie. return will be > @n) + * @srcp: the cpumask pointer + * + * Returns >= nr_cpu_ids if no further cpus unset. + */ +static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) +{ + /* -1 is a legal arg here. */ + if (n != -1) + cpumask_check(n); + return find_next_zero_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1); +} + +int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *); +int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); + +#define for_each_cpu(cpu, mask) \ + for ((cpu) = -1; \ + (cpu) = cpumask_next((cpu), (mask)), \ + (cpu) < nr_cpu_ids;) +#define for_each_cpu_and(cpu, mask, and) \ + for ((cpu) = -1; \ + (cpu) = cpumask_next_and((cpu), (mask), (and)), \ + (cpu) < nr_cpu_ids;) +#endif /* SMP */ + +#define CPU_BITS_NONE \ +{ \ + [0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \ +} + +#define CPU_BITS_CPU0 \ +{ \ + [0] = 1UL \ +} + +/** + * cpumask_set_cpu - set a cpu in a cpumask + * @cpu: cpu number (< nr_cpu_ids) + * @dstp: the cpumask pointer + */ +static inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) +{ + set_bit(cpumask_check(cpu), cpumask_bits(dstp)); +} + +/** + * cpumask_clear_cpu - clear a cpu in a cpumask + * @cpu: cpu number (< nr_cpu_ids) + * @dstp: the cpumask pointer + */ +static inline void cpumask_clear_cpu(int cpu, struct cpumask *dstp) +{ + clear_bit(cpumask_check(cpu), cpumask_bits(dstp)); +} + +/** + * cpumask_test_cpu - test for a cpu in a cpumask + * @cpu: cpu number (< nr_cpu_ids) + * @cpumask: the cpumask pointer + * + * No static inline type checking - see Subtlety (1) above. + */ +#define cpumask_test_cpu(cpu, cpumask) \ + test_bit(cpumask_check(cpu), (cpumask)->bits) + +/** + * cpumask_test_and_set_cpu - atomically test and set a cpu in a cpumask + * @cpu: cpu number (< nr_cpu_ids) + * @cpumask: the cpumask pointer + * + * test_and_set_bit wrapper for cpumasks. + */ +static inline int cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) +{ + return test_and_set_bit(cpumask_check(cpu), cpumask_bits(cpumask)); +} + +/** + * cpumask_setall - set all cpus (< nr_cpu_ids) in a cpumask + * @dstp: the cpumask pointer + */ +static inline void cpumask_setall(struct cpumask *dstp) +{ + bitmap_fill(cpumask_bits(dstp), nr_cpumask_bits); +} + +/** + * cpumask_clear - clear all cpus (< nr_cpu_ids) in a cpumask + * @dstp: the cpumask pointer + */ +static inline void cpumask_clear(struct cpumask *dstp) +{ + bitmap_zero(cpumask_bits(dstp), nr_cpumask_bits); +} + +/** + * cpumask_and - *dstp = *src1p & *src2p + * @dstp: the cpumask result + * @src1p: the first input + * @src2p: the second input + */ +static inline void cpumask_and(struct cpumask *dstp, + const struct cpumask *src1p, + const struct cpumask *src2p) +{ + bitmap_and(cpumask_bits(dstp), cpumask_bits(src1p), + cpumask_bits(src2p), nr_cpumask_bits); +} + +/** + * cpumask_or - *dstp = *src1p | *src2p + * @dstp: the cpumask result + * @src1p: the first input + * @src2p: the second input + */ +static inline void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p, + const struct cpumask *src2p) +{ + bitmap_or(cpumask_bits(dstp), cpumask_bits(src1p), + cpumask_bits(src2p), nr_cpumask_bits); +} + +/** + * cpumask_xor - *dstp = *src1p ^ *src2p + * @dstp: the cpumask result + * @src1p: the first input + * @src2p: the second input + */ +static inline void cpumask_xor(struct cpumask *dstp, + const struct cpumask *src1p, + const struct cpumask *src2p) +{ + bitmap_xor(cpumask_bits(dstp), cpumask_bits(src1p), + cpumask_bits(src2p), nr_cpumask_bits); +} + +/** + * cpumask_andnot - *dstp = *src1p & ~*src2p + * @dstp: the cpumask result + * @src1p: the first input + * @src2p: the second input + */ +static inline void cpumask_andnot(struct cpumask *dstp, + const struct cpumask *src1p, + const struct cpumask *src2p) +{ + bitmap_andnot(cpumask_bits(dstp), cpumask_bits(src1p), + cpumask_bits(src2p), nr_cpumask_bits); +} + +/** + * cpumask_complement - *dstp = ~*srcp + * @dstp: the cpumask result + * @srcp: the input to invert + */ +static inline void cpumask_complement(struct cpumask *dstp, + const struct cpumask *srcp) +{ + bitmap_complement(cpumask_bits(dstp), cpumask_bits(srcp), + nr_cpumask_bits); +} + +/** + * cpumask_equal - *src1p == *src2p + * @src1p: the first input + * @src2p: the second input + */ +static inline bool cpumask_equal(const struct cpumask *src1p, + const struct cpumask *src2p) +{ + return bitmap_equal(cpumask_bits(src1p), cpumask_bits(src2p), + nr_cpumask_bits); +} + +/** + * cpumask_intersects - (*src1p & *src2p) != 0 + * @src1p: the first input + * @src2p: the second input + */ +static inline bool cpumask_intersects(const struct cpumask *src1p, + const struct cpumask *src2p) +{ + return bitmap_intersects(cpumask_bits(src1p), cpumask_bits(src2p), + nr_cpumask_bits); +} + +/** + * cpumask_subset - (*src1p & ~*src2p) == 0 + * @src1p: the first input + * @src2p: the second input + */ +static inline int cpumask_subset(const struct cpumask *src1p, + const struct cpumask *src2p) +{ + return bitmap_subset(cpumask_bits(src1p), cpumask_bits(src2p), + nr_cpumask_bits); +} + +/** + * cpumask_empty - *srcp == 0 + * @srcp: the cpumask to that all cpus < nr_cpu_ids are clear. + */ +static inline bool cpumask_empty(const struct cpumask *srcp) +{ + return bitmap_empty(cpumask_bits(srcp), nr_cpumask_bits); +} + +/** + * cpumask_full - *srcp == 0xFFFFFFFF... + * @srcp: the cpumask to that all cpus < nr_cpu_ids are set. + */ +static inline bool cpumask_full(const struct cpumask *srcp) +{ + return bitmap_full(cpumask_bits(srcp), nr_cpumask_bits); +} + +/** + * cpumask_weight - Count of bits in *srcp + * @srcp: the cpumask to count bits (< nr_cpu_ids) in. + */ +static inline unsigned int cpumask_weight(const struct cpumask *srcp) +{ + return bitmap_weight(cpumask_bits(srcp), nr_cpumask_bits); +} + +/** + * cpumask_shift_right - *dstp = *srcp >> n + * @dstp: the cpumask result + * @srcp: the input to shift + * @n: the number of bits to shift by + */ +static inline void cpumask_shift_right(struct cpumask *dstp, + const struct cpumask *srcp, int n) +{ + bitmap_shift_right(cpumask_bits(dstp), cpumask_bits(srcp), n, + nr_cpumask_bits); +} + +/** + * cpumask_shift_left - *dstp = *srcp << n + * @dstp: the cpumask result + * @srcp: the input to shift + * @n: the number of bits to shift by + */ +static inline void cpumask_shift_left(struct cpumask *dstp, + const struct cpumask *srcp, int n) +{ + bitmap_shift_left(cpumask_bits(dstp), cpumask_bits(srcp), n, + nr_cpumask_bits); +} + +/** + * cpumask_copy - *dstp = *srcp + * @dstp: the result + * @srcp: the input cpumask + */ +static inline void cpumask_copy(struct cpumask *dstp, + const struct cpumask *srcp) +{ + bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), nr_cpumask_bits); +} + +/** + * cpumask_any - pick a "random" cpu from *srcp + * @srcp: the input cpumask + * + * Returns >= nr_cpu_ids if no cpus set. + */ +#define cpumask_any(srcp) cpumask_first(srcp) + +/** + * cpumask_first_and - return the first cpu from *srcp1 & *srcp2 + * @src1p: the first input + * @src2p: the second input + * + * Returns >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and(). + */ +#define cpumask_first_and(src1p, src2p) cpumask_next_and(-1, (src1p), (src2p)) + +/** + * cpumask_any_and - pick a "random" cpu from *mask1 & *mask2 + * @mask1: the first input cpumask + * @mask2: the second input cpumask + * + * Returns >= nr_cpu_ids if no cpus set. + */ +#define cpumask_any_and(mask1, mask2) cpumask_first_and((mask1), (mask2)) + +/** + * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * + * @bitmap: the bitmap + * + * There are a few places where cpumask_var_t isn't appropriate and + * static cpumasks must be used (eg. very early boot), yet we don't + * expose the definition of 'struct cpumask'. + * + * This does the conversion, and can be used as a constant initializer. + */ +#define to_cpumask(bitmap) \ + ((struct cpumask *)(1 ? (bitmap) \ + : (void *)sizeof(__check_is_bitmap(bitmap)))) + +static inline int __check_is_bitmap(const unsigned long *bitmap) +{ + return 1; +} + +/** + * cpumask_size - size to allocate for a 'struct cpumask' in bytes + * + * This will eventually be a runtime variable, depending on nr_cpu_ids. + */ +static inline size_t cpumask_size(void) +{ + /* FIXME: Once all cpumask assignments are eliminated, this + * can be nr_cpumask_bits */ + return BITS_TO_LONGS(NR_CPUS) * sizeof(long); +} + +/* + * cpumask_var_t: struct cpumask for stack usage. + * + * Oh, the wicked games we play! In order to make kernel coding a + * little more difficult, we typedef cpumask_var_t to an array or a + * pointer: doing &mask on an array is a noop, so it still works. + * + * ie. + * cpumask_var_t tmpmask; + * if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL)) + * return -ENOMEM; + * + * ... use 'tmpmask' like a normal struct cpumask * ... + * + * free_cpumask_var(tmpmask); + */ +#ifdef CONFIG_CPUMASK_OFFSTACK +typedef struct cpumask *cpumask_var_t; + +bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); +void alloc_bootmem_cpumask_var(cpumask_var_t *mask); +void free_cpumask_var(cpumask_var_t mask); + +#else +typedef struct cpumask cpumask_var_t[1]; + +static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) +{ + return true; +} + +static inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask) +{ +} + +static inline void free_cpumask_var(cpumask_var_t mask) +{ +} +#endif /* CONFIG_CPUMASK_OFFSTACK */ + +/* The pointer versions of the maps, these will become the primary versions. */ +#define cpu_possible_mask ((const struct cpumask *)&cpu_possible_map) +#define cpu_online_mask ((const struct cpumask *)&cpu_online_map) +#define cpu_present_mask ((const struct cpumask *)&cpu_present_map) +#define cpu_active_mask ((const struct cpumask *)&cpu_active_map) + +/* It's common to want to use cpu_all_mask in struct member initializers, + * so it has to refer to an address rather than a pointer. */ +extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS); +#define cpu_all_mask to_cpumask(cpu_all_bits) + +/* First bits of cpu_bit_bitmap are in fact unset. */ +#define cpu_none_mask to_cpumask(cpu_bit_bitmap[0]) + +/* Wrappers for arch boot code to manipulate normally-constant masks */ +static inline void set_cpu_possible(unsigned int cpu, bool possible) +{ + if (possible) + cpumask_set_cpu(cpu, &cpu_possible_map); + else + cpumask_clear_cpu(cpu, &cpu_possible_map); +} + +static inline void set_cpu_present(unsigned int cpu, bool present) +{ + if (present) + cpumask_set_cpu(cpu, &cpu_present_map); + else + cpumask_clear_cpu(cpu, &cpu_present_map); +} + +static inline void set_cpu_online(unsigned int cpu, bool online) +{ + if (online) + cpumask_set_cpu(cpu, &cpu_online_map); + else + cpumask_clear_cpu(cpu, &cpu_online_map); +} + +static inline void set_cpu_active(unsigned int cpu, bool active) +{ + if (active) + cpumask_set_cpu(cpu, &cpu_active_map); + else + cpumask_clear_cpu(cpu, &cpu_active_map); +} + +static inline void init_cpu_present(const struct cpumask *src) +{ + cpumask_copy(&cpu_present_map, src); +} + +static inline void init_cpu_possible(const struct cpumask *src) +{ + cpumask_copy(&cpu_possible_map, src); +} + +static inline void init_cpu_online(const struct cpumask *src) +{ + cpumask_copy(&cpu_online_map, src); +} #endif /* __LINUX_CPUMASK_H */ diff --git a/include/linux/smp.h b/include/linux/smp.h index 2e4d58b26c06..3f9a60043a97 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -64,8 +64,17 @@ extern void smp_cpus_done(unsigned int max_cpus); * Call a function on all other processors */ int smp_call_function(void(*func)(void *info), void *info, int wait); +/* Deprecated: use smp_call_function_many() which uses a cpumask ptr. */ int smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info, int wait); + +static inline void smp_call_function_many(const struct cpumask *mask, + void (*func)(void *info), void *info, + int wait) +{ + smp_call_function_mask(*mask, func, info, wait); +} + int smp_call_function_single(int cpuid, void (*func) (void *info), void *info, int wait); void __smp_call_function_single(int cpuid, struct call_single_data *data); diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 89a5a1231ffb..b36291130f22 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -240,4 +240,12 @@ void cancel_rearming_delayed_work(struct delayed_work *work) cancel_delayed_work_sync(work); } +#ifndef CONFIG_SMP +static inline long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) +{ + return fn(arg); +} +#else +long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg); +#endif /* CONFIG_SMP */ #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index 86d49045daed..5a732c5ef08b 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -499,3 +499,6 @@ const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = { #endif }; EXPORT_SYMBOL_GPL(cpu_bit_bitmap); + +const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL; +EXPORT_SYMBOL(cpu_all_bits); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index f928f2a87b9b..d4dc69ddebd7 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -970,6 +970,51 @@ undo: return ret; } +#ifdef CONFIG_SMP +struct work_for_cpu { + struct work_struct work; + long (*fn)(void *); + void *arg; + long ret; +}; + +static void do_work_for_cpu(struct work_struct *w) +{ + struct work_for_cpu *wfc = container_of(w, struct work_for_cpu, work); + + wfc->ret = wfc->fn(wfc->arg); +} + +/** + * work_on_cpu - run a function in user context on a particular cpu + * @cpu: the cpu to run on + * @fn: the function to run + * @arg: the function arg + * + * This will return -EINVAL in the cpu is not online, or the return value + * of @fn otherwise. + */ +long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) +{ + struct work_for_cpu wfc; + + INIT_WORK(&wfc.work, do_work_for_cpu); + wfc.fn = fn; + wfc.arg = arg; + get_online_cpus(); + if (unlikely(!cpu_online(cpu))) + wfc.ret = -EINVAL; + else { + schedule_work_on(cpu, &wfc.work); + flush_work(&wfc.work); + } + put_online_cpus(); + + return wfc.ret; +} +EXPORT_SYMBOL_GPL(work_on_cpu); +#endif /* CONFIG_SMP */ + void __init init_workqueues(void) { cpu_populated_map = cpu_online_map; diff --git a/lib/cpumask.c b/lib/cpumask.c index 5f97dc25ef9c..5ceb4211c834 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -2,6 +2,7 @@ #include #include #include +#include int __first_cpu(const cpumask_t *srcp) { @@ -35,3 +36,75 @@ int __any_online_cpu(const cpumask_t *mask) return cpu; } EXPORT_SYMBOL(__any_online_cpu); + +/** + * cpumask_next_and - get the next cpu in *src1p & *src2p + * @n: the cpu prior to the place to search (ie. return will be > @n) + * @src1p: the first cpumask pointer + * @src2p: the second cpumask pointer + * + * Returns >= nr_cpu_ids if no further cpus set in both. + */ +int cpumask_next_and(int n, const struct cpumask *src1p, + const struct cpumask *src2p) +{ + while ((n = cpumask_next(n, src1p)) < nr_cpu_ids) + if (cpumask_test_cpu(n, src2p)) + break; + return n; +} +EXPORT_SYMBOL(cpumask_next_and); + +/** + * cpumask_any_but - return a "random" in a cpumask, but not this one. + * @mask: the cpumask to search + * @cpu: the cpu to ignore. + * + * Often used to find any cpu but smp_processor_id() in a mask. + * Returns >= nr_cpu_ids if no cpus set. + */ +int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) +{ + unsigned int i; + + for_each_cpu(i, mask) + if (i != cpu) + break; + return i; +} + +/* These are not inline because of header tangles. */ +#ifdef CONFIG_CPUMASK_OFFSTACK +bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) +{ + if (likely(slab_is_available())) + *mask = kmalloc(cpumask_size(), flags); + else { +#ifdef CONFIG_DEBUG_PER_CPU_MAPS + printk(KERN_ERR + "=> alloc_cpumask_var: kmalloc not available!\n"); + dump_stack(); +#endif + *mask = NULL; + } +#ifdef CONFIG_DEBUG_PER_CPU_MAPS + if (!*mask) { + printk(KERN_ERR "=> alloc_cpumask_var: failed!\n"); + dump_stack(); + } +#endif + return *mask != NULL; +} +EXPORT_SYMBOL(alloc_cpumask_var); + +void __init alloc_bootmem_cpumask_var(cpumask_var_t *mask) +{ + *mask = alloc_bootmem(cpumask_size()); +} + +void free_cpumask_var(cpumask_var_t mask) +{ + kfree(mask); +} +EXPORT_SYMBOL(free_cpumask_var); +#endif -- cgit v1.2.3 From 8950d89acaa8c353869e681772479d7955ae6f7a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 5 Nov 2008 16:18:03 -0700 Subject: ACPI: remove CONFIG_ACPI_EC Remove CONFIG_ACPI_EC. It was always set the same as CONFIG_ACPI, and it had no menu label, so there was no way to set it to anything other than "y". Per section 6.5.4 of the ACPI 3.0b specification, OSPM must make Embedded Controller operation regions, accessed via the Embedded Controllers described in ECDT, available before executing any control method. The ECDT table is optional, but if it is present, the above text means that the EC it describes is a required part of the ACPI subsystem, so CONFIG_ACPI_EC=n wouldn't make sense. Signed-off-by: Bjorn Helgaas Acked-by: Alexey Starikovskiy Signed-off-by: Len Brown --- drivers/acpi/Kconfig | 8 -------- drivers/acpi/Makefile | 2 +- drivers/acpi/bus.c | 3 +-- drivers/char/sonypi.c | 4 ++-- drivers/misc/Kconfig | 4 ++-- include/acpi/acpi_drivers.h | 2 -- include/linux/acpi.h | 4 ---- 7 files changed, 6 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 4fa7866a9a5e..90cb2a823b56 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -324,14 +324,6 @@ config ACPI_DEBUG_FUNC_TRACE ACPI Debug Statements slow down ACPI processing. Function trace is about half of the penalty and is rarely useful. -config ACPI_EC - bool - default y - help - This driver is required on some systems for the proper operation of - the battery and thermal drivers. If you are compiling for a - mobile system, say Y. - config ACPI_PCI_SLOT tristate "PCI slot detection driver" default n diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 8017f63920c4..fc622316a7d8 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -39,7 +39,7 @@ obj-y += sleep/ obj-y += bus.o glue.o obj-y += scan.o # Keep EC driver first. Initialization of others depend on it. -obj-$(CONFIG_ACPI_EC) += ec.o +obj-y += ec.o obj-$(CONFIG_ACPI_AC) += ac.o obj-$(CONFIG_ACPI_BATTERY) += battery.o obj-$(CONFIG_ACPI_BUTTON) += button.o diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index c797c6473f31..765fd1c56cd6 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -774,7 +774,7 @@ static int __init acpi_bus_init(void) "Unable to initialize ACPI OS objects\n"); goto error1; } -#ifdef CONFIG_ACPI_EC + /* * ACPI 2.0 requires the EC driver to be loaded and work before * the EC device is found in the namespace (i.e. before acpi_initialize_objects() @@ -785,7 +785,6 @@ static int __init acpi_bus_init(void) */ status = acpi_ec_ecdt_probe(); /* Ignore result. Not having an ECDT is not fatal. */ -#endif status = acpi_initialize_objects(ACPI_FULL_INITIALIZATION); if (ACPI_FAILURE(status)) { diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c index 85e0eb76eeab..1b128d1e2150 100644 --- a/drivers/char/sonypi.c +++ b/drivers/char/sonypi.c @@ -523,7 +523,7 @@ static int acpi_driver_registered; static int sonypi_ec_write(u8 addr, u8 value) { -#ifdef CONFIG_ACPI_EC +#ifdef CONFIG_ACPI if (SONYPI_ACPI_ACTIVE) return ec_write(addr, value); #endif @@ -539,7 +539,7 @@ static int sonypi_ec_write(u8 addr, u8 value) static int sonypi_ec_read(u8 addr, u8 *value) { -#ifdef CONFIG_ACPI_EC +#ifdef CONFIG_ACPI if (SONYPI_ACPI_ACTIVE) return ec_read(addr, value); #endif diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 9494400e8fd0..4494ad27cbf1 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -230,7 +230,7 @@ config HP_WMI config MSI_LAPTOP tristate "MSI Laptop Extras" depends on X86 - depends on ACPI_EC + depends on ACPI depends on BACKLIGHT_CLASS_DEVICE ---help--- This is a driver for laptops built by MSI (MICRO-STAR @@ -260,7 +260,7 @@ config PANASONIC_LAPTOP config COMPAL_LAPTOP tristate "Compal Laptop Extras" depends on X86 - depends on ACPI_EC + depends on ACPI depends on BACKLIGHT_CLASS_DEVICE ---help--- This is a driver for laptops built by Compal: diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h index 818215f89e7a..b3c40dc9d6ba 100644 --- a/include/acpi/acpi_drivers.h +++ b/include/acpi/acpi_drivers.h @@ -97,10 +97,8 @@ extern int acpi_power_nocheck; /* -------------------------------------------------------------------------- Embedded Controller -------------------------------------------------------------------------- */ -#ifdef CONFIG_ACPI_EC int acpi_ec_ecdt_probe(void); int acpi_boot_ec_enable(void); -#endif /* -------------------------------------------------------------------------- Processor diff --git a/include/linux/acpi.h b/include/linux/acpi.h index fd6a452b0ceb..d7846bdd2721 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -163,8 +163,6 @@ struct acpi_pci_driver { int acpi_pci_register_driver(struct acpi_pci_driver *driver); void acpi_pci_unregister_driver(struct acpi_pci_driver *driver); -#ifdef CONFIG_ACPI_EC - extern int ec_read(u8 addr, u8 *val); extern int ec_write(u8 addr, u8 val); extern int ec_transaction(u8 command, @@ -172,8 +170,6 @@ extern int ec_transaction(u8 command, u8 *rdata, unsigned rdata_len, int force_poll); -#endif /*CONFIG_ACPI_EC*/ - #if defined(CONFIG_ACPI_WMI) || defined(CONFIG_ACPI_WMI_MODULE) typedef void (*wmi_notify_handler) (u32 value, void *context); -- cgit v1.2.3 From f8d570a4745835f2238a33b537218a1bb03fc671 Mon Sep 17 00:00:00 2001 From: David Miller Date: Thu, 6 Nov 2008 00:37:40 -0800 Subject: net: Fix recursive descent in __scm_destroy(). __scm_destroy() walks the list of file descriptors in the scm_fp_list pointed to by the scm_cookie argument. Those, in turn, can close sockets and invoke __scm_destroy() again. There is nothing which limits how deeply this can occur. The idea for how to fix this is from Linus. Basically, we do all of the fput()s at the top level by collecting all of the scm_fp_list objects hit by an fput(). Inside of the initial __scm_destroy() we keep running the list until it is empty. Signed-off-by: David S. Miller Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 ++ include/net/scm.h | 5 +++-- net/core/scm.c | 24 +++++++++++++++++++++--- 3 files changed, 26 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b483f39a7112..295b7c756ca6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1349,6 +1349,8 @@ struct task_struct { */ unsigned long timer_slack_ns; unsigned long default_timer_slack_ns; + + struct list_head *scm_work_list; }; /* diff --git a/include/net/scm.h b/include/net/scm.h index 06df126103ca..33e9986beb86 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -14,8 +14,9 @@ struct scm_fp_list { - int count; - struct file *fp[SCM_MAX_FD]; + struct list_head list; + int count; + struct file *fp[SCM_MAX_FD]; }; struct scm_cookie diff --git a/net/core/scm.c b/net/core/scm.c index 10f5c65f6a47..ab242cc1acca 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -75,6 +75,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) if (!fpl) return -ENOMEM; *fplp = fpl; + INIT_LIST_HEAD(&fpl->list); fpl->count = 0; } fpp = &fpl->fp[fpl->count]; @@ -106,9 +107,25 @@ void __scm_destroy(struct scm_cookie *scm) if (fpl) { scm->fp = NULL; - for (i=fpl->count-1; i>=0; i--) - fput(fpl->fp[i]); - kfree(fpl); + if (current->scm_work_list) { + list_add_tail(&fpl->list, current->scm_work_list); + } else { + LIST_HEAD(work_list); + + current->scm_work_list = &work_list; + + list_add(&fpl->list, &work_list); + while (!list_empty(&work_list)) { + fpl = list_first_entry(&work_list, struct scm_fp_list, list); + + list_del(&fpl->list); + for (i=fpl->count-1; i>=0; i--) + fput(fpl->fp[i]); + kfree(fpl); + } + + current->scm_work_list = NULL; + } } } @@ -284,6 +301,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL); if (new_fpl) { + INIT_LIST_HEAD(&new_fpl->list); for (i=fpl->count-1; i>=0; i--) get_file(fpl->fp[i]); memcpy(new_fpl, fpl, sizeof(*fpl)); -- cgit v1.2.3 From 9e975dae2970d22557662761c8505ce9fd165684 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:46 -0800 Subject: fat: split include/msdos_fs.h This splits __KERNEL__ stuff in include/msdos_fs.h into fs/fat/fat.h. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/cache.c | 2 +- fs/fat/dir.c | 2 +- fs/fat/fat.h | 274 ++++++++++++++++++++++++++++++++++++++++++++++ fs/fat/fatent.c | 1 + fs/fat/file.c | 2 +- fs/fat/inode.c | 2 +- fs/fat/misc.c | 2 +- fs/fat/namei_msdos.c | 2 +- fs/fat/namei_vfat.c | 3 +- include/linux/msdos_fs.h | 276 +---------------------------------------------- 10 files changed, 284 insertions(+), 282 deletions(-) create mode 100644 fs/fat/fat.h (limited to 'include/linux') diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 3222f51c41cf..589edde9053c 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -9,8 +9,8 @@ */ #include -#include #include +#include "fat.h" /* this must be > 0. */ #define FAT_MAX_CACHE 8 diff --git a/fs/fat/dir.c b/fs/fat/dir.c index bae1c3292522..08b23ad25f1c 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -16,11 +16,11 @@ #include #include #include -#include #include #include #include #include +#include "fat.h" static inline loff_t fat_make_i_pos(struct super_block *sb, struct buffer_head *bh, diff --git a/fs/fat/fat.h b/fs/fat/fat.h new file mode 100644 index 000000000000..51f1c42ca5e3 --- /dev/null +++ b/fs/fat/fat.h @@ -0,0 +1,274 @@ +#ifndef _FAT_H +#define _FAT_H + +#include +#include +#include +#include +#include +#include + +/* + * vfat shortname flags + */ +#define VFAT_SFN_DISPLAY_LOWER 0x0001 /* convert to lowercase for display */ +#define VFAT_SFN_DISPLAY_WIN95 0x0002 /* emulate win95 rule for display */ +#define VFAT_SFN_DISPLAY_WINNT 0x0004 /* emulate winnt rule for display */ +#define VFAT_SFN_CREATE_WIN95 0x0100 /* emulate win95 rule for create */ +#define VFAT_SFN_CREATE_WINNT 0x0200 /* emulate winnt rule for create */ + +struct fat_mount_options { + uid_t fs_uid; + gid_t fs_gid; + unsigned short fs_fmask; + unsigned short fs_dmask; + unsigned short codepage; /* Codepage for shortname conversions */ + char *iocharset; /* Charset used for filename input/display */ + unsigned short shortname; /* flags for shortname display/create rule */ + unsigned char name_check; /* r = relaxed, n = normal, s = strict */ + unsigned short allow_utime;/* permission for setting the [am]time */ + unsigned quiet:1, /* set = fake successful chmods and chowns */ + showexec:1, /* set = only set x bit for com/exe/bat */ + sys_immutable:1, /* set = system files are immutable */ + dotsOK:1, /* set = hidden and system files are named '.filename' */ + isvfat:1, /* 0=no vfat long filename support, 1=vfat support */ + utf8:1, /* Use of UTF-8 character set (Default) */ + unicode_xlate:1, /* create escape sequences for unhandled Unicode */ + numtail:1, /* Does first alias have a numeric '~1' type tail? */ + flush:1, /* write things quickly */ + nocase:1, /* Does this need case conversion? 0=need case conversion*/ + usefree:1, /* Use free_clusters for FAT32 */ + tz_utc:1; /* Filesystem timestamps are in UTC */ +}; + +#define FAT_HASH_BITS 8 +#define FAT_HASH_SIZE (1UL << FAT_HASH_BITS) +#define FAT_HASH_MASK (FAT_HASH_SIZE-1) + +/* + * MS-DOS file system in-core superblock data + */ +struct msdos_sb_info { + unsigned short sec_per_clus; /* sectors/cluster */ + unsigned short cluster_bits; /* log2(cluster_size) */ + unsigned int cluster_size; /* cluster size */ + unsigned char fats,fat_bits; /* number of FATs, FAT bits (12 or 16) */ + unsigned short fat_start; + unsigned long fat_length; /* FAT start & length (sec.) */ + unsigned long dir_start; + unsigned short dir_entries; /* root dir start & entries */ + unsigned long data_start; /* first data sector */ + unsigned long max_cluster; /* maximum cluster number */ + unsigned long root_cluster; /* first cluster of the root directory */ + unsigned long fsinfo_sector; /* sector number of FAT32 fsinfo */ + struct mutex fat_lock; + unsigned int prev_free; /* previously allocated cluster number */ + unsigned int free_clusters; /* -1 if undefined */ + unsigned int free_clus_valid; /* is free_clusters valid? */ + struct fat_mount_options options; + struct nls_table *nls_disk; /* Codepage used on disk */ + struct nls_table *nls_io; /* Charset used for input and display */ + const void *dir_ops; /* Opaque; default directory operations */ + int dir_per_block; /* dir entries per block */ + int dir_per_block_bits; /* log2(dir_per_block) */ + + int fatent_shift; + struct fatent_operations *fatent_ops; + + spinlock_t inode_hash_lock; + struct hlist_head inode_hashtable[FAT_HASH_SIZE]; +}; + +#define FAT_CACHE_VALID 0 /* special case for valid cache */ + +/* + * MS-DOS file system inode data in memory + */ +struct msdos_inode_info { + spinlock_t cache_lru_lock; + struct list_head cache_lru; + int nr_caches; + /* for avoiding the race between fat_free() and fat_get_cluster() */ + unsigned int cache_valid_id; + + loff_t mmu_private; + int i_start; /* first cluster or 0 */ + int i_logstart; /* logical first cluster */ + int i_attrs; /* unused attribute bits */ + loff_t i_pos; /* on-disk position of directory entry or 0 */ + struct hlist_node i_fat_hash; /* hash by i_location */ + struct inode vfs_inode; +}; + +struct fat_slot_info { + loff_t i_pos; /* on-disk position of directory entry */ + loff_t slot_off; /* offset for slot or de start */ + int nr_slots; /* number of slots + 1(de) in filename */ + struct msdos_dir_entry *de; + struct buffer_head *bh; +}; + +static inline struct msdos_sb_info *MSDOS_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static inline struct msdos_inode_info *MSDOS_I(struct inode *inode) +{ + return container_of(inode, struct msdos_inode_info, vfs_inode); +} + +/* Return the FAT attribute byte for this inode */ +static inline u8 fat_attr(struct inode *inode) +{ + return ((inode->i_mode & S_IWUGO) ? ATTR_NONE : ATTR_RO) | + (S_ISDIR(inode->i_mode) ? ATTR_DIR : ATTR_NONE) | + MSDOS_I(inode)->i_attrs; +} + +static inline unsigned char fat_checksum(const __u8 *name) +{ + unsigned char s = name[0]; + s = (s<<7) + (s>>1) + name[1]; s = (s<<7) + (s>>1) + name[2]; + s = (s<<7) + (s>>1) + name[3]; s = (s<<7) + (s>>1) + name[4]; + s = (s<<7) + (s>>1) + name[5]; s = (s<<7) + (s>>1) + name[6]; + s = (s<<7) + (s>>1) + name[7]; s = (s<<7) + (s>>1) + name[8]; + s = (s<<7) + (s>>1) + name[9]; s = (s<<7) + (s>>1) + name[10]; + return s; +} + +static inline sector_t fat_clus_to_blknr(struct msdos_sb_info *sbi, int clus) +{ + return ((sector_t)clus - FAT_START_ENT) * sbi->sec_per_clus + + sbi->data_start; +} + +static inline void fat16_towchar(wchar_t *dst, const __u8 *src, size_t len) +{ +#ifdef __BIG_ENDIAN + while (len--) { + *dst++ = src[0] | (src[1] << 8); + src += 2; + } +#else + memcpy(dst, src, len * 2); +#endif +} + +static inline void fatwchar_to16(__u8 *dst, const wchar_t *src, size_t len) +{ +#ifdef __BIG_ENDIAN + while (len--) { + dst[0] = *src & 0x00FF; + dst[1] = (*src & 0xFF00) >> 8; + dst += 2; + src++; + } +#else + memcpy(dst, src, len * 2); +#endif +} + +/* fat/cache.c */ +extern void fat_cache_inval_inode(struct inode *inode); +extern int fat_get_cluster(struct inode *inode, int cluster, + int *fclus, int *dclus); +extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, + unsigned long *mapped_blocks); + +/* fat/dir.c */ +extern const struct file_operations fat_dir_operations; +extern int fat_search_long(struct inode *inode, const unsigned char *name, + int name_len, struct fat_slot_info *sinfo); +extern int fat_dir_empty(struct inode *dir); +extern int fat_subdirs(struct inode *dir); +extern int fat_scan(struct inode *dir, const unsigned char *name, + struct fat_slot_info *sinfo); +extern int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh, + struct msdos_dir_entry **de, loff_t *i_pos); +extern int fat_alloc_new_dir(struct inode *dir, struct timespec *ts); +extern int fat_add_entries(struct inode *dir, void *slots, int nr_slots, + struct fat_slot_info *sinfo); +extern int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo); + +/* fat/fatent.c */ +struct fat_entry { + int entry; + union { + u8 *ent12_p[2]; + __le16 *ent16_p; + __le32 *ent32_p; + } u; + int nr_bhs; + struct buffer_head *bhs[2]; +}; + +static inline void fatent_init(struct fat_entry *fatent) +{ + fatent->nr_bhs = 0; + fatent->entry = 0; + fatent->u.ent32_p = NULL; + fatent->bhs[0] = fatent->bhs[1] = NULL; +} + +static inline void fatent_set_entry(struct fat_entry *fatent, int entry) +{ + fatent->entry = entry; + fatent->u.ent32_p = NULL; +} + +static inline void fatent_brelse(struct fat_entry *fatent) +{ + int i; + fatent->u.ent32_p = NULL; + for (i = 0; i < fatent->nr_bhs; i++) + brelse(fatent->bhs[i]); + fatent->nr_bhs = 0; + fatent->bhs[0] = fatent->bhs[1] = NULL; +} + +extern void fat_ent_access_init(struct super_block *sb); +extern int fat_ent_read(struct inode *inode, struct fat_entry *fatent, + int entry); +extern int fat_ent_write(struct inode *inode, struct fat_entry *fatent, + int new, int wait); +extern int fat_alloc_clusters(struct inode *inode, int *cluster, + int nr_cluster); +extern int fat_free_clusters(struct inode *inode, int cluster); +extern int fat_count_free_clusters(struct super_block *sb); + +/* fat/file.c */ +extern int fat_generic_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg); +extern const struct file_operations fat_file_operations; +extern const struct inode_operations fat_file_inode_operations; +extern int fat_setattr(struct dentry * dentry, struct iattr * attr); +extern void fat_truncate(struct inode *inode); +extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat); + +/* fat/inode.c */ +extern void fat_attach(struct inode *inode, loff_t i_pos); +extern void fat_detach(struct inode *inode); +extern struct inode *fat_iget(struct super_block *sb, loff_t i_pos); +extern struct inode *fat_build_inode(struct super_block *sb, + struct msdos_dir_entry *de, loff_t i_pos); +extern int fat_sync_inode(struct inode *inode); +extern int fat_fill_super(struct super_block *sb, void *data, int silent, + const struct inode_operations *fs_dir_inode_ops, int isvfat); + +extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, + struct inode *i2); +/* fat/misc.c */ +extern void fat_fs_panic(struct super_block *s, const char *fmt, ...); +extern void fat_clusters_flush(struct super_block *sb); +extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); +extern int date_dos2unix(unsigned short time, unsigned short date, int tz_utc); +extern void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, + int tz_utc); +extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs); + +int fat_cache_init(void); +void fat_cache_destroy(void); + +#endif /* !_FAT_H */ diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index fb98b3d847ed..5b5f49061b7c 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -7,6 +7,7 @@ #include #include #include +#include "fat.h" struct fatent_operations { void (*ent_blocknr)(struct super_block *, int, int *, sector_t *); diff --git a/fs/fat/file.c b/fs/fat/file.c index ddde37025ca6..b21973f266a1 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -10,13 +10,13 @@ #include #include #include -#include #include #include #include #include #include #include +#include "fat.h" int fat_generic_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 2b2eec1283bf..3921de2013a4 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -28,6 +27,7 @@ #include #include #include +#include "fat.h" #ifndef CONFIG_FAT_DEFAULT_IOCHARSET /* if user don't select VFAT, this is undefined. */ diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 79fb98ad36d4..91ad9be18ff9 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -8,8 +8,8 @@ #include #include -#include #include +#include "fat.h" /* * fat_fs_panic reports a severe file system problem and sets the file system diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index e844b9809d27..c0a4d5cd99b2 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -9,8 +9,8 @@ #include #include #include -#include #include +#include "fat.h" /* Characters that are undesirable in an MS-DOS file name */ static unsigned char bad_chars[] = "*?<>|\""; diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 155c10b4adbd..facf3bf0211a 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -16,14 +16,13 @@ */ #include - #include -#include #include #include #include #include #include +#include "fat.h" static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) { diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index ba63858056c7..0982fb47a90d 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -167,282 +167,10 @@ struct msdos_dir_slot { }; #ifdef __KERNEL__ - -#include -#include -#include -#include -#include - -/* - * vfat shortname flags - */ -#define VFAT_SFN_DISPLAY_LOWER 0x0001 /* convert to lowercase for display */ -#define VFAT_SFN_DISPLAY_WIN95 0x0002 /* emulate win95 rule for display */ -#define VFAT_SFN_DISPLAY_WINNT 0x0004 /* emulate winnt rule for display */ -#define VFAT_SFN_CREATE_WIN95 0x0100 /* emulate win95 rule for create */ -#define VFAT_SFN_CREATE_WINNT 0x0200 /* emulate winnt rule for create */ - -struct fat_mount_options { - uid_t fs_uid; - gid_t fs_gid; - unsigned short fs_fmask; - unsigned short fs_dmask; - unsigned short codepage; /* Codepage for shortname conversions */ - char *iocharset; /* Charset used for filename input/display */ - unsigned short shortname; /* flags for shortname display/create rule */ - unsigned char name_check; /* r = relaxed, n = normal, s = strict */ - unsigned short allow_utime;/* permission for setting the [am]time */ - unsigned quiet:1, /* set = fake successful chmods and chowns */ - showexec:1, /* set = only set x bit for com/exe/bat */ - sys_immutable:1, /* set = system files are immutable */ - dotsOK:1, /* set = hidden and system files are named '.filename' */ - isvfat:1, /* 0=no vfat long filename support, 1=vfat support */ - utf8:1, /* Use of UTF-8 character set (Default) */ - unicode_xlate:1, /* create escape sequences for unhandled Unicode */ - numtail:1, /* Does first alias have a numeric '~1' type tail? */ - flush:1, /* write things quickly */ - nocase:1, /* Does this need case conversion? 0=need case conversion*/ - usefree:1, /* Use free_clusters for FAT32 */ - tz_utc:1; /* Filesystem timestamps are in UTC */ -}; - -#define FAT_HASH_BITS 8 -#define FAT_HASH_SIZE (1UL << FAT_HASH_BITS) -#define FAT_HASH_MASK (FAT_HASH_SIZE-1) - -/* - * MS-DOS file system in-core superblock data - */ -struct msdos_sb_info { - unsigned short sec_per_clus; /* sectors/cluster */ - unsigned short cluster_bits; /* log2(cluster_size) */ - unsigned int cluster_size; /* cluster size */ - unsigned char fats,fat_bits; /* number of FATs, FAT bits (12 or 16) */ - unsigned short fat_start; - unsigned long fat_length; /* FAT start & length (sec.) */ - unsigned long dir_start; - unsigned short dir_entries; /* root dir start & entries */ - unsigned long data_start; /* first data sector */ - unsigned long max_cluster; /* maximum cluster number */ - unsigned long root_cluster; /* first cluster of the root directory */ - unsigned long fsinfo_sector; /* sector number of FAT32 fsinfo */ - struct mutex fat_lock; - unsigned int prev_free; /* previously allocated cluster number */ - unsigned int free_clusters; /* -1 if undefined */ - unsigned int free_clus_valid; /* is free_clusters valid? */ - struct fat_mount_options options; - struct nls_table *nls_disk; /* Codepage used on disk */ - struct nls_table *nls_io; /* Charset used for input and display */ - const void *dir_ops; /* Opaque; default directory operations */ - int dir_per_block; /* dir entries per block */ - int dir_per_block_bits; /* log2(dir_per_block) */ - - int fatent_shift; - struct fatent_operations *fatent_ops; - - spinlock_t inode_hash_lock; - struct hlist_head inode_hashtable[FAT_HASH_SIZE]; -}; - -#define FAT_CACHE_VALID 0 /* special case for valid cache */ - -/* - * MS-DOS file system inode data in memory - */ -struct msdos_inode_info { - spinlock_t cache_lru_lock; - struct list_head cache_lru; - int nr_caches; - /* for avoiding the race between fat_free() and fat_get_cluster() */ - unsigned int cache_valid_id; - - loff_t mmu_private; - int i_start; /* first cluster or 0 */ - int i_logstart; /* logical first cluster */ - int i_attrs; /* unused attribute bits */ - loff_t i_pos; /* on-disk position of directory entry or 0 */ - struct hlist_node i_fat_hash; /* hash by i_location */ - struct inode vfs_inode; -}; - -struct fat_slot_info { - loff_t i_pos; /* on-disk position of directory entry */ - loff_t slot_off; /* offset for slot or de start */ - int nr_slots; /* number of slots + 1(de) in filename */ - struct msdos_dir_entry *de; - struct buffer_head *bh; -}; - -static inline struct msdos_sb_info *MSDOS_SB(struct super_block *sb) -{ - return sb->s_fs_info; -} - -static inline struct msdos_inode_info *MSDOS_I(struct inode *inode) -{ - return container_of(inode, struct msdos_inode_info, vfs_inode); -} - -/* Return the FAT attribute byte for this inode */ -static inline u8 fat_attr(struct inode *inode) -{ - return ((inode->i_mode & S_IWUGO) ? ATTR_NONE : ATTR_RO) | - (S_ISDIR(inode->i_mode) ? ATTR_DIR : ATTR_NONE) | - MSDOS_I(inode)->i_attrs; -} - -static inline unsigned char fat_checksum(const __u8 *name) -{ - unsigned char s = name[0]; - s = (s<<7) + (s>>1) + name[1]; s = (s<<7) + (s>>1) + name[2]; - s = (s<<7) + (s>>1) + name[3]; s = (s<<7) + (s>>1) + name[4]; - s = (s<<7) + (s>>1) + name[5]; s = (s<<7) + (s>>1) + name[6]; - s = (s<<7) + (s>>1) + name[7]; s = (s<<7) + (s>>1) + name[8]; - s = (s<<7) + (s>>1) + name[9]; s = (s<<7) + (s>>1) + name[10]; - return s; -} - -static inline sector_t fat_clus_to_blknr(struct msdos_sb_info *sbi, int clus) -{ - return ((sector_t)clus - FAT_START_ENT) * sbi->sec_per_clus - + sbi->data_start; -} - -static inline void fat16_towchar(wchar_t *dst, const __u8 *src, size_t len) -{ -#ifdef __BIG_ENDIAN - while (len--) { - *dst++ = src[0] | (src[1] << 8); - src += 2; - } -#else - memcpy(dst, src, len * 2); -#endif -} - -static inline void fatwchar_to16(__u8 *dst, const wchar_t *src, size_t len) -{ -#ifdef __BIG_ENDIAN - while (len--) { - dst[0] = *src & 0x00FF; - dst[1] = (*src & 0xFF00) >> 8; - dst += 2; - src++; - } -#else - memcpy(dst, src, len * 2); -#endif -} - /* media of boot sector */ static inline int fat_valid_media(u8 media) { return 0xf8 <= media || media == 0xf0; } - -/* fat/cache.c */ -extern void fat_cache_inval_inode(struct inode *inode); -extern int fat_get_cluster(struct inode *inode, int cluster, - int *fclus, int *dclus); -extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, - unsigned long *mapped_blocks); - -/* fat/dir.c */ -extern const struct file_operations fat_dir_operations; -extern int fat_search_long(struct inode *inode, const unsigned char *name, - int name_len, struct fat_slot_info *sinfo); -extern int fat_dir_empty(struct inode *dir); -extern int fat_subdirs(struct inode *dir); -extern int fat_scan(struct inode *dir, const unsigned char *name, - struct fat_slot_info *sinfo); -extern int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh, - struct msdos_dir_entry **de, loff_t *i_pos); -extern int fat_alloc_new_dir(struct inode *dir, struct timespec *ts); -extern int fat_add_entries(struct inode *dir, void *slots, int nr_slots, - struct fat_slot_info *sinfo); -extern int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo); - -/* fat/fatent.c */ -struct fat_entry { - int entry; - union { - u8 *ent12_p[2]; - __le16 *ent16_p; - __le32 *ent32_p; - } u; - int nr_bhs; - struct buffer_head *bhs[2]; -}; - -static inline void fatent_init(struct fat_entry *fatent) -{ - fatent->nr_bhs = 0; - fatent->entry = 0; - fatent->u.ent32_p = NULL; - fatent->bhs[0] = fatent->bhs[1] = NULL; -} - -static inline void fatent_set_entry(struct fat_entry *fatent, int entry) -{ - fatent->entry = entry; - fatent->u.ent32_p = NULL; -} - -static inline void fatent_brelse(struct fat_entry *fatent) -{ - int i; - fatent->u.ent32_p = NULL; - for (i = 0; i < fatent->nr_bhs; i++) - brelse(fatent->bhs[i]); - fatent->nr_bhs = 0; - fatent->bhs[0] = fatent->bhs[1] = NULL; -} - -extern void fat_ent_access_init(struct super_block *sb); -extern int fat_ent_read(struct inode *inode, struct fat_entry *fatent, - int entry); -extern int fat_ent_write(struct inode *inode, struct fat_entry *fatent, - int new, int wait); -extern int fat_alloc_clusters(struct inode *inode, int *cluster, - int nr_cluster); -extern int fat_free_clusters(struct inode *inode, int cluster); -extern int fat_count_free_clusters(struct super_block *sb); - -/* fat/file.c */ -extern int fat_generic_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg); -extern const struct file_operations fat_file_operations; -extern const struct inode_operations fat_file_inode_operations; -extern int fat_setattr(struct dentry * dentry, struct iattr * attr); -extern void fat_truncate(struct inode *inode); -extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat); - -/* fat/inode.c */ -extern void fat_attach(struct inode *inode, loff_t i_pos); -extern void fat_detach(struct inode *inode); -extern struct inode *fat_iget(struct super_block *sb, loff_t i_pos); -extern struct inode *fat_build_inode(struct super_block *sb, - struct msdos_dir_entry *de, loff_t i_pos); -extern int fat_sync_inode(struct inode *inode); -extern int fat_fill_super(struct super_block *sb, void *data, int silent, - const struct inode_operations *fs_dir_inode_ops, int isvfat); - -extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, - struct inode *i2); -/* fat/misc.c */ -extern void fat_fs_panic(struct super_block *s, const char *fmt, ...); -extern void fat_clusters_flush(struct super_block *sb); -extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); -extern int date_dos2unix(unsigned short time, unsigned short date, int tz_utc); -extern void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, - int tz_utc); -extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs); - -int fat_cache_init(void); -void fat_cache_destroy(void); - -#endif /* __KERNEL__ */ - -#endif +#endif /* !__KERNEL__ */ +#endif /* !_LINUX_MSDOS_FS_H */ -- cgit v1.2.3 From 9c0aa1b87bf541affef519eb4879ce7c5a5941ae Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:54 -0800 Subject: fat: Cleanup FAT attribute stuff This adds three helpers: fat_make_attrs() - makes FAT attributes from inode. fat_make_mode() - makes mode_t from FAT attributes. fat_save_attrs() - saves FAT attributes to inode. Then this replaces: MSDOS_MKMODE() by fat_make_mode(), fat_attr() by fat_make_attrs(), ->i_attrs = attr & ATTR_UNUSED by fat_save_attrs(). And for root inode, those is used with ATTR_DIR instead of bogus ATTR_NONE. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/fat.h | 20 +++++++++++++++++++- fs/fat/file.c | 32 ++++++++++++-------------------- fs/fat/inode.c | 19 +++++++++---------- include/linux/msdos_fs.h | 5 ----- 4 files changed, 40 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 2b8e94c3eef4..3b4753a024e3 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -117,14 +117,32 @@ static inline struct msdos_inode_info *MSDOS_I(struct inode *inode) return container_of(inode, struct msdos_inode_info, vfs_inode); } +/* Convert attribute bits and a mask to the UNIX mode. */ +static inline mode_t fat_make_mode(struct msdos_sb_info *sbi, + u8 attrs, mode_t mode) +{ + if (attrs & ATTR_RO) + mode &= ~S_IWUGO; + + if (attrs & ATTR_DIR) + return (mode & ~sbi->options.fs_dmask) | S_IFDIR; + else + return (mode & ~sbi->options.fs_fmask) | S_IFREG; +} + /* Return the FAT attribute byte for this inode */ -static inline u8 fat_attr(struct inode *inode) +static inline u8 fat_make_attrs(struct inode *inode) { return ((inode->i_mode & S_IWUGO) ? ATTR_NONE : ATTR_RO) | (S_ISDIR(inode->i_mode) ? ATTR_DIR : ATTR_NONE) | MSDOS_I(inode)->i_attrs; } +static inline void fat_save_attrs(struct inode *inode, u8 attrs) +{ + MSDOS_I(inode)->i_attrs = attrs & ATTR_UNUSED; +} + static inline unsigned char fat_checksum(const __u8 *name) { unsigned char s = name[0]; diff --git a/fs/fat/file.c b/fs/fat/file.c index b21973f266a1..f5a7e907a8fa 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -27,13 +27,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, switch (cmd) { case FAT_IOCTL_GET_ATTRIBUTES: { - u32 attr; - - if (inode->i_ino == MSDOS_ROOT_INO) - attr = ATTR_DIR; - else - attr = fat_attr(inode); - + u32 attr = fat_make_attrs(inode); return put_user(attr, user_attr); } case FAT_IOCTL_SET_ATTRIBUTES: @@ -62,20 +56,16 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, /* Merge in ATTR_VOLUME and ATTR_DIR */ attr |= (MSDOS_I(inode)->i_attrs & ATTR_VOLUME) | (is_dir ? ATTR_DIR : 0); - oldattr = fat_attr(inode); + oldattr = fat_make_attrs(inode); /* Equivalent to a chmod() */ ia.ia_valid = ATTR_MODE | ATTR_CTIME; ia.ia_ctime = current_fs_time(inode->i_sb); - if (is_dir) { - ia.ia_mode = MSDOS_MKMODE(attr, - S_IRWXUGO & ~sbi->options.fs_dmask) - | S_IFDIR; - } else { - ia.ia_mode = MSDOS_MKMODE(attr, - (S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO)) - & ~sbi->options.fs_fmask) - | S_IFREG; + if (is_dir) + ia.ia_mode = fat_make_mode(sbi, attr, S_IRWXUGO); + else { + ia.ia_mode = fat_make_mode(sbi, attr, + S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO)); } /* The root directory has no attributes */ @@ -115,7 +105,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, inode->i_flags &= S_IMMUTABLE; } - MSDOS_I(inode)->i_attrs = attr & ATTR_UNUSED; + fat_save_attrs(inode, attr); mark_inode_dirty(inode); up: mnt_drop_write(filp->f_path.mnt); @@ -274,7 +264,7 @@ static int fat_sanitize_mode(const struct msdos_sb_info *sbi, /* * Note, the basic check is already done by a caller of - * (attr->ia_mode & ~MSDOS_VALID_MODE) + * (attr->ia_mode & ~FAT_VALID_MODE) */ if (S_ISREG(inode->i_mode)) @@ -314,6 +304,8 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode) } #define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET) +/* valid file mode bits */ +#define FAT_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXUGO) int fat_setattr(struct dentry *dentry, struct iattr *attr) { @@ -356,7 +348,7 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) ((attr->ia_valid & ATTR_GID) && (attr->ia_gid != sbi->options.fs_gid)) || ((attr->ia_valid & ATTR_MODE) && - (attr->ia_mode & ~MSDOS_VALID_MODE))) + (attr->ia_mode & ~FAT_VALID_MODE))) error = -EPERM; if (error) { diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 8e1b75c63c7f..7aaa21cf019a 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -337,8 +337,7 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) if ((de->attr & ATTR_DIR) && !IS_FREE(de->name)) { inode->i_generation &= ~1; - inode->i_mode = MSDOS_MKMODE(de->attr, - S_IRWXUGO & ~sbi->options.fs_dmask) | S_IFDIR; + inode->i_mode = fat_make_mode(sbi, de->attr, S_IRWXUGO); inode->i_op = sbi->dir_ops; inode->i_fop = &fat_dir_operations; @@ -355,10 +354,9 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) inode->i_nlink = fat_subdirs(inode); } else { /* not a directory */ inode->i_generation |= 1; - inode->i_mode = MSDOS_MKMODE(de->attr, - ((sbi->options.showexec && !is_exec(de->name + 8)) - ? S_IRUGO|S_IWUGO : S_IRWXUGO) - & ~sbi->options.fs_fmask) | S_IFREG; + inode->i_mode = fat_make_mode(sbi, de->attr, + ((sbi->options.showexec && !is_exec(de->name + 8)) + ? S_IRUGO|S_IWUGO : S_IRWXUGO)); MSDOS_I(inode)->i_start = le16_to_cpu(de->start); if (sbi->fat_bits == 32) MSDOS_I(inode)->i_start |= (le16_to_cpu(de->starthi) << 16); @@ -374,7 +372,8 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) if (sbi->options.sys_immutable) inode->i_flags |= S_IMMUTABLE; } - MSDOS_I(inode)->i_attrs = de->attr & ATTR_UNUSED; + fat_save_attrs(inode, de->attr); + inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1)) & ~((loff_t)sbi->cluster_size - 1)) >> 9; @@ -569,7 +568,7 @@ retry: raw_entry->size = 0; else raw_entry->size = cpu_to_le32(inode->i_size); - raw_entry->attr = fat_attr(inode); + raw_entry->attr = fat_make_attrs(inode); raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart); raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16); fat_time_unix2fat(sbi, &inode->i_mtime, &raw_entry->time, @@ -1105,7 +1104,7 @@ static int fat_read_root(struct inode *inode) inode->i_gid = sbi->options.fs_gid; inode->i_version++; inode->i_generation = 0; - inode->i_mode = (S_IRWXUGO & ~sbi->options.fs_dmask) | S_IFDIR; + inode->i_mode = fat_make_mode(sbi, ATTR_DIR, S_IRWXUGO); inode->i_op = sbi->dir_ops; inode->i_fop = &fat_dir_operations; if (sbi->fat_bits == 32) { @@ -1122,7 +1121,7 @@ static int fat_read_root(struct inode *inode) MSDOS_I(inode)->i_logstart = 0; MSDOS_I(inode)->mmu_private = inode->i_size; - MSDOS_I(inode)->i_attrs = ATTR_NONE; + fat_save_attrs(inode, ATTR_DIR); inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0; inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = 0; inode->i_nlink = fat_subdirs(inode)+2; diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index 0982fb47a90d..e0a9b207920d 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -46,11 +46,6 @@ #define DELETED_FLAG 0xe5 /* marks file as deleted when in name[0] */ #define IS_FREE(n) (!*(n) || *(n) == DELETED_FLAG) -/* valid file mode bits */ -#define MSDOS_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXU | S_IRWXG | S_IRWXO) -/* Convert attribute bits and a mask to the UNIX mode. */ -#define MSDOS_MKMODE(a, m) (m & (a & ATTR_RO ? S_IRUGO|S_IXUGO : S_IRWXUGO)) - #define MSDOS_NAME 11 /* maximum name length */ #define MSDOS_LONGNAME 256 /* maximum name length */ #define MSDOS_SLOTS 21 /* max # of slots for short and long names */ -- cgit v1.2.3 From 7597bc94d6f3bdccb086ac7f2ad91292fdaee2a4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Nov 2008 17:38:47 +0000 Subject: Fix accidental implicit cast in HR-timer conversion Fix the hrtimer_add_expires_ns() function. It should take a 'u64 ns' argument, but rather takes an 'unsigned long ns' argument - which might only be 32-bits. On FRV, this results in the kernel locking up because hrtimer_forward() passes the result of a 64-bit multiplication to this function, for which the compiler discards the top 32-bits - something that didn't happen when ktime_add_ns() was called directly. Signed-off-by: David Howells Acked-by: Arjan van de Ven Signed-off-by: Linus Torvalds --- include/linux/hrtimer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 2b3645b1acf4..07e510a3b00a 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -239,7 +239,7 @@ static inline void hrtimer_add_expires(struct hrtimer *timer, ktime_t time) timer->_softexpires = ktime_add_safe(timer->_softexpires, time); } -static inline void hrtimer_add_expires_ns(struct hrtimer *timer, unsigned long ns) +static inline void hrtimer_add_expires_ns(struct hrtimer *timer, u64 ns) { timer->_expires = ktime_add_ns(timer->_expires, ns); timer->_softexpires = ktime_add_ns(timer->_softexpires, ns); -- cgit v1.2.3 From 3b53fbf4314594fa04544b02b2fc6e607912da18 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 6 Nov 2008 15:45:32 -0800 Subject: net: Fix recursive descent in __scm_destroy(). __scm_destroy() walks the list of file descriptors in the scm_fp_list pointed to by the scm_cookie argument. Those, in turn, can close sockets and invoke __scm_destroy() again. There is nothing which limits how deeply this can occur. The idea for how to fix this is from Linus. Basically, we do all of the fput()s at the top level by collecting all of the scm_fp_list objects hit by an fput(). Inside of the initial __scm_destroy() we keep running the list until it is empty. Signed-off-by: David S. Miller --- include/linux/sched.h | 2 ++ include/net/scm.h | 5 +++-- net/core/scm.c | 24 +++++++++++++++++++++--- 3 files changed, 26 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b483f39a7112..295b7c756ca6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1349,6 +1349,8 @@ struct task_struct { */ unsigned long timer_slack_ns; unsigned long default_timer_slack_ns; + + struct list_head *scm_work_list; }; /* diff --git a/include/net/scm.h b/include/net/scm.h index 06df126103ca..33e9986beb86 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -14,8 +14,9 @@ struct scm_fp_list { - int count; - struct file *fp[SCM_MAX_FD]; + struct list_head list; + int count; + struct file *fp[SCM_MAX_FD]; }; struct scm_cookie diff --git a/net/core/scm.c b/net/core/scm.c index 10f5c65f6a47..ab242cc1acca 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -75,6 +75,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) if (!fpl) return -ENOMEM; *fplp = fpl; + INIT_LIST_HEAD(&fpl->list); fpl->count = 0; } fpp = &fpl->fp[fpl->count]; @@ -106,9 +107,25 @@ void __scm_destroy(struct scm_cookie *scm) if (fpl) { scm->fp = NULL; - for (i=fpl->count-1; i>=0; i--) - fput(fpl->fp[i]); - kfree(fpl); + if (current->scm_work_list) { + list_add_tail(&fpl->list, current->scm_work_list); + } else { + LIST_HEAD(work_list); + + current->scm_work_list = &work_list; + + list_add(&fpl->list, &work_list); + while (!list_empty(&work_list)) { + fpl = list_first_entry(&work_list, struct scm_fp_list, list); + + list_del(&fpl->list); + for (i=fpl->count-1; i>=0; i--) + fput(fpl->fp[i]); + kfree(fpl); + } + + current->scm_work_list = NULL; + } } } @@ -284,6 +301,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL); if (new_fpl) { + INIT_LIST_HEAD(&new_fpl->list); for (i=fpl->count-1; i>=0; i--) get_file(fpl->fp[i]); memcpy(new_fpl, fpl, sizeof(*fpl)); -- cgit v1.2.3 From cd83e42c6b0413dcbb548c2ead799111ff7e6a13 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 7 Nov 2008 11:12:29 +1100 Subject: cpumask: new API, v2 - add cpumask_of() - add free_bootmem_cpumask_var() Signed-off-by: Rusty Russell Signed-off-by: Ingo Molnar --- include/linux/cpumask.h | 11 +++++++++++ lib/cpumask.c | 5 +++++ 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index c8e66619097b..31caa1bc620a 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -893,6 +893,12 @@ static inline void cpumask_copy(struct cpumask *dstp, */ #define cpumask_any_and(mask1, mask2) cpumask_first_and((mask1), (mask2)) +/** + * cpumask_of - the cpumask containing just a given cpu + * @cpu: the cpu (<= nr_cpu_ids) + */ +#define cpumask_of(cpu) (get_cpu_mask(cpu)) + /** * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * * @bitmap: the bitmap @@ -946,6 +952,7 @@ typedef struct cpumask *cpumask_var_t; bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); void alloc_bootmem_cpumask_var(cpumask_var_t *mask); void free_cpumask_var(cpumask_var_t mask); +void free_bootmem_cpumask_var(cpumask_var_t mask); #else typedef struct cpumask cpumask_var_t[1]; @@ -962,6 +969,10 @@ static inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask) static inline void free_cpumask_var(cpumask_var_t mask) { } + +static inline void free_bootmem_cpumask_var(cpumask_var_t mask) +{ +} #endif /* CONFIG_CPUMASK_OFFSTACK */ /* The pointer versions of the maps, these will become the primary versions. */ diff --git a/lib/cpumask.c b/lib/cpumask.c index 5ceb4211c834..2ebc3a9a7465 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -107,4 +107,9 @@ void free_cpumask_var(cpumask_var_t mask) kfree(mask); } EXPORT_SYMBOL(free_cpumask_var); + +void free_bootmem_cpumask_var(cpumask_var_t mask) +{ + free_bootmem((unsigned long)mask, cpumask_size()); +} #endif -- cgit v1.2.3 From 14800984706bf6936bbec5187f736e928be5c218 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Fri, 7 Nov 2008 15:26:50 +0100 Subject: sched: fine-tune SD_MC_INIT Tune SD_MC_INIT the same way as SD_CPU_INIT: unset SD_BALANCE_NEWIDLE, and set SD_WAKE_BALANCE. This improves vmark by 5%: vmark 132102 125968 125497 messages/sec avg 127855.66 .984 vmark 139404 131719 131272 messages/sec avg 134131.66 1.033 Signed-off-by: Mike Galbraith Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar # *DOCUMENTATION* --- include/linux/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/topology.h b/include/linux/topology.h index 34a7ee0ebed2..a8d840595b7e 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -120,10 +120,10 @@ void arch_update_cpu_topology(void); .wake_idx = 1, \ .forkexec_idx = 1, \ .flags = SD_LOAD_BALANCE \ - | SD_BALANCE_NEWIDLE \ | SD_BALANCE_FORK \ | SD_BALANCE_EXEC \ | SD_WAKE_AFFINE \ + | SD_WAKE_BALANCE \ | SD_SHARE_PKG_RESOURCES\ | BALANCE_FOR_MC_POWER, \ .last_balance = jiffies, \ -- cgit v1.2.3 From 52c642f33b14bfa1b00ef2b68296effb34a573f3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 7 Nov 2008 16:09:23 +0100 Subject: sched: fine-tune SD_SIBLING_INIT fine-tune the HT sched-domains parameters as well. On a HT capable box, this increases lat_ctx performance from 23.87 usecs to 1.49 usecs: # before $ ./lat_ctx -s 0 2 "size=0k ovr=1.89 2 23.87 # after $ ./lat_ctx -s 0 2 "size=0k ovr=1.84 2 1.49 Signed-off-by: Ingo Molnar --- include/linux/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/topology.h b/include/linux/topology.h index a8d840595b7e..117f1b7405cf 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -99,7 +99,7 @@ void arch_update_cpu_topology(void); | SD_BALANCE_FORK \ | SD_BALANCE_EXEC \ | SD_WAKE_AFFINE \ - | SD_WAKE_IDLE \ + | SD_WAKE_BALANCE \ | SD_SHARE_CPUPOWER, \ .last_balance = jiffies, \ .balance_interval = 1, \ -- cgit v1.2.3 From c3d6de698c84efdbdd3781b7058bcc339ab43da8 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 1 Aug 2008 17:37:55 +0200 Subject: ACPI video: if no ACPI backlight support, use vendor drivers If an ACPI graphics device supports backlight brightness functions (cmp. with latest ACPI spec Appendix B), let the ACPI video driver control backlight and switch backlight control off in vendor specific ACPI drivers (asus_acpi, thinkpad_acpi, eeepc, fujitsu_laptop, msi_laptop, sony_laptop, acer-wmi). Currently it is possible to load above drivers and let both poke on the brightness HW registers, the video and vendor specific ACPI drivers -> bad. This patch provides the basic support to check for BIOS capabilities before driver loading time. Driver specific modifications are in separate follow up patches. "acpi_backlight=vendor" Prever vendor driver over ACPI driver for backlight. "acpi_backlight=video" (default) Prever ACPI driver over vendor driver for backlight. Signed-off-by: Thomas Renninger Acked-by: Zhang Rui Signed-off-by: Andi Kleen Signed-off-by: Len Brown --- Documentation/kernel-parameters.txt | 12 ++ drivers/acpi/Makefile | 4 + drivers/acpi/scan.c | 32 +---- drivers/acpi/video.c | 28 ++-- drivers/acpi/video_detect.c | 268 ++++++++++++++++++++++++++++++++++++ include/linux/acpi.h | 44 ++++++ 6 files changed, 345 insertions(+), 43 deletions(-) create mode 100644 drivers/acpi/video_detect.c (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index c86c07459712..dd5013f974d8 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -198,6 +198,18 @@ and is between 256 and 4096 characters. It is defined in the file that require a timer override, but don't have HPET + acpi_backlight= [HW,ACPI] + acpi_backlight=vendor + acpi_backlight=video + If set to vendor, prefer vendor specific driver + (e.g. thinkpad_acpi, sony_acpi, etc.) instead + of the ACPI video.ko driver. + + acpi_display_output= [HW,ACPI] + acpi_display_output=vendor + acpi_display_output=video + See above. + acpi.debug_layer= [HW,ACPI] Format: Each bit of the indicates an ACPI debug layer, diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index d91c027ece8f..c03810aa19c0 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -46,6 +46,10 @@ obj-$(CONFIG_ACPI_BUTTON) += button.o obj-$(CONFIG_ACPI_FAN) += fan.o obj-$(CONFIG_ACPI_DOCK) += dock.o obj-$(CONFIG_ACPI_VIDEO) += video.o +ifdef CONFIG_ACPI_VIDEO +obj-y += video_detect.o +endif + obj-y += pci_root.o pci_link.o pci_irq.o pci_bind.o obj-$(CONFIG_ACPI_PCI_SLOT) += pci_slot.o obj-$(CONFIG_ACPI_PROCESSOR) += processor.o diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index a9dda8e0f9f9..556b182001ca 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -919,36 +919,6 @@ static void acpi_device_get_busid(struct acpi_device *device, } } -static int -acpi_video_bus_match(struct acpi_device *device) -{ - acpi_handle h_dummy; - - if (!device) - return -EINVAL; - - /* Since there is no HID, CID for ACPI Video drivers, we have - * to check well known required nodes for each feature we support. - */ - - /* Does this device able to support video switching ? */ - if (ACPI_SUCCESS(acpi_get_handle(device->handle, "_DOD", &h_dummy)) && - ACPI_SUCCESS(acpi_get_handle(device->handle, "_DOS", &h_dummy))) - return 0; - - /* Does this device able to retrieve a video ROM ? */ - if (ACPI_SUCCESS(acpi_get_handle(device->handle, "_ROM", &h_dummy))) - return 0; - - /* Does this device able to configure which video head to be POSTed ? */ - if (ACPI_SUCCESS(acpi_get_handle(device->handle, "_VPO", &h_dummy)) && - ACPI_SUCCESS(acpi_get_handle(device->handle, "_GPD", &h_dummy)) && - ACPI_SUCCESS(acpi_get_handle(device->handle, "_SPD", &h_dummy))) - return 0; - - return -ENODEV; -} - /* * acpi_bay_match - see if a device is an ejectable driver bay * @@ -1031,7 +1001,7 @@ static void acpi_device_set_id(struct acpi_device *device, will get autoloaded and the device might still match against another driver. */ - if (ACPI_SUCCESS(acpi_video_bus_match(device))) + if (acpi_is_video_device(device)) cid_add = ACPI_VIDEO_HID; else if (ACPI_SUCCESS(acpi_bay_match(device))) cid_add = ACPI_BAY_HID; diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index 6597c2a37c36..2097c399dd06 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -739,7 +739,8 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device) device->cap._DSS = 1; } - max_level = acpi_video_init_brightness(device); + if (acpi_video_backlight_support()) + max_level = acpi_video_init_brightness(device); if (device->cap._BCL && device->cap._BCM && max_level > 0) { int result; @@ -785,18 +786,21 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device) printk(KERN_ERR PREFIX "Create sysfs link\n"); } - if (device->cap._DCS && device->cap._DSS){ - static int count = 0; - char *name; - name = kzalloc(MAX_NAME_LEN, GFP_KERNEL); - if (!name) - return; - sprintf(name, "acpi_video%d", count++); - device->output_dev = video_output_register(name, - NULL, device, &acpi_output_properties); - kfree(name); + + if (acpi_video_display_switch_support()) { + + if (device->cap._DCS && device->cap._DSS) { + static int count; + char *name; + name = kzalloc(MAX_NAME_LEN, GFP_KERNEL); + if (!name) + return; + sprintf(name, "acpi_video%d", count++); + device->output_dev = video_output_register(name, + NULL, device, &acpi_output_properties); + kfree(name); + } } - return; } /* diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c new file mode 100644 index 000000000000..70b1e91ae2ab --- /dev/null +++ b/drivers/acpi/video_detect.c @@ -0,0 +1,268 @@ +/* + * Copyright (C) 2008 SuSE Linux Products GmbH + * Thomas Renninger + * + * May be copied or modified under the terms of the GNU General Public License + * + * video_detect.c: + * Provides acpi_is_video_device() for early scanning of ACPI devices in scan.c + * There a Linux specific (Spec does not provide a HID for video devices) is + * assinged + * + * After PCI devices are glued with ACPI devices + * acpi_get_physical_pci_device() can be called to identify ACPI graphics + * devices for which a real graphics card is plugged in + * + * Now acpi_video_get_capabilities() can be called to check which + * capabilities the graphics cards plugged in support. The check for general + * video capabilities will be triggered by the first caller of + * acpi_video_get_capabilities(NULL); which will happen when the first + * backlight (or display output) switching supporting driver calls: + * acpi_video_backlight_support(); + * + * Depending on whether ACPI graphics extensions (cmp. ACPI spec Appendix B) + * are available, video.ko should be used to handle the device. + * + * Otherwise vendor specific drivers like thinkpad_acpi, asus_acpi, + * sony_acpi,... can take care about backlight brightness and display output + * switching. + * + * If CONFIG_ACPI_VIDEO is neither set as "compiled in" (y) nor as a module (m) + * this file will not be compiled, acpi_video_get_capabilities() and + * acpi_video_backlight_support() will always return 0 and vendor specific + * drivers always can handle backlight. + * + */ + +#include +#include + +ACPI_MODULE_NAME("video"); +#define ACPI_VIDEO_COMPONENT 0x08000000 +#define _COMPONENT ACPI_VIDEO_COMPONENT + +static long acpi_video_support; +static bool acpi_video_caps_checked; + +static acpi_status +acpi_backlight_cap_match(acpi_handle handle, u32 level, void *context, + void **retyurn_value) +{ + long *cap = context; + acpi_handle h_dummy; + + if (ACPI_SUCCESS(acpi_get_handle(handle, "_BCM", &h_dummy)) && + ACPI_SUCCESS(acpi_get_handle(handle, "_BCL", &h_dummy))) { + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found generic backlight " + "support\n")); + *cap |= ACPI_VIDEO_BACKLIGHT; + /* We have backlight support, no need to scan further */ + return AE_CTRL_TERMINATE; + } + return 0; +} + +/* Returns true if the device is a video device which can be handled by + * video.ko. + * The device will get a Linux specific CID added in scan.c to + * identify the device as an ACPI graphics device + * Be aware that the graphics device may not be physically present + * Use acpi_video_get_capabilities() to detect general ACPI video + * capabilities of present cards + */ +long acpi_is_video_device(struct acpi_device *device) +{ + acpi_handle h_dummy; + long video_caps = 0; + + if (!device) + return 0; + + /* Does this device able to support video switching ? */ + if (ACPI_SUCCESS(acpi_get_handle(device->handle, "_DOD", &h_dummy)) && + ACPI_SUCCESS(acpi_get_handle(device->handle, "_DOS", &h_dummy))) + video_caps |= ACPI_VIDEO_OUTPUT_SWITCHING; + + /* Does this device able to retrieve a video ROM ? */ + if (ACPI_SUCCESS(acpi_get_handle(device->handle, "_ROM", &h_dummy))) + video_caps |= ACPI_VIDEO_ROM_AVAILABLE; + + /* Does this device able to configure which video head to be POSTed ? */ + if (ACPI_SUCCESS(acpi_get_handle(device->handle, "_VPO", &h_dummy)) && + ACPI_SUCCESS(acpi_get_handle(device->handle, "_GPD", &h_dummy)) && + ACPI_SUCCESS(acpi_get_handle(device->handle, "_SPD", &h_dummy))) + video_caps |= ACPI_VIDEO_DEVICE_POSTING; + + /* Only check for backlight functionality if one of the above hit. */ + if (video_caps) + acpi_walk_namespace(ACPI_TYPE_DEVICE, device->handle, + ACPI_UINT32_MAX, acpi_backlight_cap_match, + &video_caps, NULL); + + return video_caps; +} +EXPORT_SYMBOL(acpi_is_video_device); + +static acpi_status +find_video(acpi_handle handle, u32 lvl, void *context, void **rv) +{ + long *cap = context; + struct device *dev; + struct acpi_device *acpi_dev; + + const struct acpi_device_id video_ids[] = { + {ACPI_VIDEO_HID, 0}, + {"", 0}, + }; + if (acpi_bus_get_device(handle, &acpi_dev)) + return AE_OK; + + if (!acpi_match_device_ids(acpi_dev, video_ids)) { + dev = acpi_get_physical_pci_device(handle); + if (!dev) + return AE_OK; + put_device(dev); + *cap |= acpi_is_video_device(acpi_dev); + } + return AE_OK; +} + +/* + * Returns the video capabilities of a specific ACPI graphics device + * + * if NULL is passed as argument all ACPI devices are enumerated and + * all graphics capabilities of physically present devices are + * summerized and returned. This is cached and done only once. + */ +long acpi_video_get_capabilities(acpi_handle graphics_handle) +{ + long caps = 0; + struct acpi_device *tmp_dev; + acpi_status status; + + if (acpi_video_caps_checked && graphics_handle == NULL) + return acpi_video_support; + + if (!graphics_handle) { + /* Only do the global walk through all graphics devices once */ + acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, + ACPI_UINT32_MAX, find_video, + &caps, NULL); + /* There might be boot param flags set already... */ + acpi_video_support |= caps; + acpi_video_caps_checked = 1; + /* Add blacklists here. Be careful to use the right *DMI* bits + * to still be able to override logic via boot params, e.g.: + * + * if (dmi_name_in_vendors("XY")) { + * acpi_video_support |= + * ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VENDOR; + * acpi_video_support |= + * ACPI_VIDEO_BACKLIGHT_DMI_VENDOR; + *} + */ + } else { + status = acpi_bus_get_device(graphics_handle, &tmp_dev); + if (ACPI_FAILURE(status)) { + ACPI_EXCEPTION((AE_INFO, status, "Invalid device")); + return 0; + } + acpi_walk_namespace(ACPI_TYPE_DEVICE, graphics_handle, + ACPI_UINT32_MAX, find_video, + &caps, NULL); + } + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "We have 0x%lX video support %s %s\n", + graphics_handle ? caps : acpi_video_support, + graphics_handle ? "on device " : "in general", + graphics_handle ? acpi_device_bid(tmp_dev) : "")); + return caps; +} +EXPORT_SYMBOL(acpi_video_get_capabilities); + +/* Returns true if video.ko can do backlight switching */ +int acpi_video_backlight_support(void) +{ + /* + * We must check whether the ACPI graphics device is physically plugged + * in. Therefore this must be called after binding PCI and ACPI devices + */ + if (!acpi_video_caps_checked) + acpi_video_get_capabilities(NULL); + + /* First check for boot param -> highest prio */ + if (acpi_video_support & ACPI_VIDEO_BACKLIGHT_FORCE_VENDOR) + return 0; + else if (acpi_video_support & ACPI_VIDEO_BACKLIGHT_FORCE_VIDEO) + return 1; + + /* Then check for DMI blacklist -> second highest prio */ + if (acpi_video_support & ACPI_VIDEO_BACKLIGHT_DMI_VENDOR) + return 0; + else if (acpi_video_support & ACPI_VIDEO_BACKLIGHT_DMI_VIDEO) + return 1; + + /* Then go the default way */ + return acpi_video_support & ACPI_VIDEO_BACKLIGHT; +} +EXPORT_SYMBOL(acpi_video_backlight_support); + +/* + * Returns true if video.ko can do display output switching. + * This does not work well/at all with binary graphics drivers + * which disable system io ranges and do it on their own. + */ +int acpi_video_display_switch_support(void) +{ + if (!acpi_video_caps_checked) + acpi_video_get_capabilities(NULL); + + if (acpi_video_support & ACPI_VIDEO_OUTPUT_SWITCHING_FORCE_VENDOR) + return 0; + else if (acpi_video_support & ACPI_VIDEO_OUTPUT_SWITCHING_FORCE_VIDEO) + return 1; + + if (acpi_video_support & ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VENDOR) + return 0; + else if (acpi_video_support & ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VIDEO) + return 1; + + return acpi_video_support & ACPI_VIDEO_OUTPUT_SWITCHING; +} +EXPORT_SYMBOL(acpi_video_display_switch_support); + +/* + * Use acpi_display_output=vendor/video or acpi_backlight=vendor/video + * To force that backlight or display output switching is processed by vendor + * specific acpi drivers or video.ko driver. + */ +int __init acpi_backlight(char *str) +{ + if (str == NULL || *str == '\0') + return 1; + else { + if (!strcmp("vendor", str)) + acpi_video_support |= + ACPI_VIDEO_BACKLIGHT_FORCE_VENDOR; + if (!strcmp("video", str)) + acpi_video_support |= + ACPI_VIDEO_OUTPUT_SWITCHING_FORCE_VIDEO; + } + return 1; +} +__setup("acpi_backlight=", acpi_backlight); + +int __init acpi_display_output(char *str) +{ + if (str == NULL || *str == '\0') + return 1; + else { + if (!strcmp("vendor", str)) + acpi_video_support |= + ACPI_VIDEO_OUTPUT_SWITCHING_FORCE_VENDOR; + if (!strcmp("video", str)) + acpi_video_support |= + ACPI_VIDEO_OUTPUT_SWITCHING_FORCE_VIDEO; + } + return 1; +} +__setup("acpi_display_output=", acpi_display_output); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index fd6a452b0ceb..7f23761ace35 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -194,6 +194,50 @@ extern bool wmi_has_guid(const char *guid); #endif /* CONFIG_ACPI_WMI */ +#define ACPI_VIDEO_OUTPUT_SWITCHING 0x0001 +#define ACPI_VIDEO_DEVICE_POSTING 0x0002 +#define ACPI_VIDEO_ROM_AVAILABLE 0x0004 +#define ACPI_VIDEO_BACKLIGHT 0x0008 +#define ACPI_VIDEO_BACKLIGHT_FORCE_VENDOR 0x0010 +#define ACPI_VIDEO_BACKLIGHT_FORCE_VIDEO 0x0020 +#define ACPI_VIDEO_OUTPUT_SWITCHING_FORCE_VENDOR 0x0040 +#define ACPI_VIDEO_OUTPUT_SWITCHING_FORCE_VIDEO 0x0080 +#define ACPI_VIDEO_BACKLIGHT_DMI_VENDOR 0x0100 +#define ACPI_VIDEO_BACKLIGHT_DMI_VIDEO 0x0200 +#define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VENDOR 0x0400 +#define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VIDEO 0x0800 + +#if defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE) + +extern long acpi_video_get_capabilities(acpi_handle graphics_dev_handle); +extern long acpi_is_video_device(struct acpi_device *device); +extern int acpi_video_backlight_support(void); +extern int acpi_video_display_switch_support(void); + +#else + +static inline long acpi_video_get_capabilities(acpi_handle graphics_dev_handle) +{ + return 0; +} + +static inline long acpi_is_video_device(struct acpi_device *device) +{ + return 0; +} + +static inline int acpi_video_backlight_support(void) +{ + return 0; +} + +static inline int acpi_video_display_switch_support(void) +{ + return 0; +} + +#endif /* defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE) */ + extern int acpi_blacklisted(void); #ifdef CONFIG_DMI extern void acpi_dmi_osi_linux(int enable, const struct dmi_system_id *d); -- cgit v1.2.3 From d1b268630875a7713b5d468a0c03403c5b721c8e Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Sat, 8 Nov 2008 21:37:46 +0100 Subject: mmc: struct device - replace bus_id with dev_name(), dev_set_name() Acked-by: Greg Kroah-Hartman Signed-Off-By: Kay Sievers Signed-off-by: Pierre Ossman --- drivers/mmc/core/bus.c | 3 +-- drivers/mmc/core/host.c | 5 ++--- drivers/mmc/core/sdio_bus.c | 3 +-- drivers/mmc/host/mmc_spi.c | 2 +- drivers/mmc/host/sdhci.c | 2 +- drivers/mmc/host/tifm_sd.c | 16 ++++++++-------- include/linux/mmc/card.h | 2 +- include/linux/mmc/host.h | 2 +- include/linux/mmc/sdio_func.h | 2 +- 9 files changed, 17 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c index 0d9b2d6f9ebf..f210a8ee6861 100644 --- a/drivers/mmc/core/bus.c +++ b/drivers/mmc/core/bus.c @@ -216,8 +216,7 @@ int mmc_add_card(struct mmc_card *card) int ret; const char *type; - snprintf(card->dev.bus_id, sizeof(card->dev.bus_id), - "%s:%04x", mmc_hostname(card->host), card->rca); + dev_set_name(&card->dev, "%s:%04x", mmc_hostname(card->host), card->rca); switch (card->type) { case MMC_TYPE_MMC: diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 6da80fd4d974..5e945e64ead7 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -73,8 +73,7 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev) if (err) goto free; - snprintf(host->class_dev.bus_id, BUS_ID_SIZE, - "mmc%d", host->index); + dev_set_name(&host->class_dev, "mmc%d", host->index); host->parent = dev; host->class_dev.parent = dev; @@ -121,7 +120,7 @@ int mmc_add_host(struct mmc_host *host) WARN_ON((host->caps & MMC_CAP_SDIO_IRQ) && !host->ops->enable_sdio_irq); - led_trigger_register_simple(host->class_dev.bus_id, &host->led); + led_trigger_register_simple(dev_name(&host->class_dev), &host->led); err = device_add(&host->class_dev); if (err) diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c index 233d0f9b3c4b..46284b527397 100644 --- a/drivers/mmc/core/sdio_bus.c +++ b/drivers/mmc/core/sdio_bus.c @@ -239,8 +239,7 @@ int sdio_add_func(struct sdio_func *func) { int ret; - snprintf(func->dev.bus_id, sizeof(func->dev.bus_id), - "%s:%d", mmc_card_id(func->card), func->num); + dev_set_name(&func->dev, "%s:%d", mmc_card_id(func->card), func->num); ret = device_add(&func->dev); if (ret == 0) diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index 07faf5412a1f..ad00e1632317 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -1348,7 +1348,7 @@ static int mmc_spi_probe(struct spi_device *spi) goto fail_add_host; dev_info(&spi->dev, "SD/MMC host %s%s%s%s%s\n", - mmc->class_dev.bus_id, + dev_name(&mmc->class_dev), host->dma_dev ? "" : ", no DMA", (host->pdata && host->pdata->get_ro) ? "" : ", no WP", diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 30f64b1f2354..4d010a984bed 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1733,7 +1733,7 @@ int sdhci_add_host(struct sdhci_host *host) mmc_add_host(mmc); printk(KERN_INFO "%s: SDHCI controller on %s [%s] using %s%s\n", - mmc_hostname(mmc), host->hw_name, mmc_dev(mmc)->bus_id, + mmc_hostname(mmc), host->hw_name, dev_name(mmc_dev(mmc)), (host->flags & SDHCI_USE_ADMA)?"A":"", (host->flags & SDHCI_USE_DMA)?"DMA":"PIO"); diff --git a/drivers/mmc/host/tifm_sd.c b/drivers/mmc/host/tifm_sd.c index 13844843e8de..82554ddec6b3 100644 --- a/drivers/mmc/host/tifm_sd.c +++ b/drivers/mmc/host/tifm_sd.c @@ -632,7 +632,7 @@ static void tifm_sd_request(struct mmc_host *mmc, struct mmc_request *mrq) if (host->req) { printk(KERN_ERR "%s : unfinished request detected\n", - sock->dev.bus_id); + dev_name(&sock->dev)); mrq->cmd->error = -ETIMEDOUT; goto err_out; } @@ -672,7 +672,7 @@ static void tifm_sd_request(struct mmc_host *mmc, struct mmc_request *mrq) ? PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE)) { printk(KERN_ERR "%s : scatterlist map failed\n", - sock->dev.bus_id); + dev_name(&sock->dev)); mrq->cmd->error = -ENOMEM; goto err_out; } @@ -684,7 +684,7 @@ static void tifm_sd_request(struct mmc_host *mmc, struct mmc_request *mrq) : PCI_DMA_FROMDEVICE); if (host->sg_len < 1) { printk(KERN_ERR "%s : scatterlist map failed\n", - sock->dev.bus_id); + dev_name(&sock->dev)); tifm_unmap_sg(sock, &host->bounce_buf, 1, r_data->flags & MMC_DATA_WRITE ? PCI_DMA_TODEVICE @@ -748,7 +748,7 @@ static void tifm_sd_end_cmd(unsigned long data) if (!mrq) { printk(KERN_ERR " %s : no request to complete?\n", - sock->dev.bus_id); + dev_name(&sock->dev)); spin_unlock_irqrestore(&sock->lock, flags); return; } @@ -789,7 +789,7 @@ static void tifm_sd_abort(unsigned long data) printk(KERN_ERR "%s : card failed to respond for a long period of time " "(%x, %x)\n", - host->dev->dev.bus_id, host->req->cmd->opcode, host->cmd_flags); + dev_name(&host->dev->dev), host->req->cmd->opcode, host->cmd_flags); tifm_eject(host->dev); } @@ -906,7 +906,7 @@ static int tifm_sd_initialize_host(struct tifm_sd *host) if (rc) { printk(KERN_ERR "%s : controller failed to reset\n", - sock->dev.bus_id); + dev_name(&sock->dev)); return -ENODEV; } @@ -933,7 +933,7 @@ static int tifm_sd_initialize_host(struct tifm_sd *host) if (rc) { printk(KERN_ERR "%s : card not ready - probe failed on initialization\n", - sock->dev.bus_id); + dev_name(&sock->dev)); return -ENODEV; } @@ -954,7 +954,7 @@ static int tifm_sd_probe(struct tifm_dev *sock) if (!(TIFM_SOCK_STATE_OCCUPIED & readl(sock->addr + SOCK_PRESENT_STATE))) { printk(KERN_WARNING "%s : card gone, unexpectedly\n", - sock->dev.bus_id); + dev_name(&sock->dev)); return rc; } diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index ee6e822d5994..403aa505f27e 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -130,7 +130,7 @@ struct mmc_card { #define mmc_card_set_blockaddr(c) ((c)->state |= MMC_STATE_BLOCKADDR) #define mmc_card_name(c) ((c)->cid.prod_name) -#define mmc_card_id(c) ((c)->dev.bus_id) +#define mmc_card_id(c) (dev_name(&(c)->dev)) #define mmc_list_to_card(l) container_of(l, struct mmc_card, node) #define mmc_get_drvdata(c) dev_get_drvdata(&(c)->dev) diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index bde891f64591..f842f234e44f 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -176,7 +176,7 @@ static inline void *mmc_priv(struct mmc_host *host) #define mmc_dev(x) ((x)->parent) #define mmc_classdev(x) (&(x)->class_dev) -#define mmc_hostname(x) ((x)->class_dev.bus_id) +#define mmc_hostname(x) (dev_name(&(x)->class_dev)) extern int mmc_suspend_host(struct mmc_host *, pm_message_t); extern int mmc_resume_host(struct mmc_host *); diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h index 07bee4a0d457..451bdfc85830 100644 --- a/include/linux/mmc/sdio_func.h +++ b/include/linux/mmc/sdio_func.h @@ -63,7 +63,7 @@ struct sdio_func { #define sdio_func_set_present(f) ((f)->state |= SDIO_STATE_PRESENT) -#define sdio_func_id(f) ((f)->dev.bus_id) +#define sdio_func_id(f) (dev_name(&(f)->dev)) #define sdio_get_drvdata(f) dev_get_drvdata(&(f)->dev) #define sdio_set_drvdata(f,d) dev_set_drvdata(&(f)->dev, d) -- cgit v1.2.3 From 058e3739f6b0753696db1952378de9e8d2a11735 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Sun, 9 Nov 2008 00:27:53 -0500 Subject: clarify usage expectations for cnt32_to_63() Currently, all existing users of cnt32_to_63() are fine since the CPU architectures where it is used don't do read access reordering, and user mode preemption is disabled already. It is nevertheless a good idea to better elaborate usage requirements wrt preemption, and use an explicit memory barrier on SMP to avoid different CPUs accessing the counter value in the wrong order. On UP a simple compiler barrier is sufficient. Signed-off-by: Nicolas Pitre Acked-by: Mathieu Desnoyers Signed-off-by: Linus Torvalds --- include/linux/cnt32_to_63.h | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cnt32_to_63.h b/include/linux/cnt32_to_63.h index 8c0f9505b48c..7605fdd1eb65 100644 --- a/include/linux/cnt32_to_63.h +++ b/include/linux/cnt32_to_63.h @@ -16,6 +16,7 @@ #include #include #include +#include /* this is used only to give gcc a clue about good code generation */ union cnt32_to_63 { @@ -53,11 +54,19 @@ union cnt32_to_63 { * needed increment. And any race in updating the value in memory is harmless * as the same value would simply be stored more than once. * - * The only restriction for the algorithm to work properly is that this - * code must be executed at least once per each half period of the 32-bit - * counter to properly update the state bit in memory. This is usually not a - * problem in practice, but if it is then a kernel timer could be scheduled - * to manage for this code to be executed often enough. + * The restrictions for the algorithm to work properly are: + * + * 1) this code must be called at least once per each half period of the + * 32-bit counter; + * + * 2) this code must not be preempted for a duration longer than the + * 32-bit counter half period minus the longest period between two + * calls to this code. + * + * Those requirements ensure proper update to the state bit in memory. + * This is usually not a problem in practice, but if it is then a kernel + * timer should be scheduled to manage for this code to be executed often + * enough. * * Note that the top bit (bit 63) in the returned value should be considered * as garbage. It is not cleared here because callers are likely to use a @@ -68,9 +77,10 @@ union cnt32_to_63 { */ #define cnt32_to_63(cnt_lo) \ ({ \ - static volatile u32 __m_cnt_hi; \ + static u32 __m_cnt_hi; \ union cnt32_to_63 __x; \ __x.hi = __m_cnt_hi; \ + smp_rmb(); \ __x.lo = (cnt_lo); \ if (unlikely((s32)(__x.hi ^ __x.lo) < 0)) \ __m_cnt_hi = __x.hi = (__x.hi ^ 0x80000000) + (__x.hi >> 31); \ -- cgit v1.2.3 From 984f2f377fdfd098f5ae58d09ee04d5e29e6112b Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 8 Nov 2008 20:24:19 +1100 Subject: cpumask: introduce new API, without changing anything, v3 Impact: cleanup Clean up based on feedback from Andrew Morton and others: - change to inline functions instead of macros - add __init to bootmem method - add a missing debug check Signed-off-by: Rusty Russell Signed-off-by: Ingo Molnar --- include/linux/cpumask.h | 58 ++++++++++++++++++++++++++++++++++++++++++++----- lib/cpumask.c | 3 ++- 2 files changed, 54 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 31caa1bc620a..21e1dd43e52a 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -564,12 +564,36 @@ static inline unsigned int cpumask_check(unsigned int cpu) } #if NR_CPUS == 1 -/* Uniprocesor. */ -#define cpumask_first(src) ({ (void)(src); 0; }) -#define cpumask_next(n, src) ({ (void)(src); 1; }) -#define cpumask_next_zero(n, src) ({ (void)(src); 1; }) -#define cpumask_next_and(n, srcp, andp) ({ (void)(srcp), (void)(andp); 1; }) -#define cpumask_any_but(mask, cpu) ({ (void)(mask); (void)(cpu); 0; }) +/* Uniprocessor. Assume all masks are "1". */ +static inline unsigned int cpumask_first(const struct cpumask *srcp) +{ + return 0; +} + +/* Valid inputs for n are -1 and 0. */ +static inline unsigned int cpumask_next(int n, const struct cpumask *srcp) +{ + return n+1; +} + +static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) +{ + return n+1; +} + +static inline unsigned int cpumask_next_and(int n, + const struct cpumask *srcp, + const struct cpumask *andp) +{ + return n+1; +} + +/* cpu must be a valid cpu, ie 0, so there's no other choice. */ +static inline unsigned int cpumask_any_but(const struct cpumask *mask, + unsigned int cpu) +{ + return 1; +} #define for_each_cpu(cpu, mask) \ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) @@ -620,10 +644,32 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *); int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); +/** + * for_each_cpu - iterate over every cpu in a mask + * @cpu: the (optionally unsigned) integer iterator + * @mask: the cpumask pointer + * + * After the loop, cpu is >= nr_cpu_ids. + */ #define for_each_cpu(cpu, mask) \ for ((cpu) = -1; \ (cpu) = cpumask_next((cpu), (mask)), \ (cpu) < nr_cpu_ids;) + +/** + * for_each_cpu_and - iterate over every cpu in both masks + * @cpu: the (optionally unsigned) integer iterator + * @mask: the first cpumask pointer + * @and: the second cpumask pointer + * + * This saves a temporary CPU mask in many places. It is equivalent to: + * struct cpumask tmp; + * cpumask_and(&tmp, &mask, &and); + * for_each_cpu(cpu, &tmp) + * ... + * + * After the loop, cpu is >= nr_cpu_ids. + */ #define for_each_cpu_and(cpu, mask, and) \ for ((cpu) = -1; \ (cpu) = cpumask_next_and((cpu), (mask), (and)), \ diff --git a/lib/cpumask.c b/lib/cpumask.c index 2ebc3a9a7465..8d03f22c6ced 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -67,6 +67,7 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) { unsigned int i; + cpumask_check(cpu); for_each_cpu(i, mask) if (i != cpu) break; @@ -108,7 +109,7 @@ void free_cpumask_var(cpumask_var_t mask) } EXPORT_SYMBOL(free_cpumask_var); -void free_bootmem_cpumask_var(cpumask_var_t mask) +void __init free_bootmem_cpumask_var(cpumask_var_t mask) { free_bootmem((unsigned long)mask, cpumask_size()); } -- cgit v1.2.3 From 8a8bc22332ee6ea49137508467a76aa7f4367719 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 10 Nov 2008 14:48:21 +0900 Subject: libata: revert convert-to-block-tagging patches This patch reverts the following three commits which convert libata to use block layer tagging. 43a49cbdf31e812c0d8f553d433b09b421f5d52c e013e13bf605b9e6b702adffbe2853cfc60e7806 2fca5ccf97d2c28bcfce44f5b07d85e74e3cd18e Although using block layer tagging is the right direction, due to the tight coupling among tag number, data structure allocation and hardware command slot allocation, libata doesn't work correctly with the current conversion. The biggest problem is guaranteeing that tag 0 is always used for non-NCQ commands. Due to the way blk-tag is implemented and how SCSI starts and finishes requests, such guarantee can't be made. I'm not sure whether this would actually break any low level driver but it doesn't look like a good idea to break such assumption given the frailty of ATA controllers. So, for the time being, keep using the old dumb in-libata qc allocation. Signed-off-by: Tejun Heo Cc: Jens Axobe Cc: Jeff Garzik Signed-off-by: Linus Torvalds --- drivers/ata/libata-core.c | 66 ++++++++++++++++++++++++++++++++++++++++++----- drivers/ata/libata-scsi.c | 23 ++--------------- drivers/ata/libata.h | 19 ++------------ include/linux/libata.h | 1 + 4 files changed, 65 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 622350d9b2e3..0cd3ad497136 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -1712,6 +1712,8 @@ unsigned ata_exec_internal_sg(struct ata_device *dev, else tag = 0; + if (test_and_set_bit(tag, &ap->qc_allocated)) + BUG(); qc = __ata_qc_from_tag(ap, tag); qc->tag = tag; @@ -4562,6 +4564,37 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words) #endif /* __BIG_ENDIAN */ } +/** + * ata_qc_new - Request an available ATA command, for queueing + * @ap: Port associated with device @dev + * @dev: Device from whom we request an available command structure + * + * LOCKING: + * None. + */ + +static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap) +{ + struct ata_queued_cmd *qc = NULL; + unsigned int i; + + /* no command while frozen */ + if (unlikely(ap->pflags & ATA_PFLAG_FROZEN)) + return NULL; + + /* the last tag is reserved for internal command. */ + for (i = 0; i < ATA_MAX_QUEUE - 1; i++) + if (!test_and_set_bit(i, &ap->qc_allocated)) { + qc = __ata_qc_from_tag(ap, i); + break; + } + + if (qc) + qc->tag = i; + + return qc; +} + /** * ata_qc_new_init - Request an available ATA command, and initialize it * @dev: Device from whom we request an available command structure @@ -4571,20 +4604,16 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words) * None. */ -struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag) +struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev) { struct ata_port *ap = dev->link->ap; struct ata_queued_cmd *qc; - if (unlikely(ap->pflags & ATA_PFLAG_FROZEN)) - return NULL; - - qc = __ata_qc_from_tag(ap, tag); + qc = ata_qc_new(ap); if (qc) { qc->scsicmd = NULL; qc->ap = ap; qc->dev = dev; - qc->tag = tag; ata_qc_reinit(qc); } @@ -4592,6 +4621,31 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag) return qc; } +/** + * ata_qc_free - free unused ata_queued_cmd + * @qc: Command to complete + * + * Designed to free unused ata_queued_cmd object + * in case something prevents using it. + * + * LOCKING: + * spin_lock_irqsave(host lock) + */ +void ata_qc_free(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + unsigned int tag; + + WARN_ON(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ + + qc->flags = 0; + tag = qc->tag; + if (likely(ata_tag_valid(tag))) { + qc->tag = ATA_TAG_POISON; + clear_bit(tag, &ap->qc_allocated); + } +} + void __ata_qc_complete(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 3fa75eac135d..47c7afcb36f2 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -709,11 +709,7 @@ static struct ata_queued_cmd *ata_scsi_qc_new(struct ata_device *dev, { struct ata_queued_cmd *qc; - if (cmd->request->tag != -1) - qc = ata_qc_new_init(dev, cmd->request->tag); - else - qc = ata_qc_new_init(dev, 0); - + qc = ata_qc_new_init(dev); if (qc) { qc->scsicmd = cmd; qc->scsidone = done; @@ -1108,17 +1104,7 @@ static int ata_scsi_dev_config(struct scsi_device *sdev, depth = min(sdev->host->can_queue, ata_id_queue_depth(dev->id)); depth = min(ATA_MAX_QUEUE - 1, depth); - - /* - * If this device is behind a port multiplier, we have - * to share the tag map between all devices on that PMP. - * Set up the shared tag map here and we get automatic. - */ - if (dev->link->ap->pmp_link) - scsi_init_shared_tag_map(sdev->host, ATA_MAX_QUEUE - 1); - - scsi_set_tag_type(sdev, MSG_SIMPLE_TAG); - scsi_activate_tcq(sdev, depth); + scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, depth); } return 0; @@ -1958,11 +1944,6 @@ static unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf) hdr[1] |= (1 << 7); memcpy(rbuf, hdr, sizeof(hdr)); - - /* if ncq, set tags supported */ - if (ata_id_has_ncq(args->id)) - rbuf[7] |= (1 << 1); - memcpy(&rbuf[8], "ATA ", 8); ata_id_string(args->id, &rbuf[16], ATA_ID_PROD, 16); ata_id_string(args->id, &rbuf[32], ATA_ID_FW_REV, 4); diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index d3831d39bdaa..fe2839e58774 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -74,7 +74,7 @@ extern struct ata_link *ata_dev_phys_link(struct ata_device *dev); extern void ata_force_cbl(struct ata_port *ap); extern u64 ata_tf_to_lba(const struct ata_taskfile *tf); extern u64 ata_tf_to_lba48(const struct ata_taskfile *tf); -extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag); +extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev); extern int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev, u64 block, u32 n_block, unsigned int tf_flags, unsigned int tag); @@ -103,6 +103,7 @@ extern int ata_dev_configure(struct ata_device *dev); extern int sata_down_spd_limit(struct ata_link *link); extern int ata_down_xfermask_limit(struct ata_device *dev, unsigned int sel); extern void ata_sg_clean(struct ata_queued_cmd *qc); +extern void ata_qc_free(struct ata_queued_cmd *qc); extern void ata_qc_issue(struct ata_queued_cmd *qc); extern void __ata_qc_complete(struct ata_queued_cmd *qc); extern int atapi_check_dma(struct ata_queued_cmd *qc); @@ -118,22 +119,6 @@ extern struct ata_port *ata_port_alloc(struct ata_host *host); extern void ata_dev_enable_pm(struct ata_device *dev, enum link_pm policy); extern void ata_lpm_schedule(struct ata_port *ap, enum link_pm); -/** - * ata_qc_free - free unused ata_queued_cmd - * @qc: Command to complete - * - * Designed to free unused ata_queued_cmd object - * in case something prevents using it. - * - * LOCKING: - * spin_lock_irqsave(host lock) - */ -static inline void ata_qc_free(struct ata_queued_cmd *qc) -{ - qc->flags = 0; - qc->tag = ATA_TAG_POISON; -} - /* libata-acpi.c */ #ifdef CONFIG_ATA_ACPI extern void ata_acpi_associate_sata_port(struct ata_port *ap); diff --git a/include/linux/libata.h b/include/linux/libata.h index c7665a4134c5..59b0f1c807b5 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -698,6 +698,7 @@ struct ata_port { unsigned int cbl; /* cable type; ATA_CBL_xxx */ struct ata_queued_cmd qcmd[ATA_MAX_QUEUE]; + unsigned long qc_allocated; unsigned int qc_active; int nr_active_links; /* #links with active qcs */ -- cgit v1.2.3 From fd0fcf5c29dd0339c5f5d86eb2cbe9fdad5bcd73 Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Thu, 6 Nov 2008 10:49:21 +0000 Subject: ssb: Fix DMA-API compilation for non-PCI systems This fixes compilation of the SSB DMA-API code on non-PCI platforms. Signed-off-by: Michael Buesch Signed-off-by: David S. Miller --- include/linux/ssb/ssb.h | 42 +++++++++++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index e530026eedf7..17d9b58f6379 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -427,12 +427,16 @@ static inline int ssb_dma_mapping_error(struct ssb_device *dev, dma_addr_t addr) { switch (dev->bus->bustype) { case SSB_BUSTYPE_PCI: +#ifdef CONFIG_SSB_PCIHOST return pci_dma_mapping_error(dev->bus->host_pci, addr); +#endif + break; case SSB_BUSTYPE_SSB: return dma_mapping_error(dev->dev, addr); default: - __ssb_dma_not_implemented(dev); + break; } + __ssb_dma_not_implemented(dev); return -ENOSYS; } @@ -441,12 +445,16 @@ static inline dma_addr_t ssb_dma_map_single(struct ssb_device *dev, void *p, { switch (dev->bus->bustype) { case SSB_BUSTYPE_PCI: +#ifdef CONFIG_SSB_PCIHOST return pci_map_single(dev->bus->host_pci, p, size, dir); +#endif + break; case SSB_BUSTYPE_SSB: return dma_map_single(dev->dev, p, size, dir); default: - __ssb_dma_not_implemented(dev); + break; } + __ssb_dma_not_implemented(dev); return 0; } @@ -455,14 +463,18 @@ static inline void ssb_dma_unmap_single(struct ssb_device *dev, dma_addr_t dma_a { switch (dev->bus->bustype) { case SSB_BUSTYPE_PCI: +#ifdef CONFIG_SSB_PCIHOST pci_unmap_single(dev->bus->host_pci, dma_addr, size, dir); return; +#endif + break; case SSB_BUSTYPE_SSB: dma_unmap_single(dev->dev, dma_addr, size, dir); return; default: - __ssb_dma_not_implemented(dev); + break; } + __ssb_dma_not_implemented(dev); } static inline void ssb_dma_sync_single_for_cpu(struct ssb_device *dev, @@ -472,15 +484,19 @@ static inline void ssb_dma_sync_single_for_cpu(struct ssb_device *dev, { switch (dev->bus->bustype) { case SSB_BUSTYPE_PCI: +#ifdef CONFIG_SSB_PCIHOST pci_dma_sync_single_for_cpu(dev->bus->host_pci, dma_addr, size, dir); return; +#endif + break; case SSB_BUSTYPE_SSB: dma_sync_single_for_cpu(dev->dev, dma_addr, size, dir); return; default: - __ssb_dma_not_implemented(dev); + break; } + __ssb_dma_not_implemented(dev); } static inline void ssb_dma_sync_single_for_device(struct ssb_device *dev, @@ -490,15 +506,19 @@ static inline void ssb_dma_sync_single_for_device(struct ssb_device *dev, { switch (dev->bus->bustype) { case SSB_BUSTYPE_PCI: +#ifdef CONFIG_SSB_PCIHOST pci_dma_sync_single_for_device(dev->bus->host_pci, dma_addr, size, dir); return; +#endif + break; case SSB_BUSTYPE_SSB: dma_sync_single_for_device(dev->dev, dma_addr, size, dir); return; default: - __ssb_dma_not_implemented(dev); + break; } + __ssb_dma_not_implemented(dev); } static inline void ssb_dma_sync_single_range_for_cpu(struct ssb_device *dev, @@ -509,17 +529,21 @@ static inline void ssb_dma_sync_single_range_for_cpu(struct ssb_device *dev, { switch (dev->bus->bustype) { case SSB_BUSTYPE_PCI: +#ifdef CONFIG_SSB_PCIHOST /* Just sync everything. That's all the PCI API can do. */ pci_dma_sync_single_for_cpu(dev->bus->host_pci, dma_addr, offset + size, dir); return; +#endif + break; case SSB_BUSTYPE_SSB: dma_sync_single_range_for_cpu(dev->dev, dma_addr, offset, size, dir); return; default: - __ssb_dma_not_implemented(dev); + break; } + __ssb_dma_not_implemented(dev); } static inline void ssb_dma_sync_single_range_for_device(struct ssb_device *dev, @@ -530,17 +554,21 @@ static inline void ssb_dma_sync_single_range_for_device(struct ssb_device *dev, { switch (dev->bus->bustype) { case SSB_BUSTYPE_PCI: +#ifdef CONFIG_SSB_PCIHOST /* Just sync everything. That's all the PCI API can do. */ pci_dma_sync_single_for_device(dev->bus->host_pci, dma_addr, offset + size, dir); return; +#endif + break; case SSB_BUSTYPE_SSB: dma_sync_single_range_for_device(dev->dev, dma_addr, offset, size, dir); return; default: - __ssb_dma_not_implemented(dev); + break; } + __ssb_dma_not_implemented(dev); } -- cgit v1.2.3 From ad474caca3e2a0550b7ce0706527ad5ab389a4d4 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 10 Nov 2008 15:39:30 +0100 Subject: fix for account_group_exec_runtime(), make sure ->signal can't be freed under rq->lock Impact: fix hang/crash on ia64 under high load This is ugly, but the simplest patch by far. Unlike other similar routines, account_group_exec_runtime() could be called "implicitly" from within scheduler after exit_notify(). This means we can race with the parent doing release_task(), we can't just check ->signal != NULL. Change __exit_signal() to do spin_unlock_wait(&task_rq(tsk)->lock) before __cleanup_signal() to make sure ->signal can't be freed under task_rq(tsk)->lock. Note that task_rq_unlock_wait() doesn't care about the case when tsk changes cpu/rq under us, this should be OK. Thanks to Ingo who nacked my previous buggy patch. Signed-off-by: Oleg Nesterov Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar Reported-by: Doug Chapman --- include/linux/sched.h | 1 + kernel/exit.c | 5 +++++ kernel/sched.c | 8 ++++++++ 3 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 295b7c756ca6..644ffbda17ca 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -247,6 +247,7 @@ extern void init_idle(struct task_struct *idle, int cpu); extern void init_idle_bootup_task(struct task_struct *idle); extern int runqueue_is_locked(void); +extern void task_rq_unlock_wait(struct task_struct *p); extern cpumask_t nohz_cpu_mask; #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) diff --git a/kernel/exit.c b/kernel/exit.c index 80137a5d9467..ae2b92be5fae 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -141,6 +141,11 @@ static void __exit_signal(struct task_struct *tsk) if (sig) { flush_sigqueue(&sig->shared_pending); taskstats_tgid_free(sig); + /* + * Make sure ->signal can't go away under rq->lock, + * see account_group_exec_runtime(). + */ + task_rq_unlock_wait(tsk); __cleanup_signal(sig); } } diff --git a/kernel/sched.c b/kernel/sched.c index f3149244e324..50a21f964679 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -969,6 +969,14 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) } } +void task_rq_unlock_wait(struct task_struct *p) +{ + struct rq *rq = task_rq(p); + + smp_mb(); /* spin-unlock-wait is not a full memory barrier */ + spin_unlock_wait(&rq->lock); +} + static void __task_rq_unlock(struct rq *rq) __releases(rq->lock) { -- cgit v1.2.3 From 0906dd9df2f79042cfa82d8388895be7cbe7a51b Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Tue, 11 Nov 2008 14:51:23 +0000 Subject: telephony: trivial: fix up email address Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/telephony/phonedev.c | 2 +- include/linux/telephony.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/telephony/phonedev.c b/drivers/telephony/phonedev.c index 37caf4d69037..b52cc830c0b4 100644 --- a/drivers/telephony/phonedev.c +++ b/drivers/telephony/phonedev.c @@ -8,7 +8,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Author: Alan Cox, + * Author: Alan Cox, * * Fixes: Mar 01 2000 Thomas Sparr, * phone_register_device now works with unit!=PHONE_UNIT_ANY diff --git a/include/linux/telephony.h b/include/linux/telephony.h index 5b2b6261f193..f63afe330add 100644 --- a/include/linux/telephony.h +++ b/include/linux/telephony.h @@ -14,7 +14,7 @@ * Authors: Ed Okerson, * Greg Herlein, * - * Contributors: Alan Cox, + * Contributors: Alan Cox, * David W. Erhart, * * IN NO EVENT SHALL QUICKNET TECHNOLOGIES, INC. BE LIABLE TO ANY PARTY FOR -- cgit v1.2.3 From a358324466b171e145df20bdb74fe81759906de6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 11 Nov 2008 15:01:42 -0500 Subject: ring-buffer: buffer record on/off switch Impact: enable/disable ring buffer recording API added Several kernel developers have requested that there be a way to stop recording into the ring buffers with a simple switch that can also be enabled from userspace. This patch addes a new kernel API to the ring buffers called: tracing_on() tracing_off() When tracing_off() is called, all ring buffers will not be able to record into their buffers. tracing_on() will enable the ring buffers again. These two act like an on/off switch. That is, there is no counting of the number of times tracing_off or tracing_on has been called. A new file is added to the debugfs/tracing directory called tracing_on This allows for userspace applications to also flip the switch. echo 0 > debugfs/tracing/tracing_on disables the tracing. echo 1 > /debugfs/tracing/tracing_on enables it. Note, this does not disable or enable any tracers. It only sets or clears a flag that needs to be set in order for the ring buffers to write to their buffers. It is a global flag, and affects all ring buffers. The buffers start out with tracing_on enabled. There are now three flags that control recording into the buffers: tracing_on: which affects all ring buffer tracers. buffer->record_disabled: which affects an allocated buffer, which may be set if an anomaly is detected, and tracing is disabled. cpu_buffer->record_disabled: which is set by tracing_stop() or if an anomaly is detected. tracing_start can not reenable this if an anomaly occurred. The userspace debugfs/tracing/tracing_enabled is implemented with tracing_stop() but the user space code can not enable it if the kernel called tracing_stop(). Userspace can enable the tracing_on even if the kernel disabled it. It is just a switch used to stop tracing if a condition was hit. tracing_on is not for protecting critical areas in the kernel nor is it for stopping tracing if an anomaly occurred. This is because userspace can reenable it at any time. Side effect: With this patch, I discovered a dead variable in ftrace.c called tracing_on. This patch removes it. Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 3 ++ kernel/trace/ftrace.c | 8 +--- kernel/trace/ring_buffer.c | 101 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 106 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 536b0ca46a03..e097c2e6b6dc 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -120,6 +120,9 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer); u64 ring_buffer_time_stamp(int cpu); void ring_buffer_normalize_time_stamp(int cpu, u64 *ts); +void tracing_on(void); +void tracing_off(void); + enum ring_buffer_flags { RB_FL_OVERWRITE = 1 << 0, }; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4a39d24568c8..14fa52297b28 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -185,7 +185,6 @@ enum { }; static int ftrace_filtered; -static int tracing_on; static LIST_HEAD(ftrace_new_addrs); @@ -506,13 +505,10 @@ static int __ftrace_modify_code(void *data) { int *command = data; - if (*command & FTRACE_ENABLE_CALLS) { + if (*command & FTRACE_ENABLE_CALLS) ftrace_replace_code(1); - tracing_on = 1; - } else if (*command & FTRACE_DISABLE_CALLS) { + else if (*command & FTRACE_DISABLE_CALLS) ftrace_replace_code(0); - tracing_on = 0; - } if (*command & FTRACE_UPDATE_TRACE_FUNC) ftrace_update_ftrace_func(ftrace_trace_function); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 2f76193c3489..b08ee9f00c8d 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -16,6 +16,35 @@ #include #include +#include "trace.h" + +/* Global flag to disable all recording to ring buffers */ +static int ring_buffers_off __read_mostly; + +/** + * tracing_on - enable all tracing buffers + * + * This function enables all tracing buffers that may have been + * disabled with tracing_off. + */ +void tracing_on(void) +{ + ring_buffers_off = 0; +} + +/** + * tracing_off - turn off all tracing buffers + * + * This function stops all tracing buffers from recording data. + * It does not disable any overhead the tracers themselves may + * be causing. This function simply causes all recording to + * the ring buffers to fail. + */ +void tracing_off(void) +{ + ring_buffers_off = 1; +} + /* Up this if you want to test the TIME_EXTENTS and normalization */ #define DEBUG_SHIFT 0 @@ -1133,6 +1162,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, struct ring_buffer_event *event; int cpu, resched; + if (ring_buffers_off) + return NULL; + if (atomic_read(&buffer->record_disabled)) return NULL; @@ -1249,6 +1281,9 @@ int ring_buffer_write(struct ring_buffer *buffer, int ret = -EBUSY; int cpu, resched; + if (ring_buffers_off) + return -EBUSY; + if (atomic_read(&buffer->record_disabled)) return -EBUSY; @@ -2070,3 +2105,69 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, return 0; } +static ssize_t +rb_simple_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int *p = filp->private_data; + char buf[64]; + int r; + + /* !ring_buffers_off == tracing_on */ + r = sprintf(buf, "%d\n", !*p); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + +static ssize_t +rb_simple_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int *p = filp->private_data; + char buf[64]; + long val; + int ret; + + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + ret = strict_strtoul(buf, 10, &val); + if (ret < 0) + return ret; + + /* !ring_buffers_off == tracing_on */ + *p = !val; + + (*ppos)++; + + return cnt; +} + +static struct file_operations rb_simple_fops = { + .open = tracing_open_generic, + .read = rb_simple_read, + .write = rb_simple_write, +}; + + +static __init int rb_init_debugfs(void) +{ + struct dentry *d_tracer; + struct dentry *entry; + + d_tracer = tracing_init_dentry(); + + entry = debugfs_create_file("tracing_on", 0644, d_tracer, + &ring_buffers_off, &rb_simple_fops); + if (!entry) + pr_warning("Could not create debugfs 'tracing_on' entry\n"); + + return 0; +} + +fs_initcall(rb_init_debugfs); -- cgit v1.2.3 From 1a22f08dbd0e77c7cf45b5f527f93131d0b591b6 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 11 Nov 2008 12:19:05 +0900 Subject: serial: sh-sci: fix cannot work SH7723 SCIFA SH7723 has SCIFA. This module is similer SCI register map, but it has FIFO. So this patch adds new type(PORT_SCIFA) and change some type checking. Signed-off-by: Yoshihiro Shimoda Signed-off-by: Paul Mundt --- drivers/serial/sh-sci.c | 20 +++++++++++--------- drivers/serial/sh-sci.h | 16 ++++++++-------- include/linux/serial_core.h | 3 +++ 3 files changed, 22 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c index 5c0f32c7fbf6..518c0321e4d3 100644 --- a/drivers/serial/sh-sci.c +++ b/drivers/serial/sh-sci.c @@ -478,10 +478,10 @@ static void sci_transmit_chars(struct uart_port *port) return; } - if (port->type == PORT_SCIF) - count = scif_txroom(port); - else + if (port->type == PORT_SCI) count = sci_txroom(port); + else + count = scif_txroom(port); do { unsigned char c; @@ -510,7 +510,7 @@ static void sci_transmit_chars(struct uart_port *port) } else { ctrl = sci_in(port, SCSCR); - if (port->type == PORT_SCIF) { + if (port->type != PORT_SCI) { sci_in(port, SCxSR); /* Dummy read */ sci_out(port, SCxSR, SCxSR_TDxE_CLEAR(port)); } @@ -536,10 +536,10 @@ static inline void sci_receive_chars(struct uart_port *port) return; while (1) { - if (port->type == PORT_SCIF) - count = scif_rxroom(port); - else + if (port->type == PORT_SCI) count = sci_rxroom(port); + else + count = scif_rxroom(port); /* Don't copy more bytes than there is room for in the buffer */ count = tty_buffer_request_room(tty, count); @@ -714,7 +714,7 @@ static inline int sci_handle_breaks(struct uart_port *port) #if defined(SCIF_ORER) /* XXX: Handle SCIF overrun error */ - if (port->type == PORT_SCIF && (sci_in(port, SCLSR) & SCIF_ORER) != 0) { + if (port->type != PORT_SCI && (sci_in(port, SCLSR) & SCIF_ORER) != 0) { sci_out(port, SCLSR, 0); if (tty_insert_flip_char(tty, 0, TTY_OVERRUN)) { copied++; @@ -1042,7 +1042,7 @@ static void sci_set_termios(struct uart_port *port, struct ktermios *termios, sci_out(port, SCSCR, 0x00); /* TE=0, RE=0, CKE1=0 */ - if (port->type == PORT_SCIF) + if (port->type != PORT_SCI) sci_out(port, SCFCR, SCFCR_RFRST | SCFCR_TFRST); smr_val = sci_in(port, SCSMR) & 3; @@ -1085,6 +1085,7 @@ static const char *sci_type(struct uart_port *port) case PORT_SCI: return "sci"; case PORT_SCIF: return "scif"; case PORT_IRDA: return "irda"; + case PORT_SCIFA: return "scifa"; } return NULL; @@ -1112,6 +1113,7 @@ static void sci_config_port(struct uart_port *port, int flags) s->init_pins = sci_init_pins_sci; break; case PORT_SCIF: + case PORT_SCIFA: s->init_pins = sci_init_pins_scif; break; case PORT_IRDA: diff --git a/drivers/serial/sh-sci.h b/drivers/serial/sh-sci.h index 6163a45f968f..9f33b064172e 100644 --- a/drivers/serial/sh-sci.h +++ b/drivers/serial/sh-sci.h @@ -289,18 +289,18 @@ #define CPU_SCIx_FNS(name, sci_offset, sci_size, scif_offset, scif_size)\ static inline unsigned int sci_##name##_in(struct uart_port *port) \ { \ - if (port->type == PORT_SCI) { \ - SCI_IN(sci_size, sci_offset) \ - } else { \ - SCI_IN(scif_size, scif_offset); \ + if (port->type == PORT_SCIF) { \ + SCI_IN(scif_size, scif_offset) \ + } else { /* PORT_SCI or PORT_SCIFA */ \ + SCI_IN(sci_size, sci_offset); \ } \ } \ static inline void sci_##name##_out(struct uart_port *port, unsigned int value) \ { \ - if (port->type == PORT_SCI) { \ - SCI_OUT(sci_size, sci_offset, value) \ - } else { \ - SCI_OUT(scif_size, scif_offset, value); \ + if (port->type == PORT_SCIF) { \ + SCI_OUT(scif_size, scif_offset, value) \ + } else { /* PORT_SCI or PORT_SCIFA */ \ + SCI_OUT(sci_size, sci_offset, value); \ } \ } diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index e27f216361fc..4e4f1277f3bf 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -155,6 +155,9 @@ #define PORT_SC26XX 82 +/* SH-SCI */ +#define PORT_SCIFA 83 + #ifdef __KERNEL__ #include -- cgit v1.2.3 From 621a0d5207c18012cb39932f2d9830a11a6cb03d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 12 Nov 2008 09:36:35 +0100 Subject: hrtimer: clean up unused callback modes Impact: cleanup git grep HRTIMER_CB_IRQSAFE revealed half the callback modes are actually unused. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 5 ----- kernel/hrtimer.c | 9 --------- 2 files changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 07e510a3b00a..3eba43878dcb 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -46,9 +46,6 @@ enum hrtimer_restart { * hrtimer callback modes: * * HRTIMER_CB_SOFTIRQ: Callback must run in softirq context - * HRTIMER_CB_IRQSAFE: Callback may run in hardirq context - * HRTIMER_CB_IRQSAFE_NO_RESTART: Callback may run in hardirq context and - * does not restart the timer * HRTIMER_CB_IRQSAFE_PERCPU: Callback must run in hardirq context * Special mode for tick emulation and * scheduler timer. Such timers are per @@ -61,8 +58,6 @@ enum hrtimer_restart { */ enum hrtimer_cb_mode { HRTIMER_CB_SOFTIRQ, - HRTIMER_CB_IRQSAFE, - HRTIMER_CB_IRQSAFE_NO_RESTART, HRTIMER_CB_IRQSAFE_PERCPU, HRTIMER_CB_IRQSAFE_UNLOCKED, }; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 95d3949f2ae5..47e63349d1b2 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -664,14 +664,6 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, /* Timer is expired, act upon the callback mode */ switch(timer->cb_mode) { - case HRTIMER_CB_IRQSAFE_NO_RESTART: - debug_hrtimer_deactivate(timer); - /* - * We can call the callback from here. No restart - * happens, so no danger of recursion - */ - BUG_ON(timer->function(timer) != HRTIMER_NORESTART); - return 1; case HRTIMER_CB_IRQSAFE_PERCPU: case HRTIMER_CB_IRQSAFE_UNLOCKED: /* @@ -683,7 +675,6 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, */ debug_hrtimer_deactivate(timer); return 1; - case HRTIMER_CB_IRQSAFE: case HRTIMER_CB_SOFTIRQ: /* * Move everything else into the softirq pending list ! -- cgit v1.2.3 From b76f90b526737070302a127c710263e2ac707676 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 12 Nov 2008 13:26:55 -0800 Subject: remove ratelimt() It mistakenly assumes that a static local in an inlined function is a kernel-wide singleton. It also has no callers, so let's remove it. Cc: Dave Young Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ratelimit.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h index 18a5b9ba9d40..00044b856453 100644 --- a/include/linux/ratelimit.h +++ b/include/linux/ratelimit.h @@ -17,11 +17,4 @@ struct ratelimit_state { struct ratelimit_state name = {interval, burst,} extern int __ratelimit(struct ratelimit_state *rs); - -static inline int ratelimit(void) -{ - static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, - DEFAULT_RATELIMIT_BURST); - return __ratelimit(&rs); -} #endif -- cgit v1.2.3 From 077eaf5b40ecb2c345d82f02275c20e965dfa3e5 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 12 Nov 2008 13:27:04 -0800 Subject: rtc: rtc-wm8350: add support for WM8350 RTC This adds support for the RTC provided by the Wolfson Microelectronics WM8350. This driver was originally written by Graeme Gregory and Liam Girdwood, though it has been modified since then to update it to current mainline coding standards and for API completeness. [akpm@linux-foundation.org: s/schedule_timeout_interruptible/schedule_timeout_uninterruptible/ to prevent bogus timeout when signal_pending()] Signed-off-by: Mark Brown Cc: Alessandro Zummo Cc: David Brownell Cc: Liam Girdwood Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/Kconfig | 10 + drivers/rtc/Makefile | 1 + drivers/rtc/rtc-wm8350.c | 514 +++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/wm8350/rtc.h | 2 + 4 files changed, 527 insertions(+) create mode 100644 drivers/rtc/rtc-wm8350.c (limited to 'include/linux') diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 8abbb2020af9..7951ad2fd995 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -468,6 +468,16 @@ config RTC_DRV_V3020 This driver can also be built as a module. If so, the module will be called rtc-v3020. +config RTC_DRV_WM8350 + tristate "Wolfson Microelectronics WM8350 RTC" + depends on MFD_WM8350 + help + If you say yes here you will get support for the RTC subsystem + of the Wolfson Microelectronics WM8350. + + This driver can also be built as a module. If so, the module + will be called "rtc-wm8350". + comment "on-CPU RTC drivers" config RTC_DRV_OMAP diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index e9e8474cc8fe..7a41201e7efd 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -66,4 +66,5 @@ obj-$(CONFIG_RTC_DRV_TEST) += rtc-test.o obj-$(CONFIG_RTC_DRV_TWL4030) += rtc-twl4030.o obj-$(CONFIG_RTC_DRV_V3020) += rtc-v3020.o obj-$(CONFIG_RTC_DRV_VR41XX) += rtc-vr41xx.o +obj-$(CONFIG_RTC_DRV_WM8350) += rtc-wm8350.o obj-$(CONFIG_RTC_DRV_X1205) += rtc-x1205.o diff --git a/drivers/rtc/rtc-wm8350.c b/drivers/rtc/rtc-wm8350.c new file mode 100644 index 000000000000..5c5e3aa91385 --- /dev/null +++ b/drivers/rtc/rtc-wm8350.c @@ -0,0 +1,514 @@ +/* + * Real Time Clock driver for Wolfson Microelectronics WM8350 + * + * Copyright (C) 2007, 2008 Wolfson Microelectronics PLC. + * + * Author: Liam Girdwood + * linux@wolfsonmicro.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define WM8350_SET_ALM_RETRIES 5 +#define WM8350_SET_TIME_RETRIES 5 +#define WM8350_GET_TIME_RETRIES 5 + +#define to_wm8350_from_rtc_dev(d) container_of(d, struct wm8350, rtc.pdev.dev) + +/* + * Read current time and date in RTC + */ +static int wm8350_rtc_readtime(struct device *dev, struct rtc_time *tm) +{ + struct wm8350 *wm8350 = dev_get_drvdata(dev); + u16 time1[4], time2[4]; + int retries = WM8350_GET_TIME_RETRIES, ret; + + /* + * Read the time twice and compare. + * If time1 == time2, then time is valid else retry. + */ + do { + ret = wm8350_block_read(wm8350, WM8350_RTC_SECONDS_MINUTES, + 4, time1); + if (ret < 0) + return ret; + ret = wm8350_block_read(wm8350, WM8350_RTC_SECONDS_MINUTES, + 4, time2); + if (ret < 0) + return ret; + + if (memcmp(time1, time2, sizeof(time1)) == 0) { + tm->tm_sec = time1[0] & WM8350_RTC_SECS_MASK; + + tm->tm_min = (time1[0] & WM8350_RTC_MINS_MASK) + >> WM8350_RTC_MINS_SHIFT; + + tm->tm_hour = time1[1] & WM8350_RTC_HRS_MASK; + + tm->tm_wday = ((time1[1] >> WM8350_RTC_DAY_SHIFT) + & 0x7) - 1; + + tm->tm_mon = ((time1[2] & WM8350_RTC_MTH_MASK) + >> WM8350_RTC_MTH_SHIFT) - 1; + + tm->tm_mday = (time1[2] & WM8350_RTC_DATE_MASK); + + tm->tm_year = ((time1[3] & WM8350_RTC_YHUNDREDS_MASK) + >> WM8350_RTC_YHUNDREDS_SHIFT) * 100; + tm->tm_year += time1[3] & WM8350_RTC_YUNITS_MASK; + + tm->tm_yday = rtc_year_days(tm->tm_mday, tm->tm_mon, + tm->tm_year); + tm->tm_year -= 1900; + + dev_dbg(dev, "Read (%d left): %04x %04x %04x %04x\n", + retries, + time1[0], time1[1], time1[2], time1[3]); + + return 0; + } + } while (retries--); + + dev_err(dev, "timed out reading RTC time\n"); + return -EIO; +} + +/* + * Set current time and date in RTC + */ +static int wm8350_rtc_settime(struct device *dev, struct rtc_time *tm) +{ + struct wm8350 *wm8350 = dev_get_drvdata(dev); + u16 time[4]; + u16 rtc_ctrl; + int ret, retries = WM8350_SET_TIME_RETRIES; + + time[0] = tm->tm_sec; + time[0] |= tm->tm_min << WM8350_RTC_MINS_SHIFT; + time[1] = tm->tm_hour; + time[1] |= (tm->tm_wday + 1) << WM8350_RTC_DAY_SHIFT; + time[2] = tm->tm_mday; + time[2] |= (tm->tm_mon + 1) << WM8350_RTC_MTH_SHIFT; + time[3] = ((tm->tm_year + 1900) / 100) << WM8350_RTC_YHUNDREDS_SHIFT; + time[3] |= (tm->tm_year + 1900) % 100; + + dev_dbg(dev, "Setting: %04x %04x %04x %04x\n", + time[0], time[1], time[2], time[3]); + + /* Set RTC_SET to stop the clock */ + ret = wm8350_set_bits(wm8350, WM8350_RTC_TIME_CONTROL, WM8350_RTC_SET); + if (ret < 0) + return ret; + + /* Wait until confirmation of stopping */ + do { + rtc_ctrl = wm8350_reg_read(wm8350, WM8350_RTC_TIME_CONTROL); + schedule_timeout_uninterruptible(msecs_to_jiffies(1)); + } while (retries-- && !(rtc_ctrl & WM8350_RTC_STS)); + + if (!retries) { + dev_err(dev, "timed out on set confirmation\n"); + return -EIO; + } + + /* Write time to RTC */ + ret = wm8350_block_write(wm8350, WM8350_RTC_SECONDS_MINUTES, 4, time); + if (ret < 0) + return ret; + + /* Clear RTC_SET to start the clock */ + ret = wm8350_clear_bits(wm8350, WM8350_RTC_TIME_CONTROL, + WM8350_RTC_SET); + return ret; +} + +/* + * Read alarm time and date in RTC + */ +static int wm8350_rtc_readalarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + struct wm8350 *wm8350 = dev_get_drvdata(dev); + struct rtc_time *tm = &alrm->time; + u16 time[4]; + int ret; + + ret = wm8350_block_read(wm8350, WM8350_ALARM_SECONDS_MINUTES, 4, time); + if (ret < 0) + return ret; + + tm->tm_sec = time[0] & WM8350_RTC_ALMSECS_MASK; + if (tm->tm_sec == WM8350_RTC_ALMSECS_MASK) + tm->tm_sec = -1; + + tm->tm_min = time[0] & WM8350_RTC_ALMMINS_MASK; + if (tm->tm_min == WM8350_RTC_ALMMINS_MASK) + tm->tm_min = -1; + else + tm->tm_min >>= WM8350_RTC_ALMMINS_SHIFT; + + tm->tm_hour = time[1] & WM8350_RTC_ALMHRS_MASK; + if (tm->tm_hour == WM8350_RTC_ALMHRS_MASK) + tm->tm_hour = -1; + + tm->tm_wday = ((time[1] >> WM8350_RTC_ALMDAY_SHIFT) & 0x7) - 1; + if (tm->tm_wday > 7) + tm->tm_wday = -1; + + tm->tm_mon = time[2] & WM8350_RTC_ALMMTH_MASK; + if (tm->tm_mon == WM8350_RTC_ALMMTH_MASK) + tm->tm_mon = -1; + else + tm->tm_mon = (tm->tm_mon >> WM8350_RTC_ALMMTH_SHIFT) - 1; + + tm->tm_mday = (time[2] & WM8350_RTC_ALMDATE_MASK); + if (tm->tm_mday == WM8350_RTC_ALMDATE_MASK) + tm->tm_mday = -1; + + tm->tm_year = -1; + + alrm->enabled = !(time[3] & WM8350_RTC_ALMSTS); + + return 0; +} + +static int wm8350_rtc_stop_alarm(struct wm8350 *wm8350) +{ + int retries = WM8350_SET_ALM_RETRIES; + u16 rtc_ctrl; + int ret; + + /* Set RTC_SET to stop the clock */ + ret = wm8350_set_bits(wm8350, WM8350_RTC_TIME_CONTROL, + WM8350_RTC_ALMSET); + if (ret < 0) + return ret; + + /* Wait until confirmation of stopping */ + do { + rtc_ctrl = wm8350_reg_read(wm8350, WM8350_RTC_TIME_CONTROL); + schedule_timeout_uninterruptible(msecs_to_jiffies(1)); + } while (retries-- && !(rtc_ctrl & WM8350_RTC_ALMSTS)); + + if (!(rtc_ctrl & WM8350_RTC_ALMSTS)) + return -ETIMEDOUT; + + return 0; +} + +static int wm8350_rtc_start_alarm(struct wm8350 *wm8350) +{ + int ret; + int retries = WM8350_SET_ALM_RETRIES; + u16 rtc_ctrl; + + ret = wm8350_clear_bits(wm8350, WM8350_RTC_TIME_CONTROL, + WM8350_RTC_ALMSET); + if (ret < 0) + return ret; + + /* Wait until confirmation */ + do { + rtc_ctrl = wm8350_reg_read(wm8350, WM8350_RTC_TIME_CONTROL); + schedule_timeout_uninterruptible(msecs_to_jiffies(1)); + } while (retries-- && rtc_ctrl & WM8350_RTC_ALMSTS); + + if (rtc_ctrl & WM8350_RTC_ALMSTS) + return -ETIMEDOUT; + + return 0; +} + +static int wm8350_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + struct wm8350 *wm8350 = dev_get_drvdata(dev); + struct rtc_time *tm = &alrm->time; + u16 time[3]; + int ret; + + memset(time, 0, sizeof(time)); + + if (tm->tm_sec != -1) + time[0] |= tm->tm_sec; + else + time[0] |= WM8350_RTC_ALMSECS_MASK; + + if (tm->tm_min != -1) + time[0] |= tm->tm_min << WM8350_RTC_ALMMINS_SHIFT; + else + time[0] |= WM8350_RTC_ALMMINS_MASK; + + if (tm->tm_hour != -1) + time[1] |= tm->tm_hour; + else + time[1] |= WM8350_RTC_ALMHRS_MASK; + + if (tm->tm_wday != -1) + time[1] |= (tm->tm_wday + 1) << WM8350_RTC_ALMDAY_SHIFT; + else + time[1] |= WM8350_RTC_ALMDAY_MASK; + + if (tm->tm_mday != -1) + time[2] |= tm->tm_mday; + else + time[2] |= WM8350_RTC_ALMDATE_MASK; + + if (tm->tm_mon != -1) + time[2] |= (tm->tm_mon + 1) << WM8350_RTC_ALMMTH_SHIFT; + else + time[2] |= WM8350_RTC_ALMMTH_MASK; + + ret = wm8350_rtc_stop_alarm(wm8350); + if (ret < 0) + return ret; + + /* Write time to RTC */ + ret = wm8350_block_write(wm8350, WM8350_ALARM_SECONDS_MINUTES, + 3, time); + if (ret < 0) + return ret; + + if (alrm->enabled) + ret = wm8350_rtc_start_alarm(wm8350); + + return ret; +} + +/* + * Handle commands from user-space + */ +static int wm8350_rtc_ioctl(struct device *dev, unsigned int cmd, + unsigned long arg) +{ + struct wm8350 *wm8350 = dev_get_drvdata(dev); + + switch (cmd) { + case RTC_AIE_OFF: + return wm8350_rtc_stop_alarm(wm8350); + case RTC_AIE_ON: + return wm8350_rtc_start_alarm(wm8350); + + case RTC_UIE_OFF: + wm8350_mask_irq(wm8350, WM8350_IRQ_RTC_SEC); + break; + case RTC_UIE_ON: + wm8350_unmask_irq(wm8350, WM8350_IRQ_RTC_SEC); + break; + + default: + return -ENOIOCTLCMD; + } + + return 0; +} + +static void wm8350_rtc_alarm_handler(struct wm8350 *wm8350, int irq, + void *data) +{ + struct rtc_device *rtc = wm8350->rtc.rtc; + int ret; + + rtc_update_irq(rtc, 1, RTC_IRQF | RTC_AF); + + /* Make it one shot */ + ret = wm8350_set_bits(wm8350, WM8350_RTC_TIME_CONTROL, + WM8350_RTC_ALMSET); + if (ret != 0) { + dev_err(&(wm8350->rtc.pdev->dev), + "Failed to disable alarm: %d\n", ret); + } +} + +static void wm8350_rtc_update_handler(struct wm8350 *wm8350, int irq, + void *data) +{ + struct rtc_device *rtc = wm8350->rtc.rtc; + + rtc_update_irq(rtc, 1, RTC_IRQF | RTC_UF); +} + +static const struct rtc_class_ops wm8350_rtc_ops = { + .ioctl = wm8350_rtc_ioctl, + .read_time = wm8350_rtc_readtime, + .set_time = wm8350_rtc_settime, + .read_alarm = wm8350_rtc_readalarm, + .set_alarm = wm8350_rtc_setalarm, +}; + +#ifdef CONFIG_PM +static int wm8350_rtc_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct wm8350 *wm8350 = dev_get_drvdata(&pdev->dev); + int ret = 0; + u16 reg; + + reg = wm8350_reg_read(wm8350, WM8350_RTC_TIME_CONTROL); + + if (device_may_wakeup(&wm8350->rtc.pdev->dev) && + reg & WM8350_RTC_ALMSTS) { + ret = wm8350_rtc_stop_alarm(wm8350); + if (ret != 0) + dev_err(&pdev->dev, "Failed to stop RTC alarm: %d\n", + ret); + } + + return ret; +} + +static int wm8350_rtc_resume(struct platform_device *pdev) +{ + struct wm8350 *wm8350 = dev_get_drvdata(&pdev->dev); + int ret; + + if (wm8350->rtc.alarm_enabled) { + ret = wm8350_rtc_start_alarm(wm8350); + if (ret != 0) + dev_err(&pdev->dev, + "Failed to restart RTC alarm: %d\n", ret); + } + + return 0; +} + +#else +#define wm8350_rtc_suspend NULL +#define wm8350_rtc_resume NULL +#endif + +static int wm8350_rtc_probe(struct platform_device *pdev) +{ + struct wm8350 *wm8350 = platform_get_drvdata(pdev); + struct wm8350_rtc *wm_rtc = &wm8350->rtc; + int ret = 0; + u16 timectl, power5; + + timectl = wm8350_reg_read(wm8350, WM8350_RTC_TIME_CONTROL); + if (timectl & WM8350_RTC_BCD) { + dev_err(&pdev->dev, "RTC BCD mode not supported\n"); + return -EINVAL; + } + if (timectl & WM8350_RTC_12HR) { + dev_err(&pdev->dev, "RTC 12 hour mode not supported\n"); + return -EINVAL; + } + + /* enable the RTC if it's not already enabled */ + power5 = wm8350_reg_read(wm8350, WM8350_POWER_MGMT_5); + if (!(power5 & WM8350_RTC_TICK_ENA)) { + dev_info(wm8350->dev, "Starting RTC\n"); + + wm8350_reg_unlock(wm8350); + + ret = wm8350_set_bits(wm8350, WM8350_POWER_MGMT_5, + WM8350_RTC_TICK_ENA); + if (ret < 0) { + dev_err(&pdev->dev, "failed to enable RTC: %d\n", ret); + return ret; + } + + wm8350_reg_lock(wm8350); + } + + if (timectl & WM8350_RTC_STS) { + int retries; + + ret = wm8350_clear_bits(wm8350, WM8350_RTC_TIME_CONTROL, + WM8350_RTC_SET); + if (ret < 0) { + dev_err(&pdev->dev, "failed to start: %d\n", ret); + return ret; + } + + retries = WM8350_SET_TIME_RETRIES; + do { + timectl = wm8350_reg_read(wm8350, + WM8350_RTC_TIME_CONTROL); + } while (timectl & WM8350_RTC_STS && retries--); + + if (retries == 0) { + dev_err(&pdev->dev, "failed to start: timeout\n"); + return -ENODEV; + } + } + + device_init_wakeup(&pdev->dev, 1); + + wm_rtc->rtc = rtc_device_register("wm8350", &pdev->dev, + &wm8350_rtc_ops, THIS_MODULE); + if (IS_ERR(wm_rtc->rtc)) { + ret = PTR_ERR(wm_rtc->rtc); + dev_err(&pdev->dev, "failed to register RTC: %d\n", ret); + return ret; + } + + wm8350_mask_irq(wm8350, WM8350_IRQ_RTC_SEC); + wm8350_mask_irq(wm8350, WM8350_IRQ_RTC_PER); + + wm8350_register_irq(wm8350, WM8350_IRQ_RTC_SEC, + wm8350_rtc_update_handler, NULL); + + wm8350_register_irq(wm8350, WM8350_IRQ_RTC_ALM, + wm8350_rtc_alarm_handler, NULL); + wm8350_unmask_irq(wm8350, WM8350_IRQ_RTC_ALM); + + return 0; +} + +static int __devexit wm8350_rtc_remove(struct platform_device *pdev) +{ + struct wm8350 *wm8350 = platform_get_drvdata(pdev); + struct wm8350_rtc *wm_rtc = &wm8350->rtc; + + wm8350_mask_irq(wm8350, WM8350_IRQ_RTC_SEC); + + wm8350_free_irq(wm8350, WM8350_IRQ_RTC_SEC); + wm8350_free_irq(wm8350, WM8350_IRQ_RTC_ALM); + + rtc_device_unregister(wm_rtc->rtc); + + return 0; +} + +static struct platform_driver wm8350_rtc_driver = { + .probe = wm8350_rtc_probe, + .remove = __devexit_p(wm8350_rtc_remove), + .suspend = wm8350_rtc_suspend, + .resume = wm8350_rtc_resume, + .driver = { + .name = "wm8350-rtc", + }, +}; + +static int __init wm8350_rtc_init(void) +{ + return platform_driver_register(&wm8350_rtc_driver); +} +module_init(wm8350_rtc_init); + +static void __exit wm8350_rtc_exit(void) +{ + platform_driver_unregister(&wm8350_rtc_driver); +} +module_exit(wm8350_rtc_exit); + +MODULE_AUTHOR("Mark Brown "); +MODULE_DESCRIPTION("RTC driver for the WM8350"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:wm8350-rtc"); diff --git a/include/linux/mfd/wm8350/rtc.h b/include/linux/mfd/wm8350/rtc.h index dfda69e9f440..24add2bef6c9 100644 --- a/include/linux/mfd/wm8350/rtc.h +++ b/include/linux/mfd/wm8350/rtc.h @@ -261,6 +261,8 @@ struct wm8350_rtc { struct platform_device *pdev; + struct rtc_device *rtc; + int alarm_enabled; /* used over suspend/resume */ }; #endif -- cgit v1.2.3 From 4e17e1db96474af5620e3259754df4cb1c46521c Mon Sep 17 00:00:00 2001 From: Rodolfo Giometti Date: Wed, 12 Nov 2008 13:27:12 -0800 Subject: Add c2 port support C2port implements a two wire serial communication protocol (bit banging) designed to enable in-system programming, debugging, and boundary-scan testing on low pin-count Silicon Labs devices. Currently this code supports only flash programming through sysfs interface but extensions shoud be easy to add. Signed-off-by: Rodolfo Giometti Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/ABI/testing/sysfs-c2port | 88 +++ Documentation/c2port.txt | 90 +++ drivers/misc/Kconfig | 2 + drivers/misc/Makefile | 1 + drivers/misc/c2port/Kconfig | 24 + drivers/misc/c2port/Makefile | 1 + drivers/misc/c2port/core.c | 1002 ++++++++++++++++++++++++++++++++ include/linux/c2port.h | 65 +++ 8 files changed, 1273 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-c2port create mode 100644 Documentation/c2port.txt create mode 100644 drivers/misc/c2port/Kconfig create mode 100644 drivers/misc/c2port/Makefile create mode 100644 drivers/misc/c2port/core.c create mode 100644 include/linux/c2port.h (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-c2port b/Documentation/ABI/testing/sysfs-c2port new file mode 100644 index 000000000000..716cffc457e9 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-c2port @@ -0,0 +1,88 @@ +What: /sys/class/c2port/ +Date: October 2008 +Contact: Rodolfo Giometti +Description: + The /sys/class/c2port/ directory will contain files and + directories that will provide a unified interface to + the C2 port interface. + +What: /sys/class/c2port/c2portX +Date: October 2008 +Contact: Rodolfo Giometti +Description: + The /sys/class/c2port/c2portX/ directory is related to X-th + C2 port into the system. Each directory will contain files to + manage and control its C2 port. + +What: /sys/class/c2port/c2portX/access +Date: October 2008 +Contact: Rodolfo Giometti +Description: + The /sys/class/c2port/c2portX/access file enable the access + to the C2 port from the system. No commands can be sent + till this entry is set to 0. + +What: /sys/class/c2port/c2portX/dev_id +Date: October 2008 +Contact: Rodolfo Giometti +Description: + The /sys/class/c2port/c2portX/dev_id file show the device ID + of the connected micro. + +What: /sys/class/c2port/c2portX/flash_access +Date: October 2008 +Contact: Rodolfo Giometti +Description: + The /sys/class/c2port/c2portX/flash_access file enable the + access to the on-board flash of the connected micro. + No commands can be sent till this entry is set to 0. + +What: /sys/class/c2port/c2portX/flash_block_size +Date: October 2008 +Contact: Rodolfo Giometti +Description: + The /sys/class/c2port/c2portX/flash_block_size file show + the on-board flash block size of the connected micro. + +What: /sys/class/c2port/c2portX/flash_blocks_num +Date: October 2008 +Contact: Rodolfo Giometti +Description: + The /sys/class/c2port/c2portX/flash_blocks_num file show + the on-board flash blocks number of the connected micro. + +What: /sys/class/c2port/c2portX/flash_data +Date: October 2008 +Contact: Rodolfo Giometti +Description: + The /sys/class/c2port/c2portX/flash_data file export + the content of the on-board flash of the connected micro. + +What: /sys/class/c2port/c2portX/flash_erase +Date: October 2008 +Contact: Rodolfo Giometti +Description: + The /sys/class/c2port/c2portX/flash_erase file execute + the "erase" command on the on-board flash of the connected + micro. + +What: /sys/class/c2port/c2portX/flash_erase +Date: October 2008 +Contact: Rodolfo Giometti +Description: + The /sys/class/c2port/c2portX/flash_erase file show the + on-board flash size of the connected micro. + +What: /sys/class/c2port/c2portX/reset +Date: October 2008 +Contact: Rodolfo Giometti +Description: + The /sys/class/c2port/c2portX/reset file execute a "reset" + command on the connected micro. + +What: /sys/class/c2port/c2portX/rev_id +Date: October 2008 +Contact: Rodolfo Giometti +Description: + The /sys/class/c2port/c2portX/rev_id file show the revision ID + of the connected micro. diff --git a/Documentation/c2port.txt b/Documentation/c2port.txt new file mode 100644 index 000000000000..d9bf93ea4398 --- /dev/null +++ b/Documentation/c2port.txt @@ -0,0 +1,90 @@ + C2 port support + --------------- + +(C) Copyright 2007 Rodolfo Giometti + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + + + +Overview +-------- + +This driver implements the support for Linux of Silicon Labs (Silabs) +C2 Interface used for in-system programming of micro controllers. + +By using this driver you can reprogram the in-system flash without EC2 +or EC3 debug adapter. This solution is also useful in those systems +where the micro controller is connected via special GPIOs pins. + +References +---------- + +The C2 Interface main references are at (http://www.silabs.com) +Silicon Laboratories site], see: + +- AN127: FLASH Programming via the C2 Interface at +http://www.silabs.com/public/documents/tpub_doc/anote/Microcontrollers/Small_Form_Factor/en/an127.pdf, and + +- C2 Specification at +http://www.silabs.com/public/documents/tpub_doc/spec/Microcontrollers/en/C2spec.pdf, + +however it implements a two wire serial communication protocol (bit +banging) designed to enable in-system programming, debugging, and +boundary-scan testing on low pin-count Silicon Labs devices. Currently +this code supports only flash programming but extensions are easy to +add. + +Using the driver +---------------- + +Once the driver is loaded you can use sysfs support to get C2port's +info or read/write in-system flash. + +# ls /sys/class/c2port/c2port0/ +access flash_block_size flash_erase rev_id +dev_id flash_blocks_num flash_size subsystem/ +flash_access flash_data reset uevent + +Initially the C2port access is disabled since you hardware may have +such lines multiplexed with other devices so, to get access to the +C2port, you need the command: + +# echo 1 > /sys/class/c2port/c2port0/access + +after that you should read the device ID and revision ID of the +connected micro controller: + +# cat /sys/class/c2port/c2port0/dev_id +8 +# cat /sys/class/c2port/c2port0/rev_id +1 + +However, for security reasons, the in-system flash access in not +enabled yet, to do so you need the command: + +# echo 1 > /sys/class/c2port/c2port0/flash_access + +After that you can read the whole flash: + +# cat /sys/class/c2port/c2port0/flash_data > image + +erase it: + +# echo 1 > /sys/class/c2port/c2port0/flash_erase + +and write it: + +# cat image > /sys/class/c2port/c2port0/flash_data + +after writing you have to reset the device to execute the new code: + +# echo 1 > /sys/class/c2port/c2port0/reset diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index dcac7ca76937..fee7304102af 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -498,4 +498,6 @@ config SGI_GRU_DEBUG This option enables addition debugging code for the SGI GRU driver. If you are unsure, say N. +source "drivers/misc/c2port/Kconfig" + endif # MISC_DEVICES diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index bb14633d1362..817f7f5ab3bd 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -32,3 +32,4 @@ obj-$(CONFIG_KGDB_TESTS) += kgdbts.o obj-$(CONFIG_SGI_XP) += sgi-xp/ obj-$(CONFIG_SGI_GRU) += sgi-gru/ obj-$(CONFIG_HP_ILO) += hpilo.o +obj-$(CONFIG_C2PORT) += c2port/ diff --git a/drivers/misc/c2port/Kconfig b/drivers/misc/c2port/Kconfig new file mode 100644 index 000000000000..f1bad2b40329 --- /dev/null +++ b/drivers/misc/c2port/Kconfig @@ -0,0 +1,24 @@ +# +# C2 port devices +# + +menuconfig C2PORT + tristate "Silicon Labs C2 port support (EXPERIMENTAL)" + depends on EXPERIMENTAL + default no + help + This option enables support for Silicon Labs C2 port used to + program Silicon micro controller chips (and other 8051 compatible). + + If your board have no such micro controllers you don't need this + interface at all. + + To compile this driver as a module, choose M here: the module will + be called c2port_core. Note that you also need a client module + usually called c2port-*. + + If you are not sure, say N here. + +if C2PORT + +endif # C2PORT diff --git a/drivers/misc/c2port/Makefile b/drivers/misc/c2port/Makefile new file mode 100644 index 000000000000..3c610a2ba5ec --- /dev/null +++ b/drivers/misc/c2port/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_C2PORT) += core.o diff --git a/drivers/misc/c2port/core.c b/drivers/misc/c2port/core.c new file mode 100644 index 000000000000..976b35d1d035 --- /dev/null +++ b/drivers/misc/c2port/core.c @@ -0,0 +1,1002 @@ +/* + * Silicon Labs C2 port core Linux support + * + * Copyright (c) 2007 Rodolfo Giometti + * Copyright (c) 2007 Eurotech S.p.A. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define DRIVER_NAME "c2port" +#define DRIVER_VERSION "0.51.0" + +static DEFINE_SPINLOCK(c2port_idr_lock); +static DEFINE_IDR(c2port_idr); + +/* + * Local variables + */ + +static struct class *c2port_class; + +/* + * C2 registers & commands defines + */ + +/* C2 registers */ +#define C2PORT_DEVICEID 0x00 +#define C2PORT_REVID 0x01 +#define C2PORT_FPCTL 0x02 +#define C2PORT_FPDAT 0xB4 + +/* C2 interface commands */ +#define C2PORT_GET_VERSION 0x01 +#define C2PORT_DEVICE_ERASE 0x03 +#define C2PORT_BLOCK_READ 0x06 +#define C2PORT_BLOCK_WRITE 0x07 +#define C2PORT_PAGE_ERASE 0x08 + +/* C2 status return codes */ +#define C2PORT_INVALID_COMMAND 0x00 +#define C2PORT_COMMAND_FAILED 0x02 +#define C2PORT_COMMAND_OK 0x0d + +/* + * C2 port low level signal managements + */ + +static void c2port_reset(struct c2port_device *dev) +{ + struct c2port_ops *ops = dev->ops; + + /* To reset the device we have to keep clock line low for at least + * 20us. + */ + local_irq_disable(); + ops->c2ck_set(dev, 0); + udelay(25); + ops->c2ck_set(dev, 1); + local_irq_enable(); + + udelay(1); +} + +static void c2port_strobe_ck(struct c2port_device *dev) +{ + struct c2port_ops *ops = dev->ops; + + /* During hi-low-hi transition we disable local IRQs to avoid + * interructions since C2 port specification says that it must be + * shorter than 5us, otherwise the microcontroller may consider + * it as a reset signal! + */ + local_irq_disable(); + ops->c2ck_set(dev, 0); + udelay(1); + ops->c2ck_set(dev, 1); + local_irq_enable(); + + udelay(1); +} + +/* + * C2 port basic functions + */ + +static void c2port_write_ar(struct c2port_device *dev, u8 addr) +{ + struct c2port_ops *ops = dev->ops; + int i; + + /* START field */ + c2port_strobe_ck(dev); + + /* INS field (11b, LSB first) */ + ops->c2d_dir(dev, 0); + ops->c2d_set(dev, 1); + c2port_strobe_ck(dev); + ops->c2d_set(dev, 1); + c2port_strobe_ck(dev); + + /* ADDRESS field */ + for (i = 0; i < 8; i++) { + ops->c2d_set(dev, addr & 0x01); + c2port_strobe_ck(dev); + + addr >>= 1; + } + + /* STOP field */ + ops->c2d_dir(dev, 1); + c2port_strobe_ck(dev); +} + +static int c2port_read_ar(struct c2port_device *dev, u8 *addr) +{ + struct c2port_ops *ops = dev->ops; + int i; + + /* START field */ + c2port_strobe_ck(dev); + + /* INS field (10b, LSB first) */ + ops->c2d_dir(dev, 0); + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + ops->c2d_set(dev, 1); + c2port_strobe_ck(dev); + + /* ADDRESS field */ + ops->c2d_dir(dev, 1); + *addr = 0; + for (i = 0; i < 8; i++) { + *addr >>= 1; /* shift in 8-bit ADDRESS field LSB first */ + + c2port_strobe_ck(dev); + if (ops->c2d_get(dev)) + *addr |= 0x80; + } + + /* STOP field */ + c2port_strobe_ck(dev); + + return 0; +} + +static int c2port_write_dr(struct c2port_device *dev, u8 data) +{ + struct c2port_ops *ops = dev->ops; + int timeout, i; + + /* START field */ + c2port_strobe_ck(dev); + + /* INS field (01b, LSB first) */ + ops->c2d_dir(dev, 0); + ops->c2d_set(dev, 1); + c2port_strobe_ck(dev); + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + + /* LENGTH field (00b, LSB first -> 1 byte) */ + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + + /* DATA field */ + for (i = 0; i < 8; i++) { + ops->c2d_set(dev, data & 0x01); + c2port_strobe_ck(dev); + + data >>= 1; + } + + /* WAIT field */ + ops->c2d_dir(dev, 1); + timeout = 20; + do { + c2port_strobe_ck(dev); + if (ops->c2d_get(dev)) + break; + + udelay(1); + } while (--timeout > 0); + if (timeout == 0) + return -EIO; + + /* STOP field */ + c2port_strobe_ck(dev); + + return 0; +} + +static int c2port_read_dr(struct c2port_device *dev, u8 *data) +{ + struct c2port_ops *ops = dev->ops; + int timeout, i; + + /* START field */ + c2port_strobe_ck(dev); + + /* INS field (00b, LSB first) */ + ops->c2d_dir(dev, 0); + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + + /* LENGTH field (00b, LSB first -> 1 byte) */ + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + + /* WAIT field */ + ops->c2d_dir(dev, 1); + timeout = 20; + do { + c2port_strobe_ck(dev); + if (ops->c2d_get(dev)) + break; + + udelay(1); + } while (--timeout > 0); + if (timeout == 0) + return -EIO; + + /* DATA field */ + *data = 0; + for (i = 0; i < 8; i++) { + *data >>= 1; /* shift in 8-bit DATA field LSB first */ + + c2port_strobe_ck(dev); + if (ops->c2d_get(dev)) + *data |= 0x80; + } + + /* STOP field */ + c2port_strobe_ck(dev); + + return 0; +} + +static int c2port_poll_in_busy(struct c2port_device *dev) +{ + u8 addr; + int ret, timeout = 20; + + do { + ret = (c2port_read_ar(dev, &addr)); + if (ret < 0) + return -EIO; + + if (!(addr & 0x02)) + break; + + udelay(1); + } while (--timeout > 0); + if (timeout == 0) + return -EIO; + + return 0; +} + +static int c2port_poll_out_ready(struct c2port_device *dev) +{ + u8 addr; + int ret, timeout = 10000; /* erase flash needs long time... */ + + do { + ret = (c2port_read_ar(dev, &addr)); + if (ret < 0) + return -EIO; + + if (addr & 0x01) + break; + + udelay(1); + } while (--timeout > 0); + if (timeout == 0) + return -EIO; + + return 0; +} + +/* + * sysfs methods + */ + +static ssize_t c2port_show_name(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + + return sprintf(buf, "%s\n", c2dev->name); +} + +static ssize_t c2port_show_flash_blocks_num(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + struct c2port_ops *ops = c2dev->ops; + + return sprintf(buf, "%d\n", ops->blocks_num); +} + +static ssize_t c2port_show_flash_block_size(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + struct c2port_ops *ops = c2dev->ops; + + return sprintf(buf, "%d\n", ops->block_size); +} + +static ssize_t c2port_show_flash_size(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + struct c2port_ops *ops = c2dev->ops; + + return sprintf(buf, "%d\n", ops->blocks_num * ops->block_size); +} + +static ssize_t c2port_show_access(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", c2dev->access); +} + +static ssize_t c2port_store_access(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + struct c2port_ops *ops = c2dev->ops; + int status, ret; + + ret = sscanf(buf, "%d", &status); + if (ret != 1) + return -EINVAL; + + mutex_lock(&c2dev->mutex); + + c2dev->access = !!status; + + /* If access is "on" clock should be HIGH _before_ setting the line + * as output and data line should be set as INPUT anyway */ + if (c2dev->access) + ops->c2ck_set(c2dev, 1); + ops->access(c2dev, c2dev->access); + if (c2dev->access) + ops->c2d_dir(c2dev, 1); + + mutex_unlock(&c2dev->mutex); + + return count; +} + +static ssize_t c2port_store_reset(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + + /* Check the device access status */ + if (!c2dev->access) + return -EBUSY; + + mutex_lock(&c2dev->mutex); + + c2port_reset(c2dev); + c2dev->flash_access = 0; + + mutex_unlock(&c2dev->mutex); + + return count; +} + +static ssize_t __c2port_show_dev_id(struct c2port_device *dev, char *buf) +{ + u8 data; + int ret; + + /* Select DEVICEID register for C2 data register accesses */ + c2port_write_ar(dev, C2PORT_DEVICEID); + + /* Read and return the device ID register */ + ret = c2port_read_dr(dev, &data); + if (ret < 0) + return ret; + + return sprintf(buf, "%d\n", data); +} + +static ssize_t c2port_show_dev_id(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + ssize_t ret; + + /* Check the device access status */ + if (!c2dev->access) + return -EBUSY; + + mutex_lock(&c2dev->mutex); + ret = __c2port_show_dev_id(c2dev, buf); + mutex_unlock(&c2dev->mutex); + + if (ret < 0) + dev_err(dev, "cannot read from %s\n", c2dev->name); + + return ret; +} + +static ssize_t __c2port_show_rev_id(struct c2port_device *dev, char *buf) +{ + u8 data; + int ret; + + /* Select REVID register for C2 data register accesses */ + c2port_write_ar(dev, C2PORT_REVID); + + /* Read and return the revision ID register */ + ret = c2port_read_dr(dev, &data); + if (ret < 0) + return ret; + + return sprintf(buf, "%d\n", data); +} + +static ssize_t c2port_show_rev_id(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + ssize_t ret; + + /* Check the device access status */ + if (!c2dev->access) + return -EBUSY; + + mutex_lock(&c2dev->mutex); + ret = __c2port_show_rev_id(c2dev, buf); + mutex_unlock(&c2dev->mutex); + + if (ret < 0) + dev_err(c2dev->dev, "cannot read from %s\n", c2dev->name); + + return ret; +} + +static ssize_t c2port_show_flash_access(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", c2dev->flash_access); +} + +static ssize_t __c2port_store_flash_access(struct c2port_device *dev, + int status) +{ + int ret; + + /* Check the device access status */ + if (!dev->access) + return -EBUSY; + + dev->flash_access = !!status; + + /* If flash_access is off we have nothing to do... */ + if (dev->flash_access == 0) + return 0; + + /* Target the C2 flash programming control register for C2 data + * register access */ + c2port_write_ar(dev, C2PORT_FPCTL); + + /* Write the first keycode to enable C2 Flash programming */ + ret = c2port_write_dr(dev, 0x02); + if (ret < 0) + return ret; + + /* Write the second keycode to enable C2 Flash programming */ + ret = c2port_write_dr(dev, 0x01); + if (ret < 0) + return ret; + + /* Delay for at least 20ms to ensure the target is ready for + * C2 flash programming */ + mdelay(25); + + return 0; +} + +static ssize_t c2port_store_flash_access(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + int status; + ssize_t ret; + + ret = sscanf(buf, "%d", &status); + if (ret != 1) + return -EINVAL; + + mutex_lock(&c2dev->mutex); + ret = __c2port_store_flash_access(c2dev, status); + mutex_unlock(&c2dev->mutex); + + if (ret < 0) { + dev_err(c2dev->dev, "cannot enable %s flash programming\n", + c2dev->name); + return ret; + } + + return count; +} + +static ssize_t __c2port_write_flash_erase(struct c2port_device *dev) +{ + u8 status; + int ret; + + /* Target the C2 flash programming data register for C2 data register + * access. + */ + c2port_write_ar(dev, C2PORT_FPDAT); + + /* Send device erase command */ + c2port_write_dr(dev, C2PORT_DEVICE_ERASE); + + /* Wait for input acknowledge */ + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Should check status before starting FLASH access sequence */ + + /* Wait for status information */ + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + /* Read flash programming interface status */ + ret = c2port_read_dr(dev, &status); + if (ret < 0) + return ret; + if (status != C2PORT_COMMAND_OK) + return -EBUSY; + + /* Send a three-byte arming sequence to enable the device erase. + * If the sequence is not received correctly, the command will be + * ignored. + * Sequence is: 0xde, 0xad, 0xa5. + */ + c2port_write_dr(dev, 0xde); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + c2port_write_dr(dev, 0xad); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + c2port_write_dr(dev, 0xa5); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + return 0; +} + +static ssize_t c2port_store_flash_erase(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + int ret; + + /* Check the device and flash access status */ + if (!c2dev->access || !c2dev->flash_access) + return -EBUSY; + + mutex_lock(&c2dev->mutex); + ret = __c2port_write_flash_erase(c2dev); + mutex_unlock(&c2dev->mutex); + + if (ret < 0) { + dev_err(c2dev->dev, "cannot erase %s flash\n", c2dev->name); + return ret; + } + + return count; +} + +static ssize_t __c2port_read_flash_data(struct c2port_device *dev, + char *buffer, loff_t offset, size_t count) +{ + struct c2port_ops *ops = dev->ops; + u8 status, nread = 128; + int i, ret; + + /* Check for flash end */ + if (offset >= ops->block_size * ops->blocks_num) + return 0; + + if (ops->block_size * ops->blocks_num - offset < nread) + nread = ops->block_size * ops->blocks_num - offset; + if (count < nread) + nread = count; + if (nread == 0) + return nread; + + /* Target the C2 flash programming data register for C2 data register + * access */ + c2port_write_ar(dev, C2PORT_FPDAT); + + /* Send flash block read command */ + c2port_write_dr(dev, C2PORT_BLOCK_READ); + + /* Wait for input acknowledge */ + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Should check status before starting FLASH access sequence */ + + /* Wait for status information */ + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + /* Read flash programming interface status */ + ret = c2port_read_dr(dev, &status); + if (ret < 0) + return ret; + if (status != C2PORT_COMMAND_OK) + return -EBUSY; + + /* Send address high byte */ + c2port_write_dr(dev, offset >> 8); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Send address low byte */ + c2port_write_dr(dev, offset & 0x00ff); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Send address block size */ + c2port_write_dr(dev, nread); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Should check status before reading FLASH block */ + + /* Wait for status information */ + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + /* Read flash programming interface status */ + ret = c2port_read_dr(dev, &status); + if (ret < 0) + return ret; + if (status != C2PORT_COMMAND_OK) + return -EBUSY; + + /* Read flash block */ + for (i = 0; i < nread; i++) { + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + ret = c2port_read_dr(dev, buffer+i); + if (ret < 0) + return ret; + } + + return nread; +} + +static ssize_t c2port_read_flash_data(struct kobject *kobj, + struct bin_attribute *attr, + char *buffer, loff_t offset, size_t count) +{ + struct c2port_device *c2dev = + dev_get_drvdata(container_of(kobj, + struct device, kobj)); + ssize_t ret; + + /* Check the device and flash access status */ + if (!c2dev->access || !c2dev->flash_access) + return -EBUSY; + + mutex_lock(&c2dev->mutex); + ret = __c2port_read_flash_data(c2dev, buffer, offset, count); + mutex_unlock(&c2dev->mutex); + + if (ret < 0) + dev_err(c2dev->dev, "cannot read %s flash\n", c2dev->name); + + return ret; +} + +static ssize_t __c2port_write_flash_data(struct c2port_device *dev, + char *buffer, loff_t offset, size_t count) +{ + struct c2port_ops *ops = dev->ops; + u8 status, nwrite = 128; + int i, ret; + + if (nwrite > count) + nwrite = count; + if (ops->block_size * ops->blocks_num - offset < nwrite) + nwrite = ops->block_size * ops->blocks_num - offset; + + /* Check for flash end */ + if (offset >= ops->block_size * ops->blocks_num) + return -EINVAL; + + /* Target the C2 flash programming data register for C2 data register + * access */ + c2port_write_ar(dev, C2PORT_FPDAT); + + /* Send flash block write command */ + c2port_write_dr(dev, C2PORT_BLOCK_WRITE); + + /* Wait for input acknowledge */ + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Should check status before starting FLASH access sequence */ + + /* Wait for status information */ + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + /* Read flash programming interface status */ + ret = c2port_read_dr(dev, &status); + if (ret < 0) + return ret; + if (status != C2PORT_COMMAND_OK) + return -EBUSY; + + /* Send address high byte */ + c2port_write_dr(dev, offset >> 8); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Send address low byte */ + c2port_write_dr(dev, offset & 0x00ff); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Send address block size */ + c2port_write_dr(dev, nwrite); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Should check status before writing FLASH block */ + + /* Wait for status information */ + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + /* Read flash programming interface status */ + ret = c2port_read_dr(dev, &status); + if (ret < 0) + return ret; + if (status != C2PORT_COMMAND_OK) + return -EBUSY; + + /* Write flash block */ + for (i = 0; i < nwrite; i++) { + ret = c2port_write_dr(dev, *(buffer+i)); + if (ret < 0) + return ret; + + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + } + + /* Wait for last flash write to complete */ + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + return nwrite; +} + +static ssize_t c2port_write_flash_data(struct kobject *kobj, + struct bin_attribute *attr, + char *buffer, loff_t offset, size_t count) +{ + struct c2port_device *c2dev = + dev_get_drvdata(container_of(kobj, + struct device, kobj)); + int ret; + + /* Check the device access status */ + if (!c2dev->access || !c2dev->flash_access) + return -EBUSY; + + mutex_lock(&c2dev->mutex); + ret = __c2port_write_flash_data(c2dev, buffer, offset, count); + mutex_unlock(&c2dev->mutex); + + if (ret < 0) + dev_err(c2dev->dev, "cannot write %s flash\n", c2dev->name); + + return ret; +} + +/* + * Class attributes + */ + +static struct device_attribute c2port_attrs[] = { + __ATTR(name, 0444, c2port_show_name, NULL), + __ATTR(flash_blocks_num, 0444, c2port_show_flash_blocks_num, NULL), + __ATTR(flash_block_size, 0444, c2port_show_flash_block_size, NULL), + __ATTR(flash_size, 0444, c2port_show_flash_size, NULL), + __ATTR(access, 0644, c2port_show_access, c2port_store_access), + __ATTR(reset, 0200, NULL, c2port_store_reset), + __ATTR(dev_id, 0444, c2port_show_dev_id, NULL), + __ATTR(rev_id, 0444, c2port_show_rev_id, NULL), + + __ATTR(flash_access, 0644, c2port_show_flash_access, + c2port_store_flash_access), + __ATTR(flash_erase, 0200, NULL, c2port_store_flash_erase), + __ATTR_NULL, +}; + +static struct bin_attribute c2port_bin_attrs = { + .attr = { + .name = "flash_data", + .mode = 0644 + }, + .read = c2port_read_flash_data, + .write = c2port_write_flash_data, + /* .size is computed at run-time */ +}; + +/* + * Exported functions + */ + +struct c2port_device *c2port_device_register(char *name, + struct c2port_ops *ops, void *devdata) +{ + struct c2port_device *c2dev; + int id, ret; + + if (unlikely(!ops) || unlikely(!ops->access) || \ + unlikely(!ops->c2d_dir) || unlikely(!ops->c2ck_set) || \ + unlikely(!ops->c2d_get) || unlikely(!ops->c2d_set)) + return ERR_PTR(-EINVAL); + + c2dev = kmalloc(sizeof(struct c2port_device), GFP_KERNEL); + if (unlikely(!c2dev)) + return ERR_PTR(-ENOMEM); + + ret = idr_pre_get(&c2port_idr, GFP_KERNEL); + if (!ret) { + ret = -ENOMEM; + goto error_idr_get_new; + } + + spin_lock_irq(&c2port_idr_lock); + ret = idr_get_new(&c2port_idr, c2dev, &id); + spin_unlock_irq(&c2port_idr_lock); + + if (ret < 0) + goto error_idr_get_new; + c2dev->id = id; + + c2dev->dev = device_create(c2port_class, NULL, 0, c2dev, + "c2port%d", id); + if (unlikely(!c2dev->dev)) { + ret = -ENOMEM; + goto error_device_create; + } + dev_set_drvdata(c2dev->dev, c2dev); + + strncpy(c2dev->name, name, C2PORT_NAME_LEN); + c2dev->ops = ops; + mutex_init(&c2dev->mutex); + + /* Create binary file */ + c2port_bin_attrs.size = ops->blocks_num * ops->block_size; + ret = device_create_bin_file(c2dev->dev, &c2port_bin_attrs); + if (unlikely(ret)) + goto error_device_create_bin_file; + + /* By default C2 port access is off */ + c2dev->access = c2dev->flash_access = 0; + ops->access(c2dev, 0); + + dev_info(c2dev->dev, "C2 port %s added\n", name); + dev_info(c2dev->dev, "%s flash has %d blocks x %d bytes " + "(%d bytes total)\n", + name, ops->blocks_num, ops->block_size, + ops->blocks_num * ops->block_size); + + return c2dev; + +error_device_create_bin_file: + device_destroy(c2port_class, 0); + +error_device_create: + spin_lock_irq(&c2port_idr_lock); + idr_remove(&c2port_idr, id); + spin_unlock_irq(&c2port_idr_lock); + +error_idr_get_new: + kfree(c2dev); + + return ERR_PTR(ret); +} +EXPORT_SYMBOL(c2port_device_register); + +void c2port_device_unregister(struct c2port_device *c2dev) +{ + if (!c2dev) + return; + + dev_info(c2dev->dev, "C2 port %s removed\n", c2dev->name); + + device_remove_bin_file(c2dev->dev, &c2port_bin_attrs); + spin_lock_irq(&c2port_idr_lock); + idr_remove(&c2port_idr, c2dev->id); + spin_unlock_irq(&c2port_idr_lock); + + device_destroy(c2port_class, c2dev->id); + + kfree(c2dev); +} +EXPORT_SYMBOL(c2port_device_unregister); + +/* + * Module stuff + */ + +static int __init c2port_init(void) +{ + printk(KERN_INFO "Silicon Labs C2 port support v. " DRIVER_VERSION + " - (C) 2007 Rodolfo Giometti\n"); + + c2port_class = class_create(THIS_MODULE, "c2port"); + if (!c2port_class) { + printk(KERN_ERR "c2port: failed to allocate class\n"); + return -ENOMEM; + } + c2port_class->dev_attrs = c2port_attrs; + + return 0; +} + +static void __exit c2port_exit(void) +{ + class_destroy(c2port_class); +} + +module_init(c2port_init); +module_exit(c2port_exit); + +MODULE_AUTHOR("Rodolfo Giometti "); +MODULE_DESCRIPTION("Silicon Labs C2 port support v. " DRIVER_VERSION); +MODULE_LICENSE("GPL"); diff --git a/include/linux/c2port.h b/include/linux/c2port.h new file mode 100644 index 000000000000..7b5a2388ba67 --- /dev/null +++ b/include/linux/c2port.h @@ -0,0 +1,65 @@ +/* + * Silicon Labs C2 port Linux support + * + * Copyright (c) 2007 Rodolfo Giometti + * Copyright (c) 2007 Eurotech S.p.A. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation + */ + +#include + +#define C2PORT_NAME_LEN 32 + +/* + * C2 port basic structs + */ + +/* Main struct */ +struct c2port_ops; +struct c2port_device { + unsigned int access:1; + unsigned int flash_access:1; + + int id; + char name[C2PORT_NAME_LEN]; + struct c2port_ops *ops; + struct mutex mutex; /* prevent races during read/write */ + + struct device *dev; + + void *private_data; +}; + +/* Basic operations */ +struct c2port_ops { + /* Flash layout */ + unsigned short block_size; /* flash block size in bytes */ + unsigned short blocks_num; /* flash blocks number */ + + /* Enable or disable the access to C2 port */ + void (*access)(struct c2port_device *dev, int status); + + /* Set C2D data line as input/output */ + void (*c2d_dir)(struct c2port_device *dev, int dir); + + /* Read/write C2D data line */ + int (*c2d_get)(struct c2port_device *dev); + void (*c2d_set)(struct c2port_device *dev, int status); + + /* Write C2CK clock line */ + void (*c2ck_set)(struct c2port_device *dev, int status); +}; + +/* + * Exported functions + */ + +#define to_class_dev(obj) container_of((obj), struct class_device, kobj) +#define to_c2port_device(obj) container_of((obj), struct c2port_device, class) + +extern struct c2port_device *c2port_device_register(char *name, + struct c2port_ops *ops, void *devdata); +extern void c2port_device_unregister(struct c2port_device *dev); -- cgit v1.2.3 From 437184ae8bd1ef923a40b009e37801deae66ad55 Mon Sep 17 00:00:00 2001 From: Henrik Rydberg Date: Tue, 4 Nov 2008 13:31:38 +0100 Subject: HID: map macbook keys for "Expose" and "Dashboard" On macbooks there are specific keys for the user-space functions Expose and Dashboard, which currently has no counterpart in input.h. This patch adds KEY_SCALE and KEY_DASHBOARD, and maps the keyboard accordingly. Acked-by: Dmitry Torokhov Signed-off-by: Henrik Rydberg Signed-off-by: Jiri Kosina --- drivers/hid/hid-apple.c | 5 +++-- include/linux/input.h | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index ce3c39938f9f..9b97795e45ad 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -55,10 +55,11 @@ struct apple_key_translation { static struct apple_key_translation apple_fn_keys[] = { { KEY_BACKSPACE, KEY_DELETE }, + { KEY_ENTER, KEY_INSERT }, { KEY_F1, KEY_BRIGHTNESSDOWN, APPLE_FLAG_FKEY }, { KEY_F2, KEY_BRIGHTNESSUP, APPLE_FLAG_FKEY }, - { KEY_F3, KEY_FN_F5, APPLE_FLAG_FKEY }, /* Exposé */ - { KEY_F4, KEY_FN_F4, APPLE_FLAG_FKEY }, /* Dashboard */ + { KEY_F3, KEY_SCALE, APPLE_FLAG_FKEY }, + { KEY_F4, KEY_DASHBOARD, APPLE_FLAG_FKEY }, { KEY_F5, KEY_KBDILLUMDOWN, APPLE_FLAG_FKEY }, { KEY_F6, KEY_KBDILLUMUP, APPLE_FLAG_FKEY }, { KEY_F7, KEY_PREVIOUSSONG, APPLE_FLAG_FKEY }, diff --git a/include/linux/input.h b/include/linux/input.h index b86fb5581ce6..5341e8251f8c 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -238,6 +238,7 @@ struct input_absinfo { #define KEY_KPEQUAL 117 #define KEY_KPPLUSMINUS 118 #define KEY_PAUSE 119 +#define KEY_SCALE 120 /* AL Compiz Scale (Expose) */ #define KEY_KPCOMMA 121 #define KEY_HANGEUL 122 @@ -322,6 +323,7 @@ struct input_absinfo { #define KEY_PAUSECD 201 #define KEY_PROG3 202 #define KEY_PROG4 203 +#define KEY_DASHBOARD 204 /* AL Dashboard */ #define KEY_SUSPEND 205 #define KEY_CLOSE 206 /* AC Close */ #define KEY_PLAY 207 -- cgit v1.2.3 From d7de4c1dc3a2faca0bf05d9e342f885cb2696766 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 13 Nov 2008 20:40:12 +0200 Subject: slab: document SLAB_DESTROY_BY_RCU Explain this SLAB_DESTROY_BY_RCU thing... [hugh@veritas.com: add a pointer to comment in mm/slab.c] Signed-off-by: Peter Zijlstra Acked-by: Jens Axboe Acked-by: Paul E. McKenney Acked-by: Christoph Lameter Signed-off-by: Hugh Dickins Signed-off-by: Pekka Enberg --- include/linux/slab.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index ba965c84ae06..000da12b5cf0 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -23,6 +23,34 @@ #define SLAB_CACHE_DMA 0x00004000UL /* Use GFP_DMA memory */ #define SLAB_STORE_USER 0x00010000UL /* DEBUG: Store the last owner for bug hunting */ #define SLAB_PANIC 0x00040000UL /* Panic if kmem_cache_create() fails */ +/* + * SLAB_DESTROY_BY_RCU - **WARNING** READ THIS! + * + * This delays freeing the SLAB page by a grace period, it does _NOT_ + * delay object freeing. This means that if you do kmem_cache_free() + * that memory location is free to be reused at any time. Thus it may + * be possible to see another object there in the same RCU grace period. + * + * This feature only ensures the memory location backing the object + * stays valid, the trick to using this is relying on an independent + * object validation pass. Something like: + * + * rcu_read_lock() + * again: + * obj = lockless_lookup(key); + * if (obj) { + * if (!try_get_ref(obj)) // might fail for free objects + * goto again; + * + * if (obj->key != key) { // not the object we expected + * put_ref(obj); + * goto again; + * } + * } + * rcu_read_unlock(); + * + * See also the comment on struct slab_rcu in mm/slab.c. + */ #define SLAB_DESTROY_BY_RCU 0x00080000UL /* Defer freeing slabs to RCU */ #define SLAB_MEM_SPREAD 0x00100000UL /* Spread some memory over cpuset */ #define SLAB_TRACE 0x00200000UL /* Trace allocations and frees */ -- cgit v1.2.3 From 352d026338378b1f13f044e33c1047da6e470056 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 29 Oct 2008 15:16:58 -0400 Subject: USB: don't register endpoints for interfaces that are going away This patch (as1155) fixes a bug in usbcore. When interfaces are deleted, either because the device was disconnected or because of a configuration change, the extra attribute files and child endpoint devices may get left behind. This is because the core removes them before calling device_del(). But during device_del(), after the driver is unbound the core will reinstall altsetting 0 and recreate those extra attributes and children. The patch prevents this by adding a flag to record when the interface is in the midst of being unregistered. When the flag is set, the attribute files and child devices will not be created. Signed-off-by: Alan Stern Cc: stable [2.6.27, 2.6.26, 2.6.25] Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/message.c | 1 + drivers/usb/core/sysfs.c | 2 +- include/linux/usb.h | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index 887738577b28..6d1048faf08e 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -1091,6 +1091,7 @@ void usb_disable_device(struct usb_device *dev, int skip_ep0) continue; dev_dbg(&dev->dev, "unregistering interface %s\n", dev_name(&interface->dev)); + interface->unregistering = 1; usb_remove_sysfs_intf_files(interface); device_del(&interface->dev); } diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c index f66fba11fbd5..4fb65fdc9dc3 100644 --- a/drivers/usb/core/sysfs.c +++ b/drivers/usb/core/sysfs.c @@ -840,7 +840,7 @@ int usb_create_sysfs_intf_files(struct usb_interface *intf) struct usb_host_interface *alt = intf->cur_altsetting; int retval; - if (intf->sysfs_files_created) + if (intf->sysfs_files_created || intf->unregistering) return 0; /* The interface string may be present in some altsettings diff --git a/include/linux/usb.h b/include/linux/usb.h index 8fa973bede5e..f72aa51f7bcd 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -108,6 +108,7 @@ enum usb_interface_condition { * (in probe()), bound to a driver, or unbinding (in disconnect()) * @is_active: flag set when the interface is bound and not suspended. * @sysfs_files_created: sysfs attributes exist + * @unregistering: flag set when the interface is being unregistered * @needs_remote_wakeup: flag set when the driver requires remote-wakeup * capability during autosuspend. * @needs_altsetting0: flag set when a set-interface request for altsetting 0 @@ -163,6 +164,7 @@ struct usb_interface { enum usb_interface_condition condition; /* state of binding */ unsigned is_active:1; /* the interface is not suspended */ unsigned sysfs_files_created:1; /* the sysfs attributes exist */ + unsigned unregistering:1; /* unregistration is in progress */ unsigned needs_remote_wakeup:1; /* driver requires remote wakeup */ unsigned needs_altsetting0:1; /* switch to altsetting 0 is pending */ unsigned needs_binding:1; /* needs delayed unbind/rebind */ -- cgit v1.2.3 From e8f6fbf62de37cbc2e179176ac7010d5f4396b67 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 12 Nov 2008 01:38:36 +0000 Subject: lockdep: include/linux/lockdep.h - fix warning in net/bluetooth/af_bluetooth.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix this warning: net/bluetooth/af_bluetooth.c:60: warning: ‘bt_key_strings’ defined but not used net/bluetooth/af_bluetooth.c:71: warning: ‘bt_slock_key_strings’ defined but not used this is a lockdep macro problem in the !LOCKDEP case. We cannot convert it to an inline because the macro works on multiple types, but we can mark the parameter used. [ also clean up a misaligned tab in sock_lock_init_class_and_name() ] [ also remove #ifdefs from around af_family_clock_key strings - which were certainly added to get rid of the ugly build warnings. ] Signed-off-by: Ingo Molnar Signed-off-by: David S. Miller --- include/linux/lockdep.h | 5 +++-- include/net/sock.h | 2 +- net/core/sock.c | 2 -- 3 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 331e5f1c2d8e..29aec6e10020 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -331,10 +331,11 @@ static inline void lockdep_on(void) # define lock_set_subclass(l, s, i) do { } while (0) # define lockdep_init() do { } while (0) # define lockdep_info() do { } while (0) -# define lockdep_init_map(lock, name, key, sub) do { (void)(key); } while (0) +# define lockdep_init_map(lock, name, key, sub) \ + do { (void)(name); (void)(key); } while (0) # define lockdep_set_class(lock, key) do { (void)(key); } while (0) # define lockdep_set_class_and_name(lock, key, name) \ - do { (void)(key); } while (0) + do { (void)(key); (void)(name); } while (0) #define lockdep_set_class_and_subclass(lock, key, sub) \ do { (void)(key); } while (0) #define lockdep_set_subclass(lock, sub) do { } while (0) diff --git a/include/net/sock.h b/include/net/sock.h index c04f9e18ea22..2f47107f6d0f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -815,7 +815,7 @@ static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) */ #define sock_lock_init_class_and_name(sk, sname, skey, name, key) \ do { \ - sk->sk_lock.owned = 0; \ + sk->sk_lock.owned = 0; \ init_waitqueue_head(&sk->sk_lock.wq); \ spin_lock_init(&(sk)->sk_lock.slock); \ debug_check_no_locks_freed((void *)&(sk)->sk_lock, \ diff --git a/net/core/sock.c b/net/core/sock.c index 5e2a3132a8c9..341e39456952 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -136,7 +136,6 @@ static struct lock_class_key af_family_keys[AF_MAX]; static struct lock_class_key af_family_slock_keys[AF_MAX]; -#ifdef CONFIG_DEBUG_LOCK_ALLOC /* * Make lock validator output more readable. (we pre-construct these * strings build-time, so that runtime initialization of socket @@ -187,7 +186,6 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , "clock-AF_MAX" }; -#endif /* * sk_callback_lock locking rules are per-address-family, -- cgit v1.2.3 From d091c2f58ba32029495a933b721e8e02fbd12caa Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 12 Nov 2008 21:16:43 +0100 Subject: Add 'pr_fmt()' format modifier to pr_xyz macros. A common reason for device drivers to implement their own printk macros is the lack of a printk prefix with the standard pr_xyz macros. Introduce a pr_fmt() macro that is applied for every pr_xyz macro to the format string. The most common use of the pr_fmt macro would be to add the name of the device driver to all pr_xyz messages in a source file. Signed-off-by: Martin Schwidefsky Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index fba141d3ca07..dc7e0d0a6474 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -318,32 +318,36 @@ static inline char *pack_hex_byte(char *buf, u8 byte) return buf; } -#define pr_emerg(fmt, arg...) \ - printk(KERN_EMERG fmt, ##arg) -#define pr_alert(fmt, arg...) \ - printk(KERN_ALERT fmt, ##arg) -#define pr_crit(fmt, arg...) \ - printk(KERN_CRIT fmt, ##arg) -#define pr_err(fmt, arg...) \ - printk(KERN_ERR fmt, ##arg) -#define pr_warning(fmt, arg...) \ - printk(KERN_WARNING fmt, ##arg) -#define pr_notice(fmt, arg...) \ - printk(KERN_NOTICE fmt, ##arg) -#define pr_info(fmt, arg...) \ - printk(KERN_INFO fmt, ##arg) +#ifndef pr_fmt +#define pr_fmt(fmt) fmt +#endif + +#define pr_emerg(fmt, ...) \ + printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) +#define pr_alert(fmt, ...) \ + printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) +#define pr_crit(fmt, ...) \ + printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) +#define pr_err(fmt, ...) \ + printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) +#define pr_warning(fmt, ...) \ + printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) +#define pr_notice(fmt, ...) \ + printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) +#define pr_info(fmt, ...) \ + printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) /* If you are writing a driver, please use dev_dbg instead */ #if defined(CONFIG_DYNAMIC_PRINTK_DEBUG) #define pr_debug(fmt, ...) do { \ - dynamic_pr_debug(fmt, ##__VA_ARGS__); \ + dynamic_pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \ } while (0) #elif defined(DEBUG) -#define pr_debug(fmt, arg...) \ - printk(KERN_DEBUG fmt, ##arg) +#define pr_debug(fmt, ...) \ + printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else -#define pr_debug(fmt, arg...) \ - ({ if (0) printk(KERN_DEBUG fmt, ##arg); 0; }) +#define pr_debug(fmt, ...) \ + ({ if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); 0; }) #endif /* -- cgit v1.2.3 From 8f7b0ba1c853919b85b54774775f567f30006107 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 15 Nov 2008 01:15:43 +0000 Subject: Fix inotify watch removal/umount races Inotify watch removals suck violently. To kick the watch out we need (in this order) inode->inotify_mutex and ih->mutex. That's fine if we have a hold on inode; however, for all other cases we need to make damn sure we don't race with umount. We can *NOT* just grab a reference to a watch - inotify_unmount_inodes() will happily sail past it and we'll end with reference to inode potentially outliving its superblock. Ideally we just want to grab an active reference to superblock if we can; that will make sure we won't go into inotify_umount_inodes() until we are done. Cleanup is just deactivate_super(). However, that leaves a messy case - what if we *are* racing with umount() and active references to superblock can't be acquired anymore? We can bump ->s_count, grab ->s_umount, which will almost certainly wait until the superblock is shut down and the watch in question is pining for fjords. That's fine, but there is a problem - we might have hit the window between ->s_active getting to 0 / ->s_count - below S_BIAS (i.e. the moment when superblock is past the point of no return and is heading for shutdown) and the moment when deactivate_super() acquires ->s_umount. We could just do drop_super() yield() and retry, but that's rather antisocial and this stuff is luser-triggerable. OTOH, having grabbed ->s_umount and having found that we'd got there first (i.e. that ->s_root is non-NULL) we know that we won't race with inotify_umount_inodes(). So we could grab a reference to watch and do the rest as above, just with drop_super() instead of deactivate_super(), right? Wrong. We had to drop ih->mutex before we could grab ->s_umount. So the watch could've been gone already. That still can be dealt with - we need to save watch->wd, do idr_find() and compare its result with our pointer. If they match, we either have the damn thing still alive or we'd lost not one but two races at once, the watch had been killed and a new one got created with the same ->wd at the same address. That couldn't have happened in inotify_destroy(), but inotify_rm_wd() could run into that. Still, "new one got created" is not a problem - we have every right to kill it or leave it alone, whatever's more convenient. So we can use idr_find(...) == watch && watch->inode->i_sb == sb as "grab it and kill it" check. If it's been our original watch, we are fine, if it's a newcomer - nevermind, just pretend that we'd won the race and kill the fscker anyway; we are safe since we know that its superblock won't be going away. And yes, this is far beyond mere "not very pretty"; so's the entire concept of inotify to start with. Signed-off-by: Al Viro Acked-by: Greg KH Signed-off-by: Linus Torvalds --- fs/inotify.c | 150 ++++++++++++++++++++++++++++++++++++++++++++++-- include/linux/inotify.h | 11 ++++ kernel/audit_tree.c | 91 +++++++++++++++++------------ kernel/auditfilter.c | 14 +++-- 4 files changed, 218 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/fs/inotify.c b/fs/inotify.c index 690e72595e6e..7bbed1b89825 100644 --- a/fs/inotify.c +++ b/fs/inotify.c @@ -106,6 +106,20 @@ void get_inotify_watch(struct inotify_watch *watch) } EXPORT_SYMBOL_GPL(get_inotify_watch); +int pin_inotify_watch(struct inotify_watch *watch) +{ + struct super_block *sb = watch->inode->i_sb; + spin_lock(&sb_lock); + if (sb->s_count >= S_BIAS) { + atomic_inc(&sb->s_active); + spin_unlock(&sb_lock); + atomic_inc(&watch->count); + return 1; + } + spin_unlock(&sb_lock); + return 0; +} + /** * put_inotify_watch - decrements the ref count on a given watch. cleans up * watch references if the count reaches zero. inotify_watch is freed by @@ -124,6 +138,13 @@ void put_inotify_watch(struct inotify_watch *watch) } EXPORT_SYMBOL_GPL(put_inotify_watch); +void unpin_inotify_watch(struct inotify_watch *watch) +{ + struct super_block *sb = watch->inode->i_sb; + put_inotify_watch(watch); + deactivate_super(sb); +} + /* * inotify_handle_get_wd - returns the next WD for use by the given handle * @@ -479,6 +500,112 @@ void inotify_init_watch(struct inotify_watch *watch) } EXPORT_SYMBOL_GPL(inotify_init_watch); +/* + * Watch removals suck violently. To kick the watch out we need (in this + * order) inode->inotify_mutex and ih->mutex. That's fine if we have + * a hold on inode; however, for all other cases we need to make damn sure + * we don't race with umount. We can *NOT* just grab a reference to a + * watch - inotify_unmount_inodes() will happily sail past it and we'll end + * with reference to inode potentially outliving its superblock. Ideally + * we just want to grab an active reference to superblock if we can; that + * will make sure we won't go into inotify_umount_inodes() until we are + * done. Cleanup is just deactivate_super(). However, that leaves a messy + * case - what if we *are* racing with umount() and active references to + * superblock can't be acquired anymore? We can bump ->s_count, grab + * ->s_umount, which will almost certainly wait until the superblock is shut + * down and the watch in question is pining for fjords. That's fine, but + * there is a problem - we might have hit the window between ->s_active + * getting to 0 / ->s_count - below S_BIAS (i.e. the moment when superblock + * is past the point of no return and is heading for shutdown) and the + * moment when deactivate_super() acquires ->s_umount. We could just do + * drop_super() yield() and retry, but that's rather antisocial and this + * stuff is luser-triggerable. OTOH, having grabbed ->s_umount and having + * found that we'd got there first (i.e. that ->s_root is non-NULL) we know + * that we won't race with inotify_umount_inodes(). So we could grab a + * reference to watch and do the rest as above, just with drop_super() instead + * of deactivate_super(), right? Wrong. We had to drop ih->mutex before we + * could grab ->s_umount. So the watch could've been gone already. + * + * That still can be dealt with - we need to save watch->wd, do idr_find() + * and compare its result with our pointer. If they match, we either have + * the damn thing still alive or we'd lost not one but two races at once, + * the watch had been killed and a new one got created with the same ->wd + * at the same address. That couldn't have happened in inotify_destroy(), + * but inotify_rm_wd() could run into that. Still, "new one got created" + * is not a problem - we have every right to kill it or leave it alone, + * whatever's more convenient. + * + * So we can use idr_find(...) == watch && watch->inode->i_sb == sb as + * "grab it and kill it" check. If it's been our original watch, we are + * fine, if it's a newcomer - nevermind, just pretend that we'd won the + * race and kill the fscker anyway; we are safe since we know that its + * superblock won't be going away. + * + * And yes, this is far beyond mere "not very pretty"; so's the entire + * concept of inotify to start with. + */ + +/** + * pin_to_kill - pin the watch down for removal + * @ih: inotify handle + * @watch: watch to kill + * + * Called with ih->mutex held, drops it. Possible return values: + * 0 - nothing to do, it has died + * 1 - remove it, drop the reference and deactivate_super() + * 2 - remove it, drop the reference and drop_super(); we tried hard to avoid + * that variant, since it involved a lot of PITA, but that's the best that + * could've been done. + */ +static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch) +{ + struct super_block *sb = watch->inode->i_sb; + s32 wd = watch->wd; + + spin_lock(&sb_lock); + if (sb->s_count >= S_BIAS) { + atomic_inc(&sb->s_active); + spin_unlock(&sb_lock); + get_inotify_watch(watch); + mutex_unlock(&ih->mutex); + return 1; /* the best outcome */ + } + sb->s_count++; + spin_unlock(&sb_lock); + mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */ + down_read(&sb->s_umount); + if (likely(!sb->s_root)) { + /* fs is already shut down; the watch is dead */ + drop_super(sb); + return 0; + } + /* raced with the final deactivate_super() */ + mutex_lock(&ih->mutex); + if (idr_find(&ih->idr, wd) != watch || watch->inode->i_sb != sb) { + /* the watch is dead */ + mutex_unlock(&ih->mutex); + drop_super(sb); + return 0; + } + /* still alive or freed and reused with the same sb and wd; kill */ + get_inotify_watch(watch); + mutex_unlock(&ih->mutex); + return 2; +} + +static void unpin_and_kill(struct inotify_watch *watch, int how) +{ + struct super_block *sb = watch->inode->i_sb; + put_inotify_watch(watch); + switch (how) { + case 1: + deactivate_super(sb); + break; + case 2: + drop_super(sb); + } +} + /** * inotify_destroy - clean up and destroy an inotify instance * @ih: inotify handle @@ -490,11 +617,15 @@ void inotify_destroy(struct inotify_handle *ih) * pretty. We cannot do a simple iteration over the list, because we * do not know the inode until we iterate to the watch. But we need to * hold inode->inotify_mutex before ih->mutex. The following works. + * + * AV: it had to become even uglier to start working ;-/ */ while (1) { struct inotify_watch *watch; struct list_head *watches; + struct super_block *sb; struct inode *inode; + int how; mutex_lock(&ih->mutex); watches = &ih->watches; @@ -503,8 +634,10 @@ void inotify_destroy(struct inotify_handle *ih) break; } watch = list_first_entry(watches, struct inotify_watch, h_list); - get_inotify_watch(watch); - mutex_unlock(&ih->mutex); + sb = watch->inode->i_sb; + how = pin_to_kill(ih, watch); + if (!how) + continue; inode = watch->inode; mutex_lock(&inode->inotify_mutex); @@ -518,7 +651,7 @@ void inotify_destroy(struct inotify_handle *ih) mutex_unlock(&ih->mutex); mutex_unlock(&inode->inotify_mutex); - put_inotify_watch(watch); + unpin_and_kill(watch, how); } /* free this handle: the put matching the get in inotify_init() */ @@ -719,7 +852,9 @@ void inotify_evict_watch(struct inotify_watch *watch) int inotify_rm_wd(struct inotify_handle *ih, u32 wd) { struct inotify_watch *watch; + struct super_block *sb; struct inode *inode; + int how; mutex_lock(&ih->mutex); watch = idr_find(&ih->idr, wd); @@ -727,9 +862,12 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd) mutex_unlock(&ih->mutex); return -EINVAL; } - get_inotify_watch(watch); + sb = watch->inode->i_sb; + how = pin_to_kill(ih, watch); + if (!how) + return 0; + inode = watch->inode; - mutex_unlock(&ih->mutex); mutex_lock(&inode->inotify_mutex); mutex_lock(&ih->mutex); @@ -740,7 +878,7 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd) mutex_unlock(&ih->mutex); mutex_unlock(&inode->inotify_mutex); - put_inotify_watch(watch); + unpin_and_kill(watch, how); return 0; } diff --git a/include/linux/inotify.h b/include/linux/inotify.h index bd578578a8b9..37ea2894b3c0 100644 --- a/include/linux/inotify.h +++ b/include/linux/inotify.h @@ -134,6 +134,8 @@ extern void inotify_remove_watch_locked(struct inotify_handle *, struct inotify_watch *); extern void get_inotify_watch(struct inotify_watch *); extern void put_inotify_watch(struct inotify_watch *); +extern int pin_inotify_watch(struct inotify_watch *); +extern void unpin_inotify_watch(struct inotify_watch *); #else @@ -228,6 +230,15 @@ static inline void put_inotify_watch(struct inotify_watch *watch) { } +extern inline int pin_inotify_watch(struct inotify_watch *watch) +{ + return 0; +} + +extern inline void unpin_inotify_watch(struct inotify_watch *watch) +{ +} + #endif /* CONFIG_INOTIFY */ #endif /* __KERNEL __ */ diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 8ba0e0d934f2..8b509441f49a 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -24,6 +24,7 @@ struct audit_chunk { struct list_head trees; /* with root here */ int dead; int count; + atomic_long_t refs; struct rcu_head head; struct node { struct list_head list; @@ -56,7 +57,8 @@ static LIST_HEAD(prune_list); * tree is refcounted; one reference for "some rules on rules_list refer to * it", one for each chunk with pointer to it. * - * chunk is refcounted by embedded inotify_watch. + * chunk is refcounted by embedded inotify_watch + .refs (non-zero refcount + * of watch contributes 1 to .refs). * * node.index allows to get from node.list to containing chunk. * MSB of that sucker is stolen to mark taggings that we might have to @@ -121,6 +123,7 @@ static struct audit_chunk *alloc_chunk(int count) INIT_LIST_HEAD(&chunk->hash); INIT_LIST_HEAD(&chunk->trees); chunk->count = count; + atomic_long_set(&chunk->refs, 1); for (i = 0; i < count; i++) { INIT_LIST_HEAD(&chunk->owners[i].list); chunk->owners[i].index = i; @@ -129,9 +132,8 @@ static struct audit_chunk *alloc_chunk(int count) return chunk; } -static void __free_chunk(struct rcu_head *rcu) +static void free_chunk(struct audit_chunk *chunk) { - struct audit_chunk *chunk = container_of(rcu, struct audit_chunk, head); int i; for (i = 0; i < chunk->count; i++) { @@ -141,14 +143,16 @@ static void __free_chunk(struct rcu_head *rcu) kfree(chunk); } -static inline void free_chunk(struct audit_chunk *chunk) +void audit_put_chunk(struct audit_chunk *chunk) { - call_rcu(&chunk->head, __free_chunk); + if (atomic_long_dec_and_test(&chunk->refs)) + free_chunk(chunk); } -void audit_put_chunk(struct audit_chunk *chunk) +static void __put_chunk(struct rcu_head *rcu) { - put_inotify_watch(&chunk->watch); + struct audit_chunk *chunk = container_of(rcu, struct audit_chunk, head); + audit_put_chunk(chunk); } enum {HASH_SIZE = 128}; @@ -176,7 +180,7 @@ struct audit_chunk *audit_tree_lookup(const struct inode *inode) list_for_each_entry_rcu(p, list, hash) { if (p->watch.inode == inode) { - get_inotify_watch(&p->watch); + atomic_long_inc(&p->refs); return p; } } @@ -194,17 +198,49 @@ int audit_tree_match(struct audit_chunk *chunk, struct audit_tree *tree) /* tagging and untagging inodes with trees */ -static void untag_chunk(struct audit_chunk *chunk, struct node *p) +static struct audit_chunk *find_chunk(struct node *p) +{ + int index = p->index & ~(1U<<31); + p -= index; + return container_of(p, struct audit_chunk, owners[0]); +} + +static void untag_chunk(struct node *p) { + struct audit_chunk *chunk = find_chunk(p); struct audit_chunk *new; struct audit_tree *owner; int size = chunk->count - 1; int i, j; + if (!pin_inotify_watch(&chunk->watch)) { + /* + * Filesystem is shutting down; all watches are getting + * evicted, just take it off the node list for this + * tree and let the eviction logics take care of the + * rest. + */ + owner = p->owner; + if (owner->root == chunk) { + list_del_init(&owner->same_root); + owner->root = NULL; + } + list_del_init(&p->list); + p->owner = NULL; + put_tree(owner); + return; + } + + spin_unlock(&hash_lock); + + /* + * pin_inotify_watch() succeeded, so the watch won't go away + * from under us. + */ mutex_lock(&chunk->watch.inode->inotify_mutex); if (chunk->dead) { mutex_unlock(&chunk->watch.inode->inotify_mutex); - return; + goto out; } owner = p->owner; @@ -221,7 +257,7 @@ static void untag_chunk(struct audit_chunk *chunk, struct node *p) inotify_evict_watch(&chunk->watch); mutex_unlock(&chunk->watch.inode->inotify_mutex); put_inotify_watch(&chunk->watch); - return; + goto out; } new = alloc_chunk(size); @@ -263,7 +299,7 @@ static void untag_chunk(struct audit_chunk *chunk, struct node *p) inotify_evict_watch(&chunk->watch); mutex_unlock(&chunk->watch.inode->inotify_mutex); put_inotify_watch(&chunk->watch); - return; + goto out; Fallback: // do the best we can @@ -277,6 +313,9 @@ Fallback: put_tree(owner); spin_unlock(&hash_lock); mutex_unlock(&chunk->watch.inode->inotify_mutex); +out: + unpin_inotify_watch(&chunk->watch); + spin_lock(&hash_lock); } static int create_chunk(struct inode *inode, struct audit_tree *tree) @@ -387,13 +426,6 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) return 0; } -static struct audit_chunk *find_chunk(struct node *p) -{ - int index = p->index & ~(1U<<31); - p -= index; - return container_of(p, struct audit_chunk, owners[0]); -} - static void kill_rules(struct audit_tree *tree) { struct audit_krule *rule, *next; @@ -431,17 +463,10 @@ static void prune_one(struct audit_tree *victim) spin_lock(&hash_lock); while (!list_empty(&victim->chunks)) { struct node *p; - struct audit_chunk *chunk; p = list_entry(victim->chunks.next, struct node, list); - chunk = find_chunk(p); - get_inotify_watch(&chunk->watch); - spin_unlock(&hash_lock); - - untag_chunk(chunk, p); - put_inotify_watch(&chunk->watch); - spin_lock(&hash_lock); + untag_chunk(p); } spin_unlock(&hash_lock); put_tree(victim); @@ -469,7 +494,6 @@ static void trim_marked(struct audit_tree *tree) while (!list_empty(&tree->chunks)) { struct node *node; - struct audit_chunk *chunk; node = list_entry(tree->chunks.next, struct node, list); @@ -477,14 +501,7 @@ static void trim_marked(struct audit_tree *tree) if (!(node->index & (1U<<31))) break; - chunk = find_chunk(node); - get_inotify_watch(&chunk->watch); - spin_unlock(&hash_lock); - - untag_chunk(chunk, node); - - put_inotify_watch(&chunk->watch); - spin_lock(&hash_lock); + untag_chunk(node); } if (!tree->root && !tree->goner) { tree->goner = 1; @@ -878,7 +895,7 @@ static void handle_event(struct inotify_watch *watch, u32 wd, u32 mask, static void destroy_watch(struct inotify_watch *watch) { struct audit_chunk *chunk = container_of(watch, struct audit_chunk, watch); - free_chunk(chunk); + call_rcu(&chunk->head, __put_chunk); } static const struct inotify_operations rtree_inotify_ops = { diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index b7d354e2b0ef..9fd85a4640a0 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1094,8 +1094,8 @@ static void audit_inotify_unregister(struct list_head *in_list) list_for_each_entry_safe(p, n, in_list, ilist) { list_del(&p->ilist); inotify_rm_watch(audit_ih, &p->wdata); - /* the put matching the get in audit_do_del_rule() */ - put_inotify_watch(&p->wdata); + /* the unpin matching the pin in audit_do_del_rule() */ + unpin_inotify_watch(&p->wdata); } } @@ -1389,9 +1389,13 @@ static inline int audit_del_rule(struct audit_entry *entry, /* Put parent on the inotify un-registration * list. Grab a reference before releasing * audit_filter_mutex, to be released in - * audit_inotify_unregister(). */ - list_add(&parent->ilist, &inotify_list); - get_inotify_watch(&parent->wdata); + * audit_inotify_unregister(). + * If filesystem is going away, just leave + * the sucker alone, eviction will take + * care of it. + */ + if (pin_inotify_watch(&parent->wdata)) + list_add(&parent->ilist, &inotify_list); } } } -- cgit v1.2.3 From ba32929a91fe2c0628f5be62d1597b379c8d3062 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 10 Nov 2008 15:29:58 +0900 Subject: block: make add_partition() return pointer to hd_struct Make add_partition() return pointer to the new hd_struct on success and ERR_PTR() value on failure. This change will be used to fix md autodetection bug. Signed-off-by: Tejun Heo Cc: Neil Brown Signed-off-by: Jens Axboe --- block/ioctl.c | 7 +++---- fs/partitions/check.c | 25 +++++++++++++------------ include/linux/genhd.h | 4 +++- 3 files changed, 19 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/block/ioctl.c b/block/ioctl.c index c832d639b6e2..d03985b04d67 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -18,7 +18,6 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user struct disk_part_iter piter; long long start, length; int partno; - int err; if (!capable(CAP_SYS_ADMIN)) return -EACCES; @@ -61,10 +60,10 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user disk_part_iter_exit(&piter); /* all seems OK */ - err = add_partition(disk, partno, start, length, - ADDPART_FLAG_NONE); + part = add_partition(disk, partno, start, length, + ADDPART_FLAG_NONE); mutex_unlock(&bdev->bd_mutex); - return err; + return IS_ERR(part) ? PTR_ERR(part) : 0; case BLKPG_DEL_PARTITION: part = disk_get_part(disk, partno); if (!part) diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 90bcf136a9de..633025340239 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -348,8 +348,8 @@ static ssize_t whole_disk_show(struct device *dev, static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, whole_disk_show, NULL); -int add_partition(struct gendisk *disk, int partno, - sector_t start, sector_t len, int flags) +struct hd_struct *add_partition(struct gendisk *disk, int partno, + sector_t start, sector_t len, int flags) { struct hd_struct *p; dev_t devt = MKDEV(0, 0); @@ -361,15 +361,15 @@ int add_partition(struct gendisk *disk, int partno, err = disk_expand_part_tbl(disk, partno); if (err) - return err; + return ERR_PTR(err); ptbl = disk->part_tbl; if (ptbl->part[partno]) - return -EBUSY; + return ERR_PTR(-EBUSY); p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) - return -ENOMEM; + return ERR_PTR(-EBUSY); if (!init_part_stats(p)) { err = -ENOMEM; @@ -424,20 +424,20 @@ int add_partition(struct gendisk *disk, int partno, if (!ddev->uevent_suppress) kobject_uevent(&pdev->kobj, KOBJ_ADD); - return 0; + return p; out_free_stats: free_part_stats(p); out_free: kfree(p); - return err; + return ERR_PTR(err); out_del: kobject_put(p->holder_dir); device_del(pdev); out_put: put_device(pdev); blk_free_devt(devt); - return err; + return ERR_PTR(err); } /* Not exported, helper to add_disk(). */ @@ -568,10 +568,11 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) disk->disk_name, p, (unsigned long long) size); size = get_capacity(disk) - from; } - res = add_partition(disk, p, from, size, state->parts[p].flags); - if (res) { - printk(KERN_ERR " %s: p%d could not be added: %d\n", - disk->disk_name, p, -res); + part = add_partition(disk, p, from, size, + state->parts[p].flags); + if (IS_ERR(part)) { + printk(KERN_ERR " %s: p%d could not be added: %ld\n", + disk->disk_name, p, -PTR_ERR(part)); continue; } #ifdef CONFIG_BLK_DEV_MD diff --git a/include/linux/genhd.h b/include/linux/genhd.h index e439e6aed832..3df7742ce246 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -522,7 +522,9 @@ extern char *disk_name (struct gendisk *hd, int partno, char *buf); extern int disk_expand_part_tbl(struct gendisk *disk, int target); extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev); -extern int __must_check add_partition(struct gendisk *, int, sector_t, sector_t, int); +extern struct hd_struct * __must_check add_partition(struct gendisk *disk, + int partno, sector_t start, + sector_t len, int flags); extern void delete_partition(struct gendisk *, int); extern void printk_all_partitions(void); -- cgit v1.2.3 From de11defebf00007677fb7ee91d9b089b78786fbb Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 19 Nov 2008 15:36:14 -0800 Subject: reintroduce accept4 Introduce a new accept4() system call. The addition of this system call matches analogous changes in 2.6.27 (dup3(), evenfd2(), signalfd4(), inotify_init1(), epoll_create1(), pipe2()) which added new system calls that differed from analogous traditional system calls in adding a flags argument that can be used to access additional functionality. The accept4() system call is exactly the same as accept(), except that it adds a flags bit-mask argument. Two flags are initially implemented. (Most of the new system calls in 2.6.27 also had both of these flags.) SOCK_CLOEXEC causes the close-on-exec (FD_CLOEXEC) flag to be enabled for the new file descriptor returned by accept4(). This is a useful security feature to avoid leaking information in a multithreaded program where one thread is doing an accept() at the same time as another thread is doing a fork() plus exec(). More details here: http://udrepper.livejournal.com/20407.html "Secure File Descriptor Handling", Ulrich Drepper). The other flag is SOCK_NONBLOCK, which causes the O_NONBLOCK flag to be enabled on the new open file description created by accept4(). (This flag is merely a convenience, saving the use of additional calls fcntl(F_GETFL) and fcntl (F_SETFL) to achieve the same result. Here's a test program. Works on x86-32. Should work on x86-64, but I (mtk) don't have a system to hand to test with. It tests accept4() with each of the four possible combinations of SOCK_CLOEXEC and SOCK_NONBLOCK set/clear in 'flags', and verifies that the appropriate flags are set on the file descriptor/open file description returned by accept4(). I tested Ulrich's patch in this thread by applying against 2.6.28-rc2, and it passes according to my test program. /* test_accept4.c Copyright (C) 2008, Linux Foundation, written by Michael Kerrisk Licensed under the GNU GPLv2 or later. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #define PORT_NUM 33333 #define die(msg) do { perror(msg); exit(EXIT_FAILURE); } while (0) /**********************************************************************/ /* The following is what we need until glibc gets a wrapper for accept4() */ /* Flags for socket(), socketpair(), accept4() */ #ifndef SOCK_CLOEXEC #define SOCK_CLOEXEC O_CLOEXEC #endif #ifndef SOCK_NONBLOCK #define SOCK_NONBLOCK O_NONBLOCK #endif #ifdef __x86_64__ #define SYS_accept4 288 #elif __i386__ #define USE_SOCKETCALL 1 #define SYS_ACCEPT4 18 #else #error "Sorry -- don't know the syscall # on this architecture" #endif static int accept4(int fd, struct sockaddr *sockaddr, socklen_t *addrlen, int flags) { printf("Calling accept4(): flags = %x", flags); if (flags != 0) { printf(" ("); if (flags & SOCK_CLOEXEC) printf("SOCK_CLOEXEC"); if ((flags & SOCK_CLOEXEC) && (flags & SOCK_NONBLOCK)) printf(" "); if (flags & SOCK_NONBLOCK) printf("SOCK_NONBLOCK"); printf(")"); } printf("\n"); #if USE_SOCKETCALL long args[6]; args[0] = fd; args[1] = (long) sockaddr; args[2] = (long) addrlen; args[3] = flags; return syscall(SYS_socketcall, SYS_ACCEPT4, args); #else return syscall(SYS_accept4, fd, sockaddr, addrlen, flags); #endif } /**********************************************************************/ static int do_test(int lfd, struct sockaddr_in *conn_addr, int closeonexec_flag, int nonblock_flag) { int connfd, acceptfd; int fdf, flf, fdf_pass, flf_pass; struct sockaddr_in claddr; socklen_t addrlen; printf("=======================================\n"); connfd = socket(AF_INET, SOCK_STREAM, 0); if (connfd == -1) die("socket"); if (connect(connfd, (struct sockaddr *) conn_addr, sizeof(struct sockaddr_in)) == -1) die("connect"); addrlen = sizeof(struct sockaddr_in); acceptfd = accept4(lfd, (struct sockaddr *) &claddr, &addrlen, closeonexec_flag | nonblock_flag); if (acceptfd == -1) { perror("accept4()"); close(connfd); return 0; } fdf = fcntl(acceptfd, F_GETFD); if (fdf == -1) die("fcntl:F_GETFD"); fdf_pass = ((fdf & FD_CLOEXEC) != 0) == ((closeonexec_flag & SOCK_CLOEXEC) != 0); printf("Close-on-exec flag is %sset (%s); ", (fdf & FD_CLOEXEC) ? "" : "not ", fdf_pass ? "OK" : "failed"); flf = fcntl(acceptfd, F_GETFL); if (flf == -1) die("fcntl:F_GETFD"); flf_pass = ((flf & O_NONBLOCK) != 0) == ((nonblock_flag & SOCK_NONBLOCK) !=0); printf("nonblock flag is %sset (%s)\n", (flf & O_NONBLOCK) ? "" : "not ", flf_pass ? "OK" : "failed"); close(acceptfd); close(connfd); printf("Test result: %s\n", (fdf_pass && flf_pass) ? "PASS" : "FAIL"); return fdf_pass && flf_pass; } static int create_listening_socket(int port_num) { struct sockaddr_in svaddr; int lfd; int optval; memset(&svaddr, 0, sizeof(struct sockaddr_in)); svaddr.sin_family = AF_INET; svaddr.sin_addr.s_addr = htonl(INADDR_ANY); svaddr.sin_port = htons(port_num); lfd = socket(AF_INET, SOCK_STREAM, 0); if (lfd == -1) die("socket"); optval = 1; if (setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)) == -1) die("setsockopt"); if (bind(lfd, (struct sockaddr *) &svaddr, sizeof(struct sockaddr_in)) == -1) die("bind"); if (listen(lfd, 5) == -1) die("listen"); return lfd; } int main(int argc, char *argv[]) { struct sockaddr_in conn_addr; int lfd; int port_num; int passed; passed = 1; port_num = (argc > 1) ? atoi(argv[1]) : PORT_NUM; memset(&conn_addr, 0, sizeof(struct sockaddr_in)); conn_addr.sin_family = AF_INET; conn_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); conn_addr.sin_port = htons(port_num); lfd = create_listening_socket(port_num); if (!do_test(lfd, &conn_addr, 0, 0)) passed = 0; if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, 0)) passed = 0; if (!do_test(lfd, &conn_addr, 0, SOCK_NONBLOCK)) passed = 0; if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, SOCK_NONBLOCK)) passed = 0; close(lfd); exit(passed ? EXIT_SUCCESS : EXIT_FAILURE); } [mtk.manpages@gmail.com: rewrote changelog, updated test program] Signed-off-by: Ulrich Drepper Tested-by: Michael Kerrisk Acked-by: Michael Kerrisk Cc: Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/unistd_64.h | 4 +- include/linux/net.h | 6 +-- include/linux/syscalls.h | 3 +- kernel/sys_ni.c | 2 +- net/compat.c | 50 +++---------------------- net/socket.c | 80 +++++----------------------------------- 6 files changed, 21 insertions(+), 124 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 834b2c1d89fb..d2e415e6666f 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -639,8 +639,8 @@ __SYSCALL(__NR_fallocate, sys_fallocate) __SYSCALL(__NR_timerfd_settime, sys_timerfd_settime) #define __NR_timerfd_gettime 287 __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime) -#define __NR_paccept 288 -__SYSCALL(__NR_paccept, sys_paccept) +#define __NR_accept4 288 +__SYSCALL(__NR_accept4, sys_accept4) #define __NR_signalfd4 289 __SYSCALL(__NR_signalfd4, sys_signalfd4) #define __NR_eventfd2 290 diff --git a/include/linux/net.h b/include/linux/net.h index 6dc14a240042..4515efae4c39 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -40,7 +40,7 @@ #define SYS_GETSOCKOPT 15 /* sys_getsockopt(2) */ #define SYS_SENDMSG 16 /* sys_sendmsg(2) */ #define SYS_RECVMSG 17 /* sys_recvmsg(2) */ -#define SYS_PACCEPT 18 /* sys_paccept(2) */ +#define SYS_ACCEPT4 18 /* sys_accept4(2) */ typedef enum { SS_FREE = 0, /* not allocated */ @@ -100,7 +100,7 @@ enum sock_type { * remaining bits are used as flags. */ #define SOCK_TYPE_MASK 0xf -/* Flags for socket, socketpair, paccept */ +/* Flags for socket, socketpair, accept4 */ #define SOCK_CLOEXEC O_CLOEXEC #ifndef SOCK_NONBLOCK #define SOCK_NONBLOCK O_NONBLOCK @@ -223,8 +223,6 @@ extern int sock_map_fd(struct socket *sock, int flags); extern struct socket *sockfd_lookup(int fd, int *err); #define sockfd_put(sock) fput(sock->file) extern int net_ratelimit(void); -extern long do_accept(int fd, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, int flags); #define net_random() random32() #define net_srandom(seed) srandom32((__force u32)seed) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index d6ff145919ca..04fb47bfb920 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -410,8 +410,7 @@ asmlinkage long sys_getsockopt(int fd, int level, int optname, asmlinkage long sys_bind(int, struct sockaddr __user *, int); asmlinkage long sys_connect(int, struct sockaddr __user *, int); asmlinkage long sys_accept(int, struct sockaddr __user *, int __user *); -asmlinkage long sys_paccept(int, struct sockaddr __user *, int __user *, - const __user sigset_t *, size_t, int); +asmlinkage long sys_accept4(int, struct sockaddr __user *, int __user *, int); asmlinkage long sys_getsockname(int, struct sockaddr __user *, int __user *); asmlinkage long sys_getpeername(int, struct sockaddr __user *, int __user *); asmlinkage long sys_send(int, void __user *, size_t, unsigned); diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index a77b27b11b04..e14a23281707 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -31,7 +31,7 @@ cond_syscall(sys_socketpair); cond_syscall(sys_bind); cond_syscall(sys_listen); cond_syscall(sys_accept); -cond_syscall(sys_paccept); +cond_syscall(sys_accept4); cond_syscall(sys_connect); cond_syscall(sys_getsockname); cond_syscall(sys_getpeername); diff --git a/net/compat.c b/net/compat.c index 6ce1a1cadcc0..a3a2ba0fac08 100644 --- a/net/compat.c +++ b/net/compat.c @@ -725,7 +725,7 @@ EXPORT_SYMBOL(compat_mc_getsockopt); static unsigned char nas[19]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), - AL(6)}; + AL(4)}; #undef AL asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags) @@ -738,52 +738,13 @@ asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, uns return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); } -asmlinkage long compat_sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, - const compat_sigset_t __user *sigmask, - compat_size_t sigsetsize, int flags) -{ - compat_sigset_t ss32; - sigset_t ksigmask, sigsaved; - int ret; - - if (sigmask) { - if (sigsetsize != sizeof(compat_sigset_t)) - return -EINVAL; - if (copy_from_user(&ss32, sigmask, sizeof(ss32))) - return -EFAULT; - sigset_from_compat(&ksigmask, &ss32); - - sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); - } - - ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); - - if (ret == -ERESTARTNOHAND) { - /* - * Don't restore the signal mask yet. Let do_signal() deliver - * the signal on the way back to userspace, before the signal - * mask is restored. - */ - if (sigmask) { - memcpy(¤t->saved_sigmask, &sigsaved, - sizeof(sigsaved)); - set_restore_sigmask(); - } - } else if (sigmask) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); - - return ret; -} - asmlinkage long compat_sys_socketcall(int call, u32 __user *args) { int ret; u32 a[6]; u32 a0, a1; - if (call < SYS_SOCKET || call > SYS_PACCEPT) + if (call < SYS_SOCKET || call > SYS_ACCEPT4) return -EINVAL; if (copy_from_user(a, args, nas[call])) return -EFAULT; @@ -804,7 +765,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) ret = sys_listen(a0, a1); break; case SYS_ACCEPT: - ret = do_accept(a0, compat_ptr(a1), compat_ptr(a[2]), 0); + ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), 0); break; case SYS_GETSOCKNAME: ret = sys_getsockname(a0, compat_ptr(a1), compat_ptr(a[2])); @@ -844,9 +805,8 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) case SYS_RECVMSG: ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]); break; - case SYS_PACCEPT: - ret = compat_sys_paccept(a0, compat_ptr(a1), compat_ptr(a[2]), - compat_ptr(a[3]), a[4], a[5]); + case SYS_ACCEPT4: + ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]); break; default: ret = -EINVAL; diff --git a/net/socket.c b/net/socket.c index 57550c3bcabe..92764d836891 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1426,8 +1426,8 @@ asmlinkage long sys_listen(int fd, int backlog) * clean when we restucture accept also. */ -long do_accept(int fd, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, int flags) +asmlinkage long sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, int flags) { struct socket *sock, *newsock; struct file *newfile; @@ -1510,66 +1510,10 @@ out_fd: goto out_put; } -#if 0 -#ifdef HAVE_SET_RESTORE_SIGMASK -asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, - const sigset_t __user *sigmask, - size_t sigsetsize, int flags) -{ - sigset_t ksigmask, sigsaved; - int ret; - - if (sigmask) { - /* XXX: Don't preclude handling different sized sigset_t's. */ - if (sigsetsize != sizeof(sigset_t)) - return -EINVAL; - if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) - return -EFAULT; - - sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); - } - - ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); - - if (ret < 0 && signal_pending(current)) { - /* - * Don't restore the signal mask yet. Let do_signal() deliver - * the signal on the way back to userspace, before the signal - * mask is restored. - */ - if (sigmask) { - memcpy(¤t->saved_sigmask, &sigsaved, - sizeof(sigsaved)); - set_restore_sigmask(); - } - } else if (sigmask) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); - - return ret; -} -#else -asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, - const sigset_t __user *sigmask, - size_t sigsetsize, int flags) -{ - /* The platform does not support restoring the signal mask in the - * return path. So we do not allow using paccept() with a signal - * mask. */ - if (sigmask) - return -EINVAL; - - return do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); -} -#endif -#endif - asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen) { - return do_accept(fd, upeer_sockaddr, upeer_addrlen, 0); + return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0); } /* @@ -2096,7 +2040,7 @@ static const unsigned char nargs[19]={ AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), - AL(6) + AL(4) }; #undef AL @@ -2115,7 +2059,7 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args) unsigned long a0, a1; int err; - if (call < 1 || call > SYS_PACCEPT) + if (call < 1 || call > SYS_ACCEPT4) return -EINVAL; /* copy_from_user should be SMP safe. */ @@ -2143,9 +2087,8 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args) err = sys_listen(a0, a1); break; case SYS_ACCEPT: - err = - do_accept(a0, (struct sockaddr __user *)a1, - (int __user *)a[2], 0); + err = sys_accept4(a0, (struct sockaddr __user *)a1, + (int __user *)a[2], 0); break; case SYS_GETSOCKNAME: err = @@ -2192,12 +2135,9 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args) case SYS_RECVMSG: err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); break; - case SYS_PACCEPT: - err = - sys_paccept(a0, (struct sockaddr __user *)a1, - (int __user *)a[2], - (const sigset_t __user *) a[3], - a[4], a[5]); + case SYS_ACCEPT4: + err = sys_accept4(a0, (struct sockaddr __user *)a1, + (int __user *)a[2], a[3]); break; default: err = -EINVAL; -- cgit v1.2.3 From f481891fdc49d3d1b8a9674a1825d183069a805f Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 19 Nov 2008 15:36:30 -0800 Subject: cpuset: update top cpuset's mems after adding a node After adding a node into the machine, top cpuset's mems isn't updated. By reviewing the code, we found that the update function cpuset_track_online_nodes() was invoked after node_states[N_ONLINE] changes. It is wrong because N_ONLINE just means node has pgdat, and if node has/added memory, we use N_HIGH_MEMORY. So, We should invoke the update function after node_states[N_HIGH_MEMORY] changes, just like its commit says. This patch fixes it. And we use notifier of memory hotplug instead of direct calling of cpuset_track_online_nodes(). Signed-off-by: Miao Xie Acked-by: Yasunori Goto Cc: David Rientjes Cc: Paul Menage Signed-off-by: Linus Torvalds --- include/linux/cpuset.h | 4 ---- kernel/cpuset.c | 19 ++++++++++++++++--- mm/memory_hotplug.c | 3 --- 3 files changed, 16 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 2691926fb506..8e540d32c9fe 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -74,8 +74,6 @@ static inline int cpuset_do_slab_mem_spread(void) return current->flags & PF_SPREAD_SLAB; } -extern void cpuset_track_online_nodes(void); - extern int current_cpuset_is_being_rebound(void); extern void rebuild_sched_domains(void); @@ -151,8 +149,6 @@ static inline int cpuset_do_slab_mem_spread(void) return 0; } -static inline void cpuset_track_online_nodes(void) {} - static inline int current_cpuset_is_being_rebound(void) { return 0; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 81fc6791a296..da7ff6137f37 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -2015,12 +2016,23 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb, * Call this routine anytime after node_states[N_HIGH_MEMORY] changes. * See also the previous routine cpuset_track_online_cpus(). */ -void cpuset_track_online_nodes(void) +static int cpuset_track_online_nodes(struct notifier_block *self, + unsigned long action, void *arg) { cgroup_lock(); - top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; - scan_for_empty_cpusets(&top_cpuset); + switch (action) { + case MEM_ONLINE: + top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; + break; + case MEM_OFFLINE: + top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; + scan_for_empty_cpusets(&top_cpuset); + break; + default: + break; + } cgroup_unlock(); + return NOTIFY_OK; } #endif @@ -2036,6 +2048,7 @@ void __init cpuset_init_smp(void) top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; hotcpu_notifier(cpuset_track_online_cpus, 0); + hotplug_memory_notifier(cpuset_track_online_nodes, 10); } /** diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 6837a1014372..b5b2b15085a8 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -498,8 +497,6 @@ int add_memory(int nid, u64 start, u64 size) /* we online node here. we can't roll back from here. */ node_set_online(nid); - cpuset_track_online_nodes(); - if (new_pgdat) { ret = register_one_node(nid); /* -- cgit v1.2.3