From b35905c16ad6428551eb9e49525011bd2700cf56 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 25 Feb 2008 16:54:37 -0500 Subject: ext4: Fix memory and buffer head leak in callers to ext4_ext_find_extent() The path variable returned via ext4_ext_find_extent is a kmalloc variable and needs to be freed. It also contains a reference to buffer_head which needs to be dropped. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- include/linux/ext4_fs_extents.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h index 697da4bce6c5..1285c583b2d8 100644 --- a/include/linux/ext4_fs_extents.h +++ b/include/linux/ext4_fs_extents.h @@ -227,5 +227,6 @@ extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *, ext4_lblk_t *, ext4_fsblk_t *); extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *, ext4_lblk_t *, ext4_fsblk_t *); +extern void ext4_ext_drop_refs(struct ext4_ext_path *); #endif /* _LINUX_EXT4_EXTENTS */ -- cgit v1.2.3 From 2d07b255c7b8a9723010e5c74778e058dc05162e Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Fri, 15 Feb 2008 09:56:34 -0800 Subject: sched: add declaration of sched_tail to sched.h Avoids sparse warnings: kernel/sched.c:2170:17: warning: symbol 'schedule_tail' was not declared. Should it be static? 
Avoids the need for an external declaration in arch/um/process.c Signed-off-by: Harvey Harrison Signed-off-by: Ingo Molnar --- arch/um/kernel/process.c | 2 -- include/linux/sched.h | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index fc50d2f959d1..e8cb9ff183e9 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -128,8 +128,6 @@ void *get_current(void) return current; } -extern void schedule_tail(struct task_struct *prev); - /* * This is called magically, by its address being stuffed in a jmp_buf * and being longjmp-d to. diff --git a/include/linux/sched.h b/include/linux/sched.h index e217d188a102..9c17e828d6d4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -242,6 +242,7 @@ struct task_struct; extern void sched_init(void); extern void sched_init_smp(void); +extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); extern void init_idle_bootup_task(struct task_struct *idle); -- cgit v1.2.3 From bdb9441e9c325d50b5ae17f7d3205d65b8ed2e5f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 Feb 2008 23:02:48 +0100 Subject: lockdep: increase MAX_LOCK_DEPTH Some code paths exceed the current max lock depth (XFS), so increase this limit a bit. I looked at making this a dynamic allocated array, but we should not advocate insane lock depths, so stay with this as long as it works... 
Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index e217d188a102..e3ea12437547 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1189,7 +1189,7 @@ struct task_struct { int softirq_context; #endif #ifdef CONFIG_LOCKDEP -# define MAX_LOCK_DEPTH 30UL +# define MAX_LOCK_DEPTH 48UL u64 curr_chain_key; int lockdep_depth; struct held_lock held_locks[MAX_LOCK_DEPTH]; -- cgit v1.2.3 From 24d10f0c37d301e88f6965e3dc0aa684311544e5 Mon Sep 17 00:00:00 2001 From: Adrian McMenamin Date: Sat, 16 Feb 2008 23:37:33 +0000 Subject: maple: remove unused variable Remove an unused variable from the definition of struct maple_device Signed-off-by: Adrian McMenamin Signed-off-by: Paul Mundt --- include/linux/maple.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/maple.h b/include/linux/maple.h index 3f01e2bae1a1..d31e36ebb436 100644 --- a/include/linux/maple.h +++ b/include/linux/maple.h @@ -64,7 +64,6 @@ struct maple_driver { int (*connect) (struct maple_device * dev); void (*disconnect) (struct maple_device * dev); struct device_driver drv; - int registered; }; void maple_getcond_callback(struct maple_device *dev, -- cgit v1.2.3 From 96de1a8f0275bd67f243833e7088baced518f873 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Tue, 26 Feb 2008 14:52:45 +0900 Subject: serial: Move asm-sh/sci.h to linux/serial_sci.h. This header is needed on other architectures as well (namely h8300), which currently fails to build without this in place. Rather than duplicating the port definition completely there, just move this to a common location instead. This should get h8300 working again for 2.6.25, in addition to the changes already pushed by Sato-san in -rc2. 
Signed-off-by: Paul Mundt --- arch/sh/kernel/cpu/sh2/setup-sh7619.c | 2 +- arch/sh/kernel/cpu/sh2a/setup-sh7203.c | 2 +- arch/sh/kernel/cpu/sh2a/setup-sh7206.c | 2 +- arch/sh/kernel/cpu/sh3/setup-sh7705.c | 2 +- arch/sh/kernel/cpu/sh3/setup-sh770x.c | 2 +- arch/sh/kernel/cpu/sh3/setup-sh7710.c | 2 +- arch/sh/kernel/cpu/sh3/setup-sh7720.c | 2 +- arch/sh/kernel/cpu/sh4/setup-sh4-202.c | 2 +- arch/sh/kernel/cpu/sh4/setup-sh7750.c | 2 +- arch/sh/kernel/cpu/sh4/setup-sh7760.c | 2 +- arch/sh/kernel/cpu/sh4a/setup-sh7343.c | 2 +- arch/sh/kernel/cpu/sh4a/setup-sh7366.c | 2 +- arch/sh/kernel/cpu/sh4a/setup-sh7722.c | 2 +- arch/sh/kernel/cpu/sh4a/setup-sh7763.c | 2 +- arch/sh/kernel/cpu/sh4a/setup-sh7770.c | 2 +- arch/sh/kernel/cpu/sh4a/setup-sh7780.c | 2 +- arch/sh/kernel/cpu/sh4a/setup-sh7785.c | 2 +- arch/sh/kernel/cpu/sh4a/setup-shx3.c | 2 +- drivers/serial/sh-sci.c | 2 +- include/asm-sh/sci.h | 34 ---------------------------------- include/linux/serial_sci.h | 32 ++++++++++++++++++++++++++++++++ 21 files changed, 51 insertions(+), 53 deletions(-) delete mode 100644 include/asm-sh/sci.h create mode 100644 include/linux/serial_sci.h (limited to 'include/linux') diff --git a/arch/sh/kernel/cpu/sh2/setup-sh7619.c b/arch/sh/kernel/cpu/sh2/setup-sh7619.c index b230eb278cef..cc530f4d84d6 100644 --- a/arch/sh/kernel/cpu/sh2/setup-sh7619.c +++ b/arch/sh/kernel/cpu/sh2/setup-sh7619.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include enum { UNUSED = 0, diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c index db6ef5cecde1..e98dc4450352 100644 --- a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c +++ b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include enum { UNUSED = 0, diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c index a564425b905f..e6d4ec445dd8 100644 --- a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c +++ 
b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include enum { UNUSED = 0, diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7705.c b/arch/sh/kernel/cpu/sh3/setup-sh7705.c index dd0a20a685f7..f581534cb732 100644 --- a/arch/sh/kernel/cpu/sh3/setup-sh7705.c +++ b/arch/sh/kernel/cpu/sh3/setup-sh7705.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include enum { diff --git a/arch/sh/kernel/cpu/sh3/setup-sh770x.c b/arch/sh/kernel/cpu/sh3/setup-sh770x.c index 9066ed78e283..d3733b13ea52 100644 --- a/arch/sh/kernel/cpu/sh3/setup-sh770x.c +++ b/arch/sh/kernel/cpu/sh3/setup-sh770x.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include enum { UNUSED = 0, diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7710.c b/arch/sh/kernel/cpu/sh3/setup-sh7710.c index 0cc0e2bf135d..7406c9ad9259 100644 --- a/arch/sh/kernel/cpu/sh3/setup-sh7710.c +++ b/arch/sh/kernel/cpu/sh3/setup-sh7710.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include enum { diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7720.c b/arch/sh/kernel/cpu/sh3/setup-sh7720.c index 3855ea4c21c8..8028082527c5 100644 --- a/arch/sh/kernel/cpu/sh3/setup-sh7720.c +++ b/arch/sh/kernel/cpu/sh3/setup-sh7720.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #define INTC_ICR1 0xA4140010UL diff --git a/arch/sh/kernel/cpu/sh4/setup-sh4-202.c b/arch/sh/kernel/cpu/sh4/setup-sh4-202.c index dab193293f20..7371abf64f80 100644 --- a/arch/sh/kernel/cpu/sh4/setup-sh4-202.c +++ b/arch/sh/kernel/cpu/sh4/setup-sh4-202.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include static struct plat_sci_port sci_platform_data[] = { { diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7750.c b/arch/sh/kernel/cpu/sh4/setup-sh7750.c index ae3603aca615..ec884039b914 100644 --- a/arch/sh/kernel/cpu/sh4/setup-sh7750.c +++ b/arch/sh/kernel/cpu/sh4/setup-sh7750.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include static struct 
resource rtc_resources[] = { [0] = { diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7760.c b/arch/sh/kernel/cpu/sh4/setup-sh7760.c index 85f81579b97e..254c5c55ab91 100644 --- a/arch/sh/kernel/cpu/sh4/setup-sh7760.c +++ b/arch/sh/kernel/cpu/sh4/setup-sh7760.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include enum { UNUSED = 0, diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c index c0a3f079dfdc..6d4f50cd4aaf 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include static struct plat_sci_port sci_platform_data[] = { { diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c index 967e8b69a2f8..f26b5cdad0d1 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include static struct plat_sci_port sci_platform_data[] = { { diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c index 73c778d40d13..b98b4bc93ec9 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c @@ -10,9 +10,9 @@ #include #include #include +#include #include #include -#include static struct resource usbf_resources[] = { [0] = { diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7763.c b/arch/sh/kernel/cpu/sh4a/setup-sh7763.c index eabd5386812d..07c988dc9de6 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7763.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7763.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include static struct resource rtc_resources[] = { [0] = { diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7770.c b/arch/sh/kernel/cpu/sh4a/setup-sh7770.c index 32f4f59a837b..b9cec48b1808 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7770.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7770.c @@ -10,7 +10,7 @@ #include #include #include -#include 
+#include static struct plat_sci_port sci_platform_data[] = { { diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7780.c b/arch/sh/kernel/cpu/sh4a/setup-sh7780.c index 293004b526ff..18dbbe23fea1 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7780.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7780.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include static struct resource rtc_resources[] = { [0] = { diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7785.c b/arch/sh/kernel/cpu/sh4a/setup-sh7785.c index 74b60e96cdf4..621e7329ec63 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7785.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7785.c @@ -10,10 +10,10 @@ #include #include #include +#include #include #include #include -#include static struct plat_sci_port sci_platform_data[] = { { diff --git a/arch/sh/kernel/cpu/sh4a/setup-shx3.c b/arch/sh/kernel/cpu/sh4a/setup-shx3.c index 4dc958b6b314..bd35f32534b9 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-shx3.c +++ b/arch/sh/kernel/cpu/sh4a/setup-shx3.c @@ -10,9 +10,9 @@ #include #include #include +#include #include #include -#include static struct plat_sci_port sci_platform_data[] = { { diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c index 9ce12cb2cebc..a8c116b80bff 100644 --- a/drivers/serial/sh-sci.c +++ b/drivers/serial/sh-sci.c @@ -41,6 +41,7 @@ #include #include #include +#include #ifdef CONFIG_CPU_FREQ #include @@ -54,7 +55,6 @@ #include #endif -#include #include "sh-sci.h" struct sci_port { diff --git a/include/asm-sh/sci.h b/include/asm-sh/sci.h deleted file mode 100644 index 52e73660c129..000000000000 --- a/include/asm-sh/sci.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef __ASM_SH_SCI_H -#define __ASM_SH_SCI_H - -#include - -/* - * Generic header for SuperH SCI(F) - * - * Do not place SH-specific parts in here, sh64 and h8300 depend on this too. 
- */ - -/* Offsets into the sci_port->irqs array */ -enum { - SCIx_ERI_IRQ, - SCIx_RXI_IRQ, - SCIx_TXI_IRQ, - SCIx_BRI_IRQ, - SCIx_NR_IRQS, -}; - -/* - * Platform device specific platform_data struct - */ -struct plat_sci_port { - void __iomem *membase; /* io cookie */ - unsigned long mapbase; /* resource base */ - unsigned int irqs[SCIx_NR_IRQS]; /* ERI, RXI, TXI, BRI */ - unsigned int type; /* SCI / SCIF / IRDA */ - upf_t flags; /* UPF_* flags */ -}; - -int early_sci_setup(struct uart_port *port); - -#endif /* __ASM_SH_SCI_H */ diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h new file mode 100644 index 000000000000..893cc53486bc --- /dev/null +++ b/include/linux/serial_sci.h @@ -0,0 +1,32 @@ +#ifndef __LINUX_SERIAL_SCI_H +#define __LINUX_SERIAL_SCI_H + +#include + +/* + * Generic header for SuperH SCI(F) (used by sh/sh64/h8300 and related parts) + */ + +/* Offsets into the sci_port->irqs array */ +enum { + SCIx_ERI_IRQ, + SCIx_RXI_IRQ, + SCIx_TXI_IRQ, + SCIx_BRI_IRQ, + SCIx_NR_IRQS, +}; + +/* + * Platform device specific platform_data struct + */ +struct plat_sci_port { + void __iomem *membase; /* io cookie */ + unsigned long mapbase; /* resource base */ + unsigned int irqs[SCIx_NR_IRQS]; /* ERI, RXI, TXI, BRI */ + unsigned int type; /* SCI / SCIF / IRDA */ + upf_t flags; /* UPF_* flags */ +}; + +int early_sci_setup(struct uart_port *port); + +#endif /* __LINUX_SERIAL_SCI_H */ -- cgit v1.2.3 From 78374676efae525094aee45c0aab4bcab95ea9d1 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 26 Feb 2008 18:25:53 -0800 Subject: CONNECTOR: make cn_already_initialized static It is used in connector.c only, so make it static. Signed-off-by: Li Zefan Signed-off-by: David S. 
Miller --- drivers/connector/connector.c | 2 +- include/linux/connector.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index fea2d3ed9cbd..85e2ba7fcfba 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -47,7 +47,7 @@ static LIST_HEAD(notify_list); static struct cn_dev cdev; -int cn_already_initialized = 0; +static int cn_already_initialized; /* * msg->seq and msg->ack are used to determine message genealogy. diff --git a/include/linux/connector.h b/include/linux/connector.h index da6dd957f908..96a89d3d6727 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -170,7 +170,5 @@ int cn_cb_equal(struct cb_id *, struct cb_id *); void cn_queue_wrapper(struct work_struct *work); -extern int cn_already_initialized; - #endif /* __KERNEL__ */ #endif /* __CONNECTOR_H */ -- cgit v1.2.3 From fbabbed8284d1526ed01754ecd4fabdb941a1ff2 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 27 Feb 2008 12:21:18 -0800 Subject: [NETFILTER]: Fix NF_QUEUE_NR() parenthesis Properly add parens around the macro argument. This is not needed by the kernel but the macro is exported to userspace, so it shouldn't make any assumptions. Also use NF_VERDICT_BITS instead of NF_VERDICT_QBITS for the left-shift since that's what's logically correct. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netfilter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index b74b615492e8..f0680c2bee73 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -31,7 +31,7 @@ #define NF_VERDICT_QMASK 0xffff0000 #define NF_VERDICT_QBITS 16 -#define NF_QUEUE_NR(x) (((x << NF_VERDICT_QBITS) & NF_VERDICT_QMASK) | NF_QUEUE) +#define NF_QUEUE_NR(x) ((((x) << NF_VERDICT_BITS) & NF_VERDICT_QMASK) | NF_QUEUE) /* only for userspace compatibility */ #ifndef __KERNEL__ -- cgit v1.2.3 From b59931649256685f294d2d163a4f6d6286fbff05 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Tue, 26 Feb 2008 13:20:58 -0800 Subject: elfcore-compat fix uid/gid types I overlooked the difference between __kernel_uid_t and uid_t when defining struct compat_elf_prpsinfo. The result is a regression in 32-bit core dumps on x86_64, where the NT_PRPSINFO note has the wrong size and layout. This patch fixes it. 
Signed-off-by: Roland McGrath Acked-by: Ingo Molnar Signed-off-by: Linus Torvalds --- include/linux/elfcore-compat.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/elfcore-compat.h b/include/linux/elfcore-compat.h index 532d13adabc4..0a90e1c3a422 100644 --- a/include/linux/elfcore-compat.h +++ b/include/linux/elfcore-compat.h @@ -45,8 +45,8 @@ struct compat_elf_prpsinfo char pr_zomb; char pr_nice; compat_ulong_t pr_flag; - compat_uid_t pr_uid; - compat_gid_t pr_gid; + __compat_uid_t pr_uid; + __compat_gid_t pr_gid; compat_pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid; char pr_fname[16]; char pr_psargs[ELF_PRARGSZ]; -- cgit v1.2.3 From 57ce36feb4d1281247755bc445bae77728298955 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Mon, 25 Feb 2008 16:45:03 +0100 Subject: let __dec_zone_page_state use __dec_zone_state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This removes code duplication and makes __dec_zone_page_state look like __inc_zone_page_state. 
Signed-off-by: Uwe Kleine-König Acked-by: Christoph Lameter Signed-off-by: Linus Torvalds --- include/linux/vmstat.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 75370ec0923e..9f1b4b46151e 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -246,8 +246,7 @@ static inline void __dec_zone_state(struct zone *zone, enum zone_stat_item item) static inline void __dec_zone_page_state(struct page *page, enum zone_stat_item item) { - atomic_long_dec(&page_zone(page)->vm_stat[item]); - atomic_long_dec(&vm_stat[item]); + __dec_zone_state(page_zone(page), item); } /* -- cgit v1.2.3 From 2232c2d8e0a6a31061dec311f3d1cf7624bc14f1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 29 Feb 2008 18:46:50 +0100 Subject: rcu: add support for dynamic ticks and preempt rcu The PREEMPT-RCU can get stuck if a CPU goes idle and NO_HZ is set. The idle CPU will not progress the RCU through its grace period and a synchronize_rcu may get stuck. Without this patch I have a box that will not boot when PREEMPT_RCU and NO_HZ are set. That same box boots fine with this patch. This patch comes from the -rt kernel where it has been tested for several months. Signed-off-by: Steven Rostedt Signed-off-by: Paul E. 
McKenney Signed-off-by: Ingo Molnar --- include/linux/hardirq.h | 10 ++ include/linux/rcuclassic.h | 3 + include/linux/rcupreempt.h | 22 +++++ kernel/rcupreempt.c | 224 ++++++++++++++++++++++++++++++++++++++++++++- kernel/softirq.c | 1 + kernel/time/tick-sched.c | 3 + 6 files changed, 259 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 2961ec788046..49829988bfa0 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -109,6 +109,14 @@ static inline void account_system_vtime(struct task_struct *tsk) } #endif +#if defined(CONFIG_PREEMPT_RCU) && defined(CONFIG_NO_HZ) +extern void rcu_irq_enter(void); +extern void rcu_irq_exit(void); +#else +# define rcu_irq_enter() do { } while (0) +# define rcu_irq_exit() do { } while (0) +#endif /* CONFIG_PREEMPT_RCU */ + /* * It is safe to do non-atomic ops on ->hardirq_context, * because NMI handlers may not preempt and the ops are @@ -117,6 +125,7 @@ static inline void account_system_vtime(struct task_struct *tsk) */ #define __irq_enter() \ do { \ + rcu_irq_enter(); \ account_system_vtime(current); \ add_preempt_count(HARDIRQ_OFFSET); \ trace_hardirq_enter(); \ @@ -135,6 +144,7 @@ extern void irq_enter(void); trace_hardirq_exit(); \ account_system_vtime(current); \ sub_preempt_count(HARDIRQ_OFFSET); \ + rcu_irq_exit(); \ } while (0) /* diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h index 4d6624260b4c..b3dccd68629e 100644 --- a/include/linux/rcuclassic.h +++ b/include/linux/rcuclassic.h @@ -160,5 +160,8 @@ extern void rcu_restart_cpu(int cpu); extern long rcu_batches_completed(void); extern long rcu_batches_completed_bh(void); +#define rcu_enter_nohz() do { } while (0) +#define rcu_exit_nohz() do { } while (0) + #endif /* __KERNEL__ */ #endif /* __LINUX_RCUCLASSIC_H */ diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h index 60c2a033b19e..01152ed532c8 100644 --- a/include/linux/rcupreempt.h +++ 
b/include/linux/rcupreempt.h @@ -82,5 +82,27 @@ extern struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu); struct softirq_action; +#ifdef CONFIG_NO_HZ +DECLARE_PER_CPU(long, dynticks_progress_counter); + +static inline void rcu_enter_nohz(void) +{ + __get_cpu_var(dynticks_progress_counter)++; + WARN_ON(__get_cpu_var(dynticks_progress_counter) & 0x1); + mb(); +} + +static inline void rcu_exit_nohz(void) +{ + mb(); + __get_cpu_var(dynticks_progress_counter)++; + WARN_ON(!(__get_cpu_var(dynticks_progress_counter) & 0x1)); +} + +#else /* CONFIG_NO_HZ */ +#define rcu_enter_nohz() do { } while (0) +#define rcu_exit_nohz() do { } while (0) +#endif /* CONFIG_NO_HZ */ + #endif /* __KERNEL__ */ #endif /* __LINUX_RCUPREEMPT_H */ diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index 987cfb7ade89..c7c52096df48 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c @@ -23,6 +23,10 @@ * to Suparna Bhattacharya for pushing me completely away * from atomic instructions on the read side. * + * - Added handling of Dynamic Ticks + * Copyright 2007 - Paul E. Mckenney + * - Steven Rostedt + * * Papers: http://www.rdrop.com/users/paulmck/RCU * * Design Document: http://lwn.net/Articles/253651/ @@ -409,6 +413,212 @@ static void __rcu_advance_callbacks(struct rcu_data *rdp) } } +#ifdef CONFIG_NO_HZ + +DEFINE_PER_CPU(long, dynticks_progress_counter) = 1; +static DEFINE_PER_CPU(long, rcu_dyntick_snapshot); +static DEFINE_PER_CPU(int, rcu_update_flag); + +/** + * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI. + * + * If the CPU was idle with dynamic ticks active, this updates the + * dynticks_progress_counter to let the RCU handling know that the + * CPU is active. + */ +void rcu_irq_enter(void) +{ + int cpu = smp_processor_id(); + + if (per_cpu(rcu_update_flag, cpu)) + per_cpu(rcu_update_flag, cpu)++; + + /* + * Only update if we are coming from a stopped ticks mode + * (dynticks_progress_counter is even). 
+ */ + if (!in_interrupt() && + (per_cpu(dynticks_progress_counter, cpu) & 0x1) == 0) { + /* + * The following might seem like we could have a race + * with NMI/SMIs. But this really isn't a problem. + * Here we do a read/modify/write, and the race happens + * when an NMI/SMI comes in after the read and before + * the write. But NMI/SMIs will increment this counter + * twice before returning, so the zero bit will not + * be corrupted by the NMI/SMI which is the most important + * part. + * + * The only thing is that we would bring back the counter + * to a postion that it was in during the NMI/SMI. + * But the zero bit would be set, so the rest of the + * counter would again be ignored. + * + * On return from the IRQ, the counter may have the zero + * bit be 0 and the counter the same as the return from + * the NMI/SMI. If the state machine was so unlucky to + * see that, it still doesn't matter, since all + * RCU read-side critical sections on this CPU would + * have already completed. + */ + per_cpu(dynticks_progress_counter, cpu)++; + /* + * The following memory barrier ensures that any + * rcu_read_lock() primitives in the irq handler + * are seen by other CPUs to follow the above + * increment to dynticks_progress_counter. This is + * required in order for other CPUs to correctly + * determine when it is safe to advance the RCU + * grace-period state machine. + */ + smp_mb(); /* see above block comment. */ + /* + * Since we can't determine the dynamic tick mode from + * the dynticks_progress_counter after this routine, + * we use a second flag to acknowledge that we came + * from an idle state with ticks stopped. + */ + per_cpu(rcu_update_flag, cpu)++; + /* + * If we take an NMI/SMI now, they will also increment + * the rcu_update_flag, and will not update the + * dynticks_progress_counter on exit. That is for + * this IRQ to do. + */ + } +} + +/** + * rcu_irq_exit - Called from exiting Hard irq context. 
+ * + * If the CPU was idle with dynamic ticks active, update the + * dynticks_progress_counter to put let the RCU handling be + * aware that the CPU is going back to idle with no ticks. + */ +void rcu_irq_exit(void) +{ + int cpu = smp_processor_id(); + + /* + * rcu_update_flag is set if we interrupted the CPU + * when it was idle with ticks stopped. + * Once this occurs, we keep track of interrupt nesting + * because a NMI/SMI could also come in, and we still + * only want the IRQ that started the increment of the + * dynticks_progress_counter to be the one that modifies + * it on exit. + */ + if (per_cpu(rcu_update_flag, cpu)) { + if (--per_cpu(rcu_update_flag, cpu)) + return; + + /* This must match the interrupt nesting */ + WARN_ON(in_interrupt()); + + /* + * If an NMI/SMI happens now we are still + * protected by the dynticks_progress_counter being odd. + */ + + /* + * The following memory barrier ensures that any + * rcu_read_unlock() primitives in the irq handler + * are seen by other CPUs to preceed the following + * increment to dynticks_progress_counter. This + * is required in order for other CPUs to determine + * when it is safe to advance the RCU grace-period + * state machine. + */ + smp_mb(); /* see above block comment. */ + per_cpu(dynticks_progress_counter, cpu)++; + WARN_ON(per_cpu(dynticks_progress_counter, cpu) & 0x1); + } +} + +static void dyntick_save_progress_counter(int cpu) +{ + per_cpu(rcu_dyntick_snapshot, cpu) = + per_cpu(dynticks_progress_counter, cpu); +} + +static inline int +rcu_try_flip_waitack_needed(int cpu) +{ + long curr; + long snap; + + curr = per_cpu(dynticks_progress_counter, cpu); + snap = per_cpu(rcu_dyntick_snapshot, cpu); + smp_mb(); /* force ordering with cpu entering/leaving dynticks. 
*/ + + /* + * If the CPU remained in dynticks mode for the entire time + * and didn't take any interrupts, NMIs, SMIs, or whatever, + * then it cannot be in the middle of an rcu_read_lock(), so + * the next rcu_read_lock() it executes must use the new value + * of the counter. So we can safely pretend that this CPU + * already acknowledged the counter. + */ + + if ((curr == snap) && ((curr & 0x1) == 0)) + return 0; + + /* + * If the CPU passed through or entered a dynticks idle phase with + * no active irq handlers, then, as above, we can safely pretend + * that this CPU already acknowledged the counter. + */ + + if ((curr - snap) > 2 || (snap & 0x1) == 0) + return 0; + + /* We need this CPU to explicitly acknowledge the counter flip. */ + + return 1; +} + +static inline int +rcu_try_flip_waitmb_needed(int cpu) +{ + long curr; + long snap; + + curr = per_cpu(dynticks_progress_counter, cpu); + snap = per_cpu(rcu_dyntick_snapshot, cpu); + smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ + + /* + * If the CPU remained in dynticks mode for the entire time + * and didn't take any interrupts, NMIs, SMIs, or whatever, + * then it cannot have executed an RCU read-side critical section + * during that time, so there is no need for it to execute a + * memory barrier. + */ + + if ((curr == snap) && ((curr & 0x1) == 0)) + return 0; + + /* + * If the CPU either entered or exited an outermost interrupt, + * SMI, NMI, or whatever handler, then we know that it executed + * a memory barrier when doing so. So we don't need another one. + */ + if (curr != snap) + return 0; + + /* We need the CPU to execute a memory barrier. */ + + return 1; +} + +#else /* !CONFIG_NO_HZ */ + +# define dyntick_save_progress_counter(cpu) do { } while (0) +# define rcu_try_flip_waitack_needed(cpu) (1) +# define rcu_try_flip_waitmb_needed(cpu) (1) + +#endif /* CONFIG_NO_HZ */ + /* * Get here when RCU is idle. Decide whether we need to * move out of idle state, and return non-zero if so. 
@@ -447,8 +657,10 @@ rcu_try_flip_idle(void) /* Now ask each CPU for acknowledgement of the flip. */ - for_each_cpu_mask(cpu, rcu_cpu_online_map) + for_each_cpu_mask(cpu, rcu_cpu_online_map) { per_cpu(rcu_flip_flag, cpu) = rcu_flipped; + dyntick_save_progress_counter(cpu); + } return 1; } @@ -464,7 +676,8 @@ rcu_try_flip_waitack(void) RCU_TRACE_ME(rcupreempt_trace_try_flip_a1); for_each_cpu_mask(cpu, rcu_cpu_online_map) - if (per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) { + if (rcu_try_flip_waitack_needed(cpu) && + per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) { RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1); return 0; } @@ -509,8 +722,10 @@ rcu_try_flip_waitzero(void) smp_mb(); /* ^^^^^^^^^^^^ */ /* Call for a memory barrier from each CPU. */ - for_each_cpu_mask(cpu, rcu_cpu_online_map) + for_each_cpu_mask(cpu, rcu_cpu_online_map) { per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed; + dyntick_save_progress_counter(cpu); + } RCU_TRACE_ME(rcupreempt_trace_try_flip_z2); return 1; @@ -528,7 +743,8 @@ rcu_try_flip_waitmb(void) RCU_TRACE_ME(rcupreempt_trace_try_flip_m1); for_each_cpu_mask(cpu, rcu_cpu_online_map) - if (per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) { + if (rcu_try_flip_waitmb_needed(cpu) && + per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) { RCU_TRACE_ME(rcupreempt_trace_try_flip_me1); return 0; } diff --git a/kernel/softirq.c b/kernel/softirq.c index 5b3aea5f471e..31e9f2a47928 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -313,6 +313,7 @@ void irq_exit(void) /* Make sure that timer wheel updates are propagated */ if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched()) tick_nohz_stop_sched_tick(); + rcu_irq_exit(); #endif preempt_enable_no_resched(); } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index fa9bb73dbdb4..2968298f8f36 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -282,6 +282,7 @@ void tick_nohz_stop_sched_tick(void) ts->idle_tick = ts->sched_timer.expires; ts->tick_stopped = 1; 
ts->idle_jiffies = last_jiffies; + rcu_enter_nohz(); } /* @@ -375,6 +376,8 @@ void tick_nohz_restart_sched_tick(void) return; } + rcu_exit_nohz(); + /* Update jiffies first */ select_nohz_load_balancer(0); now = ktime_get(); -- cgit v1.2.3 From 674eea0fc4d1d693250b5d3ddad42ca931c87dfd Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 11 Feb 2008 18:37:23 +0200 Subject: KVM: Make the supported cpuid list a host property rather than a vm property One of the use cases for the supported cpuid list is to create a "greatest common denominator" of cpu capabilities in a server farm. As such, it is useful to be able to get the list without creating a virtual machine first. Since the code does not depend on the vm in any way, all that is needed is to move it to the device ioctl handler. The capability identifier is also changed so that binaries made against -rc1 will fail gracefully. Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 42 ++++++++++++++++++++++-------------------- include/linux/kvm.h | 4 ++-- 2 files changed, 24 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ec60409299a3..a7069ec2267c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -46,6 +46,9 @@ #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU +static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries); + struct kvm_x86_ops *kvm_x86_ops; struct kvm_stats_debugfs_item debugfs_entries[] = { @@ -727,6 +730,24 @@ long kvm_arch_dev_ioctl(struct file *filp, r = 0; break; } + case KVM_GET_SUPPORTED_CPUID: { + struct kvm_cpuid2 __user *cpuid_arg = argp; + struct kvm_cpuid2 cpuid; + + r = -EFAULT; + if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) + goto out; + r = kvm_dev_ioctl_get_supported_cpuid(&cpuid, + cpuid_arg->entries); + if (r) + goto out; + + r = -EFAULT; + if 
(copy_to_user(cpuid_arg, &cpuid, sizeof cpuid)) + goto out; + r = 0; + break; + } default: r = -EINVAL; } @@ -974,8 +995,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, put_cpu(); } -static int kvm_vm_ioctl_get_supported_cpuid(struct kvm *kvm, - struct kvm_cpuid2 *cpuid, +static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 __user *entries) { struct kvm_cpuid_entry2 *cpuid_entries; @@ -1487,24 +1507,6 @@ long kvm_arch_vm_ioctl(struct file *filp, r = 0; break; } - case KVM_GET_SUPPORTED_CPUID: { - struct kvm_cpuid2 __user *cpuid_arg = argp; - struct kvm_cpuid2 cpuid; - - r = -EFAULT; - if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) - goto out; - r = kvm_vm_ioctl_get_supported_cpuid(kvm, &cpuid, - cpuid_arg->entries); - if (r) - goto out; - - r = -EFAULT; - if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid)) - goto out; - r = 0; - break; - } default: ; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 4de4fd2d8607..c1ec04fd000d 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -221,6 +221,7 @@ struct kvm_vapic_addr { * Get size for mmap(vcpu_fd) */ #define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */ +#define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x05, struct kvm_cpuid2) /* * Extension capability list. 
@@ -230,8 +231,8 @@ struct kvm_vapic_addr { #define KVM_CAP_MMU_SHADOW_CACHE_CONTROL 2 #define KVM_CAP_USER_MEMORY 3 #define KVM_CAP_SET_TSS_ADDR 4 -#define KVM_CAP_EXT_CPUID 5 #define KVM_CAP_VAPIC 6 +#define KVM_CAP_EXT_CPUID 7 /* * ioctls for VM fds @@ -249,7 +250,6 @@ struct kvm_vapic_addr { #define KVM_CREATE_VCPU _IO(KVMIO, 0x41) #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) #define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) -#define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x48, struct kvm_cpuid2) /* Device model IOC */ #define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60) #define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level) -- cgit v1.2.3 From d0bcabcd72dda5f553322a1ca92ae31c15b408b6 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 29 Feb 2008 22:03:07 -0800 Subject: docbook: fix usb source files Fix docbook problems in USB source files. These cause the generated docbook to be incorrect. Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- drivers/usb/core/usb.c | 6 ++---- include/linux/usb.h | 9 +++------ 2 files changed, 5 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index 4e984060c984..f6f19908f5f0 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -99,8 +99,7 @@ struct usb_interface *usb_ifnum_to_if(const struct usb_device *dev, EXPORT_SYMBOL_GPL(usb_ifnum_to_if); /** - * usb_altnum_to_altsetting - get the altsetting structure with a given - * alternate setting number. + * usb_altnum_to_altsetting - get the altsetting structure with a given alternate setting number. 
* @intf: the interface containing the altsetting in question * @altnum: the desired alternate setting number * @@ -442,8 +441,7 @@ EXPORT_SYMBOL_GPL(usb_put_intf); */ /** - * usb_lock_device_for_reset - cautiously acquire the lock for a - * usb device structure + * usb_lock_device_for_reset - cautiously acquire the lock for a usb device structure * @udev: device that's being locked * @iface: interface bound to the driver making the request (optional) * diff --git a/include/linux/usb.h b/include/linux/usb.h index 2372e2e6b527..5bd3ae8aaaf4 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -781,8 +781,7 @@ static inline int usb_endpoint_is_isoc_out( .idVendor = (vend), \ .idProduct = (prod) /** - * USB_DEVICE_VER - macro used to describe a specific usb device with a - * version range + * USB_DEVICE_VER - describe a specific usb device with a version range * @vend: the 16 bit USB Vendor ID * @prod: the 16 bit USB Product ID * @lo: the bcdDevice_lo value @@ -799,8 +798,7 @@ static inline int usb_endpoint_is_isoc_out( .bcdDevice_hi = (hi) /** - * USB_DEVICE_INTERFACE_PROTOCOL - macro used to describe a usb - * device with a specific interface protocol + * USB_DEVICE_INTERFACE_PROTOCOL - describe a usb device with a specific interface protocol * @vend: the 16 bit USB Vendor ID * @prod: the 16 bit USB Product ID * @pr: bInterfaceProtocol value @@ -846,8 +844,7 @@ static inline int usb_endpoint_is_isoc_out( .bInterfaceProtocol = (pr) /** - * USB_DEVICE_AND_INTERFACE_INFO - macro used to describe a specific usb device - * with a class of usb interfaces + * USB_DEVICE_AND_INTERFACE_INFO - describe a specific usb device with a class of usb interfaces * @vend: the 16 bit USB Vendor ID * @prod: the 16 bit USB Product ID * @cl: bInterfaceClass value -- cgit v1.2.3 From a973e9dd1e140a65bed694a2c5c8d53e9cba1a23 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sat, 1 Mar 2008 13:40:44 -0800 Subject: Revert "unique end pointer" patch This only made sense for the 
alternate fastpath which was reverted last week. Mathieu is working on a new version that addresses the fastpath issues but that new code first needs to go through mm and it is not clear if we need the unique end pointers with his new scheme. Reviewed-by: Pekka Enberg Signed-off-by: Christoph Lameter --- include/linux/mm_types.h | 5 +--- mm/slub.c | 70 ++++++++++++++++-------------------------------- 2 files changed, 24 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index bfee0bd1d435..34023c65d466 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -64,10 +64,7 @@ struct page { #if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS spinlock_t ptl; #endif - struct { - struct kmem_cache *slab; /* SLUB: Pointer to slab */ - void *end; /* SLUB: end marker */ - }; + struct kmem_cache *slab; /* SLUB: Pointer to slab */ struct page *first_page; /* Compound tail pages */ }; union { diff --git a/mm/slub.c b/mm/slub.c index 74c65af0a54f..a873953e5a11 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -291,32 +291,15 @@ static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu) #endif } -/* - * The end pointer in a slab is special. It points to the first object in the - * slab but has bit 0 set to mark it. - * - * Note that SLUB relies on page_mapping returning NULL for pages with bit 0 - * in the mapping set. 
- */ -static inline int is_end(void *addr) -{ - return (unsigned long)addr & PAGE_MAPPING_ANON; -} - -static void *slab_address(struct page *page) -{ - return page->end - PAGE_MAPPING_ANON; -} - static inline int check_valid_pointer(struct kmem_cache *s, struct page *page, const void *object) { void *base; - if (object == page->end) + if (!object) return 1; - base = slab_address(page); + base = page_address(page); if (object < base || object >= base + s->objects * s->size || (object - base) % s->size) { return 0; @@ -349,8 +332,7 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) /* Scan freelist */ #define for_each_free_object(__p, __s, __free) \ - for (__p = (__free); (__p) != page->end; __p = get_freepointer((__s),\ - __p)) + for (__p = (__free); __p; __p = get_freepointer((__s), __p)) /* Determine object index from a given position */ static inline int slab_index(void *p, struct kmem_cache *s, void *addr) @@ -502,7 +484,7 @@ static void slab_fix(struct kmem_cache *s, char *fmt, ...) static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) { unsigned int off; /* Offset of last byte */ - u8 *addr = slab_address(page); + u8 *addr = page_address(page); print_tracking(s, p); @@ -680,7 +662,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) if (!(s->flags & SLAB_POISON)) return 1; - start = slab_address(page); + start = page_address(page); end = start + (PAGE_SIZE << s->order); length = s->objects * s->size; remainder = end - (start + length); @@ -748,7 +730,7 @@ static int check_object(struct kmem_cache *s, struct page *page, * of the free objects in this slab. May cause * another error because the object count is now wrong. 
*/ - set_freepointer(s, p, page->end); + set_freepointer(s, p, NULL); return 0; } return 1; @@ -782,18 +764,18 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search) void *fp = page->freelist; void *object = NULL; - while (fp != page->end && nr <= s->objects) { + while (fp && nr <= s->objects) { if (fp == search) return 1; if (!check_valid_pointer(s, page, fp)) { if (object) { object_err(s, page, object, "Freechain corrupt"); - set_freepointer(s, object, page->end); + set_freepointer(s, object, NULL); break; } else { slab_err(s, page, "Freepointer corrupt"); - page->freelist = page->end; + page->freelist = NULL; page->inuse = s->objects; slab_fix(s, "Freelist cleared"); return 0; @@ -899,7 +881,7 @@ bad: */ slab_fix(s, "Marking all objects used"); page->inuse = s->objects; - page->freelist = page->end; + page->freelist = NULL; } return 0; } @@ -939,7 +921,7 @@ static int free_debug_processing(struct kmem_cache *s, struct page *page, } /* Special debug activities for freeing objects */ - if (!SlabFrozen(page) && page->freelist == page->end) + if (!SlabFrozen(page) && !page->freelist) remove_full(s, page); if (s->flags & SLAB_STORE_USER) set_track(s, object, TRACK_FREE, addr); @@ -1124,7 +1106,6 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) SetSlabDebug(page); start = page_address(page); - page->end = start + 1; if (unlikely(s->flags & SLAB_POISON)) memset(start, POISON_INUSE, PAGE_SIZE << s->order); @@ -1136,7 +1117,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) last = p; } setup_object(s, page, last); - set_freepointer(s, last, page->end); + set_freepointer(s, last, NULL); page->freelist = start; page->inuse = 0; @@ -1152,7 +1133,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page) void *p; slab_pad_check(s, page); - for_each_object(p, s, slab_address(page)) + for_each_object(p, s, page_address(page)) check_object(s, page, p, 0); ClearSlabDebug(page); } @@ 
-1162,7 +1143,6 @@ static void __free_slab(struct kmem_cache *s, struct page *page) NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, -pages); - page->mapping = NULL; __free_pages(page, s->order); } @@ -1366,7 +1346,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) ClearSlabFrozen(page); if (page->inuse) { - if (page->freelist != page->end) { + if (page->freelist) { add_partial(n, page, tail); stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); } else { @@ -1410,12 +1390,8 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) * Merge cpu freelist into freelist. Typically we get here * because both freelists are empty. So this is unlikely * to occur. - * - * We need to use _is_end here because deactivate slab may - * be called for a debug slab. Then c->freelist may contain - * a dummy pointer. */ - while (unlikely(!is_end(c->freelist))) { + while (unlikely(c->freelist)) { void **object; tail = 0; /* Hot objects. Put the slab first */ @@ -1517,7 +1493,7 @@ static void *__slab_alloc(struct kmem_cache *s, stat(c, ALLOC_REFILL); load_freelist: object = c->page->freelist; - if (unlikely(object == c->page->end)) + if (unlikely(!object)) goto another_slab; if (unlikely(SlabDebug(c->page))) goto debug; @@ -1525,7 +1501,7 @@ load_freelist: object = c->page->freelist; c->freelist = object[c->offset]; c->page->inuse = s->objects; - c->page->freelist = c->page->end; + c->page->freelist = NULL; c->node = page_to_nid(c->page); unlock_out: slab_unlock(c->page); @@ -1607,7 +1583,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, local_irq_save(flags); c = get_cpu_slab(s, smp_processor_id()); - if (unlikely(is_end(c->freelist) || !node_match(c, node))) + if (unlikely(!c->freelist || !node_match(c, node))) object = __slab_alloc(s, gfpflags, node, addr, c); @@ -1677,7 +1653,7 @@ checks_ok: * was not on the partial list before * then add it. 
*/ - if (unlikely(prior == page->end)) { + if (unlikely(!prior)) { add_partial(get_node(s, page_to_nid(page)), page, 1); stat(c, FREE_ADD_PARTIAL); } @@ -1687,7 +1663,7 @@ out_unlock: return; slab_empty: - if (prior != page->end) { + if (prior) { /* * Slab still on the partial list. */ @@ -1910,7 +1886,7 @@ static void init_kmem_cache_cpu(struct kmem_cache *s, struct kmem_cache_cpu *c) { c->page = NULL; - c->freelist = (void *)PAGE_MAPPING_ANON; + c->freelist = NULL; c->node = 0; c->offset = s->offset / sizeof(void *); c->objsize = s->objsize; @@ -3199,7 +3175,7 @@ static int validate_slab(struct kmem_cache *s, struct page *page, unsigned long *map) { void *p; - void *addr = slab_address(page); + void *addr = page_address(page); if (!check_slab(s, page) || !on_freelist(s, page, NULL)) @@ -3482,7 +3458,7 @@ static int add_location(struct loc_track *t, struct kmem_cache *s, static void process_slab(struct loc_track *t, struct kmem_cache *s, struct page *page, enum track_item alloc) { - void *addr = slab_address(page); + void *addr = page_address(page); DECLARE_BITMAP(map, s->objects); void *p; -- cgit v1.2.3 From 6446faa2ff30ca77c5b25e886bbbfb81c63f1c91 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 15 Feb 2008 23:45:26 -0800 Subject: slub: Fix up comments Provide comments and fix up various spelling / style issues. Signed-off-by: Christoph Lameter --- include/linux/slub_def.h | 4 ++-- mm/slub.c | 49 +++++++++++++++++++++++++++--------------------- 2 files changed, 30 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 57deecc79d52..b00c1c73eb0a 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -61,7 +61,7 @@ struct kmem_cache { int size; /* The size of an object including meta data */ int objsize; /* The size of an object without meta data */ int offset; /* Free pointer offset. 
*/ - int order; + int order; /* Current preferred allocation order */ /* * Avoid an extra cache line for UP, SMP and for the node local to @@ -138,11 +138,11 @@ static __always_inline int kmalloc_index(size_t size) if (size <= 512) return 9; if (size <= 1024) return 10; if (size <= 2 * 1024) return 11; + if (size <= 4 * 1024) return 12; /* * The following is only needed to support architectures with a larger page * size than 4k. */ - if (size <= 4 * 1024) return 12; if (size <= 8 * 1024) return 13; if (size <= 16 * 1024) return 14; if (size <= 32 * 1024) return 15; diff --git a/mm/slub.c b/mm/slub.c index 72f5f4ecd1d2..10d546954efa 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -291,6 +291,7 @@ static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu) #endif } +/* Verify that a pointer has an address that is valid within a slab page */ static inline int check_valid_pointer(struct kmem_cache *s, struct page *page, const void *object) { @@ -619,7 +620,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page, * A. Free pointer (if we cannot overwrite object on free) * B. Tracking data for SLAB_STORE_USER * C. Padding to reach required alignment boundary or at mininum - * one word if debuggin is on to be able to detect writes + * one word if debugging is on to be able to detect writes * before the word boundary. * * Padding is done using 0x5a (POISON_INUSE) @@ -1268,7 +1269,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) * may return off node objects because partial slabs are obtained * from other nodes and filled up. * - * If /sys/slab/xx/defrag_ratio is set to 100 (which makes + * If /sys/kernel/slab/xx/defrag_ratio is set to 100 (which makes * defrag_ratio = 1000) then every (well almost) allocation will * first attempt to defrag slab caches on other nodes. 
This means * scanning over all nodes to look for partial slabs which may be @@ -1343,9 +1344,11 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) * Adding an empty slab to the partial slabs in order * to avoid page allocator overhead. This slab needs * to come after the other slabs with objects in - * order to fill them up. That way the size of the - * partial list stays small. kmem_cache_shrink can - * reclaim empty slabs from the partial list. + * so that the others get filled first. That way the + * size of the partial list stays small. + * + * kmem_cache_shrink can reclaim any empty slabs from the + * partial list. */ add_partial(n, page, 1); slab_unlock(page); @@ -1368,7 +1371,7 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) if (c->freelist) stat(c, DEACTIVATE_REMOTE_FREES); /* - * Merge cpu freelist into freelist. Typically we get here + * Merge cpu freelist into slab freelist. Typically we get here * because both freelists are empty. So this is unlikely * to occur. */ @@ -1399,6 +1402,7 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) /* * Flush cpu slab. + * * Called from IPI handler with interrupts disabled. */ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) @@ -1457,7 +1461,8 @@ static inline int node_match(struct kmem_cache_cpu *c, int node) * rest of the freelist to the lockless freelist. * * And if we were unable to get a new slab from the partial slab lists then - * we need to allocate a new slab. This is slowest path since we may sleep. + * we need to allocate a new slab. This is the slowest path since it involves + * a call to the page allocator and the setup of a new slab. 
*/ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c) @@ -1471,7 +1476,9 @@ static void *__slab_alloc(struct kmem_cache *s, slab_lock(c->page); if (unlikely(!node_match(c, node))) goto another_slab; + stat(c, ALLOC_REFILL); + load_freelist: object = c->page->freelist; if (unlikely(!object)) @@ -1616,6 +1623,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, if (unlikely(SlabDebug(page))) goto debug; + checks_ok: prior = object[offset] = page->freelist; page->freelist = object; @@ -1630,8 +1638,7 @@ checks_ok: goto slab_empty; /* - * Objects left in the slab. If it - * was not on the partial list before + * Objects left in the slab. If it was not on the partial list before * then add it. */ if (unlikely(!prior)) { @@ -1845,13 +1852,11 @@ static unsigned long calculate_alignment(unsigned long flags, unsigned long align, unsigned long size) { /* - * If the user wants hardware cache aligned objects then - * follow that suggestion if the object is sufficiently - * large. + * If the user wants hardware cache aligned objects then follow that + * suggestion if the object is sufficiently large. * - * The hardware cache alignment cannot override the - * specified alignment though. If that is greater - * then use it. + * The hardware cache alignment cannot override the specified + * alignment though. If that is greater then use it. */ if ((flags & SLAB_HWCACHE_ALIGN) && size > cache_line_size() / 2) @@ -2049,6 +2054,7 @@ static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags, #endif init_kmem_cache_node(n); atomic_long_inc(&n->nr_slabs); + /* * lockdep requires consistent irq usage for each lock * so even though there cannot be a race this early in @@ -2301,7 +2307,7 @@ int kmem_ptr_validate(struct kmem_cache *s, const void *object) /* * We could also check if the object is on the slabs freelist. 
* But this would be too expensive and it seems that the main - * purpose of kmem_ptr_valid is to check if the object belongs + * purpose of kmem_ptr_valid() is to check if the object belongs * to a certain slab. */ return 1; @@ -2913,7 +2919,7 @@ void __init kmem_cache_init(void) /* * Patch up the size_index table if we have strange large alignment * requirements for the kmalloc array. This is only the case for - * mips it seems. The standard arches will not generate any code here. + * MIPS it seems. The standard arches will not generate any code here. * * Largest permitted alignment is 256 bytes due to the way we * handle the index determination for the smaller caches. @@ -2942,7 +2948,6 @@ void __init kmem_cache_init(void) kmem_size = sizeof(struct kmem_cache); #endif - printk(KERN_INFO "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," " CPUs=%d, Nodes=%d\n", @@ -3039,12 +3044,15 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, */ for_each_online_cpu(cpu) get_cpu_slab(s, cpu)->objsize = s->objsize; + s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); up_write(&slub_lock); + if (sysfs_slab_alias(s, name)) goto err; return s; } + s = kmalloc(kmem_size, GFP_KERNEL); if (s) { if (kmem_cache_open(s, GFP_KERNEL, name, @@ -3927,7 +3935,6 @@ SLAB_ATTR(remote_node_defrag_ratio); #endif #ifdef CONFIG_SLUB_STATS - static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si) { unsigned long sum = 0; @@ -4111,8 +4118,8 @@ static struct kset *slab_kset; #define ID_STR_LENGTH 64 /* Create a unique string id for a slab cache: - * format - * :[flags-]size:[memory address of kmemcache] + * + * Format :[flags-]size */ static char *create_unique_id(struct kmem_cache *s) { -- cgit v1.2.3 From 7a85f8896f4b4a4a0249563b92af9e3161a6b467 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 4 Mar 2008 11:17:11 +0100 Subject: block: restore the meaning of rq->data_len to the true data length The meaning of rq->data_len was changed 
to the length of an allocated buffer from the true data length. It breaks SG_IO friends and bsg. This patch restores the meaning of rq->data_len to the true data length and adds rq->extra_len to store an extended length (due to drain buffer and padding). This patch also removes the code to update bio in blk_rq_map_user introduced by the commit 40b01b9bbdf51ae543a04744283bf2d56c4a6afa. The commit adjusts bio according to memory alignment (queue_dma_alignment). However, memory alignment is NOT padding alignment. This adjustment also breaks SG_IO friends and bsg. Padding alignment needs to be fixed in a proper way (by a separate patch). Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/blk-core.c | 3 +-- block/blk-map.c | 6 +----- block/blk-merge.c | 2 +- block/bsg.c | 8 ++++---- block/scsi_ioctl.c | 4 ++-- drivers/ata/libata-scsi.c | 6 +++--- include/linux/blkdev.h | 2 +- 7 files changed, 13 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 2d7e3a2f56c4..a248cf1c98dd 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -127,7 +127,6 @@ void rq_init(struct request_queue *q, struct request *rq) rq->nr_hw_segments = 0; rq->ioprio = 0; rq->special = NULL; - rq->raw_data_len = 0; rq->buffer = NULL; rq->tag = -1; rq->errors = 0; @@ -135,6 +134,7 @@ void rq_init(struct request_queue *q, struct request *rq) rq->cmd_len = 0; memset(rq->cmd, 0, sizeof(rq->cmd)); rq->data_len = 0; + rq->extra_len = 0; rq->sense_len = 0; rq->data = NULL; rq->sense = NULL; @@ -2018,7 +2018,6 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, rq->hard_cur_sectors = rq->current_nr_sectors; rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio); rq->buffer = bio_data(bio); - rq->raw_data_len = bio->bi_size; rq->data_len = bio->bi_size; rq->bio = rq->biotail = bio; diff --git a/block/blk-map.c b/block/blk-map.c index 09f7fd0bcb73..f5598322954d 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ 
-19,7 +19,6 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq, rq->biotail->bi_next = bio; rq->biotail = bio; - rq->raw_data_len += bio->bi_size; rq->data_len += bio->bi_size; } return 0; @@ -151,11 +150,8 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq, */ if (len & queue_dma_alignment(q)) { unsigned int pad_len = (queue_dma_alignment(q) & ~len) + 1; - struct bio *bio = rq->biotail; - bio->bi_io_vec[bio->bi_vcnt - 1].bv_len += pad_len; - bio->bi_size += pad_len; - rq->data_len += pad_len; + rq->extra_len += pad_len; } rq->buffer = rq->data = NULL; diff --git a/block/blk-merge.c b/block/blk-merge.c index 7506c4fe0264..0f58616bcd7f 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -231,7 +231,7 @@ new_segment: ((unsigned long)q->dma_drain_buffer) & (PAGE_SIZE - 1)); nsegs++; - rq->data_len += q->dma_drain_size; + rq->extra_len += q->dma_drain_size; } if (sg) diff --git a/block/bsg.c b/block/bsg.c index 7f3c09549e4b..8917c5174dc2 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -437,14 +437,14 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr, } if (rq->next_rq) { - hdr->dout_resid = rq->raw_data_len; - hdr->din_resid = rq->next_rq->raw_data_len; + hdr->dout_resid = rq->data_len; + hdr->din_resid = rq->next_rq->data_len; blk_rq_unmap_user(bidi_bio); blk_put_request(rq->next_rq); } else if (rq_data_dir(rq) == READ) - hdr->din_resid = rq->raw_data_len; + hdr->din_resid = rq->data_len; else - hdr->dout_resid = rq->raw_data_len; + hdr->dout_resid = rq->data_len; /* * If the request generated a negative error number, return it diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index e993cac4911d..a2c3a936ebf9 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -266,7 +266,7 @@ static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr, hdr->info = 0; if (hdr->masked_status || hdr->host_status || hdr->driver_status) hdr->info |= SG_INFO_CHECK; - hdr->resid = rq->raw_data_len; 
+ hdr->resid = rq->data_len; hdr->sb_len_wr = 0; if (rq->sense_len && hdr->sbp) { @@ -528,8 +528,8 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk, rq = blk_get_request(q, WRITE, __GFP_WAIT); rq->cmd_type = REQ_TYPE_BLOCK_PC; rq->data = NULL; - rq->raw_data_len = 0; rq->data_len = 0; + rq->extra_len = 0; rq->timeout = BLK_DEFAULT_SG_TIMEOUT; memset(rq->cmd, 0, sizeof(rq->cmd)); rq->cmd[0] = cmd; diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 7b1f1ee8131d..fe47922dd69e 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -2538,7 +2538,7 @@ static unsigned int atapi_xlat(struct ata_queued_cmd *qc) } qc->tf.command = ATA_CMD_PACKET; - qc->nbytes = scsi_bufflen(scmd); + qc->nbytes = scsi_bufflen(scmd) + scmd->request->extra_len; /* check whether ATAPI DMA is safe */ if (!using_pio && ata_check_atapi_dma(qc)) @@ -2549,7 +2549,7 @@ static unsigned int atapi_xlat(struct ata_queued_cmd *qc) * want to set it properly, and for DMA where it is * effectively meaningless. */ - nbytes = min(scmd->request->raw_data_len, (unsigned int)63 * 1024); + nbytes = min(scmd->request->data_len, (unsigned int)63 * 1024); /* Most ATAPI devices which honor transfer chunk size don't * behave according to the spec when odd chunk size which @@ -2875,7 +2875,7 @@ static unsigned int ata_scsi_pass_thru(struct ata_queued_cmd *qc) * TODO: find out if we need to do more here to * cover scatter/gather case. 
*/ - qc->nbytes = scsi_bufflen(scmd); + qc->nbytes = scsi_bufflen(scmd) + scmd->request->extra_len; /* request result TF and be quiet about device error */ qc->flags |= ATA_QCFLAG_RESULT_TF | ATA_QCFLAG_QUIET; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6fe67d1939c2..b72526c13ca0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -216,8 +216,8 @@ struct request { unsigned int cmd_len; unsigned char cmd[BLK_MAX_CDB]; - unsigned int raw_data_len; unsigned int data_len; + unsigned int extra_len; /* length of alignment and padding */ unsigned int sense_len; void *data; void *sense; -- cgit v1.2.3 From e3790c7d42a545e8fe8b38b513613ca96687b670 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 4 Mar 2008 11:18:17 +0100 Subject: block: separate out padding from alignment Block layer alignment was used for two different purposes - memory alignment and padding. This causes problems in lower layers because drivers which only require memory alignment end up with adjusted rq->data_len. Separate out padding such that padding occurs iff driver explicitly requests it. Tomo: restore the code to update bio in blk_rq_map_user introduced by the commit 40b01b9bbdf51ae543a04744283bf2d56c4a6afa according to padding alignment.
Signed-off-by: Tejun Heo Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/blk-map.c | 20 +++++++++++++------- block/blk-settings.c | 17 +++++++++++++++++ drivers/ata/libata-scsi.c | 3 ++- include/linux/blkdev.h | 2 ++ 4 files changed, 34 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/block/blk-map.c b/block/blk-map.c index f5598322954d..4e17dfd0035d 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -43,6 +43,7 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq, void __user *ubuf, unsigned int len) { unsigned long uaddr; + unsigned int alignment; struct bio *bio, *orig_bio; int reading, ret; @@ -53,8 +54,8 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq, * direct dma. else, set up kernel bounce buffers */ uaddr = (unsigned long) ubuf; - if (!(uaddr & queue_dma_alignment(q)) && - !(len & queue_dma_alignment(q))) + alignment = queue_dma_alignment(q) | q->dma_pad_mask; + if (!(uaddr & alignment) && !(len & alignment)) bio = bio_map_user(q, NULL, uaddr, len, reading); else bio = bio_copy_user(q, uaddr, len, reading); @@ -141,15 +142,20 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq, /* * __blk_rq_map_user() copies the buffers if starting address - * or length isn't aligned. As the copied buffer is always - * page aligned, we know that there's enough room for padding. - * Extend the last bio and update rq->data_len accordingly. + * or length isn't aligned to dma_pad_mask. As the copied + * buffer is always page aligned, we know that there's enough + * room for padding. Extend the last bio and update + * rq->data_len accordingly. * * On unmap, bio_uncopy_user() will use unmodified * bio_map_data pointed to by bio->bi_private. 
*/ - if (len & queue_dma_alignment(q)) { - unsigned int pad_len = (queue_dma_alignment(q) & ~len) + 1; + if (len & q->dma_pad_mask) { + unsigned int pad_len = (q->dma_pad_mask & ~len) + 1; + struct bio *bio = rq->biotail; + + bio->bi_io_vec[bio->bi_vcnt - 1].bv_len += pad_len; + bio->bi_size += pad_len; rq->extra_len += pad_len; } diff --git a/block/blk-settings.c b/block/blk-settings.c index da923fed1f2c..a9f37f530b15 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -292,6 +292,23 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) } EXPORT_SYMBOL(blk_queue_stack_limits); +/** + * blk_queue_dma_pad - set pad mask + * @q: the request queue for the device + * @mask: pad mask + * + * Set pad mask. Direct IO requests are padded to the mask specified. + * + * Appending pad buffer to a request modifies ->data_len such that it + * includes the pad buffer. The original requested data length can be + * obtained using blk_rq_raw_data_len(). + **/ +void blk_queue_dma_pad(struct request_queue *q, unsigned int mask) +{ + q->dma_pad_mask = mask; +} +EXPORT_SYMBOL(blk_queue_dma_pad); + /** * blk_queue_dma_drain - Set up a drain buffer for excess dma. 
* @q: the request queue for the device diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index fe47922dd69e..8f0e8f2bc628 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -862,9 +862,10 @@ static int ata_scsi_dev_config(struct scsi_device *sdev, struct request_queue *q = sdev->request_queue; void *buf; - /* set the min alignment */ + /* set the min alignment and padding */ blk_queue_update_dma_alignment(sdev->request_queue, ATA_DMA_PAD_SZ - 1); + blk_queue_dma_pad(sdev->request_queue, ATA_DMA_PAD_SZ - 1); /* configure draining */ buf = kmalloc(ATAPI_MAX_DRAIN, q->bounce_gfp | GFP_KERNEL); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b72526c13ca0..6f79d40dd3c0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -362,6 +362,7 @@ struct request_queue unsigned long seg_boundary_mask; void *dma_drain_buffer; unsigned int dma_drain_size; + unsigned int dma_pad_mask; unsigned int dma_alignment; struct blk_queue_tag *queue_tags; @@ -701,6 +702,7 @@ extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short); extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); extern void blk_queue_hardsect_size(struct request_queue *, unsigned short); extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); +extern void blk_queue_dma_pad(struct request_queue *, unsigned int); extern int blk_queue_dma_drain(struct request_queue *q, dma_drain_needed_fn *dma_drain_needed, void *buf, unsigned int size); -- cgit v1.2.3 From 1826eadfc42839af7c1c5a1859510aff635d3fa1 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 4 Mar 2008 11:23:46 +0100 Subject: block/genhd.c: cleanups This patch contains the following cleanups: - make the needlessly global struct disk_type static - #if 0 the unused genhd_media_change_notify() Signed-off-by: Adrian Bunk Signed-off-by: Jens Axboe --- block/genhd.c | 6 +++++- include/linux/genhd.h | 2 -- 2 files changed, 5 
insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/block/genhd.c b/block/genhd.c index abc6feddc8c6..c44527d16c52 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -24,6 +24,8 @@ static DEFINE_MUTEX(block_class_lock); struct kobject *block_depr; #endif +static struct device_type disk_type; + /* * Can be deleted altogether. Later. * @@ -502,7 +504,7 @@ struct class block_class = { .name = "block", }; -struct device_type disk_type = { +static struct device_type disk_type = { .name = "disk", .groups = disk_attr_groups, .release = disk_release, @@ -632,12 +634,14 @@ static void media_change_notify_thread(struct work_struct *work) put_device(gd->driverfs_dev); } +#if 0 void genhd_media_change_notify(struct gendisk *disk) { get_device(disk->driverfs_dev); schedule_work(&disk->async_notify); } EXPORT_SYMBOL_GPL(genhd_media_change_notify); +#endif /* 0 */ dev_t blk_lookup_devt(const char *name) { diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 09a3b18918c7..cd048e3cc96d 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -18,7 +18,6 @@ #define dev_to_disk(device) container_of(device, struct gendisk, dev) #define dev_to_part(device) container_of(device, struct hd_struct, dev) -extern struct device_type disk_type; extern struct device_type part_type; extern struct kobject *block_depr; extern struct class block_class; @@ -556,7 +555,6 @@ extern struct gendisk *alloc_disk_node(int minors, int node_id); extern struct gendisk *alloc_disk(int minors); extern struct kobject *get_disk(struct gendisk *disk); extern void put_disk(struct gendisk *disk); -extern void genhd_media_change_notify(struct gendisk *disk); extern void blk_register_region(dev_t devt, unsigned long range, struct module *module, struct kobject *(*probe)(dev_t, int *, void *), -- cgit v1.2.3 From a0db701a6bf767320e4471bd55e70702d230f6fb Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 4 Mar 2008 11:23:50 +0100 Subject: block/genhd.c: proper externs This 
patch adds proper externs for two structs in include/linux/genhd.h Signed-off-by: Adrian Bunk Signed-off-by: Jens Axboe --- fs/proc/proc_misc.c | 3 +-- include/linux/genhd.h | 3 +++ 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 468805d40e2b..2d563979cb02 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -377,7 +378,6 @@ static int stram_read_proc(char *page, char **start, off_t off, #endif #ifdef CONFIG_BLOCK -extern const struct seq_operations partitions_op; static int partitions_open(struct inode *inode, struct file *file) { return seq_open(file, &partitions_op); @@ -389,7 +389,6 @@ static const struct file_operations proc_partitions_operations = { .release = seq_release, }; -extern const struct seq_operations diskstats_op; static int diskstats_open(struct inode *inode, struct file *file) { return seq_open(file, &diskstats_op); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index cd048e3cc96d..32c2ac49a070 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -22,6 +22,9 @@ extern struct device_type part_type; extern struct kobject *block_depr; extern struct class block_class; +extern const struct seq_operations partitions_op; +extern const struct seq_operations diskstats_op; + enum { /* These three have identical behaviour; use the second one if DOS FDISK gets confused about extended/logical partitions starting past cylinder 1023. */ -- cgit v1.2.3 From 72dc67a69690288538142df73a7e3ac66fea68dc Mon Sep 17 00:00:00 2001 From: Izik Eidus Date: Sun, 10 Feb 2008 18:04:15 +0200 Subject: KVM: remove the usage of the mmap_sem for the protection of the memory slots. This patch replaces the mmap_sem lock for the memory slots with a new kvm private lock; it is needed because until now there were cases where kvm accesses user memory while holding the mmap semaphore.
Signed-off-by: Izik Eidus Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 24 ++++++++++++++--- arch/x86/kvm/paging_tmpl.h | 13 +++++++--- arch/x86/kvm/vmx.c | 7 +++-- arch/x86/kvm/x86.c | 65 ++++++++++++++++++++++++++-------------------- include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 5 ++-- 6 files changed, 75 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 8efdcdbebb03..26037106ad19 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -876,11 +876,18 @@ static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) { + struct page *page; + gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); if (gpa == UNMAPPED_GVA) return NULL; - return gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); + + down_read(&current->mm->mmap_sem); + page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); + up_read(&current->mm->mmap_sem); + + return page; } static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, @@ -1020,15 +1027,18 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) struct page *page; + down_read(&vcpu->kvm->slots_lock); + down_read(&current->mm->mmap_sem); page = gfn_to_page(vcpu->kvm, gfn); + up_read(&current->mm->mmap_sem); spin_lock(&vcpu->kvm->mmu_lock); kvm_mmu_free_some_pages(vcpu); r = __nonpaging_map(vcpu, v, write, gfn, page); spin_unlock(&vcpu->kvm->mmu_lock); - up_read(&current->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); return r; } @@ -1362,6 +1372,7 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, gfn_t gfn; int r; u64 gpte = 0; + struct page *page; if (bytes != 4 && bytes != 8) return; @@ -1389,6 +1400,11 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, if (!is_present_pte(gpte)) return; gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; + + down_read(&current->mm->mmap_sem); + page = gfn_to_page(vcpu->kvm, gfn); + up_read(&current->mm->mmap_sem); +
vcpu->arch.update_pte.gfn = gfn; vcpu->arch.update_pte.page = gfn_to_page(vcpu->kvm, gfn); } @@ -1496,9 +1512,9 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) gpa_t gpa; int r; - down_read(&current->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); - up_read(&current->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); spin_lock(&vcpu->kvm->mmu_lock); r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 03ba8608fe0f..2009c6e9dc4d 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -91,7 +91,10 @@ static bool FNAME(cmpxchg_gpte)(struct kvm *kvm, pt_element_t *table; struct page *page; + down_read(&current->mm->mmap_sem); page = gfn_to_page(kvm, table_gfn); + up_read(&current->mm->mmap_sem); + table = kmap_atomic(page, KM_USER0); ret = CMPXCHG(&table[index], orig_pte, new_pte); @@ -378,7 +381,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, if (r) return r; - down_read(&current->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); /* * Look up the shadow pte for the faulting address.
*/ @@ -392,11 +395,13 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, pgprintk("%s: guest page fault\n", __FUNCTION__); inject_page_fault(vcpu, addr, walker.error_code); vcpu->arch.last_pt_write_count = 0; /* reset fork detector */ - up_read(&current->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); return 0; } + down_read(&current->mm->mmap_sem); page = gfn_to_page(vcpu->kvm, walker.gfn); + up_read(&current->mm->mmap_sem); spin_lock(&vcpu->kvm->mmu_lock); kvm_mmu_free_some_pages(vcpu); @@ -413,14 +418,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, */ if (shadow_pte && is_io_pte(*shadow_pte)) { spin_unlock(&vcpu->kvm->mmu_lock); - up_read(&current->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); return 1; } ++vcpu->stat.pf_fixed; kvm_mmu_audit(vcpu, "post page fault (fixed)"); spin_unlock(&vcpu->kvm->mmu_lock); - up_read(&current->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); return write_pt; } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ad36447e696e..86f5bf121838 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1477,7 +1477,7 @@ static int alloc_apic_access_page(struct kvm *kvm) struct kvm_userspace_memory_region kvm_userspace_mem; int r = 0; - down_write(&current->mm->mmap_sem); + down_write(&kvm->slots_lock); if (kvm->arch.apic_access_page) goto out; kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; @@ -1487,9 +1487,12 @@ static int alloc_apic_access_page(struct kvm *kvm) r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0); if (r) goto out; + + down_read(&current->mm->mmap_sem); kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); + up_read(&current->mm->mmap_sem); out: - up_write(&current->mm->mmap_sem); + up_write(&kvm->slots_lock); return r; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 338764fa5391..6b01552bd1f1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -184,7 +184,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) int ret; u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)]; -
down_read(&current->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte, offset * sizeof(u64), sizeof(pdpte)); if (ret < 0) { @@ -201,7 +201,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs)); out: - up_read(&current->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); return ret; } @@ -215,13 +215,13 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu) if (is_long_mode(vcpu) || !is_pae(vcpu)) return false; - down_read(&current->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte)); if (r < 0) goto out; changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0; out: - up_read(&current->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); return changed; } @@ -359,7 +359,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) */ } - down_read(&current->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); /* * Does the new cr3 value map to physical memory?
(Note, we * catch an invalid cr3 even in real-mode, because it would @@ -375,7 +375,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) vcpu->arch.cr3 = cr3; vcpu->arch.mmu.new_cr3(vcpu); } - up_read(&current->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); } EXPORT_SYMBOL_GPL(set_cr3); @@ -1232,12 +1232,12 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) return -EINVAL; - down_write(&current->mm->mmap_sem); + down_write(&kvm->slots_lock); kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; - up_write(&current->mm->mmap_sem); + up_write(&kvm->slots_lock); return 0; } @@ -1286,7 +1286,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, < alias->target_phys_addr) goto out; - down_write(&current->mm->mmap_sem); + down_write(&kvm->slots_lock); p = &kvm->arch.aliases[alias->slot]; p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; @@ -1300,7 +1300,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, kvm_mmu_zap_all(kvm); - up_write(&current->mm->mmap_sem); + up_write(&kvm->slots_lock); return 0; @@ -1376,7 +1376,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot; int is_dirty = 0; - down_write(&current->mm->mmap_sem); + down_write(&kvm->slots_lock); r = kvm_get_dirty_log(kvm, log, &is_dirty); if (r) @@ -1392,7 +1392,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, } r = 0; out: - up_write(&current->mm->mmap_sem); + up_write(&kvm->slots_lock); return r; } @@ -1570,7 +1570,7 @@ int emulator_read_std(unsigned long addr, void *data = val; int r = X86EMUL_CONTINUE; - down_read(&current->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); while (bytes) { gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); unsigned offset = addr & (PAGE_SIZE-1); @@ -1592,7 +1592,7 @@ int emulator_read_std(unsigned long addr, addr += tocopy; } out: - up_read(&current->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); return r; } EXPORT_SYMBOL_GPL(emulator_read_std); @@
-1611,9 +1611,9 @@ static int emulator_read_emulated(unsigned long addr, return X86EMUL_CONTINUE; } - down_read(&current->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); - up_read(&current->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); /* For APIC access vmexit */ if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) @@ -1651,14 +1651,14 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, { int ret; - down_read(&current->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); if (ret < 0) { - up_read(&current->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); return 0; } kvm_mmu_pte_write(vcpu, gpa, val, bytes); - up_read(&current->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); return 1; } @@ -1670,9 +1670,9 @@ static int emulator_write_emulated_onepage(unsigned long addr, struct kvm_io_device *mmio_dev; gpa_t gpa; - down_read(&current->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); - up_read(&current->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); if (gpa == UNMAPPED_GVA) { kvm_inject_page_fault(vcpu, addr, 2); @@ -1749,7 +1749,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, char *kaddr; u64 val; - down_read(&current->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); if (gpa == UNMAPPED_GVA || @@ -1760,13 +1760,17 @@ static int emulator_cmpxchg_emulated(unsigned long addr, goto emul_write; val = *(u64 *)new; + + down_read(&current->mm->mmap_sem); page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); + up_read(&current->mm->mmap_sem); + kaddr = kmap_atomic(page, KM_USER0); set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val); kunmap_atomic(kaddr, KM_USER0); kvm_release_page_dirty(page); emul_write: - up_read(&current->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); } #endif @@ -2159,10 +2163,10 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
kvm_x86_ops->skip_emulated_instruction(vcpu); for (i = 0; i < nr_pages; ++i) { - down_read(&current->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); page = gva_to_page(vcpu, address + i * PAGE_SIZE); vcpu->arch.pio.guest_pages[i] = page; - up_read(&current->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); if (!page) { kvm_inject_gp(vcpu, 0); free_pio_guest_pages(vcpu); @@ -2485,8 +2489,9 @@ static void vapic_enter(struct kvm_vcpu *vcpu) down_read(&current->mm->mmap_sem); page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); - vcpu->arch.apic->vapic_page = page; up_read(&current->mm->mmap_sem); + + vcpu->arch.apic->vapic_page = page; } static void vapic_exit(struct kvm_vcpu *vcpu) @@ -2959,9 +2964,9 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, gpa_t gpa; vcpu_load(vcpu); - down_read(&current->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr); - up_read(&current->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); tr->physical_address = gpa; tr->valid = gpa != UNMAPPED_GVA; tr->writeable = 1; @@ -3234,11 +3239,13 @@ int kvm_arch_set_memory_region(struct kvm *kvm, */ if (!user_alloc) { if (npages && !old.rmap) { + down_write(&current->mm->mmap_sem); memslot->userspace_addr = do_mmap(NULL, 0, npages * PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0); + up_write(&current->mm->mmap_sem); if (IS_ERR((void *)memslot->userspace_addr)) return PTR_ERR((void *)memslot->userspace_addr); @@ -3246,8 +3253,10 @@ int kvm_arch_set_memory_region(struct kvm *kvm, if (!old.user_alloc && old.rmap) { int ret; + down_write(&current->mm->mmap_sem); ret = do_munmap(current->mm, old.userspace_addr, old.npages * PAGE_SIZE); + up_write(&current->mm->mmap_sem); if (ret < 0) printk(KERN_WARNING "kvm_vm_ioctl_set_memory_region: " diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ea4764b0a2f4..928b0d59e9ba 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -107,6 +107,7 @@ struct kvm_memory_slot { struct kvm { struct mutex
lock; /* protects the vcpus array and APIC accesses */ spinlock_t mmu_lock; + struct rw_semaphore slots_lock; struct mm_struct *mm; /* userspace tied to this vm */ int nmemslots; struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS + diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 32fbf8006969..b2e12893e3f4 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -169,6 +169,7 @@ static struct kvm *kvm_create_vm(void) kvm_io_bus_init(&kvm->pio_bus); mutex_init(&kvm->lock); kvm_io_bus_init(&kvm->mmio_bus); + init_rwsem(&kvm->slots_lock); spin_lock(&kvm_lock); list_add(&kvm->vm_list, &vm_list); spin_unlock(&kvm_lock); @@ -339,9 +340,9 @@ int kvm_set_memory_region(struct kvm *kvm, { int r; - down_write(&current->mm->mmap_sem); + down_write(&kvm->slots_lock); r = __kvm_set_memory_region(kvm, mem, user_alloc); - up_write(&current->mm->mmap_sem); + up_write(&kvm->slots_lock); return r; } EXPORT_SYMBOL_GPL(kvm_set_memory_region); -- cgit v1.2.3 From 62fb185130e4d420f71a30ff59d8b16b74ef5d2b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 Feb 2008 17:34:02 +0100 Subject: sched: revert load_balance_monitor() changes The following commits cause a number of regressions: commit 58e2d4ca581167c2a079f4ee02be2f0bc52e8729 Author: Srivatsa Vaddagiri Date: Fri Jan 25 21:08:00 2008 +0100 sched: group scheduling, change how cpu load is calculated commit 6b2d7700266b9402e12824e11e0099ae6a4a6a79 Author: Srivatsa Vaddagiri Date: Fri Jan 25 21:08:00 2008 +0100 sched: group scheduler, fix fairness of cpu bandwidth allocation for task groups Namely: - very frequent wakeups on SMP, reported by PowerTop users. - cacheline thrashing on (large) SMP - some latencies larger than 500ms While there is a mergeable patch to fix the latter, the former issues are not fixable in a manner suitable for .25 (we're at -rc3 now). Hence we revert them and try again in v2.6.26.
Signed-off-by: Peter Zijlstra CC: Srivatsa Vaddagiri Tested-by: Alexey Zaytsev Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 - kernel/sched.c | 283 +++++++------------------------------------------- kernel/sched_fair.c | 115 +++++++------------- kernel/sched_rt.c | 4 - kernel/sysctl.c | 18 ---- 5 files changed, 70 insertions(+), 354 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2c9621f8bf87..9ae4030067a9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1542,10 +1542,6 @@ extern unsigned int sysctl_sched_child_runs_first; extern unsigned int sysctl_sched_features; extern unsigned int sysctl_sched_migration_cost; extern unsigned int sysctl_sched_nr_migrate; -#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP) -extern unsigned int sysctl_sched_min_bal_int_shares; -extern unsigned int sysctl_sched_max_bal_int_shares; -#endif int sched_nr_latency_handler(struct ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, diff --git a/kernel/sched.c b/kernel/sched.c index f06950c8a6ce..dcd553cc4ee8 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -174,41 +174,6 @@ struct task_group { struct sched_entity **se; /* runqueue "owned" by this group on each cpu */ struct cfs_rq **cfs_rq; - - /* - * shares assigned to a task group governs how much of cpu bandwidth - * is allocated to the group. The more shares a group has, the more is - * the cpu bandwidth allocated to it. - * - * For ex, lets say that there are three task groups, A, B and C which - * have been assigned shares 1000, 2000 and 3000 respectively. 
Then, - * cpu bandwidth allocated by the scheduler to task groups A, B and C - * should be: - * - * Bw(A) = 1000/(1000+2000+3000) * 100 = 16.66% - * Bw(B) = 2000/(1000+2000+3000) * 100 = 33.33% - * Bw(C) = 3000/(1000+2000+3000) * 100 = 50% - * - * The weight assigned to a task group's schedulable entities on every - * cpu (task_group.se[a_cpu]->load.weight) is derived from the task - * group's shares. For ex: lets say that task group A has been - * assigned shares of 1000 and there are two CPUs in a system. Then, - * - * tg_A->se[0]->load.weight = tg_A->se[1]->load.weight = 1000; - * - * Note: It's not necessary that each of a task's group schedulable - * entity have the same weight on all CPUs. If the group - * has 2 of its tasks on CPU0 and 1 task on CPU1, then a - * better distribution of weight could be: - * - * tg_A->se[0]->load.weight = 2/3 * 2000 = 1333 - * tg_A->se[1]->load.weight = 1/2 * 2000 = 667 - * - * rebalance_shares() is responsible for distributing the shares of a - * task groups like this among the group's schedulable entities across - * cpus. 
- * - */ unsigned long shares; #endif @@ -250,22 +215,12 @@ static DEFINE_SPINLOCK(task_group_lock); static DEFINE_MUTEX(doms_cur_mutex); #ifdef CONFIG_FAIR_GROUP_SCHED -#ifdef CONFIG_SMP -/* kernel thread that runs rebalance_shares() periodically */ -static struct task_struct *lb_monitor_task; -static int load_balance_monitor(void *unused); -#endif - -static void set_se_shares(struct sched_entity *se, unsigned long shares); - #ifdef CONFIG_USER_SCHED # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) #else # define INIT_TASK_GROUP_LOAD NICE_0_LOAD #endif -#define MIN_GROUP_SHARES 2 - static int init_task_group_load = INIT_TASK_GROUP_LOAD; #endif @@ -1245,16 +1200,6 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime); static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} #endif -static inline void inc_cpu_load(struct rq *rq, unsigned long load) -{ - update_load_add(&rq->load, load); -} - -static inline void dec_cpu_load(struct rq *rq, unsigned long load) -{ - update_load_sub(&rq->load, load); -} - #ifdef CONFIG_SMP static unsigned long source_load(int cpu, int type); static unsigned long target_load(int cpu, int type); @@ -1272,14 +1217,26 @@ static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); #define sched_class_highest (&rt_sched_class) -static void inc_nr_running(struct rq *rq) +static inline void inc_load(struct rq *rq, const struct task_struct *p) +{ + update_load_add(&rq->load, p->se.load.weight); +} + +static inline void dec_load(struct rq *rq, const struct task_struct *p) +{ + update_load_sub(&rq->load, p->se.load.weight); +} + +static void inc_nr_running(struct task_struct *p, struct rq *rq) { rq->nr_running++; + inc_load(rq, p); } -static void dec_nr_running(struct rq *rq) +static void dec_nr_running(struct task_struct *p, struct rq *rq) { rq->nr_running--; + dec_load(rq, p); } static void set_load_weight(struct task_struct *p) @@ -1371,7 +1328,7 @@ static void activate_task(struct rq *rq, struct 
task_struct *p, int wakeup) rq->nr_uninterruptible--; enqueue_task(rq, p, wakeup); - inc_nr_running(rq); + inc_nr_running(p, rq); } /* @@ -1383,7 +1340,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep) rq->nr_uninterruptible++; dequeue_task(rq, p, sleep); - dec_nr_running(rq); + dec_nr_running(p, rq); } /** @@ -2023,7 +1980,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) * management (if any): */ p->sched_class->task_new(rq, p); - inc_nr_running(rq); + inc_nr_running(p, rq); } check_preempt_curr(rq, p); #ifdef CONFIG_SMP @@ -4362,8 +4319,10 @@ void set_user_nice(struct task_struct *p, long nice) goto out_unlock; } on_rq = p->se.on_rq; - if (on_rq) + if (on_rq) { dequeue_task(rq, p, 0); + dec_load(rq, p); + } p->static_prio = NICE_TO_PRIO(nice); set_load_weight(p); @@ -4373,6 +4332,7 @@ void set_user_nice(struct task_struct *p, long nice) if (on_rq) { enqueue_task(rq, p, 0); + inc_load(rq, p); /* * If the task increased its priority or is running and * lowered its priority, then reschedule its CPU: @@ -7087,21 +7047,6 @@ void __init sched_init_smp(void) if (set_cpus_allowed(current, non_isolated_cpus) < 0) BUG(); sched_init_granularity(); - -#ifdef CONFIG_FAIR_GROUP_SCHED - if (nr_cpu_ids == 1) - return; - - lb_monitor_task = kthread_create(load_balance_monitor, NULL, - "group_balance"); - if (!IS_ERR(lb_monitor_task)) { - lb_monitor_task->flags |= PF_NOFREEZE; - wake_up_process(lb_monitor_task); - } else { - printk(KERN_ERR "Could not create load balance monitor thread" - "(error = %ld) \n", PTR_ERR(lb_monitor_task)); - } -#endif } #else void __init sched_init_smp(void) @@ -7424,157 +7369,6 @@ void set_curr_task(int cpu, struct task_struct *p) #ifdef CONFIG_GROUP_SCHED -#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP -/* - * distribute shares of all task groups among their schedulable entities, - * to reflect load distribution across cpus. 
- */ -static int rebalance_shares(struct sched_domain *sd, int this_cpu) -{ - struct cfs_rq *cfs_rq; - struct rq *rq = cpu_rq(this_cpu); - cpumask_t sdspan = sd->span; - int balanced = 1; - - /* Walk thr' all the task groups that we have */ - for_each_leaf_cfs_rq(rq, cfs_rq) { - int i; - unsigned long total_load = 0, total_shares; - struct task_group *tg = cfs_rq->tg; - - /* Gather total task load of this group across cpus */ - for_each_cpu_mask(i, sdspan) - total_load += tg->cfs_rq[i]->load.weight; - - /* Nothing to do if this group has no load */ - if (!total_load) - continue; - - /* - * tg->shares represents the number of cpu shares the task group - * is eligible to hold on a single cpu. On N cpus, it is - * eligible to hold (N * tg->shares) number of cpu shares. - */ - total_shares = tg->shares * cpus_weight(sdspan); - - /* - * redistribute total_shares across cpus as per the task load - * distribution. - */ - for_each_cpu_mask(i, sdspan) { - unsigned long local_load, local_shares; - - local_load = tg->cfs_rq[i]->load.weight; - local_shares = (local_load * total_shares) / total_load; - if (!local_shares) - local_shares = MIN_GROUP_SHARES; - if (local_shares == tg->se[i]->load.weight) - continue; - - spin_lock_irq(&cpu_rq(i)->lock); - set_se_shares(tg->se[i], local_shares); - spin_unlock_irq(&cpu_rq(i)->lock); - balanced = 0; - } - } - - return balanced; -} - -/* - * How frequently should we rebalance_shares() across cpus? - * - * The more frequently we rebalance shares, the more accurate is the fairness - * of cpu bandwidth distribution between task groups. However higher frequency - * also implies increased scheduling overhead. - * - * sysctl_sched_min_bal_int_shares represents the minimum interval between - * consecutive calls to rebalance_shares() in the same sched domain. - * - * sysctl_sched_max_bal_int_shares represents the maximum interval between - * consecutive calls to rebalance_shares() in the same sched domain. 
- * - * These settings allows for the appropriate trade-off between accuracy of - * fairness and the associated overhead. - * - */ - -/* default: 8ms, units: milliseconds */ -const_debug unsigned int sysctl_sched_min_bal_int_shares = 8; - -/* default: 128ms, units: milliseconds */ -const_debug unsigned int sysctl_sched_max_bal_int_shares = 128; - -/* kernel thread that runs rebalance_shares() periodically */ -static int load_balance_monitor(void *unused) -{ - unsigned int timeout = sysctl_sched_min_bal_int_shares; - struct sched_param schedparm; - int ret; - - /* - * We don't want this thread's execution to be limited by the shares - * assigned to default group (init_task_group). Hence make it run - * as a SCHED_RR RT task at the lowest priority. - */ - schedparm.sched_priority = 1; - ret = sched_setscheduler(current, SCHED_RR, &schedparm); - if (ret) - printk(KERN_ERR "Couldn't set SCHED_RR policy for load balance" - " monitor thread (error = %d) \n", ret); - - while (!kthread_should_stop()) { - int i, cpu, balanced = 1; - - /* Prevent cpus going down or coming up */ - get_online_cpus(); - /* lockout changes to doms_cur[] array */ - lock_doms_cur(); - /* - * Enter a rcu read-side critical section to safely walk rq->sd - * chain on various cpus and to walk task group list - * (rq->leaf_cfs_rq_list) in rebalance_shares(). - */ - rcu_read_lock(); - - for (i = 0; i < ndoms_cur; i++) { - cpumask_t cpumap = doms_cur[i]; - struct sched_domain *sd = NULL, *sd_prev = NULL; - - cpu = first_cpu(cpumap); - - /* Find the highest domain at which to balance shares */ - for_each_domain(cpu, sd) { - if (!(sd->flags & SD_LOAD_BALANCE)) - continue; - sd_prev = sd; - } - - sd = sd_prev; - /* sd == NULL? 
No load balance reqd in this domain */ - if (!sd) - continue; - - balanced &= rebalance_shares(sd, cpu); - } - - rcu_read_unlock(); - - unlock_doms_cur(); - put_online_cpus(); - - if (!balanced) - timeout = sysctl_sched_min_bal_int_shares; - else if (timeout < sysctl_sched_max_bal_int_shares) - timeout *= 2; - - msleep_interruptible(timeout); - } - - return 0; -} -#endif /* CONFIG_SMP */ - #ifdef CONFIG_FAIR_GROUP_SCHED static void free_fair_sched_group(struct task_group *tg) { @@ -7841,29 +7635,25 @@ void sched_move_task(struct task_struct *tsk) } #ifdef CONFIG_FAIR_GROUP_SCHED -/* rq->lock to be locked by caller */ static void set_se_shares(struct sched_entity *se, unsigned long shares) { struct cfs_rq *cfs_rq = se->cfs_rq; struct rq *rq = cfs_rq->rq; int on_rq; - if (!shares) - shares = MIN_GROUP_SHARES; + spin_lock_irq(&rq->lock); on_rq = se->on_rq; - if (on_rq) { + if (on_rq) dequeue_entity(cfs_rq, se, 0); - dec_cpu_load(rq, se->load.weight); - } se->load.weight = shares; se->load.inv_weight = div64_64((1ULL<<32), shares); - if (on_rq) { + if (on_rq) enqueue_entity(cfs_rq, se, 0); - inc_cpu_load(rq, se->load.weight); - } + + spin_unlock_irq(&rq->lock); } static DEFINE_MUTEX(shares_mutex); @@ -7873,18 +7663,18 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) int i; unsigned long flags; + /* + * A weight of 0 or 1 can cause arithmetics problems. + * (The default weight is 1024 - so there's no practical + * limitation from this.) + */ + if (shares < 2) + shares = 2; + mutex_lock(&shares_mutex); if (tg->shares == shares) goto done; - if (shares < MIN_GROUP_SHARES) - shares = MIN_GROUP_SHARES; - - /* - * Prevent any load balance activity (rebalance_shares, - * load_balance_fair) from referring to this group first, - * by taking it off the rq->leaf_cfs_rq_list on each cpu. 
- */ spin_lock_irqsave(&task_group_lock, flags); for_each_possible_cpu(i) unregister_fair_sched_group(tg, i); @@ -7898,11 +7688,8 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) * w/o tripping rebalance_share or load_balance_fair. */ tg->shares = shares; - for_each_possible_cpu(i) { - spin_lock_irq(&cpu_rq(i)->lock); + for_each_possible_cpu(i) set_se_shares(tg->se[i], shares); - spin_unlock_irq(&cpu_rq(i)->lock); - } /* * Enable load balance activity on this group, by inserting it back on diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index c8e6492c5925..3df4d46994ca 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -727,8 +727,6 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se) return se->parent; } -#define GROUP_IMBALANCE_PCT 20 - #else /* CONFIG_FAIR_GROUP_SCHED */ #define for_each_sched_entity(se) \ @@ -819,26 +817,15 @@ hrtick_start_fair(struct rq *rq, struct task_struct *p) static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup) { struct cfs_rq *cfs_rq; - struct sched_entity *se = &p->se, - *topse = NULL; /* Highest schedulable entity */ - int incload = 1; + struct sched_entity *se = &p->se; for_each_sched_entity(se) { - topse = se; - if (se->on_rq) { - incload = 0; + if (se->on_rq) break; - } cfs_rq = cfs_rq_of(se); enqueue_entity(cfs_rq, se, wakeup); wakeup = 1; } - /* Increment cpu load if we just enqueued the first task of a group on - * 'rq->cpu'. 'topse' represents the group to which task 'p' belongs - * at the highest grouping level. 
- */ - if (incload) - inc_cpu_load(rq, topse->load.weight); hrtick_start_fair(rq, rq->curr); } @@ -851,28 +838,16 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup) static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep) { struct cfs_rq *cfs_rq; - struct sched_entity *se = &p->se, - *topse = NULL; /* Highest schedulable entity */ - int decload = 1; + struct sched_entity *se = &p->se; for_each_sched_entity(se) { - topse = se; cfs_rq = cfs_rq_of(se); dequeue_entity(cfs_rq, se, sleep); /* Don't dequeue parent if it has other entities besides us */ - if (cfs_rq->load.weight) { - if (parent_entity(se)) - decload = 0; + if (cfs_rq->load.weight) break; - } sleep = 1; } - /* Decrement cpu load if we just dequeued the last task of a group on - * 'rq->cpu'. 'topse' represents the group to which task 'p' belongs - * at the highest grouping level. - */ - if (decload) - dec_cpu_load(rq, topse->load.weight); hrtick_start_fair(rq, rq->curr); } @@ -1186,6 +1161,25 @@ static struct task_struct *load_balance_next_fair(void *arg) return __load_balance_iterator(cfs_rq, cfs_rq->rb_load_balance_curr); } +#ifdef CONFIG_FAIR_GROUP_SCHED +static int cfs_rq_best_prio(struct cfs_rq *cfs_rq) +{ + struct sched_entity *curr; + struct task_struct *p; + + if (!cfs_rq->nr_running || !first_fair(cfs_rq)) + return MAX_PRIO; + + curr = cfs_rq->curr; + if (!curr) + curr = __pick_next_entity(cfs_rq); + + p = task_of(curr); + + return p->prio; +} +#endif + static unsigned long load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_load_move, @@ -1195,45 +1189,28 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, struct cfs_rq *busy_cfs_rq; long rem_load_move = max_load_move; struct rq_iterator cfs_rq_iterator; - unsigned long load_moved; cfs_rq_iterator.start = load_balance_start_fair; cfs_rq_iterator.next = load_balance_next_fair; for_each_leaf_cfs_rq(busiest, busy_cfs_rq) { #ifdef 
CONFIG_FAIR_GROUP_SCHED - struct cfs_rq *this_cfs_rq = busy_cfs_rq->tg->cfs_rq[this_cpu]; - unsigned long maxload, task_load, group_weight; - unsigned long thisload, per_task_load; - struct sched_entity *se = busy_cfs_rq->tg->se[busiest->cpu]; - - task_load = busy_cfs_rq->load.weight; - group_weight = se->load.weight; + struct cfs_rq *this_cfs_rq; + long imbalance; + unsigned long maxload; - /* - * 'group_weight' is contributed by tasks of total weight - * 'task_load'. To move 'rem_load_move' worth of weight only, - * we need to move a maximum task load of: - * - * maxload = (remload / group_weight) * task_load; - */ - maxload = (rem_load_move * task_load) / group_weight; + this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu); - if (!maxload || !task_load) + imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight; + /* Don't pull if this_cfs_rq has more load than busy_cfs_rq */ + if (imbalance <= 0) continue; - per_task_load = task_load / busy_cfs_rq->nr_running; - /* - * balance_tasks will try to forcibly move atleast one task if - * possible (because of SCHED_LOAD_SCALE_FUZZ). Avoid that if - * maxload is less than GROUP_IMBALANCE_FUZZ% the per_task_load. 
- */ - if (100 * maxload < GROUP_IMBALANCE_PCT * per_task_load) - continue; + /* Don't pull more than imbalance/2 */ + imbalance /= 2; + maxload = min(rem_load_move, imbalance); - /* Disable priority-based load balance */ - *this_best_prio = 0; - thisload = this_cfs_rq->load.weight; + *this_best_prio = cfs_rq_best_prio(this_cfs_rq); #else # define maxload rem_load_move #endif @@ -1242,33 +1219,11 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, * load_balance_[start|next]_fair iterators */ cfs_rq_iterator.arg = busy_cfs_rq; - load_moved = balance_tasks(this_rq, this_cpu, busiest, + rem_load_move -= balance_tasks(this_rq, this_cpu, busiest, maxload, sd, idle, all_pinned, this_best_prio, &cfs_rq_iterator); -#ifdef CONFIG_FAIR_GROUP_SCHED - /* - * load_moved holds the task load that was moved. The - * effective (group) weight moved would be: - * load_moved_eff = load_moved/task_load * group_weight; - */ - load_moved = (group_weight * load_moved) / task_load; - - /* Adjust shares on both cpus to reflect load_moved */ - group_weight -= load_moved; - set_se_shares(se, group_weight); - - se = busy_cfs_rq->tg->se[this_cpu]; - if (!thisload) - group_weight = load_moved; - else - group_weight = se->load.weight + load_moved; - set_se_shares(se, group_weight); -#endif - - rem_load_move -= load_moved; - if (rem_load_move <= 0) break; } diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index f54792b175b2..76e828517541 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -393,8 +393,6 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup) */ for_each_sched_rt_entity(rt_se) enqueue_rt_entity(rt_se); - - inc_cpu_load(rq, p->se.load.weight); } static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) @@ -414,8 +412,6 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) if (rt_rq && rt_rq->rt_nr_running) enqueue_rt_entity(rt_se); } - - dec_cpu_load(rq, p->se.load.weight); } /* diff 
--git a/kernel/sysctl.c b/kernel/sysctl.c index 8b7e95411795..b2a2d6889bab 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -311,24 +311,6 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, -#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP) - { - .ctl_name = CTL_UNNUMBERED, - .procname = "sched_min_bal_int_shares", - .data = &sysctl_sched_min_bal_int_shares, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "sched_max_bal_int_shares", - .data = &sysctl_sched_max_bal_int_shares, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, -#endif #endif { .ctl_name = CTL_UNNUMBERED, -- cgit v1.2.3 From ec8670f1f795badedaa056a3a3245b9b82201747 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 1 Mar 2008 07:51:29 -0700 Subject: dmaengine: fix sparse warning include/linux/dmaengine.h:364:2: warning: returning void-valued expression Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index acbb364674ff..261e43a4c873 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -366,7 +366,7 @@ __dma_has_cap(enum dma_transaction_type tx_type, dma_cap_mask_t *srcp) */ static inline void dma_async_issue_pending(struct dma_chan *chan) { - return chan->device->device_issue_pending(chan); + chan->device->device_issue_pending(chan); } #define dma_async_memcpy_issue_pending(chan) dma_async_issue_pending(chan) -- cgit v1.2.3 From d9452e9f81e997cbd0c9bface8d2c2a4b064cc3e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 4 Mar 2008 12:28:49 -0800 Subject: [NETPOLL]: Revert two bogus cleanups that broke netconsole. Based upon a report by Andrew Morton and code analysis done by Jarek Poplawski. 
This reverts 33f807ba0d9259e7c75c7a2ce8bd2787e5b540c7 ("[NETPOLL]: Kill NETPOLL_RX_DROP, set but never tested.") and c7b6ea24b43afb5749cb704e143df19d70e23dea ("[NETPOLL]: Don't need rx_flags."). The rx_flags did get tested for zero vs. non-zero and therefore we do need those tests and that code which sets NETPOLL_RX_DROP et al. Signed-off-by: David S. Miller --- include/linux/netpoll.h | 7 ++++--- net/core/netpoll.c | 12 ++++++++---- 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index a0525a1f4715..e3d79593fb3a 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -25,6 +25,7 @@ struct netpoll { struct netpoll_info { atomic_t refcnt; + int rx_flags; spinlock_t rx_lock; struct netpoll *rx_np; /* netpoll that registered an rx_hook */ struct sk_buff_head arp_tx; /* list of arp requests to reply to */ @@ -50,12 +51,12 @@ static inline int netpoll_rx(struct sk_buff *skb) unsigned long flags; int ret = 0; - if (!npinfo || !npinfo->rx_np) + if (!npinfo || (!npinfo->rx_np && !npinfo->rx_flags)) return 0; spin_lock_irqsave(&npinfo->rx_lock, flags); - /* check rx_np again with the lock held */ - if (npinfo->rx_np && __netpoll_rx(skb)) + /* check rx_flags again with the lock held */ + if (npinfo->rx_flags && __netpoll_rx(skb)) ret = 1; spin_unlock_irqrestore(&npinfo->rx_lock, flags); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 6faa128a4c8e..4b7e756181c9 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -39,6 +39,8 @@ static struct sk_buff_head skb_pool; static atomic_t trapped; #define USEC_PER_POLL 50 +#define NETPOLL_RX_ENABLED 1 +#define NETPOLL_RX_DROP 2 #define MAX_SKB_SIZE \ (MAX_UDP_CHUNK + sizeof(struct udphdr) + \ @@ -126,11 +128,13 @@ static int poll_one_napi(struct netpoll_info *npinfo, if (!test_bit(NAPI_STATE_SCHED, &napi->state)) return budget; + npinfo->rx_flags |= NETPOLL_RX_DROP; atomic_inc(&trapped); work = napi->poll(napi, 
budget); atomic_dec(&trapped); + npinfo->rx_flags &= ~NETPOLL_RX_DROP; return budget - work; } @@ -472,7 +476,7 @@ int __netpoll_rx(struct sk_buff *skb) if (skb->dev->type != ARPHRD_ETHER) goto out; - /* if receive ARP during middle of NAPI poll, then queue */ + /* check if netpoll clients need ARP */ if (skb->protocol == htons(ETH_P_ARP) && atomic_read(&trapped)) { skb_queue_tail(&npi->arp_tx, skb); @@ -534,9 +538,6 @@ int __netpoll_rx(struct sk_buff *skb) return 1; out: - /* If packet received while already in poll then just - * silently drop. - */ if (atomic_read(&trapped)) { kfree_skb(skb); return 1; @@ -675,6 +676,7 @@ int netpoll_setup(struct netpoll *np) goto release; } + npinfo->rx_flags = 0; npinfo->rx_np = NULL; spin_lock_init(&npinfo->rx_lock); @@ -756,6 +758,7 @@ int netpoll_setup(struct netpoll *np) if (np->rx_hook) { spin_lock_irqsave(&npinfo->rx_lock, flags); + npinfo->rx_flags |= NETPOLL_RX_ENABLED; npinfo->rx_np = np; spin_unlock_irqrestore(&npinfo->rx_lock, flags); } @@ -797,6 +800,7 @@ void netpoll_cleanup(struct netpoll *np) if (npinfo->rx_np == np) { spin_lock_irqsave(&npinfo->rx_lock, flags); npinfo->rx_np = NULL; + npinfo->rx_flags &= ~NETPOLL_RX_ENABLED; spin_unlock_irqrestore(&npinfo->rx_lock, flags); } -- cgit v1.2.3 From 3634634edd49c115da931998b9540bcc17665b05 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Wed, 13 Feb 2008 17:08:16 -0800 Subject: debugfs: fix sparse warnings extern does not belong in C files, move declaration to linux/debugfs.h fs/debugfs/file.c:42:30: warning: symbol 'debugfs_file_operations' was not declared. Should it be static? fs/debugfs/file.c:54:31: warning: symbol 'debugfs_link_operations' was not declared. Should it be static? 
Signed-off-by: Harvey Harrison Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/inode.c | 4 ---- include/linux/debugfs.h | 5 +++++ 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index d26e2826ba5b..e9602d85c11d 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -29,10 +29,6 @@ #define DEBUGFS_MAGIC 0x64626720 -/* declared over in file.c */ -extern struct file_operations debugfs_file_operations; -extern struct inode_operations debugfs_link_operations; - static struct vfsmount *debugfs_mount; static int debugfs_mount_count; diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index f592d6de3b97..7266124361b4 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -27,6 +27,11 @@ struct debugfs_blob_wrapper { }; #if defined(CONFIG_DEBUG_FS) + +/* declared over in file.c */ +extern const struct file_operations debugfs_file_operations; +extern const struct inode_operations debugfs_link_operations; + struct dentry *debugfs_create_file(const char *name, mode_t mode, struct dentry *parent, void *data, const struct file_operations *fops); -- cgit v1.2.3 From d6d914f52b15d5a8e81ad481e02d9ab30d412a29 Mon Sep 17 00:00:00 2001 From: Lei Ming Date: Mon, 25 Feb 2008 18:07:28 +0800 Subject: USB: fix comment of struct usb_interface update the comment for the removed "driver" field and being out-of-order of @cur_altsetting and @num_altsetting. Signed-off-by: Lei Ming Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 5bd3ae8aaaf4..583e0481dfa0 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -94,10 +94,9 @@ enum usb_interface_condition { * @altsetting: array of interface structures, one for each alternate * setting that may be selected. Each one includes a set of * endpoint configurations. 
They will be in no particular order. - * @num_altsetting: number of altsettings defined. * @cur_altsetting: the current altsetting. + * @num_altsetting: number of altsettings defined. * @intf_assoc: interface association descriptor - * @driver: the USB driver that is bound to this interface. * @minor: the minor number assigned to this interface, if this * interface is bound to a driver that uses the USB major number. * If this interface does not use the USB major, this field should -- cgit v1.2.3 From 90a1ba0c5e39eeea278f263c28ae02166c5911c8 Mon Sep 17 00:00:00 2001 From: Jonas Bonn Date: Fri, 22 Feb 2008 11:02:21 +0100 Subject: PCI: Add DECLARE_PCI_DEVICE_TABLE macro The definitions of struct pci_device_id arrays should generally follow the same pattern across the entire kernel. This macro defines this array as const and puts it into the __devinitconst section. There are currently many definitions scattered about the kernel that omit the __devinitdata modifier despite the documentation stating that it should always be there. These definitions really also should have been const, which wasn't possible before but has become so with the addition of the __devinitconst attribute. Furthermore, there are definitions that use "const" and __devinitdata, which is explicitly wrong but the compiler doesn't catch section mismatches if there's only one such one case in the module (which is often the case). Adding the __devinitconst modifier where there was nothing before buys us memory. Adding the const modifier gives the compiler a chance to do its thing. Changing __devinitdata to __devinitconst where it was wrong actually fixes some compiler errors in older (mid-release) kernels that were patched over by "removing" the section attribute altogether (which wastes memory). This macro makes it pretty difficult to get this definition wrong in the future... 
Signed-off-by: Jonas Bonn Signed-off-by: Greg Kroah-Hartman --- Documentation/pci.txt | 6 ++++-- include/linux/pci.h | 10 ++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/Documentation/pci.txt b/Documentation/pci.txt index 72b20c639596..bb7bd27d4682 100644 --- a/Documentation/pci.txt +++ b/Documentation/pci.txt @@ -123,7 +123,8 @@ initialization with a pointer to a structure describing the driver The ID table is an array of struct pci_device_id entries ending with an -all-zero entry. Each entry consists of: +all-zero entry; use of the macro DECLARE_PCI_DEVICE_TABLE is the preferred +method of declaring the table. Each entry consists of: vendor,device Vendor and device ID to match (or PCI_ANY_ID) @@ -191,7 +192,8 @@ Tips on when/where to use the above attributes: o Do not mark the struct pci_driver. - o The ID table array should be marked __devinitdata. + o The ID table array should be marked __devinitconst; this is done + automatically if the table is declared with DECLARE_PCI_DEVICE_TABLE(). o The probe() and remove() functions should be marked __devinit and __devexit respectively. All initialization functions diff --git a/include/linux/pci.h b/include/linux/pci.h index 87195b62de52..f3165e7ac431 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -388,6 +388,16 @@ struct pci_driver { #define to_pci_driver(drv) container_of(drv, struct pci_driver, driver) +/** + * DECLARE_PCI_DEVICE_TABLE - macro used to describe a pci device table + * @_table: device table name + * + * This macro is used to create a struct pci_device_id array (a device table) + * in a generic manner. 
+ */ +#define DECLARE_PCI_DEVICE_TABLE(_table) \ + const struct pci_device_id _table[] __devinitconst + /** * PCI_DEVICE - macro used to describe a specific pci device * @vend: the 16 bit PCI Vendor ID -- cgit v1.2.3 From 7560fa60fcdcdb0da662f6a9fad9064b554ef46c Mon Sep 17 00:00:00 2001 From: David Brownell Date: Tue, 4 Mar 2008 14:28:27 -0800 Subject: gpio: <linux/gpio.h> and "no GPIO support here" stubs Add a <linux/gpio.h> defining fail/warn stubs for GPIO calls on platforms that don't support the GPIO programming interface. That includes the <asm/gpio.h> arch-specific implementation glue otherwise. This facilitates a new model for GPIO usage: drivers that can use GPIOs if they're available, but don't require them. One example of such a driver is the NAND driver for various FreeScale chips. On platforms updated with GPIO support, they can be used instead of a worst-case delay to verify that the BUSY signal is off. (Also includes a couple minor unrelated doc updates.) Signed-off-by: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/gpio.txt | 16 ++++++--- include/linux/gpio.h | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+), 4 deletions(-) create mode 100644 include/linux/gpio.h (limited to 'include/linux') diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt index 8da724e2a0ff..54630095aa3c 100644 --- a/Documentation/gpio.txt +++ b/Documentation/gpio.txt @@ -2,6 +2,9 @@ GPIO Interfaces This provides an overview of GPIO access conventions on Linux. +These calls use the gpio_* naming prefix. No other calls should use that +prefix, or the related __gpio_* prefix. + What is a GPIO? =============== @@ -69,11 +72,13 @@ in this document, but drivers acting as clients to the GPIO interface must not care how it's implemented.) That said, if the convention is supported on their platform, drivers should -use it when possible.
Platforms should declare GENERIC_GPIO support in -Kconfig (boolean true), which multi-platform drivers can depend on when -using the include file: +use it when possible. Platforms must declare GENERIC_GPIO support in their +Kconfig (boolean true), and provide an <asm/gpio.h> file. Drivers that can't +work without standard GPIO calls should have Kconfig entries which depend +on GENERIC_GPIO. The GPIO calls are available, either as "real code" or as +optimized-away stubs, when drivers use the include file: - #include <asm/gpio.h> + #include <linux/gpio.h> If you stick to this convention then it'll be easier for other developers to see what your code is doing, and help maintain it. @@ -316,6 +321,9 @@ pulldowns integrated on some platforms. Not all platforms support them, or support them in the same way; and any given board might use external pullups (or pulldowns) so that the on-chip ones should not be used. (When a circuit needs 5 kOhm, on-chip 100 kOhm resistors won't do.) +Likewise drive strength (2 mA vs 20 mA) and voltage (1.8V vs 3.3V) is a +platform-specific issue, as are models like (not) having a one-to-one +correspondence between configurable pins and GPIOs. There are other system-specific mechanisms that are not specified here, like the aforementioned options for input de-glitching and wire-OR output. diff --git a/include/linux/gpio.h b/include/linux/gpio.h new file mode 100644 index 000000000000..4987a84078ef --- /dev/null +++ b/include/linux/gpio.h @@ -0,0 +1,95 @@ +#ifndef __LINUX_GPIO_H +#define __LINUX_GPIO_H + +/* see Documentation/gpio.txt */ + +#ifdef CONFIG_GENERIC_GPIO +#include <asm/gpio.h> + +#else + +/* + * Some platforms don't support the GPIO programming interface. + * + * In case some driver uses it anyway (it should normally have + * depended on GENERIC_GPIO), these routines help the compiler + * optimize out much GPIO-related code ... or trigger a runtime + * warning when something is wrongly called.
+ */ + +static inline int gpio_is_valid(int number) +{ + return 0; +} + +static inline int gpio_request(unsigned gpio, const char *label) +{ + return -ENOSYS; +} + +static inline void gpio_free(unsigned gpio) +{ + /* GPIO can never have been requested */ + WARN_ON(1); +} + +static inline int gpio_direction_input(unsigned gpio) +{ + return -ENOSYS; +} + +static inline int gpio_direction_output(unsigned gpio, int value) +{ + return -ENOSYS; +} + +static inline int gpio_get_value(unsigned gpio) +{ + /* GPIO can never have been requested or set as {in,out}put */ + WARN_ON(1); + return 0; +} + +static inline void gpio_set_value(unsigned gpio, int value) +{ + /* GPIO can never have been requested or set as output */ + WARN_ON(1); +} + +static inline int gpio_cansleep(unsigned gpio) +{ + /* GPIO can never have been requested or set as {in,out}put */ + WARN_ON(1); + return 0; +} + +static inline int gpio_get_value_cansleep(unsigned gpio) +{ + /* GPIO can never have been requested or set as {in,out}put */ + WARN_ON(1); + return 0; +} + +static inline void gpio_set_value_cansleep(unsigned gpio, int value) +{ + /* GPIO can never have been requested or set as output */ + WARN_ON(1); +} + +static inline int gpio_to_irq(unsigned gpio) +{ + /* GPIO can never have been requested or set as input */ + WARN_ON(1); + return -EINVAL; +} + +static inline int irq_to_gpio(unsigned irq) +{ + /* irq can never have been returned from gpio_to_irq() */ + WARN_ON(1); + return -EINVAL; +} + +#endif + +#endif /* __LINUX_GPIO_H */ -- cgit v1.2.3 From 9edddaa200df18e08fe0cf21036e8ae467b1363c Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Tue, 4 Mar 2008 14:28:37 -0800 Subject: Kprobes: indicate kretprobe support in Kconfig Add CONFIG_HAVE_KRETPROBES to the arch//Kconfig file for relevant architectures with kprobes support. This facilitates easy handling of in-kernel modules (like samples/kprobes/kretprobe_example.c) that depend on kretprobes being present in the kernel. 
Thanks to Sam Ravnborg for helping make the patch more lean. Per Mathieu's suggestion, added CONFIG_KRETPROBES and fixed up dependencies. Signed-off-by: Ananth N Mavinakayanahalli Acked-by: Mathieu Desnoyers Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 7 +++++++ arch/arm/Kconfig | 1 + arch/ia64/Kconfig | 1 + arch/powerpc/Kconfig | 1 + arch/s390/Kconfig | 1 + arch/sparc64/Kconfig | 1 + arch/x86/Kconfig | 1 + include/asm-arm/kprobes.h | 1 - include/asm-ia64/kprobes.h | 1 - include/asm-powerpc/kprobes.h | 1 - include/asm-s390/kprobes.h | 1 - include/asm-sparc64/kprobes.h | 2 -- include/asm-x86/kprobes.h | 1 - include/linux/kprobes.h | 6 +++--- kernel/kprobes.c | 9 +++------ 15 files changed, 19 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/arch/Kconfig b/arch/Kconfig index 3d72dc3fc8f5..694c9af520bb 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -27,5 +27,12 @@ config KPROBES for kernel debugging, non-intrusive instrumentation and testing. If in doubt, say "N". 
+config KRETPROBES + def_bool y + depends on KPROBES && HAVE_KRETPROBES + config HAVE_KPROBES def_bool n + +config HAVE_KRETPROBES + def_bool n diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 16b82e1272b0..955fc53c1c01 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -12,6 +12,7 @@ config ARM select SYS_SUPPORTS_APM_EMULATION select HAVE_OPROFILE select HAVE_KPROBES if (!XIP_KERNEL) + select HAVE_KRETPROBES if (HAVE_KPROBES) help The ARM series is a line of low-power-consumption RISC chip designs licensed by ARM Ltd and targeted at embedded applications and diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index dff9edfc7465..56762d3c2a6a 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -18,6 +18,7 @@ config IA64 select HAVE_IDE select HAVE_OPROFILE select HAVE_KPROBES + select HAVE_KRETPROBES default y help The Itanium Processor Family is Intel's 64-bit successor to diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 5b8d8382b762..1189d8d6170d 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -90,6 +90,7 @@ config PPC select HAVE_IDE select HAVE_OPROFILE select HAVE_KPROBES + select HAVE_KRETPROBES config EARLY_PRINTK bool diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index b21444b681b6..9892827b6176 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -61,6 +61,7 @@ config S390 def_bool y select HAVE_OPROFILE select HAVE_KPROBES + select HAVE_KRETPROBES source "init/Kconfig" diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index 3af378ddb6ae..463d1be32c98 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -10,6 +10,7 @@ config SPARC default y select HAVE_OPROFILE select HAVE_KPROBES + select HAVE_KRETPROBES config SPARC64 bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 53800b80a204..f41c9538ca30 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -21,6 +21,7 @@ config X86 select HAVE_IDE select HAVE_OPROFILE select HAVE_KPROBES + select HAVE_KRETPROBES select HAVE_KVM 
if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) diff --git a/include/asm-arm/kprobes.h b/include/asm-arm/kprobes.h index 4e7bd32288ae..c042194d3ab5 100644 --- a/include/asm-arm/kprobes.h +++ b/include/asm-arm/kprobes.h @@ -20,7 +20,6 @@ #include #include -#define ARCH_SUPPORTS_KRETPROBES #define __ARCH_WANT_KPROBES_INSN_SLOT #define MAX_INSN_SIZE 2 #define MAX_STACK_SIZE 64 /* 32 would probably be OK */ diff --git a/include/asm-ia64/kprobes.h b/include/asm-ia64/kprobes.h index a93ce9ef07ff..adbaba14eb0a 100644 --- a/include/asm-ia64/kprobes.h +++ b/include/asm-ia64/kprobes.h @@ -82,7 +82,6 @@ struct kprobe_ctlblk { struct prev_kprobe prev_kprobe[ARCH_PREV_KPROBE_SZ]; }; -#define ARCH_SUPPORTS_KRETPROBES #define kretprobe_blacklist_size 0 #define SLOT0_OPCODE_SHIFT (37) diff --git a/include/asm-powerpc/kprobes.h b/include/asm-powerpc/kprobes.h index afabad230dbb..d0e7701fa1f6 100644 --- a/include/asm-powerpc/kprobes.h +++ b/include/asm-powerpc/kprobes.h @@ -80,7 +80,6 @@ typedef unsigned int kprobe_opcode_t; #define is_trap(instr) (IS_TW(instr) || IS_TWI(instr)) #endif -#define ARCH_SUPPORTS_KRETPROBES #define flush_insn_slot(p) do { } while (0) #define kretprobe_blacklist_size 0 diff --git a/include/asm-s390/kprobes.h b/include/asm-s390/kprobes.h index 948db3d0d05c..330f68caffe4 100644 --- a/include/asm-s390/kprobes.h +++ b/include/asm-s390/kprobes.h @@ -46,7 +46,6 @@ typedef u16 kprobe_opcode_t; ? 
(MAX_STACK_SIZE) \ : (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) -#define ARCH_SUPPORTS_KRETPROBES #define kretprobe_blacklist_size 0 #define KPROBE_SWAP_INST 0x10 diff --git a/include/asm-sparc64/kprobes.h b/include/asm-sparc64/kprobes.h index 7237dd87663e..5879d71afdaa 100644 --- a/include/asm-sparc64/kprobes.h +++ b/include/asm-sparc64/kprobes.h @@ -14,8 +14,6 @@ typedef u32 kprobe_opcode_t; #define arch_remove_kprobe(p) do {} while (0) -#define ARCH_SUPPORTS_KRETPROBES - #define flush_insn_slot(p) \ do { flushi(&(p)->ainsn.insn[0]); \ flushi(&(p)->ainsn.insn[1]); \ diff --git a/include/asm-x86/kprobes.h b/include/asm-x86/kprobes.h index 143476a3cb52..61ad7b5d142e 100644 --- a/include/asm-x86/kprobes.h +++ b/include/asm-x86/kprobes.h @@ -42,7 +42,6 @@ typedef u8 kprobe_opcode_t; : (((unsigned long)current_thread_info()) + THREAD_SIZE \ - (unsigned long)(ADDR))) -#define ARCH_SUPPORTS_KRETPROBES #define flush_insn_slot(p) do { } while (0) extern const int kretprobe_blacklist_size; diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 4a6ce82ba039..0f28486f6360 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -125,11 +125,11 @@ struct jprobe { DECLARE_PER_CPU(struct kprobe *, current_kprobe); DECLARE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); -#ifdef ARCH_SUPPORTS_KRETPROBES +#ifdef CONFIG_KRETPROBES extern void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs); extern int arch_trampoline_kprobe(struct kprobe *p); -#else /* ARCH_SUPPORTS_KRETPROBES */ +#else /* CONFIG_KRETPROBES */ static inline void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) { @@ -138,7 +138,7 @@ static inline int arch_trampoline_kprobe(struct kprobe *p) { return 0; } -#endif /* ARCH_SUPPORTS_KRETPROBES */ +#endif /* CONFIG_KRETPROBES */ /* * Function-return probe - * Note: diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 7a86e6432338..e6a61dcbc578 100644 --- a/kernel/kprobes.c +++ 
b/kernel/kprobes.c @@ -678,8 +678,7 @@ void __kprobes unregister_jprobe(struct jprobe *jp) unregister_kprobe(&jp->kp); } -#ifdef ARCH_SUPPORTS_KRETPROBES - +#ifdef CONFIG_KRETPROBES /* * This kprobe pre_handler is registered with every kretprobe. When probe * hits it will set up the return probe. @@ -769,8 +768,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp) return ret; } -#else /* ARCH_SUPPORTS_KRETPROBES */ - +#else /* CONFIG_KRETPROBES */ int __kprobes register_kretprobe(struct kretprobe *rp) { return -ENOSYS; @@ -781,8 +779,7 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, { return 0; } - -#endif /* ARCH_SUPPORTS_KRETPROBES */ +#endif /* CONFIG_KRETPROBES */ void __kprobes unregister_kretprobe(struct kretprobe *rp) { -- cgit v1.2.3 From 00f0b8259e48979c37212995d798f3fbd0374690 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Tue, 4 Mar 2008 14:28:39 -0800 Subject: Memory controller: rename to Memory Resource Controller Rename Memory Controller to Memory Resource Controller. Reflect the same changes in the CONFIG definition for the Memory Resource Controller. Group together the config options for Resource Counters and Memory Resource Controller. 
Signed-off-by: Balbir Singh Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/controllers/memory.txt | 8 ++++++-- include/linux/cgroup_subsys.h | 2 +- include/linux/memcontrol.h | 4 ++-- include/linux/mm_types.h | 4 ++-- init/Kconfig | 30 +++++++++++++++--------------- mm/Makefile | 2 +- mm/oom_kill.c | 2 +- mm/vmscan.c | 4 ++-- 8 files changed, 30 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt index fba6af45225c..866b9cd9a959 100644 --- a/Documentation/controllers/memory.txt +++ b/Documentation/controllers/memory.txt @@ -1,4 +1,8 @@ -Memory Controller +Memory Resource Controller + +NOTE: The Memory Resource Controller has generically been referred +to as the memory controller in this document. Do not confuse memory controller +used here with the memory controller that is used in hardware. Salient features @@ -152,7 +156,7 @@ The memory controller uses the following hierarchy a. Enable CONFIG_CGROUPS b. Enable CONFIG_RESOURCE_COUNTERS -c. Enable CONFIG_CGROUP_MEM_CONT +c. Enable CONFIG_CGROUP_MEM_RES_CTLR 1.
Prepare the cgroups # mkdir -p /cgroups diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index ac6aad98b607..1ddebfc52565 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -37,7 +37,7 @@ SUBSYS(cpuacct) /* */ -#ifdef CONFIG_CGROUP_MEM_CONT +#ifdef CONFIG_CGROUP_MEM_RES_CTLR SUBSYS(mem_cgroup) #endif diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 04075628cb9a..a8be8073b9e6 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -25,7 +25,7 @@ struct page_cgroup; struct page; struct mm_struct; -#ifdef CONFIG_CGROUP_MEM_CONT +#ifdef CONFIG_CGROUP_MEM_RES_CTLR extern void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p); extern void mm_free_cgroup(struct mm_struct *mm); @@ -72,7 +72,7 @@ extern long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem, extern long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem, struct zone *zone, int priority); -#else /* CONFIG_CGROUP_MEM_CONT */ +#else /* CONFIG_CGROUP_MEM_RES_CTLR */ static inline void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p) { diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 34023c65d466..af190ceab971 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -88,7 +88,7 @@ struct page { void *virtual; /* Kernel virtual address (NULL if not kmapped, ie. 
highmem) */ #endif /* WANT_PAGE_VIRTUAL */ -#ifdef CONFIG_CGROUP_MEM_CONT +#ifdef CONFIG_CGROUP_MEM_RES_CTLR unsigned long page_cgroup; #endif }; @@ -222,7 +222,7 @@ struct mm_struct { /* aio bits */ rwlock_t ioctx_list_lock; struct kioctx *ioctx_list; -#ifdef CONFIG_CGROUP_MEM_CONT +#ifdef CONFIG_CGROUP_MEM_RES_CTLR struct mem_cgroup *mem_cgroup; #endif }; diff --git a/init/Kconfig b/init/Kconfig index f698a5af5007..442850b984be 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -366,6 +366,21 @@ config RESOURCE_COUNTERS infrastructure that works with cgroups depends on CGROUPS +config CGROUP_MEM_RES_CTLR + bool "Memory Resource Controller for Control Groups" + depends on CGROUPS && RESOURCE_COUNTERS + help + Provides a memory resource controller that manages both page cache and + RSS memory. + + Note that setting this option increases fixed memory overhead + associated with each page of memory in the system by 4/8 bytes + and also increases cache misses because struct page on many 64bit + systems will not fit into a single cache line anymore. + + Only enable when you're ok with these trade offs and really + sure you need the memory resource controller. + config SYSFS_DEPRECATED bool "Create deprecated sysfs files" depends on SYSFS @@ -387,21 +402,6 @@ config SYSFS_DEPRECATED If you are using a distro that was released in 2006 or later, it should be safe to say N here. -config CGROUP_MEM_CONT - bool "Memory controller for cgroups" - depends on CGROUPS && RESOURCE_COUNTERS - help - Provides a memory controller that manages both page cache and - RSS memory. - - Note that setting this option increases fixed memory overhead - associated with each page of memory in the system by 4/8 bytes - and also increases cache misses because struct page on many 64bit - systems will not fit into a single cache line anymore. - - Only enable when you're ok with these trade offs and really - sure you need the memory controller. 
- config PROC_PID_CPUSET bool "Include legacy /proc//cpuset file" depends on CPUSETS diff --git a/mm/Makefile b/mm/Makefile index 9f117bab5322..a5b0dd93427a 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -32,5 +32,5 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_MIGRATION) += migrate.o obj-$(CONFIG_SMP) += allocpercpu.o obj-$(CONFIG_QUICKLIST) += quicklist.o -obj-$(CONFIG_CGROUP_MEM_CONT) += memcontrol.o +obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 4194b9db0104..44b2da11bf43 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -412,7 +412,7 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, return oom_kill_task(p); } -#ifdef CONFIG_CGROUP_MEM_CONT +#ifdef CONFIG_CGROUP_MEM_RES_CTLR void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask) { unsigned long points = 0; diff --git a/mm/vmscan.c b/mm/vmscan.c index a26dabd62fed..106ba10e1ac6 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -126,7 +126,7 @@ long vm_total_pages; /* The total number of pages which the VM controls */ static LIST_HEAD(shrinker_list); static DECLARE_RWSEM(shrinker_rwsem); -#ifdef CONFIG_CGROUP_MEM_CONT +#ifdef CONFIG_CGROUP_MEM_RES_CTLR #define scan_global_lru(sc) (!(sc)->mem_cgroup) #else #define scan_global_lru(sc) (1) @@ -1427,7 +1427,7 @@ unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask) return do_try_to_free_pages(zones, gfp_mask, &sc); } -#ifdef CONFIG_CGROUP_MEM_CONT +#ifdef CONFIG_CGROUP_MEM_RES_CTLR unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, gfp_t gfp_mask) -- cgit v1.2.3 From 735c4fb916e9f83a9350aeb2680d77d01ea75094 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 4 Mar 2008 14:28:40 -0800 Subject: add noinline_for_stack People are adding `noinline' in various places to prevent excess stack consumption due to gcc inlining. But once this is done, it is quite unobvious why the `noinline' is present in the code. 
We can comment each and every site, or we can use noinline_for_stack. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index d0e17e1657dc..dcae0c8d97e6 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -138,6 +138,12 @@ extern void __chk_io_ptr(const volatile void __iomem *); #define noinline #endif +/* + * Rather then using noinline to prevent stack consumption, use + * noinline_for_stack instead. For documentaiton reasons. + */ +#define noinline_for_stack noinline + #ifndef __always_inline #define __always_inline inline #endif -- cgit v1.2.3 From 5cba6d22e35a05adb28fdea191b232501518c455 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 4 Mar 2008 14:28:45 -0800 Subject: ndelay(): switch to C function to avoid 64-bit division We should be able to do ndelay(some_u64), but that can cause a call to __divdi3() to be emitted because the ndelay() macro does a divide. Fix it by switching to static inline which will force the u64 arg to be treated as an unsigned long. udelay() takes an unsigned long arg. [bunk@kernel.org: reported m68k build breakage] Cc: Adrian Bunk Cc: Evgeniy Polyakov Cc: Martin Michlmayr Cc: Herbert Xu Cc: Ralf Baechle Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/delay.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/delay.h b/include/linux/delay.h index 17ddb55430ae..54552d21296e 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -7,6 +7,8 @@ * Delay routines, using a pre-computed "loops_per_jiffy" value.
*/ +#include + extern unsigned long loops_per_jiffy; #include @@ -32,7 +34,11 @@ extern unsigned long loops_per_jiffy; #endif #ifndef ndelay -#define ndelay(x) udelay(((x)+999)/1000) +static inline void ndelay(unsigned long x) +{ + udelay(DIV_ROUND_UP(x, 1000)); +} +#define ndelay(x) ndelay(x) #endif void calibrate_delay(void); -- cgit v1.2.3 From 3149be50d3a31df095bcc83d752293da65a37f62 Mon Sep 17 00:00:00 2001 From: Ville Syrjala Date: Tue, 4 Mar 2008 14:28:50 -0800 Subject: sm501: add support for the SM502 programmable PLL SM502 has a programmable PLL which can provide the panel pixel clock instead of the 288MHz and 336MHz PLLs. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Ville Syrjala Cc: Ben Dooks Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mfd/sm501.c | 163 +++++++++++++++++++++++++++++++++++---------- include/linux/sm501-regs.h | 3 + include/linux/sm501.h | 3 +- 3 files changed, 133 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c index 4de8d467762a..13bac53db69a 100644 --- a/drivers/mfd/sm501.c +++ b/drivers/mfd/sm501.c @@ -48,6 +48,7 @@ struct sm501_devdata { unsigned int pdev_id; unsigned int irq; void __iomem *regs; + unsigned int rev; }; #define MHZ (1000 * 1000) @@ -417,46 +418,108 @@ struct sm501_clock { unsigned long mclk; int divider; int shift; + unsigned int m, n, k; }; +/* sm501_calc_clock + * + * Calculates the nearest discrete clock frequency that + * can be achieved with the specified input clock. 
+ * the maximum divisor is 3 or 5 + */ + +static int sm501_calc_clock(unsigned long freq, + struct sm501_clock *clock, + int max_div, + unsigned long mclk, + long *best_diff) +{ + int ret = 0; + int divider; + int shift; + long diff; + + /* try dividers 1 and 3 for CRT and for panel, + try divider 5 for panel only.*/ + + for (divider = 1; divider <= max_div; divider += 2) { + /* try all 8 shift values.*/ + for (shift = 0; shift < 8; shift++) { + /* Calculate difference to requested clock */ + diff = sm501fb_round_div(mclk, divider << shift) - freq; + if (diff < 0) + diff = -diff; + + /* If it is less than the current, use it */ + if (diff < *best_diff) { + *best_diff = diff; + + clock->mclk = mclk; + clock->divider = divider; + clock->shift = shift; + ret = 1; + } + } + } + + return ret; +} + +/* sm501_calc_pll + * + * Calculates the nearest discrete clock frequency that can be + * achieved using the programmable PLL. + * the maximum divisor is 3 or 5 + */ + +static unsigned long sm501_calc_pll(unsigned long freq, + struct sm501_clock *clock, + int max_div) +{ + unsigned long mclk; + unsigned int m, n, k; + long best_diff = 999999999; + + /* + * The SM502 datasheet doesn't specify the min/max values for M and N. + * N = 1 at least doesn't work in practice. + */ + for (m = 2; m <= 255; m++) { + for (n = 2; n <= 127; n++) { + for (k = 0; k <= 1; k++) { + mclk = (24000000UL * m / n) >> k; + + if (sm501_calc_clock(freq, clock, max_div, + mclk, &best_diff)) { + clock->m = m; + clock->n = n; + clock->k = k; + } + } + } + } + + /* Return best clock. */ + return clock->mclk / (clock->divider << clock->shift); +} + /* sm501_select_clock * - * selects nearest discrete clock frequency the SM501 can achive + * Calculates the nearest discrete clock frequency that can be + * achieved using the 288MHz and 336MHz PLLs. 
* the maximum divisor is 3 or 5 */ + static unsigned long sm501_select_clock(unsigned long freq, struct sm501_clock *clock, int max_div) { unsigned long mclk; - int divider; - int shift; - long diff; long best_diff = 999999999; /* Try 288MHz and 336MHz clocks. */ for (mclk = 288000000; mclk <= 336000000; mclk += 48000000) { - /* try dividers 1 and 3 for CRT and for panel, - try divider 5 for panel only.*/ - - for (divider = 1; divider <= max_div; divider += 2) { - /* try all 8 shift values.*/ - for (shift = 0; shift < 8; shift++) { - /* Calculate difference to requested clock */ - diff = sm501fb_round_div(mclk, divider << shift) - freq; - if (diff < 0) - diff = -diff; - - /* If it is less than the current, use it */ - if (diff < best_diff) { - best_diff = diff; - - clock->mclk = mclk; - clock->divider = divider; - clock->shift = shift; - } - } - } + sm501_calc_clock(freq, clock, max_div, mclk, &best_diff); } /* Return best clock. */ @@ -478,6 +541,7 @@ unsigned long sm501_set_clock(struct device *dev, unsigned long gate = readl(sm->regs + SM501_CURRENT_GATE); unsigned long clock = readl(sm->regs + SM501_CURRENT_CLOCK); unsigned char reg; + unsigned int pll_reg = 0; unsigned long sm501_freq; /* the actual frequency acheived */ struct sm501_clock to; @@ -492,14 +556,28 @@ unsigned long sm501_set_clock(struct device *dev, * requested frequency the value must be multiplied by * 2. 
This clock also has an additional pre divisor */ - sm501_freq = (sm501_select_clock(2 * req_freq, &to, 5) / 2); - reg=to.shift & 0x07;/* bottom 3 bits are shift */ - if (to.divider == 3) - reg |= 0x08; /* /3 divider required */ - else if (to.divider == 5) - reg |= 0x10; /* /5 divider required */ - if (to.mclk != 288000000) - reg |= 0x20; /* which mclk pll is source */ + if (sm->rev >= 0xC0) { + /* SM502 -> use the programmable PLL */ + sm501_freq = (sm501_calc_pll(2 * req_freq, + &to, 5) / 2); + reg = to.shift & 0x07;/* bottom 3 bits are shift */ + if (to.divider == 3) + reg |= 0x08; /* /3 divider required */ + else if (to.divider == 5) + reg |= 0x10; /* /5 divider required */ + reg |= 0x40; /* select the programmable PLL */ + pll_reg = 0x20000 | (to.k << 15) | (to.n << 8) | to.m; + } else { + sm501_freq = (sm501_select_clock(2 * req_freq, + &to, 5) / 2); + reg = to.shift & 0x07;/* bottom 3 bits are shift */ + if (to.divider == 3) + reg |= 0x08; /* /3 divider required */ + else if (to.divider == 5) + reg |= 0x10; /* /5 divider required */ + if (to.mclk != 288000000) + reg |= 0x20; /* which mclk pll is source */ + } break; case SM501_CLOCK_V2XCLK: @@ -560,6 +638,10 @@ unsigned long sm501_set_clock(struct device *dev, } writel(mode, sm->regs + SM501_POWER_MODE_CONTROL); + + if (pll_reg) + writel(pll_reg, sm->regs + SM501_PROGRAMMABLE_PLL_CONTROL); + sm501_sync_regs(sm); dev_info(sm->dev, "gate %08lx, clock %08lx, mode %08lx\n", @@ -580,15 +662,24 @@ EXPORT_SYMBOL_GPL(sm501_set_clock); * finds the closest available frequency for a given clock */ -unsigned long sm501_find_clock(int clksrc, +unsigned long sm501_find_clock(struct device *dev, + int clksrc, unsigned long req_freq) { + struct sm501_devdata *sm = dev_get_drvdata(dev); unsigned long sm501_freq; /* the frequency achiveable by the 501 */ struct sm501_clock to; switch (clksrc) { case SM501_CLOCK_P2XCLK: - sm501_freq = (sm501_select_clock(2 * req_freq, &to, 5) / 2); + if (sm->rev >= 0xC0) { + /* SM502 -> use the 
programmable PLL */ + sm501_freq = (sm501_calc_pll(2 * req_freq, + &to, 5) / 2); + } else { + sm501_freq = (sm501_select_clock(2 * req_freq, + &to, 5) / 2); + } break; case SM501_CLOCK_V2XCLK: @@ -895,6 +986,8 @@ static int sm501_init_dev(struct sm501_devdata *sm) dev_info(sm->dev, "SM501 At %p: Version %08lx, %ld Mb, IRQ %d\n", sm->regs, devid, (unsigned long)mem_avail >> 20, sm->irq); + sm->rev = devid & SM501_DEVICEID_REVMASK; + sm501_dump_gate(sm); ret = device_create_file(sm->dev, &dev_attr_dbg_regs); diff --git a/include/linux/sm501-regs.h b/include/linux/sm501-regs.h index 64236b73c724..d53642d2d899 100644 --- a/include/linux/sm501-regs.h +++ b/include/linux/sm501-regs.h @@ -129,11 +129,14 @@ #define SM501_DEVICEID_SM501 (0x05010000) #define SM501_DEVICEID_IDMASK (0xffff0000) +#define SM501_DEVICEID_REVMASK (0x000000ff) #define SM501_PLLCLOCK_COUNT (0x000064) #define SM501_MISC_TIMING (0x000068) #define SM501_CURRENT_SDRAM_CLOCK (0x00006C) +#define SM501_PROGRAMMABLE_PLL_CONTROL (0x000074) + /* GPIO base */ #define SM501_GPIO (0x010000) #define SM501_GPIO_DATA_LOW (0x00) diff --git a/include/linux/sm501.h b/include/linux/sm501.h index 932a9efee8a5..bca134544700 100644 --- a/include/linux/sm501.h +++ b/include/linux/sm501.h @@ -24,7 +24,8 @@ extern int sm501_unit_power(struct device *dev, extern unsigned long sm501_set_clock(struct device *dev, int clksrc, unsigned long freq); -extern unsigned long sm501_find_clock(int clksrc, unsigned long req_freq); +extern unsigned long sm501_find_clock(struct device *dev, + int clksrc, unsigned long req_freq); /* sm501_misc_control * -- cgit v1.2.3 From 040922c04cf2c8ac70be2e88a8a9614ecdb41d2e Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Tue, 4 Mar 2008 14:28:53 -0800 Subject: include falloc.h in header-y Include falloc.h in header-y; it defines a flag for the fallocate syscall.
Signed-off-by: Eric Sandeen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index aada32fffec2..994df3780007 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -61,6 +61,7 @@ header-y += efs_fs_sb.h header-y += elf-fdpic.h header-y += elf-em.h header-y += fadvise.h +header-y += falloc.h header-y += fd.h header-y += fdreg.h header-y += fib_rules.h -- cgit v1.2.3 From acc4988bcf38f9618886eaeb9802aeacc6978ec2 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 4 Mar 2008 14:29:00 -0800 Subject: markers: add an if(0) to __mark_check_format() Wrap __mark_check_format() into an if(0) to make sure that parameters such as trace_mark(mm_page_alloc, "order %u pfn %lu", order, page?page_to_pfn(page):0); (where page_to_pfn() has side-effects) won't generate code because of the __mark_check_format(). Thanks to Jan Kiszka for reporting this. Signed-off-by: Mathieu Desnoyers Cc: Jan Kiszka Cc: "Frank Ch. Eigler" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/marker.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/marker.h b/include/linux/marker.h index 5df879dc3776..430f6adf9762 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -104,10 +104,16 @@ static inline void marker_update_probe_range(struct marker *begin, #define MARK_NOARGS " " /* To be used for string format validity checking with gcc */ -static inline void __printf(1, 2) __mark_check_format(const char *fmt, ...) +static inline void __printf(1, 2) ___mark_check_format(const char *fmt, ...) { } +#define __mark_check_format(format, args...) 
\ + do { \ + if (0) \ + ___mark_check_format(format, ## args); \ + } while (0) + extern marker_probe_func __mark_empty_function; extern void marker_probe_cb(const struct marker *mdata, -- cgit v1.2.3 From bd845e38c7a7251a95a8f2c38aa7fb87140b771d Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 4 Mar 2008 14:29:01 -0800 Subject: memcg: mm_match_cgroup not vm_match_cgroup vm_match_cgroup is a perverse name for a macro to match mm with cgroup: rename it mm_match_cgroup, matching mm_init_cgroup and mm_free_cgroup. Signed-off-by: Hugh Dickins Acked-by: David Rientjes Acked-by: Balbir Singh Acked-by: KAMEZAWA Hiroyuki Cc: Hirokazu Takahashi Cc: YAMAMOTO Takashi Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 4 ++-- mm/memcontrol.c | 2 +- mm/rmap.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index a8be8073b9e6..e4247c83c1c7 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -48,7 +48,7 @@ extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); -#define vm_match_cgroup(mm, cgroup) \ +#define mm_match_cgroup(mm, cgroup) \ ((cgroup) == rcu_dereference((mm)->mem_cgroup)) extern int mem_cgroup_prepare_migration(struct page *page); @@ -118,7 +118,7 @@ static inline int mem_cgroup_cache_charge(struct page *page, return 0; } -static inline int vm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *mem) +static inline int mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *mem) { return 1; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 631002d085d1..41041c0a6898 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -399,7 +399,7 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem) int ret; task_lock(task); - ret = task->mm 
&& vm_match_cgroup(task->mm, mem); + ret = task->mm && mm_match_cgroup(task->mm, mem); task_unlock(task); return ret; } diff --git a/mm/rmap.c b/mm/rmap.c index 8fd527c4e2bf..0c9a2df06c39 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -321,7 +321,7 @@ static int page_referenced_anon(struct page *page, * counting on behalf of references from different * cgroups */ - if (mem_cont && !vm_match_cgroup(vma->vm_mm, mem_cont)) + if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont)) continue; referenced += page_referenced_one(page, vma, &mapcount); if (!mapcount) @@ -382,7 +382,7 @@ static int page_referenced_file(struct page *page, * counting on behalf of references from different * cgroups */ - if (mem_cont && !vm_match_cgroup(vma->vm_mm, mem_cont)) + if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont)) continue; if ((vma->vm_flags & (VM_LOCKED|VM_MAYSHARE)) == (VM_LOCKED|VM_MAYSHARE)) { -- cgit v1.2.3 From 427d5416f317681498337ab19218d195edea02d6 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 4 Mar 2008 14:29:03 -0800 Subject: memcg: move_lists on page not page_cgroup Each caller of mem_cgroup_move_lists is having to use page_get_page_cgroup: it's more convenient if it acts upon the page itself not the page_cgroup; and in a later patch this becomes important to handle within memcontrol.c. 
Signed-off-by: Hugh Dickins Cc: David Rientjes Acked-by: Balbir Singh Acked-by: KAMEZAWA Hiroyuki Cc: Hirokazu Takahashi Cc: YAMAMOTO Takashi Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 5 ++--- mm/memcontrol.c | 4 +++- mm/swap.c | 2 +- mm/vmscan.c | 5 +++-- 4 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e4247c83c1c7..56432ff8d4e3 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -36,7 +36,7 @@ extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); extern void mem_cgroup_uncharge(struct page_cgroup *pc); extern void mem_cgroup_uncharge_page(struct page *page); -extern void mem_cgroup_move_lists(struct page_cgroup *pc, bool active); +extern void mem_cgroup_move_lists(struct page *page, bool active); extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, struct list_head *dst, unsigned long *scanned, int order, @@ -106,8 +106,7 @@ static inline void mem_cgroup_uncharge_page(struct page *page) { } -static inline void mem_cgroup_move_lists(struct page_cgroup *pc, - bool active) +static inline void mem_cgroup_move_lists(struct page *page, bool active) { } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 41041c0a6898..afdd406f618a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -407,11 +407,13 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem) /* * This routine assumes that the appropriate zone's lru lock is already held */ -void mem_cgroup_move_lists(struct page_cgroup *pc, bool active) +void mem_cgroup_move_lists(struct page *page, bool active) { + struct page_cgroup *pc; struct mem_cgroup_per_zone *mz; unsigned long flags; + pc = page_get_page_cgroup(page); if (!pc) return; diff --git a/mm/swap.c b/mm/swap.c index 710a20bb9749..d4ec59aa5c46 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -176,7 
+176,7 @@ void activate_page(struct page *page) SetPageActive(page); add_page_to_active_list(zone, page); __count_vm_event(PGACTIVATE); - mem_cgroup_move_lists(page_get_page_cgroup(page), true); + mem_cgroup_move_lists(page, true); } spin_unlock_irq(&zone->lru_lock); } diff --git a/mm/vmscan.c b/mm/vmscan.c index 106ba10e1ac6..45711585684e 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1128,7 +1128,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, ClearPageActive(page); list_move(&page->lru, &zone->inactive_list); - mem_cgroup_move_lists(page_get_page_cgroup(page), false); + mem_cgroup_move_lists(page, false); pgmoved++; if (!pagevec_add(&pvec, page)) { __mod_zone_page_state(zone, NR_INACTIVE, pgmoved); @@ -1156,8 +1156,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, VM_BUG_ON(PageLRU(page)); SetPageLRU(page); VM_BUG_ON(!PageActive(page)); + list_move(&page->lru, &zone->active_list); - mem_cgroup_move_lists(page_get_page_cgroup(page), true); + mem_cgroup_move_lists(page, true); pgmoved++; if (!pagevec_add(&pvec, page)) { __mod_zone_page_state(zone, NR_ACTIVE, pgmoved); -- cgit v1.2.3 From 9442ec9df40d952b0de185ae5638a74970388e01 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 4 Mar 2008 14:29:07 -0800 Subject: memcg: bad page if page_cgroup when free Replace free_hot_cold_page's VM_BUG_ON(page_get_page_cgroup(page)) by a "Bad page state" and clear: most users don't have CONFIG_DEBUG_VM on, and if it were set here, it'd likely cause corruption when the page is reused. Don't use page_assign_page_cgroup to clear it: that should be private to memcontrol.c, and always called with the lock taken; and memmap_init_zone doesn't need it either - like page->mapping and other pointers throughout the kernel, Linux assumes pointers in zeroed structures are NULL pointers. Instead use page_reset_bad_cgroup, added to memcontrol.h for this only. 
Signed-off-by: Hugh Dickins Cc: David Rientjes Acked-by: Balbir Singh Acked-by: KAMEZAWA Hiroyuki Cc: Hirokazu Takahashi Cc: YAMAMOTO Takashi Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 8 ++++---- mm/memcontrol.c | 27 ++++++++++++--------------- mm/page_alloc.c | 18 ++++++++++++------ 3 files changed, 28 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 56432ff8d4e3..70789df7dab4 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -29,8 +29,9 @@ struct mm_struct; extern void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p); extern void mm_free_cgroup(struct mm_struct *mm); -extern void page_assign_page_cgroup(struct page *page, - struct page_cgroup *pc); + +#define page_reset_bad_cgroup(page) ((page)->page_cgroup = 0) + extern struct page_cgroup *page_get_page_cgroup(struct page *page); extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); @@ -82,8 +83,7 @@ static inline void mm_free_cgroup(struct mm_struct *mm) { } -static inline void page_assign_page_cgroup(struct page *page, - struct page_cgroup *pc) +static inline void page_reset_bad_cgroup(struct page *page) { } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index afdd406f618a..9e170d3c71e5 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -140,11 +140,17 @@ struct mem_cgroup { /* * We use the lower bit of the page->page_cgroup pointer as a bit spin - * lock. We need to ensure that page->page_cgroup is atleast two - * byte aligned (based on comments from Nick Piggin) + * lock. We need to ensure that page->page_cgroup is at least two + * byte aligned (based on comments from Nick Piggin). But since + * bit_spin_lock doesn't actually set that lock bit in a non-debug + * uniprocessor kernel, we should avoid setting it here too. 
*/ #define PAGE_CGROUP_LOCK_BIT 0x0 -#define PAGE_CGROUP_LOCK (1 << PAGE_CGROUP_LOCK_BIT) +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) +#define PAGE_CGROUP_LOCK (1 << PAGE_CGROUP_LOCK_BIT) +#else +#define PAGE_CGROUP_LOCK 0x0 +#endif /* * A page_cgroup page is associated with every page descriptor. The @@ -271,19 +277,10 @@ static inline int page_cgroup_locked(struct page *page) &page->page_cgroup); } -void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc) +static void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc) { - int locked; - - /* - * While resetting the page_cgroup we might not hold the - * page_cgroup lock. free_hot_cold_page() is an example - * of such a scenario - */ - if (pc) - VM_BUG_ON(!page_cgroup_locked(page)); - locked = (page->page_cgroup & PAGE_CGROUP_LOCK); - page->page_cgroup = ((unsigned long)pc | locked); + VM_BUG_ON(!page_cgroup_locked(page)); + page->page_cgroup = ((unsigned long)pc | PAGE_CGROUP_LOCK); } struct page_cgroup *page_get_page_cgroup(struct page *page) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e76cf94725c9..402a504f1228 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -222,13 +222,19 @@ static inline int bad_range(struct zone *zone, struct page *page) static void bad_page(struct page *page) { - printk(KERN_EMERG "Bad page state in process '%s'\n" - KERN_EMERG "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n" - KERN_EMERG "Trying to fix it up, but a reboot is needed\n" - KERN_EMERG "Backtrace:\n", + void *pc = page_get_page_cgroup(page); + + printk(KERN_EMERG "Bad page state in process '%s'\n" KERN_EMERG + "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n", current->comm, page, (int)(2*sizeof(unsigned long)), (unsigned long)page->flags, page->mapping, page_mapcount(page), page_count(page)); + if (pc) { + printk(KERN_EMERG "cgroup:%p\n", pc); + page_reset_bad_cgroup(page); + } + printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n" + 
KERN_EMERG "Backtrace:\n"); dump_stack(); page->flags &= ~(1 << PG_lru | 1 << PG_private | @@ -454,6 +460,7 @@ static inline int free_pages_check(struct page *page) { if (unlikely(page_mapcount(page) | (page->mapping != NULL) | + (page_get_page_cgroup(page) != NULL) | (page_count(page) != 0) | (page->flags & ( 1 << PG_lru | @@ -603,6 +610,7 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) { if (unlikely(page_mapcount(page) | (page->mapping != NULL) | + (page_get_page_cgroup(page) != NULL) | (page_count(page) != 0) | (page->flags & ( 1 << PG_lru | @@ -989,7 +997,6 @@ static void free_hot_cold_page(struct page *page, int cold) if (!PageHighMem(page)) debug_check_no_locks_freed(page_address(page), PAGE_SIZE); - VM_BUG_ON(page_get_page_cgroup(page)); arch_free_page(page, 0); kernel_map_pages(page, 1, 0); @@ -2528,7 +2535,6 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, set_page_links(page, zone, nid, pfn); init_page_count(page); reset_page_mapcount(page); - page_assign_page_cgroup(page, NULL); SetPageReserved(page); /* -- cgit v1.2.3 From 8289546e573d5ff681cdf0fc7a1184cca66fdb55 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 4 Mar 2008 14:29:08 -0800 Subject: memcg: remove mem_cgroup_uncharge Nothing uses mem_cgroup_uncharge apart from mem_cgroup_uncharge_page, (a trivial wrapper around it) and mem_cgroup_end_migration (which does the same as mem_cgroup_uncharge_page). And it often ends up having to lock just to let its caller unlock. Remove it (but leave the silly locking until a later patch). Moved mem_cgroup_cache_charge next to mem_cgroup_charge in memcontrol.h. 
Signed-off-by: Hugh Dickins Cc: David Rientjes Acked-by: Balbir Singh Acked-by: KAMEZAWA Hiroyuki Cc: Hirokazu Takahashi Cc: YAMAMOTO Takashi Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 20 +++++++------------- mm/memcontrol.c | 23 ++++++++--------------- 2 files changed, 15 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 70789df7dab4..8b1c4295848b 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -35,7 +35,8 @@ extern void mm_free_cgroup(struct mm_struct *mm); extern struct page_cgroup *page_get_page_cgroup(struct page *page); extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); -extern void mem_cgroup_uncharge(struct page_cgroup *pc); +extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, + gfp_t gfp_mask); extern void mem_cgroup_uncharge_page(struct page *page); extern void mem_cgroup_move_lists(struct page *page, bool active); extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, @@ -45,8 +46,6 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, struct mem_cgroup *mem_cont, int active); extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask); -extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, - gfp_t gfp_mask); int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); #define mm_match_cgroup(mm, cgroup) \ @@ -92,14 +91,16 @@ static inline struct page_cgroup *page_get_page_cgroup(struct page *page) return NULL; } -static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm, - gfp_t gfp_mask) +static inline int mem_cgroup_charge(struct page *page, + struct mm_struct *mm, gfp_t gfp_mask) { return 0; } -static inline void mem_cgroup_uncharge(struct page_cgroup *pc) +static inline int mem_cgroup_cache_charge(struct 
page *page, + struct mm_struct *mm, gfp_t gfp_mask) { + return 0; } static inline void mem_cgroup_uncharge_page(struct page *page) @@ -110,13 +111,6 @@ static inline void mem_cgroup_move_lists(struct page *page, bool active) { } -static inline int mem_cgroup_cache_charge(struct page *page, - struct mm_struct *mm, - gfp_t gfp_mask) -{ - return 0; -} - static inline int mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *mem) { return 1; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 83ba13ad31e1..1333d25163bb 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -697,20 +697,22 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, /* * Uncharging is always a welcome operation, we never complain, simply - * uncharge. This routine should be called with lock_page_cgroup held + * uncharge. */ -void mem_cgroup_uncharge(struct page_cgroup *pc) +void mem_cgroup_uncharge_page(struct page *page) { + struct page_cgroup *pc; struct mem_cgroup *mem; struct mem_cgroup_per_zone *mz; - struct page *page; unsigned long flags; /* * Check if our page_cgroup is valid */ + lock_page_cgroup(page); + pc = page_get_page_cgroup(page); if (!pc) - return; + goto unlock; if (atomic_dec_and_test(&pc->ref_cnt)) { page = pc->page; @@ -731,12 +733,8 @@ void mem_cgroup_uncharge(struct page_cgroup *pc) } lock_page_cgroup(page); } -} -void mem_cgroup_uncharge_page(struct page *page) -{ - lock_page_cgroup(page); - mem_cgroup_uncharge(page_get_page_cgroup(page)); +unlock: unlock_page_cgroup(page); } @@ -759,12 +757,7 @@ int mem_cgroup_prepare_migration(struct page *page) void mem_cgroup_end_migration(struct page *page) { - struct page_cgroup *pc; - - lock_page_cgroup(page); - pc = page_get_page_cgroup(page); - mem_cgroup_uncharge(pc); - unlock_page_cgroup(page); + mem_cgroup_uncharge_page(page); } /* * We know both *page* and *newpage* are now not-on-LRU and Pg_locked. 
-- cgit v1.2.3 From 3715863aa142c4f4c5208f5f3e5e9bac06006d2f Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 4 Mar 2008 14:29:27 -0800 Subject: iommu: export iommu_is_span_boundary helper function iommu_is_span_boundary is used internally in the IOMMU helper (lib/iommu-helper.c), a primitive function that judges whether a memory area spans LLD's segment boundary or not. It's difficult to convert some IOMMUs to use the IOMMU helper but iommu_is_span_boundary is still useful for them. So this patch exports it. This is needed for the parisc iommu fixes. Signed-off-by: FUJITA Tomonori Cc: Kyle McMartin Cc: Matthew Wilcox Cc: Grant Grundler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/iommu-helper.h | 3 +++ lib/iommu-helper.c | 10 ++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h index 4dd4c04ff2f4..c975caf75385 100644 --- a/include/linux/iommu-helper.h +++ b/include/linux/iommu-helper.h @@ -1,3 +1,6 @@ +extern int iommu_is_span_boundary(unsigned int index, unsigned int nr, + unsigned long shift, + unsigned long boundary_size); extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, unsigned long shift, diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c index 495575a59ca6..a3b8d4c3f77a 100644 --- a/lib/iommu-helper.c +++ b/lib/iommu-helper.c @@ -40,10 +40,12 @@ static inline void set_bit_area(unsigned long *map, unsigned long i, } } -static inline int is_span_boundary(unsigned int index, unsigned int nr, - unsigned long shift, - unsigned long boundary_size) +int iommu_is_span_boundary(unsigned int index, unsigned int nr, + unsigned long shift, + unsigned long boundary_size) { + BUG_ON(!is_power_of_2(boundary_size)); + shift = (shift + index) & (boundary_size - 1); return shift + nr > boundary_size; } @@ -57,7 +59,7 @@ unsigned long iommu_area_alloc(unsigned 
long *map, unsigned long size, again: index = find_next_zero_area(map, size, start, nr, align_mask); if (index != -1) { - if (is_span_boundary(index, nr, shift, boundary_size)) { + if (iommu_is_span_boundary(index, nr, shift, boundary_size)) { /* we could do more effectively */ start = index + 1; goto again; -- cgit v1.2.3 From 8311c29d40235062a843f4a8e8a70a44af6fe4c9 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 4 Mar 2008 14:29:30 -0800 Subject: md: reduce CPU wastage on idle md array with a write-intent bitmap On an md array with a write-intent bitmap, a thread wakes up every few seconds and scans the bitmap looking for work to do. If the array is idle, there will be no work to do, but a lot of scanning is done to discover this. So cache the fact that the bitmap is completely clean, and avoid scanning the whole bitmap when the cache is known to be clean. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/bitmap.c | 19 +++++++++++++++++-- include/linux/raid/bitmap.h | 2 ++ 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 7aeceedcf7d4..831aed9c56ff 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1047,6 +1047,11 @@ void bitmap_daemon_work(struct bitmap *bitmap) if (time_before(jiffies, bitmap->daemon_lastrun + bitmap->daemon_sleep*HZ)) return; bitmap->daemon_lastrun = jiffies; + if (bitmap->allclean) { + bitmap->mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; + return; + } + bitmap->allclean = 1; for (j = 0; j < bitmap->chunks; j++) { bitmap_counter_t *bmc; @@ -1068,8 +1073,10 @@ void bitmap_daemon_work(struct bitmap *bitmap) clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE); spin_unlock_irqrestore(&bitmap->lock, flags); - if (need_write) + if (need_write) { write_page(bitmap, page, 0); + bitmap->allclean = 0; + } continue; } @@ -1098,6 +1105,9 @@ void bitmap_daemon_work(struct bitmap *bitmap) /* if (j < 
100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc); */ + if (*bmc) + bitmap->allclean = 0; + if (*bmc == 2) { *bmc=1; /* maybe clear the bit next time */ set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); @@ -1132,6 +1142,8 @@ void bitmap_daemon_work(struct bitmap *bitmap) } } + if (bitmap->allclean == 0) + bitmap->mddev->thread->timeout = bitmap->daemon_sleep * HZ; } static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, @@ -1226,6 +1238,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect sectors -= blocks; else sectors = 0; } + bitmap->allclean = 0; return 0; } @@ -1296,6 +1309,7 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks, } } spin_unlock_irq(&bitmap->lock); + bitmap->allclean = 0; return rv; } @@ -1332,6 +1346,7 @@ void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int ab } unlock: spin_unlock_irqrestore(&bitmap->lock, flags); + bitmap->allclean = 0; } void bitmap_close_sync(struct bitmap *bitmap) @@ -1399,7 +1414,7 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int n set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); } spin_unlock_irq(&bitmap->lock); - + bitmap->allclean = 0; } /* dirty the memory and file bits for bitmap chunks "s" to "e" */ diff --git a/include/linux/raid/bitmap.h b/include/linux/raid/bitmap.h index e51b531cd0b2..47fbcba11850 100644 --- a/include/linux/raid/bitmap.h +++ b/include/linux/raid/bitmap.h @@ -235,6 +235,8 @@ struct bitmap { unsigned long flags; + int allclean; + unsigned long max_write_behind; /* write-behind mode */ atomic_t behind_writes; -- cgit v1.2.3 From d0fae18f1b53a1d39135a968792be034bdf7ff26 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 4 Mar 2008 14:29:31 -0800 Subject: md: clean up irregularity with raid autodetect When a raid1 array is stopped, all components currently get added to the list for auto-detection. 
However we should really only add components that were found by autodetection in the first place. So add a flag to record that information, and use it. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 4 +++- include/linux/raid/md_k.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/md/md.c b/drivers/md/md.c index b375de5c1af2..a71241c5ae72 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1503,7 +1503,8 @@ static void export_rdev(mdk_rdev_t * rdev) free_disk_sb(rdev); list_del_init(&rdev->same_set); #ifndef MODULE - md_autodetect_dev(rdev->bdev->bd_dev); + if (test_bit(AutoDetected, &rdev->flags)) + md_autodetect_dev(rdev->bdev->bd_dev); #endif unlock_rdev(rdev); kobject_put(&rdev->kobj); @@ -6025,6 +6026,7 @@ static void autostart_arrays(int part) MD_BUG(); continue; } + set_bit(AutoDetected, &rdev->flags); list_add(&rdev->same_set, &pending_raid_disks); i_passed++; } diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 85a068bab625..7bb6d1abf71e 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -83,6 +83,7 @@ struct mdk_rdev_s #define BarriersNotsupp 5 /* BIO_RW_BARRIER is not supported */ #define AllReserved 6 /* If whole device is reserved for * one array */ +#define AutoDetected 7 /* added by auto-detect */ int desc_nr; /* descriptor index in the superblock */ int raid_disk; /* role of device in array */ -- cgit v1.2.3 From e0007529893c1c064be90bd21422ca0da4a0198e Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Wed, 5 Mar 2008 10:31:54 -0500 Subject: LSM/SELinux: Interfaces to allow FS to control mount options Introduce new LSM interfaces to allow an FS to deal with their own mount options. This includes a new string parsing function exported from the LSM that an FS can use to get a security data blob and a new security data blob. 
This is particularly useful for an FS which uses binary mount data, like NFS, which does not pass strings into the vfs to be handled by the loaded LSM. Also fix a BUG() in both SELinux and SMACK when dealing with binary mount data. If the binary mount data is less than one page the copy_page() in security_sb_copy_data() can cause an illegal page fault and boom. Remove all NFSisms from the SELinux code since they were broken by past NFS changes. Signed-off-by: Eric Paris Acked-by: Stephen Smalley Acked-by: Casey Schaufler Signed-off-by: James Morris --- fs/super.c | 4 +- include/linux/security.h | 99 ++++++++++++++------ security/dummy.c | 23 ++--- security/security.c | 23 +++-- security/selinux/hooks.c | 175 +++++++++++++++++++----------------- security/selinux/include/security.h | 5 ++ security/smack/smack_lsm.c | 9 +- 7 files changed, 204 insertions(+), 134 deletions(-) (limited to 'include/linux') diff --git a/fs/super.c b/fs/super.c index 88811f60c8de..010446d8c40a 100644 --- a/fs/super.c +++ b/fs/super.c @@ -870,12 +870,12 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void if (!mnt) goto out; - if (data) { + if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) { secdata = alloc_secdata(); if (!secdata) goto out_mnt; - error = security_sb_copy_data(type, data, secdata); + error = security_sb_copy_data(data, secdata); if (error) goto out_free_secdata; } diff --git a/include/linux/security.h b/include/linux/security.h index fe52cdeab0a6..b07357ca2137 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -34,12 +34,6 @@ #include #include -/* only a char in selinux superblock security struct flags */ -#define FSCONTEXT_MNT 0x01 -#define CONTEXT_MNT 0x02 -#define ROOTCONTEXT_MNT 0x04 -#define DEFCONTEXT_MNT 0x08 - extern unsigned securebits; struct ctl_table; @@ -114,6 +108,32 @@ struct request_sock; #ifdef CONFIG_SECURITY +struct security_mnt_opts { + char **mnt_opts; + int *mnt_opts_flags; + int num_mnt_opts; +}; + 
+static inline void security_init_mnt_opts(struct security_mnt_opts *opts) +{ + opts->mnt_opts = NULL; + opts->mnt_opts_flags = NULL; + opts->num_mnt_opts = 0; +} + +static inline void security_free_mnt_opts(struct security_mnt_opts *opts) +{ + int i; + if (opts->mnt_opts) + for(i = 0; i < opts->num_mnt_opts; i++) + kfree(opts->mnt_opts[i]); + kfree(opts->mnt_opts); + opts->mnt_opts = NULL; + kfree(opts->mnt_opts_flags); + opts->mnt_opts_flags = NULL; + opts->num_mnt_opts = 0; +} + /** * struct security_operations - main security structure * @@ -262,19 +282,19 @@ struct request_sock; * @sb_get_mnt_opts: * Get the security relevant mount options used for a superblock * @sb the superblock to get security mount options from - * @mount_options array for pointers to mount options - * @mount_flags array of ints specifying what each mount options is - * @num_opts number of options in the arrays + * @opts binary data structure containing all lsm mount data * @sb_set_mnt_opts: * Set the security relevant mount options used for a superblock * @sb the superblock to set security mount options for - * @mount_options array for pointers to mount options - * @mount_flags array of ints specifying what each mount options is - * @num_opts number of options in the arrays + * @opts binary data structure containing all lsm mount data * @sb_clone_mnt_opts: * Copy all security options from a given superblock to another * @oldsb old superblock which contain information to clone * @newsb new superblock which needs filled in + * @sb_parse_opts_str: + * Parse a string of security data filling in the opts structure + * @options string containing all mount options known by the LSM + * @opts binary data structure usable by the LSM * * Security hooks for inode operations. 
* @@ -1238,8 +1258,7 @@ struct security_operations { int (*sb_alloc_security) (struct super_block * sb); void (*sb_free_security) (struct super_block * sb); - int (*sb_copy_data)(struct file_system_type *type, - void *orig, void *copy); + int (*sb_copy_data)(char *orig, char *copy); int (*sb_kern_mount) (struct super_block *sb, void *data); int (*sb_statfs) (struct dentry *dentry); int (*sb_mount) (char *dev_name, struct nameidata * nd, @@ -1257,12 +1276,12 @@ struct security_operations { void (*sb_post_pivotroot) (struct nameidata * old_nd, struct nameidata * new_nd); int (*sb_get_mnt_opts) (const struct super_block *sb, - char ***mount_options, int **flags, - int *num_opts); - int (*sb_set_mnt_opts) (struct super_block *sb, char **mount_options, - int *flags, int num_opts); + struct security_mnt_opts *opts); + int (*sb_set_mnt_opts) (struct super_block *sb, + struct security_mnt_opts *opts); void (*sb_clone_mnt_opts) (const struct super_block *oldsb, struct super_block *newsb); + int (*sb_parse_opts_str) (char *options, struct security_mnt_opts *opts); int (*inode_alloc_security) (struct inode *inode); void (*inode_free_security) (struct inode *inode); @@ -1507,7 +1526,7 @@ int security_bprm_check(struct linux_binprm *bprm); int security_bprm_secureexec(struct linux_binprm *bprm); int security_sb_alloc(struct super_block *sb); void security_sb_free(struct super_block *sb); -int security_sb_copy_data(struct file_system_type *type, void *orig, void *copy); +int security_sb_copy_data(char *orig, char *copy); int security_sb_kern_mount(struct super_block *sb, void *data); int security_sb_statfs(struct dentry *dentry); int security_sb_mount(char *dev_name, struct nameidata *nd, @@ -1520,12 +1539,12 @@ void security_sb_post_remount(struct vfsmount *mnt, unsigned long flags, void *d void security_sb_post_addmount(struct vfsmount *mnt, struct nameidata *mountpoint_nd); int security_sb_pivotroot(struct nameidata *old_nd, struct nameidata *new_nd); void 
security_sb_post_pivotroot(struct nameidata *old_nd, struct nameidata *new_nd); -int security_sb_get_mnt_opts(const struct super_block *sb, char ***mount_options, - int **flags, int *num_opts); -int security_sb_set_mnt_opts(struct super_block *sb, char **mount_options, - int *flags, int num_opts); +int security_sb_get_mnt_opts(const struct super_block *sb, + struct security_mnt_opts *opts); +int security_sb_set_mnt_opts(struct super_block *sb, struct security_mnt_opts *opts); void security_sb_clone_mnt_opts(const struct super_block *oldsb, struct super_block *newsb); +int security_sb_parse_opts_str(char *options, struct security_mnt_opts *opts); int security_inode_alloc(struct inode *inode); void security_inode_free(struct inode *inode); @@ -1635,6 +1654,16 @@ int security_secctx_to_secid(char *secdata, u32 seclen, u32 *secid); void security_release_secctx(char *secdata, u32 seclen); #else /* CONFIG_SECURITY */ +struct security_mnt_opts { +}; + +static inline void security_init_mnt_opts(struct security_mnt_opts *opts) +{ +} + +static inline void security_free_mnt_opts(struct security_mnt_opts *opts) +{ +} /* * This is the default capabilities functionality. 
Most of these functions @@ -1762,8 +1791,7 @@ static inline int security_sb_alloc (struct super_block *sb) static inline void security_sb_free (struct super_block *sb) { } -static inline int security_sb_copy_data (struct file_system_type *type, - void *orig, void *copy) +static inline int security_sb_copy_data (char *orig, char *copy) { return 0; } @@ -1819,6 +1847,27 @@ static inline int security_sb_pivotroot (struct nameidata *old_nd, static inline void security_sb_post_pivotroot (struct nameidata *old_nd, struct nameidata *new_nd) { } +static inline int security_sb_get_mnt_opts(const struct super_block *sb, + struct security_mnt_opts *opts) +{ + security_init_mnt_opts(opts); + return 0; +} + +static inline int security_sb_set_mnt_opts(struct super_block *sb, + struct security_mnt_opts *opts) +{ + return 0; +} + +static inline void security_sb_clone_mnt_opts(const struct super_block *oldsb, + struct super_block *newsb) +{ } + +static inline int security_sb_parse_opts_str(char *options, struct security_mnt_opts *opts) +{ + return 0; +} static inline int security_inode_alloc (struct inode *inode) { diff --git a/security/dummy.c b/security/dummy.c index 649326bf64ea..78d8f92310a4 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -181,8 +181,7 @@ static void dummy_sb_free_security (struct super_block *sb) return; } -static int dummy_sb_copy_data (struct file_system_type *type, - void *orig, void *copy) +static int dummy_sb_copy_data (char *orig, char *copy) { return 0; } @@ -245,19 +244,17 @@ static void dummy_sb_post_pivotroot (struct nameidata *old_nd, struct nameidata return; } -static int dummy_sb_get_mnt_opts(const struct super_block *sb, char ***mount_options, - int **flags, int *num_opts) +static int dummy_sb_get_mnt_opts(const struct super_block *sb, + struct security_mnt_opts *opts) { - *mount_options = NULL; - *flags = NULL; - *num_opts = 0; + security_init_mnt_opts(opts); return 0; } -static int dummy_sb_set_mnt_opts(struct super_block *sb, char 
**mount_options, - int *flags, int num_opts) +static int dummy_sb_set_mnt_opts(struct super_block *sb, + struct security_mnt_opts *opts) { - if (unlikely(num_opts)) + if (unlikely(opts->num_mnt_opts)) return -EOPNOTSUPP; return 0; } @@ -268,6 +265,11 @@ static void dummy_sb_clone_mnt_opts(const struct super_block *oldsb, return; } +static int dummy_sb_parse_opts_str(char *options, struct security_mnt_opts *opts) +{ + return 0; +} + static int dummy_inode_alloc_security (struct inode *inode) { return 0; @@ -1028,6 +1030,7 @@ void security_fixup_ops (struct security_operations *ops) set_to_dummy_if_null(ops, sb_get_mnt_opts); set_to_dummy_if_null(ops, sb_set_mnt_opts); set_to_dummy_if_null(ops, sb_clone_mnt_opts); + set_to_dummy_if_null(ops, sb_parse_opts_str); set_to_dummy_if_null(ops, inode_alloc_security); set_to_dummy_if_null(ops, inode_free_security); set_to_dummy_if_null(ops, inode_init_security); diff --git a/security/security.c b/security/security.c index d15e56cbaade..b1387a6b416d 100644 --- a/security/security.c +++ b/security/security.c @@ -244,10 +244,11 @@ void security_sb_free(struct super_block *sb) security_ops->sb_free_security(sb); } -int security_sb_copy_data(struct file_system_type *type, void *orig, void *copy) +int security_sb_copy_data(char *orig, char *copy) { - return security_ops->sb_copy_data(type, orig, copy); + return security_ops->sb_copy_data(orig, copy); } +EXPORT_SYMBOL(security_sb_copy_data); int security_sb_kern_mount(struct super_block *sb, void *data) { @@ -306,24 +307,30 @@ void security_sb_post_pivotroot(struct nameidata *old_nd, struct nameidata *new_ } int security_sb_get_mnt_opts(const struct super_block *sb, - char ***mount_options, - int **flags, int *num_opts) + struct security_mnt_opts *opts) { - return security_ops->sb_get_mnt_opts(sb, mount_options, flags, num_opts); + return security_ops->sb_get_mnt_opts(sb, opts); } int security_sb_set_mnt_opts(struct super_block *sb, - char **mount_options, - int *flags, int 
num_opts) + struct security_mnt_opts *opts) { - return security_ops->sb_set_mnt_opts(sb, mount_options, flags, num_opts); + return security_ops->sb_set_mnt_opts(sb, opts); } +EXPORT_SYMBOL(security_sb_set_mnt_opts); void security_sb_clone_mnt_opts(const struct super_block *oldsb, struct super_block *newsb) { security_ops->sb_clone_mnt_opts(oldsb, newsb); } +EXPORT_SYMBOL(security_sb_clone_mnt_opts); + +int security_sb_parse_opts_str(char *options, struct security_mnt_opts *opts) +{ + return security_ops->sb_parse_opts_str(options, opts); +} +EXPORT_SYMBOL(security_sb_parse_opts_str); int security_inode_alloc(struct inode *inode) { diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 75c2e99bfb81..4bf4807f2d44 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -443,8 +443,7 @@ out: * mount options, or whatever. */ static int selinux_get_mnt_opts(const struct super_block *sb, - char ***mount_options, int **mnt_opts_flags, - int *num_opts) + struct security_mnt_opts *opts) { int rc = 0, i; struct superblock_security_struct *sbsec = sb->s_security; @@ -452,9 +451,7 @@ static int selinux_get_mnt_opts(const struct super_block *sb, u32 len; char tmp; - *num_opts = 0; - *mount_options = NULL; - *mnt_opts_flags = NULL; + security_init_mnt_opts(opts); if (!sbsec->initialized) return -EINVAL; @@ -470,18 +467,18 @@ static int selinux_get_mnt_opts(const struct super_block *sb, /* count the number of mount options for this sb */ for (i = 0; i < 8; i++) { if (tmp & 0x01) - (*num_opts)++; + opts->num_mnt_opts++; tmp >>= 1; } - *mount_options = kcalloc(*num_opts, sizeof(char *), GFP_ATOMIC); - if (!*mount_options) { + opts->mnt_opts = kcalloc(opts->num_mnt_opts, sizeof(char *), GFP_ATOMIC); + if (!opts->mnt_opts) { rc = -ENOMEM; goto out_free; } - *mnt_opts_flags = kcalloc(*num_opts, sizeof(int), GFP_ATOMIC); - if (!*mnt_opts_flags) { + opts->mnt_opts_flags = kcalloc(opts->num_mnt_opts, sizeof(int), GFP_ATOMIC); + if (!opts->mnt_opts_flags) { 
rc = -ENOMEM; goto out_free; } @@ -491,22 +488,22 @@ static int selinux_get_mnt_opts(const struct super_block *sb, rc = security_sid_to_context(sbsec->sid, &context, &len); if (rc) goto out_free; - (*mount_options)[i] = context; - (*mnt_opts_flags)[i++] = FSCONTEXT_MNT; + opts->mnt_opts[i] = context; + opts->mnt_opts_flags[i++] = FSCONTEXT_MNT; } if (sbsec->flags & CONTEXT_MNT) { rc = security_sid_to_context(sbsec->mntpoint_sid, &context, &len); if (rc) goto out_free; - (*mount_options)[i] = context; - (*mnt_opts_flags)[i++] = CONTEXT_MNT; + opts->mnt_opts[i] = context; + opts->mnt_opts_flags[i++] = CONTEXT_MNT; } if (sbsec->flags & DEFCONTEXT_MNT) { rc = security_sid_to_context(sbsec->def_sid, &context, &len); if (rc) goto out_free; - (*mount_options)[i] = context; - (*mnt_opts_flags)[i++] = DEFCONTEXT_MNT; + opts->mnt_opts[i] = context; + opts->mnt_opts_flags[i++] = DEFCONTEXT_MNT; } if (sbsec->flags & ROOTCONTEXT_MNT) { struct inode *root = sbsec->sb->s_root->d_inode; @@ -515,24 +512,16 @@ static int selinux_get_mnt_opts(const struct super_block *sb, rc = security_sid_to_context(isec->sid, &context, &len); if (rc) goto out_free; - (*mount_options)[i] = context; - (*mnt_opts_flags)[i++] = ROOTCONTEXT_MNT; + opts->mnt_opts[i] = context; + opts->mnt_opts_flags[i++] = ROOTCONTEXT_MNT; } - BUG_ON(i != *num_opts); + BUG_ON(i != opts->num_mnt_opts); return 0; out_free: - /* don't leak context string if security_sid_to_context had an error */ - if (*mount_options && i) - for (; i > 0; i--) - kfree((*mount_options)[i-1]); - kfree(*mount_options); - *mount_options = NULL; - kfree(*mnt_opts_flags); - *mnt_opts_flags = NULL; - *num_opts = 0; + security_free_mnt_opts(opts); return rc; } @@ -553,12 +542,13 @@ static int bad_option(struct superblock_security_struct *sbsec, char flag, return 1; return 0; } + /* * Allow filesystems with binary mount data to explicitly set mount point * labeling information. 
*/ -static int selinux_set_mnt_opts(struct super_block *sb, char **mount_options, - int *flags, int num_opts) +static int selinux_set_mnt_opts(struct super_block *sb, + struct security_mnt_opts *opts) { int rc = 0, i; struct task_security_struct *tsec = current->security; @@ -568,6 +558,9 @@ static int selinux_set_mnt_opts(struct super_block *sb, char **mount_options, struct inode_security_struct *root_isec = inode->i_security; u32 fscontext_sid = 0, context_sid = 0, rootcontext_sid = 0; u32 defcontext_sid = 0; + char **mount_options = opts->mnt_opts; + int *flags = opts->mnt_opts_flags; + int num_opts = opts->num_mnt_opts; mutex_lock(&sbsec->lock); @@ -588,6 +581,21 @@ static int selinux_set_mnt_opts(struct super_block *sb, char **mount_options, goto out; } + /* + * Binary mount data FS will come through this function twice. Once + * from an explicit call and once from the generic calls from the vfs. + * Since the generic VFS calls will not contain any security mount data + * we need to skip the double mount verification. + * + * This does open a hole in which we will not notice if the first + * mount using this sb set explict options and a second mount using + * this sb does not set any security options. (The first options + * will be used for both mounts) + */ + if (sbsec->initialized && (sb->s_type->fs_flags & FS_BINARY_MOUNTDATA) + && (num_opts == 0)) + goto out; + /* * parse the mount options, check if they are valid sids. 
* also check if someone is trying to mount the same sb more @@ -792,43 +800,14 @@ static void selinux_sb_clone_mnt_opts(const struct super_block *oldsb, mutex_unlock(&newsbsec->lock); } -/* - * string mount options parsing and call set the sbsec - */ -static int superblock_doinit(struct super_block *sb, void *data) +int selinux_parse_opts_str(char *options, struct security_mnt_opts *opts) { + char *p; char *context = NULL, *defcontext = NULL; char *fscontext = NULL, *rootcontext = NULL; - int rc = 0; - char *p, *options = data; - /* selinux only know about a fixed number of mount options */ - char *mnt_opts[NUM_SEL_MNT_OPTS]; - int mnt_opts_flags[NUM_SEL_MNT_OPTS], num_mnt_opts = 0; - - if (!data) - goto out; + int rc, num_mnt_opts = 0; - /* with the nfs patch this will become a goto out; */ - if (sb->s_type->fs_flags & FS_BINARY_MOUNTDATA) { - const char *name = sb->s_type->name; - /* NFS we understand. */ - if (!strcmp(name, "nfs")) { - struct nfs_mount_data *d = data; - - if (d->version != NFS_MOUNT_VERSION) - goto out; - - if (d->context[0]) { - context = kstrdup(d->context, GFP_KERNEL); - if (!context) { - rc = -ENOMEM; - goto out; - } - } - goto build_flags; - } else - goto out; - } + opts->num_mnt_opts = 0; /* Standard string-based options. 
*/ while ((p = strsep(&options, "|")) != NULL) { @@ -901,26 +880,37 @@ static int superblock_doinit(struct super_block *sb, void *data) } } -build_flags: + rc = -ENOMEM; + opts->mnt_opts = kcalloc(NUM_SEL_MNT_OPTS, sizeof(char *), GFP_ATOMIC); + if (!opts->mnt_opts) + goto out_err; + + opts->mnt_opts_flags = kcalloc(NUM_SEL_MNT_OPTS, sizeof(int), GFP_ATOMIC); + if (!opts->mnt_opts_flags) { + kfree(opts->mnt_opts); + goto out_err; + } + if (fscontext) { - mnt_opts[num_mnt_opts] = fscontext; - mnt_opts_flags[num_mnt_opts++] = FSCONTEXT_MNT; + opts->mnt_opts[num_mnt_opts] = fscontext; + opts->mnt_opts_flags[num_mnt_opts++] = FSCONTEXT_MNT; } if (context) { - mnt_opts[num_mnt_opts] = context; - mnt_opts_flags[num_mnt_opts++] = CONTEXT_MNT; + opts->mnt_opts[num_mnt_opts] = context; + opts->mnt_opts_flags[num_mnt_opts++] = CONTEXT_MNT; } if (rootcontext) { - mnt_opts[num_mnt_opts] = rootcontext; - mnt_opts_flags[num_mnt_opts++] = ROOTCONTEXT_MNT; + opts->mnt_opts[num_mnt_opts] = rootcontext; + opts->mnt_opts_flags[num_mnt_opts++] = ROOTCONTEXT_MNT; } if (defcontext) { - mnt_opts[num_mnt_opts] = defcontext; - mnt_opts_flags[num_mnt_opts++] = DEFCONTEXT_MNT; + opts->mnt_opts[num_mnt_opts] = defcontext; + opts->mnt_opts_flags[num_mnt_opts++] = DEFCONTEXT_MNT; } -out: - rc = selinux_set_mnt_opts(sb, mnt_opts, mnt_opts_flags, num_mnt_opts); + opts->num_mnt_opts = num_mnt_opts; + return 0; + out_err: kfree(context); kfree(defcontext); @@ -928,6 +918,33 @@ out_err: kfree(rootcontext); return rc; } +/* + * string mount options parsing and call set the sbsec + */ +static int superblock_doinit(struct super_block *sb, void *data) +{ + int rc = 0; + char *options = data; + struct security_mnt_opts opts; + + security_init_mnt_opts(&opts); + + if (!data) + goto out; + + BUG_ON(sb->s_type->fs_flags & FS_BINARY_MOUNTDATA); + + rc = selinux_parse_opts_str(options, &opts); + if (rc) + goto out_err; + +out: + rc = selinux_set_mnt_opts(sb, &opts); + +out_err: + 
security_free_mnt_opts(&opts); + return rc; +} static inline u16 inode_mode_to_security_class(umode_t mode) { @@ -2253,7 +2270,7 @@ static inline void take_selinux_option(char **to, char *from, int *first, } } -static int selinux_sb_copy_data(struct file_system_type *type, void *orig, void *copy) +static int selinux_sb_copy_data(char *orig, char *copy) { int fnosec, fsec, rc = 0; char *in_save, *in_curr, *in_end; @@ -2263,12 +2280,6 @@ static int selinux_sb_copy_data(struct file_system_type *type, void *orig, void in_curr = orig; sec_curr = copy; - /* Binary mount data: just copy */ - if (type->fs_flags & FS_BINARY_MOUNTDATA) { - copy_page(sec_curr, in_curr); - goto out; - } - nosec = (char *)get_zeroed_page(GFP_KERNEL); if (!nosec) { rc = -ENOMEM; @@ -5251,6 +5262,8 @@ static struct security_operations selinux_ops = { .sb_get_mnt_opts = selinux_get_mnt_opts, .sb_set_mnt_opts = selinux_set_mnt_opts, .sb_clone_mnt_opts = selinux_sb_clone_mnt_opts, + .sb_parse_opts_str = selinux_parse_opts_str, + .inode_alloc_security = selinux_inode_alloc_security, .inode_free_security = selinux_inode_free_security, diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index 837ce420d2f6..f7d2f03781f2 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -35,6 +35,11 @@ #define POLICYDB_VERSION_MAX POLICYDB_VERSION_POLCAP #endif +#define CONTEXT_MNT 0x01 +#define FSCONTEXT_MNT 0x02 +#define ROOTCONTEXT_MNT 0x04 +#define DEFCONTEXT_MNT 0x08 + struct netlbl_lsm_secattr; extern int selinux_enabled; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 770eb067e165..0241fd359675 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -189,17 +189,10 @@ static void smack_sb_free_security(struct super_block *sb) * Copy the Smack specific mount options out of the mount * options list. 
*/ -static int smack_sb_copy_data(struct file_system_type *type, void *orig, - void *smackopts) +static int smack_sb_copy_data(char *orig, char *smackopts) { char *cp, *commap, *otheropts, *dp; - /* Binary mount data: just copy */ - if (type->fs_flags & FS_BINARY_MOUNTDATA) { - copy_page(smackopts, orig); - return 0; - } - otheropts = (char *)get_zeroed_page(GFP_KERNEL); if (otheropts == NULL) return -ENOMEM; -- cgit v1.2.3 From 1c61fc40fc264059ff41a614ed2d899127288281 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 5 Mar 2008 13:58:17 -0800 Subject: slab - use angle brackets for include of kmalloc_sizes.h Make them all use angle brackets and the directory name. Acked-by: Pekka Enberg Signed-off-by: Joe Perches Signed-off-by: Christoph Lameter --- include/linux/slab_def.h | 4 ++-- mm/slab.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index fcc48096ee64..39c3a5eb8ebe 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -41,7 +41,7 @@ static inline void *kmalloc(size_t size, gfp_t flags) goto found; \ else \ i++; -#include "kmalloc_sizes.h" +#include <linux/kmalloc_sizes.h> #undef CACHE { extern void __you_cannot_kmalloc_that_much(void); @@ -75,7 +75,7 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node) goto found; \ else \ i++; -#include "kmalloc_sizes.h" +#include <linux/kmalloc_sizes.h> #undef CACHE { extern void __you_cannot_kmalloc_that_much(void); diff --git a/mm/slab.c b/mm/slab.c index 5d16c8a30499..f7faff72cf56 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -333,7 +333,7 @@ static __always_inline int index_of(const size_t size) return i; \ else \ i++; -#include "linux/kmalloc_sizes.h" +#include <linux/kmalloc_sizes.h> #undef CACHE __bad_size(); } else -- cgit v1.2.3 From 810b38179e9e4d4f57b4b733767bb08f8291a965 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 29 Feb 2008 15:21:01 -0500 Subject: sched: retain vruntime Kei Tokunaga reported an interactivity problem when moving
tasks between control groups. Tasks would retain their old vruntime when moved between groups, this can cause funny lags. Re-set the vruntime on group move to fit within the new tree. Reported-by: Kei Tokunaga Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++++ kernel/sched.c | 5 +++++ kernel/sched_fair.c | 14 ++++++++++++++ 3 files changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 9ae4030067a9..11d8e9a74eff 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -899,6 +899,10 @@ struct sched_class { int running); void (*prio_changed) (struct rq *this_rq, struct task_struct *task, int oldprio, int running); + +#ifdef CONFIG_FAIR_GROUP_SCHED + void (*moved_group) (struct task_struct *p); +#endif }; struct load_weight { diff --git a/kernel/sched.c b/kernel/sched.c index dcd553cc4ee8..0b949c4e73ad 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7625,6 +7625,11 @@ void sched_move_task(struct task_struct *tsk) set_task_rq(tsk, task_cpu(tsk)); +#ifdef CONFIG_FAIR_GROUP_SCHED + if (tsk->sched_class->moved_group) + tsk->sched_class->moved_group(tsk); +#endif + if (on_rq) { if (unlikely(running)) tsk->sched_class->set_curr_task(rq); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 3df4d46994ca..e2a530515619 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1353,6 +1353,16 @@ static void set_curr_task_fair(struct rq *rq) set_next_entity(cfs_rq_of(se), se); } +#ifdef CONFIG_FAIR_GROUP_SCHED +static void moved_group_fair(struct task_struct *p) +{ + struct cfs_rq *cfs_rq = task_cfs_rq(p); + + update_curr(cfs_rq); + place_entity(cfs_rq, &p->se, 1); +} +#endif + /* * All the scheduling class methods: */ @@ -1381,6 +1391,10 @@ static const struct sched_class fair_sched_class = { .prio_changed = prio_changed_fair, .switched_to = switched_to_fair, + +#ifdef CONFIG_FAIR_GROUP_SCHED + .moved_group = moved_group_fair, +#endif }; #ifdef 
CONFIG_SCHED_DEBUG -- cgit v1.2.3 From e9720acd728a46cb40daa52c99a979f7c4ff195c Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 7 Mar 2008 11:08:40 -0800 Subject: [NET]: Make /proc/net a symlink on /proc/self/net (v3) Current /proc/net is done with so called "shadows", but current implementation is broken and has little chances to get fixed. The problem is that dentries subtree of /proc/net directory has fancy revalidation rules to make processes living in different net namespaces see different entries in /proc/net subtree, but currently, tasks see in the /proc/net subdir the contents of any other namespace, depending on who opened the file first. The proposed fix is to turn /proc/net into a symlink, which points to /proc/self/net, which in turn shows what previously was in /proc/net - the network-related info, from the net namespace the appropriate task lives in. # ls -l /proc/net lrwxrwxrwx 1 root root 8 Mar 5 15:17 /proc/net -> self/net In other words - this behaves like /proc/mounts, but unlike "mounts", "net" is not a file, but a directory. Changes from v2: * Fixed discrepancy of /proc/net nlink count and selinux labeling screwup pointed out by Stephen. To get the correct nlink count the ->getattr callback for /proc/net is overridden to read one from the net->proc_net entry. To make selinux still work the net->proc_net entry is initialized properly, i.e. with the "net" name and the proc_net parent. Selinux fixes are Acked-by: Stephen Smalley Changes from v1: * Fixed a task_struct leak in get_proc_task_net, pointed out by Paul. Signed-off-by: Pavel Emelyanov Acked-by: "Eric W. Biederman" Signed-off-by: David S. 
Miller --- fs/proc/base.c | 1 + fs/proc/generic.c | 26 ++++++---- fs/proc/internal.h | 7 +++ fs/proc/proc_net.c | 117 +++++++++++++++++++++++++++++++++----------- include/linux/proc_fs.h | 3 -- include/net/net_namespace.h | 1 - 6 files changed, 114 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/base.c b/fs/proc/base.c index 96ee899d6502..cc43cf0c1fa5 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2274,6 +2274,7 @@ static const struct pid_entry tgid_base_stuff[] = { DIR("task", S_IRUGO|S_IXUGO, task), DIR("fd", S_IRUSR|S_IXUSR, fd), DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo), + DIR("net", S_IRUGO|S_IXUSR, net), REG("environ", S_IRUSR, environ), INF("auxv", S_IRUSR, pid_auxv), ONE("status", S_IRUGO, pid_status), diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 68971e66cd41..a36ad3c75cf4 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -377,15 +377,14 @@ static struct dentry_operations proc_dentry_operations = * Don't create negative dentries here, return -ENOENT by hand * instead. 
*/ -struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) +struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, + struct dentry *dentry) { struct inode *inode = NULL; - struct proc_dir_entry * de; int error = -ENOENT; lock_kernel(); spin_lock(&proc_subdir_lock); - de = PDE(dir); if (de) { for (de = de->subdir; de ; de = de->next) { if (de->namelen != dentry->d_name.len) @@ -393,8 +392,6 @@ struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nam if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { unsigned int ino; - if (de->shadow_proc) - de = de->shadow_proc(current, de); ino = de->low_ino; de_get(de); spin_unlock(&proc_subdir_lock); @@ -417,6 +414,12 @@ out_unlock: return ERR_PTR(error); } +struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + return proc_lookup_de(PDE(dir), dir, dentry); +} + /* * This returns non-zero if at EOF, so that the /proc * root directory can use this and check if it should @@ -426,10 +429,9 @@ out_unlock: * value of the readdir() call, as long as it's non-negative * for success.. */ -int proc_readdir(struct file * filp, - void * dirent, filldir_t filldir) +int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, + filldir_t filldir) { - struct proc_dir_entry * de; unsigned int ino; int i; struct inode *inode = filp->f_path.dentry->d_inode; @@ -438,7 +440,6 @@ int proc_readdir(struct file * filp, lock_kernel(); ino = inode->i_ino; - de = PDE(inode); if (!de) { ret = -EINVAL; goto out; @@ -499,6 +500,13 @@ out: unlock_kernel(); return ret; } +int proc_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + + return proc_readdir_de(PDE(inode), filp, dirent, filldir); +} + /* * These are the generic /proc directory operations. 
They * use the in-memory "struct proc_dir_entry" tree to parse diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 1c81c8f1aeed..bc72f5c8c47d 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -64,6 +64,8 @@ extern const struct file_operations proc_numa_maps_operations; extern const struct file_operations proc_smaps_operations; extern const struct file_operations proc_clear_refs_operations; extern const struct file_operations proc_pagemap_operations; +extern const struct file_operations proc_net_operations; +extern const struct inode_operations proc_net_inode_operations; void free_proc_entry(struct proc_dir_entry *de); @@ -83,3 +85,8 @@ static inline int proc_fd(struct inode *inode) { return PROC_I(inode)->fd; } + +struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino, + struct dentry *dentry); +int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, + filldir_t filldir); diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 14e9b5aaf863..4caa5f774fb7 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -63,6 +63,82 @@ int seq_release_net(struct inode *ino, struct file *f) } EXPORT_SYMBOL_GPL(seq_release_net); +static struct net *get_proc_task_net(struct inode *dir) +{ + struct task_struct *task; + struct nsproxy *ns; + struct net *net = NULL; + + rcu_read_lock(); + task = pid_task(proc_pid(dir), PIDTYPE_PID); + if (task != NULL) { + ns = task_nsproxy(task); + if (ns != NULL) + net = get_net(ns->net_ns); + } + rcu_read_unlock(); + + return net; +} + +static struct dentry *proc_tgid_net_lookup(struct inode *dir, + struct dentry *dentry, struct nameidata *nd) +{ + struct dentry *de; + struct net *net; + + de = ERR_PTR(-ENOENT); + net = get_proc_task_net(dir); + if (net != NULL) { + de = proc_lookup_de(net->proc_net, dir, dentry); + put_net(net); + } + return de; +} + +static int proc_tgid_net_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + struct inode *inode = 
dentry->d_inode; + struct net *net; + + net = get_proc_task_net(inode); + + generic_fillattr(inode, stat); + + if (net != NULL) { + stat->nlink = net->proc_net->nlink; + put_net(net); + } + + return 0; +} + +const struct inode_operations proc_net_inode_operations = { + .lookup = proc_tgid_net_lookup, + .getattr = proc_tgid_net_getattr, +}; + +static int proc_tgid_net_readdir(struct file *filp, void *dirent, + filldir_t filldir) +{ + int ret; + struct net *net; + + ret = -EINVAL; + net = get_proc_task_net(filp->f_path.dentry->d_inode); + if (net != NULL) { + ret = proc_readdir_de(net->proc_net, filp, dirent, filldir); + put_net(net); + } + return ret; +} + +const struct file_operations proc_net_operations = { + .read = generic_read_dir, + .readdir = proc_tgid_net_readdir, +}; + struct proc_dir_entry *proc_net_fops_create(struct net *net, const char *name, mode_t mode, const struct file_operations *fops) @@ -83,14 +159,6 @@ struct net *get_proc_net(const struct inode *inode) } EXPORT_SYMBOL_GPL(get_proc_net); -static struct proc_dir_entry *shadow_pde; - -static struct proc_dir_entry *proc_net_shadow(struct task_struct *task, - struct proc_dir_entry *de) -{ - return task->nsproxy->net_ns->proc_net; -} - struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, struct proc_dir_entry *parent) { @@ -104,45 +172,39 @@ EXPORT_SYMBOL_GPL(proc_net_mkdir); static __net_init int proc_net_ns_init(struct net *net) { - struct proc_dir_entry *root, *netd, *net_statd; + struct proc_dir_entry *netd, *net_statd; int err; err = -ENOMEM; - root = kzalloc(sizeof(*root), GFP_KERNEL); - if (!root) + netd = kzalloc(sizeof(*netd), GFP_KERNEL); + if (!netd) goto out; - err = -EEXIST; - netd = proc_net_mkdir(net, "net", root); - if (!netd) - goto free_root; + netd->data = net; + netd->nlink = 2; + netd->name = "net"; + netd->namelen = 3; + netd->parent = &proc_root; err = -EEXIST; net_statd = proc_net_mkdir(net, "stat", netd); if (!net_statd) goto free_net; - root->data = net; 
- - net->proc_net_root = root; net->proc_net = netd; net->proc_net_stat = net_statd; - err = 0; + return 0; +free_net: + kfree(netd); out: return err; -free_net: - remove_proc_entry("net", root); -free_root: - kfree(root); - goto out; } static __net_exit void proc_net_ns_exit(struct net *net) { remove_proc_entry("stat", net->proc_net); - remove_proc_entry("net", net->proc_net_root); - kfree(net->proc_net_root); + kfree(net->proc_net); } static struct pernet_operations __net_initdata proc_net_ns_ops = { @@ -152,8 +214,7 @@ static struct pernet_operations __net_initdata proc_net_ns_ops = { int __init proc_net_init(void) { - shadow_pde = proc_mkdir("net", NULL); - shadow_pde->shadow_proc = proc_net_shadow; + proc_symlink("net", NULL, "self/net"); return register_pernet_subsys(&proc_net_ns_ops); } diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index d9a9e718ad19..9b6c935f69cf 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -50,8 +50,6 @@ typedef int (read_proc_t)(char *page, char **start, off_t off, typedef int (write_proc_t)(struct file *file, const char __user *buffer, unsigned long count, void *data); typedef int (get_info_t)(char *, char **, off_t, int); -typedef struct proc_dir_entry *(shadow_proc_t)(struct task_struct *task, - struct proc_dir_entry *pde); struct proc_dir_entry { unsigned int low_ino; @@ -82,7 +80,6 @@ struct proc_dir_entry { int pde_users; /* number of callers into module in progress */ spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */ struct completion *pde_unload_completion; - shadow_proc_t *shadow_proc; }; struct kcore_list { diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 28738b7d53eb..923f2b8b9096 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -31,7 +31,6 @@ struct net { struct proc_dir_entry *proc_net; struct proc_dir_entry *proc_net_stat; - struct proc_dir_entry *proc_net_root; struct list_head sysctl_table_headers; -- 
cgit v1.2.3 From e621e69137b24fdbbe7ad28214e8d81e614c25b7 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 7 Mar 2008 11:11:13 -0800 Subject: [NET]: include <linux/types.h> into linux/ethtool.h for __u* typedef Signed-off-by: Kirill A. Shutemov Signed-off-by: David S. Miller --- include/linux/ethtool.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index fcbe8b640ffb..c8d216357865 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -12,6 +12,7 @@ #ifndef _LINUX_ETHTOOL_H #define _LINUX_ETHTOOL_H +#include <linux/types.h> /* This should work for both 32 and 64 bit userland. */ struct ethtool_cmd { -- cgit v1.2.3 From c37dcd334c0b0a46a90cfa13b9f69e2aaa89bc09 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 6 Mar 2008 12:34:50 -0500 Subject: NFS: Fix the fsid revalidation in nfs_update_inode() When we detect that we've crossed a mountpoint on the remote server, we must take care not to use that inode to revalidate the fsid on our current superblock. To do so, we label the inode as a remote mountpoint, and check for that in nfs_update_inode(). Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 6 ++++-- include/linux/nfs_fs.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 966a8850aa30..a4c7cf2bff3a 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -299,6 +299,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) else inode->i_op = &nfs_mountpoint_inode_operations; inode->i_fop = NULL; + set_bit(NFS_INO_MOUNTPOINT, &nfsi->flags); } } else if (S_ISLNK(inode->i_mode)) inode->i_op = &nfs_symlink_inode_operations; @@ -1003,8 +1004,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) server = NFS_SERVER(inode); /* Update the fsid?
*/ - if (S_ISDIR(inode->i_mode) - && !nfs_fsid_equal(&server->fsid, &fattr->fsid)) + if (S_ISDIR(inode->i_mode) && + !nfs_fsid_equal(&server->fsid, &fattr->fsid) && + !test_bit(NFS_INO_MOUNTPOINT, &nfsi->flags)) server->fsid = fattr->fsid; /* diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index a69ba80f2dfe..f4a0e4c218df 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -195,6 +195,7 @@ struct nfs_inode { #define NFS_INO_ADVISE_RDPLUS (1) /* advise readdirplus */ #define NFS_INO_STALE (2) /* possible stale inode */ #define NFS_INO_ACL_LRU_SET (3) /* Inode is on the LRU list */ +#define NFS_INO_MOUNTPOINT (4) /* inode is remote mountpoint */ static inline struct nfs_inode *NFS_I(const struct inode *inode) { -- cgit v1.2.3 From 38332cb98772f5ea757e6486bed7ed0381cb5f98 Mon Sep 17 00:00:00 2001 From: Segher Boessenkool Date: Tue, 4 Mar 2008 14:59:54 -0800 Subject: time: prevent the loop in timespec_add_ns() from being optimised away Since some architectures don't support __udivdi3(). Signed-off-by: Segher Boessenkool Cc: john stultz Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/time.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index 2091a19f1655..d32ef0ad4c0a 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -174,6 +174,10 @@ static inline void timespec_add_ns(struct timespec *a, u64 ns) { ns += a->tv_nsec; while(unlikely(ns >= NSEC_PER_SEC)) { + /* The following asm() prevents the compiler from + * optimising this loop into a modulo operation. 
*/ + asm("" : "+r"(ns)); + ns -= NSEC_PER_SEC; a->tv_sec++; } -- cgit v1.2.3 From 10a398d04c4a1fc395840f4d040493375f562302 Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Tue, 4 Mar 2008 15:14:26 -0800 Subject: time: remove obsolete CLOCK_TICK_ADJUST The first version of the ntp_interval/tick_length inconsistent usage patch was recently merged as bbe4d18ac2e058c56adb0cd71f49d9ed3216a405 http://git.kernel.org/gitweb.cgi?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=bbe4d18ac2e058c56adb0cd71f49d9ed3216a405 While the fix did greatly improve the situation, it was correctly pointed out by Roman that it does have a small bug: If the users change clocksources after the system has been running and NTP has made corrections, the corrections made against the old clocksource will be applied against the new clocksource, causing error. The second attempt, which corrects the issue in the NTP_INTERVAL_LENGTH definition has also made it up-stream as commit e13a2e61dd5152f5499d2003470acf9c838eab84 http://git.kernel.org/gitweb.cgi?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=e13a2e61dd5152f5499d2003470acf9c838eab84 Roman has correctly pointed out that CLOCK_TICK_ADJUST is calculated based on the PIT's frequency, and isn't really relevant to non-PIT driven clocksources (that is, clocksources other than jiffies and pit). This patch reverts both of those changes, and simply removes CLOCK_TICK_ADJUST. This does remove the granularity error correction for users of the PIT and jiffies clocksources, but for the majority of users the granularity error should be within the 500ppm range NTP can accommodate. For systems that have granularity errors greater than 500ppm, the "ntp_tick_adj=" boot option can be used to compensate.
[johnstul@us.ibm.com: provided changelog] [mattilinnanvuori@yahoo.com: make ntp_tick_adj static] Signed-off-by: Roman Zippel Acked-by: john stultz Signed-off-by: Matti Linnanvuori Signed-off-by: Andrew Morton Cc: mingo@elte.hu Signed-off-by: Thomas Gleixner --- include/linux/timex.h | 9 +-------- kernel/time/ntp.c | 11 ++++++++++- kernel/time/timekeeping.c | 6 ++---- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timex.h b/include/linux/timex.h index c3f374786a43..8ea3e71ba7fa 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -232,14 +232,7 @@ static inline int ntp_synced(void) #else #define NTP_INTERVAL_FREQ (HZ) #endif - -#define CLOCK_TICK_OVERFLOW (LATCH * HZ - CLOCK_TICK_RATE) -#define CLOCK_TICK_ADJUST (((s64)CLOCK_TICK_OVERFLOW * NSEC_PER_SEC) / \ - (s64)CLOCK_TICK_RATE) - -/* Because using NSEC_PER_SEC would be too easy */ -#define NTP_INTERVAL_LENGTH ((((s64)TICK_USEC * NSEC_PER_USEC * USER_HZ) + \ - CLOCK_TICK_ADJUST) / NTP_INTERVAL_FREQ) +#define NTP_INTERVAL_LENGTH (NSEC_PER_SEC/NTP_INTERVAL_FREQ) /* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10).
*/ extern u64 current_tick_length(void); diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index d4bca927f715..5fd9b9469770 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -42,12 +42,13 @@ long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ long time_freq; /* frequency offset (scaled ppm)*/ static long time_reftime; /* time at last adjustment (s) */ long time_adjust; +static long ntp_tick_adj; static void ntp_update_frequency(void) { u64 second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) << TICK_LENGTH_SHIFT; - second_length += (s64)CLOCK_TICK_ADJUST << TICK_LENGTH_SHIFT; + second_length += (s64)ntp_tick_adj << TICK_LENGTH_SHIFT; second_length += (s64)time_freq << (TICK_LENGTH_SHIFT - SHIFT_NSEC); tick_length_base = second_length; @@ -402,3 +403,11 @@ leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0) notify_cmos_timer(); return(result); } + +static int __init ntp_tick_adj_setup(char *str) +{ + ntp_tick_adj = simple_strtol(str, NULL, 0); + return 1; +} + +__setup("ntp_tick_adj=", ntp_tick_adj_setup); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 1af9fb050fe2..671af612b768 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -187,8 +187,7 @@ static void change_clocksource(void) clock->error = 0; clock->xtime_nsec = 0; - clocksource_calculate_interval(clock, - (unsigned long)(current_tick_length()>>TICK_LENGTH_SHIFT)); + clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); tick_clock_notify(); @@ -245,8 +244,7 @@ void __init timekeeping_init(void) ntp_clear(); clock = clocksource_get_next(); - clocksource_calculate_interval(clock, - (unsigned long)(current_tick_length()>>TICK_LENGTH_SHIFT)); + clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); clock->cycle_last = clocksource_read(clock); xtime.tv_sec = sec; -- cgit v1.2.3 From fbab976d7ce4556d4212d554f766dae461d22e16 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Fri, 7 Mar 2008 08:57:54 -0600 Subject: 
firmware: provide stubs for the FW_LOADER=n case libsas has a case where it uses the firmware loader to provide services, but doesn't want to select it all the time. This currently causes a compile failure in libsas if FW_LOADER=n. Fix this by providing error stubs for the firmware loader API in the FW_LOADER=n case. Signed-off-by: James Bottomley Cc: Randy Dunlap Signed-off-by: Greg Kroah-Hartman --- include/linux/firmware.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firmware.h b/include/linux/firmware.h index 33d8f2087b6e..4d10c7328d2d 100644 --- a/include/linux/firmware.h +++ b/include/linux/firmware.h @@ -10,7 +10,10 @@ struct firmware { size_t size; u8 *data; }; + struct device; + +#if defined(CONFIG_FW_LOADER) || defined(CONFIG_FW_LOADER_MODULE) int request_firmware(const struct firmware **fw, const char *name, struct device *device); int request_firmware_nowait( @@ -19,4 +22,24 @@ int request_firmware_nowait( void (*cont)(const struct firmware *fw, void *context)); void release_firmware(const struct firmware *fw); +#else +static inline int request_firmware(const struct firmware **fw, + const char *name, + struct device *device) +{ + return -EINVAL; +} +static inline int request_firmware_nowait( + struct module *module, int uevent, + const char *name, struct device *device, void *context, + void (*cont)(const struct firmware *fw, void *context)) +{ + return -EINVAL; +} + +static inline void release_firmware(const struct firmware *fw) +{ +} +#endif + #endif -- cgit v1.2.3 From b5e85dee2a5433246d5b7488918a1a0ad22c046a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 10 Mar 2008 16:41:06 -0700 Subject: [NETFILTER]: nfnetlink: fix ifdef in nfnetlink_compat.h Use __KERNEL__ instead of __KERNEL to make sure the headers are not usable by the kernel. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netfilter/nfnetlink_compat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink_compat.h b/include/linux/netfilter/nfnetlink_compat.h index 02a42d875cf7..e1451760c9cd 100644 --- a/include/linux/netfilter/nfnetlink_compat.h +++ b/include/linux/netfilter/nfnetlink_compat.h @@ -1,6 +1,6 @@ #ifndef _NFNETLINK_COMPAT_H #define _NFNETLINK_COMPAT_H -#ifndef __KERNEL +#ifndef __KERNEL__ /* Old nfnetlink macros for userspace */ /* nfnetlink groups: Up to 32 maximum */ -- cgit v1.2.3 From e61062587d0484c3852e822e844416c728362438 Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Fri, 7 Mar 2008 11:02:00 -0500 Subject: USB: g_printer.h does not need to be "unifdef"ed. Since the header file g_printer.h doesn't depend on __KERNEL__, there's no need to unifdef it in the Kbuild file. Signed-off-by: Robert P. J. Day Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/Kbuild | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/Kbuild b/include/linux/usb/Kbuild index b8cba1dcb2c6..42e84fc315e3 100644 --- a/include/linux/usb/Kbuild +++ b/include/linux/usb/Kbuild @@ -3,5 +3,5 @@ header-y += cdc.h header-y += ch9.h header-y += gadgetfs.h header-y += midi.h -unifdef-y += g_printer.h +header-y += g_printer.h -- cgit v1.2.3 From 20f590df4fbb962d1f8fcb12c4b4e790c7054045 Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Fri, 7 Mar 2008 11:40:07 -0500 Subject: USB: Remove __KERNEL__ check from non-exported gadget.h. Since the header file gadget.h isn't being exported to userspace, there seems to be little point having a __KERNEL__ preprocessor check. Signed-off-by: Robert P. J.
Day Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/gadget.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index aa3047ff00d1..f3295296b435 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -15,8 +15,6 @@ #ifndef __LINUX_USB_GADGET_H #define __LINUX_USB_GADGET_H -#ifdef __KERNEL__ - struct usb_ep; /** @@ -848,6 +846,4 @@ extern struct usb_ep *usb_ep_autoconfig(struct usb_gadget *, extern void usb_ep_autoconfig_reset(struct usb_gadget *) __devinit; -#endif /* __KERNEL__ */ - #endif /* __LINUX_USB_GADGET_H */ -- cgit v1.2.3 From 9f9351bbe34a9b12966b1fb6f7c21cfe128340c1 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 10 Mar 2008 11:43:34 -0700 Subject: rename DECLARE_PCI_DEVICE_TABLE to DEFINE_PCI_DEVICE_TABLE This macro is used to define tables, not to declare them. Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/pci.txt | 4 ++-- include/linux/pci.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/Documentation/pci.txt b/Documentation/pci.txt index bb7bd27d4682..d2c2e6e2b224 100644 --- a/Documentation/pci.txt +++ b/Documentation/pci.txt @@ -123,7 +123,7 @@ initialization with a pointer to a structure describing the driver The ID table is an array of struct pci_device_id entries ending with an -all-zero entry; use of the macro DECLARE_PCI_DEVICE_TABLE is the preferred +all-zero entry; use of the macro DEFINE_PCI_DEVICE_TABLE is the preferred method of declaring the table. Each entry consists of: vendor,device Vendor and device ID to match (or PCI_ANY_ID) @@ -193,7 +193,7 @@ Tips on when/where to use the above attributes: o Do not mark the struct pci_driver. o The ID table array should be marked __devinitconst; this is done - automatically if the table is declared with DECLARE_PCI_DEVICE_TABLE(). 
+ automatically if the table is declared with DEFINE_PCI_DEVICE_TABLE(). o The probe() and remove() functions should be marked __devinit and __devexit respectively. All initialization functions diff --git a/include/linux/pci.h b/include/linux/pci.h index f3165e7ac431..38eff1947750 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -389,13 +389,13 @@ struct pci_driver { #define to_pci_driver(drv) container_of(drv, struct pci_driver, driver) /** - * DECLARE_PCI_DEVICE_TABLE - macro used to describe a pci device table + * DEFINE_PCI_DEVICE_TABLE - macro used to describe a pci device table * @_table: device table name * * This macro is used to create a struct pci_device_id array (a device table) * in a generic manner. */ -#define DECLARE_PCI_DEVICE_TABLE(_table) \ +#define DEFINE_PCI_DEVICE_TABLE(_table) \ const struct pci_device_id _table[] __devinitconst /** -- cgit v1.2.3 From e1f19995f55294fbb00ea22ba85d7b0d80ba3813 Mon Sep 17 00:00:00 2001 From: Alex Dubov Date: Mon, 10 Mar 2008 11:43:37 -0700 Subject: memstick: introduce correct definitions in the header Thanks to some input from kind people at JMicron it is now possible to have more correct definitions of protocol structures and bit field semantics. 
Signed-off-by: Alex Dubov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/memstick/core/memstick.c | 4 +- drivers/memstick/core/mspro_block.c | 16 ++--- drivers/memstick/host/tifm_ms.c | 19 +++--- include/linux/memstick.h | 130 ++++++++++++++++++++++++------------ 4 files changed, 105 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c index bba467fe4bce..5e0e960df456 100644 --- a/drivers/memstick/core/memstick.c +++ b/drivers/memstick/core/memstick.c @@ -271,7 +271,7 @@ void memstick_init_req_sg(struct memstick_request *mrq, unsigned char tpc, mrq->data_dir = READ; mrq->sg = *sg; - mrq->io_type = MEMSTICK_IO_SG; + mrq->long_data = 1; if (tpc == MS_TPC_SET_CMD || tpc == MS_TPC_EX_SET_CMD) mrq->need_card_int = 1; @@ -306,7 +306,7 @@ void memstick_init_req(struct memstick_request *mrq, unsigned char tpc, if (mrq->data_dir == WRITE) memcpy(mrq->data, buf, mrq->data_len); - mrq->io_type = MEMSTICK_IO_VAL; + mrq->long_data = 0; if (tpc == MS_TPC_SET_CMD || tpc == MS_TPC_EX_SET_CMD) mrq->need_card_int = 1; diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index 423ad8cf4bb9..214211c8ac9a 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -629,7 +629,7 @@ static void mspro_block_process_request(struct memstick_dev *card, param.system = msb->system; param.data_count = cpu_to_be16(page_count); param.data_address = cpu_to_be32((uint32_t)t_sec); - param.cmd_param = 0; + param.tpc_param = 0; msb->data_dir = rq_data_dir(req); msb->transfer_cmd = msb->data_dir == READ @@ -761,7 +761,7 @@ static int mspro_block_switch_to_parallel(struct memstick_dev *card) .system = 0, .data_count = 0, .data_address = 0, - .cmd_param = 0 + .tpc_param = 0 }; card->next_request = h_mspro_block_req_init; @@ -773,8 +773,8 @@ static int mspro_block_switch_to_parallel(struct memstick_dev *card) if 
(card->current_mrq.error) return card->current_mrq.error; - msb->system = 0; - host->set_param(host, MEMSTICK_INTERFACE, MEMSTICK_PARALLEL); + msb->system = MEMSTICK_SYS_PAR4; + host->set_param(host, MEMSTICK_INTERFACE, MEMSTICK_PAR4); card->next_request = h_mspro_block_req_init; msb->mrq_handler = h_mspro_block_default; @@ -802,7 +802,7 @@ static int mspro_block_read_attributes(struct memstick_dev *card) .system = msb->system, .data_count = cpu_to_be16(1), .data_address = 0, - .cmd_param = 0 + .tpc_param = 0 }; struct mspro_attribute *attr = NULL; struct mspro_sys_attr *s_attr = NULL; @@ -922,7 +922,7 @@ static int mspro_block_read_attributes(struct memstick_dev *card) param.system = msb->system; param.data_count = cpu_to_be16((rc / msb->page_size) + 1); param.data_address = cpu_to_be32(addr / msb->page_size); - param.cmd_param = 0; + param.tpc_param = 0; sg_init_one(&msb->req_sg[0], buffer, be16_to_cpu(param.data_count) * msb->page_size); @@ -964,7 +964,7 @@ static int mspro_block_init_card(struct memstick_dev *card) struct memstick_host *host = card->host; int rc = 0; - msb->system = 0x80; + msb->system = MEMSTICK_SYS_SERIAL; card->reg_addr.r_offset = offsetof(struct mspro_register, status); card->reg_addr.r_length = sizeof(struct ms_status_register); card->reg_addr.w_offset = offsetof(struct mspro_register, param); @@ -973,7 +973,7 @@ static int mspro_block_init_card(struct memstick_dev *card) if (memstick_set_rw_addr(card)) return -EIO; - if (host->caps & MEMSTICK_CAP_PARALLEL) { + if (host->caps & MEMSTICK_CAP_PAR4) { if (mspro_block_switch_to_parallel(card)) printk(KERN_WARNING "%s: could not switch to " "parallel interface\n", card->dev.bus_id); diff --git a/drivers/memstick/host/tifm_ms.c b/drivers/memstick/host/tifm_ms.c index 4fb24215bd95..5b5bd61b3a4a 100644 --- a/drivers/memstick/host/tifm_ms.c +++ b/drivers/memstick/host/tifm_ms.c @@ -209,7 +209,7 @@ static int tifm_ms_issue_cmd(struct tifm_ms *host) host->cmd_flags = 0; - if (host->req->io_type == 
MEMSTICK_IO_SG) { + if (host->req->long_data) { if (!host->no_dma) { if (1 != tifm_map_sg(sock, &host->req->sg, 1, host->req->data_dir == READ @@ -248,7 +248,7 @@ static int tifm_ms_issue_cmd(struct tifm_ms *host) cmd_mask = readl(sock->addr + SOCK_MS_SYSTEM); cmd_mask |= TIFM_MS_SYS_DATA | TIFM_MS_SYS_NOT_RDY; writel(cmd_mask, sock->addr + SOCK_MS_SYSTEM); - } else if (host->req->io_type == MEMSTICK_IO_VAL) { + } else { data = host->req->data; data_len = host->req->data_len; @@ -294,8 +294,7 @@ static int tifm_ms_issue_cmd(struct tifm_ms *host) cmd_mask |= TIFM_MS_SYS_NOT_RDY; dev_dbg(&sock->dev, "mask %x\n", cmd_mask); writel(cmd_mask, sock->addr + SOCK_MS_SYSTEM); - } else - BUG(); + } mod_timer(&host->timer, jiffies + host->timeout_jiffies); writel(TIFM_CTRL_LED | readl(sock->addr + SOCK_CONTROL), @@ -319,13 +318,13 @@ static void tifm_ms_complete_cmd(struct tifm_ms *host) int rc; del_timer(&host->timer); - if (host->req->io_type == MEMSTICK_IO_SG) { + if (host->req->long_data) { if (!host->no_dma) tifm_unmap_sg(sock, &host->req->sg, 1, host->req->data_dir == READ ? 
PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); - } else if (host->req->io_type == MEMSTICK_IO_VAL) { + } else { writel(~TIFM_MS_SYS_DATA & readl(sock->addr + SOCK_MS_SYSTEM), sock->addr + SOCK_MS_SYSTEM); @@ -365,7 +364,7 @@ static int tifm_ms_check_status(struct tifm_ms *host) if (!host->req->error) { if (!(host->cmd_flags & CMD_READY)) return 1; - if ((host->req->io_type == MEMSTICK_IO_SG) + if (host->req->long_data && !(host->cmd_flags & FIFO_READY)) return 1; if (host->req->need_card_int @@ -505,7 +504,7 @@ static void tifm_ms_set_param(struct memstick_host *msh, writel((~TIFM_CTRL_FAST_CLK) & readl(sock->addr + SOCK_CONTROL), sock->addr + SOCK_CONTROL); - } else if (value == MEMSTICK_PARALLEL) { + } else if (value == MEMSTICK_PAR4) { host->mode_mask = 0; writel(TIFM_CTRL_FAST_CLK | readl(sock->addr + SOCK_CONTROL), @@ -542,7 +541,7 @@ static int tifm_ms_initialize_host(struct tifm_ms *host) writel(0x0200 | TIFM_MS_SYS_NOT_RDY, sock->addr + SOCK_MS_SYSTEM); writel(0xffffffff, sock->addr + SOCK_MS_STATUS); if (tifm_has_ms_pif(sock)) - msh->caps |= MEMSTICK_CAP_PARALLEL; + msh->caps |= MEMSTICK_CAP_PAR4; return 0; } @@ -601,7 +600,7 @@ static void tifm_ms_remove(struct tifm_dev *sock) writel(TIFM_FIFO_INT_SETALL, sock->addr + SOCK_DMA_FIFO_INT_ENABLE_CLEAR); writel(TIFM_DMA_RESET, sock->addr + SOCK_DMA_CONTROL); - if ((host->req->io_type == MEMSTICK_IO_SG) && !host->no_dma) + if (host->req->long_data && !host->no_dma) tifm_unmap_sg(sock, &host->req->sg, 1, host->req->data_dir == READ ? 
PCI_DMA_TODEVICE diff --git a/include/linux/memstick.h b/include/linux/memstick.h index 334d059d6794..c104e722de06 100644 --- a/include/linux/memstick.h +++ b/include/linux/memstick.h @@ -22,6 +22,8 @@ struct ms_status_register { unsigned char reserved; unsigned char interrupt; #define MEMSTICK_INT_CMDNAK 0x0001 +#define MEMSTICK_INT_IOREQ 0x0008 +#define MEMSTICK_INT_IOBREQ 0x0010 #define MEMSTICK_INT_BREQ 0x0020 #define MEMSTICK_INT_ERR 0x0040 #define MEMSTICK_INT_CED 0x0080 @@ -47,13 +49,17 @@ struct ms_status_register { struct ms_id_register { unsigned char type; - unsigned char reserved; + unsigned char if_mode; unsigned char category; unsigned char class; } __attribute__((packed)); struct ms_param_register { unsigned char system; +#define MEMSTICK_SYS_ATEN 0xc0 +#define MEMSTICK_SYS_BAMD 0x80 +#define MEMSTICK_SYS_PAM 0x08 + unsigned char block_address_msb; unsigned short block_address; unsigned char cp; @@ -90,16 +96,48 @@ struct ms_register { struct mspro_param_register { unsigned char system; +#define MEMSTICK_SYS_SERIAL 0x80 +#define MEMSTICK_SYS_PAR4 0x00 +#define MEMSTICK_SYS_PAR8 0x40 + + unsigned short data_count; + unsigned int data_address; + unsigned char tpc_param; +} __attribute__((packed)); + +struct mspro_io_info_register { + unsigned char version; + unsigned char io_category; + unsigned char current_req; + unsigned char card_opt_info; + unsigned char rdy_wait_time; +} __attribute__((packed)); + +struct mspro_io_func_register { + unsigned char func_enable; + unsigned char func_select; + unsigned char func_intmask; + unsigned char transfer_mode; +} __attribute__((packed)); + +struct mspro_io_cmd_register { + unsigned short tpc_param; unsigned short data_count; unsigned int data_address; - unsigned char cmd_param; } __attribute__((packed)); struct mspro_register { - struct ms_status_register status; - struct ms_id_register id; - unsigned char reserved[8]; - struct mspro_param_register param; + struct ms_status_register status; + struct 
ms_id_register id; + unsigned char reserved0[8]; + struct mspro_param_register param; + unsigned char reserved1[8]; + struct mspro_io_info_register io_info; + struct mspro_io_func_register io_func; + unsigned char reserved2[7]; + struct mspro_io_cmd_register io_cmd; + unsigned char io_int; + unsigned char io_int_func; } __attribute__((packed)); struct ms_register_addr { @@ -110,49 +148,55 @@ struct ms_register_addr { } __attribute__((packed)); enum { + MS_TPC_READ_MG_STATUS = 0x01, MS_TPC_READ_LONG_DATA = 0x02, MS_TPC_READ_SHORT_DATA = 0x03, + MS_TPC_READ_MG_DATA = 0x03, MS_TPC_READ_REG = 0x04, - MS_TPC_READ_IO_DATA = 0x05, /* unverified */ + MS_TPC_READ_QUAD_DATA = 0x05, + MS_TPC_READ_IO_DATA = 0x05, MS_TPC_GET_INT = 0x07, MS_TPC_SET_RW_REG_ADRS = 0x08, MS_TPC_EX_SET_CMD = 0x09, - MS_TPC_WRITE_IO_DATA = 0x0a, /* unverified */ + MS_TPC_WRITE_QUAD_DATA = 0x0a, + MS_TPC_WRITE_IO_DATA = 0x0a, MS_TPC_WRITE_REG = 0x0b, MS_TPC_WRITE_SHORT_DATA = 0x0c, + MS_TPC_WRITE_MG_DATA = 0x0c, MS_TPC_WRITE_LONG_DATA = 0x0d, MS_TPC_SET_CMD = 0x0e }; enum { - MS_CMD_BLOCK_END = 0x33, - MS_CMD_RESET = 0x3c, - MS_CMD_BLOCK_WRITE = 0x55, - MS_CMD_SLEEP = 0x5a, - MS_CMD_BLOCK_ERASE = 0x99, - MS_CMD_BLOCK_READ = 0xaa, - MS_CMD_CLEAR_BUF = 0xc3, - MS_CMD_FLASH_STOP = 0xcc, - MSPRO_CMD_FORMAT = 0x10, - MSPRO_CMD_SLEEP = 0x11, - MSPRO_CMD_READ_DATA = 0x20, - MSPRO_CMD_WRITE_DATA = 0x21, - MSPRO_CMD_READ_ATRB = 0x24, - MSPRO_CMD_STOP = 0x25, - MSPRO_CMD_ERASE = 0x26, - MSPRO_CMD_SET_IBA = 0x46, - MSPRO_CMD_SET_IBD = 0x47 -/* - MSPRO_CMD_RESET - MSPRO_CMD_WAKEUP - MSPRO_CMD_IN_IO_DATA - MSPRO_CMD_OUT_IO_DATA - MSPRO_CMD_READ_IO_ATRB - MSPRO_CMD_IN_IO_FIFO - MSPRO_CMD_OUT_IO_FIFO - MSPRO_CMD_IN_IOM - MSPRO_CMD_OUT_IOM -*/ + MS_CMD_BLOCK_END = 0x33, + MS_CMD_RESET = 0x3c, + MS_CMD_BLOCK_WRITE = 0x55, + MS_CMD_SLEEP = 0x5a, + MS_CMD_BLOCK_ERASE = 0x99, + MS_CMD_BLOCK_READ = 0xaa, + MS_CMD_CLEAR_BUF = 0xc3, + MS_CMD_FLASH_STOP = 0xcc, + MS_CMD_LOAD_ID = 0x60, + MS_CMD_CMP_ICV = 0x7f, + 
MSPRO_CMD_FORMAT = 0x10, + MSPRO_CMD_SLEEP = 0x11, + MSPRO_CMD_WAKEUP = 0x12, + MSPRO_CMD_READ_DATA = 0x20, + MSPRO_CMD_WRITE_DATA = 0x21, + MSPRO_CMD_READ_ATRB = 0x24, + MSPRO_CMD_STOP = 0x25, + MSPRO_CMD_ERASE = 0x26, + MSPRO_CMD_READ_QUAD = 0x27, + MSPRO_CMD_WRITE_QUAD = 0x28, + MSPRO_CMD_SET_IBD = 0x46, + MSPRO_CMD_GET_IBD = 0x47, + MSPRO_CMD_IN_IO_DATA = 0xb0, + MSPRO_CMD_OUT_IO_DATA = 0xb1, + MSPRO_CMD_READ_IO_ATRB = 0xb2, + MSPRO_CMD_IN_IO_FIFO = 0xb3, + MSPRO_CMD_OUT_IO_FIFO = 0xb4, + MSPRO_CMD_IN_IOM = 0xb5, + MSPRO_CMD_OUT_IOM = 0xb6, }; /*** Driver structures and functions ***/ @@ -165,7 +209,8 @@ enum memstick_param { MEMSTICK_POWER = 1, MEMSTICK_INTERFACE }; #define MEMSTICK_POWER_ON 1 #define MEMSTICK_SERIAL 0 -#define MEMSTICK_PARALLEL 1 +#define MEMSTICK_PAR4 1 +#define MEMSTICK_PAR8 2 struct memstick_host; struct memstick_driver; @@ -195,11 +240,7 @@ struct memstick_request { unsigned char data_dir:1, need_card_int:1, get_int_reg:1, - io_type:2; -#define MEMSTICK_IO_NONE 0 -#define MEMSTICK_IO_VAL 1 -#define MEMSTICK_IO_SG 2 - + long_data:1; unsigned char int_reg; int error; union { @@ -231,8 +272,9 @@ struct memstick_host { struct mutex lock; unsigned int id; unsigned int caps; -#define MEMSTICK_CAP_PARALLEL 1 -#define MEMSTICK_CAP_AUTO_GET_INT 2 +#define MEMSTICK_CAP_AUTO_GET_INT 1 +#define MEMSTICK_CAP_PAR4 2 +#define MEMSTICK_CAP_PAR8 4 struct work_struct media_checker; struct class_device cdev; -- cgit v1.2.3 From d114ad54ffb020dc781b6159c1c2f391c6ec418f Mon Sep 17 00:00:00 2001 From: Alex Dubov Date: Mon, 10 Mar 2008 11:43:38 -0700 Subject: memstick: add memstick_suspend/resume_host methods Bus driver may need to be informed that host is being suspended/resumed. 
Signed-off-by: Alex Dubov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/memstick/core/memstick.c | 25 +++++++++++++++++++++++++ drivers/memstick/host/tifm_ms.c | 8 ++++---- include/linux/memstick.h | 2 ++ 3 files changed, 31 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c index 5e0e960df456..3c97bac4e47b 100644 --- a/drivers/memstick/core/memstick.c +++ b/drivers/memstick/core/memstick.c @@ -561,6 +561,31 @@ void memstick_free_host(struct memstick_host *host) } EXPORT_SYMBOL(memstick_free_host); +/** + * memstick_suspend_host - notify bus driver of host suspension + * @host - host to use + */ +void memstick_suspend_host(struct memstick_host *host) +{ + mutex_lock(&host->lock); + host->set_param(host, MEMSTICK_POWER, MEMSTICK_POWER_OFF); + mutex_unlock(&host->lock); +} +EXPORT_SYMBOL(memstick_suspend_host); + +/** + * memstick_resume_host - notify bus driver of host resumption + * @host - host to use + */ +void memstick_resume_host(struct memstick_host *host) +{ + mutex_lock(&host->lock); + host->set_param(host, MEMSTICK_POWER, MEMSTICK_POWER_ON); + mutex_unlock(&host->lock); + memstick_detect_change(host); +} +EXPORT_SYMBOL(memstick_resume_host); + int memstick_register_driver(struct memstick_driver *drv) { drv->driver.bus = &memstick_bus_type; diff --git a/drivers/memstick/host/tifm_ms.c b/drivers/memstick/host/tifm_ms.c index 5b5bd61b3a4a..8b1c102fc317 100644 --- a/drivers/memstick/host/tifm_ms.c +++ b/drivers/memstick/host/tifm_ms.c @@ -627,17 +627,17 @@ static void tifm_ms_remove(struct tifm_dev *sock) static int tifm_ms_suspend(struct tifm_dev *sock, pm_message_t state) { + struct memstick_host *msh = tifm_get_drvdata(sock); + + memstick_suspend_host(msh); return 0; } static int tifm_ms_resume(struct tifm_dev *sock) { struct memstick_host *msh = tifm_get_drvdata(sock); - struct tifm_ms *host = memstick_priv(msh); - - 
tifm_ms_initialize_host(host); - memstick_detect_change(msh); + memstick_resume_host(msh); return 0; } diff --git a/include/linux/memstick.h b/include/linux/memstick.h index c104e722de06..b7ee25888836 100644 --- a/include/linux/memstick.h +++ b/include/linux/memstick.h @@ -312,6 +312,8 @@ int memstick_add_host(struct memstick_host *host); void memstick_remove_host(struct memstick_host *host); void memstick_free_host(struct memstick_host *host); void memstick_detect_change(struct memstick_host *host); +void memstick_suspend_host(struct memstick_host *host); +void memstick_resume_host(struct memstick_host *host); void memstick_init_req_sg(struct memstick_request *mrq, unsigned char tpc, struct scatterlist *sg); -- cgit v1.2.3 From 92b22d935fed1e4d88b9b6f9a674ab2a4272ee78 Mon Sep 17 00:00:00 2001 From: Alex Dubov Date: Mon, 10 Mar 2008 11:43:40 -0700 Subject: tifm: fix the MemoryStick host fifo handling code Additional input received from JMicron on MemoryStick host interfaces showed that some assumptions in fifo handling code were incorrect. This patch also fixes data corruption that used to occur during PIO transfers. Signed-off-by: Alex Dubov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/memstick/host/tifm_ms.c | 524 ++++++++++++++++++++-------------------- include/linux/tifm.h | 2 +- 2 files changed, 264 insertions(+), 262 deletions(-) (limited to 'include/linux') diff --git a/drivers/memstick/host/tifm_ms.c b/drivers/memstick/host/tifm_ms.c index c62e709ca771..b88f5b30efbf 100644 --- a/drivers/memstick/host/tifm_ms.c +++ b/drivers/memstick/host/tifm_ms.c @@ -24,275 +24,289 @@ static int no_dma; module_param(no_dma, bool, 0644); -#define TIFM_MS_TIMEOUT 0x00100 -#define TIFM_MS_BADCRC 0x00200 -#define TIFM_MS_EOTPC 0x01000 -#define TIFM_MS_INT 0x02000 - -/* The meaning of the bit majority in this constant is unknown.
*/ -#define TIFM_MS_SERIAL 0x04010 +/* + * Some control bits of TIFM appear to conform to Sony's reference design, + * so I'm just assuming they all are. + */ -#define TIFM_MS_SYS_LATCH 0x00100 -#define TIFM_MS_SYS_NOT_RDY 0x00800 -#define TIFM_MS_SYS_DATA 0x10000 +#define TIFM_MS_STAT_DRQ 0x04000 +#define TIFM_MS_STAT_MSINT 0x02000 +#define TIFM_MS_STAT_RDY 0x01000 +#define TIFM_MS_STAT_CRC 0x00200 +#define TIFM_MS_STAT_TOE 0x00100 +#define TIFM_MS_STAT_EMP 0x00020 +#define TIFM_MS_STAT_FUL 0x00010 +#define TIFM_MS_STAT_CED 0x00008 +#define TIFM_MS_STAT_ERR 0x00004 +#define TIFM_MS_STAT_BRQ 0x00002 +#define TIFM_MS_STAT_CNK 0x00001 + +#define TIFM_MS_SYS_DMA 0x10000 +#define TIFM_MS_SYS_RESET 0x08000 +#define TIFM_MS_SYS_SRAC 0x04000 +#define TIFM_MS_SYS_INTEN 0x02000 +#define TIFM_MS_SYS_NOCRC 0x01000 +#define TIFM_MS_SYS_INTCLR 0x00800 +#define TIFM_MS_SYS_MSIEN 0x00400 +#define TIFM_MS_SYS_FCLR 0x00200 +#define TIFM_MS_SYS_FDIR 0x00100 +#define TIFM_MS_SYS_DAM 0x00080 +#define TIFM_MS_SYS_DRM 0x00040 +#define TIFM_MS_SYS_DRQSL 0x00020 +#define TIFM_MS_SYS_REI 0x00010 +#define TIFM_MS_SYS_REO 0x00008 +#define TIFM_MS_SYS_BSY_MASK 0x00007 + +#define TIFM_MS_SYS_FIFO (TIFM_MS_SYS_INTEN | TIFM_MS_SYS_MSIEN \ + | TIFM_MS_SYS_FCLR | TIFM_MS_SYS_BSY_MASK) /* Hardware flags */ enum { - CMD_READY = 0x0001, - FIFO_READY = 0x0002, - CARD_READY = 0x0004, - DATA_CARRY = 0x0008 + CMD_READY = 0x01, + FIFO_READY = 0x02, + CARD_INT = 0x04 }; struct tifm_ms { struct tifm_dev *dev; - unsigned short eject:1, - no_dma:1; - unsigned short cmd_flags; + struct timer_list timer; + struct memstick_request *req; unsigned int mode_mask; unsigned int block_pos; unsigned long timeout_jiffies; - - struct timer_list timer; - struct memstick_request *req; + unsigned char eject:1, + use_dma:1; + unsigned char cmd_flags; + unsigned char io_pos; unsigned int io_word; }; -static void tifm_ms_read_fifo(struct tifm_ms *host, unsigned int fifo_offset, - struct page *pg, unsigned int page_off, - 
unsigned int length) +static unsigned int tifm_ms_read_data(struct tifm_ms *host, + unsigned char *buf, unsigned int length) { struct tifm_dev *sock = host->dev; - unsigned int cnt = 0, off = 0; - unsigned char *buf = kmap_atomic(pg, KM_BIO_DST_IRQ) + page_off; + unsigned int off = 0; + + while (host->io_pos && length) { + buf[off++] = host->io_word & 0xff; + host->io_word >>= 8; + length--; + host->io_pos--; + } + + if (!length) + return off; + + while (!(TIFM_MS_STAT_EMP & readl(sock->addr + SOCK_MS_STATUS))) { + if (length < 4) + break; + *(unsigned int *)(buf + off) = __raw_readl(sock->addr + + SOCK_MS_DATA); + length -= 4; + off += 4; + } - if (host->cmd_flags & DATA_CARRY) { - while ((fifo_offset & 3) && length) { + if (length + && !(TIFM_MS_STAT_EMP & readl(sock->addr + SOCK_MS_STATUS))) { + host->io_word = readl(sock->addr + SOCK_MS_DATA); + for (host->io_pos = 4; host->io_pos; --host->io_pos) { buf[off++] = host->io_word & 0xff; host->io_word >>= 8; length--; - fifo_offset++; + if (!length) + break; } - if (!(fifo_offset & 3)) - host->cmd_flags &= ~DATA_CARRY; - if (!length) - return; } - do { - host->io_word = readl(sock->addr + SOCK_FIFO_ACCESS - + fifo_offset); - cnt = 4; - while (length && cnt) { - buf[off++] = (host->io_word >> 8) & 0xff; - cnt--; - length--; - } - fifo_offset += 4 - cnt; - } while (length); - - if (cnt) - host->cmd_flags |= DATA_CARRY; - - kunmap_atomic(buf - page_off, KM_BIO_DST_IRQ); + return off; } -static void tifm_ms_write_fifo(struct tifm_ms *host, unsigned int fifo_offset, - struct page *pg, unsigned int page_off, - unsigned int length) +static unsigned int tifm_ms_write_data(struct tifm_ms *host, + unsigned char *buf, unsigned int length) { struct tifm_dev *sock = host->dev; - unsigned int cnt = 0, off = 0; - unsigned char *buf = kmap_atomic(pg, KM_BIO_SRC_IRQ) + page_off; + unsigned int off = 0; - if (host->cmd_flags & DATA_CARRY) { - while (fifo_offset & 3) { - host->io_word |= buf[off++] << (8 * (fifo_offset & 3)); + if 
(host->io_pos) { + while (host->io_pos < 4 && length) { + host->io_word |= buf[off++] << (host->io_pos * 8); + host->io_pos++; length--; - fifo_offset++; - } - if (!(fifo_offset & 3)) { - writel(host->io_word, sock->addr + SOCK_FIFO_ACCESS - + fifo_offset - 4); - - host->cmd_flags &= ~DATA_CARRY; } - if (!length) - return; } - do { - cnt = 4; + if (host->io_pos == 4 + && !(TIFM_MS_STAT_FUL & readl(sock->addr + SOCK_MS_STATUS))) { + writel(TIFM_MS_SYS_FDIR | readl(sock->addr + SOCK_MS_SYSTEM), + sock->addr + SOCK_MS_SYSTEM); + writel(host->io_word, sock->addr + SOCK_MS_DATA); + host->io_pos = 0; host->io_word = 0; - while (length && cnt) { - host->io_word |= buf[off++] << (4 - cnt); - cnt--; - length--; - } - fifo_offset += 4 - cnt; - if (!cnt) - writel(host->io_word, sock->addr + SOCK_FIFO_ACCESS - + fifo_offset - 4); - - } while (length); - - if (cnt) - host->cmd_flags |= DATA_CARRY; + } else if (host->io_pos) { + return off; + } - kunmap_atomic(buf - page_off, KM_BIO_SRC_IRQ); -} + if (!length) + return off; -static void tifm_ms_move_block(struct tifm_ms *host, unsigned int length) -{ - unsigned int t_size; - unsigned int off = host->req->sg.offset + host->block_pos; - unsigned int p_off, p_cnt; - struct page *pg; - unsigned long flags; + while (!(TIFM_MS_STAT_FUL & readl(sock->addr + SOCK_MS_STATUS))) { + if (length < 4) + break; + writel(TIFM_MS_SYS_FDIR | readl(sock->addr + SOCK_MS_SYSTEM), + sock->addr + SOCK_MS_SYSTEM); + __raw_writel(*(unsigned int *)(buf + off), + sock->addr + SOCK_MS_DATA); + length -= 4; + off += 4; + } - dev_dbg(&host->dev->dev, "moving block\n"); - local_irq_save(flags); - t_size = length; - while (t_size) { - pg = nth_page(sg_page(&host->req->sg), off >> PAGE_SHIFT); - p_off = offset_in_page(off); - p_cnt = PAGE_SIZE - p_off; - p_cnt = min(p_cnt, t_size); + switch (length) { + case 3: + host->io_word |= buf[off + 2] << 16; + host->io_pos++; + case 2: + host->io_word |= buf[off + 1] << 8; + host->io_pos++; + case 1: + host->io_word |= 
buf[off]; + host->io_pos++; + } - if (host->req->data_dir == WRITE) - tifm_ms_write_fifo(host, length - t_size, - pg, p_off, p_cnt); - else - tifm_ms_read_fifo(host, length - t_size, - pg, p_off, p_cnt); + off += host->io_pos; - t_size -= p_cnt; - } - local_irq_restore(flags); + return off; } -static int tifm_ms_transfer_data(struct tifm_ms *host, int skip) +static unsigned int tifm_ms_transfer_data(struct tifm_ms *host) { struct tifm_dev *sock = host->dev; - unsigned int length = host->req->sg.length - host->block_pos; + unsigned int length; + unsigned int off; + unsigned int t_size, p_off, p_cnt; + unsigned char *buf; + struct page *pg; + unsigned long flags = 0; - if (!length) - return 1; + if (host->req->long_data) { + length = host->req->sg.length - host->block_pos; + off = host->req->sg.offset + host->block_pos; + } else { + length = host->req->data_len - host->block_pos; + off = 0; + } + dev_dbg(&sock->dev, "fifo data transfer, %d, %d\n", length, + host->block_pos); + + while (length) { + if (host->req->long_data) { + pg = nth_page(sg_page(&host->req->sg), + off >> PAGE_SHIFT); + p_off = offset_in_page(off); + p_cnt = PAGE_SIZE - p_off; + p_cnt = min(p_cnt, length); + + local_irq_save(flags); + buf = kmap_atomic(pg, KM_BIO_SRC_IRQ) + p_off; + } else { + buf = host->req->data + host->block_pos; + p_cnt = host->req->data_len - host->block_pos; + } - if (length > TIFM_FIFO_SIZE) - length = TIFM_FIFO_SIZE; + t_size = host->req->data_dir == WRITE + ? 
tifm_ms_write_data(host, buf, p_cnt) + : tifm_ms_read_data(host, buf, p_cnt); - if (!skip) { - tifm_ms_move_block(host, length); - host->block_pos += length; - } + if (host->req->long_data) { + kunmap_atomic(buf - p_off, KM_BIO_SRC_IRQ); + local_irq_restore(flags); + } - if ((host->req->data_dir == READ) - && (host->block_pos == host->req->sg.length)) - return 1; + if (!t_size) + break; + host->block_pos += t_size; + length -= t_size; + off += t_size; + } - writel(ilog2(length) - 2, sock->addr + SOCK_FIFO_PAGE_SIZE); - if (host->req->data_dir == WRITE) - writel((1 << 8) | TIFM_DMA_TX, sock->addr + SOCK_DMA_CONTROL); - else - writel((1 << 8), sock->addr + SOCK_DMA_CONTROL); + dev_dbg(&sock->dev, "fifo data transfer, %d remaining\n", length); + if (!length && (host->req->data_dir == WRITE)) { + if (host->io_pos) { + writel(TIFM_MS_SYS_FDIR + | readl(sock->addr + SOCK_MS_SYSTEM), + sock->addr + SOCK_MS_SYSTEM); + writel(host->io_word, sock->addr + SOCK_MS_DATA); + } + writel(TIFM_MS_SYS_FDIR + | readl(sock->addr + SOCK_MS_SYSTEM), + sock->addr + SOCK_MS_SYSTEM); + writel(0, sock->addr + SOCK_MS_DATA); + } else { + readl(sock->addr + SOCK_MS_DATA); + } - return 0; + return length; } static int tifm_ms_issue_cmd(struct tifm_ms *host) { struct tifm_dev *sock = host->dev; unsigned char *data; - unsigned int data_len = 0, cmd = 0, cmd_mask = 0, cnt, tval = 0; + unsigned int data_len, cmd, sys_param; + host->cmd_flags = 0; + host->block_pos = 0; + host->io_pos = 0; + host->io_word = 0; host->cmd_flags = 0; - if (host->req->long_data) { - if (!host->no_dma) { - if (1 != tifm_map_sg(sock, &host->req->sg, 1, - host->req->data_dir == READ - ? 
PCI_DMA_FROMDEVICE - : PCI_DMA_TODEVICE)) { - host->req->error = -ENOMEM; - return host->req->error; - } - data_len = sg_dma_len(&host->req->sg); - } else - data_len = host->req->sg.length; - - writel(TIFM_FIFO_INT_SETALL, - sock->addr + SOCK_DMA_FIFO_INT_ENABLE_CLEAR); - writel(TIFM_FIFO_ENABLE, - sock->addr + SOCK_FIFO_CONTROL); - writel(TIFM_FIFO_INTMASK, - sock->addr + SOCK_DMA_FIFO_INT_ENABLE_SET); + data = host->req->data; - if (!host->no_dma) { - writel(ilog2(data_len) - 2, - sock->addr + SOCK_FIFO_PAGE_SIZE); - writel(sg_dma_address(&host->req->sg), - sock->addr + SOCK_DMA_ADDRESS); - if (host->req->data_dir == WRITE) - writel((1 << 8) | TIFM_DMA_TX | TIFM_DMA_EN, - sock->addr + SOCK_DMA_CONTROL); - else - writel((1 << 8) | TIFM_DMA_EN, - sock->addr + SOCK_DMA_CONTROL); - } else { - tifm_ms_transfer_data(host, - host->req->data_dir == READ); - } + host->use_dma = !no_dma; - cmd_mask = readl(sock->addr + SOCK_MS_SYSTEM); - cmd_mask |= TIFM_MS_SYS_DATA | TIFM_MS_SYS_NOT_RDY; - writel(cmd_mask, sock->addr + SOCK_MS_SYSTEM); + if (host->req->long_data) { + data_len = host->req->sg.length; + if (!is_power_of_2(data_len)) + host->use_dma = 0; } else { - data = host->req->data; data_len = host->req->data_len; + host->use_dma = 0; + } - cmd_mask = host->mode_mask | 0x2607; /* unknown constant */ - - if (host->req->data_dir == WRITE) { - cmd_mask |= TIFM_MS_SYS_LATCH; - writel(cmd_mask, sock->addr + SOCK_MS_SYSTEM); - for (cnt = 0; (data_len - cnt) >= 4; cnt += 4) { - writel(TIFM_MS_SYS_LATCH - | readl(sock->addr + SOCK_MS_SYSTEM), - sock->addr + SOCK_MS_SYSTEM); - __raw_writel(*(unsigned int *)(data + cnt), - sock->addr + SOCK_MS_DATA); - dev_dbg(&sock->dev, "writing %x\n", - *(int *)(data + cnt)); - } - switch (data_len - cnt) { - case 3: - tval |= data[cnt + 2] << 16; - case 2: - tval |= data[cnt + 1] << 8; - case 1: - tval |= data[cnt]; - writel(TIFM_MS_SYS_LATCH - | readl(sock->addr + SOCK_MS_SYSTEM), - sock->addr + SOCK_MS_SYSTEM); - writel(tval, sock->addr + 
SOCK_MS_DATA); - dev_dbg(&sock->dev, "writing %x\n", tval); - } + writel(TIFM_FIFO_INT_SETALL, + sock->addr + SOCK_DMA_FIFO_INT_ENABLE_CLEAR); + writel(TIFM_FIFO_ENABLE, + sock->addr + SOCK_FIFO_CONTROL); + + if (host->use_dma) { + if (1 != tifm_map_sg(sock, &host->req->sg, 1, + host->req->data_dir == READ + ? PCI_DMA_FROMDEVICE + : PCI_DMA_TODEVICE)) { + host->req->error = -ENOMEM; + return host->req->error; + } + data_len = sg_dma_len(&host->req->sg); - writel(TIFM_MS_SYS_LATCH - | readl(sock->addr + SOCK_MS_SYSTEM), - sock->addr + SOCK_MS_SYSTEM); - writel(0, sock->addr + SOCK_MS_DATA); - dev_dbg(&sock->dev, "writing %x\n", 0); + writel(ilog2(data_len) - 2, + sock->addr + SOCK_FIFO_PAGE_SIZE); + writel(TIFM_FIFO_INTMASK, + sock->addr + SOCK_DMA_FIFO_INT_ENABLE_SET); + sys_param = TIFM_DMA_EN | (1 << 8); + if (host->req->data_dir == WRITE) + sys_param |= TIFM_DMA_TX; - } else - writel(cmd_mask, sock->addr + SOCK_MS_SYSTEM); + writel(TIFM_FIFO_INTMASK, + sock->addr + SOCK_DMA_FIFO_INT_ENABLE_SET); - cmd_mask = readl(sock->addr + SOCK_MS_SYSTEM); - cmd_mask &= ~TIFM_MS_SYS_DATA; - cmd_mask |= TIFM_MS_SYS_NOT_RDY; - dev_dbg(&sock->dev, "mask %x\n", cmd_mask); - writel(cmd_mask, sock->addr + SOCK_MS_SYSTEM); + writel(sg_dma_address(&host->req->sg), + sock->addr + SOCK_DMA_ADDRESS); + writel(sys_param, sock->addr + SOCK_DMA_CONTROL); + } else { + writel(host->mode_mask | TIFM_MS_SYS_FIFO, + sock->addr + SOCK_MS_SYSTEM); + + writel(TIFM_FIFO_MORE, + sock->addr + SOCK_DMA_FIFO_INT_ENABLE_SET); } mod_timer(&host->timer, jiffies + host->timeout_jiffies); @@ -300,11 +314,21 @@ static int tifm_ms_issue_cmd(struct tifm_ms *host) sock->addr + SOCK_CONTROL); host->req->error = 0; + sys_param = readl(sock->addr + SOCK_MS_SYSTEM); + sys_param |= TIFM_MS_SYS_INTCLR; + + if (host->use_dma) + sys_param |= TIFM_MS_SYS_DMA; + else + sys_param &= ~TIFM_MS_SYS_DMA; + + writel(sys_param, sock->addr + SOCK_MS_SYSTEM); + cmd = (host->req->tpc & 0xf) << 12; cmd |= data_len; writel(cmd, 
sock->addr + SOCK_MS_COMMAND); - dev_dbg(&sock->dev, "executing TPC %x, %x\n", cmd, cmd_mask); + dev_dbg(&sock->dev, "executing TPC %x, %x\n", cmd, sys_param); return 0; } @@ -312,47 +336,20 @@ static void tifm_ms_complete_cmd(struct tifm_ms *host) { struct tifm_dev *sock = host->dev; struct memstick_host *msh = tifm_get_drvdata(sock); - unsigned int tval = 0, data_len; - unsigned char *data; int rc; del_timer(&host->timer); - if (host->req->long_data) { - if (!host->no_dma) - tifm_unmap_sg(sock, &host->req->sg, 1, - host->req->data_dir == READ - ? PCI_DMA_FROMDEVICE - : PCI_DMA_TODEVICE); - } else { - writel(~TIFM_MS_SYS_DATA & readl(sock->addr + SOCK_MS_SYSTEM), - sock->addr + SOCK_MS_SYSTEM); - - data = host->req->data; - data_len = host->req->data_len; - if (host->req->data_dir == READ) { - for (rc = 0; (data_len - rc) >= 4; rc += 4) - *(int *)(data + rc) - = __raw_readl(sock->addr - + SOCK_MS_DATA); - - if (data_len - rc) - tval = readl(sock->addr + SOCK_MS_DATA); - switch (data_len - rc) { - case 3: - data[rc + 2] = (tval >> 16) & 0xff; - case 2: - data[rc + 1] = (tval >> 8) & 0xff; - case 1: - data[rc] = tval & 0xff; - } - readl(sock->addr + SOCK_MS_DATA); - } - } + if (host->use_dma) + tifm_unmap_sg(sock, &host->req->sg, 1, + host->req->data_dir == READ + ? 
PCI_DMA_FROMDEVICE + : PCI_DMA_TODEVICE); writel((~TIFM_CTRL_LED) & readl(sock->addr + SOCK_CONTROL), sock->addr + SOCK_CONTROL); + dev_dbg(&sock->dev, "TPC complete\n"); do { rc = memstick_next_req(msh, &host->req); } while (!rc && tifm_ms_issue_cmd(host)); @@ -363,11 +360,10 @@ static int tifm_ms_check_status(struct tifm_ms *host) if (!host->req->error) { if (!(host->cmd_flags & CMD_READY)) return 1; - if (host->req->long_data - && !(host->cmd_flags & FIFO_READY)) + if (!(host->cmd_flags & FIFO_READY)) return 1; if (host->req->need_card_int - && !(host->cmd_flags & CARD_READY)) + && !(host->cmd_flags & CARD_INT)) return 1; } return 0; @@ -377,18 +373,24 @@ static int tifm_ms_check_status(struct tifm_ms *host) static void tifm_ms_data_event(struct tifm_dev *sock) { struct tifm_ms *host; - unsigned int fifo_status = 0; + unsigned int fifo_status = 0, host_status = 0; int rc = 1; spin_lock(&sock->lock); host = memstick_priv((struct memstick_host *)tifm_get_drvdata(sock)); fifo_status = readl(sock->addr + SOCK_DMA_FIFO_STATUS); - dev_dbg(&sock->dev, "data event: fifo_status %x, flags %x\n", - fifo_status, host->cmd_flags); + host_status = readl(sock->addr + SOCK_MS_STATUS); + dev_dbg(&sock->dev, + "data event: fifo_status %x, host_status %x, flags %x\n", + fifo_status, host_status, host->cmd_flags); if (host->req) { - if (fifo_status & TIFM_FIFO_READY) { - if (!host->no_dma || tifm_ms_transfer_data(host, 0)) { + if (host->use_dma && (fifo_status & 1)) { + host->cmd_flags |= FIFO_READY; + rc = tifm_ms_check_status(host); + } + if (!host->use_dma && (fifo_status & TIFM_FIFO_MORE)) { + if (!tifm_ms_transfer_data(host)) { host->cmd_flags |= FIFO_READY; rc = tifm_ms_check_status(host); } @@ -417,9 +419,9 @@ static void tifm_ms_card_event(struct tifm_dev *sock) host_status, host->cmd_flags); if (host->req) { - if (host_status & TIFM_MS_TIMEOUT) + if (host_status & TIFM_MS_STAT_TOE) host->req->error = -ETIME; - else if (host_status & TIFM_MS_BADCRC) + else if (host_status & 
TIFM_MS_STAT_CRC) host->req->error = -EILSEQ; if (host->req->error) { @@ -428,18 +430,17 @@ static void tifm_ms_card_event(struct tifm_dev *sock) writel(TIFM_DMA_RESET, sock->addr + SOCK_DMA_CONTROL); } - if (host_status & TIFM_MS_EOTPC) + if (host_status & TIFM_MS_STAT_RDY) host->cmd_flags |= CMD_READY; - if (host_status & TIFM_MS_INT) - host->cmd_flags |= CARD_READY; + + if (host_status & TIFM_MS_STAT_MSINT) + host->cmd_flags |= CARD_INT; rc = tifm_ms_check_status(host); } - writel(TIFM_MS_SYS_NOT_RDY | readl(sock->addr + SOCK_MS_SYSTEM), - sock->addr + SOCK_MS_SYSTEM); - writel((~TIFM_MS_SYS_DATA) & readl(sock->addr + SOCK_MS_SYSTEM), + writel(TIFM_MS_SYS_INTCLR | readl(sock->addr + SOCK_MS_SYSTEM), sock->addr + SOCK_MS_SYSTEM); if (!rc) @@ -499,7 +500,7 @@ static void tifm_ms_set_param(struct memstick_host *msh, break; case MEMSTICK_INTERFACE: if (value == MEMSTICK_SERIAL) { - host->mode_mask = TIFM_MS_SERIAL; + host->mode_mask = TIFM_MS_SYS_SRAC | TIFM_MS_SYS_REI; writel((~TIFM_CTRL_FAST_CLK) & readl(sock->addr + SOCK_CONTROL), sock->addr + SOCK_CONTROL); @@ -535,9 +536,10 @@ static int tifm_ms_initialize_host(struct tifm_ms *host) struct tifm_dev *sock = host->dev; struct memstick_host *msh = tifm_get_drvdata(sock); - host->mode_mask = TIFM_MS_SERIAL; - writel(0x8000, sock->addr + SOCK_MS_SYSTEM); - writel(0x0200 | TIFM_MS_SYS_NOT_RDY, sock->addr + SOCK_MS_SYSTEM); + host->mode_mask = TIFM_MS_SYS_SRAC | TIFM_MS_SYS_REI; + writel(TIFM_MS_SYS_RESET, sock->addr + SOCK_MS_SYSTEM); + writel(TIFM_MS_SYS_FCLR | TIFM_MS_SYS_INTCLR, + sock->addr + SOCK_MS_SYSTEM); writel(0xffffffff, sock->addr + SOCK_MS_STATUS); if (tifm_has_ms_pif(sock)) msh->caps |= MEMSTICK_CAP_PAR4; @@ -566,7 +568,6 @@ static int tifm_ms_probe(struct tifm_dev *sock) tifm_set_drvdata(sock, msh); host->dev = sock; host->timeout_jiffies = msecs_to_jiffies(1000); - host->no_dma = no_dma; setup_timer(&host->timer, tifm_ms_abort, (unsigned long)host); @@ -599,7 +600,7 @@ static void 
tifm_ms_remove(struct tifm_dev *sock) writel(TIFM_FIFO_INT_SETALL, sock->addr + SOCK_DMA_FIFO_INT_ENABLE_CLEAR); writel(TIFM_DMA_RESET, sock->addr + SOCK_DMA_CONTROL); - if (host->req->long_data && !host->no_dma) + if (host->use_dma) tifm_unmap_sg(sock, &host->req->sg, 1, host->req->data_dir == READ ? PCI_DMA_TODEVICE @@ -616,7 +617,8 @@ static void tifm_ms_remove(struct tifm_dev *sock) memstick_remove_host(msh); - writel(0x0200 | TIFM_MS_SYS_NOT_RDY, sock->addr + SOCK_MS_SYSTEM); + writel(TIFM_MS_SYS_FCLR | TIFM_MS_SYS_INTCLR, + sock->addr + SOCK_MS_SYSTEM); writel(0xffffffff, sock->addr + SOCK_MS_STATUS); memstick_free_host(msh); diff --git a/include/linux/tifm.h b/include/linux/tifm.h index da76ed85f595..848c0f392541 100644 --- a/include/linux/tifm.h +++ b/include/linux/tifm.h @@ -70,9 +70,9 @@ enum { #define TIFM_FIFO_ENABLE 0x00000001 #define TIFM_FIFO_READY 0x00000001 +#define TIFM_FIFO_MORE 0x00000008 #define TIFM_FIFO_INT_SETALL 0x0000ffff #define TIFM_FIFO_INTMASK 0x00000005 -#define TIFM_FIFO_SIZE 0x00000200 #define TIFM_DMA_RESET 0x00000002 #define TIFM_DMA_TX 0x00008000 -- cgit v1.2.3 From 60fdd931d577fcca351930fda4cde26ce07d35af Mon Sep 17 00:00:00 2001 From: Alex Dubov Date: Mon, 10 Mar 2008 11:43:43 -0700 Subject: memstick: add support for JMicron jmb38x MemoryStick host controller Signed-off-by: Alex Dubov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/memstick/Kconfig | 2 +- drivers/memstick/host/Kconfig | 10 + drivers/memstick/host/Makefile | 6 +- drivers/memstick/host/jmb38x_ms.c | 945 ++++++++++++++++++++++++++++++++++++++ include/linux/pci_ids.h | 1 + 5 files changed, 960 insertions(+), 4 deletions(-) create mode 100644 drivers/memstick/host/jmb38x_ms.c (limited to 'include/linux') diff --git a/drivers/memstick/Kconfig b/drivers/memstick/Kconfig index 1093fdb07297..f0ca41c20323 100644 --- a/drivers/memstick/Kconfig +++ b/drivers/memstick/Kconfig @@ -8,7 +8,7 @@ menuconfig MEMSTICK Sony MemoryStick is a proprietary 
storage/extension card protocol. If you want MemoryStick support, you should say Y here and also - to the specific driver for your MMC interface. + to the specific driver for your MemoryStick interface. if MEMSTICK diff --git a/drivers/memstick/host/Kconfig b/drivers/memstick/host/Kconfig index c002fcc3c879..4ce5c8dffb68 100644 --- a/drivers/memstick/host/Kconfig +++ b/drivers/memstick/host/Kconfig @@ -20,3 +20,13 @@ config MEMSTICK_TIFM_MS To compile this driver as a module, choose M here: the module will be called tifm_ms. +config MEMSTICK_JMICRON_38X + tristate "JMicron JMB38X MemoryStick interface support (EXPERIMENTAL)" + depends on EXPERIMENTAL && PCI + + help + Say Y here if you want to be able to access MemoryStick cards with + the JMicron(R) JMB38X MemoryStick card reader. + + To compile this driver as a module, choose M here: the + module will be called jmb38x_ms. diff --git a/drivers/memstick/host/Makefile b/drivers/memstick/host/Makefile index ee666380efa1..12530e4311d3 100644 --- a/drivers/memstick/host/Makefile +++ b/drivers/memstick/host/Makefile @@ -3,8 +3,8 @@ # ifeq ($(CONFIG_MEMSTICK_DEBUG),y) - EXTRA_CFLAGS += -DDEBUG + EXTRA_CFLAGS += -DDEBUG endif -obj-$(CONFIG_MEMSTICK_TIFM_MS) += tifm_ms.o - +obj-$(CONFIG_MEMSTICK_TIFM_MS) += tifm_ms.o +obj-$(CONFIG_MEMSTICK_JMICRON_38X) += jmb38x_ms.o diff --git a/drivers/memstick/host/jmb38x_ms.c b/drivers/memstick/host/jmb38x_ms.c new file mode 100644 index 000000000000..03fe8783b1ee --- /dev/null +++ b/drivers/memstick/host/jmb38x_ms.c @@ -0,0 +1,945 @@ +/* + * jmb38x_ms.c - JMicron jmb38x MemoryStick card reader + * + * Copyright (C) 2008 Alex Dubov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#include +#include +#include +#include +#include +#include + +#define DRIVER_NAME "jmb38x_ms" + +static int no_dma; +module_param(no_dma, bool, 0644); + +enum { + DMA_ADDRESS = 0x00, + BLOCK = 0x04, + DMA_CONTROL = 0x08, + TPC_P0 = 0x0c, + TPC_P1 = 0x10, + TPC = 0x14, + HOST_CONTROL = 0x18, + DATA = 0x1c, + STATUS = 0x20, + INT_STATUS = 0x24, + INT_STATUS_ENABLE = 0x28, + INT_SIGNAL_ENABLE = 0x2c, + TIMER = 0x30, + TIMER_CONTROL = 0x34, + PAD_OUTPUT_ENABLE = 0x38, + PAD_PU_PD = 0x3c, + CLOCK_DELAY = 0x40, + ADMA_ADDRESS = 0x44, + CLOCK_CONTROL = 0x48, + LED_CONTROL = 0x4c, + VERSION = 0x50 +}; + +struct jmb38x_ms_host { + struct jmb38x_ms *chip; + void __iomem *addr; + spinlock_t lock; + int id; + char host_id[DEVICE_ID_SIZE]; + int irq; + unsigned int block_pos; + unsigned long timeout_jiffies; + struct timer_list timer; + struct memstick_request *req; + unsigned char eject:1, + use_dma:1; + unsigned char cmd_flags; + unsigned char io_pos; + unsigned int io_word[2]; +}; + +struct jmb38x_ms { + struct pci_dev *pdev; + int host_cnt; + struct memstick_host *hosts[]; +}; + +#define BLOCK_COUNT_MASK 0xffff0000 +#define BLOCK_SIZE_MASK 0x00000fff + +#define DMA_CONTROL_ENABLE 0x00000001 + +#define TPC_DATA_SEL 0x00008000 +#define TPC_DIR 0x00004000 +#define TPC_WAIT_INT 0x00002000 +#define TPC_GET_INT 0x00000800 +#define TPC_CODE_SZ_MASK 0x00000700 +#define TPC_DATA_SZ_MASK 0x00000007 + +#define HOST_CONTROL_RESET_REQ 0x00008000 +#define HOST_CONTROL_REI 0x00004000 +#define HOST_CONTROL_LED 0x00000400 +#define HOST_CONTROL_FAST_CLK 0x00000200 +#define HOST_CONTROL_RESET 0x00000100 +#define HOST_CONTROL_POWER_EN 0x00000080 +#define HOST_CONTROL_CLOCK_EN 0x00000040 +#define HOST_CONTROL_IF_SHIFT 4 + +#define HOST_CONTROL_IF_SERIAL 0x0 +#define HOST_CONTROL_IF_PAR4 0x1 +#define HOST_CONTROL_IF_PAR8 0x3 + +#define STATUS_HAS_MEDIA 0x00000400 +#define STATUS_FIFO_EMPTY 0x00000200 +#define STATUS_FIFO_FULL 0x00000100 + +#define INT_STATUS_TPC_ERR 0x00080000 
+#define INT_STATUS_CRC_ERR 0x00040000 +#define INT_STATUS_TIMER_TO 0x00020000 +#define INT_STATUS_HSK_TO 0x00010000 +#define INT_STATUS_ANY_ERR 0x00008000 +#define INT_STATUS_FIFO_WRDY 0x00000080 +#define INT_STATUS_FIFO_RRDY 0x00000040 +#define INT_STATUS_MEDIA_OUT 0x00000010 +#define INT_STATUS_MEDIA_IN 0x00000008 +#define INT_STATUS_DMA_BOUNDARY 0x00000004 +#define INT_STATUS_EOTRAN 0x00000002 +#define INT_STATUS_EOTPC 0x00000001 + +#define INT_STATUS_ALL 0x000f801f + +#define PAD_OUTPUT_ENABLE_MS 0x0F3F + +#define PAD_PU_PD_OFF 0x7FFF0000 +#define PAD_PU_PD_ON_MS_SOCK0 0x5f8f0000 +#define PAD_PU_PD_ON_MS_SOCK1 0x0f0f0000 + +enum { + CMD_READY = 0x01, + FIFO_READY = 0x02, + REG_DATA = 0x04, + AUTO_GET_INT = 0x08 +}; + +static unsigned int jmb38x_ms_read_data(struct jmb38x_ms_host *host, + unsigned char *buf, unsigned int length) +{ + unsigned int off = 0; + + while (host->io_pos && length) { + buf[off++] = host->io_word[0] & 0xff; + host->io_word[0] >>= 8; + length--; + host->io_pos--; + } + + if (!length) + return off; + + while (!(STATUS_FIFO_EMPTY & readl(host->addr + STATUS))) { + if (length < 4) + break; + *(unsigned int *)(buf + off) = __raw_readl(host->addr + DATA); + length -= 4; + off += 4; + } + + if (length + && !(STATUS_FIFO_EMPTY & readl(host->addr + STATUS))) { + host->io_word[0] = readl(host->addr + DATA); + for (host->io_pos = 4; host->io_pos; --host->io_pos) { + buf[off++] = host->io_word[0] & 0xff; + host->io_word[0] >>= 8; + length--; + if (!length) + break; + } + } + + return off; +} + +static unsigned int jmb38x_ms_read_reg_data(struct jmb38x_ms_host *host, + unsigned char *buf, + unsigned int length) +{ + unsigned int off = 0; + + while (host->io_pos > 4 && length) { + buf[off++] = host->io_word[0] & 0xff; + host->io_word[0] >>= 8; + length--; + host->io_pos--; + } + + if (!length) + return off; + + while (host->io_pos && length) { + buf[off++] = host->io_word[1] & 0xff; + host->io_word[1] >>= 8; + length--; + host->io_pos--; + } + + 
return off; +} + +static unsigned int jmb38x_ms_write_data(struct jmb38x_ms_host *host, + unsigned char *buf, + unsigned int length) +{ + unsigned int off = 0; + + if (host->io_pos) { + while (host->io_pos < 4 && length) { + host->io_word[0] |= buf[off++] << (host->io_pos * 8); + host->io_pos++; + length--; + } + } + + if (host->io_pos == 4 + && !(STATUS_FIFO_FULL & readl(host->addr + STATUS))) { + writel(host->io_word[0], host->addr + DATA); + host->io_pos = 0; + host->io_word[0] = 0; + } else if (host->io_pos) { + return off; + } + + if (!length) + return off; + + while (!(STATUS_FIFO_FULL & readl(host->addr + STATUS))) { + if (length < 4) + break; + + __raw_writel(*(unsigned int *)(buf + off), + host->addr + DATA); + length -= 4; + off += 4; + } + + switch (length) { + case 3: + host->io_word[0] |= buf[off + 2] << 16; + host->io_pos++; + case 2: + host->io_word[0] |= buf[off + 1] << 8; + host->io_pos++; + case 1: + host->io_word[0] |= buf[off]; + host->io_pos++; + } + + off += host->io_pos; + + return off; +} + +static unsigned int jmb38x_ms_write_reg_data(struct jmb38x_ms_host *host, + unsigned char *buf, + unsigned int length) +{ + unsigned int off = 0; + + while (host->io_pos < 4 && length) { + host->io_word[0] &= ~(0xff << (host->io_pos * 8)); + host->io_word[0] |= buf[off++] << (host->io_pos * 8); + host->io_pos++; + length--; + } + + if (!length) + return off; + + while (host->io_pos < 8 && length) { + host->io_word[1] &= ~(0xff << (host->io_pos * 8)); + host->io_word[1] |= buf[off++] << (host->io_pos * 8); + host->io_pos++; + length--; + } + + return off; +} + +static int jmb38x_ms_transfer_data(struct jmb38x_ms_host *host) +{ + unsigned int length; + unsigned int off; + unsigned int t_size, p_off, p_cnt; + unsigned char *buf; + struct page *pg; + unsigned long flags = 0; + + if (host->req->long_data) { + length = host->req->sg.length - host->block_pos; + off = host->req->sg.offset + host->block_pos; + } else { + length = host->req->data_len - 
host->block_pos; + off = 0; + } + + while (length) { + if (host->req->long_data) { + pg = nth_page(sg_page(&host->req->sg), + off >> PAGE_SHIFT); + p_off = offset_in_page(off); + p_cnt = PAGE_SIZE - p_off; + p_cnt = min(p_cnt, length); + + local_irq_save(flags); + buf = kmap_atomic(pg, KM_BIO_SRC_IRQ) + p_off; + } else { + buf = host->req->data + host->block_pos; + p_cnt = host->req->data_len - host->block_pos; + } + + if (host->req->data_dir == WRITE) + t_size = !(host->cmd_flags & REG_DATA) + ? jmb38x_ms_write_data(host, buf, p_cnt) + : jmb38x_ms_write_reg_data(host, buf, p_cnt); + else + t_size = !(host->cmd_flags & REG_DATA) + ? jmb38x_ms_read_data(host, buf, p_cnt) + : jmb38x_ms_read_reg_data(host, buf, p_cnt); + + if (host->req->long_data) { + kunmap_atomic(buf - p_off, KM_BIO_SRC_IRQ); + local_irq_restore(flags); + } + + if (!t_size) + break; + host->block_pos += t_size; + length -= t_size; + off += t_size; + } + + if (!length && host->req->data_dir == WRITE) { + if (host->cmd_flags & REG_DATA) { + writel(host->io_word[0], host->addr + TPC_P0); + writel(host->io_word[1], host->addr + TPC_P1); + } else if (host->io_pos) { + writel(host->io_word[0], host->addr + DATA); + } + } + + return length; +} + +static int jmb38x_ms_issue_cmd(struct memstick_host *msh) +{ + struct jmb38x_ms_host *host = memstick_priv(msh); + unsigned char *data; + unsigned int data_len, cmd, t_val; + + if (!(STATUS_HAS_MEDIA & readl(host->addr + STATUS))) { + dev_dbg(msh->cdev.dev, "no media status\n"); + host->req->error = -ETIME; + return host->req->error; + } + + dev_dbg(msh->cdev.dev, "control %08x\n", + readl(host->addr + HOST_CONTROL)); + dev_dbg(msh->cdev.dev, "status %08x\n", readl(host->addr + INT_STATUS)); + dev_dbg(msh->cdev.dev, "hstatus %08x\n", readl(host->addr + STATUS)); + + host->cmd_flags = 0; + host->block_pos = 0; + host->io_pos = 0; + host->io_word[0] = 0; + host->io_word[1] = 0; + + cmd = host->req->tpc << 16; + cmd |= TPC_DATA_SEL; + + if (host->req->data_dir == 
READ) + cmd |= TPC_DIR; + if (host->req->need_card_int) + cmd |= TPC_WAIT_INT; + if (host->req->get_int_reg) + cmd |= TPC_GET_INT; + + data = host->req->data; + + host->use_dma = !no_dma; + + if (host->req->long_data) { + data_len = host->req->sg.length; + } else { + data_len = host->req->data_len; + host->use_dma = 0; + } + + if (data_len <= 8) { + cmd &= ~(TPC_DATA_SEL | 0xf); + host->cmd_flags |= REG_DATA; + cmd |= data_len & 0xf; + host->use_dma = 0; + } + + if (host->use_dma) { + if (1 != pci_map_sg(host->chip->pdev, &host->req->sg, 1, + host->req->data_dir == READ + ? PCI_DMA_FROMDEVICE + : PCI_DMA_TODEVICE)) { + host->req->error = -ENOMEM; + return host->req->error; + } + data_len = sg_dma_len(&host->req->sg); + writel(sg_dma_address(&host->req->sg), + host->addr + DMA_ADDRESS); + writel(((1 << 16) & BLOCK_COUNT_MASK) + | (data_len & BLOCK_SIZE_MASK), + host->addr + BLOCK); + writel(DMA_CONTROL_ENABLE, host->addr + DMA_CONTROL); + } else if (!(host->cmd_flags & REG_DATA)) { + writel(((1 << 16) & BLOCK_COUNT_MASK) + | (data_len & BLOCK_SIZE_MASK), + host->addr + BLOCK); + t_val = readl(host->addr + INT_STATUS_ENABLE); + t_val |= host->req->data_dir == READ + ? 
INT_STATUS_FIFO_RRDY + : INT_STATUS_FIFO_WRDY; + + writel(t_val, host->addr + INT_STATUS_ENABLE); + writel(t_val, host->addr + INT_SIGNAL_ENABLE); + } else { + cmd &= ~(TPC_DATA_SEL | 0xf); + host->cmd_flags |= REG_DATA; + cmd |= data_len & 0xf; + + if (host->req->data_dir == WRITE) { + jmb38x_ms_transfer_data(host); + writel(host->io_word[0], host->addr + TPC_P0); + writel(host->io_word[1], host->addr + TPC_P1); + } + } + + mod_timer(&host->timer, jiffies + host->timeout_jiffies); + writel(HOST_CONTROL_LED | readl(host->addr + HOST_CONTROL), + host->addr + HOST_CONTROL); + host->req->error = 0; + + writel(cmd, host->addr + TPC); + dev_dbg(msh->cdev.dev, "executing TPC %08x, len %x\n", cmd, data_len); + + return 0; +} + +static void jmb38x_ms_complete_cmd(struct memstick_host *msh, int last) +{ + struct jmb38x_ms_host *host = memstick_priv(msh); + unsigned int t_val = 0; + int rc; + + del_timer(&host->timer); + + dev_dbg(msh->cdev.dev, "c control %08x\n", + readl(host->addr + HOST_CONTROL)); + dev_dbg(msh->cdev.dev, "c status %08x\n", + readl(host->addr + INT_STATUS)); + dev_dbg(msh->cdev.dev, "c hstatus %08x\n", readl(host->addr + STATUS)); + + if (host->req->get_int_reg) { + t_val = readl(host->addr + TPC_P0); + host->req->int_reg = (t_val & 0xff); + } + + if (host->use_dma) { + writel(0, host->addr + DMA_CONTROL); + pci_unmap_sg(host->chip->pdev, &host->req->sg, 1, + host->req->data_dir == READ + ? 
PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); + } else { + t_val = readl(host->addr + INT_STATUS_ENABLE); + if (host->req->data_dir == READ) + t_val &= ~INT_STATUS_FIFO_RRDY; + else + t_val &= ~INT_STATUS_FIFO_WRDY; + + writel(t_val, host->addr + INT_STATUS_ENABLE); + writel(t_val, host->addr + INT_SIGNAL_ENABLE); + } + + writel((~HOST_CONTROL_LED) & readl(host->addr + HOST_CONTROL), + host->addr + HOST_CONTROL); + + if (!last) { + do { + rc = memstick_next_req(msh, &host->req); + } while (!rc && jmb38x_ms_issue_cmd(msh)); + } else { + do { + rc = memstick_next_req(msh, &host->req); + if (!rc) + host->req->error = -ETIME; + } while (!rc); + } +} + +static irqreturn_t jmb38x_ms_isr(int irq, void *dev_id) +{ + struct memstick_host *msh = dev_id; + struct jmb38x_ms_host *host = memstick_priv(msh); + unsigned int irq_status; + + spin_lock(&host->lock); + irq_status = readl(host->addr + INT_STATUS); + dev_dbg(&host->chip->pdev->dev, "irq_status = %08x\n", irq_status); + if (irq_status == 0 || irq_status == (~0)) { + spin_unlock(&host->lock); + return IRQ_NONE; + } + + if (host->req) { + if (irq_status & INT_STATUS_ANY_ERR) { + if (irq_status & INT_STATUS_CRC_ERR) + host->req->error = -EILSEQ; + else + host->req->error = -ETIME; + } else { + if (host->use_dma) { + if (irq_status & INT_STATUS_EOTRAN) + host->cmd_flags |= FIFO_READY; + } else { + if (irq_status & (INT_STATUS_FIFO_RRDY + | INT_STATUS_FIFO_WRDY)) + jmb38x_ms_transfer_data(host); + + if (irq_status & INT_STATUS_EOTRAN) { + jmb38x_ms_transfer_data(host); + host->cmd_flags |= FIFO_READY; + } + } + + if (irq_status & INT_STATUS_EOTPC) { + host->cmd_flags |= CMD_READY; + if (host->cmd_flags & REG_DATA) { + if (host->req->data_dir == READ) { + host->io_word[0] + = readl(host->addr + + TPC_P0); + host->io_word[1] + = readl(host->addr + + TPC_P1); + host->io_pos = 8; + + jmb38x_ms_transfer_data(host); + } + host->cmd_flags |= FIFO_READY; + } + } + } + } + + if (irq_status & (INT_STATUS_MEDIA_IN | INT_STATUS_MEDIA_OUT)) { 
+ dev_dbg(&host->chip->pdev->dev, "media changed\n"); + memstick_detect_change(msh); + } + + writel(irq_status, host->addr + INT_STATUS); + + if (host->req + && (((host->cmd_flags & CMD_READY) + && (host->cmd_flags & FIFO_READY)) + || host->req->error)) + jmb38x_ms_complete_cmd(msh, 0); + + spin_unlock(&host->lock); + return IRQ_HANDLED; +} + +static void jmb38x_ms_abort(unsigned long data) +{ + struct memstick_host *msh = (struct memstick_host *)data; + struct jmb38x_ms_host *host = memstick_priv(msh); + unsigned long flags; + + dev_dbg(&host->chip->pdev->dev, "abort\n"); + spin_lock_irqsave(&host->lock, flags); + if (host->req) { + host->req->error = -ETIME; + jmb38x_ms_complete_cmd(msh, 0); + } + spin_unlock_irqrestore(&host->lock, flags); +} + +static void jmb38x_ms_request(struct memstick_host *msh) +{ + struct jmb38x_ms_host *host = memstick_priv(msh); + unsigned long flags; + int rc; + + spin_lock_irqsave(&host->lock, flags); + if (host->req) { + spin_unlock_irqrestore(&host->lock, flags); + BUG(); + return; + } + + do { + rc = memstick_next_req(msh, &host->req); + } while (!rc && jmb38x_ms_issue_cmd(msh)); + spin_unlock_irqrestore(&host->lock, flags); +} + +static void jmb38x_ms_reset(struct jmb38x_ms_host *host) +{ + unsigned int host_ctl = readl(host->addr + HOST_CONTROL); + + writel(host_ctl | HOST_CONTROL_RESET_REQ | HOST_CONTROL_RESET, + host->addr + HOST_CONTROL); + + while (HOST_CONTROL_RESET_REQ + & (host_ctl = readl(host->addr + HOST_CONTROL))) { + ndelay(100); + dev_dbg(&host->chip->pdev->dev, "reset\n"); + } + + writel(INT_STATUS_ALL, host->addr + INT_STATUS_ENABLE); + writel(INT_STATUS_ALL, host->addr + INT_SIGNAL_ENABLE); + + dev_dbg(&host->chip->pdev->dev, "reset\n"); +} + +static void jmb38x_ms_set_param(struct memstick_host *msh, + enum memstick_param param, + int value) +{ + struct jmb38x_ms_host *host = memstick_priv(msh); + unsigned int host_ctl; + unsigned long flags; + + spin_lock_irqsave(&host->lock, flags); + + switch (param) { + case 
MEMSTICK_POWER: + if (value == MEMSTICK_POWER_ON) { + jmb38x_ms_reset(host); + + writel(host->id ? PAD_PU_PD_ON_MS_SOCK1 + : PAD_PU_PD_ON_MS_SOCK0, + host->addr + PAD_PU_PD); + + writel(PAD_OUTPUT_ENABLE_MS, + host->addr + PAD_OUTPUT_ENABLE); + + host_ctl = readl(host->addr + HOST_CONTROL); + host_ctl |= 7; + writel(host_ctl | (HOST_CONTROL_POWER_EN + | HOST_CONTROL_CLOCK_EN), + host->addr + HOST_CONTROL); + + dev_dbg(&host->chip->pdev->dev, "power on\n"); + } else if (value == MEMSTICK_POWER_OFF) { + writel(readl(host->addr + HOST_CONTROL) + & ~(HOST_CONTROL_POWER_EN + | HOST_CONTROL_CLOCK_EN), + host->addr + HOST_CONTROL); + writel(0, host->addr + PAD_OUTPUT_ENABLE); + writel(PAD_PU_PD_OFF, host->addr + PAD_PU_PD); + dev_dbg(&host->chip->pdev->dev, "power off\n"); + } + break; + case MEMSTICK_INTERFACE: + /* jmb38x_ms_reset(host); */ + + host_ctl = readl(host->addr + HOST_CONTROL); + host_ctl &= ~(3 << HOST_CONTROL_IF_SHIFT); + /* host_ctl |= 7; */ + + if (value == MEMSTICK_SERIAL) { + host_ctl &= ~HOST_CONTROL_FAST_CLK; + host_ctl |= HOST_CONTROL_IF_SERIAL + << HOST_CONTROL_IF_SHIFT; + host_ctl |= HOST_CONTROL_REI; + writel(0, host->addr + CLOCK_DELAY); + } else if (value == MEMSTICK_PAR4) { + host_ctl |= HOST_CONTROL_FAST_CLK; + host_ctl |= HOST_CONTROL_IF_PAR4 + << HOST_CONTROL_IF_SHIFT; + host_ctl &= ~HOST_CONTROL_REI; + writel(4, host->addr + CLOCK_DELAY); + } else if (value == MEMSTICK_PAR8) { + host_ctl |= HOST_CONTROL_FAST_CLK; + host_ctl |= HOST_CONTROL_IF_PAR8 + << HOST_CONTROL_IF_SHIFT; + host_ctl &= ~HOST_CONTROL_REI; + writel(4, host->addr + CLOCK_DELAY); + } + writel(host_ctl, host->addr + HOST_CONTROL); + break; + }; + + spin_unlock_irqrestore(&host->lock, flags); +} + +#ifdef CONFIG_PM + +static int jmb38x_ms_suspend(struct pci_dev *dev, pm_message_t state) +{ + struct jmb38x_ms *jm = pci_get_drvdata(dev); + int cnt; + + for (cnt = 0; cnt < jm->host_cnt; ++cnt) { + if (!jm->hosts[cnt]) + break; + memstick_suspend_host(jm->hosts[cnt]); + } + + 
pci_save_state(dev); + pci_enable_wake(dev, pci_choose_state(dev, state), 0); + pci_disable_device(dev); + pci_set_power_state(dev, pci_choose_state(dev, state)); + return 0; +} + +static int jmb38x_ms_resume(struct pci_dev *dev) +{ + struct jmb38x_ms *jm = pci_get_drvdata(dev); + int rc; + + pci_set_power_state(dev, PCI_D0); + pci_restore_state(dev); + rc = pci_enable_device(dev); + if (rc) + return rc; + pci_set_master(dev); + + pci_read_config_dword(dev, 0xac, &rc); + pci_write_config_dword(dev, 0xac, rc | 0x00470000); + + for (rc = 0; rc < jm->host_cnt; ++rc) { + if (!jm->hosts[rc]) + break; + memstick_resume_host(jm->hosts[rc]); + memstick_detect_change(jm->hosts[rc]); + } + + return 0; +} + +#else + +#define jmb38x_ms_suspend NULL +#define jmb38x_ms_resume NULL + +#endif /* CONFIG_PM */ + +static int jmb38x_ms_count_slots(struct pci_dev *pdev) +{ + int cnt, rc = 0; + + for (cnt = 0; cnt < PCI_ROM_RESOURCE; ++cnt) { + if (!(IORESOURCE_MEM & pci_resource_flags(pdev, cnt))) + break; + + if (256 != pci_resource_len(pdev, cnt)) + break; + + ++rc; + } + return rc; +} + +static struct memstick_host *jmb38x_ms_alloc_host(struct jmb38x_ms *jm, int cnt) +{ + struct memstick_host *msh; + struct jmb38x_ms_host *host; + + msh = memstick_alloc_host(sizeof(struct jmb38x_ms_host), + &jm->pdev->dev); + if (!msh) + return NULL; + + host = memstick_priv(msh); + host->chip = jm; + host->addr = ioremap(pci_resource_start(jm->pdev, cnt), + pci_resource_len(jm->pdev, cnt)); + if (!host->addr) + goto err_out_free; + + spin_lock_init(&host->lock); + host->id = cnt; + snprintf(host->host_id, DEVICE_ID_SIZE, DRIVER_NAME ":slot%d", + host->id); + host->irq = jm->pdev->irq; + host->timeout_jiffies = msecs_to_jiffies(4000); + msh->request = jmb38x_ms_request; + msh->set_param = jmb38x_ms_set_param; + /* + msh->caps = MEMSTICK_CAP_AUTO_GET_INT | MEMSTICK_CAP_PAR4 + | MEMSTICK_CAP_PAR8; + */ + msh->caps = MEMSTICK_CAP_PAR4 | MEMSTICK_CAP_PAR8; + + setup_timer(&host->timer, jmb38x_ms_abort, 
(unsigned long)msh); + + if (!request_irq(host->irq, jmb38x_ms_isr, IRQF_SHARED, host->host_id, + msh)) + return msh; + + iounmap(host->addr); +err_out_free: + kfree(msh); + return NULL; +} + +static void jmb38x_ms_free_host(struct memstick_host *msh) +{ + struct jmb38x_ms_host *host = memstick_priv(msh); + + free_irq(host->irq, msh); + iounmap(host->addr); + memstick_free_host(msh); +} + +static int jmb38x_ms_probe(struct pci_dev *pdev, + const struct pci_device_id *dev_id) +{ + struct jmb38x_ms *jm; + int pci_dev_busy = 0; + int rc, cnt; + + rc = pci_set_dma_mask(pdev, DMA_32BIT_MASK); + if (rc) + return rc; + + rc = pci_enable_device(pdev); + if (rc) + return rc; + + pci_set_master(pdev); + + rc = pci_request_regions(pdev, DRIVER_NAME); + if (rc) { + pci_dev_busy = 1; + goto err_out; + } + + pci_read_config_dword(pdev, 0xac, &rc); + pci_write_config_dword(pdev, 0xac, rc | 0x00470000); + + cnt = jmb38x_ms_count_slots(pdev); + if (!cnt) { + rc = -ENODEV; + pci_dev_busy = 1; + goto err_out; + } + + jm = kzalloc(sizeof(struct jmb38x_ms) + + cnt * sizeof(struct memstick_host *), GFP_KERNEL); + if (!jm) { + rc = -ENOMEM; + goto err_out_int; + } + + jm->pdev = pdev; + jm->host_cnt = cnt; + pci_set_drvdata(pdev, jm); + + for (cnt = 0; cnt < jm->host_cnt; ++cnt) { + jm->hosts[cnt] = jmb38x_ms_alloc_host(jm, cnt); + if (!jm->hosts[cnt]) + break; + + rc = memstick_add_host(jm->hosts[cnt]); + + if (rc) { + jmb38x_ms_free_host(jm->hosts[cnt]); + jm->hosts[cnt] = NULL; + break; + } + } + + if (cnt) + return 0; + + rc = -ENODEV; + + pci_set_drvdata(pdev, NULL); + kfree(jm); +err_out_int: + pci_release_regions(pdev); +err_out: + if (!pci_dev_busy) + pci_disable_device(pdev); + return rc; +} + +static void jmb38x_ms_remove(struct pci_dev *dev) +{ + struct jmb38x_ms *jm = pci_get_drvdata(dev); + struct jmb38x_ms_host *host; + int cnt; + unsigned long flags; + + for (cnt = 0; cnt < jm->host_cnt; ++cnt) { + if (!jm->hosts[cnt]) + break; + + host = memstick_priv(jm->hosts[cnt]); + + 
writel(0, host->addr + INT_SIGNAL_ENABLE); + writel(0, host->addr + INT_STATUS_ENABLE); + mmiowb(); + dev_dbg(&jm->pdev->dev, "interrupts off\n"); + spin_lock_irqsave(&host->lock, flags); + if (host->req) { + host->req->error = -ETIME; + jmb38x_ms_complete_cmd(jm->hosts[cnt], 1); + } + spin_unlock_irqrestore(&host->lock, flags); + + memstick_remove_host(jm->hosts[cnt]); + dev_dbg(&jm->pdev->dev, "host removed\n"); + + jmb38x_ms_free_host(jm->hosts[cnt]); + } + + pci_set_drvdata(dev, NULL); + pci_release_regions(dev); + pci_disable_device(dev); + kfree(jm); +} + +static struct pci_device_id jmb38x_ms_id_tbl [] = { + { PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB38X_MS, PCI_ANY_ID, + PCI_ANY_ID, 0, 0, 0 }, + { } +}; + +static struct pci_driver jmb38x_ms_driver = { + .name = DRIVER_NAME, + .id_table = jmb38x_ms_id_tbl, + .probe = jmb38x_ms_probe, + .remove = jmb38x_ms_remove, + .suspend = jmb38x_ms_suspend, + .resume = jmb38x_ms_resume +}; + +static int __init jmb38x_ms_init(void) +{ + return pci_register_driver(&jmb38x_ms_driver); +} + +static void __exit jmb38x_ms_exit(void) +{ + pci_unregister_driver(&jmb38x_ms_driver); +} + +MODULE_AUTHOR("Alex Dubov"); +MODULE_DESCRIPTION("JMicron jmb38x MemoryStick driver"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, jmb38x_ms_id_tbl); + +module_init(jmb38x_ms_init); +module_exit(jmb38x_ms_exit); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index effdb558a588..70eb3c803d47 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2184,6 +2184,7 @@ #define PCI_DEVICE_ID_JMICRON_JMB366 0x2366 #define PCI_DEVICE_ID_JMICRON_JMB368 0x2368 #define PCI_DEVICE_ID_JMICRON_JMB38X_SD 0x2381 +#define PCI_DEVICE_ID_JMICRON_JMB38X_MS 0x2383 #define PCI_VENDOR_ID_KORENIX 0x1982 #define PCI_DEVICE_ID_KORENIX_JETCARDF0 0x1600 -- cgit v1.2.3 From 0738c4bb8f2a8bf15178f852494643b0981f578b Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 12 Mar 2008 16:51:31 +0900 Subject: nommu: Provide 
is_vmalloc_addr() stub. Introduced in commit-id 9e2779fa281cfda13ac060753d674bbcaa23367e and ifdef'ed out for nommu in 8ca3ed87db062201e1fa15b64a9214e193fc3a8a, both approaches end up breaking the nommu build in different ways. An impressive feat for a 2-liner. Current is_vmalloc_addr() users fall in to two camps: - Determining whether to use vfree()/kfree() - Whether to do vmlist traversal (only /proc/kcore). Since we don't support /proc/kcore on nommu, that leaves the vfree()/kfree() determination use cases. nommu vfree() happens to be a wrapper to kfree() anyways, so is_vmalloc_addr() can always return 0 and end up with the right behaviour. Signed-off-by: Paul Mundt Signed-off-by: Linus Torvalds --- include/linux/mm.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 3f3ccfe42de0..b695875d63e3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -235,15 +235,22 @@ static inline int get_page_unless_zero(struct page *page) struct page *vmalloc_to_page(const void *addr); unsigned long vmalloc_to_pfn(const void *addr); -#ifdef CONFIG_MMU -/* Determine if an address is within the vmalloc range */ +/* + * Determine if an address is within the vmalloc range + * + * On nommu, vmalloc/vfree wrap through kmalloc/kfree directly, so there + * is no special casing required. + */ static inline int is_vmalloc_addr(const void *x) { +#ifdef CONFIG_MMU unsigned long addr = (unsigned long)x; return addr >= VMALLOC_START && addr < VMALLOC_END; -} +#else + return 0; #endif +} static inline struct page *compound_head(struct page *page) { -- cgit v1.2.3 From a8ae50ba9336ff77d0df0943ac27b79ba0a5a521 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 12 Mar 2008 17:52:56 +0100 Subject: Remove <linux/genhd.h> from user-visible headers. It was all wrapped in '#ifdef CONFIG_BLOCK' anyway, so userspace was getting nothing useful out of it. 
And the special #ifndef __KERNEL__ version of 'struct partition' makes me inclined to promote an attitude of violence... Stick some comments on some of the #endifs too, while we're at it. Signed-off-by: David Woodhouse Signed-off-by: Linus Torvalds --- include/linux/Kbuild | 1 - include/linux/genhd.h | 30 +++--------------------------- 2 files changed, 3 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 994df3780007..0fac822c1157 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -205,7 +205,6 @@ unifdef-y += futex.h unifdef-y += fs.h unifdef-y += gameport.h unifdef-y += generic_serial.h -unifdef-y += genhd.h unifdef-y += gfs2_ondisk.h unifdef-y += hayesesp.h unifdef-y += hdlcdrv.h diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 32c2ac49a070..ecd2bf63fc84 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -55,24 +55,6 @@ enum { UNIXWARE_PARTITION = 0x63, /* Same as GNU_HURD and SCO Unix */ }; -#ifndef __KERNEL__ - -struct partition { - unsigned char boot_ind; /* 0x80 - active */ - unsigned char head; /* starting head */ - unsigned char sector; /* starting sector */ - unsigned char cyl; /* starting cylinder */ - unsigned char sys_ind; /* What partition type */ - unsigned char end_head; /* end head */ - unsigned char end_sector; /* end sector */ - unsigned char end_cyl; /* end cylinder */ - unsigned int start_sect; /* starting sector counting from 0 */ - unsigned int nr_sects; /* nr of sectors in partition */ -} __attribute__((packed)); - -#endif - -#ifdef __KERNEL__ #include #include #include @@ -228,7 +210,7 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) { sizeof(struct disk_stats)); } -#else +#else /* !CONFIG_SMP */ #define __disk_stat_add(gendiskp, field, addnd) \ (gendiskp->dkstats.field += addnd) #define disk_stat_read(gendiskp, field) (gendiskp->dkstats.field) @@ -256,7 +238,7 @@ static inline void 
part_stat_set_all(struct hd_struct *part, int value) memset(&part->dkstats, value, sizeof(struct disk_stats)); } -#endif +#endif /* CONFIG_SMP */ #define disk_stat_add(gendiskp, field, addnd) \ do { \ @@ -395,8 +377,6 @@ static inline void set_capacity(struct gendisk *disk, sector_t size) disk->capacity = size; } -#endif /* __KERNEL__ */ - #ifdef CONFIG_SOLARIS_X86_PARTITION #define SOLARIS_X86_NUMSLICE 16 @@ -540,8 +520,6 @@ struct unixware_disklabel { # define MINIX_NR_SUBPARTITIONS 4 #endif /* CONFIG_MINIX_SUBPARTITION */ -#ifdef __KERNEL__ - #define ADDPART_FLAG_NONE 0 #define ADDPART_FLAG_RAID 1 #define ADDPART_FLAG_WHOLEDISK 2 @@ -570,8 +548,6 @@ static inline struct block_device *bdget_disk(struct gendisk *disk, int index) return bdget(MKDEV(disk->major, disk->first_minor) + index); } -#endif - #else /* CONFIG_BLOCK */ static inline void printk_all_partitions(void) { } @@ -584,4 +560,4 @@ static inline dev_t blk_lookup_devt(const char *name) #endif /* CONFIG_BLOCK */ -#endif +#endif /* _LINUX_GENHD_H */ -- cgit v1.2.3 From 07c941d00087581c9553661c2c4fb593da37f525 Mon Sep 17 00:00:00 2001 From: Tony Breeds Date: Wed, 12 Mar 2008 10:48:48 +1100 Subject: [POWERPC] Fix undefined pmu_sys_suspended compilation error pmu_sys_suspended is declared extern when: defined(CONFIG_PM_SLEEP) && defined(CONFIG_PPC32) but only defined when: defined(CONFIG_SUSPEND) && defined(CONFIG_PPC32) which is wrong. Let's fix that. 
Signed-off-by: Tony Breeds Signed-off-by: Paul Mackerras --- include/linux/pmu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pmu.h b/include/linux/pmu.h index 4c5f65392d36..b02b57c0fba0 100644 --- a/include/linux/pmu.h +++ b/include/linux/pmu.h @@ -192,7 +192,7 @@ extern unsigned int pmu_power_flags; extern void pmu_backlight_init(void); /* some code needs to know if the PMU was suspended for hibernation */ -#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_PPC32) +#if defined(CONFIG_SUSPEND) && defined(CONFIG_PPC32) extern int pmu_sys_suspended; #else /* if power management is not configured it can't be suspended */ -- cgit v1.2.3 From a99d9a6ebdf8328d5c61ca9f1038f4815e25720e Mon Sep 17 00:00:00 2001 From: Tony Breeds Date: Wed, 12 Mar 2008 10:48:48 +1100 Subject: [POWERPC] Fix drivers/macintosh/mediabay.c when !CONFIG_ADB_PMU When building drivers/macintosh/mediabay.c if CONFIG_ADB_PMU isn't defined we get: drivers/built-in.o: In function `media_bay_step': mediabay.c:(.text+0x92b84): undefined reference to `pmu_suspend' mediabay.c:(.text+0x92c08): undefined reference to `pmu_resume' Create empty place holders in that scenario. Signed-off-by: Tony Breeds Signed-off-by: Paul Mackerras --- include/linux/pmu.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pmu.h b/include/linux/pmu.h index b02b57c0fba0..cafe98d96948 100644 --- a/include/linux/pmu.h +++ b/include/linux/pmu.h @@ -147,8 +147,15 @@ extern void pmu_wait_complete(struct adb_request *req); /* For use before switching interrupts off for a long time; * warning: not stackable */ +#if defined(CONFIG_ADB_PMU) extern void pmu_suspend(void); extern void pmu_resume(void); +#else +static inline void pmu_suspend(void) +{} +static inline void pmu_resume(void) +{} +#endif extern void pmu_enable_irled(int on); -- cgit v1.2.3