From f6cf891c4d7128f9f91243fc0b9ce99e10fa1586 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 28 Aug 2007 12:53:24 +0200 Subject: sched: make the scheduler converge to the ideal latency de-HZ-ification of the granularity defaults unearthed a pre-existing property of CFS: while it correctly converges to the granularity goal, it does not prevent run-time fluctuations in the range of [-gran ... 0 ... +gran]. With the increase of the granularity due to the removal of HZ dependencies, this becomes visible in chew-max output (with 5 tasks running): out: 28 . 27. 32 | flu: 0 . 0 | ran: 9 . 13 | per: 37 . 40 out: 27 . 27. 32 | flu: 0 . 0 | ran: 17 . 13 | per: 44 . 40 out: 27 . 27. 32 | flu: 0 . 0 | ran: 9 . 13 | per: 36 . 40 out: 29 . 27. 32 | flu: 2 . 0 | ran: 17 . 13 | per: 46 . 40 out: 28 . 27. 32 | flu: 0 . 0 | ran: 9 . 13 | per: 37 . 40 out: 29 . 27. 32 | flu: 0 . 0 | ran: 18 . 13 | per: 47 . 40 out: 28 . 27. 32 | flu: 0 . 0 | ran: 9 . 13 | per: 37 . 40 average slice is the ideal 13 msecs and the period is picture-perfect 40 msecs. But the 'ran' field fluctuates around 13.33 msecs and there's no mechanism in CFS to keep that from happening: it's a perfectly valid solution that CFS finds. to fix this we add a granularity/preemption rule that knows about the "target latency", which makes tasks that run longer than the ideal latency run a bit less. The simplest approach is to simply decrease the preemption granularity when a task overruns its ideal latency. For this we have to track how much the task executed since its last preemption. ( this adds a new field to task_struct, but we can eliminate that overhead in 2.6.24 by putting all the scheduler timestamps into an anonymous union. ) with this change in place, chew-max output is fluctuation-less all around: out: 28 . 27. 39 | flu: 0 . 2 | ran: 13 . 13 | per: 41 . 40 out: 28 . 27. 39 | flu: 0 . 2 | ran: 13 . 13 | per: 41 . 40 out: 28 . 27. 39 | flu: 0 . 2 | ran: 13 . 13 | per: 41 . 40 out: 28 . 27. 39 | flu: 0 . 2 | ran: 13 . 13 | per: 41 . 40 out: 28 . 27. 39 | flu: 0 . 1 | ran: 13 . 13 | per: 41 . 40 out: 28 . 27. 39 | flu: 0 . 1 | ran: 13 . 13 | per: 41 . 40 this patch has no impact on any fastpath or on any globally observable scheduling property. (unless you have sharp enough eyes to see millisecond-level ruckles in glxgears smoothness :-) Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Signed-off-by: Mike Galbraith --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index bd6a0320a770..f4e324ed2e44 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -904,6 +904,7 @@ struct sched_entity { u64 exec_start; u64 sum_exec_runtime; + u64 prev_sum_exec_runtime; u64 wait_start_fair; u64 sleep_start_fair; -- cgit v1.2.3 From 05bb1fad1cde025a864a90cfeb98dcbefe78a44a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 30 Aug 2007 22:10:28 -0700 Subject: [TCP]: Allow minimum RTO to be configurable via routing metrics. Cell phone networks do link layer retransmissions and other things that cause unnecessary timeout retransmits. So allow the minimum RTO to be inflated per-route to deal with this. Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 2 ++ net/ipv4/tcp_input.c | 14 ++++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index c91476ce314a..dff3192374f8 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -351,6 +351,8 @@ enum #define RTAX_INITCWND RTAX_INITCWND RTAX_FEATURES, #define RTAX_FEATURES RTAX_FEATURES + RTAX_RTO_MIN, +#define RTAX_RTO_MIN RTAX_RTO_MIN __RTAX_MAX }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9785df37a65f..1ee72127462b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -555,6 +555,16 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) tcp_grow_window(sk, skb); } +static u32 tcp_rto_min(struct sock *sk) +{ + struct dst_entry *dst = __sk_dst_get(sk); + u32 rto_min = TCP_RTO_MIN; + + if (dst_metric_locked(dst, RTAX_RTO_MIN)) + rto_min = dst->metrics[RTAX_RTO_MIN-1]; + return rto_min; +} + /* Called to compute a smoothed rtt estimate. The data fed to this * routine either comes from timestamps, or from segments that were * known _not_ to have been retransmitted [see Karn/Partridge @@ -616,13 +626,13 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt) if (tp->mdev_max < tp->rttvar) tp->rttvar -= (tp->rttvar-tp->mdev_max)>>2; tp->rtt_seq = tp->snd_nxt; - tp->mdev_max = TCP_RTO_MIN; + tp->mdev_max = tcp_rto_min(sk); } } else { /* no previous measure. */ tp->srtt = m<<3; /* take the measured time to be rtt */ tp->mdev = m<<1; /* make sure rto = 3*rtt */ - tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN); + tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); tp->rtt_seq = tp->snd_nxt; } } -- cgit v1.2.3 From 91a6d4ed333a47003f07636b3bd143521e0bbad1 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 27 Aug 2007 19:35:22 +0200 Subject: ata: add ATA_MWDMA* and ATA_SWDMA* defines Cc: Jeff Garzik Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Jeff Garzik --- include/linux/ata.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index 23a22df039d8..c043c1ccf1c5 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -73,6 +73,19 @@ enum { ATA_PIO5 = ATA_PIO4 | (1 << 5), ATA_PIO6 = ATA_PIO5 | (1 << 6), + ATA_SWDMA0 = (1 << 0), + ATA_SWDMA1 = ATA_SWDMA0 | (1 << 1), + ATA_SWDMA2 = ATA_SWDMA1 | (1 << 2), + + ATA_SWDMA2_ONLY = (1 << 2), + + ATA_MWDMA0 = (1 << 0), + ATA_MWDMA1 = ATA_MWDMA0 | (1 << 1), + ATA_MWDMA2 = ATA_MWDMA1 | (1 << 2), + + ATA_MWDMA12_ONLY = (1 << 1) | (1 << 2), + ATA_MWDMA2_ONLY = (1 << 2), + ATA_UDMA0 = (1 << 0), ATA_UDMA1 = ATA_UDMA0 | (1 << 1), ATA_UDMA2 = ATA_UDMA1 | (1 << 2), -- cgit v1.2.3 From aa137f9d29d30592774c727ec5cfcf9891e576fa Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 31 Aug 2007 00:48:45 -0700 Subject: SLUB: Force inlining for functions in slub_def.h Some compilers (especially older gcc releases) may skip inlining sometimes which will lead to link failures. Force the inlining of keyfunctions in slub_def.h to avoid these issues. Signed-off-by: Christoph Lameter Acked-by: Jan Dittmer Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 124270df8734..74962077f632 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -78,7 +78,7 @@ extern struct kmem_cache kmalloc_caches[KMALLOC_SHIFT_HIGH + 1]; * Sorry that the following has to be that ugly but some versions of GCC * have trouble with constant propagation and loops. */ -static inline int kmalloc_index(size_t size) +static __always_inline int kmalloc_index(size_t size) { if (!size) return 0; @@ -133,7 +133,7 @@ static inline int kmalloc_index(size_t size) * This ought to end up with a global pointer to the right cache * in kmalloc_caches. */ -static inline struct kmem_cache *kmalloc_slab(size_t size) +static __always_inline struct kmem_cache *kmalloc_slab(size_t size) { int index = kmalloc_index(size); @@ -166,7 +166,7 @@ static inline struct kmem_cache *kmalloc_slab(size_t size) void *kmem_cache_alloc(struct kmem_cache *, gfp_t); void *__kmalloc(size_t size, gfp_t flags); -static inline void *kmalloc(size_t size, gfp_t flags) +static __always_inline void *kmalloc(size_t size, gfp_t flags) { if (__builtin_constant_p(size) && !(flags & SLUB_DMA)) { struct kmem_cache *s = kmalloc_slab(size); @@ -183,7 +183,7 @@ static inline void *kmalloc(size_t size, gfp_t flags) void *__kmalloc_node(size_t size, gfp_t flags, int node); void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); -static inline void *kmalloc_node(size_t size, gfp_t flags, int node) +static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) { if (__builtin_constant_p(size) && !(flags & SLUB_DMA)) { struct kmem_cache *s = kmalloc_slab(size); -- cgit v1.2.3 From 16c55b038033d8f6f7601996dfae44399666d9ab Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 29 Aug 2007 11:58:33 +0900 Subject: libata: implement BROKEN_HPA horkage and apply it to affected drives Some drives choke on READ_NATIVE_MAX_ADDRESS[_EXT]. Implement ATA_HORKAGE_BROKEN_HPA and apply it to affected drives. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 11 ++++++++--- include/linux/libata.h | 1 + 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 9cf46bf8c8d2..a3ee087223de 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -1911,8 +1911,9 @@ int ata_dev_configure(struct ata_device *dev) dev->flags |= ATA_DFLAG_FLUSH_EXT; } - if (ata_id_hpa_enabled(dev->id)) - dev->n_sectors = ata_hpa_resize(dev); + if (!(dev->horkage & ATA_HORKAGE_BROKEN_HPA) && + ata_id_hpa_enabled(dev->id)) + dev->n_sectors = ata_hpa_resize(dev); /* config NCQ */ ata_dev_config_ncq(dev, ncq_desc, sizeof(ncq_desc)); @@ -3795,7 +3796,11 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "ST9160821AS", "3.CLF", ATA_HORKAGE_NONCQ, }, { "SAMSUNG HD401LJ", "ZZ100-15", ATA_HORKAGE_NONCQ, }, - /* Devices with NCQ limits */ + /* devices which puke on READ_NATIVE_MAX */ + { "HDS724040KLSA80", "KFAOA20N", ATA_HORKAGE_BROKEN_HPA, }, + { "WDC WD3200JD-00KLB0", "WD-WCAMR1130137", ATA_HORKAGE_BROKEN_HPA }, + { "WDC WD2500JD-00HBB0", "WD-WMAL71490727", ATA_HORKAGE_BROKEN_HPA }, + { "MAXTOR 6L080L4", "A93.0500", ATA_HORKAGE_BROKEN_HPA }, /* End Marker */ { } diff --git a/include/linux/libata.h b/include/linux/libata.h index 41978a557318..a67bb9075e9b 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -303,6 +303,7 @@ enum { ATA_HORKAGE_NODMA = (1 << 1), /* DMA problems */ ATA_HORKAGE_NONCQ = (1 << 2), /* Don't use NCQ */ ATA_HORKAGE_MAX_SEC_128 = (1 << 3), /* Limit max sects to 128 */ + ATA_HORKAGE_BROKEN_HPA = (1 << 4), /* Broken HPA */ }; enum hsm_task_states { -- cgit v1.2.3 From f3de4be9d5f8551d7880a1f1f5231a30e0161b1f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 30 Aug 2007 23:56:29 -0700 Subject: PM: Fix dependencies of CONFIG_SUSPEND and CONFIG_HIBERNATION Dependencies of CONFIG_SUSPEND and CONFIG_HIBERNATION introduced by commit 296699de6bdc717189a331ab6bbe90e05c94db06 "Introduce CONFIG_SUSPEND for suspend-to-Ram and standby" are incorrect, as they don't cover the facts that (1) not all architectures support suspend and (2) SMP hibernation is only possible on X86 and PPC64 (if CONFIG_PPC64_SWSUSP is set). Signed-off-by: Rafael J. Wysocki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/defconfig | 1 - include/linux/cpu.h | 6 +++--- kernel/cpu.c | 4 ++-- kernel/power/Kconfig | 41 +++++++++++++++++++++++++++++++---------- 4 files changed, 36 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig index e64f65c9d901..b091c5e35558 100644 --- a/arch/x86_64/defconfig +++ b/arch/x86_64/defconfig @@ -201,7 +201,6 @@ CONFIG_PM=y # CONFIG_PM_DEBUG is not set CONFIG_HIBERNATION=y CONFIG_PM_STD_PARTITION="" -CONFIG_SUSPEND_SMP=y # # ACPI (Advanced Configuration and Power Interface) Support diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 1d5ded0836ee..0ad72c4cf312 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -126,16 +126,16 @@ static inline void cpuhotplug_mutex_unlock(struct mutex *cpu_hp_mutex) static inline int cpu_is_offline(int cpu) { return 0; } #endif /* CONFIG_HOTPLUG_CPU */ -#ifdef CONFIG_SUSPEND_SMP +#ifdef CONFIG_PM_SLEEP_SMP extern int suspend_cpu_hotplug; extern int disable_nonboot_cpus(void); extern void enable_nonboot_cpus(void); -#else +#else /* !CONFIG_PM_SLEEP_SMP */ #define suspend_cpu_hotplug 0 static inline int disable_nonboot_cpus(void) { return 0; } static inline void enable_nonboot_cpus(void) {} -#endif +#endif /* !CONFIG_PM_SLEEP_SMP */ #endif /* _LINUX_CPU_H_ */ diff --git a/kernel/cpu.c b/kernel/cpu.c index 181ae7086029..38033db8d8ec 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -273,7 +273,7 @@ int __cpuinit cpu_up(unsigned int cpu) return err; } -#ifdef CONFIG_SUSPEND_SMP +#ifdef CONFIG_PM_SLEEP_SMP static cpumask_t frozen_cpus; int disable_nonboot_cpus(void) @@ -334,4 +334,4 @@ void enable_nonboot_cpus(void) out: mutex_unlock(&cpu_add_remove_lock); } -#endif +#endif /* CONFIG_PM_SLEEP_SMP */ diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 412859f8d94a..c8580a1e6873 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -72,15 +72,10 @@ config PM_TRACE CAUTION: this option will cause your machine's real-time clock to be set to an invalid time after a resume. -config SUSPEND_SMP_POSSIBLE - bool - depends on (X86 && !X86_VOYAGER) || (PPC64 && (PPC_PSERIES || PPC_PMAC)) - depends on SMP - default y - -config SUSPEND_SMP +config PM_SLEEP_SMP bool - depends on SUSPEND_SMP_POSSIBLE && PM_SLEEP + depends on SUSPEND_SMP_POSSIBLE || HIBERNATION_SMP_POSSIBLE + depends on PM_SLEEP select HOTPLUG_CPU default y @@ -89,20 +84,46 @@ config PM_SLEEP depends on SUSPEND || HIBERNATION default y +config SUSPEND_UP_POSSIBLE + bool + depends on (X86 && !X86_VOYAGER) || PPC || ARM || BLACKFIN || MIPS \ + || SUPERH || FRV + depends on !SMP + default y + +config SUSPEND_SMP_POSSIBLE + bool + depends on (X86 && !X86_VOYAGER) \ + || (PPC && (PPC_PSERIES || PPC_PMAC)) || ARM + depends on SMP + default y + config SUSPEND bool "Suspend to RAM and standby" depends on PM - depends on !SMP || SUSPEND_SMP_POSSIBLE + depends on SUSPEND_UP_POSSIBLE || SUSPEND_SMP_POSSIBLE default y ---help--- Allow the system to enter sleep states in which main memory is powered and thus its contents are preserved, such as the suspend-to-RAM state (i.e. the ACPI S3 state). +config HIBERNATION_UP_POSSIBLE + bool + depends on X86 || PPC64_SWSUSP || FRV || PPC32 + depends on !SMP + default y + +config HIBERNATION_SMP_POSSIBLE + bool + depends on (X86 && !X86_VOYAGER) || PPC64_SWSUSP + depends on SMP + default y + config HIBERNATION bool "Hibernation (aka 'suspend to disk')" depends on PM && SWAP - depends on ((X86 || PPC64_SWSUSP || FRV || PPC32) && !SMP) || SUSPEND_SMP_POSSIBLE + depends on HIBERNATION_UP_POSSIBLE || HIBERNATION_SMP_POSSIBLE ---help--- Enable the suspend to disk (STD) functionality, which is usually called "hibernation" in user interfaces. STD checkpoints the -- cgit v1.2.3 From 60693e5a9a2063b87d4dbe8029816c814b3fa84e Mon Sep 17 00:00:00 2001 From: Shane Huang Date: Thu, 30 Aug 2007 23:56:38 -0700 Subject: i2c-piix4: Fix SB700 PCI device ID We find that SB700 and SB800 use the same SMBus device ID as SB600, which is 0x4385, instead of the already submitted 0x4395. Besides removing the wrong SB700 device ID, add SB800 support to kernel, by renaming the PCI_DEVICE_ID_ATI_IXP600_SMBUS into PCI_DEVICE_ID_ATI_SBX00_SMBUS. Signed-off-by: Shane Huang Signed-off-by: Jean Delvare Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/i2c/busses/i2c-piix4 | 2 +- drivers/i2c/busses/Kconfig | 1 + drivers/i2c/busses/i2c-piix4.c | 6 ++---- include/linux/pci_ids.h | 3 +-- 4 files changed, 5 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/Documentation/i2c/busses/i2c-piix4 b/Documentation/i2c/busses/i2c-piix4 index fa0c786a8bf5..cf6b6cb02aa1 100644 --- a/Documentation/i2c/busses/i2c-piix4 +++ b/Documentation/i2c/busses/i2c-piix4 @@ -6,7 +6,7 @@ Supported adapters: Datasheet: Publicly available at the Intel website * ServerWorks OSB4, CSB5, CSB6 and HT-1000 southbridges Datasheet: Only available via NDA from ServerWorks - * ATI IXP200, IXP300, IXP400, SB600 and SB700 southbridges + * ATI IXP200, IXP300, IXP400, SB600, SB700 and SB800 southbridges Datasheet: Not publicly available * Standard Microsystems (SMSC) SLC90E66 (Victory66) southbridge Datasheet: Publicly available at the SMSC website http://www.smsc.com diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 1842f523c23d..9f3a4cd0b07f 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -208,6 +208,7 @@ config I2C_PIIX4 ATI IXP400 ATI SB600 ATI SB700 + ATI SB800 Serverworks OSB4 Serverworks CSB5 Serverworks CSB6 diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index debc76cd2161..167e4137ee21 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -23,7 +23,7 @@ Supports: Intel PIIX4, 440MX Serverworks OSB4, CSB5, CSB6, HT-1000 - ATI IXP200, IXP300, IXP400, SB600, SB700 + ATI IXP200, IXP300, IXP400, SB600, SB700, SB800 SMSC Victory66 Note: we assume there can only be one device, with one SMBus interface. @@ -397,9 +397,7 @@ static struct pci_device_id piix4_ids[] = { .driver_data = 0 }, { PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS), .driver_data = 0 }, - { PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP600_SMBUS), - .driver_data = 0 }, - { PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP700_SMBUS), + { PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS), .driver_data = 0 }, { PCI_DEVICE(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_OSB4), .driver_data = 0 }, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 06d23e10a16d..17168f3cc73f 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -374,10 +374,9 @@ #define PCI_DEVICE_ID_ATI_IXP400_SATA 0x4379 #define PCI_DEVICE_ID_ATI_IXP400_SATA2 0x437a #define PCI_DEVICE_ID_ATI_IXP600_SATA 0x4380 -#define PCI_DEVICE_ID_ATI_IXP600_SMBUS 0x4385 +#define PCI_DEVICE_ID_ATI_SBX00_SMBUS 0x4385 #define PCI_DEVICE_ID_ATI_IXP600_IDE 0x438c #define PCI_DEVICE_ID_ATI_IXP700_SATA 0x4390 -#define PCI_DEVICE_ID_ATI_IXP700_SMBUS 0x4395 #define PCI_DEVICE_ID_ATI_IXP700_IDE 0x439c #define PCI_VENDOR_ID_VLSI 0x1004 -- cgit v1.2.3 From dec4ad86c2fbea062e9ef9caa6d6e79f7c5e0b12 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 30 Aug 2007 23:56:40 -0700 Subject: hugepage: fix broken check for offset alignment in hugepage mappings For hugepage mappings, the file offset, like the address and size, needs to be aligned to the size of a hugepage. In commit 68589bc353037f233fe510ad9ff432338c95db66, the check for this was moved into prepare_hugepage_range() along with the address and size checks. But since BenH's rework of the get_unmapped_area() paths leading up to commit 4b1d89290b62bb2db476c94c82cf7442aab440c8, prepare_hugepage_range() is only called for MAP_FIXED mappings, not for other mappings. This means we're no longer ever checking for an aligned offset - I've confirmed that mmap() will (apparently) succeed with a misaligned offset on both powerpc and i386 at least. This patch restores the check, removing it from prepare_hugepage_range() and putting it back into hugetlbfs_file_mmap(). I'm putting it there, rather than in the get_unmapped_area() path so it only needs to go in one place, than separately in the half-dozen or so arch-specific implementations of hugetlb_get_unmapped_area(). Signed-off-by: David Gibson Cc: Adam Litke Cc: Andi Kleen Cc: "David S. Miller" Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/mm/hugetlbpage.c | 2 +- arch/ia64/mm/hugetlbpage.c | 6 ++---- arch/sparc64/mm/hugetlbpage.c | 2 +- fs/hugetlbfs/inode.c | 15 ++++++++++----- include/linux/hugetlb.h | 10 +++------- 5 files changed, 17 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c index efdf95ac8031..6c06d9c0488e 100644 --- a/arch/i386/mm/hugetlbpage.c +++ b/arch/i386/mm/hugetlbpage.c @@ -367,7 +367,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, return -ENOMEM; if (flags & MAP_FIXED) { - if (prepare_hugepage_range(addr, len, pgoff)) + if (prepare_hugepage_range(addr, len)) return -EINVAL; return addr; } diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index d22861c5b04c..a9ff685aea25 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -75,10 +75,8 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) * Don't actually need to do any preparation, but need to make sure * the address is in the right region. */ -int prepare_hugepage_range(unsigned long addr, unsigned long len, pgoff_t pgoff) +int prepare_hugepage_range(unsigned long addr, unsigned long len) { - if (pgoff & (~HPAGE_MASK >> PAGE_SHIFT)) - return -EINVAL; if (len & ~HPAGE_MASK) return -EINVAL; if (addr & ~HPAGE_MASK) @@ -151,7 +149,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, u /* Handle MAP_FIXED */ if (flags & MAP_FIXED) { - if (prepare_hugepage_range(addr, len, pgoff)) + if (prepare_hugepage_range(addr, len)) return -EINVAL; return addr; } diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c index eaba9b70b184..6cfab2e4d340 100644 --- a/arch/sparc64/mm/hugetlbpage.c +++ b/arch/sparc64/mm/hugetlbpage.c @@ -175,7 +175,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, return -ENOMEM; if (flags & MAP_FIXED) { - if (prepare_hugepage_range(addr, len, pgoff)) + if (prepare_hugepage_range(addr, len)) return -EINVAL; return addr; } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index c848a191525d..950c2fbb815b 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -82,14 +82,19 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) int ret; /* - * vma alignment has already been checked by prepare_hugepage_range. - * If you add any error returns here, do so after setting VM_HUGETLB, - * so is_vm_hugetlb_page tests below unmap_region go the right way - * when do_mmap_pgoff unwinds (may be important on powerpc and ia64). + * vma address alignment (but not the pgoff alignment) has + * already been checked by prepare_hugepage_range. If you add + * any error returns here, do so after setting VM_HUGETLB, so + * is_vm_hugetlb_page tests below unmap_region go the right + * way when do_mmap_pgoff unwinds (may be important on powerpc + * and ia64). */ vma->vm_flags |= VM_HUGETLB | VM_RESERVED; vma->vm_ops = &hugetlb_vm_ops; + if (vma->vm_pgoff & ~(HPAGE_MASK >> PAGE_SHIFT)) + return -EINVAL; + vma_len = (loff_t)(vma->vm_end - vma->vm_start); mutex_lock(&inode->i_mutex); @@ -132,7 +137,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, return -ENOMEM; if (flags & MAP_FIXED) { - if (prepare_hugepage_range(addr, len, pgoff)) + if (prepare_hugepage_range(addr, len)) return -EINVAL; return addr; } diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index e6a71c82d204..3a19b032c0eb 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -66,11 +66,8 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb, unsigned long addr, * If the arch doesn't supply something else, assume that hugepage * size aligned regions are ok without further preparation. */ -static inline int prepare_hugepage_range(unsigned long addr, unsigned long len, - pgoff_t pgoff) +static inline int prepare_hugepage_range(unsigned long addr, unsigned long len) { - if (pgoff & (~HPAGE_MASK >> PAGE_SHIFT)) - return -EINVAL; if (len & ~HPAGE_MASK) return -EINVAL; if (addr & ~HPAGE_MASK) @@ -78,8 +75,7 @@ static inline int prepare_hugepage_range(unsigned long addr, unsigned long len, return 0; } #else -int prepare_hugepage_range(unsigned long addr, unsigned long len, - pgoff_t pgoff); +int prepare_hugepage_range(unsigned long addr, unsigned long len); #endif #ifndef ARCH_HAS_SETCLEAR_HUGE_PTE @@ -117,7 +113,7 @@ static inline unsigned long hugetlb_total_pages(void) #define hugetlb_report_meminfo(buf) 0 #define hugetlb_report_node_meminfo(n, buf) 0 #define follow_huge_pmd(mm, addr, pmd, write) NULL -#define prepare_hugepage_range(addr,len,pgoff) (-EINVAL) +#define prepare_hugepage_range(addr,len) (-EINVAL) #define pmd_huge(x) 0 #define is_hugepage_only_range(mm, addr, len) 0 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; }) -- cgit v1.2.3 From 1b3b4a1a2deb7d3e5d66063bd76304d840c966b3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 28 Aug 2007 10:29:36 -0400 Subject: NFS: Fix a write request leak in nfs_invalidate_page() Ryusuke Konishi says: The recent truncate_complete_page() clears the dirty flag from a page before calling a_ops->invalidatepage(), ^^^^^^ static void truncate_complete_page(struct address_space *mapping, struct page *page) { ... cancel_dirty_page(page, PAGE_CACHE_SIZE); <--- Inserted here at kernel 2.6.20 if (PagePrivate(page)) do_invalidatepage(page, 0); ---> will call a_ops->invalidatepage() ... } and this is disturbing nfs_wb_page_priority() from calling nfs_writepage_locked() that is expected to handle the pending request (=nfs_page) associated with the page. int nfs_wb_page_priority(struct inode *inode, struct page *page, int how) { ... if (clear_page_dirty_for_io(page)) { ret = nfs_writepage_locked(page, &wbc); if (ret < 0) goto out; } ... } Since truncate_complete_page() will get rid of the page after a_ops->invalidatepage() returns, the request (=nfs_page) associated with the page becomes a garbage in nfs_inode->nfs_page_tree. ------------------------ Fix this by ensuring that nfs_wb_page_priority() recognises that it may also need to clear out non-dirty pages that have an nfs_page associated with them. Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 2 +- fs/nfs/write.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/nfs_fs.h | 1 + 3 files changed, 46 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index c87dc713b5d7..579cf8a7d4a7 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -316,7 +316,7 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset) if (offset != 0) return; /* Cancel any unstarted writes on this page */ - nfs_wb_page_priority(page->mapping->host, page, FLUSH_INVALIDATE); + nfs_wb_page_cancel(page->mapping->host, page); } static int nfs_release_page(struct page *page, gfp_t gfp) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ef97e0c0f5b1..0d7a77cc394b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1396,6 +1396,50 @@ out: return ret; } +int nfs_wb_page_cancel(struct inode *inode, struct page *page) +{ + struct nfs_page *req; + loff_t range_start = page_offset(page); + loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); + struct writeback_control wbc = { + .bdi = page->mapping->backing_dev_info, + .sync_mode = WB_SYNC_ALL, + .nr_to_write = LONG_MAX, + .range_start = range_start, + .range_end = range_end, + }; + int ret = 0; + + BUG_ON(!PageLocked(page)); + for (;;) { + req = nfs_page_find_request(page); + if (req == NULL) + goto out; + if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { + nfs_release_request(req); + break; + } + if (nfs_lock_request_dontget(req)) { + nfs_inode_remove_request(req); + /* + * In case nfs_inode_remove_request has marked the + * page as being dirty + */ + cancel_dirty_page(page, PAGE_CACHE_SIZE); + nfs_unlock_request(req); + break; + } + ret = nfs_wait_on_request(req); + if (ret < 0) + goto out; + } + if (!PagePrivate(page)) + return 0; + ret = nfs_sync_mapping_wait(page->mapping, &wbc, FLUSH_INVALIDATE); +out: + return ret; +} + int nfs_wb_page_priority(struct inode *inode, struct page *page, int how) { loff_t range_start = page_offset(page); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 157dcb055b5c..7250eeadd7b5 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -431,6 +431,7 @@ extern int nfs_sync_mapping_range(struct address_space *, loff_t, loff_t, int); extern int nfs_wb_all(struct inode *inode); extern int nfs_wb_page(struct inode *inode, struct page* page); extern int nfs_wb_page_priority(struct inode *inode, struct page* page, int how); +extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) extern int nfs_commit_inode(struct inode *, int); extern struct nfs_write_data *nfs_commit_alloc(void); -- cgit v1.2.3 From 9e3d3d07de1a9f2b83299653b75bfdbc0a8118f2 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Tue, 4 Sep 2007 23:16:04 -0400 Subject: Input: add more Braille keycodes Some braille keyboards have 10 dots, so extend the Input braille keys definitions. Signed-off-by: Samuel Thibault Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 2 ++ include/linux/keyboard.h | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/input.h b/include/linux/input.h index e02c6a66b2ba..17df5a7331c7 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -552,6 +552,8 @@ struct input_absinfo { #define KEY_BRL_DOT6 0x1f6 #define KEY_BRL_DOT7 0x1f7 #define KEY_BRL_DOT8 0x1f8 +#define KEY_BRL_DOT9 0x1f9 +#define KEY_BRL_DOT10 0x1fa /* We avoid low common keys in module aliases so they don't get huge. */ #define KEY_MIN_INTERESTING KEY_MUTE diff --git a/include/linux/keyboard.h b/include/linux/keyboard.h index de76843bbe8a..7ddbc30aa8e7 100644 --- a/include/linux/keyboard.h +++ b/include/linux/keyboard.h @@ -437,8 +437,10 @@ extern unsigned short plain_map[NR_KEYS]; #define K_BRL_DOT6 K(KT_BRL, 6) #define K_BRL_DOT7 K(KT_BRL, 7) #define K_BRL_DOT8 K(KT_BRL, 8) +#define K_BRL_DOT9 K(KT_BRL, 9) +#define K_BRL_DOT10 K(KT_BRL, 10) -#define NR_BRL 9 +#define NR_BRL 11 #define MAX_DIACR 256 #endif -- cgit v1.2.3 From b311ec4ae82b1dc337689e966dcf9c5f6a53877e Mon Sep 17 00:00:00 2001 From: Joseph Chan Date: Mon, 10 Sep 2007 22:06:01 -0400 Subject: [libata, IDE] add new VIA bridge to VIA PATA drivers Signed-off-by: Joseph Chan Signed-off-by: Jeff Garzik --- drivers/ata/pata_via.c | 1 + drivers/ide/pci/via82cxxx.c | 1 + include/linux/pci_ids.h | 1 + 3 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c index 1b0acafad1a6..636c4f1a0b24 100644 --- a/drivers/ata/pata_via.c +++ b/drivers/ata/pata_via.c @@ -97,6 +97,7 @@ static const struct via_isa_bridge { u8 rev_max; u16 flags; } via_isa_bridges[] = { + { "vx800", PCI_DEVICE_ID_VIA_VX800, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, { "vt8237s", PCI_DEVICE_ID_VIA_8237S, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, { "vt8251", PCI_DEVICE_ID_VIA_8251, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, { "cx700", PCI_DEVICE_ID_VIA_CX700, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, diff --git a/drivers/ide/pci/via82cxxx.c b/drivers/ide/pci/via82cxxx.c index 581316f9581d..8c539381d622 100644 --- a/drivers/ide/pci/via82cxxx.c +++ b/drivers/ide/pci/via82cxxx.c @@ -74,6 +74,7 @@ static struct via_isa_bridge { u8 udma_mask; u8 flags; } via_isa_bridges[] = { + { "vx800", PCI_DEVICE_ID_VIA_VX800, 0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST }, { "cx700", PCI_DEVICE_ID_VIA_CX700, 0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST }, { "vt8237s", PCI_DEVICE_ID_VIA_8237S, 0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST }, { "vt6410", PCI_DEVICE_ID_VIA_6410, 0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST }, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 17168f3cc73f..d41747b9fd15 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1343,6 +1343,7 @@ #define PCI_DEVICE_ID_VIA_8231_4 0x8235 #define PCI_DEVICE_ID_VIA_8365_1 0x8305 #define PCI_DEVICE_ID_VIA_CX700 0x8324 +#define PCI_DEVICE_ID_VIA_VX800 0x8353 #define PCI_DEVICE_ID_VIA_8371_1 0x8391 #define PCI_DEVICE_ID_VIA_82C598_1 0x8598 #define PCI_DEVICE_ID_VIA_838X_1 0xB188 -- cgit v1.2.3 From 16fcec35e7d7c4faaa4709f6434a4a25b06d25e3 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 11 Sep 2007 11:28:26 +0200 Subject: [NETFILTER]: Fix/improve deadlock condition on module removal netfilter So I've had a deadlock reported to me. I've found that the sequence of events goes like this: 1) process A (modprobe) runs to remove ip_tables.ko 2) process B (iptables-restore) runs and calls setsockopt on a netfilter socket, increasing the ip_tables socket_ops use count 3) process A acquires a file lock on the file ip_tables.ko, calls remove_module in the kernel, which in turn executes the ip_tables module cleanup routine, which calls nf_unregister_sockopt 4) nf_unregister_sockopt, seeing that the use count is non-zero, puts the calling process into uninterruptible sleep, expecting the process using the socket option code to wake it up when it exits the kernel 4) the user of the socket option code (process B) in do_ipt_get_ctl, calls ipt_find_table_lock, which in this case calls request_module to load ip_tables_nat.ko 5) request_module forks a copy of modprobe (process C) to load the module and blocks until modprobe exits. 6) Process C. forked by request_module process the dependencies of ip_tables_nat.ko, of which ip_tables.ko is one. 7) Process C attempts to lock the request module and all its dependencies, it blocks when it attempts to lock ip_tables.ko (which was previously locked in step 3) Theres not really any great permanent solution to this that I can see, but I've developed a two part solution that corrects the problem Part 1) Modifies the nf_sockopt registration code so that, instead of using a use counter internal to the nf_sockopt_ops structure, we instead use a pointer to the registering modules owner to do module reference counting when nf_sockopt calls a modules set/get routine. This prevents the deadlock by preventing set 4 from happening. Part 2) Enhances the modprobe utilty so that by default it preforms non-blocking remove operations (the same way rmmod does), and add an option to explicity request blocking operation. So if you select blocking operation in modprobe you can still cause the above deadlock, but only if you explicity try (and since root can do any old stupid thing it would like.... :) ). Signed-off-by: Neil Horman Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter.h | 5 ++-- net/bridge/netfilter/ebtables.c | 1 + net/ipv4/ipvs/ip_vs_ctl.c | 1 + net/ipv4/netfilter/arp_tables.c | 1 + net/ipv4/netfilter/ip_tables.c | 1 + net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 1 + net/ipv6/netfilter/ip6_tables.c | 1 + net/netfilter/nf_sockopt.c | 36 ++++++++------------------ 8 files changed, 19 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 0eed0b7ab2df..1dd075eda595 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -88,9 +88,8 @@ struct nf_sockopt_ops int (*compat_get)(struct sock *sk, int optval, void __user *user, int *len); - /* Number of users inside set() or get(). */ - unsigned int use; - struct task_struct *cleanup_task; + /* Use the module struct to lock set/get code in place */ + struct module *owner; }; /* Each queued (to userspace) skbuff has one of these. */ diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 4169a2a89a39..6018d0e51938 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1513,6 +1513,7 @@ static struct nf_sockopt_ops ebt_sockopts = .get_optmin = EBT_BASE_CTL, .get_optmax = EBT_SO_GET_MAX + 1, .get = do_ebt_get_ctl, + .owner = THIS_MODULE, }; static int __init ebtables_init(void) diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index 902fd578aa3c..f656d41d8d41 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -2339,6 +2339,7 @@ static struct nf_sockopt_ops ip_vs_sockopts = { .get_optmin = IP_VS_BASE_CTL, .get_optmax = IP_VS_SO_GET_MAX+1, .get = do_ip_vs_get_ctl, + .owner = THIS_MODULE, }; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index d1149aba9351..29114a9ccd1d 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1161,6 +1161,7 @@ static struct nf_sockopt_ops arpt_sockopts = { .get_optmin = ARPT_BASE_CTL, .get_optmax = ARPT_SO_GET_MAX+1, .get = do_arpt_get_ctl, + .owner = THIS_MODULE, }; static int __init arp_tables_init(void) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index e1b402c6b855..6486894f450c 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -2296,6 +2296,7 @@ static struct nf_sockopt_ops ipt_sockopts = { #ifdef CONFIG_COMPAT .compat_get = compat_do_ipt_get_ctl, #endif + .owner = THIS_MODULE, }; static struct xt_match icmp_matchstruct __read_mostly = { diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 53cb1772f38f..f813e02aab30 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -399,6 +399,7 @@ static struct nf_sockopt_ops so_getorigdst = { .get_optmin = SO_ORIGINAL_DST, .get_optmax = SO_ORIGINAL_DST+1, .get = &getorigdst, + .owner = THIS_MODULE, }; struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index aeda617246b7..cd9df02bb85c 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1462,6 +1462,7 @@ static struct nf_sockopt_ops ip6t_sockopts = { .get_optmin = IP6T_BASE_CTL, .get_optmax = IP6T_SO_GET_MAX+1, .get = do_ip6t_get_ctl, + .owner = THIS_MODULE, }; static struct xt_match icmp6_matchstruct __read_mostly = { diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c index 8b8ece750313..e32761ce260c 100644 --- a/net/netfilter/nf_sockopt.c +++ b/net/netfilter/nf_sockopt.c @@ -55,18 +55,7 @@ EXPORT_SYMBOL(nf_register_sockopt); void nf_unregister_sockopt(struct nf_sockopt_ops *reg) { - /* No point being interruptible: we're probably in cleanup_module() */ - restart: mutex_lock(&nf_sockopt_mutex); - if (reg->use != 0) { - /* To be woken by nf_sockopt call... */ - /* FIXME: Stuart Young's name appears gratuitously. */ - set_current_state(TASK_UNINTERRUPTIBLE); - reg->cleanup_task = current; - mutex_unlock(&nf_sockopt_mutex); - schedule(); - goto restart; - } list_del(®->list); mutex_unlock(&nf_sockopt_mutex); } @@ -86,10 +75,11 @@ static int nf_sockopt(struct sock *sk, int pf, int val, list_for_each(i, &nf_sockopts) { ops = (struct nf_sockopt_ops *)i; if (ops->pf == pf) { + if (!try_module_get(ops->owner)) + goto out_nosup; if (get) { if (val >= ops->get_optmin && val < ops->get_optmax) { - ops->use++; mutex_unlock(&nf_sockopt_mutex); ret = ops->get(sk, val, opt, len); goto out; @@ -97,23 +87,20 @@ static int nf_sockopt(struct sock *sk, int pf, int val, } else { if (val >= ops->set_optmin && val < ops->set_optmax) { - ops->use++; mutex_unlock(&nf_sockopt_mutex); ret = ops->set(sk, val, opt, *len); goto out; } } + module_put(ops->owner); } } + out_nosup: mutex_unlock(&nf_sockopt_mutex); return -ENOPROTOOPT; out: - mutex_lock(&nf_sockopt_mutex); - ops->use--; - if (ops->cleanup_task) - wake_up_process(ops->cleanup_task); - mutex_unlock(&nf_sockopt_mutex); + module_put(ops->owner); return ret; } @@ -144,10 +131,12 @@ static int compat_nf_sockopt(struct sock *sk, int pf, int val, list_for_each(i, &nf_sockopts) { ops = (struct nf_sockopt_ops *)i; if (ops->pf == pf) { + if (!try_module_get(ops->owner)) + goto out_nosup; + if (get) { if (val >= ops->get_optmin && val < ops->get_optmax) { - ops->use++; mutex_unlock(&nf_sockopt_mutex); if (ops->compat_get) ret = ops->compat_get(sk, @@ -160,7 +149,6 @@ static int compat_nf_sockopt(struct sock *sk, int pf, int val, } else { if (val >= ops->set_optmin && val < ops->set_optmax) { - ops->use++; mutex_unlock(&nf_sockopt_mutex); if (ops->compat_set) ret = ops->compat_set(sk, @@ -171,17 +159,15 @@ static int compat_nf_sockopt(struct sock *sk, int pf, int val, goto out; } } + module_put(ops->owner); } } + out_nosup: mutex_unlock(&nf_sockopt_mutex); return -ENOPROTOOPT; out: - mutex_lock(&nf_sockopt_mutex); - ops->use--; - if (ops->cleanup_task) - wake_up_process(ops->cleanup_task); - mutex_unlock(&nf_sockopt_mutex); + module_put(ops->owner); return ret; } -- cgit v1.2.3 From 5547bbeed37f7ab64942ffcce9293681101577ef Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 23 Aug 2007 10:37:53 -0700 Subject: PCI AER: fix warnings when PCIEAER=n Fix warnings when CONFIG_PCIEAER=n: drivers/pci/pcie/portdrv_pci.c:105: warning: statement with no effect drivers/pci/pcie/portdrv_pci.c:226: warning: statement with no effect drivers/scsi/arcmsr/arcmsr_hba.c:352: warning: statement with no effect Signed-off-by: Randy Dunlap Acked-by: Linas Vepstas Signed-off-by: Greg Kroah-Hartman --- include/linux/aer.h | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/aer.h b/include/linux/aer.h index 509656286e53..bcf236d825e8 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -15,11 +15,26 @@ extern int pci_disable_pcie_error_reporting(struct pci_dev *dev); extern int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev); extern int pci_cleanup_aer_correct_error_status(struct pci_dev *dev); #else -#define pci_enable_pcie_error_reporting(dev) (-EINVAL) -#define pci_find_aer_capability(dev) (0) -#define pci_disable_pcie_error_reporting(dev) (-EINVAL) -#define pci_cleanup_aer_uncorrect_error_status(dev) (-EINVAL) -#define pci_cleanup_aer_correct_error_status(dev) (-EINVAL) +static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev) +{ + return -EINVAL; +} +static inline int pci_find_aer_capability(struct pci_dev *dev) +{ + return 0; +} +static inline int pci_disable_pcie_error_reporting(struct pci_dev *dev) +{ + return -EINVAL; +} +static inline int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) +{ + return -EINVAL; +} +static inline int pci_cleanup_aer_correct_error_status(struct pci_dev *dev) +{ + return -EINVAL; +} #endif #endif //_AER_H_ -- cgit v1.2.3 From 99fa9844f0eed5582b5648f745204758b27db659 Mon Sep 17 00:00:00 2001 From: Jason Gaston Date: Thu, 30 Aug 2007 17:50:56 -0700 Subject: PCI: irq and pci_ids patch for Intel Tolapai This patch adds the Intel Tolapai LPC and SMBus Controller DID's. Signed-off-by: Jason Gaston Signed-off-by: Greg Kroah-Hartman --- arch/i386/pci/irq.c | 1 + include/linux/pci_ids.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c index 665db063a40a..8434f2323b87 100644 --- a/arch/i386/pci/irq.c +++ b/arch/i386/pci/irq.c @@ -550,6 +550,7 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route case PCI_DEVICE_ID_INTEL_ICH9_3: case PCI_DEVICE_ID_INTEL_ICH9_4: case PCI_DEVICE_ID_INTEL_ICH9_5: + case PCI_DEVICE_ID_INTEL_TOLAPAI_0: r->name = "PIIX/ICH"; r->get = pirq_piix_get; r->set = pirq_piix_set; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index d41747b9fd15..55f307ffbf96 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2293,6 +2293,8 @@ #define PCI_DEVICE_ID_INTEL_MCH_PC 0x3599 #define PCI_DEVICE_ID_INTEL_MCH_PC1 0x359a #define PCI_DEVICE_ID_INTEL_E7525_MCH 0x359e +#define PCI_DEVICE_ID_INTEL_TOLAPAI_0 0x5031 +#define PCI_DEVICE_ID_INTEL_TOLAPAI_1 0x5032 #define PCI_DEVICE_ID_INTEL_82371SB_0 0x7000 #define PCI_DEVICE_ID_INTEL_82371SB_1 0x7010 #define PCI_DEVICE_ID_INTEL_82371SB_2 0x7020 -- cgit v1.2.3 From 6c3c22f3cb2b7cd0a42a024b93db76b5c3133d37 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Tue, 11 Sep 2007 22:28:36 +0200 Subject: ide: add ide_dev_is_sata() helper (take 2) Make the SATA drive detection code from eighty_ninty_three() into inline ide_dev_is_sata() helper fixing it along the way to be more strict while checking word 80 for the reserved values... Signed-off-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-iops.c | 3 +-- include/linux/ide.h | 13 +++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index f4cd2700cae5..646a54e233d3 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -615,8 +615,7 @@ u8 eighty_ninty_three (ide_drive_t *drive) if (hwif->cbl != ATA_CBL_PATA80 && !ivb) goto no_80w; - /* Check for SATA but only if we are ATA5 or higher */ - if (id->hw_config == 0 && (id->major_rev_num & 0x7FE0)) + if (ide_dev_is_sata(id)) return 1; /* diff --git a/include/linux/ide.h b/include/linux/ide.h index c792b4fd1588..b9f66c10caa0 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1378,6 +1378,19 @@ static inline int ide_dev_has_iordy(struct hd_driveid *id) return ((id->field_valid & 2) && (id->capability & 8)) ? 1 : 0; } +static inline int ide_dev_is_sata(struct hd_driveid *id) +{ + /* + * See if word 93 is 0 AND drive is at least ATA-5 compatible + * verifying that word 80 by casting it to a signed type -- + * this trick allows us to filter out the reserved values of + * 0x0000 and 0xffff along with the earlier ATA revisions... + */ + if (id->hw_config == 0 && (short)id->major_rev_num >= 0x0020) + return 1; + return 0; +} + u8 ide_dump_status(ide_drive_t *, const char *, u8); typedef struct ide_pio_timings_s { -- cgit v1.2.3 From df96efd73b81b8bc2d23b3d8b6025cce3d43db6c Mon Sep 17 00:00:00 2001 From: Yoichi Yuasa Date: Tue, 11 Sep 2007 22:24:45 +0100 Subject: leds: Add missing include for leds.h This patch has added #include to include/linux/leds.h for rwlock_t. Signed-off-by: Yoichi Yuasa Signed-off-by: Richard Purdie --- include/linux/leds.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index 421175092ee2..dc1178f6184b 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -13,6 +13,7 @@ #define __LINUX_LEDS_H_INCLUDED #include +#include struct device; /* -- cgit v1.2.3 From a83308e60f63749dc1d08acb0d8fa9e2ec13c9a7 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 11 Sep 2007 15:23:47 -0700 Subject: PTR_ALIGN The AdvanSys driver wants to align some pointers, and the ALIGN macro doesn't work for pointers. Rather than try to make it work, add a new PTR_ALIGN macro which is typesafe. Signed-off-by: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index f592df74b3cf..47160fe378c9 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -34,6 +34,7 @@ extern const char linux_proc_banner[]; #define ALIGN(x,a) __ALIGN_MASK(x,(typeof(x))(a)-1) #define __ALIGN_MASK(x,mask) (((x)+(mask))&~(mask)) +#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) -- cgit v1.2.3 From dd23aae4f5edf4e1dbd8f7f8013a754ba3253f48 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 11 Sep 2007 15:23:55 -0700 Subject: Fix select on /proc files without ->poll MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Taneli Vähäkangas reported that commit 786d7e1612f0b0adb6046f19b906609e4fe8b1ba aka "Fix rmmod/read/write races in /proc entries" broke SBCL + SLIME combo. The old code in do_select() used DEFAULT_POLLMASK, if couldn't find ->poll handler. The new code makes ->poll always there and returns 0 by default, which is not correct. Return DEFAULT_POLLMASK instead. Steps to reproduce: install emacs, SBCL, SLIME emacs M-x slime in *inferior-lisp* buffer [watch it doing "Connecting to Swank on port X.."] Please, apply before 2.6.23. P.S.: why SBCL can't just read(2) /proc/cpuinfo is a mystery. Signed-off-by: Alexey Dobriyan Cc: T Taneli Vahakangas Cc: Oleg Nesterov Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/inode.c | 3 ++- fs/select.c | 2 -- include/linux/poll.h | 2 ++ 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/inode.c b/fs/proc/inode.c index a5b0dfd89a17..0e4d37c93eea 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -232,7 +233,7 @@ static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *pts) { struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); - unsigned int rv = 0; + unsigned int rv = DEFAULT_POLLMASK; unsigned int (*poll)(struct file *, struct poll_table_struct *); spin_lock(&pde->pde_unload_lock); diff --git a/fs/select.c b/fs/select.c index a974082b0824..46dca31c607a 100644 --- a/fs/select.c +++ b/fs/select.c @@ -26,8 +26,6 @@ #include -#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM) - struct poll_table_page { struct poll_table_page * next; struct poll_table_entry * entry; diff --git a/include/linux/poll.h b/include/linux/poll.h index 27690798623f..16d813b364ef 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -21,6 +21,8 @@ #define WQUEUES_STACK_ALLOC (MAX_STACK_ALLOC - FRONTEND_STACK_ALLOC) #define N_INLINE_POLL_ENTRIES (WQUEUES_STACK_ALLOC / sizeof(struct poll_table_entry)) +#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM) + struct poll_table_struct; /* -- cgit v1.2.3