From 0afd4a21ba7d75e93fa79cf05d7a21774e149c0f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 2 Feb 2009 13:27:44 -0800 Subject: net: Fix userland breakage wrt. linux/if_tunnel.h Reported by Andrew Walrond Changeset c19e654ddbe3831252f61e76a74d661e1a755530 ("gre: Add netlink interface") added an include of linux/ip.h to linux/if_tunnel.h We can't really let that get exposed to userspace because this conflicts with types defined in netinet/ip.h which userland is almost certainly going to have included either explicitly or implicitly. So guard this include with a __KERNEL__ ifdef. Signed-off-by: David S. Miller --- include/linux/Kbuild | 2 +- include/linux/if_tunnel.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 12e9a2957caf..6a9bb9753235 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -89,7 +89,6 @@ header-y += if_ppp.h header-y += if_slip.h header-y += if_strip.h header-y += if_tun.h -header-y += if_tunnel.h header-y += in_route.h header-y += ioctl.h header-y += ip6_tunnel.h @@ -235,6 +234,7 @@ unifdef-y += if_phonet.h unifdef-y += if_pppol2tp.h unifdef-y += if_pppox.h unifdef-y += if_tr.h +unifdef-y += if_tunnel.h unifdef-y += if_vlan.h unifdef-y += igmp.h unifdef-y += inet_diag.h diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h index aeab2cb32a9c..82c43624c067 100644 --- a/include/linux/if_tunnel.h +++ b/include/linux/if_tunnel.h @@ -2,7 +2,10 @@ #define _IF_TUNNEL_H_ #include + +#ifdef __KERNEL__ #include +#endif #define SIOCGETTUNNEL (SIOCDEVPRIVATE + 0) #define SIOCADDTUNNEL (SIOCDEVPRIVATE + 1) -- cgit v1.2.3 From 2999b58b795ad81f10e34bdbbfd2742172f247e4 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sun, 1 Feb 2009 20:46:39 +0400 Subject: ide/libata: fix ata_id_is_cfa() (take 4) When checking for the CFA feature set support, ata_id_is_cfa() tests bit 2 in word 82 of the identify data instead the word 83; it also checks the ATA/PI version support in the word 80 (which the CompactFlash specifications have as reserved), this having no slightest chance to work on the modern CF cards that don't have 0x848A in the word 0... Signed-off-by: Sergei Shtylyov Signed-off-by: Jeff Garzik --- include/linux/ata.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index a53318b8cbd0..08a86d5cdf1b 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -731,12 +731,17 @@ static inline int ata_id_current_chs_valid(const u16 *id) static inline int ata_id_is_cfa(const u16 *id) { - if (id[ATA_ID_CONFIG] == 0x848A) /* Standard CF */ + if (id[ATA_ID_CONFIG] == 0x848A) /* Traditional CF */ return 1; - /* Could be CF hiding as standard ATA */ - if (ata_id_major_version(id) >= 3 && - id[ATA_ID_COMMAND_SET_1] != 0xFFFF && - (id[ATA_ID_COMMAND_SET_1] & (1 << 2))) + /* + * CF specs don't require specific value in the word 0 anymore and yet + * they forbid to report the ATA version in the word 80 and require the + * CFA feature set support to be indicated in the word 83 in this case. + * Unfortunately, some cards only follow either of this requirements, + * and while those that don't indicate CFA feature support need some + * sort of quirk list, it seems impractical for the ones that do... + */ + if ((id[ATA_ID_COMMAND_SET_2] & 0xC004) == 0x4004) return 1; return 0; } -- cgit v1.2.3 From 99cf610aa4840d822cdc67d194b23b55010ca9bd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 29 Jan 2009 20:31:32 +0900 Subject: libata: clear dev->ering in smarter way dev->ering used to be cleared together with the rest of ata_device in ata_dev_init() which is called whenever a probing event occurs. dev->ering is about to be used to track probing failures so it needs to remain persistent over multiple porbing events. This patch achieves this by doing the following. * Instead of CLEAR_OFFSET, define CLEAR_BEGIN and CLEAR_END and only clear between BEGIN and END. ering is moved after END. The split of persistent area is to allow hotter items remain at the head. * ering is explicitly cleared on ata_dev_disable() and when device attach succeeds. So, ering is persistent throug a device's life time (unless explicitly cleared of course) and also through periods inbetween disablement of an attached device and successful detection of the next one. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 4 ++-- drivers/ata/libata-eh.c | 7 +++++++ include/linux/libata.h | 18 ++++++++++-------- 3 files changed, 19 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index d006e5c4768c..564c03c4ebb3 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5404,8 +5404,8 @@ void ata_dev_init(struct ata_device *dev) dev->horkage = 0; spin_unlock_irqrestore(ap->lock, flags); - memset((void *)dev + ATA_DEVICE_CLEAR_OFFSET, 0, - sizeof(*dev) - ATA_DEVICE_CLEAR_OFFSET); + memset((void *)dev + ATA_DEVICE_CLEAR_BEGIN, 0, + ATA_DEVICE_CLEAR_END - ATA_DEVICE_CLEAR_BEGIN); dev->pio_mask = UINT_MAX; dev->mwdma_mask = UINT_MAX; dev->udma_mask = UINT_MAX; diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index aafe82bf5e2e..90092c1aae53 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -1194,6 +1194,11 @@ void ata_dev_disable(struct ata_device *dev) ata_acpi_on_disable(dev); ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO0 | ATA_DNXFER_QUIET); dev->class++; + + /* From now till the next successful probe, ering is used to + * track probe failures. Clear accumulated device error info. + */ + ata_ering_clear(&dev->ering); } /** @@ -2765,6 +2770,8 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link, readid_flags, dev->id); switch (rc) { case 0: + /* clear error info accumulated during probe */ + ata_ering_clear(&dev->ering); new_mask |= 1 << dev->devno; break; case -ENOENT: diff --git a/include/linux/libata.h b/include/linux/libata.h index bca3ba25f52a..9dcefee5843c 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -580,7 +580,7 @@ struct ata_device { acpi_handle acpi_handle; union acpi_object *gtf_cache; #endif - /* n_sector is used as CLEAR_OFFSET, read comment above CLEAR_OFFSET */ + /* n_sector is CLEAR_BEGIN, read comment above CLEAR_BEGIN */ u64 n_sectors; /* size of device, if ATA */ unsigned int class; /* ATA_DEV_xxx */ unsigned long unpark_deadline; @@ -605,20 +605,22 @@ struct ata_device { u16 heads; /* Number of heads */ u16 sectors; /* Number of sectors per track */ - /* error history */ - int spdn_cnt; - struct ata_ering ering; - union { u16 id[ATA_ID_WORDS]; /* IDENTIFY xxx DEVICE data */ u32 gscr[SATA_PMP_GSCR_DWORDS]; /* PMP GSCR block */ }; + + /* error history */ + int spdn_cnt; + /* ering is CLEAR_END, read comment above CLEAR_END */ + struct ata_ering ering; }; -/* Offset into struct ata_device. Fields above it are maintained - * acress device init. Fields below are zeroed. +/* Fields between ATA_DEVICE_CLEAR_BEGIN and ATA_DEVICE_CLEAR_END are + * cleared to zero on ata_dev_init(). */ -#define ATA_DEVICE_CLEAR_OFFSET offsetof(struct ata_device, n_sectors) +#define ATA_DEVICE_CLEAR_BEGIN offsetof(struct ata_device, n_sectors) +#define ATA_DEVICE_CLEAR_END offsetof(struct ata_device, ering) struct ata_eh_info { struct ata_device *dev; /* offending device */ -- cgit v1.2.3 From 9062712fa9ed13b531dfc2228086650b8bd6a255 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 29 Jan 2009 20:31:36 +0900 Subject: libata: implement HORKAGE_1_5_GBPS and apply it to WD My Book 3Gbps is often much more prone to transmission failures. It's usually okay to let EH handle speed down after transmission failures but some WD My Book drives completely shutdown after certain transmission failures and after it only power cycling can revive them. Combined with the fact that external drives often end up with cable assembly which is longer than usual and more likely to have intervening gender, this makes these drives very likely to shutdown under certain configurations virtually rendering them unusable. This patch implements HOARKGE_1_5_GBPS and applies it to WD My Book such that 1.5Gbps is forced once the device is identified. Please take a look at the following bz for related reports. http://bugzilla.kernel.org/show_bug.cgi?id=9913 Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 41 +++++++++++++++++++++++++++++++++++++++++ include/linux/libata.h | 1 + 2 files changed, 42 insertions(+) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 3c5965d56c47..9fbf0595f3d4 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2232,6 +2232,40 @@ retry: return rc; } +static int ata_do_link_spd_horkage(struct ata_device *dev) +{ + struct ata_link *plink = ata_dev_phys_link(dev); + u32 target, target_limit; + + if (!sata_scr_valid(plink)) + return 0; + + if (dev->horkage & ATA_HORKAGE_1_5_GBPS) + target = 1; + else + return 0; + + target_limit = (1 << target) - 1; + + /* if already on stricter limit, no need to push further */ + if (plink->sata_spd_limit <= target_limit) + return 0; + + plink->sata_spd_limit = target_limit; + + /* Request another EH round by returning -EAGAIN if link is + * going faster than the target speed. Forward progress is + * guaranteed by setting sata_spd_limit to target_limit above. + */ + if (plink->sata_spd > target) { + ata_dev_printk(dev, KERN_INFO, + "applying link speed limit horkage to %s\n", + sata_spd_string(target)); + return -EAGAIN; + } + return 0; +} + static inline u8 ata_dev_knobble(struct ata_device *dev) { struct ata_port *ap = dev->link->ap; @@ -2322,6 +2356,10 @@ int ata_dev_configure(struct ata_device *dev) return 0; } + rc = ata_do_link_spd_horkage(dev); + if (rc) + return rc; + /* let ACPI work its magic */ rc = ata_acpi_on_devcfg(dev); if (rc) @@ -4223,6 +4261,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { /* Devices that do not need bridging limits applied */ { "MTRON MSP-SATA*", NULL, ATA_HORKAGE_BRIDGE_OK, }, + /* Devices which aren't very happy with higher link speeds */ + { "WD My Book", NULL, ATA_HORKAGE_1_5_GBPS, }, + /* End Marker */ { } }; diff --git a/include/linux/libata.h b/include/linux/libata.h index 9dcefee5843c..5d87bc09a1f5 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -380,6 +380,7 @@ enum { ATA_HORKAGE_ATAPI_MOD16_DMA = (1 << 11), /* use ATAPI DMA for commands not multiple of 16 bytes */ ATA_HORKAGE_FIRMWARE_WARN = (1 << 12), /* firwmare update warning */ + ATA_HORKAGE_1_5_GBPS = (1 << 13), /* force 1.5 Gbps */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ -- cgit v1.2.3 From 35626129abcd6a7547e84c817ef5b6eff7a8758b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 2 Feb 2009 16:00:29 -0800 Subject: sched: add missing kernel-doc in sched.h Add kernel-doc notation for @lock: include/linux/sched.h:457: No description found for parameter 'lock' Signed-off-by: Randy Dunlap Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5a7c76388731..2127e959e0f4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -443,6 +443,7 @@ struct pacct_struct { * @utime: time spent in user mode, in &cputime_t units * @stime: time spent in kernel mode, in &cputime_t units * @sum_exec_runtime: total time spent on the CPU, in nanoseconds + * @lock: lock for fields in this struct * * This structure groups together three kinds of CPU time that are * tracked for threads and thread groups. Most things considering -- cgit v1.2.3 From 97c44836cdec1ea713a15d84098a1a908157e68f Mon Sep 17 00:00:00 2001 From: "Timothy S. Nelson" Date: Fri, 30 Jan 2009 06:12:47 +1100 Subject: PCI: return error on failure to read PCI ROMs This patch makes the ROM reading code return an error to user space if the size of the ROM read is equal to 0. The patch also emits a warnings if the contents of the ROM are invalid, and documents the effects of the "enable" file on ROM reading. Signed-off-by: Timothy S. Nelson Acked-by: Alex Villacis-Lasso Signed-off-by: Jesse Barnes --- Documentation/filesystems/sysfs-pci.txt | 13 ++++++++++++- arch/ia64/sn/kernel/io_acpi_init.c | 2 +- arch/ia64/sn/kernel/io_init.c | 2 +- drivers/pci/pci-sysfs.c | 4 ++-- drivers/pci/rom.c | 8 +++++--- include/linux/pci.h | 2 +- 6 files changed, 22 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/sysfs-pci.txt b/Documentation/filesystems/sysfs-pci.txt index 68ef48839c04..9f8740ca3f3b 100644 --- a/Documentation/filesystems/sysfs-pci.txt +++ b/Documentation/filesystems/sysfs-pci.txt @@ -9,6 +9,7 @@ that support it. For example, a given bus might look like this: | |-- class | |-- config | |-- device + | |-- enable | |-- irq | |-- local_cpus | |-- resource @@ -32,6 +33,7 @@ files, each with their own function. class PCI class (ascii, ro) config PCI config space (binary, rw) device PCI device (ascii, ro) + enable Whether the device is enabled (ascii, rw) irq IRQ number (ascii, ro) local_cpus nearby CPU mask (cpumask, ro) resource PCI resource host addresses (ascii, ro) @@ -57,10 +59,19 @@ used to do actual device programming from userspace. Note that some platforms don't support mmapping of certain resources, so be sure to check the return value from any attempted mmap. +The 'enable' file provides a counter that indicates how many times the device +has been enabled. If the 'enable' file currently returns '4', and a '1' is +echoed into it, it will then return '5'. Echoing a '0' into it will decrease +the count. Even when it returns to 0, though, some of the initialisation +may not be reversed. + The 'rom' file is special in that it provides read-only access to the device's ROM file, if available. It's disabled by default, however, so applications should write the string "1" to the file to enable it before attempting a read -call, and disable it following the access by writing "0" to the file. +call, and disable it following the access by writing "0" to the file. Note +that the device must be enabled for a rom read to return data succesfully. +In the event a driver is not bound to the device, it can be enabled using the +'enable' file, documented above. Accessing legacy resources through sysfs ---------------------------------------- diff --git a/arch/ia64/sn/kernel/io_acpi_init.c b/arch/ia64/sn/kernel/io_acpi_init.c index c5a214026a77..d0223abbbbd4 100644 --- a/arch/ia64/sn/kernel/io_acpi_init.c +++ b/arch/ia64/sn/kernel/io_acpi_init.c @@ -443,7 +443,7 @@ sn_acpi_slot_fixup(struct pci_dev *dev) size = pci_resource_len(dev, PCI_ROM_RESOURCE); addr = ioremap(pcidev_info->pdi_pio_mapped_addr[PCI_ROM_RESOURCE], size); - image_size = pci_get_rom_size(addr, size); + image_size = pci_get_rom_size(dev, addr, size); dev->resource[PCI_ROM_RESOURCE].start = (unsigned long) addr; dev->resource[PCI_ROM_RESOURCE].end = (unsigned long) addr + image_size - 1; diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c index 4e1801bad83a..e2eb2da60f96 100644 --- a/arch/ia64/sn/kernel/io_init.c +++ b/arch/ia64/sn/kernel/io_init.c @@ -269,7 +269,7 @@ sn_io_slot_fixup(struct pci_dev *dev) rom = ioremap(pci_resource_start(dev, PCI_ROM_RESOURCE), size + 1); - image_size = pci_get_rom_size(rom, size + 1); + image_size = pci_get_rom_size(dev, rom, size + 1); dev->resource[PCI_ROM_RESOURCE].end = dev->resource[PCI_ROM_RESOURCE].start + image_size - 1; diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index db7ec14fa719..dfc4e0ddf241 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -768,8 +768,8 @@ pci_read_rom(struct kobject *kobj, struct bin_attribute *bin_attr, return -EINVAL; rom = pci_map_rom(pdev, &size); /* size starts out as PCI window size */ - if (!rom) - return 0; + if (!rom || !size) + return -EIO; if (off >= size) count = 0; diff --git a/drivers/pci/rom.c b/drivers/pci/rom.c index 132a78159b60..29cbe47f219f 100644 --- a/drivers/pci/rom.c +++ b/drivers/pci/rom.c @@ -63,7 +63,7 @@ void pci_disable_rom(struct pci_dev *pdev) * The PCI window size could be much larger than the * actual image size. */ -size_t pci_get_rom_size(void __iomem *rom, size_t size) +size_t pci_get_rom_size(struct pci_dev *pdev, void __iomem *rom, size_t size) { void __iomem *image; int last_image; @@ -72,8 +72,10 @@ size_t pci_get_rom_size(void __iomem *rom, size_t size) do { void __iomem *pds; /* Standard PCI ROMs start out with these bytes 55 AA */ - if (readb(image) != 0x55) + if (readb(image) != 0x55) { + dev_err(&pdev->dev, "Invalid ROM contents\n"); break; + } if (readb(image + 1) != 0xAA) break; /* get the PCI data structure and check its signature */ @@ -159,7 +161,7 @@ void __iomem *pci_map_rom(struct pci_dev *pdev, size_t *size) * size is much larger than the actual size of the ROM. * True size is important if the ROM is going to be copied. */ - *size = pci_get_rom_size(rom, *size); + *size = pci_get_rom_size(pdev, rom, *size); return rom; } diff --git a/include/linux/pci.h b/include/linux/pci.h index 48890cf3f96e..7bd624bfdcfd 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -684,7 +684,7 @@ int pci_enable_rom(struct pci_dev *pdev); void pci_disable_rom(struct pci_dev *pdev); void __iomem __must_check *pci_map_rom(struct pci_dev *pdev, size_t *size); void pci_unmap_rom(struct pci_dev *pdev, void __iomem *rom); -size_t pci_get_rom_size(void __iomem *rom, size_t size); +size_t pci_get_rom_size(struct pci_dev *pdev, void __iomem *rom, size_t size); /* Power management related routines */ int pci_save_state(struct pci_dev *dev); -- cgit v1.2.3 From ac7b9004909d03d67016368093e81d37cae72895 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 4 Feb 2009 15:11:59 -0800 Subject: generic swap(): don't return a value from swap() The swap() macro is accidentally retuning the value of its first argument. Change it into a doesn't-return-anything macro before someone goes and relies upon this behaviour. Signed-off-by: Peter Zijlstra Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 343df9ef2412..7fa371898e3e 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -480,7 +480,8 @@ static inline char *pack_hex_byte(char *buf, u8 byte) /* * swap - swap value of @a and @b */ -#define swap(a, b) ({ typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; }) +#define swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) /** * container_of - cast a member of a structure out to the containing structure -- cgit v1.2.3 From 1f5e31d7e55ac7fbd4ec5e5b20c8868b0e4564c9 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Wed, 4 Feb 2009 15:12:03 -0800 Subject: fbmem: don't call copy_from/to_user() with mutex held Avoid calling copy_from/to_user() with fb_info->lock mutex held in fbmem ioctl(). fb_mmap() is called under mm->mmap_sem (A) held, that also acquires fb_info->lock (B); fb_ioctl() takes fb_info->lock (B) and does copy_from/to_user() that might acquire mm->mmap_sem (A), causing a deadlock. NOTE: it doesn't push down the fb_info->lock in each own driver's fb_ioctl(), so there are still potential deadlocks elsewhere. Signed-off-by: Andrea Righi Cc: Dave Jones Cc: "Rafael J. Wysocki" Cc: Johannes Weiner Cc: Krzysztof Helt Cc: Harvey Harrison Cc: Stefan Richter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/fbcmap.c | 20 +++++--- drivers/video/fbmem.c | 135 +++++++++++++++++++++++++------------------------ include/linux/fb.h | 15 ++++++ 3 files changed, 98 insertions(+), 72 deletions(-) (limited to 'include/linux') diff --git a/drivers/video/fbcmap.c b/drivers/video/fbcmap.c index 91b78e691505..f53b9f1d6aba 100644 --- a/drivers/video/fbcmap.c +++ b/drivers/video/fbcmap.c @@ -250,10 +250,6 @@ int fb_set_user_cmap(struct fb_cmap_user *cmap, struct fb_info *info) int rc, size = cmap->len * sizeof(u16); struct fb_cmap umap; - if (cmap->start < 0 || (!info->fbops->fb_setcolreg && - !info->fbops->fb_setcmap)) - return -EINVAL; - memset(&umap, 0, sizeof(struct fb_cmap)); rc = fb_alloc_cmap(&umap, cmap->len, cmap->transp != NULL); if (rc) @@ -262,11 +258,23 @@ int fb_set_user_cmap(struct fb_cmap_user *cmap, struct fb_info *info) copy_from_user(umap.green, cmap->green, size) || copy_from_user(umap.blue, cmap->blue, size) || (cmap->transp && copy_from_user(umap.transp, cmap->transp, size))) { - fb_dealloc_cmap(&umap); - return -EFAULT; + rc = -EFAULT; + goto out; } umap.start = cmap->start; + if (!lock_fb_info(info)) { + rc = -ENODEV; + goto out; + } + if (cmap->start < 0 || (!info->fbops->fb_setcolreg && + !info->fbops->fb_setcmap)) { + rc = -EINVAL; + goto out1; + } rc = fb_set_cmap(&umap, info); +out1: + unlock_fb_info(info); +out: fb_dealloc_cmap(&umap); return rc; } diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index 756efeb91abc..cfd9dce1ce0b 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -1013,132 +1013,139 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, struct fb_var_screeninfo var; struct fb_fix_screeninfo fix; struct fb_con2fbmap con2fb; + struct fb_cmap cmap_from; struct fb_cmap_user cmap; struct fb_event event; void __user *argp = (void __user *)arg; long ret = 0; - fb = info->fbops; - if (!fb) - return -ENODEV; - switch (cmd) { case FBIOGET_VSCREENINFO: - ret = copy_to_user(argp, &info->var, - sizeof(var)) ? -EFAULT : 0; + if (!lock_fb_info(info)) + return -ENODEV; + var = info->var; + unlock_fb_info(info); + + ret = copy_to_user(argp, &var, sizeof(var)) ? -EFAULT : 0; break; case FBIOPUT_VSCREENINFO: - if (copy_from_user(&var, argp, sizeof(var))) { - ret = -EFAULT; - break; - } + if (copy_from_user(&var, argp, sizeof(var))) + return -EFAULT; + if (!lock_fb_info(info)) + return -ENODEV; acquire_console_sem(); info->flags |= FBINFO_MISC_USEREVENT; ret = fb_set_var(info, &var); info->flags &= ~FBINFO_MISC_USEREVENT; release_console_sem(); - if (ret == 0 && copy_to_user(argp, &var, sizeof(var))) + unlock_fb_info(info); + if (!ret && copy_to_user(argp, &var, sizeof(var))) ret = -EFAULT; break; case FBIOGET_FSCREENINFO: - ret = copy_to_user(argp, &info->fix, - sizeof(fix)) ? -EFAULT : 0; + if (!lock_fb_info(info)) + return -ENODEV; + fix = info->fix; + unlock_fb_info(info); + + ret = copy_to_user(argp, &fix, sizeof(fix)) ? -EFAULT : 0; break; case FBIOPUTCMAP: if (copy_from_user(&cmap, argp, sizeof(cmap))) - ret = -EFAULT; - else - ret = fb_set_user_cmap(&cmap, info); + return -EFAULT; + ret = fb_set_user_cmap(&cmap, info); break; case FBIOGETCMAP: if (copy_from_user(&cmap, argp, sizeof(cmap))) - ret = -EFAULT; - else - ret = fb_cmap_to_user(&info->cmap, &cmap); + return -EFAULT; + if (!lock_fb_info(info)) + return -ENODEV; + cmap_from = info->cmap; + unlock_fb_info(info); + ret = fb_cmap_to_user(&cmap_from, &cmap); break; case FBIOPAN_DISPLAY: - if (copy_from_user(&var, argp, sizeof(var))) { - ret = -EFAULT; - break; - } + if (copy_from_user(&var, argp, sizeof(var))) + return -EFAULT; + if (!lock_fb_info(info)) + return -ENODEV; acquire_console_sem(); ret = fb_pan_display(info, &var); release_console_sem(); + unlock_fb_info(info); if (ret == 0 && copy_to_user(argp, &var, sizeof(var))) - ret = -EFAULT; + return -EFAULT; break; case FBIO_CURSOR: ret = -EINVAL; break; case FBIOGET_CON2FBMAP: if (copy_from_user(&con2fb, argp, sizeof(con2fb))) - ret = -EFAULT; - else if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES) - ret = -EINVAL; - else { - con2fb.framebuffer = -1; - event.info = info; - event.data = &con2fb; - fb_notifier_call_chain(FB_EVENT_GET_CONSOLE_MAP, - &event); - ret = copy_to_user(argp, &con2fb, - sizeof(con2fb)) ? -EFAULT : 0; - } + return -EFAULT; + if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES) + return -EINVAL; + con2fb.framebuffer = -1; + event.data = &con2fb; + + if (!lock_fb_info(info)) + return -ENODEV; + event.info = info; + fb_notifier_call_chain(FB_EVENT_GET_CONSOLE_MAP, &event); + unlock_fb_info(info); + + ret = copy_to_user(argp, &con2fb, sizeof(con2fb)) ? -EFAULT : 0; break; case FBIOPUT_CON2FBMAP: - if (copy_from_user(&con2fb, argp, sizeof(con2fb))) { - ret = -EFAULT; - break; - } - if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES) { - ret = -EINVAL; - break; - } - if (con2fb.framebuffer < 0 || con2fb.framebuffer >= FB_MAX) { - ret = -EINVAL; - break; - } + if (copy_from_user(&con2fb, argp, sizeof(con2fb))) + return -EFAULT; + if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES) + return -EINVAL; + if (con2fb.framebuffer < 0 || con2fb.framebuffer >= FB_MAX) + return -EINVAL; if (!registered_fb[con2fb.framebuffer]) request_module("fb%d", con2fb.framebuffer); if (!registered_fb[con2fb.framebuffer]) { ret = -EINVAL; break; } - event.info = info; event.data = &con2fb; + if (!lock_fb_info(info)) + return -ENODEV; + event.info = info; ret = fb_notifier_call_chain(FB_EVENT_SET_CONSOLE_MAP, &event); + unlock_fb_info(info); break; case FBIOBLANK: + if (!lock_fb_info(info)) + return -ENODEV; acquire_console_sem(); info->flags |= FBINFO_MISC_USEREVENT; ret = fb_blank(info, arg); info->flags &= ~FBINFO_MISC_USEREVENT; release_console_sem(); - break;; + unlock_fb_info(info); + break; default: - if (fb->fb_ioctl == NULL) - ret = -ENOTTY; - else + if (!lock_fb_info(info)) + return -ENODEV; + fb = info->fbops; + if (fb->fb_ioctl) ret = fb->fb_ioctl(info, cmd, arg); + else + ret = -ENOTTY; + unlock_fb_info(info); } return ret; } static long fb_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -__acquires(&info->lock) -__releases(&info->lock) { struct inode *inode = file->f_path.dentry->d_inode; int fbidx = iminor(inode); - struct fb_info *info; - long ret; + struct fb_info *info = registered_fb[fbidx]; - info = registered_fb[fbidx]; - mutex_lock(&info->lock); - ret = do_fb_ioctl(info, cmd, arg); - mutex_unlock(&info->lock); - return ret; + return do_fb_ioctl(info, cmd, arg); } #ifdef CONFIG_COMPAT @@ -1257,8 +1264,6 @@ static int fb_get_fscreeninfo(struct fb_info *info, unsigned int cmd, static long fb_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -__acquires(&info->lock) -__releases(&info->lock) { struct inode *inode = file->f_path.dentry->d_inode; int fbidx = iminor(inode); @@ -1266,7 +1271,6 @@ __releases(&info->lock) struct fb_ops *fb = info->fbops; long ret = -ENOIOCTLCMD; - mutex_lock(&info->lock); switch(cmd) { case FBIOGET_VSCREENINFO: case FBIOPUT_VSCREENINFO: @@ -1292,7 +1296,6 @@ __releases(&info->lock) ret = fb->fb_compat_ioctl(info, cmd, arg); break; } - mutex_unlock(&info->lock); return ret; } #endif diff --git a/include/linux/fb.h b/include/linux/fb.h index 818fe21257e8..31527e17076b 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -960,6 +960,21 @@ extern struct fb_info *registered_fb[FB_MAX]; extern int num_registered_fb; extern struct class *fb_class; +static inline int lock_fb_info(struct fb_info *info) +{ + mutex_lock(&info->lock); + if (!info->fbops) { + mutex_unlock(&info->lock); + return 0; + } + return 1; +} + +static inline void unlock_fb_info(struct fb_info *info) +{ + mutex_unlock(&info->lock); +} + static inline void __fb_pad_aligned_buffer(u8 *dst, u32 d_pitch, u8 *src, u32 s_pitch, u32 height) { -- cgit v1.2.3 From 777c6c5f1f6e757ae49ecca2ed72d6b1f523c007 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 4 Feb 2009 15:12:14 -0800 Subject: wait: prevent exclusive waiter starvation With exclusive waiters, every process woken up through the wait queue must ensure that the next waiter down the line is woken when it has finished. Interruptible waiters don't do that when aborting due to a signal. And if an aborting waiter is concurrently woken up through the waitqueue, noone will ever wake up the next waiter. This has been observed with __wait_on_bit_lock() used by lock_page_killable(): the first contender on the queue was aborting when the actual lock holder woke it up concurrently. The aborted contender didn't acquire the lock and therefor never did an unlock followed by waking up the next waiter. Add abort_exclusive_wait() which removes the process' wait descriptor from the waitqueue, iff still queued, or wakes up the next waiter otherwise. It does so under the waitqueue lock. Racing with a wake up means the aborting process is either already woken (removed from the queue) and will wake up the next waiter, or it will remove itself from the queue and the concurrent wake up will apply to the next waiter after it. Use abort_exclusive_wait() in __wait_event_interruptible_exclusive() and __wait_on_bit_lock() when they were interrupted by other means than a wake up through the queue. [akpm@linux-foundation.org: coding-style fixes] Reported-by: Chris Mason Signed-off-by: Johannes Weiner Mentored-by: Oleg Nesterov Cc: Peter Zijlstra Cc: Matthew Wilcox Cc: Chuck Lever Cc: Nick Piggin Cc: Ingo Molnar Cc: ["after some testing"] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/wait.h | 11 ++++++++-- kernel/sched.c | 4 ++-- kernel/wait.c | 59 +++++++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 63 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index ef609f842fac..a210ede73b56 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -132,6 +132,8 @@ static inline void __remove_wait_queue(wait_queue_head_t *head, list_del(&old->task_list); } +void __wake_up_common(wait_queue_head_t *q, unsigned int mode, + int nr_exclusive, int sync, void *key); void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key); extern void __wake_up_locked(wait_queue_head_t *q, unsigned int mode); extern void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr); @@ -333,16 +335,19 @@ do { \ for (;;) { \ prepare_to_wait_exclusive(&wq, &__wait, \ TASK_INTERRUPTIBLE); \ - if (condition) \ + if (condition) { \ + finish_wait(&wq, &__wait); \ break; \ + } \ if (!signal_pending(current)) { \ schedule(); \ continue; \ } \ ret = -ERESTARTSYS; \ + abort_exclusive_wait(&wq, &__wait, \ + TASK_INTERRUPTIBLE, NULL); \ break; \ } \ - finish_wait(&wq, &__wait); \ } while (0) #define wait_event_interruptible_exclusive(wq, condition) \ @@ -431,6 +436,8 @@ extern long interruptible_sleep_on_timeout(wait_queue_head_t *q, void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state); void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state); void finish_wait(wait_queue_head_t *q, wait_queue_t *wait); +void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, + unsigned int mode, void *key); int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key); int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); diff --git a/kernel/sched.c b/kernel/sched.c index 242d0d47a70d..8ee437a5ec1d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4697,8 +4697,8 @@ EXPORT_SYMBOL(default_wake_function); * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns * zero in this (rare) case, and we handle it by continuing to scan the queue. */ -static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, - int nr_exclusive, int sync, void *key) +void __wake_up_common(wait_queue_head_t *q, unsigned int mode, + int nr_exclusive, int sync, void *key) { wait_queue_t *curr, *next; diff --git a/kernel/wait.c b/kernel/wait.c index cd87131f2fc2..42a2dbc181c8 100644 --- a/kernel/wait.c +++ b/kernel/wait.c @@ -91,6 +91,15 @@ prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state) } EXPORT_SYMBOL(prepare_to_wait_exclusive); +/* + * finish_wait - clean up after waiting in a queue + * @q: waitqueue waited on + * @wait: wait descriptor + * + * Sets current thread back to running state and removes + * the wait descriptor from the given waitqueue if still + * queued. + */ void finish_wait(wait_queue_head_t *q, wait_queue_t *wait) { unsigned long flags; @@ -117,6 +126,39 @@ void finish_wait(wait_queue_head_t *q, wait_queue_t *wait) } EXPORT_SYMBOL(finish_wait); +/* + * abort_exclusive_wait - abort exclusive waiting in a queue + * @q: waitqueue waited on + * @wait: wait descriptor + * @state: runstate of the waiter to be woken + * @key: key to identify a wait bit queue or %NULL + * + * Sets current thread back to running state and removes + * the wait descriptor from the given waitqueue if still + * queued. + * + * Wakes up the next waiter if the caller is concurrently + * woken up through the queue. + * + * This prevents waiter starvation where an exclusive waiter + * aborts and is woken up concurrently and noone wakes up + * the next waiter. + */ +void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, + unsigned int mode, void *key) +{ + unsigned long flags; + + __set_current_state(TASK_RUNNING); + spin_lock_irqsave(&q->lock, flags); + if (!list_empty(&wait->task_list)) + list_del_init(&wait->task_list); + else if (waitqueue_active(q)) + __wake_up_common(q, mode, 1, 0, key); + spin_unlock_irqrestore(&q->lock, flags); +} +EXPORT_SYMBOL(abort_exclusive_wait); + int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key) { int ret = default_wake_function(wait, mode, sync, key); @@ -177,17 +219,20 @@ int __sched __wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q, int (*action)(void *), unsigned mode) { - int ret = 0; - do { + int ret; + prepare_to_wait_exclusive(wq, &q->wait, mode); - if (test_bit(q->key.bit_nr, q->key.flags)) { - if ((ret = (*action)(q->key.flags))) - break; - } + if (!test_bit(q->key.bit_nr, q->key.flags)) + continue; + ret = action(q->key.flags); + if (!ret) + continue; + abort_exclusive_wait(wq, &q->wait, mode, &q->key); + return ret; } while (test_and_set_bit(q->key.bit_nr, q->key.flags)); finish_wait(wq, &q->wait); - return ret; + return 0; } EXPORT_SYMBOL(__wait_on_bit_lock); -- cgit v1.2.3 From 7f9a50a5b89b87f8e754f59ae9968da28be618a5 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 7 Feb 2009 18:15:56 +1030 Subject: module: remove over-zealous check in __module_get() Impact: fix spurious BUG_ON() triggered under load module_refcount() isn't reliable outside stop_machine(), as demonstrated by Karsten Keil , networking can trigger it under load (an inc on one cpu and dec on another while module_refcount() is tallying can give false results, for example). Almost noone should be using __module_get, but that's another issue. Cc: Karsten Keil Signed-off-by: Rusty Russell Signed-off-by: Linus Torvalds --- include/linux/module.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index f3b8329eb5b8..145a75528cc1 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -407,7 +407,6 @@ static inline local_t *__module_ref_addr(struct module *mod, int cpu) static inline void __module_get(struct module *module) { if (module) { - BUG_ON(module_refcount(module) == 0); local_inc(__module_ref_addr(module, get_cpu())); put_cpu(); } -- cgit v1.2.3 From 766ccb9ed406c230d13c145def08ebea1b932982 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Tue, 20 Jan 2009 15:31:31 +0100 Subject: async: Rename _special -> _domain for clarity. Rename the async_*_special() functions to async_*_domain(), which describes the purpose of these functions much better. [Broke up long lines to silence checkpatch] Signed-off-by: Cornelia Huck Signed-off-by: Arjan van de Ven --- fs/super.c | 4 ++-- include/linux/async.h | 8 +++++--- kernel/async.c | 41 ++++++++++++++++++++++------------------- 3 files changed, 29 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/fs/super.c b/fs/super.c index 645e5403f2a0..61dce001dd57 100644 --- a/fs/super.c +++ b/fs/super.c @@ -301,7 +301,7 @@ void generic_shutdown_super(struct super_block *sb) /* * wait for asynchronous fs operations to finish before going further */ - async_synchronize_full_special(&sb->s_async_list); + async_synchronize_full_domain(&sb->s_async_list); /* bad name - it should be evict_inodes() */ invalidate_inodes(sb); @@ -470,7 +470,7 @@ restart: sb->s_count++; spin_unlock(&sb_lock); down_read(&sb->s_umount); - async_synchronize_full_special(&sb->s_async_list); + async_synchronize_full_domain(&sb->s_async_list); if (sb->s_root && (wait || sb->s_dirt)) sb->s_op->sync_fs(sb, wait); up_read(&sb->s_umount); diff --git a/include/linux/async.h b/include/linux/async.h index c4ecacd0b327..68a9530196f2 100644 --- a/include/linux/async.h +++ b/include/linux/async.h @@ -17,9 +17,11 @@ typedef u64 async_cookie_t; typedef void (async_func_ptr) (void *data, async_cookie_t cookie); extern async_cookie_t async_schedule(async_func_ptr *ptr, void *data); -extern async_cookie_t async_schedule_special(async_func_ptr *ptr, void *data, struct list_head *list); +extern async_cookie_t async_schedule_domain(async_func_ptr *ptr, void *data, + struct list_head *list); extern void async_synchronize_full(void); -extern void async_synchronize_full_special(struct list_head *list); +extern void async_synchronize_full_domain(struct list_head *list); extern void async_synchronize_cookie(async_cookie_t cookie); -extern void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *list); +extern void async_synchronize_cookie_domain(async_cookie_t cookie, + struct list_head *list); diff --git a/kernel/async.c b/kernel/async.c index b5f0d4b94937..e23399d88bac 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -224,22 +224,23 @@ async_cookie_t async_schedule(async_func_ptr *ptr, void *data) EXPORT_SYMBOL_GPL(async_schedule); /** - * async_schedule_special - schedule a function for asynchronous execution with a special running queue + * async_schedule_domain - schedule a function for asynchronous execution within a certain domain * @ptr: function to execute asynchronously * @data: data pointer to pass to the function - * @running: list head to add to while running + * @running: running list for the domain * * Returns an async_cookie_t that may be used for checkpointing later. - * @running may be used in the async_synchronize_*_special() functions - * to wait on a special running queue rather than on the global running - * queue. + * @running may be used in the async_synchronize_*_domain() functions + * to wait within a certain synchronization domain rather than globally. + * A synchronization domain is specified via the running queue @running to use. * Note: This function may be called from atomic or non-atomic contexts. */ -async_cookie_t async_schedule_special(async_func_ptr *ptr, void *data, struct list_head *running) +async_cookie_t async_schedule_domain(async_func_ptr *ptr, void *data, + struct list_head *running) { return __async_schedule(ptr, data, running); } -EXPORT_SYMBOL_GPL(async_schedule_special); +EXPORT_SYMBOL_GPL(async_schedule_domain); /** * async_synchronize_full - synchronize all asynchronous function calls @@ -255,27 +256,29 @@ void async_synchronize_full(void) EXPORT_SYMBOL_GPL(async_synchronize_full); /** - * async_synchronize_full_special - synchronize all asynchronous function calls for a running list + * async_synchronize_full_domain - synchronize all asynchronous function within a certain domain * @list: running list to synchronize on * - * This function waits until all asynchronous function calls for the running - * list @list have been done. + * This function waits until all asynchronous function calls for the + * synchronization domain specified by the running list @list have been done. */ -void async_synchronize_full_special(struct list_head *list) +void async_synchronize_full_domain(struct list_head *list) { - async_synchronize_cookie_special(next_cookie, list); + async_synchronize_cookie_domain(next_cookie, list); } -EXPORT_SYMBOL_GPL(async_synchronize_full_special); +EXPORT_SYMBOL_GPL(async_synchronize_full_domain); /** - * async_synchronize_cookie_special - synchronize asynchronous function calls on a running list with cookie checkpointing + * async_synchronize_cookie_domain - synchronize asynchronous function calls within a certain domain with cookie checkpointing * @cookie: async_cookie_t to use as checkpoint * @running: running list to synchronize on * - * This function waits until all asynchronous function calls for the running - * list @list submitted prior to @cookie have been done. + * This function waits until all asynchronous function calls for the + * synchronization domain specified by the running list @list submitted + * prior to @cookie have been done. */ -void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *running) +void async_synchronize_cookie_domain(async_cookie_t cookie, + struct list_head *running) { ktime_t starttime, delta, endtime; @@ -295,7 +298,7 @@ void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *r (long long)ktime_to_ns(delta) >> 10); } } -EXPORT_SYMBOL_GPL(async_synchronize_cookie_special); +EXPORT_SYMBOL_GPL(async_synchronize_cookie_domain); /** * async_synchronize_cookie - synchronize asynchronous function calls with cookie checkpointing @@ -306,7 +309,7 @@ EXPORT_SYMBOL_GPL(async_synchronize_cookie_special); */ void async_synchronize_cookie(async_cookie_t cookie) { - async_synchronize_cookie_special(cookie, &async_running); + async_synchronize_cookie_domain(cookie, &async_running); } EXPORT_SYMBOL_GPL(async_synchronize_cookie); -- cgit v1.2.3