From c132419e560a2ecd3c8cf77f9c37e103e74b3754 Mon Sep 17 00:00:00 2001 From: Trent Piepho Date: Thu, 30 Oct 2008 18:17:06 -0700 Subject: gianfar: Fix race in TBI/SerDes configuration The init_phy() function attaches to the PHY, then configures the SerDes<->TBI link (in SGMII mode). The TBI is on the MDIO bus with the PHY (sort of) and is accessed via the gianfar's MDIO registers, using the functions gfar_local_mdio_read/write(), which don't do any locking. The previously attached PHY will start a work-queue on a timer, and probably an irq handler as well, which will talk to the PHY and thus use the MDIO bus. This uses phy_read/write(), which have locking, but not against the gfar_local_mdio versions. The result is that PHY code will try to use the MDIO bus at the same time as the SerDes setup code, corrupting the transfers. Setting up the SerDes before attaching to the PHY will insure that there is no race between the SerDes code and *our* PHY, but doesn't fix everything. Typically the PHYs for all gianfar devices are on the same MDIO bus, which is associated with the first gianfar device. This means that the first gianfar's SerDes code could corrupt the MDIO transfers for a different gianfar's PHY. The lock used by phy_read/write() is contained in the mii_bus structure, which is pointed to by the PHY. This is difficult to access from the gianfar drivers, as there is no link between a gianfar device and the mii_bus which shares the same MDIO registers. As far as the device layer and drivers are concerned they are two unrelated devices (which happen to share registers). Generally all gianfar devices' PHYs will be on the bus associated with the first gianfar. But this might not be the case, so simply locking the gianfar's PHY's mii bus might not lock the mii bus that the SerDes setup code is going to use. We solve this by having the code that creates the gianfar platform device look in the device tree for an mdio device that shares the gianfar's registers. If one is found the ID of its platform device is saved in the gianfar's platform data. A new function in the gianfar mii code, gfar_get_miibus(), can use the bus ID to search through the platform devices for a gianfar_mdio device with the right ID. The platform device's driver data is the mii_bus structure, which the SerDes setup code can use to lock the current bus. Signed-off-by: Trent Piepho CC: Andy Fleming Signed-off-by: Jeff Garzik --- include/linux/fsl_devices.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index 4e625e0094c8..708bab58d8d0 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -49,7 +49,8 @@ struct gianfar_platform_data { u32 device_flags; /* board specific information */ u32 board_flags; - char bus_id[MII_BUS_ID_SIZE]; + int mdio_bus; /* Bus controlled by us */ + char bus_id[MII_BUS_ID_SIZE]; /* Bus PHY is on */ u32 phy_id; u8 mac_addr[6]; phy_interface_t interface; -- cgit v1.2.3 From 9ce8e3073d9cfd6f859c22a25441db41b85cbf6e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 27 Aug 2008 15:23:18 +0200 Subject: libata: add whitelist for devices with known good pata-sata bridges libata currently imposes a UDMA5 max transfer rate and 200 sector max transfer size for SATA devices that sit behind a pata-sata bridge. Lots of devices have known good bridges that don't need this limit applied. The MTRON SSD disks are such devices. 
Transfer rates are increased by 20-30% with the restriction removed. So add a "blacklist" entry for the MTRON devices, with a flag indicating that the bridge is known good. Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 7 +++++++ include/linux/libata.h | 1 + 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 8824c8da3f2f..82af7011f2dd 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2158,6 +2158,10 @@ retry: static inline u8 ata_dev_knobble(struct ata_device *dev) { struct ata_port *ap = dev->link->ap; + + if (ata_dev_blacklisted(dev) & ATA_HORKAGE_BRIDGE_OK) + return 0; + return ((ap->cbl == ATA_CBL_SATA) && (!ata_id_is_sata(dev->id))); } @@ -4062,6 +4066,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "TSSTcorp CDDVDW SH-S202N", "SB00", ATA_HORKAGE_IVB, }, { "TSSTcorp CDDVDW SH-S202N", "SB01", ATA_HORKAGE_IVB, }, + /* Devices that do not need bridging limits applied */ + { "MTRON MSP-SATA*", NULL, ATA_HORKAGE_BRIDGE_OK, }, + /* End Marker */ { } }; diff --git a/include/linux/libata.h b/include/linux/libata.h index 507f53ef8038..f5441edee55f 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -372,6 +372,7 @@ enum { ATA_HORKAGE_IPM = (1 << 7), /* Link PM problems */ ATA_HORKAGE_IVB = (1 << 8), /* cbl det validity bit bugs */ ATA_HORKAGE_STUCK_ERR = (1 << 9), /* stuck ERR on next PACKET */ + ATA_HORKAGE_BRIDGE_OK = (1 << 10), /* no bridge limits */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ -- cgit v1.2.3 From ad1d967c88e349c7e822ad75dd3247a2a50d2ea3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 30 Oct 2008 23:54:35 -0700 Subject: net: delete excess kernel-doc notation Remove excess kernel-doc function parameters from networking header & driver files: Warning(include/net/sock.h:946): Excess function parameter or struct member 'sk' description in 'sk_filter_release' Warning(include/linux/netdevice.h:1545): Excess function parameter or struct member 'cpu' description in 'netif_tx_lock' Warning(drivers/net/wan/z85230.c:712): Excess function parameter or struct member 'regs' description in 'z8530_interrupt' Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- drivers/net/wan/z85230.c | 1 - include/linux/netdevice.h | 1 - include/net/sock.h | 1 - 3 files changed, 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wan/z85230.c b/drivers/net/wan/z85230.c index ccd9cd35ecbe..5bf7e01ef0e9 100644 --- a/drivers/net/wan/z85230.c +++ b/drivers/net/wan/z85230.c @@ -695,7 +695,6 @@ EXPORT_SYMBOL(z8530_nop); * z8530_interrupt - Handle an interrupt from a Z8530 * @irq: Interrupt number * @dev_id: The Z8530 device that is interrupting. - * @regs: unused * * A Z85[2]30 device has stuck its hand in the air for attention. 
* We scan both the channels on the chip for events and then call diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c8bcb59adfdf..9d77b1d7dca8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1537,7 +1537,6 @@ static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) /** * netif_tx_lock - grab network device transmit lock * @dev: network device - * @cpu: cpu number of lock owner * * Get network device transmit lock */ diff --git a/include/net/sock.h b/include/net/sock.h index ada50c04d09f..c04f9e18ea22 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -936,7 +936,6 @@ extern void sock_init_data(struct socket *sock, struct sock *sk); /** * sk_filter_release: Release a socket filter - * @sk: socket * @fp: filter to remove * * Remove a filter from a socket and release its resources. -- cgit v1.2.3 From 9663f2e6a6cf3f82b06d8fb699b11b80f92553ba Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Thu, 30 Oct 2008 19:38:18 -0700 Subject: resources: add io-mapping functions to dynamically map large device apertures Impact: add new generic io_map_*() APIs Graphics devices have large PCI apertures which would consume a significant fraction of a 32-bit address space if mapped during driver initialization. Using ioremap at runtime is impractical as it is too slow. This new set of interfaces uses atomic mappings on 32-bit processors and a large static mapping on 64-bit processors to provide reasonable 32-bit performance and optimal 64-bit performance. The current implementation sits atop the io_map_atomic fixmap-based mechanism for 32-bit processors. This includes some editorial suggestions from Randy Dunlap for Documentation/io-mapping.txt Signed-off-by: Keith Packard Signed-off-by: Eric Anholt Signed-off-by: Ingo Molnar --- Documentation/io-mapping.txt | 76 ++++++++++++++++++++++++++++ include/linux/io-mapping.h | 118 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 194 insertions(+) create mode 100644 Documentation/io-mapping.txt create mode 100644 include/linux/io-mapping.h (limited to 'include/linux') diff --git a/Documentation/io-mapping.txt b/Documentation/io-mapping.txt new file mode 100644 index 000000000000..cd2f726becc8 --- /dev/null +++ b/Documentation/io-mapping.txt @@ -0,0 +1,76 @@ +The io_mapping functions in linux/io-mapping.h provide an abstraction for +efficiently mapping small regions of an I/O device to the CPU. The initial +usage is to support the large graphics aperture on 32-bit processors where +ioremap_wc cannot be used to statically map the entire aperture to the CPU +as it would consume too much of the kernel address space. + +A mapping object is created during driver initialization using + + struct io_mapping *io_mapping_create_wc(unsigned long base, + unsigned long size) + + 'base' is the bus address of the region to be made + mappable, while 'size' indicates how large a mapping region to + enable. Both are in bytes. + + This _wc variant provides a mapping which may only be used + with the io_mapping_map_atomic_wc or io_mapping_map_wc. + +With this mapping object, individual pages can be mapped either atomically +or not, depending on the necessary scheduling environment. Of course, atomic +maps are more efficient: + + void *io_mapping_map_atomic_wc(struct io_mapping *mapping, + unsigned long offset) + + 'offset' is the offset within the defined mapping region. + Accessing addresses beyond the region specified in the + creation function yields undefined results. 
Using an offset + which is not page aligned yields an undefined result. The + return value points to a single page in CPU address space. + + This _wc variant returns a write-combining map to the + page and may only be used with mappings created by + io_mapping_create_wc + + Note that the task may not sleep while holding this page + mapped. + + void io_mapping_unmap_atomic(void *vaddr) + + 'vaddr' must be the the value returned by the last + io_mapping_map_atomic_wc call. This unmaps the specified + page and allows the task to sleep once again. + +If you need to sleep while holding the lock, you can use the non-atomic +variant, although they may be significantly slower. + + void *io_mapping_map_wc(struct io_mapping *mapping, + unsigned long offset) + + This works like io_mapping_map_atomic_wc except it allows + the task to sleep while holding the page mapped. + + void io_mapping_unmap(void *vaddr) + + This works like io_mapping_unmap_atomic, except it is used + for pages mapped with io_mapping_map_wc. + +At driver close time, the io_mapping object must be freed: + + void io_mapping_free(struct io_mapping *mapping) + +Current Implementation: + +The initial implementation of these functions uses existing mapping +mechanisms and so provides only an abstraction layer and no new +functionality. + +On 64-bit processors, io_mapping_create_wc calls ioremap_wc for the whole +range, creating a permanent kernel-visible mapping to the resource. The +map_atomic and map functions add the requested offset to the base of the +virtual address returned by ioremap_wc. + +On 32-bit processors, io_mapping_map_atomic_wc uses io_map_atomic_prot_pfn, +which uses the fixmaps to get us a mapping to a page using an atomic fashion. +For io_mapping_map_wc, ioremap_wc() is used to get a mapping of the region. diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h new file mode 100644 index 000000000000..1b566993db6e --- /dev/null +++ b/include/linux/io-mapping.h @@ -0,0 +1,118 @@ +/* + * Copyright © 2008 Keith Packard + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef _LINUX_IO_MAPPING_H +#define _LINUX_IO_MAPPING_H + +#include +#include +#include +#include + +/* + * The io_mapping mechanism provides an abstraction for mapping + * individual pages from an io device to the CPU in an efficient fashion. 
+ * + * See Documentation/io_mapping.txt + */ + +/* this struct isn't actually defined anywhere */ +struct io_mapping; + +#ifdef CONFIG_X86_64 + +/* Create the io_mapping object*/ +static inline struct io_mapping * +io_mapping_create_wc(unsigned long base, unsigned long size) +{ + return (struct io_mapping *) ioremap_wc(base, size); +} + +static inline void +io_mapping_free(struct io_mapping *mapping) +{ + iounmap(mapping); +} + +/* Atomic map/unmap */ +static inline void * +io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset) +{ + return ((char *) mapping) + offset; +} + +static inline void +io_mapping_unmap_atomic(void *vaddr) +{ +} + +/* Non-atomic map/unmap */ +static inline void * +io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) +{ + return ((char *) mapping) + offset; +} + +static inline void +io_mapping_unmap(void *vaddr) +{ +} + +#endif /* CONFIG_X86_64 */ + +#ifdef CONFIG_X86_32 +static inline struct io_mapping * +io_mapping_create_wc(unsigned long base, unsigned long size) +{ + return (struct io_mapping *) base; +} + +static inline void +io_mapping_free(struct io_mapping *mapping) +{ +} + +/* Atomic map/unmap */ +static inline void * +io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset) +{ + offset += (unsigned long) mapping; + return iomap_atomic_prot_pfn(offset >> PAGE_SHIFT, KM_USER0, + __pgprot(__PAGE_KERNEL_WC)); +} + +static inline void +io_mapping_unmap_atomic(void *vaddr) +{ + iounmap_atomic(vaddr, KM_USER0); +} + +static inline void * +io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) +{ + offset += (unsigned long) mapping; + return ioremap_wc(offset, PAGE_SIZE); +} + +static inline void +io_mapping_unmap(void *vaddr) +{ + iounmap(vaddr); +} +#endif /* CONFIG_X86_32 */ + +#endif /* _LINUX_IO_MAPPING_H */ -- cgit v1.2.3 From 4ac96572f1f6abe44b5e02e80fdfb5a990129613 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Sun, 2 Nov 2008 09:51:27 -0500 Subject: linux/string.h: fix comment typo s/user/used/ Signed-off-by: Jeff Garzik Signed-off-by: Linus Torvalds --- include/linux/string.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index 810d80df0a1d..d18fc198aa2f 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -1,7 +1,7 @@ #ifndef _LINUX_STRING_H_ #define _LINUX_STRING_H_ -/* We don't want strings.h stuff being user by user stuff by accident */ +/* We don't want strings.h stuff being used by user stuff by accident */ #ifndef __KERNEL__ #include -- cgit v1.2.3 From e5beae16901795223d677f15aa2fe192976278ee Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Mon, 3 Nov 2008 18:21:45 +0100 Subject: io mapping: clean up #ifdefs Impact: cleanup clean up ifdefs: change #ifdef CONFIG_X86_32/64 to CONFIG_HAVE_ATOMIC_IOMAP. flip around the #ifdef sections to clean up the structure. 
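As an aside (not part of this patch), a minimal sketch of how a driver is expected to consume this interface, whichever #ifdef branch ends up compiled in; the mapping name, function names, aperture base/size and the write loop below are hypothetical placeholders:

	#include <linux/io-mapping.h>

	/* Hypothetical driver-global mapping object for a large PCI aperture. */
	static struct io_mapping *example_aperture;

	static int example_init(unsigned long base, unsigned long size)
	{
		/* Created once at init time; the aperture may be hundreds of MB. */
		example_aperture = io_mapping_create_wc(base, size);
		if (!example_aperture)
			return -ENOMEM;
		return 0;
	}

	/* 'offset' is assumed to be page aligned, as the documentation requires. */
	static void example_write_page(unsigned long offset, const u32 *data)
	{
		u32 *vaddr;
		int i;

		/* Atomic variant: cheap, but sleeping is not allowed until unmap. */
		vaddr = io_mapping_map_atomic_wc(example_aperture, offset);
		for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
			vaddr[i] = data[i];
		io_mapping_unmap_atomic(vaddr);
	}

	static void example_exit(void)
	{
		io_mapping_free(example_aperture);
	}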
Signed-off-by: Keith Packard Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 4 ++++ include/linux/io-mapping.h | 43 +++++++++++++++++++++++++------------------ 2 files changed, 29 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6f20718d3156..e60c59b81bdd 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1894,6 +1894,10 @@ config SYSVIPC_COMPAT endmenu +config HAVE_ATOMIC_IOMAP + def_bool y + depends on X86_32 + source "net/Kconfig" source "drivers/Kconfig" diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 1b566993db6e..82df31726a54 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -33,86 +33,93 @@ /* this struct isn't actually defined anywhere */ struct io_mapping; -#ifdef CONFIG_X86_64 +#ifdef CONFIG_HAVE_ATOMIC_IOMAP + +/* + * For small address space machines, mapping large objects + * into the kernel virtual space isn't practical. Where + * available, use fixmap support to dynamically map pages + * of the object at run time. + */ -/* Create the io_mapping object*/ static inline struct io_mapping * io_mapping_create_wc(unsigned long base, unsigned long size) { - return (struct io_mapping *) ioremap_wc(base, size); + return (struct io_mapping *) base; } static inline void io_mapping_free(struct io_mapping *mapping) { - iounmap(mapping); } /* Atomic map/unmap */ static inline void * io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset) { - return ((char *) mapping) + offset; + offset += (unsigned long) mapping; + return iomap_atomic_prot_pfn(offset >> PAGE_SHIFT, KM_USER0, + __pgprot(__PAGE_KERNEL_WC)); } static inline void io_mapping_unmap_atomic(void *vaddr) { + iounmap_atomic(vaddr, KM_USER0); } -/* Non-atomic map/unmap */ static inline void * io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) { - return ((char *) mapping) + offset; + offset += (unsigned long) mapping; + return ioremap_wc(offset, PAGE_SIZE); } static inline void io_mapping_unmap(void *vaddr) { + iounmap(vaddr); } -#endif /* CONFIG_X86_64 */ +#else -#ifdef CONFIG_X86_32 +/* Create the io_mapping object*/ static inline struct io_mapping * io_mapping_create_wc(unsigned long base, unsigned long size) { - return (struct io_mapping *) base; + return (struct io_mapping *) ioremap_wc(base, size); } static inline void io_mapping_free(struct io_mapping *mapping) { + iounmap(mapping); } /* Atomic map/unmap */ static inline void * io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset) { - offset += (unsigned long) mapping; - return iomap_atomic_prot_pfn(offset >> PAGE_SHIFT, KM_USER0, - __pgprot(__PAGE_KERNEL_WC)); + return ((char *) mapping) + offset; } static inline void io_mapping_unmap_atomic(void *vaddr) { - iounmap_atomic(vaddr, KM_USER0); } +/* Non-atomic map/unmap */ static inline void * io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) { - offset += (unsigned long) mapping; - return ioremap_wc(offset, PAGE_SIZE); + return ((char *) mapping) + offset; } static inline void io_mapping_unmap(void *vaddr) { - iounmap(vaddr); } -#endif /* CONFIG_X86_32 */ + +#endif /* HAVE_ATOMIC_IOMAP */ #endif /* _LINUX_IO_MAPPING_H */ -- cgit v1.2.3 From a7b930cdf8ec790c85f81416c87f7c066679d373 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Sun, 2 Nov 2008 13:32:43 -0800 Subject: PCI: annotate return value of pci_ioremap_bar with __iomem Was missing from the initial patch. 
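As an illustration (not from the patch itself), how the now-__iomem-annotated helper is typically used in a driver; the BAR number, register offset and function name are made-up placeholders:

	#include <linux/pci.h>
	#include <linux/io.h>

	static int example_probe(struct pci_dev *pdev)
	{
		void __iomem *regs;

		regs = pci_ioremap_bar(pdev, 0);	/* returns NULL on failure */
		if (!regs)
			return -ENOMEM;

		/*
		 * With the __iomem annotation in place, sparse warns if this
		 * pointer is dereferenced directly instead of being accessed
		 * through the MMIO accessors.
		 */
		writel(0x1, regs + 0x10);

		iounmap(regs);
		return 0;
	}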
Acked-by: Arjan van de Ven Signed-off-by: Harvey Harrison Signed-off-by: Jesse Barnes --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index c75b82bda327..feb4657bb043 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1136,7 +1136,7 @@ static inline void pci_mmcfg_late_init(void) { } #endif #ifdef CONFIG_HAS_IOMEM -static inline void * pci_ioremap_bar(struct pci_dev *pdev, int bar) +static inline void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar) { /* * Make sure the BAR is actually a memory resource, not an IO resource -- cgit v1.2.3 From 6a87e42e955ff27e07a77f65f8f077dc7c4171e1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Nov 2008 19:01:09 +0900 Subject: libata: implement ATA_HORKAGE_ATAPI_MOD16_DMA and apply it libata always uses PIO for ATAPI commands when the number of bytes to transfer isn't multiple of 16 but quantum DAT72 chokes on odd bytes PIO transfers. Implement a horkage to skip the mod16 check and apply it to the quantum device. This is reported by John Clark in the following thread. http://thread.gmane.org/gmane.linux.ide/34748 Signed-off-by: Tejun Heo Cc: John Clark Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 4 +++- include/linux/libata.h | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 82af7011f2dd..91b478f20557 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4024,6 +4024,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { /* Weird ATAPI devices */ { "TORiSAN DVD-ROM DRD-N216", NULL, ATA_HORKAGE_MAX_SEC_128 }, + { "QUANTUM DAT DAT72-000", NULL, ATA_HORKAGE_ATAPI_MOD16_DMA }, /* Devices we expect to fail diagnostics */ @@ -4444,7 +4445,8 @@ int atapi_check_dma(struct ata_queued_cmd *qc) /* Don't allow DMA if it isn't multiple of 16 bytes. Quite a * few ATAPI devices choke on such DMA requests. */ - if (unlikely(qc->nbytes & 15)) + if (!(qc->dev->horkage & ATA_HORKAGE_ATAPI_MOD16_DMA) && + unlikely(qc->nbytes & 15)) return 1; if (ap->ops->check_atapi_dma) diff --git a/include/linux/libata.h b/include/linux/libata.h index f5441edee55f..c7665a4134c5 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -373,6 +373,8 @@ enum { ATA_HORKAGE_IVB = (1 << 8), /* cbl det validity bit bugs */ ATA_HORKAGE_STUCK_ERR = (1 << 9), /* stuck ERR on next PACKET */ ATA_HORKAGE_BRIDGE_OK = (1 << 10), /* no bridge limits */ + ATA_HORKAGE_ATAPI_MOD16_DMA = (1 << 11), /* use ATAPI DMA for commands + not multiple of 16 bytes */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ -- cgit v1.2.3 From 9b22ea560957de1484e6b3e8538f7eef202e3596 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 4 Nov 2008 14:49:57 -0800 Subject: net: fix packet socket delivery in rx irq handler The changes to deliver hardware accelerated VLAN packets to packet sockets (commit bc1d0411) caused a warning for non-NAPI drivers. The __vlan_hwaccel_rx() function is called directly from the drivers RX function, for non-NAPI drivers that means its still in RX IRQ context: [ 27.779463] ------------[ cut here ]------------ [ 27.779509] WARNING: at kernel/softirq.c:136 local_bh_enable+0x37/0x81() ... 
[ 27.782520] [] netif_nit_deliver+0x5b/0x75 [ 27.782590] [] __vlan_hwaccel_rx+0x79/0x162 [ 27.782664] [] atl1_intr+0x9a9/0xa7c [atl1] [ 27.782738] [] handle_IRQ_event+0x23/0x51 [ 27.782808] [] handle_edge_irq+0xc2/0x102 [ 27.782878] [] do_IRQ+0x4d/0x64 Split hardware accelerated VLAN reception into two parts to fix this: - __vlan_hwaccel_rx just stores the VLAN TCI and performs the VLAN device lookup, then calls netif_receive_skb()/netif_rx() - vlan_hwaccel_do_receive(), which is invoked by netif_receive_skb() in softirq context, performs the real reception and delivery to packet sockets. Reported-and-tested-by: Ramon Casellas Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 7 +++++++ net/8021q/vlan_core.c | 46 +++++++++++++++++++++++++++++++++------------- net/core/dev.c | 3 +++ 3 files changed, 43 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 9e7b49b8062d..a5cb0c3f6dcf 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -114,6 +114,8 @@ extern u16 vlan_dev_vlan_id(const struct net_device *dev); extern int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, u16 vlan_tci, int polling); +extern int vlan_hwaccel_do_receive(struct sk_buff *skb); + #else static inline struct net_device *vlan_dev_real_dev(const struct net_device *dev) { @@ -133,6 +135,11 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, BUG(); return NET_XMIT_SUCCESS; } + +static inline int vlan_hwaccel_do_receive(struct sk_buff *skb) +{ + return 0; +} #endif /** diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 916061f681b6..68ced4bf158c 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -3,11 +3,20 @@ #include #include "vlan.h" +struct vlan_hwaccel_cb { + struct net_device *dev; +}; + +static inline struct vlan_hwaccel_cb *vlan_hwaccel_cb(struct sk_buff *skb) +{ + return (struct vlan_hwaccel_cb *)skb->cb; +} + /* VLAN rx hw acceleration helper. This acts like netif_{rx,receive_skb}(). */ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, u16 vlan_tci, int polling) { - struct net_device_stats *stats; + struct vlan_hwaccel_cb *cb = vlan_hwaccel_cb(skb); if (skb_bond_should_drop(skb)) { dev_kfree_skb_any(skb); @@ -15,23 +24,35 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, } skb->vlan_tci = vlan_tci; + cb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK); + + return (polling ? netif_receive_skb(skb) : netif_rx(skb)); +} +EXPORT_SYMBOL(__vlan_hwaccel_rx); + +int vlan_hwaccel_do_receive(struct sk_buff *skb) +{ + struct vlan_hwaccel_cb *cb = vlan_hwaccel_cb(skb); + struct net_device *dev = cb->dev; + struct net_device_stats *stats; + netif_nit_deliver(skb); - skb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK); - if (skb->dev == NULL) { - dev_kfree_skb_any(skb); - /* Not NET_RX_DROP, this is not being dropped - * due to congestion. 
*/ - return NET_RX_SUCCESS; + if (dev == NULL) { + kfree_skb(skb); + return -1; } - skb->dev->last_rx = jiffies; + + skb->dev = dev; + skb->priority = vlan_get_ingress_priority(dev, skb->vlan_tci); skb->vlan_tci = 0; - stats = &skb->dev->stats; + dev->last_rx = jiffies; + + stats = &dev->stats; stats->rx_packets++; stats->rx_bytes += skb->len; - skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci); switch (skb->pkt_type) { case PACKET_BROADCAST: break; @@ -43,13 +64,12 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, * This allows the VLAN to have a different MAC than the * underlying device, and still route correctly. */ if (!compare_ether_addr(eth_hdr(skb)->h_dest, - skb->dev->dev_addr)) + dev->dev_addr)) skb->pkt_type = PACKET_HOST; break; }; - return (polling ? netif_receive_skb(skb) : netif_rx(skb)); + return 0; } -EXPORT_SYMBOL(__vlan_hwaccel_rx); struct net_device *vlan_dev_real_dev(const struct net_device *dev) { diff --git a/net/core/dev.c b/net/core/dev.c index d9038e328cc1..9174c77d3112 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2218,6 +2218,9 @@ int netif_receive_skb(struct sk_buff *skb) int ret = NET_RX_DROP; __be16 type; + if (skb->vlan_tci && vlan_hwaccel_do_receive(skb)) + return NET_RX_SUCCESS; + /* if we've gotten here through NAPI, check netpoll */ if (netpoll_receive_skb(skb)) return NET_RX_DROP; -- cgit v1.2.3 From 467622ef2acb01986eab37ef96c3632b3ea35999 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 1 Nov 2008 04:19:11 -0700 Subject: [MTD] [NOR] Fix cfi_send_gen_cmd handling of x16 devices in x8 mode (v4) For "unlock" cycles to 16bit devices in 8bit compatibility mode we need to use the byte addresses 0xaaa and 0x555. These effectively match the word address 0x555 and 0x2aa, except the latter has its low bit set. Most chips don't care about the value of the 'A-1' pin in x8 mode, but some -- like the ST M29W320D -- do. So we need to be careful to set it where appropriate. cfi_send_gen_cmd is only ever passed addresses where the low byte is 0x00, 0x55 or 0xaa. Of those, only addresses ending 0xaa are affected by this patch, by masking in the extra low bit when the device is known to be in compatibility mode. [dwmw2: Do it only when (cmd_ofs & 0xff) == 0xaa] v4: Fix stupid typo in cfi_build_cmd_addr that failed to compile I'm writing this patch way to late at night. v3: Bring all of the work back into cfi_build_cmd_addr including calling of map_bankwidth(map) and cfi_interleave(cfi) So every caller doesn't need to. v2: Only modified the address if we our device_type is larger than our bus width. Cc: stable@kernel.org Signed-off-by: Eric W. 
Biederman Signed-off-by: David Woodhouse --- drivers/mtd/chips/cfi_cmdset_0002.c | 13 ------------- drivers/mtd/chips/jedec_probe.c | 10 ++++------ include/linux/mtd/cfi.h | 22 +++++++++++++++++++--- 3 files changed, 23 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index 3e6f5d8609e8..d74ec46aa032 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -406,19 +406,6 @@ struct mtd_info *cfi_cmdset_0002(struct map_info *map, int primary) /* Set the default CFI lock/unlock addresses */ cfi->addr_unlock1 = 0x555; cfi->addr_unlock2 = 0x2aa; - /* Modify the unlock address if we are in compatibility mode */ - if ( /* x16 in x8 mode */ - ((cfi->device_type == CFI_DEVICETYPE_X8) && - (cfi->cfiq->InterfaceDesc == - CFI_INTERFACE_X8_BY_X16_ASYNC)) || - /* x32 in x16 mode */ - ((cfi->device_type == CFI_DEVICETYPE_X16) && - (cfi->cfiq->InterfaceDesc == - CFI_INTERFACE_X16_BY_X32_ASYNC))) - { - cfi->addr_unlock1 = 0xaaa; - cfi->addr_unlock2 = 0x555; - } } /* CFI mode */ else if (cfi->cfi_mode == CFI_MODE_JEDEC) { diff --git a/drivers/mtd/chips/jedec_probe.c b/drivers/mtd/chips/jedec_probe.c index f84ab6182148..2f3f2f719ba4 100644 --- a/drivers/mtd/chips/jedec_probe.c +++ b/drivers/mtd/chips/jedec_probe.c @@ -1808,9 +1808,7 @@ static inline u32 jedec_read_mfr(struct map_info *map, uint32_t base, * several first banks can contain 0x7f instead of actual ID */ do { - uint32_t ofs = cfi_build_cmd_addr(0 + (bank << 8), - cfi_interleave(cfi), - cfi->device_type); + uint32_t ofs = cfi_build_cmd_addr(0 + (bank << 8), map, cfi); mask = (1 << (cfi->device_type * 8)) - 1; result = map_read(map, base + ofs); bank++; @@ -1824,7 +1822,7 @@ static inline u32 jedec_read_id(struct map_info *map, uint32_t base, { map_word result; unsigned long mask; - u32 ofs = cfi_build_cmd_addr(1, cfi_interleave(cfi), cfi->device_type); + u32 ofs = cfi_build_cmd_addr(1, map, cfi); mask = (1 << (cfi->device_type * 8)) -1; result = map_read(map, base + ofs); return result.x[0] & mask; @@ -2067,8 +2065,8 @@ static int jedec_probe_chip(struct map_info *map, __u32 base, } /* Ensure the unlock addresses we try stay inside the map */ - probe_offset1 = cfi_build_cmd_addr(cfi->addr_unlock1, cfi_interleave(cfi), cfi->device_type); - probe_offset2 = cfi_build_cmd_addr(cfi->addr_unlock2, cfi_interleave(cfi), cfi->device_type); + probe_offset1 = cfi_build_cmd_addr(cfi->addr_unlock1, map, cfi); + probe_offset2 = cfi_build_cmd_addr(cfi->addr_unlock2, map, cfi); if ( ((base + probe_offset1 + map_bankwidth(map)) >= map->size) || ((base + probe_offset2 + map_bankwidth(map)) >= map->size)) goto retry; diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index ee5124ec319e..00e2b575021f 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -282,9 +282,25 @@ struct cfi_private { /* * Returns the command address according to the given geometry. */ -static inline uint32_t cfi_build_cmd_addr(uint32_t cmd_ofs, int interleave, int type) +static inline uint32_t cfi_build_cmd_addr(uint32_t cmd_ofs, + struct map_info *map, struct cfi_private *cfi) { - return (cmd_ofs * type) * interleave; + unsigned bankwidth = map_bankwidth(map); + unsigned interleave = cfi_interleave(cfi); + unsigned type = cfi->device_type; + uint32_t addr; + + addr = (cmd_ofs * type) * interleave; + + /* Modify the unlock address if we are in compatiblity mode. 
+ * For 16bit devices on 8 bit busses + * and 32bit devices on 16 bit busses + * set the low bit of the alternating bit sequence of the address. + */ + if (((type * interleave) > bankwidth) && ((uint8_t)cmd_ofs == 0xaa)) + addr |= (type >> 1)*interleave; + + return addr; } /* @@ -430,7 +446,7 @@ static inline uint32_t cfi_send_gen_cmd(u_char cmd, uint32_t cmd_addr, uint32_t int type, map_word *prev_val) { map_word val; - uint32_t addr = base + cfi_build_cmd_addr(cmd_addr, cfi_interleave(cfi), type); + uint32_t addr = base + cfi_build_cmd_addr(cmd_addr, map, cfi); val = cfi_build_cmd(cmd, map, cfi); if (prev_val) -- cgit v1.2.3 From 9fcd18c9e63e325dbd2b4c726623f760788d5aa8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 5 Nov 2008 16:52:08 +0100 Subject: sched: re-tune balancing Impact: improve wakeup affinity on NUMA systems, tweak SMP systems Given the fixes+tweaks to the wakeup-buddy code, re-tweak the domain balancing defaults on NUMA and SMP systems. Turn on SD_WAKE_AFFINE which was off on x86 NUMA - there's no reason why we would not want to have wakeup affinity across nodes as well. (we already do this in the standard NUMA template.) lat_ctx on a NUMA box is particularly happy about this change: before: | phoenix:~/l> ./lat_ctx -s 0 2 | "size=0k ovr=2.60 | 2 5.70 after: | phoenix:~/l> ./lat_ctx -s 0 2 | "size=0k ovr=2.65 | 2 2.07 a 2.75x speedup. pipe-test is similarly happy about it too: | phoenix:~/sched-tests> ./pipe-test | 18.26 usecs/loop. | 14.70 usecs/loop. | 14.38 usecs/loop. | 10.55 usecs/loop. # +WAKE_AFFINE on domain0+domain1 | 8.63 usecs/loop. | 8.59 usecs/loop. | 9.03 usecs/loop. | 8.94 usecs/loop. | 8.96 usecs/loop. | 8.63 usecs/loop. Also: - disable SD_BALANCE_NEWIDLE on NUMA and SMP domains (keep it for siblings) - enable SD_WAKE_BALANCE on SMP domains Sysbench+postgresql improves all around the board, quite significantly: .28-rc3-11474e2c .28-rc3-11474e2c-tune ------------------------------------------------- 1: 571 688 +17.08% 2: 1236 1206 -2.55% 4: 2381 2642 +9.89% 8: 4958 5164 +3.99% 16: 9580 9574 -0.07% 32: 7128 8118 +12.20% 64: 7342 8266 +11.18% 128: 7342 8064 +8.95% 256: 7519 7884 +4.62% 512: 7350 7731 +4.93% ------------------------------------------------- SUM: 55412 59341 +6.62% So it's a win both for the runup portion, the peak area and the tail. 
Signed-off-by: Ingo Molnar --- arch/x86/include/asm/topology.h | 7 ++++--- include/linux/topology.h | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 90ac7718469a..4850e4b02b61 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -154,7 +154,7 @@ extern unsigned long node_remap_size[]; #endif -/* sched_domains SD_NODE_INIT for NUMAQ machines */ +/* sched_domains SD_NODE_INIT for NUMA machines */ #define SD_NODE_INIT (struct sched_domain) { \ .min_interval = 8, \ .max_interval = 32, \ @@ -169,8 +169,9 @@ extern unsigned long node_remap_size[]; .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ | SD_BALANCE_FORK \ - | SD_SERIALIZE \ - | SD_WAKE_BALANCE, \ + | SD_WAKE_AFFINE \ + | SD_WAKE_BALANCE \ + | SD_SERIALIZE, \ .last_balance = jiffies, \ .balance_interval = 1, \ } diff --git a/include/linux/topology.h b/include/linux/topology.h index 2158fc0d5a56..34a7ee0ebed2 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -146,10 +146,10 @@ void arch_update_cpu_topology(void); .wake_idx = 1, \ .forkexec_idx = 1, \ .flags = SD_LOAD_BALANCE \ - | SD_BALANCE_NEWIDLE \ - | SD_BALANCE_FORK \ | SD_BALANCE_EXEC \ + | SD_BALANCE_FORK \ | SD_WAKE_AFFINE \ + | SD_WAKE_BALANCE \ | BALANCE_FOR_PKG_POWER,\ .last_balance = jiffies, \ .balance_interval = 1, \ -- cgit v1.2.3 From f92131c3dd567fc6df18ce3f46fcf57ecbdefbe0 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 29 Oct 2008 14:10:51 +0100 Subject: bio: define __BIOVEC_PHYS_MERGEABLE Define __BIOVEC_PHYS_MERGEABLE as the default implementation of BIOVEC_PHYS_MERGEABLE, so that its available for reuse within an arch-specific definition of BIOVEC_PHYS_MERGEABLE. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Jens Axboe --- include/linux/bio.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 1c91a176b9ae..6a642098e5c3 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -236,12 +236,16 @@ static inline void *bio_data(struct bio *bio) #define __BVEC_END(bio) bio_iovec_idx((bio), (bio)->bi_vcnt - 1) #define __BVEC_START(bio) bio_iovec_idx((bio), (bio)->bi_idx) +/* Default implementation of BIOVEC_PHYS_MERGEABLE */ +#define __BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ + ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) + /* * allow arch override, for eg virtualized architectures (put in asm/io.h) */ #ifndef BIOVEC_PHYS_MERGEABLE #define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ - ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) + __BIOVEC_PHYS_MERGEABLE(vec1, vec2) #endif #define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \ -- cgit v1.2.3 From 9c133c469d38043d5aadaa03f2fb840d88d1cf4f Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 6 Nov 2008 08:42:48 +0100 Subject: Add round_jiffies_up and related routines This patch (as1158b) adds round_jiffies_up() and friends. These routines work like the analogous round_jiffies() functions, except that they will never round down. The new routines will be useful for timeouts where we don't care exactly when the timer expires, provided it doesn't expire too soon. 
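A hedged usage sketch (not part of the patch): the typical caller is a timeout that must not fire early but may fire a little late; the 30-second period and the function name are arbitrary placeholders:

	#include <linux/timer.h>
	#include <linux/jiffies.h>

	static void example_arm_timeout(struct timer_list *timer)
	{
		unsigned long expires = jiffies + 30 * HZ;

		/*
		 * round_jiffies() may round *down* and expire the timer early;
		 * round_jiffies_up() only ever pushes the expiry later, while
		 * still batching wakeups on whole-second boundaries.
		 */
		mod_timer(timer, round_jiffies_up(expires));
	}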
Signed-off-by: Alan Stern Signed-off-by: Jens Axboe --- include/linux/timer.h | 5 ++ kernel/timer.c | 129 ++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 104 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index d4ba79248a27..daf9685b861c 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -186,4 +186,9 @@ unsigned long __round_jiffies_relative(unsigned long j, int cpu); unsigned long round_jiffies(unsigned long j); unsigned long round_jiffies_relative(unsigned long j); +unsigned long __round_jiffies_up(unsigned long j, int cpu); +unsigned long __round_jiffies_up_relative(unsigned long j, int cpu); +unsigned long round_jiffies_up(unsigned long j); +unsigned long round_jiffies_up_relative(unsigned long j); + #endif diff --git a/kernel/timer.c b/kernel/timer.c index 56becf373c58..dbd50fabe4c7 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -112,27 +112,8 @@ timer_set_base(struct timer_list *timer, struct tvec_base *new_base) tbase_get_deferrable(timer->base)); } -/** - * __round_jiffies - function to round jiffies to a full second - * @j: the time in (absolute) jiffies that should be rounded - * @cpu: the processor number on which the timeout will happen - * - * __round_jiffies() rounds an absolute time in the future (in jiffies) - * up or down to (approximately) full seconds. This is useful for timers - * for which the exact time they fire does not matter too much, as long as - * they fire approximately every X seconds. - * - * By rounding these timers to whole seconds, all such timers will fire - * at the same time, rather than at various times spread out. The goal - * of this is to have the CPU wake up less, which saves power. - * - * The exact rounding is skewed for each processor to avoid all - * processors firing at the exact same time, which could lead - * to lock contention or spurious cache line bouncing. - * - * The return value is the rounded version of the @j parameter. - */ -unsigned long __round_jiffies(unsigned long j, int cpu) +static unsigned long round_jiffies_common(unsigned long j, int cpu, + bool force_up) { int rem; unsigned long original = j; @@ -154,8 +135,9 @@ unsigned long __round_jiffies(unsigned long j, int cpu) * due to delays of the timer irq, long irq off times etc etc) then * we should round down to the whole second, not up. Use 1/4th second * as cutoff for this rounding as an extreme upper bound for this. + * But never round down if @force_up is set. */ - if (rem < HZ/4) /* round down */ + if (rem < HZ/4 && !force_up) /* round down */ j = j - rem; else /* round up */ j = j - rem + HZ; @@ -167,6 +149,31 @@ unsigned long __round_jiffies(unsigned long j, int cpu) return original; return j; } + +/** + * __round_jiffies - function to round jiffies to a full second + * @j: the time in (absolute) jiffies that should be rounded + * @cpu: the processor number on which the timeout will happen + * + * __round_jiffies() rounds an absolute time in the future (in jiffies) + * up or down to (approximately) full seconds. This is useful for timers + * for which the exact time they fire does not matter too much, as long as + * they fire approximately every X seconds. + * + * By rounding these timers to whole seconds, all such timers will fire + * at the same time, rather than at various times spread out. The goal + * of this is to have the CPU wake up less, which saves power. 
+ * + * The exact rounding is skewed for each processor to avoid all + * processors firing at the exact same time, which could lead + * to lock contention or spurious cache line bouncing. + * + * The return value is the rounded version of the @j parameter. + */ +unsigned long __round_jiffies(unsigned long j, int cpu) +{ + return round_jiffies_common(j, cpu, false); +} EXPORT_SYMBOL_GPL(__round_jiffies); /** @@ -191,13 +198,10 @@ EXPORT_SYMBOL_GPL(__round_jiffies); */ unsigned long __round_jiffies_relative(unsigned long j, int cpu) { - /* - * In theory the following code can skip a jiffy in case jiffies - * increments right between the addition and the later subtraction. - * However since the entire point of this function is to use approximate - * timeouts, it's entirely ok to not handle that. - */ - return __round_jiffies(j + jiffies, cpu) - jiffies; + unsigned long j0 = jiffies; + + /* Use j0 because jiffies might change while we run */ + return round_jiffies_common(j + j0, cpu, false) - j0; } EXPORT_SYMBOL_GPL(__round_jiffies_relative); @@ -218,7 +222,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies_relative); */ unsigned long round_jiffies(unsigned long j) { - return __round_jiffies(j, raw_smp_processor_id()); + return round_jiffies_common(j, raw_smp_processor_id(), false); } EXPORT_SYMBOL_GPL(round_jiffies); @@ -243,6 +247,71 @@ unsigned long round_jiffies_relative(unsigned long j) } EXPORT_SYMBOL_GPL(round_jiffies_relative); +/** + * __round_jiffies_up - function to round jiffies up to a full second + * @j: the time in (absolute) jiffies that should be rounded + * @cpu: the processor number on which the timeout will happen + * + * This is the same as __round_jiffies() except that it will never + * round down. This is useful for timeouts for which the exact time + * of firing does not matter too much, as long as they don't fire too + * early. + */ +unsigned long __round_jiffies_up(unsigned long j, int cpu) +{ + return round_jiffies_common(j, cpu, true); +} +EXPORT_SYMBOL_GPL(__round_jiffies_up); + +/** + * __round_jiffies_up_relative - function to round jiffies up to a full second + * @j: the time in (relative) jiffies that should be rounded + * @cpu: the processor number on which the timeout will happen + * + * This is the same as __round_jiffies_relative() except that it will never + * round down. This is useful for timeouts for which the exact time + * of firing does not matter too much, as long as they don't fire too + * early. + */ +unsigned long __round_jiffies_up_relative(unsigned long j, int cpu) +{ + unsigned long j0 = jiffies; + + /* Use j0 because jiffies might change while we run */ + return round_jiffies_common(j + j0, cpu, true) - j0; +} +EXPORT_SYMBOL_GPL(__round_jiffies_up_relative); + +/** + * round_jiffies_up - function to round jiffies up to a full second + * @j: the time in (absolute) jiffies that should be rounded + * + * This is the same as round_jiffies() except that it will never + * round down. This is useful for timeouts for which the exact time + * of firing does not matter too much, as long as they don't fire too + * early. + */ +unsigned long round_jiffies_up(unsigned long j) +{ + return round_jiffies_common(j, raw_smp_processor_id(), true); +} +EXPORT_SYMBOL_GPL(round_jiffies_up); + +/** + * round_jiffies_up_relative - function to round jiffies up to a full second + * @j: the time in (relative) jiffies that should be rounded + * + * This is the same as round_jiffies_relative() except that it will never + * round down. 
This is useful for timeouts for which the exact time + * of firing does not matter too much, as long as they don't fire too + * early. + */ +unsigned long round_jiffies_up_relative(unsigned long j) +{ + return __round_jiffies_up_relative(j, raw_smp_processor_id()); +} +EXPORT_SYMBOL_GPL(round_jiffies_up_relative); + static inline void set_running_timer(struct tvec_base *base, struct timer_list *timer) -- cgit v1.2.3 From 2d3854a37e8b767a51aba38ed6d22817b0631e33 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 5 Nov 2008 13:39:10 +1100 Subject: cpumask: introduce new API, without changing anything Impact: introduce new APIs We want to deprecate cpumasks on the stack, as we are headed for gynormous numbers of CPUs. Eventually, we want to head towards an undefined 'struct cpumask' so they can never be declared on stack. 1) New cpumask functions which take pointers instead of copies. (cpus_* -> cpumask_*) 2) Several new helpers to reduce requirements for temporary cpumasks (cpumask_first_and, cpumask_next_and, cpumask_any_and) 3) Helpers for declaring cpumasks on or offstack for large NR_CPUS (cpumask_var_t, alloc_cpumask_var and free_cpumask_var) 4) 'struct cpumask' for explicitness and to mark new-style code. 5) Make iterator functions stop at nr_cpu_ids (a runtime constant), not NR_CPUS for time efficiency and for smaller dynamic allocations in future. 6) cpumask_copy() so we can allocate less than a full cpumask eventually (for alloc_cpumask_var), and so we can eliminate the 'struct cpumask' definition eventually. 7) work_on_cpu() helper for doing task on a CPU, rather than saving old cpumask for current thread and manipulating it. 8) smp_call_function_many() which is smp_call_function_mask() except taking a cpumask pointer. Note that this patch simply introduces the new functions and leaves the obsolescent ones in place. This is to simplify the transition patches. Signed-off-by: Rusty Russell Signed-off-by: Ingo Molnar --- include/linux/cpumask.h | 502 +++++++++++++++++++++++++++++++++++++++++++++- include/linux/smp.h | 9 + include/linux/workqueue.h | 8 + kernel/cpu.c | 3 + kernel/workqueue.c | 45 +++++ lib/cpumask.c | 73 +++++++ 6 files changed, 638 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index d3219d73f8e6..c8e66619097b 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -5,6 +5,9 @@ * Cpumasks provide a bitmap suitable for representing the * set of CPU's in a system, one bit position per CPU number. * + * The new cpumask_ ops take a "struct cpumask *"; the old ones + * use cpumask_t. + * * See detailed comments in the file linux/bitmap.h describing the * data type on which these cpumasks are based. * @@ -31,7 +34,7 @@ * will span the entire range of NR_CPUS. * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
* - * The available cpumask operations are: + * The obsolescent cpumask operations are: * * void cpu_set(cpu, mask) turn on bit 'cpu' in mask * void cpu_clear(cpu, mask) turn off bit 'cpu' in mask @@ -138,7 +141,7 @@ #include #include -typedef struct { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; +typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; extern cpumask_t _unused_cpumask_arg_; #define cpu_set(cpu, dst) __cpu_set((cpu), &(dst)) @@ -527,4 +530,499 @@ extern cpumask_t cpu_active_map; #define for_each_online_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_online_map) #define for_each_present_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_present_map) +/* These are the new versions of the cpumask operators: passed by pointer. + * The older versions will be implemented in terms of these, then deleted. */ +#define cpumask_bits(maskp) ((maskp)->bits) + +#if NR_CPUS <= BITS_PER_LONG +#define CPU_BITS_ALL \ +{ \ + [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ +} + +/* This produces more efficient code. */ +#define nr_cpumask_bits NR_CPUS + +#else /* NR_CPUS > BITS_PER_LONG */ + +#define CPU_BITS_ALL \ +{ \ + [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \ + [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ +} + +#define nr_cpumask_bits nr_cpu_ids +#endif /* NR_CPUS > BITS_PER_LONG */ + +/* verify cpu argument to cpumask_* operators */ +static inline unsigned int cpumask_check(unsigned int cpu) +{ +#ifdef CONFIG_DEBUG_PER_CPU_MAPS + WARN_ON_ONCE(cpu >= nr_cpumask_bits); +#endif /* CONFIG_DEBUG_PER_CPU_MAPS */ + return cpu; +} + +#if NR_CPUS == 1 +/* Uniprocesor. */ +#define cpumask_first(src) ({ (void)(src); 0; }) +#define cpumask_next(n, src) ({ (void)(src); 1; }) +#define cpumask_next_zero(n, src) ({ (void)(src); 1; }) +#define cpumask_next_and(n, srcp, andp) ({ (void)(srcp), (void)(andp); 1; }) +#define cpumask_any_but(mask, cpu) ({ (void)(mask); (void)(cpu); 0; }) + +#define for_each_cpu(cpu, mask) \ + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) +#define for_each_cpu_and(cpu, mask, and) \ + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)and) +#else +/** + * cpumask_first - get the first cpu in a cpumask + * @srcp: the cpumask pointer + * + * Returns >= nr_cpu_ids if no cpus set. + */ +static inline unsigned int cpumask_first(const struct cpumask *srcp) +{ + return find_first_bit(cpumask_bits(srcp), nr_cpumask_bits); +} + +/** + * cpumask_next - get the next cpu in a cpumask + * @n: the cpu prior to the place to search (ie. return will be > @n) + * @srcp: the cpumask pointer + * + * Returns >= nr_cpu_ids if no further cpus set. + */ +static inline unsigned int cpumask_next(int n, const struct cpumask *srcp) +{ + /* -1 is a legal arg here. */ + if (n != -1) + cpumask_check(n); + return find_next_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1); +} + +/** + * cpumask_next_zero - get the next unset cpu in a cpumask + * @n: the cpu prior to the place to search (ie. return will be > @n) + * @srcp: the cpumask pointer + * + * Returns >= nr_cpu_ids if no further cpus unset. + */ +static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) +{ + /* -1 is a legal arg here. 
*/ + if (n != -1) + cpumask_check(n); + return find_next_zero_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1); +} + +int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *); +int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); + +#define for_each_cpu(cpu, mask) \ + for ((cpu) = -1; \ + (cpu) = cpumask_next((cpu), (mask)), \ + (cpu) < nr_cpu_ids;) +#define for_each_cpu_and(cpu, mask, and) \ + for ((cpu) = -1; \ + (cpu) = cpumask_next_and((cpu), (mask), (and)), \ + (cpu) < nr_cpu_ids;) +#endif /* SMP */ + +#define CPU_BITS_NONE \ +{ \ + [0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \ +} + +#define CPU_BITS_CPU0 \ +{ \ + [0] = 1UL \ +} + +/** + * cpumask_set_cpu - set a cpu in a cpumask + * @cpu: cpu number (< nr_cpu_ids) + * @dstp: the cpumask pointer + */ +static inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) +{ + set_bit(cpumask_check(cpu), cpumask_bits(dstp)); +} + +/** + * cpumask_clear_cpu - clear a cpu in a cpumask + * @cpu: cpu number (< nr_cpu_ids) + * @dstp: the cpumask pointer + */ +static inline void cpumask_clear_cpu(int cpu, struct cpumask *dstp) +{ + clear_bit(cpumask_check(cpu), cpumask_bits(dstp)); +} + +/** + * cpumask_test_cpu - test for a cpu in a cpumask + * @cpu: cpu number (< nr_cpu_ids) + * @cpumask: the cpumask pointer + * + * No static inline type checking - see Subtlety (1) above. + */ +#define cpumask_test_cpu(cpu, cpumask) \ + test_bit(cpumask_check(cpu), (cpumask)->bits) + +/** + * cpumask_test_and_set_cpu - atomically test and set a cpu in a cpumask + * @cpu: cpu number (< nr_cpu_ids) + * @cpumask: the cpumask pointer + * + * test_and_set_bit wrapper for cpumasks. + */ +static inline int cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) +{ + return test_and_set_bit(cpumask_check(cpu), cpumask_bits(cpumask)); +} + +/** + * cpumask_setall - set all cpus (< nr_cpu_ids) in a cpumask + * @dstp: the cpumask pointer + */ +static inline void cpumask_setall(struct cpumask *dstp) +{ + bitmap_fill(cpumask_bits(dstp), nr_cpumask_bits); +} + +/** + * cpumask_clear - clear all cpus (< nr_cpu_ids) in a cpumask + * @dstp: the cpumask pointer + */ +static inline void cpumask_clear(struct cpumask *dstp) +{ + bitmap_zero(cpumask_bits(dstp), nr_cpumask_bits); +} + +/** + * cpumask_and - *dstp = *src1p & *src2p + * @dstp: the cpumask result + * @src1p: the first input + * @src2p: the second input + */ +static inline void cpumask_and(struct cpumask *dstp, + const struct cpumask *src1p, + const struct cpumask *src2p) +{ + bitmap_and(cpumask_bits(dstp), cpumask_bits(src1p), + cpumask_bits(src2p), nr_cpumask_bits); +} + +/** + * cpumask_or - *dstp = *src1p | *src2p + * @dstp: the cpumask result + * @src1p: the first input + * @src2p: the second input + */ +static inline void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p, + const struct cpumask *src2p) +{ + bitmap_or(cpumask_bits(dstp), cpumask_bits(src1p), + cpumask_bits(src2p), nr_cpumask_bits); +} + +/** + * cpumask_xor - *dstp = *src1p ^ *src2p + * @dstp: the cpumask result + * @src1p: the first input + * @src2p: the second input + */ +static inline void cpumask_xor(struct cpumask *dstp, + const struct cpumask *src1p, + const struct cpumask *src2p) +{ + bitmap_xor(cpumask_bits(dstp), cpumask_bits(src1p), + cpumask_bits(src2p), nr_cpumask_bits); +} + +/** + * cpumask_andnot - *dstp = *src1p & ~*src2p + * @dstp: the cpumask result + * @src1p: the first input + * @src2p: the second input + */ +static inline void cpumask_andnot(struct cpumask *dstp, + const struct 
cpumask *src1p, + const struct cpumask *src2p) +{ + bitmap_andnot(cpumask_bits(dstp), cpumask_bits(src1p), + cpumask_bits(src2p), nr_cpumask_bits); +} + +/** + * cpumask_complement - *dstp = ~*srcp + * @dstp: the cpumask result + * @srcp: the input to invert + */ +static inline void cpumask_complement(struct cpumask *dstp, + const struct cpumask *srcp) +{ + bitmap_complement(cpumask_bits(dstp), cpumask_bits(srcp), + nr_cpumask_bits); +} + +/** + * cpumask_equal - *src1p == *src2p + * @src1p: the first input + * @src2p: the second input + */ +static inline bool cpumask_equal(const struct cpumask *src1p, + const struct cpumask *src2p) +{ + return bitmap_equal(cpumask_bits(src1p), cpumask_bits(src2p), + nr_cpumask_bits); +} + +/** + * cpumask_intersects - (*src1p & *src2p) != 0 + * @src1p: the first input + * @src2p: the second input + */ +static inline bool cpumask_intersects(const struct cpumask *src1p, + const struct cpumask *src2p) +{ + return bitmap_intersects(cpumask_bits(src1p), cpumask_bits(src2p), + nr_cpumask_bits); +} + +/** + * cpumask_subset - (*src1p & ~*src2p) == 0 + * @src1p: the first input + * @src2p: the second input + */ +static inline int cpumask_subset(const struct cpumask *src1p, + const struct cpumask *src2p) +{ + return bitmap_subset(cpumask_bits(src1p), cpumask_bits(src2p), + nr_cpumask_bits); +} + +/** + * cpumask_empty - *srcp == 0 + * @srcp: the cpumask to that all cpus < nr_cpu_ids are clear. + */ +static inline bool cpumask_empty(const struct cpumask *srcp) +{ + return bitmap_empty(cpumask_bits(srcp), nr_cpumask_bits); +} + +/** + * cpumask_full - *srcp == 0xFFFFFFFF... + * @srcp: the cpumask to that all cpus < nr_cpu_ids are set. + */ +static inline bool cpumask_full(const struct cpumask *srcp) +{ + return bitmap_full(cpumask_bits(srcp), nr_cpumask_bits); +} + +/** + * cpumask_weight - Count of bits in *srcp + * @srcp: the cpumask to count bits (< nr_cpu_ids) in. + */ +static inline unsigned int cpumask_weight(const struct cpumask *srcp) +{ + return bitmap_weight(cpumask_bits(srcp), nr_cpumask_bits); +} + +/** + * cpumask_shift_right - *dstp = *srcp >> n + * @dstp: the cpumask result + * @srcp: the input to shift + * @n: the number of bits to shift by + */ +static inline void cpumask_shift_right(struct cpumask *dstp, + const struct cpumask *srcp, int n) +{ + bitmap_shift_right(cpumask_bits(dstp), cpumask_bits(srcp), n, + nr_cpumask_bits); +} + +/** + * cpumask_shift_left - *dstp = *srcp << n + * @dstp: the cpumask result + * @srcp: the input to shift + * @n: the number of bits to shift by + */ +static inline void cpumask_shift_left(struct cpumask *dstp, + const struct cpumask *srcp, int n) +{ + bitmap_shift_left(cpumask_bits(dstp), cpumask_bits(srcp), n, + nr_cpumask_bits); +} + +/** + * cpumask_copy - *dstp = *srcp + * @dstp: the result + * @srcp: the input cpumask + */ +static inline void cpumask_copy(struct cpumask *dstp, + const struct cpumask *srcp) +{ + bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), nr_cpumask_bits); +} + +/** + * cpumask_any - pick a "random" cpu from *srcp + * @srcp: the input cpumask + * + * Returns >= nr_cpu_ids if no cpus set. + */ +#define cpumask_any(srcp) cpumask_first(srcp) + +/** + * cpumask_first_and - return the first cpu from *srcp1 & *srcp2 + * @src1p: the first input + * @src2p: the second input + * + * Returns >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and(). 
+ */ +#define cpumask_first_and(src1p, src2p) cpumask_next_and(-1, (src1p), (src2p)) + +/** + * cpumask_any_and - pick a "random" cpu from *mask1 & *mask2 + * @mask1: the first input cpumask + * @mask2: the second input cpumask + * + * Returns >= nr_cpu_ids if no cpus set. + */ +#define cpumask_any_and(mask1, mask2) cpumask_first_and((mask1), (mask2)) + +/** + * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * + * @bitmap: the bitmap + * + * There are a few places where cpumask_var_t isn't appropriate and + * static cpumasks must be used (eg. very early boot), yet we don't + * expose the definition of 'struct cpumask'. + * + * This does the conversion, and can be used as a constant initializer. + */ +#define to_cpumask(bitmap) \ + ((struct cpumask *)(1 ? (bitmap) \ + : (void *)sizeof(__check_is_bitmap(bitmap)))) + +static inline int __check_is_bitmap(const unsigned long *bitmap) +{ + return 1; +} + +/** + * cpumask_size - size to allocate for a 'struct cpumask' in bytes + * + * This will eventually be a runtime variable, depending on nr_cpu_ids. + */ +static inline size_t cpumask_size(void) +{ + /* FIXME: Once all cpumask assignments are eliminated, this + * can be nr_cpumask_bits */ + return BITS_TO_LONGS(NR_CPUS) * sizeof(long); +} + +/* + * cpumask_var_t: struct cpumask for stack usage. + * + * Oh, the wicked games we play! In order to make kernel coding a + * little more difficult, we typedef cpumask_var_t to an array or a + * pointer: doing &mask on an array is a noop, so it still works. + * + * ie. + * cpumask_var_t tmpmask; + * if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL)) + * return -ENOMEM; + * + * ... use 'tmpmask' like a normal struct cpumask * ... + * + * free_cpumask_var(tmpmask); + */ +#ifdef CONFIG_CPUMASK_OFFSTACK +typedef struct cpumask *cpumask_var_t; + +bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); +void alloc_bootmem_cpumask_var(cpumask_var_t *mask); +void free_cpumask_var(cpumask_var_t mask); + +#else +typedef struct cpumask cpumask_var_t[1]; + +static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) +{ + return true; +} + +static inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask) +{ +} + +static inline void free_cpumask_var(cpumask_var_t mask) +{ +} +#endif /* CONFIG_CPUMASK_OFFSTACK */ + +/* The pointer versions of the maps, these will become the primary versions. */ +#define cpu_possible_mask ((const struct cpumask *)&cpu_possible_map) +#define cpu_online_mask ((const struct cpumask *)&cpu_online_map) +#define cpu_present_mask ((const struct cpumask *)&cpu_present_map) +#define cpu_active_mask ((const struct cpumask *)&cpu_active_map) + +/* It's common to want to use cpu_all_mask in struct member initializers, + * so it has to refer to an address rather than a pointer. */ +extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS); +#define cpu_all_mask to_cpumask(cpu_all_bits) + +/* First bits of cpu_bit_bitmap are in fact unset. 
*/ +#define cpu_none_mask to_cpumask(cpu_bit_bitmap[0]) + +/* Wrappers for arch boot code to manipulate normally-constant masks */ +static inline void set_cpu_possible(unsigned int cpu, bool possible) +{ + if (possible) + cpumask_set_cpu(cpu, &cpu_possible_map); + else + cpumask_clear_cpu(cpu, &cpu_possible_map); +} + +static inline void set_cpu_present(unsigned int cpu, bool present) +{ + if (present) + cpumask_set_cpu(cpu, &cpu_present_map); + else + cpumask_clear_cpu(cpu, &cpu_present_map); +} + +static inline void set_cpu_online(unsigned int cpu, bool online) +{ + if (online) + cpumask_set_cpu(cpu, &cpu_online_map); + else + cpumask_clear_cpu(cpu, &cpu_online_map); +} + +static inline void set_cpu_active(unsigned int cpu, bool active) +{ + if (active) + cpumask_set_cpu(cpu, &cpu_active_map); + else + cpumask_clear_cpu(cpu, &cpu_active_map); +} + +static inline void init_cpu_present(const struct cpumask *src) +{ + cpumask_copy(&cpu_present_map, src); +} + +static inline void init_cpu_possible(const struct cpumask *src) +{ + cpumask_copy(&cpu_possible_map, src); +} + +static inline void init_cpu_online(const struct cpumask *src) +{ + cpumask_copy(&cpu_online_map, src); +} #endif /* __LINUX_CPUMASK_H */ diff --git a/include/linux/smp.h b/include/linux/smp.h index 2e4d58b26c06..3f9a60043a97 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -64,8 +64,17 @@ extern void smp_cpus_done(unsigned int max_cpus); * Call a function on all other processors */ int smp_call_function(void(*func)(void *info), void *info, int wait); +/* Deprecated: use smp_call_function_many() which uses a cpumask ptr. */ int smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info, int wait); + +static inline void smp_call_function_many(const struct cpumask *mask, + void (*func)(void *info), void *info, + int wait) +{ + smp_call_function_mask(*mask, func, info, wait); +} + int smp_call_function_single(int cpuid, void (*func) (void *info), void *info, int wait); void __smp_call_function_single(int cpuid, struct call_single_data *data); diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 89a5a1231ffb..b36291130f22 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -240,4 +240,12 @@ void cancel_rearming_delayed_work(struct delayed_work *work) cancel_delayed_work_sync(work); } +#ifndef CONFIG_SMP +static inline long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) +{ + return fn(arg); +} +#else +long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg); +#endif /* CONFIG_SMP */ #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index 86d49045daed..5a732c5ef08b 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -499,3 +499,6 @@ const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = { #endif }; EXPORT_SYMBOL_GPL(cpu_bit_bitmap); + +const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL; +EXPORT_SYMBOL(cpu_all_bits); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index f928f2a87b9b..d4dc69ddebd7 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -970,6 +970,51 @@ undo: return ret; } +#ifdef CONFIG_SMP +struct work_for_cpu { + struct work_struct work; + long (*fn)(void *); + void *arg; + long ret; +}; + +static void do_work_for_cpu(struct work_struct *w) +{ + struct work_for_cpu *wfc = container_of(w, struct work_for_cpu, work); + + wfc->ret = wfc->fn(wfc->arg); +} + +/** + * work_on_cpu - run a function in user context on a particular cpu + * @cpu: the cpu to run on + * @fn: the function to run + * 
@arg: the function arg + * + * This will return -EINVAL in the cpu is not online, or the return value + * of @fn otherwise. + */ +long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) +{ + struct work_for_cpu wfc; + + INIT_WORK(&wfc.work, do_work_for_cpu); + wfc.fn = fn; + wfc.arg = arg; + get_online_cpus(); + if (unlikely(!cpu_online(cpu))) + wfc.ret = -EINVAL; + else { + schedule_work_on(cpu, &wfc.work); + flush_work(&wfc.work); + } + put_online_cpus(); + + return wfc.ret; +} +EXPORT_SYMBOL_GPL(work_on_cpu); +#endif /* CONFIG_SMP */ + void __init init_workqueues(void) { cpu_populated_map = cpu_online_map; diff --git a/lib/cpumask.c b/lib/cpumask.c index 5f97dc25ef9c..5ceb4211c834 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -2,6 +2,7 @@ #include #include #include +#include int __first_cpu(const cpumask_t *srcp) { @@ -35,3 +36,75 @@ int __any_online_cpu(const cpumask_t *mask) return cpu; } EXPORT_SYMBOL(__any_online_cpu); + +/** + * cpumask_next_and - get the next cpu in *src1p & *src2p + * @n: the cpu prior to the place to search (ie. return will be > @n) + * @src1p: the first cpumask pointer + * @src2p: the second cpumask pointer + * + * Returns >= nr_cpu_ids if no further cpus set in both. + */ +int cpumask_next_and(int n, const struct cpumask *src1p, + const struct cpumask *src2p) +{ + while ((n = cpumask_next(n, src1p)) < nr_cpu_ids) + if (cpumask_test_cpu(n, src2p)) + break; + return n; +} +EXPORT_SYMBOL(cpumask_next_and); + +/** + * cpumask_any_but - return a "random" in a cpumask, but not this one. + * @mask: the cpumask to search + * @cpu: the cpu to ignore. + * + * Often used to find any cpu but smp_processor_id() in a mask. + * Returns >= nr_cpu_ids if no cpus set. + */ +int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) +{ + unsigned int i; + + for_each_cpu(i, mask) + if (i != cpu) + break; + return i; +} + +/* These are not inline because of header tangles. */ +#ifdef CONFIG_CPUMASK_OFFSTACK +bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) +{ + if (likely(slab_is_available())) + *mask = kmalloc(cpumask_size(), flags); + else { +#ifdef CONFIG_DEBUG_PER_CPU_MAPS + printk(KERN_ERR + "=> alloc_cpumask_var: kmalloc not available!\n"); + dump_stack(); +#endif + *mask = NULL; + } +#ifdef CONFIG_DEBUG_PER_CPU_MAPS + if (!*mask) { + printk(KERN_ERR "=> alloc_cpumask_var: failed!\n"); + dump_stack(); + } +#endif + return *mask != NULL; +} +EXPORT_SYMBOL(alloc_cpumask_var); + +void __init alloc_bootmem_cpumask_var(cpumask_var_t *mask) +{ + *mask = alloc_bootmem(cpumask_size()); +} + +void free_cpumask_var(cpumask_var_t mask) +{ + kfree(mask); +} +EXPORT_SYMBOL(free_cpumask_var); +#endif -- cgit v1.2.3 From f8d570a4745835f2238a33b537218a1bb03fc671 Mon Sep 17 00:00:00 2001 From: David Miller Date: Thu, 6 Nov 2008 00:37:40 -0800 Subject: net: Fix recursive descent in __scm_destroy(). __scm_destroy() walks the list of file descriptors in the scm_fp_list pointed to by the scm_cookie argument. Those, in turn, can close sockets and invoke __scm_destroy() again. There is nothing which limits how deeply this can occur. The idea for how to fix this is from Linus. Basically, we do all of the fput()s at the top level by collecting all of the scm_fp_list objects hit by an fput(). Inside of the initial __scm_destroy() we keep running the list until it is empty. Signed-off-by: David S. 
Miller Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 ++ include/net/scm.h | 5 +++-- net/core/scm.c | 24 +++++++++++++++++++++--- 3 files changed, 26 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b483f39a7112..295b7c756ca6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1349,6 +1349,8 @@ struct task_struct { */ unsigned long timer_slack_ns; unsigned long default_timer_slack_ns; + + struct list_head *scm_work_list; }; /* diff --git a/include/net/scm.h b/include/net/scm.h index 06df126103ca..33e9986beb86 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -14,8 +14,9 @@ struct scm_fp_list { - int count; - struct file *fp[SCM_MAX_FD]; + struct list_head list; + int count; + struct file *fp[SCM_MAX_FD]; }; struct scm_cookie diff --git a/net/core/scm.c b/net/core/scm.c index 10f5c65f6a47..ab242cc1acca 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -75,6 +75,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) if (!fpl) return -ENOMEM; *fplp = fpl; + INIT_LIST_HEAD(&fpl->list); fpl->count = 0; } fpp = &fpl->fp[fpl->count]; @@ -106,9 +107,25 @@ void __scm_destroy(struct scm_cookie *scm) if (fpl) { scm->fp = NULL; - for (i=fpl->count-1; i>=0; i--) - fput(fpl->fp[i]); - kfree(fpl); + if (current->scm_work_list) { + list_add_tail(&fpl->list, current->scm_work_list); + } else { + LIST_HEAD(work_list); + + current->scm_work_list = &work_list; + + list_add(&fpl->list, &work_list); + while (!list_empty(&work_list)) { + fpl = list_first_entry(&work_list, struct scm_fp_list, list); + + list_del(&fpl->list); + for (i=fpl->count-1; i>=0; i--) + fput(fpl->fp[i]); + kfree(fpl); + } + + current->scm_work_list = NULL; + } } } @@ -284,6 +301,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL); if (new_fpl) { + INIT_LIST_HEAD(&new_fpl->list); for (i=fpl->count-1; i>=0; i--) get_file(fpl->fp[i]); memcpy(new_fpl, fpl, sizeof(*fpl)); -- cgit v1.2.3 From 9e975dae2970d22557662761c8505ce9fd165684 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:46 -0800 Subject: fat: split include/msdos_fs.h This splits __KERNEL__ stuff in include/msdos_fs.h into fs/fat/fat.h. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/cache.c | 2 +- fs/fat/dir.c | 2 +- fs/fat/fat.h | 274 ++++++++++++++++++++++++++++++++++++++++++++++ fs/fat/fatent.c | 1 + fs/fat/file.c | 2 +- fs/fat/inode.c | 2 +- fs/fat/misc.c | 2 +- fs/fat/namei_msdos.c | 2 +- fs/fat/namei_vfat.c | 3 +- include/linux/msdos_fs.h | 276 +---------------------------------------------- 10 files changed, 284 insertions(+), 282 deletions(-) create mode 100644 fs/fat/fat.h (limited to 'include/linux') diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 3222f51c41cf..589edde9053c 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -9,8 +9,8 @@ */ #include -#include #include +#include "fat.h" /* this must be > 0. 
*/ #define FAT_MAX_CACHE 8 diff --git a/fs/fat/dir.c b/fs/fat/dir.c index bae1c3292522..08b23ad25f1c 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -16,11 +16,11 @@ #include #include #include -#include #include #include #include #include +#include "fat.h" static inline loff_t fat_make_i_pos(struct super_block *sb, struct buffer_head *bh, diff --git a/fs/fat/fat.h b/fs/fat/fat.h new file mode 100644 index 000000000000..51f1c42ca5e3 --- /dev/null +++ b/fs/fat/fat.h @@ -0,0 +1,274 @@ +#ifndef _FAT_H +#define _FAT_H + +#include +#include +#include +#include +#include +#include + +/* + * vfat shortname flags + */ +#define VFAT_SFN_DISPLAY_LOWER 0x0001 /* convert to lowercase for display */ +#define VFAT_SFN_DISPLAY_WIN95 0x0002 /* emulate win95 rule for display */ +#define VFAT_SFN_DISPLAY_WINNT 0x0004 /* emulate winnt rule for display */ +#define VFAT_SFN_CREATE_WIN95 0x0100 /* emulate win95 rule for create */ +#define VFAT_SFN_CREATE_WINNT 0x0200 /* emulate winnt rule for create */ + +struct fat_mount_options { + uid_t fs_uid; + gid_t fs_gid; + unsigned short fs_fmask; + unsigned short fs_dmask; + unsigned short codepage; /* Codepage for shortname conversions */ + char *iocharset; /* Charset used for filename input/display */ + unsigned short shortname; /* flags for shortname display/create rule */ + unsigned char name_check; /* r = relaxed, n = normal, s = strict */ + unsigned short allow_utime;/* permission for setting the [am]time */ + unsigned quiet:1, /* set = fake successful chmods and chowns */ + showexec:1, /* set = only set x bit for com/exe/bat */ + sys_immutable:1, /* set = system files are immutable */ + dotsOK:1, /* set = hidden and system files are named '.filename' */ + isvfat:1, /* 0=no vfat long filename support, 1=vfat support */ + utf8:1, /* Use of UTF-8 character set (Default) */ + unicode_xlate:1, /* create escape sequences for unhandled Unicode */ + numtail:1, /* Does first alias have a numeric '~1' type tail? */ + flush:1, /* write things quickly */ + nocase:1, /* Does this need case conversion? 0=need case conversion*/ + usefree:1, /* Use free_clusters for FAT32 */ + tz_utc:1; /* Filesystem timestamps are in UTC */ +}; + +#define FAT_HASH_BITS 8 +#define FAT_HASH_SIZE (1UL << FAT_HASH_BITS) +#define FAT_HASH_MASK (FAT_HASH_SIZE-1) + +/* + * MS-DOS file system in-core superblock data + */ +struct msdos_sb_info { + unsigned short sec_per_clus; /* sectors/cluster */ + unsigned short cluster_bits; /* log2(cluster_size) */ + unsigned int cluster_size; /* cluster size */ + unsigned char fats,fat_bits; /* number of FATs, FAT bits (12 or 16) */ + unsigned short fat_start; + unsigned long fat_length; /* FAT start & length (sec.) */ + unsigned long dir_start; + unsigned short dir_entries; /* root dir start & entries */ + unsigned long data_start; /* first data sector */ + unsigned long max_cluster; /* maximum cluster number */ + unsigned long root_cluster; /* first cluster of the root directory */ + unsigned long fsinfo_sector; /* sector number of FAT32 fsinfo */ + struct mutex fat_lock; + unsigned int prev_free; /* previously allocated cluster number */ + unsigned int free_clusters; /* -1 if undefined */ + unsigned int free_clus_valid; /* is free_clusters valid? 
*/ + struct fat_mount_options options; + struct nls_table *nls_disk; /* Codepage used on disk */ + struct nls_table *nls_io; /* Charset used for input and display */ + const void *dir_ops; /* Opaque; default directory operations */ + int dir_per_block; /* dir entries per block */ + int dir_per_block_bits; /* log2(dir_per_block) */ + + int fatent_shift; + struct fatent_operations *fatent_ops; + + spinlock_t inode_hash_lock; + struct hlist_head inode_hashtable[FAT_HASH_SIZE]; +}; + +#define FAT_CACHE_VALID 0 /* special case for valid cache */ + +/* + * MS-DOS file system inode data in memory + */ +struct msdos_inode_info { + spinlock_t cache_lru_lock; + struct list_head cache_lru; + int nr_caches; + /* for avoiding the race between fat_free() and fat_get_cluster() */ + unsigned int cache_valid_id; + + loff_t mmu_private; + int i_start; /* first cluster or 0 */ + int i_logstart; /* logical first cluster */ + int i_attrs; /* unused attribute bits */ + loff_t i_pos; /* on-disk position of directory entry or 0 */ + struct hlist_node i_fat_hash; /* hash by i_location */ + struct inode vfs_inode; +}; + +struct fat_slot_info { + loff_t i_pos; /* on-disk position of directory entry */ + loff_t slot_off; /* offset for slot or de start */ + int nr_slots; /* number of slots + 1(de) in filename */ + struct msdos_dir_entry *de; + struct buffer_head *bh; +}; + +static inline struct msdos_sb_info *MSDOS_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static inline struct msdos_inode_info *MSDOS_I(struct inode *inode) +{ + return container_of(inode, struct msdos_inode_info, vfs_inode); +} + +/* Return the FAT attribute byte for this inode */ +static inline u8 fat_attr(struct inode *inode) +{ + return ((inode->i_mode & S_IWUGO) ? ATTR_NONE : ATTR_RO) | + (S_ISDIR(inode->i_mode) ? 
ATTR_DIR : ATTR_NONE) | + MSDOS_I(inode)->i_attrs; +} + +static inline unsigned char fat_checksum(const __u8 *name) +{ + unsigned char s = name[0]; + s = (s<<7) + (s>>1) + name[1]; s = (s<<7) + (s>>1) + name[2]; + s = (s<<7) + (s>>1) + name[3]; s = (s<<7) + (s>>1) + name[4]; + s = (s<<7) + (s>>1) + name[5]; s = (s<<7) + (s>>1) + name[6]; + s = (s<<7) + (s>>1) + name[7]; s = (s<<7) + (s>>1) + name[8]; + s = (s<<7) + (s>>1) + name[9]; s = (s<<7) + (s>>1) + name[10]; + return s; +} + +static inline sector_t fat_clus_to_blknr(struct msdos_sb_info *sbi, int clus) +{ + return ((sector_t)clus - FAT_START_ENT) * sbi->sec_per_clus + + sbi->data_start; +} + +static inline void fat16_towchar(wchar_t *dst, const __u8 *src, size_t len) +{ +#ifdef __BIG_ENDIAN + while (len--) { + *dst++ = src[0] | (src[1] << 8); + src += 2; + } +#else + memcpy(dst, src, len * 2); +#endif +} + +static inline void fatwchar_to16(__u8 *dst, const wchar_t *src, size_t len) +{ +#ifdef __BIG_ENDIAN + while (len--) { + dst[0] = *src & 0x00FF; + dst[1] = (*src & 0xFF00) >> 8; + dst += 2; + src++; + } +#else + memcpy(dst, src, len * 2); +#endif +} + +/* fat/cache.c */ +extern void fat_cache_inval_inode(struct inode *inode); +extern int fat_get_cluster(struct inode *inode, int cluster, + int *fclus, int *dclus); +extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, + unsigned long *mapped_blocks); + +/* fat/dir.c */ +extern const struct file_operations fat_dir_operations; +extern int fat_search_long(struct inode *inode, const unsigned char *name, + int name_len, struct fat_slot_info *sinfo); +extern int fat_dir_empty(struct inode *dir); +extern int fat_subdirs(struct inode *dir); +extern int fat_scan(struct inode *dir, const unsigned char *name, + struct fat_slot_info *sinfo); +extern int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh, + struct msdos_dir_entry **de, loff_t *i_pos); +extern int fat_alloc_new_dir(struct inode *dir, struct timespec *ts); +extern int fat_add_entries(struct inode *dir, void *slots, int nr_slots, + struct fat_slot_info *sinfo); +extern int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo); + +/* fat/fatent.c */ +struct fat_entry { + int entry; + union { + u8 *ent12_p[2]; + __le16 *ent16_p; + __le32 *ent32_p; + } u; + int nr_bhs; + struct buffer_head *bhs[2]; +}; + +static inline void fatent_init(struct fat_entry *fatent) +{ + fatent->nr_bhs = 0; + fatent->entry = 0; + fatent->u.ent32_p = NULL; + fatent->bhs[0] = fatent->bhs[1] = NULL; +} + +static inline void fatent_set_entry(struct fat_entry *fatent, int entry) +{ + fatent->entry = entry; + fatent->u.ent32_p = NULL; +} + +static inline void fatent_brelse(struct fat_entry *fatent) +{ + int i; + fatent->u.ent32_p = NULL; + for (i = 0; i < fatent->nr_bhs; i++) + brelse(fatent->bhs[i]); + fatent->nr_bhs = 0; + fatent->bhs[0] = fatent->bhs[1] = NULL; +} + +extern void fat_ent_access_init(struct super_block *sb); +extern int fat_ent_read(struct inode *inode, struct fat_entry *fatent, + int entry); +extern int fat_ent_write(struct inode *inode, struct fat_entry *fatent, + int new, int wait); +extern int fat_alloc_clusters(struct inode *inode, int *cluster, + int nr_cluster); +extern int fat_free_clusters(struct inode *inode, int cluster); +extern int fat_count_free_clusters(struct super_block *sb); + +/* fat/file.c */ +extern int fat_generic_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg); +extern const struct file_operations fat_file_operations; +extern const struct 
inode_operations fat_file_inode_operations; +extern int fat_setattr(struct dentry * dentry, struct iattr * attr); +extern void fat_truncate(struct inode *inode); +extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat); + +/* fat/inode.c */ +extern void fat_attach(struct inode *inode, loff_t i_pos); +extern void fat_detach(struct inode *inode); +extern struct inode *fat_iget(struct super_block *sb, loff_t i_pos); +extern struct inode *fat_build_inode(struct super_block *sb, + struct msdos_dir_entry *de, loff_t i_pos); +extern int fat_sync_inode(struct inode *inode); +extern int fat_fill_super(struct super_block *sb, void *data, int silent, + const struct inode_operations *fs_dir_inode_ops, int isvfat); + +extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, + struct inode *i2); +/* fat/misc.c */ +extern void fat_fs_panic(struct super_block *s, const char *fmt, ...); +extern void fat_clusters_flush(struct super_block *sb); +extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); +extern int date_dos2unix(unsigned short time, unsigned short date, int tz_utc); +extern void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, + int tz_utc); +extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs); + +int fat_cache_init(void); +void fat_cache_destroy(void); + +#endif /* !_FAT_H */ diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index fb98b3d847ed..5b5f49061b7c 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -7,6 +7,7 @@ #include #include #include +#include "fat.h" struct fatent_operations { void (*ent_blocknr)(struct super_block *, int, int *, sector_t *); diff --git a/fs/fat/file.c b/fs/fat/file.c index ddde37025ca6..b21973f266a1 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -10,13 +10,13 @@ #include #include #include -#include #include #include #include #include #include #include +#include "fat.h" int fat_generic_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 2b2eec1283bf..3921de2013a4 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -28,6 +27,7 @@ #include #include #include +#include "fat.h" #ifndef CONFIG_FAT_DEFAULT_IOCHARSET /* if user don't select VFAT, this is undefined. 
*/ diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 79fb98ad36d4..91ad9be18ff9 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -8,8 +8,8 @@ #include #include -#include #include +#include "fat.h" /* * fat_fs_panic reports a severe file system problem and sets the file system diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index e844b9809d27..c0a4d5cd99b2 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -9,8 +9,8 @@ #include #include #include -#include #include +#include "fat.h" /* Characters that are undesirable in an MS-DOS file name */ static unsigned char bad_chars[] = "*?<>|\""; diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 155c10b4adbd..facf3bf0211a 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -16,14 +16,13 @@ */ #include - #include -#include #include #include #include #include #include +#include "fat.h" static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) { diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index ba63858056c7..0982fb47a90d 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -167,282 +167,10 @@ struct msdos_dir_slot { }; #ifdef __KERNEL__ - -#include -#include -#include -#include -#include - -/* - * vfat shortname flags - */ -#define VFAT_SFN_DISPLAY_LOWER 0x0001 /* convert to lowercase for display */ -#define VFAT_SFN_DISPLAY_WIN95 0x0002 /* emulate win95 rule for display */ -#define VFAT_SFN_DISPLAY_WINNT 0x0004 /* emulate winnt rule for display */ -#define VFAT_SFN_CREATE_WIN95 0x0100 /* emulate win95 rule for create */ -#define VFAT_SFN_CREATE_WINNT 0x0200 /* emulate winnt rule for create */ - -struct fat_mount_options { - uid_t fs_uid; - gid_t fs_gid; - unsigned short fs_fmask; - unsigned short fs_dmask; - unsigned short codepage; /* Codepage for shortname conversions */ - char *iocharset; /* Charset used for filename input/display */ - unsigned short shortname; /* flags for shortname display/create rule */ - unsigned char name_check; /* r = relaxed, n = normal, s = strict */ - unsigned short allow_utime;/* permission for setting the [am]time */ - unsigned quiet:1, /* set = fake successful chmods and chowns */ - showexec:1, /* set = only set x bit for com/exe/bat */ - sys_immutable:1, /* set = system files are immutable */ - dotsOK:1, /* set = hidden and system files are named '.filename' */ - isvfat:1, /* 0=no vfat long filename support, 1=vfat support */ - utf8:1, /* Use of UTF-8 character set (Default) */ - unicode_xlate:1, /* create escape sequences for unhandled Unicode */ - numtail:1, /* Does first alias have a numeric '~1' type tail? */ - flush:1, /* write things quickly */ - nocase:1, /* Does this need case conversion? 0=need case conversion*/ - usefree:1, /* Use free_clusters for FAT32 */ - tz_utc:1; /* Filesystem timestamps are in UTC */ -}; - -#define FAT_HASH_BITS 8 -#define FAT_HASH_SIZE (1UL << FAT_HASH_BITS) -#define FAT_HASH_MASK (FAT_HASH_SIZE-1) - -/* - * MS-DOS file system in-core superblock data - */ -struct msdos_sb_info { - unsigned short sec_per_clus; /* sectors/cluster */ - unsigned short cluster_bits; /* log2(cluster_size) */ - unsigned int cluster_size; /* cluster size */ - unsigned char fats,fat_bits; /* number of FATs, FAT bits (12 or 16) */ - unsigned short fat_start; - unsigned long fat_length; /* FAT start & length (sec.) 
*/ - unsigned long dir_start; - unsigned short dir_entries; /* root dir start & entries */ - unsigned long data_start; /* first data sector */ - unsigned long max_cluster; /* maximum cluster number */ - unsigned long root_cluster; /* first cluster of the root directory */ - unsigned long fsinfo_sector; /* sector number of FAT32 fsinfo */ - struct mutex fat_lock; - unsigned int prev_free; /* previously allocated cluster number */ - unsigned int free_clusters; /* -1 if undefined */ - unsigned int free_clus_valid; /* is free_clusters valid? */ - struct fat_mount_options options; - struct nls_table *nls_disk; /* Codepage used on disk */ - struct nls_table *nls_io; /* Charset used for input and display */ - const void *dir_ops; /* Opaque; default directory operations */ - int dir_per_block; /* dir entries per block */ - int dir_per_block_bits; /* log2(dir_per_block) */ - - int fatent_shift; - struct fatent_operations *fatent_ops; - - spinlock_t inode_hash_lock; - struct hlist_head inode_hashtable[FAT_HASH_SIZE]; -}; - -#define FAT_CACHE_VALID 0 /* special case for valid cache */ - -/* - * MS-DOS file system inode data in memory - */ -struct msdos_inode_info { - spinlock_t cache_lru_lock; - struct list_head cache_lru; - int nr_caches; - /* for avoiding the race between fat_free() and fat_get_cluster() */ - unsigned int cache_valid_id; - - loff_t mmu_private; - int i_start; /* first cluster or 0 */ - int i_logstart; /* logical first cluster */ - int i_attrs; /* unused attribute bits */ - loff_t i_pos; /* on-disk position of directory entry or 0 */ - struct hlist_node i_fat_hash; /* hash by i_location */ - struct inode vfs_inode; -}; - -struct fat_slot_info { - loff_t i_pos; /* on-disk position of directory entry */ - loff_t slot_off; /* offset for slot or de start */ - int nr_slots; /* number of slots + 1(de) in filename */ - struct msdos_dir_entry *de; - struct buffer_head *bh; -}; - -static inline struct msdos_sb_info *MSDOS_SB(struct super_block *sb) -{ - return sb->s_fs_info; -} - -static inline struct msdos_inode_info *MSDOS_I(struct inode *inode) -{ - return container_of(inode, struct msdos_inode_info, vfs_inode); -} - -/* Return the FAT attribute byte for this inode */ -static inline u8 fat_attr(struct inode *inode) -{ - return ((inode->i_mode & S_IWUGO) ? ATTR_NONE : ATTR_RO) | - (S_ISDIR(inode->i_mode) ? 
ATTR_DIR : ATTR_NONE) | - MSDOS_I(inode)->i_attrs; -} - -static inline unsigned char fat_checksum(const __u8 *name) -{ - unsigned char s = name[0]; - s = (s<<7) + (s>>1) + name[1]; s = (s<<7) + (s>>1) + name[2]; - s = (s<<7) + (s>>1) + name[3]; s = (s<<7) + (s>>1) + name[4]; - s = (s<<7) + (s>>1) + name[5]; s = (s<<7) + (s>>1) + name[6]; - s = (s<<7) + (s>>1) + name[7]; s = (s<<7) + (s>>1) + name[8]; - s = (s<<7) + (s>>1) + name[9]; s = (s<<7) + (s>>1) + name[10]; - return s; -} - -static inline sector_t fat_clus_to_blknr(struct msdos_sb_info *sbi, int clus) -{ - return ((sector_t)clus - FAT_START_ENT) * sbi->sec_per_clus - + sbi->data_start; -} - -static inline void fat16_towchar(wchar_t *dst, const __u8 *src, size_t len) -{ -#ifdef __BIG_ENDIAN - while (len--) { - *dst++ = src[0] | (src[1] << 8); - src += 2; - } -#else - memcpy(dst, src, len * 2); -#endif -} - -static inline void fatwchar_to16(__u8 *dst, const wchar_t *src, size_t len) -{ -#ifdef __BIG_ENDIAN - while (len--) { - dst[0] = *src & 0x00FF; - dst[1] = (*src & 0xFF00) >> 8; - dst += 2; - src++; - } -#else - memcpy(dst, src, len * 2); -#endif -} - /* media of boot sector */ static inline int fat_valid_media(u8 media) { return 0xf8 <= media || media == 0xf0; } - -/* fat/cache.c */ -extern void fat_cache_inval_inode(struct inode *inode); -extern int fat_get_cluster(struct inode *inode, int cluster, - int *fclus, int *dclus); -extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, - unsigned long *mapped_blocks); - -/* fat/dir.c */ -extern const struct file_operations fat_dir_operations; -extern int fat_search_long(struct inode *inode, const unsigned char *name, - int name_len, struct fat_slot_info *sinfo); -extern int fat_dir_empty(struct inode *dir); -extern int fat_subdirs(struct inode *dir); -extern int fat_scan(struct inode *dir, const unsigned char *name, - struct fat_slot_info *sinfo); -extern int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh, - struct msdos_dir_entry **de, loff_t *i_pos); -extern int fat_alloc_new_dir(struct inode *dir, struct timespec *ts); -extern int fat_add_entries(struct inode *dir, void *slots, int nr_slots, - struct fat_slot_info *sinfo); -extern int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo); - -/* fat/fatent.c */ -struct fat_entry { - int entry; - union { - u8 *ent12_p[2]; - __le16 *ent16_p; - __le32 *ent32_p; - } u; - int nr_bhs; - struct buffer_head *bhs[2]; -}; - -static inline void fatent_init(struct fat_entry *fatent) -{ - fatent->nr_bhs = 0; - fatent->entry = 0; - fatent->u.ent32_p = NULL; - fatent->bhs[0] = fatent->bhs[1] = NULL; -} - -static inline void fatent_set_entry(struct fat_entry *fatent, int entry) -{ - fatent->entry = entry; - fatent->u.ent32_p = NULL; -} - -static inline void fatent_brelse(struct fat_entry *fatent) -{ - int i; - fatent->u.ent32_p = NULL; - for (i = 0; i < fatent->nr_bhs; i++) - brelse(fatent->bhs[i]); - fatent->nr_bhs = 0; - fatent->bhs[0] = fatent->bhs[1] = NULL; -} - -extern void fat_ent_access_init(struct super_block *sb); -extern int fat_ent_read(struct inode *inode, struct fat_entry *fatent, - int entry); -extern int fat_ent_write(struct inode *inode, struct fat_entry *fatent, - int new, int wait); -extern int fat_alloc_clusters(struct inode *inode, int *cluster, - int nr_cluster); -extern int fat_free_clusters(struct inode *inode, int cluster); -extern int fat_count_free_clusters(struct super_block *sb); - -/* fat/file.c */ -extern int fat_generic_ioctl(struct inode *inode, struct file *filp, - 
unsigned int cmd, unsigned long arg); -extern const struct file_operations fat_file_operations; -extern const struct inode_operations fat_file_inode_operations; -extern int fat_setattr(struct dentry * dentry, struct iattr * attr); -extern void fat_truncate(struct inode *inode); -extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat); - -/* fat/inode.c */ -extern void fat_attach(struct inode *inode, loff_t i_pos); -extern void fat_detach(struct inode *inode); -extern struct inode *fat_iget(struct super_block *sb, loff_t i_pos); -extern struct inode *fat_build_inode(struct super_block *sb, - struct msdos_dir_entry *de, loff_t i_pos); -extern int fat_sync_inode(struct inode *inode); -extern int fat_fill_super(struct super_block *sb, void *data, int silent, - const struct inode_operations *fs_dir_inode_ops, int isvfat); - -extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, - struct inode *i2); -/* fat/misc.c */ -extern void fat_fs_panic(struct super_block *s, const char *fmt, ...); -extern void fat_clusters_flush(struct super_block *sb); -extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); -extern int date_dos2unix(unsigned short time, unsigned short date, int tz_utc); -extern void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, - int tz_utc); -extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs); - -int fat_cache_init(void); -void fat_cache_destroy(void); - -#endif /* __KERNEL__ */ - -#endif +#endif /* !__KERNEL__ */ +#endif /* !_LINUX_MSDOS_FS_H */ -- cgit v1.2.3 From 9c0aa1b87bf541affef519eb4879ce7c5a5941ae Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:54 -0800 Subject: fat: Cleanup FAT attribute stuff This adds three helpers: fat_make_attrs() - makes FAT attributes from inode. fat_make_mode() - makes mode_t from FAT attributes. fat_save_attrs() - saves FAT attributes to inode. Then this replaces: MSDOS_MKMODE() by fat_make_mode(), fat_attr() by fat_make_attrs(), ->i_attrs = attr & ATTR_UNUSED by fat_save_attrs(). And for root inode, those is used with ATTR_DIR instead of bogus ATTR_NONE. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/fat.h | 20 +++++++++++++++++++- fs/fat/file.c | 32 ++++++++++++-------------------- fs/fat/inode.c | 19 +++++++++---------- include/linux/msdos_fs.h | 5 ----- 4 files changed, 40 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 2b8e94c3eef4..3b4753a024e3 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -117,14 +117,32 @@ static inline struct msdos_inode_info *MSDOS_I(struct inode *inode) return container_of(inode, struct msdos_inode_info, vfs_inode); } +/* Convert attribute bits and a mask to the UNIX mode. */ +static inline mode_t fat_make_mode(struct msdos_sb_info *sbi, + u8 attrs, mode_t mode) +{ + if (attrs & ATTR_RO) + mode &= ~S_IWUGO; + + if (attrs & ATTR_DIR) + return (mode & ~sbi->options.fs_dmask) | S_IFDIR; + else + return (mode & ~sbi->options.fs_fmask) | S_IFREG; +} + /* Return the FAT attribute byte for this inode */ -static inline u8 fat_attr(struct inode *inode) +static inline u8 fat_make_attrs(struct inode *inode) { return ((inode->i_mode & S_IWUGO) ? ATTR_NONE : ATTR_RO) | (S_ISDIR(inode->i_mode) ? 
ATTR_DIR : ATTR_NONE) | MSDOS_I(inode)->i_attrs; } +static inline void fat_save_attrs(struct inode *inode, u8 attrs) +{ + MSDOS_I(inode)->i_attrs = attrs & ATTR_UNUSED; +} + static inline unsigned char fat_checksum(const __u8 *name) { unsigned char s = name[0]; diff --git a/fs/fat/file.c b/fs/fat/file.c index b21973f266a1..f5a7e907a8fa 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -27,13 +27,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, switch (cmd) { case FAT_IOCTL_GET_ATTRIBUTES: { - u32 attr; - - if (inode->i_ino == MSDOS_ROOT_INO) - attr = ATTR_DIR; - else - attr = fat_attr(inode); - + u32 attr = fat_make_attrs(inode); return put_user(attr, user_attr); } case FAT_IOCTL_SET_ATTRIBUTES: @@ -62,20 +56,16 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, /* Merge in ATTR_VOLUME and ATTR_DIR */ attr |= (MSDOS_I(inode)->i_attrs & ATTR_VOLUME) | (is_dir ? ATTR_DIR : 0); - oldattr = fat_attr(inode); + oldattr = fat_make_attrs(inode); /* Equivalent to a chmod() */ ia.ia_valid = ATTR_MODE | ATTR_CTIME; ia.ia_ctime = current_fs_time(inode->i_sb); - if (is_dir) { - ia.ia_mode = MSDOS_MKMODE(attr, - S_IRWXUGO & ~sbi->options.fs_dmask) - | S_IFDIR; - } else { - ia.ia_mode = MSDOS_MKMODE(attr, - (S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO)) - & ~sbi->options.fs_fmask) - | S_IFREG; + if (is_dir) + ia.ia_mode = fat_make_mode(sbi, attr, S_IRWXUGO); + else { + ia.ia_mode = fat_make_mode(sbi, attr, + S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO)); } /* The root directory has no attributes */ @@ -115,7 +105,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, inode->i_flags &= S_IMMUTABLE; } - MSDOS_I(inode)->i_attrs = attr & ATTR_UNUSED; + fat_save_attrs(inode, attr); mark_inode_dirty(inode); up: mnt_drop_write(filp->f_path.mnt); @@ -274,7 +264,7 @@ static int fat_sanitize_mode(const struct msdos_sb_info *sbi, /* * Note, the basic check is already done by a caller of - * (attr->ia_mode & ~MSDOS_VALID_MODE) + * (attr->ia_mode & ~FAT_VALID_MODE) */ if (S_ISREG(inode->i_mode)) @@ -314,6 +304,8 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode) } #define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET) +/* valid file mode bits */ +#define FAT_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXUGO) int fat_setattr(struct dentry *dentry, struct iattr *attr) { @@ -356,7 +348,7 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) ((attr->ia_valid & ATTR_GID) && (attr->ia_gid != sbi->options.fs_gid)) || ((attr->ia_valid & ATTR_MODE) && - (attr->ia_mode & ~MSDOS_VALID_MODE))) + (attr->ia_mode & ~FAT_VALID_MODE))) error = -EPERM; if (error) { diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 8e1b75c63c7f..7aaa21cf019a 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -337,8 +337,7 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) if ((de->attr & ATTR_DIR) && !IS_FREE(de->name)) { inode->i_generation &= ~1; - inode->i_mode = MSDOS_MKMODE(de->attr, - S_IRWXUGO & ~sbi->options.fs_dmask) | S_IFDIR; + inode->i_mode = fat_make_mode(sbi, de->attr, S_IRWXUGO); inode->i_op = sbi->dir_ops; inode->i_fop = &fat_dir_operations; @@ -355,10 +354,9 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) inode->i_nlink = fat_subdirs(inode); } else { /* not a directory */ inode->i_generation |= 1; - inode->i_mode = MSDOS_MKMODE(de->attr, - ((sbi->options.showexec && !is_exec(de->name + 8)) - ? 
S_IRUGO|S_IWUGO : S_IRWXUGO) - & ~sbi->options.fs_fmask) | S_IFREG; + inode->i_mode = fat_make_mode(sbi, de->attr, + ((sbi->options.showexec && !is_exec(de->name + 8)) + ? S_IRUGO|S_IWUGO : S_IRWXUGO)); MSDOS_I(inode)->i_start = le16_to_cpu(de->start); if (sbi->fat_bits == 32) MSDOS_I(inode)->i_start |= (le16_to_cpu(de->starthi) << 16); @@ -374,7 +372,8 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) if (sbi->options.sys_immutable) inode->i_flags |= S_IMMUTABLE; } - MSDOS_I(inode)->i_attrs = de->attr & ATTR_UNUSED; + fat_save_attrs(inode, de->attr); + inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1)) & ~((loff_t)sbi->cluster_size - 1)) >> 9; @@ -569,7 +568,7 @@ retry: raw_entry->size = 0; else raw_entry->size = cpu_to_le32(inode->i_size); - raw_entry->attr = fat_attr(inode); + raw_entry->attr = fat_make_attrs(inode); raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart); raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16); fat_time_unix2fat(sbi, &inode->i_mtime, &raw_entry->time, @@ -1105,7 +1104,7 @@ static int fat_read_root(struct inode *inode) inode->i_gid = sbi->options.fs_gid; inode->i_version++; inode->i_generation = 0; - inode->i_mode = (S_IRWXUGO & ~sbi->options.fs_dmask) | S_IFDIR; + inode->i_mode = fat_make_mode(sbi, ATTR_DIR, S_IRWXUGO); inode->i_op = sbi->dir_ops; inode->i_fop = &fat_dir_operations; if (sbi->fat_bits == 32) { @@ -1122,7 +1121,7 @@ static int fat_read_root(struct inode *inode) MSDOS_I(inode)->i_logstart = 0; MSDOS_I(inode)->mmu_private = inode->i_size; - MSDOS_I(inode)->i_attrs = ATTR_NONE; + fat_save_attrs(inode, ATTR_DIR); inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0; inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = 0; inode->i_nlink = fat_subdirs(inode)+2; diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index 0982fb47a90d..e0a9b207920d 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -46,11 +46,6 @@ #define DELETED_FLAG 0xe5 /* marks file as deleted when in name[0] */ #define IS_FREE(n) (!*(n) || *(n) == DELETED_FLAG) -/* valid file mode bits */ -#define MSDOS_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXU | S_IRWXG | S_IRWXO) -/* Convert attribute bits and a mask to the UNIX mode. */ -#define MSDOS_MKMODE(a, m) (m & (a & ATTR_RO ? S_IRUGO|S_IXUGO : S_IRWXUGO)) - #define MSDOS_NAME 11 /* maximum name length */ #define MSDOS_LONGNAME 256 /* maximum name length */ #define MSDOS_SLOTS 21 /* max # of slots for short and long names */ -- cgit v1.2.3 From 7597bc94d6f3bdccb086ac7f2ad91292fdaee2a4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Nov 2008 17:38:47 +0000 Subject: Fix accidental implicit cast in HR-timer conversion Fix the hrtimer_add_expires_ns() function. It should take a 'u64 ns' argument, but rather takes an 'unsigned long ns' argument - which might only be 32-bits. On FRV, this results in the kernel locking up because hrtimer_forward() passes the result of a 64-bit multiplication to this function, for which the compiler discards the top 32-bits - something that didn't happen when ktime_add_ns() was called directly. 
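The truncation is easy to demonstrate outside the kernel. The following stand-alone sketch is illustrative only (plain user-space C, not kernel code; the function names are invented) and assumes a build where unsigned long is 32 bits wide, as on FRV:

/* Stand-alone illustration of the implicit cast fixed below. */
#include <stdint.h>
#include <stdio.h>

/* Mirrors the old, narrow prototype: the caller's 64-bit value is
 * truncated at the call boundary when unsigned long is 32 bits. */
static uint64_t add_expires_narrow(uint64_t expires, unsigned long ns)
{
	return expires + ns;
}

/* Mirrors the fixed prototype: the full 64-bit value survives. */
static uint64_t add_expires_wide(uint64_t expires, uint64_t ns)
{
	return expires + ns;
}

int main(void)
{
	uint64_t interval_ns = 5000000000ULL;	/* 5 seconds in ns, > 2^32 */

	printf("narrow: %llu\n", (unsigned long long)add_expires_narrow(0, interval_ns));
	printf("wide:   %llu\n", (unsigned long long)add_expires_wide(0, interval_ns));
	return 0;
}

Built with a 32-bit ABI (for example gcc -m32), the narrow variant prints 705032704 (5000000000 modulo 2^32) while the wide variant prints 5000000000; on a 64-bit build the two agree, which is why the bug only bit 32-bit architectures such as FRV.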
Signed-off-by: David Howells Acked-by: Arjan van de Ven Signed-off-by: Linus Torvalds --- include/linux/hrtimer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 2b3645b1acf4..07e510a3b00a 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -239,7 +239,7 @@ static inline void hrtimer_add_expires(struct hrtimer *timer, ktime_t time) timer->_softexpires = ktime_add_safe(timer->_softexpires, time); } -static inline void hrtimer_add_expires_ns(struct hrtimer *timer, unsigned long ns) +static inline void hrtimer_add_expires_ns(struct hrtimer *timer, u64 ns) { timer->_expires = ktime_add_ns(timer->_expires, ns); timer->_softexpires = ktime_add_ns(timer->_softexpires, ns); -- cgit v1.2.3 From 3b53fbf4314594fa04544b02b2fc6e607912da18 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 6 Nov 2008 15:45:32 -0800 Subject: net: Fix recursive descent in __scm_destroy(). __scm_destroy() walks the list of file descriptors in the scm_fp_list pointed to by the scm_cookie argument. Those, in turn, can close sockets and invoke __scm_destroy() again. There is nothing which limits how deeply this can occur. The idea for how to fix this is from Linus. Basically, we do all of the fput()s at the top level by collecting all of the scm_fp_list objects hit by an fput(). Inside of the initial __scm_destroy() we keep running the list until it is empty. Signed-off-by: David S. Miller --- include/linux/sched.h | 2 ++ include/net/scm.h | 5 +++-- net/core/scm.c | 24 +++++++++++++++++++++--- 3 files changed, 26 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b483f39a7112..295b7c756ca6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1349,6 +1349,8 @@ struct task_struct { */ unsigned long timer_slack_ns; unsigned long default_timer_slack_ns; + + struct list_head *scm_work_list; }; /* diff --git a/include/net/scm.h b/include/net/scm.h index 06df126103ca..33e9986beb86 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -14,8 +14,9 @@ struct scm_fp_list { - int count; - struct file *fp[SCM_MAX_FD]; + struct list_head list; + int count; + struct file *fp[SCM_MAX_FD]; }; struct scm_cookie diff --git a/net/core/scm.c b/net/core/scm.c index 10f5c65f6a47..ab242cc1acca 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -75,6 +75,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) if (!fpl) return -ENOMEM; *fplp = fpl; + INIT_LIST_HEAD(&fpl->list); fpl->count = 0; } fpp = &fpl->fp[fpl->count]; @@ -106,9 +107,25 @@ void __scm_destroy(struct scm_cookie *scm) if (fpl) { scm->fp = NULL; - for (i=fpl->count-1; i>=0; i--) - fput(fpl->fp[i]); - kfree(fpl); + if (current->scm_work_list) { + list_add_tail(&fpl->list, current->scm_work_list); + } else { + LIST_HEAD(work_list); + + current->scm_work_list = &work_list; + + list_add(&fpl->list, &work_list); + while (!list_empty(&work_list)) { + fpl = list_first_entry(&work_list, struct scm_fp_list, list); + + list_del(&fpl->list); + for (i=fpl->count-1; i>=0; i--) + fput(fpl->fp[i]); + kfree(fpl); + } + + current->scm_work_list = NULL; + } } } @@ -284,6 +301,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL); if (new_fpl) { + INIT_LIST_HEAD(&new_fpl->list); for (i=fpl->count-1; i>=0; i--) get_file(fpl->fp[i]); memcpy(new_fpl, fpl, sizeof(*fpl)); -- cgit v1.2.3 From 
cd83e42c6b0413dcbb548c2ead799111ff7e6a13 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 7 Nov 2008 11:12:29 +1100 Subject: cpumask: new API, v2 - add cpumask_of() - add free_bootmem_cpumask_var() Signed-off-by: Rusty Russell Signed-off-by: Ingo Molnar --- include/linux/cpumask.h | 11 +++++++++++ lib/cpumask.c | 5 +++++ 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index c8e66619097b..31caa1bc620a 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -893,6 +893,12 @@ static inline void cpumask_copy(struct cpumask *dstp, */ #define cpumask_any_and(mask1, mask2) cpumask_first_and((mask1), (mask2)) +/** + * cpumask_of - the cpumask containing just a given cpu + * @cpu: the cpu (<= nr_cpu_ids) + */ +#define cpumask_of(cpu) (get_cpu_mask(cpu)) + /** * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * * @bitmap: the bitmap @@ -946,6 +952,7 @@ typedef struct cpumask *cpumask_var_t; bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); void alloc_bootmem_cpumask_var(cpumask_var_t *mask); void free_cpumask_var(cpumask_var_t mask); +void free_bootmem_cpumask_var(cpumask_var_t mask); #else typedef struct cpumask cpumask_var_t[1]; @@ -962,6 +969,10 @@ static inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask) static inline void free_cpumask_var(cpumask_var_t mask) { } + +static inline void free_bootmem_cpumask_var(cpumask_var_t mask) +{ +} #endif /* CONFIG_CPUMASK_OFFSTACK */ /* The pointer versions of the maps, these will become the primary versions. */ diff --git a/lib/cpumask.c b/lib/cpumask.c index 5ceb4211c834..2ebc3a9a7465 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -107,4 +107,9 @@ void free_cpumask_var(cpumask_var_t mask) kfree(mask); } EXPORT_SYMBOL(free_cpumask_var); + +void free_bootmem_cpumask_var(cpumask_var_t mask) +{ + free_bootmem((unsigned long)mask, cpumask_size()); +} #endif -- cgit v1.2.3 From 14800984706bf6936bbec5187f736e928be5c218 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Fri, 7 Nov 2008 15:26:50 +0100 Subject: sched: fine-tune SD_MC_INIT Tune SD_MC_INIT the same way as SD_CPU_INIT: unset SD_BALANCE_NEWIDLE, and set SD_WAKE_BALANCE. This improves vmark by 5%: vmark 132102 125968 125497 messages/sec avg 127855.66 .984 vmark 139404 131719 131272 messages/sec avg 134131.66 1.033 Signed-off-by: Mike Galbraith Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar # *DOCUMENTATION* --- include/linux/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/topology.h b/include/linux/topology.h index 34a7ee0ebed2..a8d840595b7e 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -120,10 +120,10 @@ void arch_update_cpu_topology(void); .wake_idx = 1, \ .forkexec_idx = 1, \ .flags = SD_LOAD_BALANCE \ - | SD_BALANCE_NEWIDLE \ | SD_BALANCE_FORK \ | SD_BALANCE_EXEC \ | SD_WAKE_AFFINE \ + | SD_WAKE_BALANCE \ | SD_SHARE_PKG_RESOURCES\ | BALANCE_FOR_MC_POWER, \ .last_balance = jiffies, \ -- cgit v1.2.3 From 52c642f33b14bfa1b00ef2b68296effb34a573f3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 7 Nov 2008 16:09:23 +0100 Subject: sched: fine-tune SD_SIBLING_INIT fine-tune the HT sched-domains parameters as well. 
On a HT capable box, this increases lat_ctx performance from 23.87 usecs to 1.49 usecs: # before $ ./lat_ctx -s 0 2 "size=0k ovr=1.89 2 23.87 # after $ ./lat_ctx -s 0 2 "size=0k ovr=1.84 2 1.49 Signed-off-by: Ingo Molnar --- include/linux/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/topology.h b/include/linux/topology.h index a8d840595b7e..117f1b7405cf 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -99,7 +99,7 @@ void arch_update_cpu_topology(void); | SD_BALANCE_FORK \ | SD_BALANCE_EXEC \ | SD_WAKE_AFFINE \ - | SD_WAKE_IDLE \ + | SD_WAKE_BALANCE \ | SD_SHARE_CPUPOWER, \ .last_balance = jiffies, \ .balance_interval = 1, \ -- cgit v1.2.3 From d1b268630875a7713b5d468a0c03403c5b721c8e Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Sat, 8 Nov 2008 21:37:46 +0100 Subject: mmc: struct device - replace bus_id with dev_name(), dev_set_name() Acked-by: Greg Kroah-Hartman Signed-Off-By: Kay Sievers Signed-off-by: Pierre Ossman --- drivers/mmc/core/bus.c | 3 +-- drivers/mmc/core/host.c | 5 ++--- drivers/mmc/core/sdio_bus.c | 3 +-- drivers/mmc/host/mmc_spi.c | 2 +- drivers/mmc/host/sdhci.c | 2 +- drivers/mmc/host/tifm_sd.c | 16 ++++++++-------- include/linux/mmc/card.h | 2 +- include/linux/mmc/host.h | 2 +- include/linux/mmc/sdio_func.h | 2 +- 9 files changed, 17 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c index 0d9b2d6f9ebf..f210a8ee6861 100644 --- a/drivers/mmc/core/bus.c +++ b/drivers/mmc/core/bus.c @@ -216,8 +216,7 @@ int mmc_add_card(struct mmc_card *card) int ret; const char *type; - snprintf(card->dev.bus_id, sizeof(card->dev.bus_id), - "%s:%04x", mmc_hostname(card->host), card->rca); + dev_set_name(&card->dev, "%s:%04x", mmc_hostname(card->host), card->rca); switch (card->type) { case MMC_TYPE_MMC: diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 6da80fd4d974..5e945e64ead7 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -73,8 +73,7 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev) if (err) goto free; - snprintf(host->class_dev.bus_id, BUS_ID_SIZE, - "mmc%d", host->index); + dev_set_name(&host->class_dev, "mmc%d", host->index); host->parent = dev; host->class_dev.parent = dev; @@ -121,7 +120,7 @@ int mmc_add_host(struct mmc_host *host) WARN_ON((host->caps & MMC_CAP_SDIO_IRQ) && !host->ops->enable_sdio_irq); - led_trigger_register_simple(host->class_dev.bus_id, &host->led); + led_trigger_register_simple(dev_name(&host->class_dev), &host->led); err = device_add(&host->class_dev); if (err) diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c index 233d0f9b3c4b..46284b527397 100644 --- a/drivers/mmc/core/sdio_bus.c +++ b/drivers/mmc/core/sdio_bus.c @@ -239,8 +239,7 @@ int sdio_add_func(struct sdio_func *func) { int ret; - snprintf(func->dev.bus_id, sizeof(func->dev.bus_id), - "%s:%d", mmc_card_id(func->card), func->num); + dev_set_name(&func->dev, "%s:%d", mmc_card_id(func->card), func->num); ret = device_add(&func->dev); if (ret == 0) diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index 07faf5412a1f..ad00e1632317 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -1348,7 +1348,7 @@ static int mmc_spi_probe(struct spi_device *spi) goto fail_add_host; dev_info(&spi->dev, "SD/MMC host %s%s%s%s%s\n", - mmc->class_dev.bus_id, + dev_name(&mmc->class_dev), host->dma_dev ? 
"" : ", no DMA", (host->pdata && host->pdata->get_ro) ? "" : ", no WP", diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 30f64b1f2354..4d010a984bed 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1733,7 +1733,7 @@ int sdhci_add_host(struct sdhci_host *host) mmc_add_host(mmc); printk(KERN_INFO "%s: SDHCI controller on %s [%s] using %s%s\n", - mmc_hostname(mmc), host->hw_name, mmc_dev(mmc)->bus_id, + mmc_hostname(mmc), host->hw_name, dev_name(mmc_dev(mmc)), (host->flags & SDHCI_USE_ADMA)?"A":"", (host->flags & SDHCI_USE_DMA)?"DMA":"PIO"); diff --git a/drivers/mmc/host/tifm_sd.c b/drivers/mmc/host/tifm_sd.c index 13844843e8de..82554ddec6b3 100644 --- a/drivers/mmc/host/tifm_sd.c +++ b/drivers/mmc/host/tifm_sd.c @@ -632,7 +632,7 @@ static void tifm_sd_request(struct mmc_host *mmc, struct mmc_request *mrq) if (host->req) { printk(KERN_ERR "%s : unfinished request detected\n", - sock->dev.bus_id); + dev_name(&sock->dev)); mrq->cmd->error = -ETIMEDOUT; goto err_out; } @@ -672,7 +672,7 @@ static void tifm_sd_request(struct mmc_host *mmc, struct mmc_request *mrq) ? PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE)) { printk(KERN_ERR "%s : scatterlist map failed\n", - sock->dev.bus_id); + dev_name(&sock->dev)); mrq->cmd->error = -ENOMEM; goto err_out; } @@ -684,7 +684,7 @@ static void tifm_sd_request(struct mmc_host *mmc, struct mmc_request *mrq) : PCI_DMA_FROMDEVICE); if (host->sg_len < 1) { printk(KERN_ERR "%s : scatterlist map failed\n", - sock->dev.bus_id); + dev_name(&sock->dev)); tifm_unmap_sg(sock, &host->bounce_buf, 1, r_data->flags & MMC_DATA_WRITE ? PCI_DMA_TODEVICE @@ -748,7 +748,7 @@ static void tifm_sd_end_cmd(unsigned long data) if (!mrq) { printk(KERN_ERR " %s : no request to complete?\n", - sock->dev.bus_id); + dev_name(&sock->dev)); spin_unlock_irqrestore(&sock->lock, flags); return; } @@ -789,7 +789,7 @@ static void tifm_sd_abort(unsigned long data) printk(KERN_ERR "%s : card failed to respond for a long period of time " "(%x, %x)\n", - host->dev->dev.bus_id, host->req->cmd->opcode, host->cmd_flags); + dev_name(&host->dev->dev), host->req->cmd->opcode, host->cmd_flags); tifm_eject(host->dev); } @@ -906,7 +906,7 @@ static int tifm_sd_initialize_host(struct tifm_sd *host) if (rc) { printk(KERN_ERR "%s : controller failed to reset\n", - sock->dev.bus_id); + dev_name(&sock->dev)); return -ENODEV; } @@ -933,7 +933,7 @@ static int tifm_sd_initialize_host(struct tifm_sd *host) if (rc) { printk(KERN_ERR "%s : card not ready - probe failed on initialization\n", - sock->dev.bus_id); + dev_name(&sock->dev)); return -ENODEV; } @@ -954,7 +954,7 @@ static int tifm_sd_probe(struct tifm_dev *sock) if (!(TIFM_SOCK_STATE_OCCUPIED & readl(sock->addr + SOCK_PRESENT_STATE))) { printk(KERN_WARNING "%s : card gone, unexpectedly\n", - sock->dev.bus_id); + dev_name(&sock->dev)); return rc; } diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index ee6e822d5994..403aa505f27e 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -130,7 +130,7 @@ struct mmc_card { #define mmc_card_set_blockaddr(c) ((c)->state |= MMC_STATE_BLOCKADDR) #define mmc_card_name(c) ((c)->cid.prod_name) -#define mmc_card_id(c) ((c)->dev.bus_id) +#define mmc_card_id(c) (dev_name(&(c)->dev)) #define mmc_list_to_card(l) container_of(l, struct mmc_card, node) #define mmc_get_drvdata(c) dev_get_drvdata(&(c)->dev) diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index bde891f64591..f842f234e44f 100644 --- a/include/linux/mmc/host.h +++ 
b/include/linux/mmc/host.h @@ -176,7 +176,7 @@ static inline void *mmc_priv(struct mmc_host *host) #define mmc_dev(x) ((x)->parent) #define mmc_classdev(x) (&(x)->class_dev) -#define mmc_hostname(x) ((x)->class_dev.bus_id) +#define mmc_hostname(x) (dev_name(&(x)->class_dev)) extern int mmc_suspend_host(struct mmc_host *, pm_message_t); extern int mmc_resume_host(struct mmc_host *); diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h index 07bee4a0d457..451bdfc85830 100644 --- a/include/linux/mmc/sdio_func.h +++ b/include/linux/mmc/sdio_func.h @@ -63,7 +63,7 @@ struct sdio_func { #define sdio_func_set_present(f) ((f)->state |= SDIO_STATE_PRESENT) -#define sdio_func_id(f) ((f)->dev.bus_id) +#define sdio_func_id(f) (dev_name(&(f)->dev)) #define sdio_get_drvdata(f) dev_get_drvdata(&(f)->dev) #define sdio_set_drvdata(f,d) dev_set_drvdata(&(f)->dev, d) -- cgit v1.2.3 From 058e3739f6b0753696db1952378de9e8d2a11735 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Sun, 9 Nov 2008 00:27:53 -0500 Subject: clarify usage expectations for cnt32_to_63() Currently, all existing users of cnt32_to_63() are fine since the CPU architectures where it is used don't do read access reordering, and user mode preemption is disabled already. It is nevertheless a good idea to better elaborate usage requirements wrt preemption, and use an explicit memory barrier on SMP to avoid different CPUs accessing the counter value in the wrong order. On UP a simple compiler barrier is sufficient. Signed-off-by: Nicolas Pitre Acked-by: Mathieu Desnoyers Signed-off-by: Linus Torvalds --- include/linux/cnt32_to_63.h | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cnt32_to_63.h b/include/linux/cnt32_to_63.h index 8c0f9505b48c..7605fdd1eb65 100644 --- a/include/linux/cnt32_to_63.h +++ b/include/linux/cnt32_to_63.h @@ -16,6 +16,7 @@ #include #include #include +#include /* this is used only to give gcc a clue about good code generation */ union cnt32_to_63 { @@ -53,11 +54,19 @@ union cnt32_to_63 { * needed increment. And any race in updating the value in memory is harmless * as the same value would simply be stored more than once. * - * The only restriction for the algorithm to work properly is that this - * code must be executed at least once per each half period of the 32-bit - * counter to properly update the state bit in memory. This is usually not a - * problem in practice, but if it is then a kernel timer could be scheduled - * to manage for this code to be executed often enough. + * The restrictions for the algorithm to work properly are: + * + * 1) this code must be called at least once per each half period of the + * 32-bit counter; + * + * 2) this code must not be preempted for a duration longer than the + * 32-bit counter half period minus the longest period between two + * calls to this code. + * + * Those requirements ensure proper update to the state bit in memory. + * This is usually not a problem in practice, but if it is then a kernel + * timer should be scheduled to manage for this code to be executed often + * enough. * * Note that the top bit (bit 63) in the returned value should be considered * as garbage. 
It is not cleared here because callers are likely to use a @@ -68,9 +77,10 @@ union cnt32_to_63 { */ #define cnt32_to_63(cnt_lo) \ ({ \ - static volatile u32 __m_cnt_hi; \ + static u32 __m_cnt_hi; \ union cnt32_to_63 __x; \ __x.hi = __m_cnt_hi; \ + smp_rmb(); \ __x.lo = (cnt_lo); \ if (unlikely((s32)(__x.hi ^ __x.lo) < 0)) \ __m_cnt_hi = __x.hi = (__x.hi ^ 0x80000000) + (__x.hi >> 31); \ -- cgit v1.2.3 From 984f2f377fdfd098f5ae58d09ee04d5e29e6112b Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 8 Nov 2008 20:24:19 +1100 Subject: cpumask: introduce new API, without changing anything, v3 Impact: cleanup Clean up based on feedback from Andrew Morton and others: - change to inline functions instead of macros - add __init to bootmem method - add a missing debug check Signed-off-by: Rusty Russell Signed-off-by: Ingo Molnar --- include/linux/cpumask.h | 58 ++++++++++++++++++++++++++++++++++++++++++++----- lib/cpumask.c | 3 ++- 2 files changed, 54 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 31caa1bc620a..21e1dd43e52a 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -564,12 +564,36 @@ static inline unsigned int cpumask_check(unsigned int cpu) } #if NR_CPUS == 1 -/* Uniprocesor. */ -#define cpumask_first(src) ({ (void)(src); 0; }) -#define cpumask_next(n, src) ({ (void)(src); 1; }) -#define cpumask_next_zero(n, src) ({ (void)(src); 1; }) -#define cpumask_next_and(n, srcp, andp) ({ (void)(srcp), (void)(andp); 1; }) -#define cpumask_any_but(mask, cpu) ({ (void)(mask); (void)(cpu); 0; }) +/* Uniprocessor. Assume all masks are "1". */ +static inline unsigned int cpumask_first(const struct cpumask *srcp) +{ + return 0; +} + +/* Valid inputs for n are -1 and 0. */ +static inline unsigned int cpumask_next(int n, const struct cpumask *srcp) +{ + return n+1; +} + +static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) +{ + return n+1; +} + +static inline unsigned int cpumask_next_and(int n, + const struct cpumask *srcp, + const struct cpumask *andp) +{ + return n+1; +} + +/* cpu must be a valid cpu, ie 0, so there's no other choice. */ +static inline unsigned int cpumask_any_but(const struct cpumask *mask, + unsigned int cpu) +{ + return 1; +} #define for_each_cpu(cpu, mask) \ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) @@ -620,10 +644,32 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *); int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); +/** + * for_each_cpu - iterate over every cpu in a mask + * @cpu: the (optionally unsigned) integer iterator + * @mask: the cpumask pointer + * + * After the loop, cpu is >= nr_cpu_ids. + */ #define for_each_cpu(cpu, mask) \ for ((cpu) = -1; \ (cpu) = cpumask_next((cpu), (mask)), \ (cpu) < nr_cpu_ids;) + +/** + * for_each_cpu_and - iterate over every cpu in both masks + * @cpu: the (optionally unsigned) integer iterator + * @mask: the first cpumask pointer + * @and: the second cpumask pointer + * + * This saves a temporary CPU mask in many places. It is equivalent to: + * struct cpumask tmp; + * cpumask_and(&tmp, &mask, &and); + * for_each_cpu(cpu, &tmp) + * ... + * + * After the loop, cpu is >= nr_cpu_ids. 
+ */ #define for_each_cpu_and(cpu, mask, and) \ for ((cpu) = -1; \ (cpu) = cpumask_next_and((cpu), (mask), (and)), \ diff --git a/lib/cpumask.c b/lib/cpumask.c index 2ebc3a9a7465..8d03f22c6ced 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -67,6 +67,7 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) { unsigned int i; + cpumask_check(cpu); for_each_cpu(i, mask) if (i != cpu) break; @@ -108,7 +109,7 @@ void free_cpumask_var(cpumask_var_t mask) } EXPORT_SYMBOL(free_cpumask_var); -void free_bootmem_cpumask_var(cpumask_var_t mask) +void __init free_bootmem_cpumask_var(cpumask_var_t mask) { free_bootmem((unsigned long)mask, cpumask_size()); } -- cgit v1.2.3 From 8a8bc22332ee6ea49137508467a76aa7f4367719 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 10 Nov 2008 14:48:21 +0900 Subject: libata: revert convert-to-block-tagging patches This patch reverts the following three commits which convert libata to use block layer tagging. 43a49cbdf31e812c0d8f553d433b09b421f5d52c e013e13bf605b9e6b702adffbe2853cfc60e7806 2fca5ccf97d2c28bcfce44f5b07d85e74e3cd18e Although using block layer tagging is the right direction, due to the tight coupling among tag number, data structure allocation and hardware command slot allocation, libata doesn't work correctly with the current conversion. The biggest problem is guaranteeing that tag 0 is always used for non-NCQ commands. Due to the way blk-tag is implemented and how SCSI starts and finishes requests, such guarantee can't be made. I'm not sure whether this would actually break any low level driver but it doesn't look like a good idea to break such assumption given the frailty of ATA controllers. So, for the time being, keep using the old dumb in-libata qc allocation. Signed-off-by: Tejun Heo Cc: Jens Axobe Cc: Jeff Garzik Signed-off-by: Linus Torvalds --- drivers/ata/libata-core.c | 66 ++++++++++++++++++++++++++++++++++++++++++----- drivers/ata/libata-scsi.c | 23 ++--------------- drivers/ata/libata.h | 19 ++------------ include/linux/libata.h | 1 + 4 files changed, 65 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 622350d9b2e3..0cd3ad497136 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -1712,6 +1712,8 @@ unsigned ata_exec_internal_sg(struct ata_device *dev, else tag = 0; + if (test_and_set_bit(tag, &ap->qc_allocated)) + BUG(); qc = __ata_qc_from_tag(ap, tag); qc->tag = tag; @@ -4562,6 +4564,37 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words) #endif /* __BIG_ENDIAN */ } +/** + * ata_qc_new - Request an available ATA command, for queueing + * @ap: Port associated with device @dev + * @dev: Device from whom we request an available command structure + * + * LOCKING: + * None. + */ + +static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap) +{ + struct ata_queued_cmd *qc = NULL; + unsigned int i; + + /* no command while frozen */ + if (unlikely(ap->pflags & ATA_PFLAG_FROZEN)) + return NULL; + + /* the last tag is reserved for internal command. */ + for (i = 0; i < ATA_MAX_QUEUE - 1; i++) + if (!test_and_set_bit(i, &ap->qc_allocated)) { + qc = __ata_qc_from_tag(ap, i); + break; + } + + if (qc) + qc->tag = i; + + return qc; +} + /** * ata_qc_new_init - Request an available ATA command, and initialize it * @dev: Device from whom we request an available command structure @@ -4571,20 +4604,16 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words) * None. 
*/ -struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag) +struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev) { struct ata_port *ap = dev->link->ap; struct ata_queued_cmd *qc; - if (unlikely(ap->pflags & ATA_PFLAG_FROZEN)) - return NULL; - - qc = __ata_qc_from_tag(ap, tag); + qc = ata_qc_new(ap); if (qc) { qc->scsicmd = NULL; qc->ap = ap; qc->dev = dev; - qc->tag = tag; ata_qc_reinit(qc); } @@ -4592,6 +4621,31 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag) return qc; } +/** + * ata_qc_free - free unused ata_queued_cmd + * @qc: Command to complete + * + * Designed to free unused ata_queued_cmd object + * in case something prevents using it. + * + * LOCKING: + * spin_lock_irqsave(host lock) + */ +void ata_qc_free(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + unsigned int tag; + + WARN_ON(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ + + qc->flags = 0; + tag = qc->tag; + if (likely(ata_tag_valid(tag))) { + qc->tag = ATA_TAG_POISON; + clear_bit(tag, &ap->qc_allocated); + } +} + void __ata_qc_complete(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 3fa75eac135d..47c7afcb36f2 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -709,11 +709,7 @@ static struct ata_queued_cmd *ata_scsi_qc_new(struct ata_device *dev, { struct ata_queued_cmd *qc; - if (cmd->request->tag != -1) - qc = ata_qc_new_init(dev, cmd->request->tag); - else - qc = ata_qc_new_init(dev, 0); - + qc = ata_qc_new_init(dev); if (qc) { qc->scsicmd = cmd; qc->scsidone = done; @@ -1108,17 +1104,7 @@ static int ata_scsi_dev_config(struct scsi_device *sdev, depth = min(sdev->host->can_queue, ata_id_queue_depth(dev->id)); depth = min(ATA_MAX_QUEUE - 1, depth); - - /* - * If this device is behind a port multiplier, we have - * to share the tag map between all devices on that PMP. - * Set up the shared tag map here and we get automatic. 
- */ - if (dev->link->ap->pmp_link) - scsi_init_shared_tag_map(sdev->host, ATA_MAX_QUEUE - 1); - - scsi_set_tag_type(sdev, MSG_SIMPLE_TAG); - scsi_activate_tcq(sdev, depth); + scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, depth); } return 0; @@ -1958,11 +1944,6 @@ static unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf) hdr[1] |= (1 << 7); memcpy(rbuf, hdr, sizeof(hdr)); - - /* if ncq, set tags supported */ - if (ata_id_has_ncq(args->id)) - rbuf[7] |= (1 << 1); - memcpy(&rbuf[8], "ATA ", 8); ata_id_string(args->id, &rbuf[16], ATA_ID_PROD, 16); ata_id_string(args->id, &rbuf[32], ATA_ID_FW_REV, 4); diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index d3831d39bdaa..fe2839e58774 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -74,7 +74,7 @@ extern struct ata_link *ata_dev_phys_link(struct ata_device *dev); extern void ata_force_cbl(struct ata_port *ap); extern u64 ata_tf_to_lba(const struct ata_taskfile *tf); extern u64 ata_tf_to_lba48(const struct ata_taskfile *tf); -extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag); +extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev); extern int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev, u64 block, u32 n_block, unsigned int tf_flags, unsigned int tag); @@ -103,6 +103,7 @@ extern int ata_dev_configure(struct ata_device *dev); extern int sata_down_spd_limit(struct ata_link *link); extern int ata_down_xfermask_limit(struct ata_device *dev, unsigned int sel); extern void ata_sg_clean(struct ata_queued_cmd *qc); +extern void ata_qc_free(struct ata_queued_cmd *qc); extern void ata_qc_issue(struct ata_queued_cmd *qc); extern void __ata_qc_complete(struct ata_queued_cmd *qc); extern int atapi_check_dma(struct ata_queued_cmd *qc); @@ -118,22 +119,6 @@ extern struct ata_port *ata_port_alloc(struct ata_host *host); extern void ata_dev_enable_pm(struct ata_device *dev, enum link_pm policy); extern void ata_lpm_schedule(struct ata_port *ap, enum link_pm); -/** - * ata_qc_free - free unused ata_queued_cmd - * @qc: Command to complete - * - * Designed to free unused ata_queued_cmd object - * in case something prevents using it. - * - * LOCKING: - * spin_lock_irqsave(host lock) - */ -static inline void ata_qc_free(struct ata_queued_cmd *qc) -{ - qc->flags = 0; - qc->tag = ATA_TAG_POISON; -} - /* libata-acpi.c */ #ifdef CONFIG_ATA_ACPI extern void ata_acpi_associate_sata_port(struct ata_port *ap); diff --git a/include/linux/libata.h b/include/linux/libata.h index c7665a4134c5..59b0f1c807b5 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -698,6 +698,7 @@ struct ata_port { unsigned int cbl; /* cable type; ATA_CBL_xxx */ struct ata_queued_cmd qcmd[ATA_MAX_QUEUE]; + unsigned long qc_allocated; unsigned int qc_active; int nr_active_links; /* #links with active qcs */ -- cgit v1.2.3 From fd0fcf5c29dd0339c5f5d86eb2cbe9fdad5bcd73 Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Thu, 6 Nov 2008 10:49:21 +0000 Subject: ssb: Fix DMA-API compilation for non-PCI systems This fixes compilation of the SSB DMA-API code on non-PCI platforms. Signed-off-by: Michael Buesch Signed-off-by: David S. 
Miller --- include/linux/ssb/ssb.h | 42 +++++++++++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index e530026eedf7..17d9b58f6379 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -427,12 +427,16 @@ static inline int ssb_dma_mapping_error(struct ssb_device *dev, dma_addr_t addr) { switch (dev->bus->bustype) { case SSB_BUSTYPE_PCI: +#ifdef CONFIG_SSB_PCIHOST return pci_dma_mapping_error(dev->bus->host_pci, addr); +#endif + break; case SSB_BUSTYPE_SSB: return dma_mapping_error(dev->dev, addr); default: - __ssb_dma_not_implemented(dev); + break; } + __ssb_dma_not_implemented(dev); return -ENOSYS; } @@ -441,12 +445,16 @@ static inline dma_addr_t ssb_dma_map_single(struct ssb_device *dev, void *p, { switch (dev->bus->bustype) { case SSB_BUSTYPE_PCI: +#ifdef CONFIG_SSB_PCIHOST return pci_map_single(dev->bus->host_pci, p, size, dir); +#endif + break; case SSB_BUSTYPE_SSB: return dma_map_single(dev->dev, p, size, dir); default: - __ssb_dma_not_implemented(dev); + break; } + __ssb_dma_not_implemented(dev); return 0; } @@ -455,14 +463,18 @@ static inline void ssb_dma_unmap_single(struct ssb_device *dev, dma_addr_t dma_a { switch (dev->bus->bustype) { case SSB_BUSTYPE_PCI: +#ifdef CONFIG_SSB_PCIHOST pci_unmap_single(dev->bus->host_pci, dma_addr, size, dir); return; +#endif + break; case SSB_BUSTYPE_SSB: dma_unmap_single(dev->dev, dma_addr, size, dir); return; default: - __ssb_dma_not_implemented(dev); + break; } + __ssb_dma_not_implemented(dev); } static inline void ssb_dma_sync_single_for_cpu(struct ssb_device *dev, @@ -472,15 +484,19 @@ static inline void ssb_dma_sync_single_for_cpu(struct ssb_device *dev, { switch (dev->bus->bustype) { case SSB_BUSTYPE_PCI: +#ifdef CONFIG_SSB_PCIHOST pci_dma_sync_single_for_cpu(dev->bus->host_pci, dma_addr, size, dir); return; +#endif + break; case SSB_BUSTYPE_SSB: dma_sync_single_for_cpu(dev->dev, dma_addr, size, dir); return; default: - __ssb_dma_not_implemented(dev); + break; } + __ssb_dma_not_implemented(dev); } static inline void ssb_dma_sync_single_for_device(struct ssb_device *dev, @@ -490,15 +506,19 @@ static inline void ssb_dma_sync_single_for_device(struct ssb_device *dev, { switch (dev->bus->bustype) { case SSB_BUSTYPE_PCI: +#ifdef CONFIG_SSB_PCIHOST pci_dma_sync_single_for_device(dev->bus->host_pci, dma_addr, size, dir); return; +#endif + break; case SSB_BUSTYPE_SSB: dma_sync_single_for_device(dev->dev, dma_addr, size, dir); return; default: - __ssb_dma_not_implemented(dev); + break; } + __ssb_dma_not_implemented(dev); } static inline void ssb_dma_sync_single_range_for_cpu(struct ssb_device *dev, @@ -509,17 +529,21 @@ static inline void ssb_dma_sync_single_range_for_cpu(struct ssb_device *dev, { switch (dev->bus->bustype) { case SSB_BUSTYPE_PCI: +#ifdef CONFIG_SSB_PCIHOST /* Just sync everything. That's all the PCI API can do. */ pci_dma_sync_single_for_cpu(dev->bus->host_pci, dma_addr, offset + size, dir); return; +#endif + break; case SSB_BUSTYPE_SSB: dma_sync_single_range_for_cpu(dev->dev, dma_addr, offset, size, dir); return; default: - __ssb_dma_not_implemented(dev); + break; } + __ssb_dma_not_implemented(dev); } static inline void ssb_dma_sync_single_range_for_device(struct ssb_device *dev, @@ -530,17 +554,21 @@ static inline void ssb_dma_sync_single_range_for_device(struct ssb_device *dev, { switch (dev->bus->bustype) { case SSB_BUSTYPE_PCI: +#ifdef CONFIG_SSB_PCIHOST /* Just sync everything. 
That's all the PCI API can do. */ pci_dma_sync_single_for_device(dev->bus->host_pci, dma_addr, offset + size, dir); return; +#endif + break; case SSB_BUSTYPE_SSB: dma_sync_single_range_for_device(dev->dev, dma_addr, offset, size, dir); return; default: - __ssb_dma_not_implemented(dev); + break; } + __ssb_dma_not_implemented(dev); } -- cgit v1.2.3 From ad474caca3e2a0550b7ce0706527ad5ab389a4d4 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 10 Nov 2008 15:39:30 +0100 Subject: fix for account_group_exec_runtime(), make sure ->signal can't be freed under rq->lock Impact: fix hang/crash on ia64 under high load This is ugly, but the simplest patch by far. Unlike other similar routines, account_group_exec_runtime() could be called "implicitly" from within scheduler after exit_notify(). This means we can race with the parent doing release_task(), we can't just check ->signal != NULL. Change __exit_signal() to do spin_unlock_wait(&task_rq(tsk)->lock) before __cleanup_signal() to make sure ->signal can't be freed under task_rq(tsk)->lock. Note that task_rq_unlock_wait() doesn't care about the case when tsk changes cpu/rq under us, this should be OK. Thanks to Ingo who nacked my previous buggy patch. Signed-off-by: Oleg Nesterov Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar Reported-by: Doug Chapman --- include/linux/sched.h | 1 + kernel/exit.c | 5 +++++ kernel/sched.c | 8 ++++++++ 3 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 295b7c756ca6..644ffbda17ca 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -247,6 +247,7 @@ extern void init_idle(struct task_struct *idle, int cpu); extern void init_idle_bootup_task(struct task_struct *idle); extern int runqueue_is_locked(void); +extern void task_rq_unlock_wait(struct task_struct *p); extern cpumask_t nohz_cpu_mask; #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) diff --git a/kernel/exit.c b/kernel/exit.c index 80137a5d9467..ae2b92be5fae 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -141,6 +141,11 @@ static void __exit_signal(struct task_struct *tsk) if (sig) { flush_sigqueue(&sig->shared_pending); taskstats_tgid_free(sig); + /* + * Make sure ->signal can't go away under rq->lock, + * see account_group_exec_runtime(). 
+ */ + task_rq_unlock_wait(tsk); __cleanup_signal(sig); } } diff --git a/kernel/sched.c b/kernel/sched.c index f3149244e324..50a21f964679 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -969,6 +969,14 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) } } +void task_rq_unlock_wait(struct task_struct *p) +{ + struct rq *rq = task_rq(p); + + smp_mb(); /* spin-unlock-wait is not a full memory barrier */ + spin_unlock_wait(&rq->lock); +} + static void __task_rq_unlock(struct rq *rq) __releases(rq->lock) { -- cgit v1.2.3 From 0906dd9df2f79042cfa82d8388895be7cbe7a51b Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Tue, 11 Nov 2008 14:51:23 +0000 Subject: telephony: trivial: fix up email address Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/telephony/phonedev.c | 2 +- include/linux/telephony.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/telephony/phonedev.c b/drivers/telephony/phonedev.c index 37caf4d69037..b52cc830c0b4 100644 --- a/drivers/telephony/phonedev.c +++ b/drivers/telephony/phonedev.c @@ -8,7 +8,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Author: Alan Cox, + * Author: Alan Cox, * * Fixes: Mar 01 2000 Thomas Sparr, * phone_register_device now works with unit!=PHONE_UNIT_ANY diff --git a/include/linux/telephony.h b/include/linux/telephony.h index 5b2b6261f193..f63afe330add 100644 --- a/include/linux/telephony.h +++ b/include/linux/telephony.h @@ -14,7 +14,7 @@ * Authors: Ed Okerson, * Greg Herlein, * - * Contributors: Alan Cox, + * Contributors: Alan Cox, * David W. Erhart, * * IN NO EVENT SHALL QUICKNET TECHNOLOGIES, INC. BE LIABLE TO ANY PARTY FOR -- cgit v1.2.3
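
As a concrete illustration of the tightened cnt32_to_63() usage rules documented earlier in this series, the sketch below shows one way a platform clock routine might call the macro with preemption disabled. This is a hypothetical example, not code from any of the commits in this log: the counter-read helper, its 1 MHz rate, and the function name are all assumptions made for illustration.

#include <linux/types.h>
#include <linux/preempt.h>
#include <linux/cnt32_to_63.h>

/* Assumed helper: reads a free-running 32-bit hardware counter at 1 MHz. */
extern u32 read_hw_counter(void);

unsigned long long my_sched_clock(void)
{
	u64 ticks;

	/*
	 * Per restriction 2 above: don't let preemption stretch the window
	 * between reading the saved high bits and the hardware counter.
	 */
	preempt_disable();
	ticks = cnt32_to_63(read_hw_counter());
	preempt_enable();

	/* Bit 63 of the returned value is garbage; mask it off. */
	ticks &= 0x7fffffffffffffffULL;

	/* 1 MHz counter: one tick is 1000 ns. */
	return ticks * 1000;
}

Note that the macro keeps its state (__m_cnt_hi) per expansion, so a given counter should be converted from exactly one call site, as in the single function above; the "called at least once per half period" requirement then falls on whatever periodically invokes that function.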
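
To show what the ssb_dma_* fall-through change means for callers, here is a minimal, hypothetical driver fragment; the function name and buffer handling are invented for illustration and are not taken from the ssb patch itself.

#include <linux/dma-mapping.h>
#include <linux/ssb/ssb.h>

static int example_map_tx_buffer(struct ssb_device *dev, void *buf,
				 size_t len, dma_addr_t *mapping)
{
	/*
	 * On a PCI-hosted bus this resolves to pci_map_single(); on an
	 * SSB-hosted bus to dma_map_single().  With the fix above, a kernel
	 * built without CONFIG_SSB_PCIHOST still compiles; the PCI case then
	 * falls through to __ssb_dma_not_implemented().
	 */
	*mapping = ssb_dma_map_single(dev, buf, len, DMA_TO_DEVICE);
	if (ssb_dma_mapping_error(dev, *mapping))
		return -EIO;

	return 0;
}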