diff options
118 files changed, 1905 insertions, 1445 deletions
diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt index 7be15e44d481..82a5d250d75e 100644 --- a/Documentation/fault-injection/fault-injection.txt +++ b/Documentation/fault-injection/fault-injection.txt @@ -143,8 +143,7 @@ o provide a way to configure fault attributes failslab, fail_page_alloc, and fail_make_request use this way. Helper functions: - init_fault_attr_dentries(entries, attr, name); - void cleanup_fault_attr_dentries(entries); + fault_create_debugfs_attr(name, parent, attr); - module parameters diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index ea0bace0124a..43f48098220d 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -296,15 +296,6 @@ Who: Ravikiran Thirumalai <kiran@scalex86.org> --------------------------- -What: CONFIG_THERMAL_HWMON -When: January 2009 -Why: This option was introduced just to allow older lm-sensors userspace - to keep working over the upgrade to 2.6.26. At the scheduled time of - removal fixed lm-sensors (2.x or 3.x) should be readily available. -Who: Rene Herman <rene.herman@gmail.com> - ---------------------------- - What: Code that is now under CONFIG_WIRELESS_EXT_SYSFS (in net/core/net-sysfs.c) When: After the only user (hal) has seen a release with the patches diff --git a/Documentation/frv/booting.txt b/Documentation/frv/booting.txt index ace200b7c214..37c4d84a0e57 100644 --- a/Documentation/frv/booting.txt +++ b/Documentation/frv/booting.txt @@ -106,13 +106,20 @@ separated by spaces: To use the first on-chip serial port at baud rate 115200, no parity, 8 bits, and no flow control. - (*) root=/dev/<xxxx> + (*) root=<xxxx> - This specifies the device upon which the root filesystem resides. For - example: + This specifies the device upon which the root filesystem resides. 
It + may be specified by major and minor number, device path, or even + partition uuid, if supported. For example: /dev/nfs NFS root filesystem /dev/mtdblock3 Fourth RedBoot partition on the System Flash + PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF/PARTNROFF=1 + first partition after the partition with the given UUID + 253:0 Device with major 253 and minor 0 + + Authoritative information can be found in + "Documentation/kernel-parameters.txt". (*) rw diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index 72ba8d51dbc1..845a191004b1 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -292,6 +292,7 @@ Code Seq#(hex) Include File Comments <mailto:buk@buks.ipn.de> 0xA0 all linux/sdp/sdp.h Industrial Device Project <mailto:kenji@bitgate.com> +0xA2 00-0F arch/tile/include/asm/hardwall.h 0xA3 80-8F Port ACL in development: <mailto:tlewis@mindspring.com> 0xA3 90-9F linux/dtlk.h diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 26a83743af19..865e39f1850c 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -163,6 +163,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. See also Documentation/power/pm.txt, pci=noacpi + acpi_rsdp= [ACPI,EFI,KEXEC] + Pass the RSDP address to the kernel, mostly used + on machines running EFI runtime service to boot the + second kernel for kdump. + acpi_apic_instance= [ACPI, IOAPIC] Format: <int> 2: use 2nd APIC table, if available @@ -2240,6 +2245,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. ro [KNL] Mount root device read-only on boot root= [KNL] Root filesystem + See name_to_dev_t comment in init/do_mounts.c. 
rootdelay= [KNL] Delay (in seconds) to pause before attempting to mount the root filesystem diff --git a/Documentation/m68k/kernel-options.txt b/Documentation/m68k/kernel-options.txt index c93bed66e25d..97d45f276fe6 100644 --- a/Documentation/m68k/kernel-options.txt +++ b/Documentation/m68k/kernel-options.txt @@ -129,6 +129,20 @@ decimal 11 is the major of SCSI CD-ROMs, and the minor 0 stands for the first of these. You can find out all valid major numbers by looking into include/linux/major.h. +In addition to major and minor numbers, if the device containing your +root partition uses a partition table format with unique partition +identifiers, then you may use them. For instance, +"root=PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF". It is also +possible to reference another partition on the same device using a +known partition UUID as the starting point. For example, +if partition 5 of the device has the UUID of +00112233-4455-6677-8899-AABBCCDDEEFF then partition 3 may be found as +follows: + PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF/PARTNROFF=-2 + +Authoritative information can be found in +"Documentation/kernel-parameters.txt". 
+ 2.2) ro, rw ----------- diff --git a/arch/cris/arch-v10/drivers/sync_serial.c b/arch/cris/arch-v10/drivers/sync_serial.c index 850265373611..466af40c5822 100644 --- a/arch/cris/arch-v10/drivers/sync_serial.c +++ b/arch/cris/arch-v10/drivers/sync_serial.c @@ -158,7 +158,7 @@ static int sync_serial_open(struct inode *inode, struct file *file); static int sync_serial_release(struct inode *inode, struct file *file); static unsigned int sync_serial_poll(struct file *filp, poll_table *wait); -static int sync_serial_ioctl(struct file *file, +static long sync_serial_ioctl(struct file *file, unsigned int cmd, unsigned long arg); static ssize_t sync_serial_write(struct file *file, const char *buf, size_t count, loff_t *ppos); @@ -625,11 +625,11 @@ static int sync_serial_open(struct inode *inode, struct file *file) *R_IRQ_MASK1_SET = 1 << port->data_avail_bit; DEBUG(printk(KERN_DEBUG "sser%d rec started\n", dev)); } - ret = 0; + err = 0; out: mutex_unlock(&sync_serial_mutex); - return ret; + return err; } static int sync_serial_release(struct inode *inode, struct file *file) diff --git a/arch/cris/arch-v10/kernel/irq.c b/arch/cris/arch-v10/kernel/irq.c index 907cfb5a873d..ba0e5965d6e3 100644 --- a/arch/cris/arch-v10/kernel/irq.c +++ b/arch/cris/arch-v10/kernel/irq.c @@ -20,6 +20,9 @@ #define crisv10_mask_irq(irq_nr) (*R_VECT_MASK_CLR = 1 << (irq_nr)); #define crisv10_unmask_irq(irq_nr) (*R_VECT_MASK_SET = 1 << (irq_nr)); +extern void kgdb_init(void); +extern void breakpoint(void); + /* don't use set_int_vector, it bypasses the linux interrupt handlers. it is * global just so that the kernel gdb can use it. 
*/ diff --git a/arch/cris/include/asm/thread_info.h b/arch/cris/include/asm/thread_info.h index 29b74a105830..332f19c54557 100644 --- a/arch/cris/include/asm/thread_info.h +++ b/arch/cris/include/asm/thread_info.h @@ -11,8 +11,6 @@ #ifdef __KERNEL__ -#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR - #ifndef __ASSEMBLY__ #include <asm/types.h> #include <asm/processor.h> @@ -67,8 +65,10 @@ struct thread_info { #define init_thread_info (init_thread_union.thread_info) +#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR /* thread information allocation */ -#define alloc_thread_info(tsk, node) ((struct thread_info *) __get_free_pages(GFP_KERNEL,1)) +#define alloc_thread_info_node(tsk, node) \ + ((struct thread_info *) __get_free_pages(GFP_KERNEL, 1)) #define free_thread_info(ti) free_pages((unsigned long) (ti), 1) #endif /* !__ASSEMBLY__ */ diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index 849ab2fa1f5c..aec60dc06007 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -2,3 +2,41 @@ include include/asm-generic/Kbuild.asm header-y += ucontext.h header-y += hardwall.h + +generic-y += bug.h +generic-y += bugs.h +generic-y += cputime.h +generic-y += device.h +generic-y += div64.h +generic-y += emergency-restart.h +generic-y += errno.h +generic-y += fb.h +generic-y += fcntl.h +generic-y += ioctl.h +generic-y += ioctls.h +generic-y += ipc.h +generic-y += ipcbuf.h +generic-y += irq_regs.h +generic-y += kdebug.h +generic-y += local.h +generic-y += module.h +generic-y += msgbuf.h +generic-y += mutex.h +generic-y += param.h +generic-y += parport.h +generic-y += poll.h +generic-y += posix_types.h +generic-y += resource.h +generic-y += scatterlist.h +generic-y += sembuf.h +generic-y += serial.h +generic-y += shmbuf.h +generic-y += shmparam.h +generic-y += socket.h +generic-y += sockios.h +generic-y += statfs.h +generic-y += termbits.h +generic-y += termios.h +generic-y += types.h +generic-y += ucontext.h +generic-y += xor.h diff --git 
a/arch/tile/include/asm/bug.h b/arch/tile/include/asm/bug.h deleted file mode 100644 index b12fd89e42e9..000000000000 --- a/arch/tile/include/asm/bug.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/bug.h> diff --git a/arch/tile/include/asm/bugs.h b/arch/tile/include/asm/bugs.h deleted file mode 100644 index 61791e1ad9f5..000000000000 --- a/arch/tile/include/asm/bugs.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/bugs.h> diff --git a/arch/tile/include/asm/cputime.h b/arch/tile/include/asm/cputime.h deleted file mode 100644 index 6d68ad7e0ea3..000000000000 --- a/arch/tile/include/asm/cputime.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/cputime.h> diff --git a/arch/tile/include/asm/device.h b/arch/tile/include/asm/device.h deleted file mode 100644 index f0a4c256403b..000000000000 --- a/arch/tile/include/asm/device.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/device.h> diff --git a/arch/tile/include/asm/div64.h b/arch/tile/include/asm/div64.h deleted file mode 100644 index 6cd978cefb28..000000000000 --- a/arch/tile/include/asm/div64.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/div64.h> diff --git a/arch/tile/include/asm/emergency-restart.h b/arch/tile/include/asm/emergency-restart.h deleted file mode 100644 index 3711bd9d50bd..000000000000 --- a/arch/tile/include/asm/emergency-restart.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/emergency-restart.h> diff --git a/arch/tile/include/asm/errno.h b/arch/tile/include/asm/errno.h deleted file mode 100644 index 4c82b503d92f..000000000000 --- a/arch/tile/include/asm/errno.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/errno.h> diff --git a/arch/tile/include/asm/fb.h b/arch/tile/include/asm/fb.h deleted file mode 100644 index 3a4988e8df45..000000000000 --- a/arch/tile/include/asm/fb.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/fb.h> diff --git a/arch/tile/include/asm/fcntl.h b/arch/tile/include/asm/fcntl.h deleted file mode 100644 index 46ab12db5739..000000000000 --- 
a/arch/tile/include/asm/fcntl.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/fcntl.h> diff --git a/arch/tile/include/asm/fixmap.h b/arch/tile/include/asm/fixmap.h index 51537ff9265a..c66f7933beaa 100644 --- a/arch/tile/include/asm/fixmap.h +++ b/arch/tile/include/asm/fixmap.h @@ -75,12 +75,6 @@ extern void __set_fixmap(enum fixed_addresses idx, #define set_fixmap(idx, phys) \ __set_fixmap(idx, phys, PAGE_KERNEL) -/* - * Some hardware wants to get fixmapped without caching. - */ -#define set_fixmap_nocache(idx, phys) \ - __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE) - #define clear_fixmap(idx) \ __set_fixmap(idx, 0, __pgprot(0)) diff --git a/arch/tile/include/asm/ioctl.h b/arch/tile/include/asm/ioctl.h deleted file mode 100644 index b279fe06dfe5..000000000000 --- a/arch/tile/include/asm/ioctl.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ioctl.h> diff --git a/arch/tile/include/asm/ioctls.h b/arch/tile/include/asm/ioctls.h deleted file mode 100644 index ec34c760665e..000000000000 --- a/arch/tile/include/asm/ioctls.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ioctls.h> diff --git a/arch/tile/include/asm/ipc.h b/arch/tile/include/asm/ipc.h deleted file mode 100644 index a46e3d9c2a3f..000000000000 --- a/arch/tile/include/asm/ipc.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ipc.h> diff --git a/arch/tile/include/asm/ipcbuf.h b/arch/tile/include/asm/ipcbuf.h deleted file mode 100644 index 84c7e51cb6d0..000000000000 --- a/arch/tile/include/asm/ipcbuf.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ipcbuf.h> diff --git a/arch/tile/include/asm/irq_regs.h b/arch/tile/include/asm/irq_regs.h deleted file mode 100644 index 3dd9c0b70270..000000000000 --- a/arch/tile/include/asm/irq_regs.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/irq_regs.h> diff --git a/arch/tile/include/asm/kdebug.h b/arch/tile/include/asm/kdebug.h deleted file mode 100644 index 6ece1b037665..000000000000 --- a/arch/tile/include/asm/kdebug.h +++ /dev/null @@ -1 +0,0 @@ 
-#include <asm-generic/kdebug.h> diff --git a/arch/tile/include/asm/local.h b/arch/tile/include/asm/local.h deleted file mode 100644 index c11c530f74d0..000000000000 --- a/arch/tile/include/asm/local.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/local.h> diff --git a/arch/tile/include/asm/module.h b/arch/tile/include/asm/module.h deleted file mode 100644 index 1e4b79fe8584..000000000000 --- a/arch/tile/include/asm/module.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/module.h> diff --git a/arch/tile/include/asm/msgbuf.h b/arch/tile/include/asm/msgbuf.h deleted file mode 100644 index 809134c644a6..000000000000 --- a/arch/tile/include/asm/msgbuf.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/msgbuf.h> diff --git a/arch/tile/include/asm/mutex.h b/arch/tile/include/asm/mutex.h deleted file mode 100644 index ff6101aa2c71..000000000000 --- a/arch/tile/include/asm/mutex.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/mutex-dec.h> diff --git a/arch/tile/include/asm/param.h b/arch/tile/include/asm/param.h deleted file mode 100644 index 965d45427975..000000000000 --- a/arch/tile/include/asm/param.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/param.h> diff --git a/arch/tile/include/asm/parport.h b/arch/tile/include/asm/parport.h deleted file mode 100644 index cf252af64590..000000000000 --- a/arch/tile/include/asm/parport.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/parport.h> diff --git a/arch/tile/include/asm/poll.h b/arch/tile/include/asm/poll.h deleted file mode 100644 index c98509d3149e..000000000000 --- a/arch/tile/include/asm/poll.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/poll.h> diff --git a/arch/tile/include/asm/posix_types.h b/arch/tile/include/asm/posix_types.h deleted file mode 100644 index 22cae6230ceb..000000000000 --- a/arch/tile/include/asm/posix_types.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/posix_types.h> diff --git a/arch/tile/include/asm/resource.h b/arch/tile/include/asm/resource.h deleted file 
mode 100644 index 04bc4db8921b..000000000000 --- a/arch/tile/include/asm/resource.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/resource.h> diff --git a/arch/tile/include/asm/scatterlist.h b/arch/tile/include/asm/scatterlist.h deleted file mode 100644 index 35d786fe93ae..000000000000 --- a/arch/tile/include/asm/scatterlist.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/scatterlist.h> diff --git a/arch/tile/include/asm/sembuf.h b/arch/tile/include/asm/sembuf.h deleted file mode 100644 index 7673b83cfef7..000000000000 --- a/arch/tile/include/asm/sembuf.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/sembuf.h> diff --git a/arch/tile/include/asm/serial.h b/arch/tile/include/asm/serial.h deleted file mode 100644 index a0cb0caff152..000000000000 --- a/arch/tile/include/asm/serial.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/serial.h> diff --git a/arch/tile/include/asm/shmbuf.h b/arch/tile/include/asm/shmbuf.h deleted file mode 100644 index 83c05fc2de38..000000000000 --- a/arch/tile/include/asm/shmbuf.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/shmbuf.h> diff --git a/arch/tile/include/asm/shmparam.h b/arch/tile/include/asm/shmparam.h deleted file mode 100644 index 93f30deb95d0..000000000000 --- a/arch/tile/include/asm/shmparam.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/shmparam.h> diff --git a/arch/tile/include/asm/socket.h b/arch/tile/include/asm/socket.h deleted file mode 100644 index 6b71384b9d8b..000000000000 --- a/arch/tile/include/asm/socket.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/socket.h> diff --git a/arch/tile/include/asm/sockios.h b/arch/tile/include/asm/sockios.h deleted file mode 100644 index def6d4746ee7..000000000000 --- a/arch/tile/include/asm/sockios.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/sockios.h> diff --git a/arch/tile/include/asm/statfs.h b/arch/tile/include/asm/statfs.h deleted file mode 100644 index 0b91fe198c20..000000000000 --- a/arch/tile/include/asm/statfs.h +++ /dev/null @@ 
-1 +0,0 @@ -#include <asm-generic/statfs.h> diff --git a/arch/tile/include/asm/termbits.h b/arch/tile/include/asm/termbits.h deleted file mode 100644 index 3935b106de79..000000000000 --- a/arch/tile/include/asm/termbits.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/termbits.h> diff --git a/arch/tile/include/asm/termios.h b/arch/tile/include/asm/termios.h deleted file mode 100644 index 280d78a9d966..000000000000 --- a/arch/tile/include/asm/termios.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/termios.h> diff --git a/arch/tile/include/asm/types.h b/arch/tile/include/asm/types.h deleted file mode 100644 index b9e79bc580dd..000000000000 --- a/arch/tile/include/asm/types.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/types.h> diff --git a/arch/tile/include/asm/ucontext.h b/arch/tile/include/asm/ucontext.h deleted file mode 100644 index 9bc07b9f30fb..000000000000 --- a/arch/tile/include/asm/ucontext.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ucontext.h> diff --git a/arch/tile/include/asm/xor.h b/arch/tile/include/asm/xor.h deleted file mode 100644 index c82eb12a5b18..000000000000 --- a/arch/tile/include/asm/xor.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/xor.h> diff --git a/arch/tile/include/hv/drv_srom_intf.h b/arch/tile/include/hv/drv_srom_intf.h new file mode 100644 index 000000000000..6395faa6d9e6 --- /dev/null +++ b/arch/tile/include/hv/drv_srom_intf.h @@ -0,0 +1,41 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. 
+ */ + +/** + * @file drv_srom_intf.h + * Interface definitions for the SPI Flash ROM driver. + */ + +#ifndef _SYS_HV_INCLUDE_DRV_SROM_INTF_H +#define _SYS_HV_INCLUDE_DRV_SROM_INTF_H + +/** Read this offset to get the total device size. */ +#define SROM_TOTAL_SIZE_OFF 0xF0000000 + +/** Read this offset to get the device sector size. */ +#define SROM_SECTOR_SIZE_OFF 0xF0000004 + +/** Read this offset to get the device page size. */ +#define SROM_PAGE_SIZE_OFF 0xF0000008 + +/** Write this offset to flush any pending writes. */ +#define SROM_FLUSH_OFF 0xF1000000 + +/** Write this offset, plus the byte offset of the start of a sector, to + * erase a sector. Any write data is ignored, but there must be at least + * one byte of write data. Only applies when the driver is in MTD mode. + */ +#define SROM_ERASE_OFF 0xF2000000 + +#endif /* _SYS_HV_INCLUDE_DRV_SROM_INTF_H */ diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index c4be58cc5d50..f6f50f2a5e37 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c @@ -78,7 +78,6 @@ static struct clocksource cycle_counter_cs = { .rating = 300, .read = clocksource_get_cycles, .mask = CLOCKSOURCE_MASK(64), - .shift = 22, /* typical value, e.g. x86 tsc uses this */ .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -91,8 +90,6 @@ void __init setup_clock(void) cycles_per_sec = hv_sysconf(HV_SYSCONF_CPU_SPEED); sched_clock_mult = clocksource_hz2mult(cycles_per_sec, SCHED_CLOCK_SHIFT); - cycle_counter_cs.mult = - clocksource_hz2mult(cycles_per_sec, cycle_counter_cs.shift); } void __init calibrate_delay(void) @@ -107,7 +104,7 @@ void __init calibrate_delay(void) void __init time_init(void) { /* Initialize and register the clock source. */ - clocksource_register(&cycle_counter_cs); + clocksource_register_hz(&cycle_counter_cs, cycles_per_sec); /* Start up the tile-timer interrupt source on the boot cpu. 
*/ setup_tile_timer(); diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index 4e10c4023028..7309988c9794 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -836,8 +836,7 @@ void __init mem_init(void) #endif #ifdef CONFIG_FLATMEM - if (!mem_map) - BUG(); + BUG_ON(!mem_map); #endif #ifdef CONFIG_HIGHMEM diff --git a/block/blk-core.c b/block/blk-core.c index b850bedad229..b627558c461f 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1368,8 +1368,10 @@ static bool should_fail_request(struct hd_struct *part, unsigned int bytes) static int __init fail_make_request_debugfs(void) { - return init_fault_attr_dentries(&fail_make_request, - "fail_make_request"); + struct dentry *dir = fault_create_debugfs_attr("fail_make_request", + NULL, &fail_make_request); + + return IS_ERR(dir) ? PTR_ERR(dir) : 0; } late_initcall(fail_make_request_debugfs); diff --git a/block/blk-timeout.c b/block/blk-timeout.c index 4f0c06c7a338..780354888958 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -28,7 +28,10 @@ int blk_should_fake_timeout(struct request_queue *q) static int __init fail_io_timeout_debugfs(void) { - return init_fault_attr_dentries(&fail_io_timeout, "fail_io_timeout"); + struct dentry *dir = fault_create_debugfs_attr("fail_io_timeout", + NULL, &fail_io_timeout); + + return IS_ERR(dir) ? PTR_ERR(dir) : 0; } late_initcall(fail_io_timeout_debugfs); diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index 73863d86f022..76dc02f15574 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -126,6 +126,12 @@ u8 ACPI_INIT_GLOBAL(acpi_gbl_copy_dsdt_locally, FALSE); */ u8 ACPI_INIT_GLOBAL(acpi_gbl_truncate_io_addresses, FALSE); +/* + * Disable runtime checking and repair of values returned by control methods. + * Use only if the repair is causing a problem on a particular machine. 
+ */ +u8 ACPI_INIT_GLOBAL(acpi_gbl_disable_auto_repair, FALSE); + /* acpi_gbl_FADT is a local copy of the FADT, converted to a common format. */ struct acpi_table_fadt acpi_gbl_FADT; diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h index c7f743ca395b..5552125d8340 100644 --- a/drivers/acpi/acpica/aclocal.h +++ b/drivers/acpi/acpica/aclocal.h @@ -357,6 +357,7 @@ struct acpi_predefined_data { char *pathname; const union acpi_predefined_info *predefined; union acpi_operand_object *parent_package; + struct acpi_namespace_node *node; u32 flags; u8 node_flags; }; diff --git a/drivers/acpi/acpica/acpredef.h b/drivers/acpi/acpica/acpredef.h index 94e73c97cf85..c445cca490ea 100644 --- a/drivers/acpi/acpica/acpredef.h +++ b/drivers/acpi/acpica/acpredef.h @@ -468,6 +468,7 @@ static const union acpi_predefined_info predefined_names[] = {{"_SWS", 0, ACPI_RTYPE_INTEGER}}, {{"_TC1", 0, ACPI_RTYPE_INTEGER}}, {{"_TC2", 0, ACPI_RTYPE_INTEGER}}, + {{"_TDL", 0, ACPI_RTYPE_INTEGER}}, {{"_TIP", 1, ACPI_RTYPE_INTEGER}}, {{"_TIV", 1, ACPI_RTYPE_INTEGER}}, {{"_TMP", 0, ACPI_RTYPE_INTEGER}}, diff --git a/drivers/acpi/acpica/nspredef.c b/drivers/acpi/acpica/nspredef.c index 9fb03fa8ffde..c845c8089f39 100644 --- a/drivers/acpi/acpica/nspredef.c +++ b/drivers/acpi/acpica/nspredef.c @@ -193,14 +193,20 @@ acpi_ns_check_predefined_names(struct acpi_namespace_node *node, } /* - * 1) We have a return value, but if one wasn't expected, just exit, this is - * not a problem. For example, if the "Implicit Return" feature is - * enabled, methods will always return a value. + * Return value validation and possible repair. * - * 2) If the return value can be of any type, then we cannot perform any - * validation, exit. + * 1) Don't perform return value validation/repair if this feature + * has been disabled via a global option. + * + * 2) We have a return value, but if one wasn't expected, just exit, + * this is not a problem. 
For example, if the "Implicit Return" + * feature is enabled, methods will always return a value. + * + * 3) If the return value can be of any type, then we cannot perform + * any validation, just exit. */ - if ((!predefined->info.expected_btypes) || + if (acpi_gbl_disable_auto_repair || + (!predefined->info.expected_btypes) || (predefined->info.expected_btypes == ACPI_RTYPE_ALL)) { goto cleanup; } @@ -212,6 +218,7 @@ acpi_ns_check_predefined_names(struct acpi_namespace_node *node, goto cleanup; } data->predefined = predefined; + data->node = node; data->node_flags = node->flags; data->pathname = pathname; diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c index 973883babee1..024c4f263f87 100644 --- a/drivers/acpi/acpica/nsrepair2.c +++ b/drivers/acpi/acpica/nsrepair2.c @@ -503,6 +503,21 @@ acpi_ns_repair_TSS(struct acpi_predefined_data *data, { union acpi_operand_object *return_object = *return_object_ptr; acpi_status status; + struct acpi_namespace_node *node; + + /* + * We can only sort the _TSS return package if there is no _PSS in the + * same scope. This is because if _PSS is present, the ACPI specification + * dictates that the _TSS Power Dissipation field is to be ignored, and + * therefore some BIOSs leave garbage values in the _TSS Power field(s). + * In this case, it is best to just return the _TSS package as-is. + * (May, 2011) + */ + status = + acpi_ns_get_node(data->node, "^_PSS", ACPI_NS_NO_UPSEARCH, &node); + if (ACPI_SUCCESS(status)) { + return (AE_OK); + } status = acpi_ns_check_sorted_list(data, return_object, 5, 1, ACPI_SORT_DESCENDING, diff --git a/drivers/acpi/acpica/tbinstal.c b/drivers/acpi/acpica/tbinstal.c index 48db0944ce4a..62365f6075dd 100644 --- a/drivers/acpi/acpica/tbinstal.c +++ b/drivers/acpi/acpica/tbinstal.c @@ -126,12 +126,29 @@ acpi_tb_add_table(struct acpi_table_desc *table_desc, u32 *table_index) } /* - * Originally, we checked the table signature for "SSDT" or "PSDT" here. 
- * Next, we added support for OEMx tables, signature "OEM". - * Valid tables were encountered with a null signature, so we've just - * given up on validating the signature, since it seems to be a waste - * of code. The original code was removed (05/2008). + * Validate the incoming table signature. + * + * 1) Originally, we checked the table signature for "SSDT" or "PSDT". + * 2) We added support for OEMx tables, signature "OEM". + * 3) Valid tables were encountered with a null signature, so we just + * gave up on validating the signature, (05/2008). + * 4) We encountered non-AML tables such as the MADT, which caused + * interpreter errors and kernel faults. So now, we once again allow + * only "SSDT", "OEMx", and now, also a null signature. (05/2011). */ + if ((table_desc->pointer->signature[0] != 0x00) && + (!ACPI_COMPARE_NAME(table_desc->pointer->signature, ACPI_SIG_SSDT)) + && (ACPI_STRNCMP(table_desc->pointer->signature, "OEM", 3))) { + ACPI_ERROR((AE_INFO, + "Table has invalid signature [%4.4s] (0x%8.8X), must be SSDT or OEMx", + acpi_ut_valid_acpi_name(*(u32 *)table_desc-> + pointer-> + signature) ? 
table_desc-> + pointer->signature : "????", + *(u32 *)table_desc->pointer->signature)); + + return_ACPI_STATUS(AE_BAD_SIGNATURE); + } (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES); diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index 2c661353e8f2..87c0a8daa99a 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -55,6 +55,9 @@ #define ACPI_BATTERY_NOTIFY_INFO 0x81 #define ACPI_BATTERY_NOTIFY_THRESHOLD 0x82 +/* Battery power unit: 0 means mW, 1 means mA */ +#define ACPI_BATTERY_POWER_UNIT_MA 1 + #define _COMPONENT ACPI_BATTERY_COMPONENT ACPI_MODULE_NAME("battery"); @@ -91,11 +94,6 @@ MODULE_DEVICE_TABLE(acpi, battery_device_ids); enum { ACPI_BATTERY_ALARM_PRESENT, ACPI_BATTERY_XINFO_PRESENT, - /* For buggy DSDTs that report negative 16-bit values for either - * charging or discharging current and/or report 0 as 65536 - * due to bad math. - */ - ACPI_BATTERY_QUIRK_SIGNED16_CURRENT, ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY, }; @@ -301,7 +299,8 @@ static enum power_supply_property energy_battery_props[] = { #ifdef CONFIG_ACPI_PROCFS_POWER inline char *acpi_battery_units(struct acpi_battery *battery) { - return (battery->power_unit)?"mA":"mW"; + return (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA) ? + "mA" : "mW"; } #endif @@ -461,9 +460,17 @@ static int acpi_battery_get_state(struct acpi_battery *battery) battery->update_time = jiffies; kfree(buffer.pointer); - if (test_bit(ACPI_BATTERY_QUIRK_SIGNED16_CURRENT, &battery->flags) && - battery->rate_now != -1) + /* For buggy DSDTs that report negative 16-bit values for either + * charging or discharging current and/or report 0 as 65536 + * due to bad math. 
+ */ + if (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA && + battery->rate_now != ACPI_BATTERY_VALUE_UNKNOWN && + (s16)(battery->rate_now) < 0) { battery->rate_now = abs((s16)battery->rate_now); + printk_once(KERN_WARNING FW_BUG "battery: (dis)charge rate" + " invalid.\n"); + } if (test_bit(ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY, &battery->flags) && battery->capacity_now >= 0 && battery->capacity_now <= 100) @@ -544,7 +551,7 @@ static int sysfs_add_battery(struct acpi_battery *battery) { int result; - if (battery->power_unit) { + if (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA) { battery->bat.properties = charge_battery_props; battery->bat.num_properties = ARRAY_SIZE(charge_battery_props); @@ -566,18 +573,16 @@ static int sysfs_add_battery(struct acpi_battery *battery) static void sysfs_remove_battery(struct acpi_battery *battery) { - if (!battery->bat.dev) + mutex_lock(&battery->lock); + if (!battery->bat.dev) { + mutex_unlock(&battery->lock); return; + } + device_remove_file(battery->bat.dev, &alarm_attr); power_supply_unregister(&battery->bat); battery->bat.dev = NULL; -} - -static void acpi_battery_quirks(struct acpi_battery *battery) -{ - if (dmi_name_in_vendors("Acer") && battery->power_unit) { - set_bit(ACPI_BATTERY_QUIRK_SIGNED16_CURRENT, &battery->flags); - } + mutex_unlock(&battery->lock); } /* @@ -592,7 +597,7 @@ static void acpi_battery_quirks(struct acpi_battery *battery) * * Handle this correctly so that they won't break userspace. 
*/ -static void acpi_battery_quirks2(struct acpi_battery *battery) +static void acpi_battery_quirks(struct acpi_battery *battery) { if (test_bit(ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY, &battery->flags)) return ; @@ -623,13 +628,15 @@ static int acpi_battery_update(struct acpi_battery *battery) result = acpi_battery_get_info(battery); if (result) return result; - acpi_battery_quirks(battery); acpi_battery_init_alarm(battery); } - if (!battery->bat.dev) - sysfs_add_battery(battery); + if (!battery->bat.dev) { + result = sysfs_add_battery(battery); + if (result) + return result; + } result = acpi_battery_get_state(battery); - acpi_battery_quirks2(battery); + acpi_battery_quirks(battery); return result; } @@ -863,7 +870,7 @@ DECLARE_FILE_FUNCTIONS(alarm); }, \ } -static struct battery_file { +static const struct battery_file { struct file_operations ops; mode_t mode; const char *name; @@ -948,9 +955,12 @@ static int battery_notify(struct notifier_block *nb, struct acpi_battery *battery = container_of(nb, struct acpi_battery, pm_nb); switch (mode) { + case PM_POST_HIBERNATION: case PM_POST_SUSPEND: - sysfs_remove_battery(battery); - sysfs_add_battery(battery); + if (battery->bat.dev) { + sysfs_remove_battery(battery); + sysfs_add_battery(battery); + } break; } @@ -975,25 +985,33 @@ static int acpi_battery_add(struct acpi_device *device) if (ACPI_SUCCESS(acpi_get_handle(battery->device->handle, "_BIX", &handle))) set_bit(ACPI_BATTERY_XINFO_PRESENT, &battery->flags); - acpi_battery_update(battery); + result = acpi_battery_update(battery); + if (result) + goto fail; #ifdef CONFIG_ACPI_PROCFS_POWER result = acpi_battery_add_fs(device); #endif - if (!result) { - printk(KERN_INFO PREFIX "%s Slot [%s] (battery %s)\n", - ACPI_BATTERY_DEVICE_NAME, acpi_device_bid(device), - device->status.battery_present ? 
"present" : "absent"); - } else { + if (result) { #ifdef CONFIG_ACPI_PROCFS_POWER acpi_battery_remove_fs(device); #endif - kfree(battery); + goto fail; } + printk(KERN_INFO PREFIX "%s Slot [%s] (battery %s)\n", + ACPI_BATTERY_DEVICE_NAME, acpi_device_bid(device), + device->status.battery_present ? "present" : "absent"); + battery->pm_nb.notifier_call = battery_notify; register_pm_notifier(&battery->pm_nb); return result; + +fail: + sysfs_remove_battery(battery); + mutex_destroy(&battery->lock); + kfree(battery); + return result; } static int acpi_battery_remove(struct acpi_device *device, int type) diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c index 1864ad3cf895..19a61136d848 100644 --- a/drivers/acpi/dock.c +++ b/drivers/acpi/dock.c @@ -77,7 +77,7 @@ struct dock_dependent_device { struct list_head list; struct list_head hotplug_list; acpi_handle handle; - struct acpi_dock_ops *ops; + const struct acpi_dock_ops *ops; void *context; }; @@ -589,7 +589,7 @@ EXPORT_SYMBOL_GPL(unregister_dock_notifier); * the dock driver after _DCK is executed. 
*/ int -register_hotplug_dock_device(acpi_handle handle, struct acpi_dock_ops *ops, +register_hotplug_dock_device(acpi_handle handle, const struct acpi_dock_ops *ops, void *context) { struct dock_dependent_device *dd; diff --git a/drivers/acpi/ec_sys.c b/drivers/acpi/ec_sys.c index 05b44201a614..22f918bacd35 100644 --- a/drivers/acpi/ec_sys.c +++ b/drivers/acpi/ec_sys.c @@ -92,7 +92,7 @@ static ssize_t acpi_ec_write_io(struct file *f, const char __user *buf, return count; } -static struct file_operations acpi_ec_io_ops = { +static const struct file_operations acpi_ec_io_ops = { .owner = THIS_MODULE, .open = acpi_ec_open_io, .read = acpi_ec_read_io, diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c index 467479f07c1f..0f0356ca1a9e 100644 --- a/drivers/acpi/fan.c +++ b/drivers/acpi/fan.c @@ -110,7 +110,7 @@ fan_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state) return result; } -static struct thermal_cooling_device_ops fan_cooling_ops = { +static const struct thermal_cooling_device_ops fan_cooling_ops = { .get_max_state = fan_get_max_state, .get_cur_state = fan_get_cur_state, .set_cur_state = fan_set_cur_state, diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 372f9b70f7f4..fa32f584229f 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -155,7 +155,7 @@ static u32 acpi_osi_handler(acpi_string interface, u32 supported) { if (!strcmp("Linux", interface)) { - printk(KERN_NOTICE FW_BUG PREFIX + printk_once(KERN_NOTICE FW_BUG PREFIX "BIOS _OSI(Linux) query %s%s\n", osi_linux.enable ? "honored" : "ignored", osi_linux.cmdline ? 
" via cmdline" : @@ -237,8 +237,23 @@ void acpi_os_vprintf(const char *fmt, va_list args) #endif } +#ifdef CONFIG_KEXEC +static unsigned long acpi_rsdp; +static int __init setup_acpi_rsdp(char *arg) +{ + acpi_rsdp = simple_strtoul(arg, NULL, 16); + return 0; +} +early_param("acpi_rsdp", setup_acpi_rsdp); +#endif + acpi_physical_address __init acpi_os_get_root_pointer(void) { +#ifdef CONFIG_KEXEC + if (acpi_rsdp) + return acpi_rsdp; +#endif + if (efi_enabled) { if (efi.acpi20 != EFI_INVALID_TABLE_ADDR) return efi.acpi20; @@ -1083,7 +1098,13 @@ struct osi_setup_entry { bool enable; }; -static struct osi_setup_entry __initdata osi_setup_entries[OSI_STRING_ENTRIES_MAX]; +static struct osi_setup_entry __initdata + osi_setup_entries[OSI_STRING_ENTRIES_MAX] = { + {"Module Device", true}, + {"Processor Device", true}, + {"3.0 _SCP Extensions", true}, + {"Processor Aggregator Device", true}, +}; void __init acpi_osi_setup(char *str) { diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index f907cfbfa13c..7f9eba9a0b02 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -303,6 +303,61 @@ void acpi_pci_irq_del_prt(struct pci_bus *bus) /* -------------------------------------------------------------------------- PCI Interrupt Routing Support -------------------------------------------------------------------------- */ +#ifdef CONFIG_X86_IO_APIC +extern int noioapicquirk; +extern int noioapicreroute; + +static int bridge_has_boot_interrupt_variant(struct pci_bus *bus) +{ + struct pci_bus *bus_it; + + for (bus_it = bus ; bus_it ; bus_it = bus_it->parent) { + if (!bus_it->self) + return 0; + if (bus_it->self->irq_reroute_variant) + return bus_it->self->irq_reroute_variant; + } + return 0; +} + +/* + * Some chipsets (e.g. Intel 6700PXH) generate a legacy INTx when the IRQ + * entry in the chipset's IO-APIC is masked (as, e.g. the RT kernel does + * during interrupt handling). 
When this INTx generation cannot be disabled, + * we reroute these interrupts to their legacy equivalent to get rid of + * spurious interrupts. + */ +static int acpi_reroute_boot_interrupt(struct pci_dev *dev, + struct acpi_prt_entry *entry) +{ + if (noioapicquirk || noioapicreroute) { + return 0; + } else { + switch (bridge_has_boot_interrupt_variant(dev->bus)) { + case 0: + /* no rerouting necessary */ + return 0; + case INTEL_IRQ_REROUTE_VARIANT: + /* + * Remap according to INTx routing table in 6700PXH + * specs, intel order number 302628-002, section + * 2.15.2. Other chipsets (80332, ...) have the same + * mapping and are handled here as well. + */ + dev_info(&dev->dev, "PCI IRQ %d -> rerouted to legacy " + "IRQ %d\n", entry->index, + (entry->index % 4) + 16); + entry->index = (entry->index % 4) + 16; + return 1; + default: + dev_warn(&dev->dev, "Cannot reroute IRQ %d to legacy " + "IRQ: unknown mapping\n", entry->index); + return -1; + } + } +} +#endif /* CONFIG_X86_IO_APIC */ + static struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin) { struct acpi_prt_entry *entry; @@ -311,6 +366,9 @@ static struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin) entry = acpi_pci_irq_find_prt_entry(dev, pin); if (entry) { +#ifdef CONFIG_X86_IO_APIC + acpi_reroute_boot_interrupt(dev, entry); +#endif /* CONFIG_X86_IO_APIC */ ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %s[%c] _PRT entry\n", pci_name(dev), pin_name(pin))); return entry; diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index d06078d660ad..2672c798272f 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -485,7 +485,8 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device) root->secondary.end = 0xFF; printk(KERN_WARNING FW_BUG PREFIX "no secondary bus range in _CRS\n"); - status = acpi_evaluate_integer(device->handle, METHOD_NAME__BBN, NULL, &bus); + status = acpi_evaluate_integer(device->handle, METHOD_NAME__BBN, + NULL, &bus); if 
(ACPI_SUCCESS(status)) root->secondary.start = bus; else if (status == AE_NOT_FOUND) diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c index 79cb65332894..870550d6a4bf 100644 --- a/drivers/acpi/processor_thermal.c +++ b/drivers/acpi/processor_thermal.c @@ -244,7 +244,7 @@ processor_set_cur_state(struct thermal_cooling_device *cdev, return result; } -struct thermal_cooling_device_ops processor_cooling_ops = { +const struct thermal_cooling_device_ops processor_cooling_ops = { .get_max_state = processor_get_max_state, .get_cur_state = processor_get_cur_state, .set_cur_state = processor_set_cur_state, diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c index 50658ff887d9..6e36d0c0057c 100644 --- a/drivers/acpi/sbs.c +++ b/drivers/acpi/sbs.c @@ -130,6 +130,9 @@ struct acpi_sbs { #define to_acpi_sbs(x) container_of(x, struct acpi_sbs, charger) +static int acpi_sbs_remove(struct acpi_device *device, int type); +static int acpi_battery_get_state(struct acpi_battery *battery); + static inline int battery_scale(int log) { int scale = 1; @@ -195,6 +198,8 @@ static int acpi_sbs_battery_get_property(struct power_supply *psy, if ((!battery->present) && psp != POWER_SUPPLY_PROP_PRESENT) return -ENODEV; + + acpi_battery_get_state(battery); switch (psp) { case POWER_SUPPLY_PROP_STATUS: if (battery->rate_now < 0) @@ -225,11 +230,17 @@ static int acpi_sbs_battery_get_property(struct power_supply *psy, case POWER_SUPPLY_PROP_POWER_NOW: val->intval = abs(battery->rate_now) * acpi_battery_ipscale(battery) * 1000; + val->intval *= (acpi_battery_mode(battery)) ? + (battery->voltage_now * + acpi_battery_vscale(battery) / 1000) : 1; break; case POWER_SUPPLY_PROP_CURRENT_AVG: case POWER_SUPPLY_PROP_POWER_AVG: val->intval = abs(battery->rate_avg) * acpi_battery_ipscale(battery) * 1000; + val->intval *= (acpi_battery_mode(battery)) ? 
+ (battery->voltage_now * + acpi_battery_vscale(battery) / 1000) : 1; break; case POWER_SUPPLY_PROP_CAPACITY: val->intval = battery->state_of_charge; @@ -903,8 +914,6 @@ static void acpi_sbs_callback(void *context) } } -static int acpi_sbs_remove(struct acpi_device *device, int type); - static int acpi_sbs_add(struct acpi_device *device) { struct acpi_sbs *sbs; diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 6c949602cbd1..3ed80b2ca907 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -428,6 +428,22 @@ static struct dmi_system_id __initdata acpisleep_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "1000 Series"), }, }, + { + .callback = init_old_suspend_ordering, + .ident = "Asus A8N-SLI DELUXE", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "A8N-SLI DELUXE"), + }, + }, + { + .callback = init_old_suspend_ordering, + .ident = "Asus A8N-SLI Premium", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "A8N-SLI Premium"), + }, + }, {}, }; #endif /* CONFIG_SUSPEND */ diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c index 77255f250dbb..c538d0ef10ff 100644 --- a/drivers/acpi/sysfs.c +++ b/drivers/acpi/sysfs.c @@ -149,12 +149,12 @@ static int param_get_debug_level(char *buffer, const struct kernel_param *kp) return result; } -static struct kernel_param_ops param_ops_debug_layer = { +static const struct kernel_param_ops param_ops_debug_layer = { .set = param_set_uint, .get = param_get_debug_layer, }; -static struct kernel_param_ops param_ops_debug_level = { +static const struct kernel_param_ops param_ops_debug_level = { .set = param_set_uint, .get = param_get_debug_level, }; diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 2607e17b520f..48fbc647b178 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -812,7 +812,7 @@ acpi_thermal_unbind_cooling_device(struct thermal_zone_device *thermal, 
thermal_zone_unbind_cooling_device); } -static struct thermal_zone_device_ops acpi_thermal_zone_ops = { +static const struct thermal_zone_device_ops acpi_thermal_zone_ops = { .bind = acpi_thermal_bind_cooling_device, .unbind = acpi_thermal_unbind_cooling_device, .get_temp = thermal_get_temp, diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index ada4b4d9bdc8..08a44b532f7c 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -307,7 +307,7 @@ video_set_cur_state(struct thermal_cooling_device *cooling_dev, unsigned long st return acpi_video_device_lcd_set_level(video, level); } -static struct thermal_cooling_device_ops video_cooling_ops = { +static const struct thermal_cooling_device_ops video_cooling_ops = { .get_max_state = video_get_max_state, .get_cur_state = video_get_cur_state, .set_cur_state = video_set_cur_state, diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c index e0a5b555cee1..bb7c5f1085cc 100644 --- a/drivers/ata/libata-acpi.c +++ b/drivers/ata/libata-acpi.c @@ -218,12 +218,12 @@ static void ata_acpi_dev_uevent(acpi_handle handle, u32 event, void *data) ata_acpi_uevent(dev->link->ap, dev, event); } -static struct acpi_dock_ops ata_acpi_dev_dock_ops = { +static const struct acpi_dock_ops ata_acpi_dev_dock_ops = { .handler = ata_acpi_dev_notify_dock, .uevent = ata_acpi_dev_uevent, }; -static struct acpi_dock_ops ata_acpi_ap_dock_ops = { +static const struct acpi_dock_ops ata_acpi_ap_dock_ops = { .handler = ata_acpi_ap_notify_dock, .uevent = ata_acpi_ap_uevent, }; diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 49502bc5360a..423fd56bf612 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -616,5 +616,16 @@ config MSM_SMD_PKT Enables userspace clients to read and write to some packet SMD ports via device interface for MSM chipset. 
+config TILE_SROM + bool "Character-device access via hypervisor to the Tilera SPI ROM" + depends on TILE + default y + ---help--- + This device provides character-level read-write access + to the SROM, typically via the "0", "1", and "2" devices + in /dev/srom/. The Tilera hypervisor makes the flash + device appear much like a simple EEPROM, and knows + how to partition a single ROM for multiple purposes. + endmenu diff --git a/drivers/char/Makefile b/drivers/char/Makefile index 7a00672bd85d..32762ba769c2 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -63,3 +63,5 @@ obj-$(CONFIG_RAMOOPS) += ramoops.o obj-$(CONFIG_JS_RTC) += js-rtc.o js-rtc-y = rtc.o + +obj-$(CONFIG_TILE_SROM) += tile-srom.o diff --git a/drivers/char/ramoops.c b/drivers/char/ramoops.c index fca0c51bbc90..810aff9e750f 100644 --- a/drivers/char/ramoops.c +++ b/drivers/char/ramoops.c @@ -147,6 +147,14 @@ static int __init ramoops_probe(struct platform_device *pdev) cxt->phys_addr = pdata->mem_address; cxt->record_size = pdata->record_size; cxt->dump_oops = pdata->dump_oops; + /* + * Update the module parameter variables as well so they are visible + * through /sys/module/ramoops/parameters/ + */ + mem_size = pdata->mem_size; + mem_address = pdata->mem_address; + record_size = pdata->record_size; + dump_oops = pdata->dump_oops; if (!request_mem_region(cxt->phys_addr, cxt->size, "ramoops")) { pr_err("request mem region failed\n"); diff --git a/drivers/char/tile-srom.c b/drivers/char/tile-srom.c new file mode 100644 index 000000000000..cf3ee008dca2 --- /dev/null +++ b/drivers/char/tile-srom.c @@ -0,0 +1,481 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * SPI Flash ROM driver + * + * This source code is derived from code provided in "Linux Device + * Drivers, Third Edition", by Jonathan Corbet, Alessandro Rubini, and + * Greg Kroah-Hartman, published by O'Reilly Media, Inc. + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/init.h> +#include <linux/kernel.h> /* printk() */ +#include <linux/slab.h> /* kmalloc() */ +#include <linux/fs.h> /* everything... */ +#include <linux/errno.h> /* error codes */ +#include <linux/types.h> /* size_t */ +#include <linux/proc_fs.h> +#include <linux/fcntl.h> /* O_ACCMODE */ +#include <linux/aio.h> +#include <linux/pagemap.h> +#include <linux/hugetlb.h> +#include <linux/uaccess.h> +#include <linux/platform_device.h> +#include <hv/hypervisor.h> +#include <linux/ioctl.h> +#include <linux/cdev.h> +#include <linux/delay.h> +#include <hv/drv_srom_intf.h> + +/* + * Size of our hypervisor I/O requests. We break up large transfers + * so that we don't spend large uninterrupted spans of time in the + * hypervisor. Erasing an SROM sector takes a significant fraction of + * a second, so if we allowed the user to, say, do one I/O to write the + * entire ROM, we'd get soft lockup timeouts, or worse. + */ +#define SROM_CHUNK_SIZE ((size_t)4096) + +/* + * When hypervisor is busy (e.g. erasing), poll the status periodically. 
+ */ + +/* + * Interval to poll the state in msec + */ +#define SROM_WAIT_TRY_INTERVAL 20 + +/* + * Maximum times to poll the state + */ +#define SROM_MAX_WAIT_TRY_TIMES 1000 + +struct srom_dev { + int hv_devhdl; /* Handle for hypervisor device */ + u32 total_size; /* Size of this device */ + u32 sector_size; /* Size of a sector */ + u32 page_size; /* Size of a page */ + struct mutex lock; /* Allow only one accessor at a time */ +}; + +static int srom_major; /* Dynamic major by default */ +module_param(srom_major, int, 0); +MODULE_AUTHOR("Tilera Corporation"); +MODULE_LICENSE("GPL"); + +static int srom_devs; /* Number of SROM partitions */ +static struct cdev srom_cdev; +static struct class *srom_class; +static struct srom_dev *srom_devices; + +/* + * Handle calling the hypervisor and managing EAGAIN/EBUSY. + */ + +static ssize_t _srom_read(int hv_devhdl, void *buf, + loff_t off, size_t count) +{ + int retval, retries = SROM_MAX_WAIT_TRY_TIMES; + for (;;) { + retval = hv_dev_pread(hv_devhdl, 0, (HV_VirtAddr)buf, + count, off); + if (retval >= 0) + return retval; + if (retval == HV_EAGAIN) + continue; + if (retval == HV_EBUSY && --retries > 0) { + msleep(SROM_WAIT_TRY_INTERVAL); + continue; + } + pr_err("_srom_read: error %d\n", retval); + return -EIO; + } +} + +static ssize_t _srom_write(int hv_devhdl, const void *buf, + loff_t off, size_t count) +{ + int retval, retries = SROM_MAX_WAIT_TRY_TIMES; + for (;;) { + retval = hv_dev_pwrite(hv_devhdl, 0, (HV_VirtAddr)buf, + count, off); + if (retval >= 0) + return retval; + if (retval == HV_EAGAIN) + continue; + if (retval == HV_EBUSY && --retries > 0) { + msleep(SROM_WAIT_TRY_INTERVAL); + continue; + } + pr_err("_srom_write: error %d\n", retval); + return -EIO; + } +} + +/** + * srom_open() - Device open routine. + * @inode: Inode for this device. + * @filp: File for this specific open of the device. + * + * Returns zero, or an error code. 
+ */ +static int srom_open(struct inode *inode, struct file *filp) +{ + filp->private_data = &srom_devices[iminor(inode)]; + return 0; +} + + +/** + * srom_release() - Device release routine. + * @inode: Inode for this device. + * @filp: File for this specific open of the device. + * + * Returns zero, or an error code. + */ +static int srom_release(struct inode *inode, struct file *filp) +{ + struct srom_dev *srom = filp->private_data; + char dummy; + + /* Make sure we've flushed anything written to the ROM. */ + mutex_lock(&srom->lock); + if (srom->hv_devhdl >= 0) + _srom_write(srom->hv_devhdl, &dummy, SROM_FLUSH_OFF, 1); + mutex_unlock(&srom->lock); + + filp->private_data = NULL; + + return 0; +} + + +/** + * srom_read() - Read data from the device. + * @filp: File for this specific open of the device. + * @buf: User's data buffer. + * @count: Number of bytes requested. + * @f_pos: File position. + * + * Returns number of bytes read, or an error code. + */ +static ssize_t srom_read(struct file *filp, char __user *buf, + size_t count, loff_t *f_pos) +{ + int retval = 0; + void *kernbuf; + struct srom_dev *srom = filp->private_data; + + kernbuf = kmalloc(SROM_CHUNK_SIZE, GFP_KERNEL); + if (!kernbuf) + return -ENOMEM; + + if (mutex_lock_interruptible(&srom->lock)) { + retval = -ERESTARTSYS; + kfree(kernbuf); + return retval; + } + + while (count) { + int hv_retval; + int bytes_this_pass = min(count, SROM_CHUNK_SIZE); + + hv_retval = _srom_read(srom->hv_devhdl, kernbuf, + *f_pos, bytes_this_pass); + if (hv_retval > 0) { + if (copy_to_user(buf, kernbuf, hv_retval) != 0) { + retval = -EFAULT; + break; + } + } else if (hv_retval <= 0) { + if (retval == 0) + retval = hv_retval; + break; + } + + retval += hv_retval; + *f_pos += hv_retval; + buf += hv_retval; + count -= hv_retval; + } + + mutex_unlock(&srom->lock); + kfree(kernbuf); + + return retval; +} + +/** + * srom_write() - Write data to the device. + * @filp: File for this specific open of the device. 
+ * @buf: User's data buffer. + * @count: Number of bytes requested. + * @f_pos: File position. + * + * Returns number of bytes written, or an error code. + */ +static ssize_t srom_write(struct file *filp, const char __user *buf, + size_t count, loff_t *f_pos) +{ + int retval = 0; + void *kernbuf; + struct srom_dev *srom = filp->private_data; + + kernbuf = kmalloc(SROM_CHUNK_SIZE, GFP_KERNEL); + if (!kernbuf) + return -ENOMEM; + + if (mutex_lock_interruptible(&srom->lock)) { + retval = -ERESTARTSYS; + kfree(kernbuf); + return retval; + } + + while (count) { + int hv_retval; + int bytes_this_pass = min(count, SROM_CHUNK_SIZE); + + if (copy_from_user(kernbuf, buf, bytes_this_pass) != 0) { + retval = -EFAULT; + break; + } + + hv_retval = _srom_write(srom->hv_devhdl, kernbuf, + *f_pos, bytes_this_pass); + if (hv_retval <= 0) { + if (retval == 0) + retval = hv_retval; + break; + } + + retval += hv_retval; + *f_pos += hv_retval; + buf += hv_retval; + count -= hv_retval; + } + + mutex_unlock(&srom->lock); + kfree(kernbuf); + + return retval; +} + +/* Provide our own implementation so we can use srom->total_size. 
*/ +loff_t srom_llseek(struct file *filp, loff_t offset, int origin) +{ + struct srom_dev *srom = filp->private_data; + + if (mutex_lock_interruptible(&srom->lock)) + return -ERESTARTSYS; + + switch (origin) { + case SEEK_END: + offset += srom->total_size; + break; + case SEEK_CUR: + offset += filp->f_pos; + break; + } + + if (offset < 0 || offset > srom->total_size) { + offset = -EINVAL; + } else { + filp->f_pos = offset; + filp->f_version = 0; + } + + mutex_unlock(&srom->lock); + + return offset; +} + +static ssize_t total_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srom_dev *srom = dev_get_drvdata(dev); + return sprintf(buf, "%u\n", srom->total_size); +} + +static ssize_t sector_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srom_dev *srom = dev_get_drvdata(dev); + return sprintf(buf, "%u\n", srom->sector_size); +} + +static ssize_t page_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srom_dev *srom = dev_get_drvdata(dev); + return sprintf(buf, "%u\n", srom->page_size); +} + +static struct device_attribute srom_dev_attrs[] = { + __ATTR(total_size, S_IRUGO, total_show, NULL), + __ATTR(sector_size, S_IRUGO, sector_show, NULL), + __ATTR(page_size, S_IRUGO, page_show, NULL), + __ATTR_NULL +}; + +static char *srom_devnode(struct device *dev, mode_t *mode) +{ + *mode = S_IRUGO | S_IWUSR; + return kasprintf(GFP_KERNEL, "srom/%s", dev_name(dev)); +} + +/* + * The fops + */ +static const struct file_operations srom_fops = { + .owner = THIS_MODULE, + .llseek = srom_llseek, + .read = srom_read, + .write = srom_write, + .open = srom_open, + .release = srom_release, +}; + +/** + * srom_setup_minor() - Initialize per-minor information. + * @srom: Per-device SROM state. + * @index: Device to set up. 
+ */ +static int srom_setup_minor(struct srom_dev *srom, int index) +{ + struct device *dev; + int devhdl = srom->hv_devhdl; + + mutex_init(&srom->lock); + + if (_srom_read(devhdl, &srom->total_size, + SROM_TOTAL_SIZE_OFF, sizeof(srom->total_size)) < 0) + return -EIO; + if (_srom_read(devhdl, &srom->sector_size, + SROM_SECTOR_SIZE_OFF, sizeof(srom->sector_size)) < 0) + return -EIO; + if (_srom_read(devhdl, &srom->page_size, + SROM_PAGE_SIZE_OFF, sizeof(srom->page_size)) < 0) + return -EIO; + + dev = device_create(srom_class, &platform_bus, + MKDEV(srom_major, index), srom, "%d", index); + return IS_ERR(dev) ? PTR_ERR(dev) : 0; +} + +/** srom_init() - Initialize the driver's module. */ +static int srom_init(void) +{ + int result, i; + dev_t dev = MKDEV(srom_major, 0); + + /* + * Start with a plausible number of partitions; the krealloc() call + * below will yield about log(srom_devs) additional allocations. + */ + srom_devices = kzalloc(4 * sizeof(struct srom_dev), GFP_KERNEL); + + /* Discover the number of srom partitions. */ + for (i = 0; ; i++) { + int devhdl; + char buf[20]; + struct srom_dev *new_srom_devices = + krealloc(srom_devices, (i+1) * sizeof(struct srom_dev), + GFP_KERNEL | __GFP_ZERO); + if (!new_srom_devices) { + result = -ENOMEM; + goto fail_mem; + } + srom_devices = new_srom_devices; + sprintf(buf, "srom/0/%d", i); + devhdl = hv_dev_open((HV_VirtAddr)buf, 0); + if (devhdl < 0) { + if (devhdl != HV_ENODEV) + pr_notice("srom/%d: hv_dev_open failed: %d.\n", + i, devhdl); + break; + } + srom_devices[i].hv_devhdl = devhdl; + } + srom_devs = i; + + /* Bail out early if we have no partitions at all. */ + if (srom_devs == 0) { + result = -ENODEV; + goto fail_mem; + } + + /* Register our major, and accept a dynamic number. 
*/ + if (srom_major) + result = register_chrdev_region(dev, srom_devs, "srom"); + else { + result = alloc_chrdev_region(&dev, 0, srom_devs, "srom"); + srom_major = MAJOR(dev); + } + if (result < 0) + goto fail_mem; + + /* Register a character device. */ + cdev_init(&srom_cdev, &srom_fops); + srom_cdev.owner = THIS_MODULE; + srom_cdev.ops = &srom_fops; + result = cdev_add(&srom_cdev, dev, srom_devs); + if (result < 0) + goto fail_chrdev; + + /* Create a sysfs class. */ + srom_class = class_create(THIS_MODULE, "srom"); + if (IS_ERR(srom_class)) { + result = PTR_ERR(srom_class); + goto fail_cdev; + } + srom_class->dev_attrs = srom_dev_attrs; + srom_class->devnode = srom_devnode; + + /* Do per-partition initialization */ + for (i = 0; i < srom_devs; i++) { + result = srom_setup_minor(srom_devices + i, i); + if (result < 0) + goto fail_class; + } + + return 0; + +fail_class: + for (i = 0; i < srom_devs; i++) + device_destroy(srom_class, MKDEV(srom_major, i)); + class_destroy(srom_class); +fail_cdev: + cdev_del(&srom_cdev); +fail_chrdev: + unregister_chrdev_region(dev, srom_devs); +fail_mem: + kfree(srom_devices); + return result; +} + +/** srom_cleanup() - Clean up the driver's module. 
*/ +static void srom_cleanup(void) +{ + int i; + for (i = 0; i < srom_devs; i++) + device_destroy(srom_class, MKDEV(srom_major, i)); + class_destroy(srom_class); + cdev_del(&srom_cdev); + unregister_chrdev_region(MKDEV(srom_major, 0), srom_devs); + kfree(srom_devices); +} + +module_init(srom_init); +module_exit(srom_cleanup); diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index 7fc2f108f490..3f4051a7c5a7 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -80,7 +80,7 @@ enum tis_defaults { static LIST_HEAD(tis_chips); static DEFINE_SPINLOCK(tis_lock); -#ifdef CONFIG_PNP +#if defined(CONFIG_PNP) && defined(CONFIG_ACPI) static int is_itpm(struct pnp_dev *dev) { struct acpi_device *acpi = pnp_acpi_device(dev); @@ -93,6 +93,11 @@ static int is_itpm(struct pnp_dev *dev) return 0; } +#else +static inline int is_itpm(struct pnp_dev *dev) +{ + return 0; +} #endif static int check_locality(struct tpm_chip *chip, int l) diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index eacb05e6cfb3..eb80b549ed8d 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -157,7 +157,7 @@ utf16_strnlen(efi_char16_t *s, size_t maxlength) return length; } -static unsigned long +static inline unsigned long utf16_strlen(efi_char16_t *s) { return utf16_strnlen(s, ~0UL); @@ -580,8 +580,8 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type, return -1; } -static u64 efi_pstore_write(enum pstore_type_id type, int part, size_t size, - struct pstore_info *psi) +static u64 efi_pstore_write(enum pstore_type_id type, unsigned int part, + size_t size, struct pstore_info *psi) { return 0; } diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index a70fa89f76fd..220285760b68 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -110,7 +110,7 @@ static int post_dock_fixups(struct notifier_block *nb, unsigned long val, } -static struct 
acpi_dock_ops acpiphp_dock_ops = { +static const struct acpi_dock_ops acpiphp_dock_ops = { .handler = handle_hotplug_event_func, }; diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c index bcae8dd41496..7789002bdd5c 100644 --- a/drivers/rtc/rtc-omap.c +++ b/drivers/rtc/rtc-omap.c @@ -368,7 +368,7 @@ static int __init omap_rtc_probe(struct platform_device *pdev) pr_info("%s: already running\n", pdev->name); /* force to 24 hour mode */ - new_ctrl = reg & ~(OMAP_RTC_CTRL_SPLIT|OMAP_RTC_CTRL_AUTO_COMP); + new_ctrl = reg & (OMAP_RTC_CTRL_SPLIT|OMAP_RTC_CTRL_AUTO_COMP); new_ctrl |= OMAP_RTC_CTRL_STOP; /* BOARD-SPECIFIC CUSTOMIZATION CAN GO HERE: diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index bf7c687519ef..f7f71b2d3101 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -14,11 +14,7 @@ menuconfig THERMAL If you want this support, you should say Y or M here. config THERMAL_HWMON - bool "Hardware monitoring support" + bool depends on THERMAL depends on HWMON=y || HWMON=THERMAL - help - The generic thermal sysfs driver's hardware monitoring support - requires a 2.10.7/3.0.2 or later lm-sensors userspace. - - Say Y if your user-space is new enough. 
+ default y diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c index 0b1c82ad6805..708f8e92771a 100644 --- a/drivers/thermal/thermal_sys.c +++ b/drivers/thermal/thermal_sys.c @@ -420,6 +420,29 @@ thermal_cooling_device_trip_point_show(struct device *dev, /* hwmon sys I/F */ #include <linux/hwmon.h> + +/* thermal zone devices with the same type share one hwmon device */ +struct thermal_hwmon_device { + char type[THERMAL_NAME_LENGTH]; + struct device *device; + int count; + struct list_head tz_list; + struct list_head node; +}; + +struct thermal_hwmon_attr { + struct device_attribute attr; + char name[16]; +}; + +/* one temperature input for each thermal zone */ +struct thermal_hwmon_temp { + struct list_head hwmon_node; + struct thermal_zone_device *tz; + struct thermal_hwmon_attr temp_input; /* hwmon sys attr */ + struct thermal_hwmon_attr temp_crit; /* hwmon sys attr */ +}; + static LIST_HEAD(thermal_hwmon_list); static ssize_t @@ -437,9 +460,10 @@ temp_input_show(struct device *dev, struct device_attribute *attr, char *buf) int ret; struct thermal_hwmon_attr *hwmon_attr = container_of(attr, struct thermal_hwmon_attr, attr); - struct thermal_zone_device *tz - = container_of(hwmon_attr, struct thermal_zone_device, + struct thermal_hwmon_temp *temp + = container_of(hwmon_attr, struct thermal_hwmon_temp, temp_input); + struct thermal_zone_device *tz = temp->tz; ret = tz->ops->get_temp(tz, &temperature); @@ -455,9 +479,10 @@ temp_crit_show(struct device *dev, struct device_attribute *attr, { struct thermal_hwmon_attr *hwmon_attr = container_of(attr, struct thermal_hwmon_attr, attr); - struct thermal_zone_device *tz - = container_of(hwmon_attr, struct thermal_zone_device, + struct thermal_hwmon_temp *temp + = container_of(hwmon_attr, struct thermal_hwmon_temp, temp_crit); + struct thermal_zone_device *tz = temp->tz; long temperature; int ret; @@ -469,22 +494,54 @@ temp_crit_show(struct device *dev, struct device_attribute *attr, } -static int 
-thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) +static struct thermal_hwmon_device * +thermal_hwmon_lookup_by_type(const struct thermal_zone_device *tz) { struct thermal_hwmon_device *hwmon; - int new_hwmon_device = 1; - int result; mutex_lock(&thermal_list_lock); list_for_each_entry(hwmon, &thermal_hwmon_list, node) if (!strcmp(hwmon->type, tz->type)) { - new_hwmon_device = 0; mutex_unlock(&thermal_list_lock); - goto register_sys_interface; + return hwmon; + } + mutex_unlock(&thermal_list_lock); + + return NULL; +} + +/* Find the temperature input matching a given thermal zone */ +static struct thermal_hwmon_temp * +thermal_hwmon_lookup_temp(const struct thermal_hwmon_device *hwmon, + const struct thermal_zone_device *tz) +{ + struct thermal_hwmon_temp *temp; + + mutex_lock(&thermal_list_lock); + list_for_each_entry(temp, &hwmon->tz_list, hwmon_node) + if (temp->tz == tz) { + mutex_unlock(&thermal_list_lock); + return temp; } mutex_unlock(&thermal_list_lock); + return NULL; +} + +static int +thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) +{ + struct thermal_hwmon_device *hwmon; + struct thermal_hwmon_temp *temp; + int new_hwmon_device = 1; + int result; + + hwmon = thermal_hwmon_lookup_by_type(tz); + if (hwmon) { + new_hwmon_device = 0; + goto register_sys_interface; + } + hwmon = kzalloc(sizeof(struct thermal_hwmon_device), GFP_KERNEL); if (!hwmon) return -ENOMEM; @@ -502,30 +559,36 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) goto free_mem; register_sys_interface: - tz->hwmon = hwmon; + temp = kzalloc(sizeof(struct thermal_hwmon_temp), GFP_KERNEL); + if (!temp) { + result = -ENOMEM; + goto unregister_name; + } + + temp->tz = tz; hwmon->count++; - snprintf(tz->temp_input.name, THERMAL_NAME_LENGTH, + snprintf(temp->temp_input.name, THERMAL_NAME_LENGTH, "temp%d_input", hwmon->count); - tz->temp_input.attr.attr.name = tz->temp_input.name; - tz->temp_input.attr.attr.mode = 0444; - tz->temp_input.attr.show = temp_input_show; - 
sysfs_attr_init(&tz->temp_input.attr.attr); - result = device_create_file(hwmon->device, &tz->temp_input.attr); + temp->temp_input.attr.attr.name = temp->temp_input.name; + temp->temp_input.attr.attr.mode = 0444; + temp->temp_input.attr.show = temp_input_show; + sysfs_attr_init(&temp->temp_input.attr.attr); + result = device_create_file(hwmon->device, &temp->temp_input.attr); if (result) - goto unregister_name; + goto free_temp_mem; if (tz->ops->get_crit_temp) { unsigned long temperature; if (!tz->ops->get_crit_temp(tz, &temperature)) { - snprintf(tz->temp_crit.name, THERMAL_NAME_LENGTH, + snprintf(temp->temp_crit.name, THERMAL_NAME_LENGTH, "temp%d_crit", hwmon->count); - tz->temp_crit.attr.attr.name = tz->temp_crit.name; - tz->temp_crit.attr.attr.mode = 0444; - tz->temp_crit.attr.show = temp_crit_show; - sysfs_attr_init(&tz->temp_crit.attr.attr); + temp->temp_crit.attr.attr.name = temp->temp_crit.name; + temp->temp_crit.attr.attr.mode = 0444; + temp->temp_crit.attr.show = temp_crit_show; + sysfs_attr_init(&temp->temp_crit.attr.attr); result = device_create_file(hwmon->device, - &tz->temp_crit.attr); + &temp->temp_crit.attr); if (result) goto unregister_input; } @@ -534,13 +597,15 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) mutex_lock(&thermal_list_lock); if (new_hwmon_device) list_add_tail(&hwmon->node, &thermal_hwmon_list); - list_add_tail(&tz->hwmon_node, &hwmon->tz_list); + list_add_tail(&temp->hwmon_node, &hwmon->tz_list); mutex_unlock(&thermal_list_lock); return 0; unregister_input: - device_remove_file(hwmon->device, &tz->temp_input.attr); + device_remove_file(hwmon->device, &temp->temp_input.attr); + free_temp_mem: + kfree(temp); unregister_name: if (new_hwmon_device) { device_remove_file(hwmon->device, &dev_attr_name); @@ -556,15 +621,30 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) static void thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz) { - struct thermal_hwmon_device *hwmon = tz->hwmon; + struct 
thermal_hwmon_device *hwmon; + struct thermal_hwmon_temp *temp; + + hwmon = thermal_hwmon_lookup_by_type(tz); + if (unlikely(!hwmon)) { + /* Should never happen... */ + dev_dbg(&tz->device, "hwmon device lookup failed!\n"); + return; + } + + temp = thermal_hwmon_lookup_temp(hwmon, tz); + if (unlikely(!temp)) { + /* Should never happen... */ + dev_dbg(&tz->device, "temperature input lookup failed!\n"); + return; + } - tz->hwmon = NULL; - device_remove_file(hwmon->device, &tz->temp_input.attr); + device_remove_file(hwmon->device, &temp->temp_input.attr); if (tz->ops->get_crit_temp) - device_remove_file(hwmon->device, &tz->temp_crit.attr); + device_remove_file(hwmon->device, &temp->temp_crit.attr); mutex_lock(&thermal_list_lock); - list_del(&tz->hwmon_node); + list_del(&temp->hwmon_node); + kfree(temp); if (!list_empty(&hwmon->tz_list)) { mutex_unlock(&thermal_list_lock); return; diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig index 69407e72aac1..278aeaa92505 100644 --- a/drivers/video/backlight/Kconfig +++ b/drivers/video/backlight/Kconfig @@ -336,7 +336,7 @@ config BACKLIGHT_PCF50633 enable its driver. 
config BACKLIGHT_AAT2870 - bool "AnalogicTech AAT2870 Backlight" + tristate "AnalogicTech AAT2870 Backlight" depends on BACKLIGHT_CLASS_DEVICE && MFD_AAT2870_CORE help If you have a AnalogicTech AAT2870 say Y to enable the diff --git a/drivers/video/backlight/aat2870_bl.c b/drivers/video/backlight/aat2870_bl.c index 4952a617563d..331f1ef1dad5 100644 --- a/drivers/video/backlight/aat2870_bl.c +++ b/drivers/video/backlight/aat2870_bl.c @@ -44,7 +44,7 @@ static inline int aat2870_brightness(struct aat2870_bl_driver_data *aat2870_bl, struct backlight_device *bd = aat2870_bl->bd; int val; - val = brightness * aat2870_bl->max_current; + val = brightness * (aat2870_bl->max_current - 1); val /= bd->props.max_brightness; return val; @@ -158,10 +158,10 @@ static int aat2870_bl_probe(struct platform_device *pdev) props.type = BACKLIGHT_RAW; bd = backlight_device_register("aat2870-backlight", &pdev->dev, aat2870_bl, &aat2870_bl_ops, &props); - if (!bd) { + if (IS_ERR(bd)) { dev_err(&pdev->dev, "Failed allocate memory for backlight device\n"); - ret = -ENOMEM; + ret = PTR_ERR(bd); goto out_kfree; } @@ -175,7 +175,7 @@ static int aat2870_bl_probe(struct platform_device *pdev) else aat2870_bl->channels = AAT2870_BL_CH_ALL; - if (pdata->max_brightness > 0) + if (pdata->max_current > 0) aat2870_bl->max_current = pdata->max_current; else aat2870_bl->max_current = AAT2870_CURRENT_27_9; diff --git a/fs/Kconfig b/fs/Kconfig index 19891aab9c6e..9fe0b349f4cd 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -127,14 +127,21 @@ config TMPFS_POSIX_ACL select TMPFS_XATTR select GENERIC_ACL help - POSIX Access Control Lists (ACLs) support permissions for users and - groups beyond the owner/group/world scheme. + POSIX Access Control Lists (ACLs) support additional access rights + for users and groups beyond the standard owner/group/world scheme, + and this option selects support for ACLs specifically for tmpfs + filesystems. 
+ + If you've selected TMPFS, it's possible that you'll also need + this option as there are a number of Linux distros that require + POSIX ACL support under /dev for certain features to work properly. + For example, some distros need this feature for ALSA-related /dev + files for sound to work properly. In short, if you're not sure, + say Y. To learn more about Access Control Lists, visit the POSIX ACLs for Linux website <http://acl.bestbits.at/>. - If you don't know what Access Control Lists are, say N. - config TMPFS_XATTR bool "Tmpfs extended attributes" depends on TMPFS diff --git a/fs/dcache.c b/fs/dcache.c index 2347cdb15abb..c83cae19161e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -795,6 +795,7 @@ relock: /** * prune_dcache_sb - shrink the dcache + * @sb: superblock * @nr_to_scan: number of entries to try to free * * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e2d88baf91d3..4687fea0c00f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -124,7 +124,7 @@ void *ext4_kvzalloc(size_t size, gfp_t flags) { void *ret; - ret = kmalloc(size, flags); + ret = kzalloc(size, flags); if (!ret) ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL); return ret; diff --git a/fs/stack.c b/fs/stack.c index 4a6f7f440658..b4f2ab48a61f 100644 --- a/fs/stack.c +++ b/fs/stack.c @@ -29,10 +29,7 @@ void fsstack_copy_inode_size(struct inode *dst, struct inode *src) * * We don't actually know what locking is used at the lower level; * but if it's a filesystem that supports quotas, it will be using - * i_lock as in inode_add_bytes(). tmpfs uses other locking, and - * its 32-bit is (just) able to exceed 2TB i_size with the aid of - * holes; but its i_blocks cannot carry into the upper long without - * almost 2TB swap - let's ignore that case. + * i_lock as in inode_add_bytes(). 
*/ if (sizeof(i_blocks) > sizeof(long)) spin_lock(&src->i_lock); diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h index 3090471b2a5e..e49c36d38d7e 100644 --- a/include/acpi/acpi_drivers.h +++ b/include/acpi/acpi_drivers.h @@ -128,7 +128,7 @@ extern int is_dock_device(acpi_handle handle); extern int register_dock_notifier(struct notifier_block *nb); extern void unregister_dock_notifier(struct notifier_block *nb); extern int register_hotplug_dock_device(acpi_handle handle, - struct acpi_dock_ops *ops, + const struct acpi_dock_ops *ops, void *context); extern void unregister_hotplug_dock_device(acpi_handle handle); #else diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 2ed0a8486c19..f554a9313b43 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -47,7 +47,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20110413 +#define ACPI_CA_VERSION 0x20110623 #include "actypes.h" #include "actbl.h" @@ -69,6 +69,7 @@ extern u32 acpi_gbl_trace_flags; extern u32 acpi_gbl_enable_aml_debug_object; extern u8 acpi_gbl_copy_dsdt_locally; extern u8 acpi_gbl_truncate_io_addresses; +extern u8 acpi_gbl_disable_auto_repair; extern u32 acpi_current_gpe_count; extern struct acpi_table_fadt acpi_gbl_FADT; diff --git a/include/acpi/processor.h b/include/acpi/processor.h index ba4928cae473..67055f180330 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -337,7 +337,7 @@ extern struct cpuidle_driver acpi_idle_driver; /* in processor_thermal.c */ int acpi_processor_get_limit_info(struct acpi_processor *pr); -extern struct thermal_cooling_device_ops processor_cooling_ops; +extern const struct thermal_cooling_device_ops processor_cooling_ops; #ifdef CONFIG_CPU_FREQ void acpi_thermal_cpufreq_init(void); void acpi_thermal_cpufreq_exit(void); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 1deb2a73c2da..2312e850aab8 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h 
@@ -238,7 +238,6 @@ extern int acpi_paddr_to_node(u64 start_addr, u64 size); extern int pnpacpi_disabled; #define PXM_INVAL (-1) -#define NID_INVAL (-1) int acpi_check_resource_conflict(const struct resource *res); diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 3ff060ac7810..c6f996f2abb6 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -25,10 +25,6 @@ struct fault_attr { unsigned long reject_end; unsigned long count; - -#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS - struct dentry *dir; -#endif }; #define FAULT_ATTR_INITIALIZER { \ @@ -45,19 +41,15 @@ bool should_fail(struct fault_attr *attr, ssize_t size); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS -int init_fault_attr_dentries(struct fault_attr *attr, const char *name); -void cleanup_fault_attr_dentries(struct fault_attr *attr); +struct dentry *fault_create_debugfs_attr(const char *name, + struct dentry *parent, struct fault_attr *attr); #else /* CONFIG_FAULT_INJECTION_DEBUG_FS */ -static inline int init_fault_attr_dentries(struct fault_attr *attr, - const char *name) -{ - return -ENODEV; -} - -static inline void cleanup_fault_attr_dentries(struct fault_attr *attr) +static inline struct dentry *fault_create_debugfs_attr(const char *name, + struct dentry *parent, struct fault_attr *attr) { + return ERR_PTR(-ENODEV); } #endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */ diff --git a/include/linux/gfp.h b/include/linux/gfp.h index cb4089254f01..3a76faf6a3ee 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -92,7 +92,7 @@ struct vm_area_struct; */ #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK) -#define __GFP_BITS_SHIFT 23 /* Room for 23 __GFP_FOO bits */ +#define __GFP_BITS_SHIFT 24 /* Room for N __GFP_FOO bits */ #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) /* This equals 0, but use constants in case they ever change */ diff --git a/include/linux/idr.h b/include/linux/idr.h index 13a801f3d028..255491cf522e 100644 --- 
a/include/linux/idr.h +++ b/include/linux/idr.h @@ -146,6 +146,10 @@ void ida_remove(struct ida *ida, int id); void ida_destroy(struct ida *ida); void ida_init(struct ida *ida); +int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end, + gfp_t gfp_mask); +void ida_simple_remove(struct ida *ida, unsigned int id); + void __init idr_init_cache(void); #endif /* __IDR_H__ */ diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b96600786913..3b535db00a94 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -86,8 +86,6 @@ extern void mem_cgroup_uncharge_end(void); extern void mem_cgroup_uncharge_page(struct page *page); extern void mem_cgroup_uncharge_cache_page(struct page *page); -extern int mem_cgroup_shmem_charge_fallback(struct page *page, - struct mm_struct *mm, gfp_t gfp_mask); extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask); int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); @@ -225,12 +223,6 @@ static inline void mem_cgroup_uncharge_cache_page(struct page *page) { } -static inline int mem_cgroup_shmem_charge_fallback(struct page *page, - struct mm_struct *mm, gfp_t gfp_mask) -{ - return 0; -} - static inline void mem_cgroup_add_lru_list(struct page *page, int lru) { } diff --git a/include/linux/mfd/aat2870.h b/include/linux/mfd/aat2870.h index 89212df05622..f7316c29bdec 100644 --- a/include/linux/mfd/aat2870.h +++ b/include/linux/mfd/aat2870.h @@ -89,7 +89,7 @@ enum aat2870_id { /* Backlight current magnitude (mA) */ enum aat2870_current { - AAT2870_CURRENT_0_45, + AAT2870_CURRENT_0_45 = 1, AAT2870_CURRENT_0_90, AAT2870_CURRENT_1_80, AAT2870_CURRENT_2_70, diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 23241c2fecce..9d4539c52e53 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -39,7 +39,15 @@ * when it is shrunk, before we rcu free the node. See shrink code for * details. 
*/ -#define RADIX_TREE_INDIRECT_PTR 1 +#define RADIX_TREE_INDIRECT_PTR 1 +/* + * A common use of the radix tree is to store pointers to struct pages; + * but shmem/tmpfs needs also to store swap entries in the same tree: + * those are marked as exceptional entries to distinguish them. + * EXCEPTIONAL_ENTRY tests the bit, EXCEPTIONAL_SHIFT shifts content past it. + */ +#define RADIX_TREE_EXCEPTIONAL_ENTRY 2 +#define RADIX_TREE_EXCEPTIONAL_SHIFT 2 #define radix_tree_indirect_to_ptr(ptr) \ radix_tree_indirect_to_ptr((void __force *)(ptr)) @@ -174,6 +182,28 @@ static inline int radix_tree_deref_retry(void *arg) } /** + * radix_tree_exceptional_entry - radix_tree_deref_slot gave exceptional entry? + * @arg: value returned by radix_tree_deref_slot + * Returns: 0 if well-aligned pointer, non-0 if exceptional entry. + */ +static inline int radix_tree_exceptional_entry(void *arg) +{ + /* Not unlikely because radix_tree_exception often tested first */ + return (unsigned long)arg & RADIX_TREE_EXCEPTIONAL_ENTRY; +} + +/** + * radix_tree_exception - radix_tree_deref_slot returned either exception? + * @arg: value returned by radix_tree_deref_slot + * Returns: 0 if well-aligned pointer, non-0 if either kind of exception. + */ +static inline int radix_tree_exception(void *arg) +{ + return unlikely((unsigned long)arg & + (RADIX_TREE_INDIRECT_PTR | RADIX_TREE_EXCEPTIONAL_ENTRY)); +} + +/** * radix_tree_replace_slot - replace item in a slot * @pslot: pointer to slot, returned by radix_tree_lookup_slot * @item: new item to store in the slot. 
@@ -194,8 +224,8 @@ void *radix_tree_delete(struct radix_tree_root *, unsigned long); unsigned int radix_tree_gang_lookup(struct radix_tree_root *root, void **results, unsigned long first_index, unsigned int max_items); -unsigned int -radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, +unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root, + void ***results, unsigned long *indices, unsigned long first_index, unsigned int max_items); unsigned long radix_tree_next_hole(struct radix_tree_root *root, unsigned long index, unsigned long max_scan); @@ -222,6 +252,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, unsigned long nr_to_tag, unsigned int fromtag, unsigned int totag); int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag); +unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item); static inline void radix_tree_preload_end(void) { diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index aa08fa8fd79b..9291ac3cc627 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -8,22 +8,15 @@ /* inode in-kernel data */ -#define SHMEM_NR_DIRECT 16 - -#define SHMEM_SYMLINK_INLINE_LEN (SHMEM_NR_DIRECT * sizeof(swp_entry_t)) - struct shmem_inode_info { spinlock_t lock; unsigned long flags; unsigned long alloced; /* data pages alloced to file */ - unsigned long swapped; /* subtotal assigned to swap */ - unsigned long next_index; /* highest alloced index + 1 */ - struct shared_policy policy; /* NUMA memory alloc policy */ - struct page *i_indirect; /* top indirect blocks page */ union { - swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* first blocks */ - char inline_symlink[SHMEM_SYMLINK_INLINE_LEN]; + unsigned long swapped; /* subtotal assigned to swap */ + char *symlink; /* unswappable short symlink */ }; + struct shared_policy policy; /* NUMA memory alloc policy */ struct list_head swaplist; /* chain of maybes on swap */ struct list_head 
xattr_list; /* list of shmem_xattr */ struct inode vfs_inode; @@ -49,7 +42,7 @@ static inline struct shmem_inode_info *SHMEM_I(struct inode *inode) /* * Functions in mm/shmem.c called directly from elsewhere: */ -extern int init_tmpfs(void); +extern int shmem_init(void); extern int shmem_fill_super(struct super_block *sb, void *data, int silent); extern struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags); @@ -59,8 +52,6 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); extern int shmem_unuse(swp_entry_t entry, struct page *page); -extern void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, - struct page **pagep, swp_entry_t *ent); static inline struct page *shmem_read_mapping_page( struct address_space *mapping, pgoff_t index) diff --git a/include/linux/swapops.h b/include/linux/swapops.h index cd42e30b7c6e..2189d3ffc85d 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -1,3 +1,8 @@ +#ifndef _LINUX_SWAPOPS_H +#define _LINUX_SWAPOPS_H + +#include <linux/radix-tree.h> + /* * swapcache pages are stored in the swapper_space radix tree. 
We want to * get good packing density in that tree, so the index should be dense in @@ -76,6 +81,22 @@ static inline pte_t swp_entry_to_pte(swp_entry_t entry) return __swp_entry_to_pte(arch_entry); } +static inline swp_entry_t radix_to_swp_entry(void *arg) +{ + swp_entry_t entry; + + entry.val = (unsigned long)arg >> RADIX_TREE_EXCEPTIONAL_SHIFT; + return entry; +} + +static inline void *swp_to_radix_entry(swp_entry_t entry) +{ + unsigned long value; + + value = entry.val << RADIX_TREE_EXCEPTIONAL_SHIFT; + return (void *)(value | RADIX_TREE_EXCEPTIONAL_ENTRY); +} + #ifdef CONFIG_MIGRATION static inline swp_entry_t make_migration_entry(struct page *page, int write) { @@ -169,3 +190,5 @@ static inline int non_swap_entry(swp_entry_t entry) return 0; } #endif + +#endif /* _LINUX_SWAPOPS_H */ diff --git a/include/linux/thermal.h b/include/linux/thermal.h index d3ec89fb4122..47b4a27e6e97 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -85,22 +85,6 @@ struct thermal_cooling_device { ((long)t-2732+5)/10 : ((long)t-2732-5)/10) #define CELSIUS_TO_KELVIN(t) ((t)*10+2732) -#if defined(CONFIG_THERMAL_HWMON) -/* thermal zone devices with the same type share one hwmon device */ -struct thermal_hwmon_device { - char type[THERMAL_NAME_LENGTH]; - struct device *device; - int count; - struct list_head tz_list; - struct list_head node; -}; - -struct thermal_hwmon_attr { - struct device_attribute attr; - char name[16]; -}; -#endif - struct thermal_zone_device { int id; char type[THERMAL_NAME_LENGTH]; @@ -120,12 +104,6 @@ struct thermal_zone_device { struct mutex lock; /* protect cooling devices list */ struct list_head node; struct delayed_work poll_queue; -#if defined(CONFIG_THERMAL_HWMON) - struct list_head hwmon_node; - struct thermal_hwmon_device *hwmon; - struct thermal_hwmon_attr temp_input; /* hwmon sys attr */ - struct thermal_hwmon_attr temp_crit; /* hwmon sys attr */ -#endif }; /* Adding event notification support elements */ #define 
THERMAL_GENL_FAMILY_NAME "thermal_event" diff --git a/init/main.c b/init/main.c index d7211faed2ad..1952d37e4ecb 100644 --- a/init/main.c +++ b/init/main.c @@ -715,7 +715,7 @@ static void __init do_basic_setup(void) { cpuset_init_smp(); usermodehelper_init(); - init_tmpfs(); + shmem_init(); driver_init(); init_irq_proc(); do_ctors(); diff --git a/ipc/shm.c b/ipc/shm.c index 9fb044f3b345..b5bae9d945b6 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -294,7 +294,7 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data) void shm_destroy_orphaned(struct ipc_namespace *ns) { down_write(&shm_ids(ns).rw_mutex); - if (&shm_ids(ns).in_use) + if (shm_ids(ns).in_use) idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns); up_write(&shm_ids(ns).rw_mutex); } @@ -304,9 +304,12 @@ void exit_shm(struct task_struct *task) { struct ipc_namespace *ns = task->nsproxy->ipc_ns; + if (shm_ids(ns).in_use == 0) + return; + /* Destroy all already created segments, but not mapped yet */ down_write(&shm_ids(ns).rw_mutex); - if (&shm_ids(ns).in_use) + if (shm_ids(ns).in_use) idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns); up_write(&shm_ids(ns).rw_mutex); } diff --git a/kernel/taskstats.c b/kernel/taskstats.c index d1db2880d1cf..e19ce1454ee1 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -291,30 +291,28 @@ static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd) if (!cpumask_subset(mask, cpu_possible_mask)) return -EINVAL; - s = NULL; if (isadd == REGISTER) { for_each_cpu(cpu, mask) { - if (!s) - s = kmalloc_node(sizeof(struct listener), - GFP_KERNEL, cpu_to_node(cpu)); + s = kmalloc_node(sizeof(struct listener), + GFP_KERNEL, cpu_to_node(cpu)); if (!s) goto cleanup; + s->pid = pid; - INIT_LIST_HEAD(&s->list); s->valid = 1; listeners = &per_cpu(listener_array, cpu); down_write(&listeners->sem); - list_for_each_entry_safe(s2, tmp, &listeners->list, list) { - if (s2->pid == pid) - goto next_cpu; + list_for_each_entry(s2, 
&listeners->list, list) { + if (s2->pid == pid && s2->valid) + goto exists; } list_add(&s->list, &listeners->list); s = NULL; -next_cpu: +exists: up_write(&listeners->sem); + kfree(s); /* nop if NULL */ } - kfree(s); return 0; } diff --git a/lib/fault-inject.c b/lib/fault-inject.c index 2577b121c7c1..f193b7796449 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -197,21 +197,15 @@ static struct dentry *debugfs_create_atomic_t(const char *name, mode_t mode, return debugfs_create_file(name, mode, parent, value, &fops_atomic_t); } -void cleanup_fault_attr_dentries(struct fault_attr *attr) -{ - debugfs_remove_recursive(attr->dir); -} - -int init_fault_attr_dentries(struct fault_attr *attr, const char *name) +struct dentry *fault_create_debugfs_attr(const char *name, + struct dentry *parent, struct fault_attr *attr) { mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; struct dentry *dir; - dir = debugfs_create_dir(name, NULL); + dir = debugfs_create_dir(name, parent); if (!dir) - return -ENOMEM; - - attr->dir = dir; + return ERR_PTR(-ENOMEM); if (!debugfs_create_ul("probability", mode, dir, &attr->probability)) goto fail; @@ -243,11 +237,11 @@ int init_fault_attr_dentries(struct fault_attr *attr, const char *name) #endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */ - return 0; + return dir; fail: - debugfs_remove_recursive(attr->dir); + debugfs_remove_recursive(dir); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } #endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */ diff --git a/lib/idr.c b/lib/idr.c index e15502e8b21e..db040ce3fa73 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -34,8 +34,10 @@ #include <linux/err.h> #include <linux/string.h> #include <linux/idr.h> +#include <linux/spinlock.h> static struct kmem_cache *idr_layer_cache; +static DEFINE_SPINLOCK(simple_ida_lock); static struct idr_layer *get_from_free_list(struct idr *idp) { @@ -926,6 +928,71 @@ void ida_destroy(struct ida *ida) EXPORT_SYMBOL(ida_destroy); /** + * ida_simple_get - get a new id. 
+ * @ida: the (initialized) ida. + * @start: the minimum id (inclusive, < 0x8000000) + * @end: the maximum id (exclusive, < 0x8000000 or 0) + * @gfp_mask: memory allocation flags + * + * Allocates an id in the range start <= id < end, or returns -ENOSPC. + * On memory allocation failure, returns -ENOMEM. + * + * Use ida_simple_remove() to get rid of an id. + */ +int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end, + gfp_t gfp_mask) +{ + int ret, id; + unsigned int max; + + BUG_ON((int)start < 0); + BUG_ON((int)end < 0); + + if (end == 0) + max = 0x80000000; + else { + BUG_ON(end < start); + max = end - 1; + } + +again: + if (!ida_pre_get(ida, gfp_mask)) + return -ENOMEM; + + spin_lock(&simple_ida_lock); + ret = ida_get_new_above(ida, start, &id); + if (!ret) { + if (id > max) { + ida_remove(ida, id); + ret = -ENOSPC; + } else { + ret = id; + } + } + spin_unlock(&simple_ida_lock); + + if (unlikely(ret == -EAGAIN)) + goto again; + + return ret; +} +EXPORT_SYMBOL(ida_simple_get); + +/** + * ida_simple_remove - remove an allocated id. + * @ida: the (initialized) ida. + * @id: the id returned by ida_simple_get. 
+ */ +void ida_simple_remove(struct ida *ida, unsigned int id) +{ + BUG_ON((int)id < 0); + spin_lock(&simple_ida_lock); + ida_remove(ida, id); + spin_unlock(&simple_ida_lock); +} +EXPORT_SYMBOL(ida_simple_remove); + +/** * ida_init - initialize ida handle * @ida: ida handle * diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 7ea2e033d715..a2f9da59c197 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -823,8 +823,8 @@ unsigned long radix_tree_prev_hole(struct radix_tree_root *root, EXPORT_SYMBOL(radix_tree_prev_hole); static unsigned int -__lookup(struct radix_tree_node *slot, void ***results, unsigned long index, - unsigned int max_items, unsigned long *next_index) +__lookup(struct radix_tree_node *slot, void ***results, unsigned long *indices, + unsigned long index, unsigned int max_items, unsigned long *next_index) { unsigned int nr_found = 0; unsigned int shift, height; @@ -857,12 +857,16 @@ __lookup(struct radix_tree_node *slot, void ***results, unsigned long index, /* Bottom level: grab some items */ for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { - index++; if (slot->slots[i]) { - results[nr_found++] = &(slot->slots[i]); - if (nr_found == max_items) + results[nr_found] = &(slot->slots[i]); + if (indices) + indices[nr_found] = index; + if (++nr_found == max_items) { + index++; goto out; + } } + index++; } out: *next_index = index; @@ -918,8 +922,8 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results, if (cur_index > max_index) break; - slots_found = __lookup(node, (void ***)results + ret, cur_index, - max_items - ret, &next_index); + slots_found = __lookup(node, (void ***)results + ret, NULL, + cur_index, max_items - ret, &next_index); nr_found = 0; for (i = 0; i < slots_found; i++) { struct radix_tree_node *slot; @@ -944,6 +948,7 @@ EXPORT_SYMBOL(radix_tree_gang_lookup); * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree * @root: radix tree root * @results: where the results of the 
lookup are placed + * @indices: where their indices should be placed (but usually NULL) * @first_index: start the lookup from this key * @max_items: place up to this many items at *results * @@ -958,7 +963,8 @@ EXPORT_SYMBOL(radix_tree_gang_lookup); * protection, radix_tree_deref_slot may fail requiring a retry. */ unsigned int -radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, +radix_tree_gang_lookup_slot(struct radix_tree_root *root, + void ***results, unsigned long *indices, unsigned long first_index, unsigned int max_items) { unsigned long max_index; @@ -974,6 +980,8 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, if (first_index > 0) return 0; results[0] = (void **)&root->rnode; + if (indices) + indices[0] = 0; return 1; } node = indirect_to_ptr(node); @@ -987,8 +995,9 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, if (cur_index > max_index) break; - slots_found = __lookup(node, results + ret, cur_index, - max_items - ret, &next_index); + slots_found = __lookup(node, results + ret, + indices ? indices + ret : NULL, + cur_index, max_items - ret, &next_index); ret += slots_found; if (next_index == 0) break; @@ -1194,6 +1203,98 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results, } EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); +#if defined(CONFIG_SHMEM) && defined(CONFIG_SWAP) +#include <linux/sched.h> /* for cond_resched() */ + +/* + * This linear search is at present only useful to shmem_unuse_inode(). 
+ */ +static unsigned long __locate(struct radix_tree_node *slot, void *item, + unsigned long index, unsigned long *found_index) +{ + unsigned int shift, height; + unsigned long i; + + height = slot->height; + shift = (height-1) * RADIX_TREE_MAP_SHIFT; + + for ( ; height > 1; height--) { + i = (index >> shift) & RADIX_TREE_MAP_MASK; + for (;;) { + if (slot->slots[i] != NULL) + break; + index &= ~((1UL << shift) - 1); + index += 1UL << shift; + if (index == 0) + goto out; /* 32-bit wraparound */ + i++; + if (i == RADIX_TREE_MAP_SIZE) + goto out; + } + + shift -= RADIX_TREE_MAP_SHIFT; + slot = rcu_dereference_raw(slot->slots[i]); + if (slot == NULL) + goto out; + } + + /* Bottom level: check items */ + for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { + if (slot->slots[i] == item) { + *found_index = index + i; + index = 0; + goto out; + } + } + index += RADIX_TREE_MAP_SIZE; +out: + return index; +} + +/** + * radix_tree_locate_item - search through radix tree for item + * @root: radix tree root + * @item: item to be found + * + * Returns index where item was found, or -1 if not found. + * Caller must hold no lock (since this time-consuming function needs + * to be preemptible), and must check afterwards if item is still there. 
+ */ +unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) +{ + struct radix_tree_node *node; + unsigned long max_index; + unsigned long cur_index = 0; + unsigned long found_index = -1; + + do { + rcu_read_lock(); + node = rcu_dereference_raw(root->rnode); + if (!radix_tree_is_indirect_ptr(node)) { + rcu_read_unlock(); + if (node == item) + found_index = 0; + break; + } + + node = indirect_to_ptr(node); + max_index = radix_tree_maxindex(node->height); + if (cur_index > max_index) + break; + + cur_index = __locate(node, item, cur_index, &found_index); + rcu_read_unlock(); + cond_resched(); + } while (cur_index != 0 && cur_index <= max_index); + + return found_index; +} +#else +unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) +{ + return -1; +} +#endif /* CONFIG_SHMEM && CONFIG_SWAP */ /** * radix_tree_shrink - shrink height of a radix tree to minimal diff --git a/mm/failslab.c b/mm/failslab.c index 1ce58c201dca..0dd7b8fec71c 100644 --- a/mm/failslab.c +++ b/mm/failslab.c @@ -34,23 +34,23 @@ __setup("failslab=", setup_failslab); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS static int __init failslab_debugfs_init(void) { + struct dentry *dir; mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; - int err; - err = init_fault_attr_dentries(&failslab.attr, "failslab"); - if (err) - return err; + dir = fault_create_debugfs_attr("failslab", NULL, &failslab.attr); + if (IS_ERR(dir)) + return PTR_ERR(dir); - if (!debugfs_create_bool("ignore-gfp-wait", mode, failslab.attr.dir, + if (!debugfs_create_bool("ignore-gfp-wait", mode, dir, &failslab.ignore_gfp_wait)) goto fail; - if (!debugfs_create_bool("cache-filter", mode, failslab.attr.dir, + if (!debugfs_create_bool("cache-filter", mode, dir, &failslab.cache_filter)) goto fail; return 0; fail: - cleanup_fault_attr_dentries(&failslab.attr); + debugfs_remove_recursive(dir); return -ENOMEM; } diff --git a/mm/filemap.c b/mm/filemap.c index 867d40222ec7..645a080ba4df 100644 --- a/mm/filemap.c 
+++ b/mm/filemap.c @@ -33,7 +33,6 @@ #include <linux/cpuset.h> #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */ #include <linux/memcontrol.h> -#include <linux/mm_inline.h> /* for page_is_file_cache() */ #include <linux/cleancache.h> #include "internal.h" @@ -462,6 +461,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, int error; VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON(PageSwapBacked(page)); error = mem_cgroup_cache_charge(page, current->mm, gfp_mask & GFP_RECLAIM_MASK); @@ -479,8 +479,6 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, if (likely(!error)) { mapping->nrpages++; __inc_zone_page_state(page, NR_FILE_PAGES); - if (PageSwapBacked(page)) - __inc_zone_page_state(page, NR_SHMEM); spin_unlock_irq(&mapping->tree_lock); } else { page->mapping = NULL; @@ -502,22 +500,9 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping, { int ret; - /* - * Splice_read and readahead add shmem/tmpfs pages into the page cache - * before shmem_readpage has a chance to mark them as SwapBacked: they - * need to go on the anon lru below, and mem_cgroup_cache_charge - * (called in add_to_page_cache) needs to know where they're going too. - */ - if (mapping_cap_swap_backed(mapping)) - SetPageSwapBacked(page); - ret = add_to_page_cache(page, mapping, offset, gfp_mask); - if (ret == 0) { - if (page_is_file_cache(page)) - lru_cache_add_file(page); - else - lru_cache_add_anon(page); - } + if (ret == 0) + lru_cache_add_file(page); return ret; } EXPORT_SYMBOL_GPL(add_to_page_cache_lru); @@ -714,9 +699,16 @@ repeat: page = radix_tree_deref_slot(pagep); if (unlikely(!page)) goto out; - if (radix_tree_deref_retry(page)) - goto repeat; - + if (radix_tree_exception(page)) { + if (radix_tree_deref_retry(page)) + goto repeat; + /* + * Otherwise, shmem/tmpfs must be storing a swap entry + * here as an exceptional entry: so return it without + * attempting to raise page count. 
+ */ + goto out; + } if (!page_cache_get_speculative(page)) goto repeat; @@ -753,7 +745,7 @@ struct page *find_lock_page(struct address_space *mapping, pgoff_t offset) repeat: page = find_get_page(mapping, offset); - if (page) { + if (page && !radix_tree_exception(page)) { lock_page(page); /* Has the page been truncated? */ if (unlikely(page->mapping != mapping)) { @@ -840,7 +832,7 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start, rcu_read_lock(); restart: nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, - (void ***)pages, start, nr_pages); + (void ***)pages, NULL, start, nr_pages); ret = 0; for (i = 0; i < nr_found; i++) { struct page *page; @@ -849,13 +841,22 @@ repeat: if (unlikely(!page)) continue; - /* - * This can only trigger when the entry at index 0 moves out - * of or back to the root: none yet gotten, safe to restart. - */ - if (radix_tree_deref_retry(page)) { - WARN_ON(start | i); - goto restart; + if (radix_tree_exception(page)) { + if (radix_tree_deref_retry(page)) { + /* + * Transient condition which can only trigger + * when entry at index 0 moves out of or back + * to root: none yet gotten, safe to restart. + */ + WARN_ON(start | i); + goto restart; + } + /* + * Otherwise, shmem/tmpfs must be storing a swap entry + * here as an exceptional entry: so skip over it - + * we only reach this from invalidate_mapping_pages(). + */ + continue; } if (!page_cache_get_speculative(page)) @@ -903,7 +904,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index, rcu_read_lock(); restart: nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, - (void ***)pages, index, nr_pages); + (void ***)pages, NULL, index, nr_pages); ret = 0; for (i = 0; i < nr_found; i++) { struct page *page; @@ -912,12 +913,22 @@ repeat: if (unlikely(!page)) continue; - /* - * This can only trigger when the entry at index 0 moves out - * of or back to the root: none yet gotten, safe to restart. 
- */ - if (radix_tree_deref_retry(page)) - goto restart; + if (radix_tree_exception(page)) { + if (radix_tree_deref_retry(page)) { + /* + * Transient condition which can only trigger + * when entry at index 0 moves out of or back + * to root: none yet gotten, safe to restart. + */ + goto restart; + } + /* + * Otherwise, shmem/tmpfs must be storing a swap entry + * here as an exceptional entry: so stop looking for + * contiguous pages. + */ + break; + } if (!page_cache_get_speculative(page)) goto repeat; @@ -977,12 +988,21 @@ repeat: if (unlikely(!page)) continue; - /* - * This can only trigger when the entry at index 0 moves out - * of or back to the root: none yet gotten, safe to restart. - */ - if (radix_tree_deref_retry(page)) - goto restart; + if (radix_tree_exception(page)) { + if (radix_tree_deref_retry(page)) { + /* + * Transient condition which can only trigger + * when entry at index 0 moves out of or back + * to root: none yet gotten, safe to restart. + */ + goto restart; + } + /* + * This function is never used on a shmem/tmpfs + * mapping, so a swap entry won't be found here. + */ + BUG(); + } if (!page_cache_get_speculative(page)) goto repeat; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 5f84d2351ddb..f4ec4e7ca4cd 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -35,7 +35,6 @@ #include <linux/limits.h> #include <linux/mutex.h> #include <linux/rbtree.h> -#include <linux/shmem_fs.h> #include <linux/slab.h> #include <linux/swap.h> #include <linux/swapops.h> @@ -2873,30 +2872,6 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, return 0; if (PageCompound(page)) return 0; - /* - * Corner case handling. This is called from add_to_page_cache() - * in usual. But some FS (shmem) precharges this page before calling it - * and call add_to_page_cache() with GFP_NOWAIT. - * - * For GFP_NOWAIT case, the page may be pre-charged before calling - * add_to_page_cache(). (See shmem.c) check it here and avoid to call - * charge twice. 
(It works but has to pay a bit larger cost.) - * And when the page is SwapCache, it should take swap information - * into account. This is under lock_page() now. - */ - if (!(gfp_mask & __GFP_WAIT)) { - struct page_cgroup *pc; - - pc = lookup_page_cgroup(page); - if (!pc) - return 0; - lock_page_cgroup(pc); - if (PageCgroupUsed(pc)) { - unlock_page_cgroup(pc); - return 0; - } - unlock_page_cgroup(pc); - } if (unlikely(!mm)) mm = &init_mm; @@ -3486,31 +3461,6 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem, cgroup_release_and_wakeup_rmdir(&mem->css); } -/* - * A call to try to shrink memory usage on charge failure at shmem's swapin. - * Calling hierarchical_reclaim is not enough because we should update - * last_oom_jiffies to prevent pagefault_out_of_memory from invoking global OOM. - * Moreover considering hierarchy, we should reclaim from the mem_over_limit, - * not from the memcg which this page would be charged to. - * try_charge_swapin does all of these works properly. - */ -int mem_cgroup_shmem_charge_fallback(struct page *page, - struct mm_struct *mm, - gfp_t gfp_mask) -{ - struct mem_cgroup *mem; - int ret; - - if (mem_cgroup_disabled()) - return 0; - - ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem); - if (!ret) - mem_cgroup_cancel_charge_swapin(mem); /* it does !mem check */ - - return ret; -} - #ifdef CONFIG_DEBUG_VM static struct page_cgroup *lookup_page_cgroup_used(struct page *page) { @@ -5330,15 +5280,17 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma, pgoff = pte_to_pgoff(ptent); /* page is moved even if it's not RSS of this task(page-faulted). */ - if (!mapping_cap_swap_backed(mapping)) { /* normal file */ - page = find_get_page(mapping, pgoff); - } else { /* shmem/tmpfs file. we should take account of swap too. 
*/ - swp_entry_t ent; - mem_cgroup_get_shmem_target(inode, pgoff, &page, &ent); + page = find_get_page(mapping, pgoff); + +#ifdef CONFIG_SWAP + /* shmem/tmpfs may report page out on swap: account for that too. */ + if (radix_tree_exceptional_entry(page)) { + swp_entry_t swap = radix_to_swp_entry(page); if (do_swap_account) - entry->val = ent.val; + *entry = swap; + page = find_get_page(&swapper_space, swap.val); } - +#endif return page; } diff --git a/mm/mincore.c b/mm/mincore.c index a4e6b9d75c76..636a86876ff2 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -69,12 +69,15 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff) * file will not get a swp_entry_t in its pte, but rather it is like * any other file mapping (ie. marked !present and faulted in with * tmpfs's .fault). So swapped out tmpfs mappings are tested here. - * - * However when tmpfs moves the page from pagecache and into swapcache, - * it is still in core, but the find_get_page below won't find it. - * No big deal, but make a note of it. */ page = find_get_page(mapping, pgoff); +#ifdef CONFIG_SWAP + /* shmem/tmpfs may return swap: account for swapcache page too. 
*/ + if (radix_tree_exceptional_entry(page)) { + swp_entry_t swap = radix_to_swp_entry(page); + page = find_get_page(&swapper_space, swap.val); + } +#endif if (page) { present = PageUptodate(page); page_cache_release(page); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1dbcf8888f14..6e8ecb6e021c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1409,14 +1409,11 @@ static int __init fail_page_alloc_debugfs(void) { mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; struct dentry *dir; - int err; - err = init_fault_attr_dentries(&fail_page_alloc.attr, - "fail_page_alloc"); - if (err) - return err; - - dir = fail_page_alloc.attr.dir; + dir = fault_create_debugfs_attr("fail_page_alloc", NULL, + &fail_page_alloc.attr); + if (IS_ERR(dir)) + return PTR_ERR(dir); if (!debugfs_create_bool("ignore-gfp-wait", mode, dir, &fail_page_alloc.ignore_gfp_wait)) @@ -1430,7 +1427,7 @@ static int __init fail_page_alloc_debugfs(void) return 0; fail: - cleanup_fault_attr_dentries(&fail_page_alloc.attr); + debugfs_remove_recursive(dir); return -ENOMEM; } diff --git a/mm/shmem.c b/mm/shmem.c index 5cc21f8b4cd3..32f6763f16fb 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -6,7 +6,8 @@ * 2000-2001 Christoph Rohland * 2000-2001 SAP AG * 2002 Red Hat Inc. - * Copyright (C) 2002-2005 Hugh Dickins. + * Copyright (C) 2002-2011 Hugh Dickins. + * Copyright (C) 2011 Google Inc. * Copyright (C) 2002-2005 VERITAS Software Corporation. 
* Copyright (C) 2004 Andi Kleen, SuSE Labs * @@ -28,7 +29,6 @@ #include <linux/file.h> #include <linux/mm.h> #include <linux/module.h> -#include <linux/percpu_counter.h> #include <linux/swap.h> static struct vfsmount *shm_mnt; @@ -51,6 +51,8 @@ static struct vfsmount *shm_mnt; #include <linux/shmem_fs.h> #include <linux/writeback.h> #include <linux/blkdev.h> +#include <linux/pagevec.h> +#include <linux/percpu_counter.h> #include <linux/splice.h> #include <linux/security.h> #include <linux/swapops.h> @@ -63,43 +65,17 @@ static struct vfsmount *shm_mnt; #include <linux/magic.h> #include <asm/uaccess.h> -#include <asm/div64.h> #include <asm/pgtable.h> -/* - * The maximum size of a shmem/tmpfs file is limited by the maximum size of - * its triple-indirect swap vector - see illustration at shmem_swp_entry(). - * - * With 4kB page size, maximum file size is just over 2TB on a 32-bit kernel, - * but one eighth of that on a 64-bit kernel. With 8kB page size, maximum - * file size is just over 4TB on a 64-bit kernel, but 16TB on a 32-bit kernel, - * MAX_LFS_FILESIZE being then more restrictive than swap vector layout. - * - * We use / and * instead of shifts in the definitions below, so that the swap - * vector can be tested with small even values (e.g. 20) for ENTRIES_PER_PAGE. 
- */ -#define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long)) -#define ENTRIES_PER_PAGEPAGE ((unsigned long long)ENTRIES_PER_PAGE*ENTRIES_PER_PAGE) - -#define SHMSWP_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1)) -#define SHMSWP_MAX_BYTES (SHMSWP_MAX_INDEX << PAGE_CACHE_SHIFT) - -#define SHMEM_MAX_BYTES min_t(unsigned long long, SHMSWP_MAX_BYTES, MAX_LFS_FILESIZE) -#define SHMEM_MAX_INDEX ((unsigned long)((SHMEM_MAX_BYTES+1) >> PAGE_CACHE_SHIFT)) - #define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) #define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT) -/* info->flags needs VM_flags to handle pagein/truncate races efficiently */ -#define SHMEM_PAGEIN VM_READ -#define SHMEM_TRUNCATE VM_WRITE - -/* Definition to limit shmem_truncate's steps between cond_rescheds */ -#define LATENCY_LIMIT 64 - /* Pretend that each entry is of this size in directory's i_size */ #define BOGO_DIRENT_SIZE 20 +/* Symlink up to this size is kmalloc'ed instead of using a swappable page */ +#define SHORT_SYMLINK_LEN 128 + struct shmem_xattr { struct list_head list; /* anchored by shmem_inode_info->xattr_list */ char *name; /* xattr name */ @@ -107,7 +83,7 @@ struct shmem_xattr { char value[0]; }; -/* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */ +/* Flag allocation requirements to shmem_getpage */ enum sgp_type { SGP_READ, /* don't exceed i_size, don't allocate page */ SGP_CACHE, /* don't exceed i_size, may allocate page */ @@ -137,56 +113,6 @@ static inline int shmem_getpage(struct inode *inode, pgoff_t index, mapping_gfp_mask(inode->i_mapping), fault_type); } -static inline struct page *shmem_dir_alloc(gfp_t gfp_mask) -{ - /* - * The above definition of ENTRIES_PER_PAGE, and the use of - * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE: - * might be reconsidered if it ever diverges from PAGE_SIZE. 
- * - * Mobility flags are masked out as swap vectors cannot move - */ - return alloc_pages((gfp_mask & ~GFP_MOVABLE_MASK) | __GFP_ZERO, - PAGE_CACHE_SHIFT-PAGE_SHIFT); -} - -static inline void shmem_dir_free(struct page *page) -{ - __free_pages(page, PAGE_CACHE_SHIFT-PAGE_SHIFT); -} - -static struct page **shmem_dir_map(struct page *page) -{ - return (struct page **)kmap_atomic(page, KM_USER0); -} - -static inline void shmem_dir_unmap(struct page **dir) -{ - kunmap_atomic(dir, KM_USER0); -} - -static swp_entry_t *shmem_swp_map(struct page *page) -{ - return (swp_entry_t *)kmap_atomic(page, KM_USER1); -} - -static inline void shmem_swp_balance_unmap(void) -{ - /* - * When passing a pointer to an i_direct entry, to code which - * also handles indirect entries and so will shmem_swp_unmap, - * we must arrange for the preempt count to remain in balance. - * What kmap_atomic of a lowmem page does depends on config - * and architecture, so pretend to kmap_atomic some lowmem page. - */ - (void) kmap_atomic(ZERO_PAGE(0), KM_USER1); -} - -static inline void shmem_swp_unmap(swp_entry_t *entry) -{ - kunmap_atomic(entry, KM_USER1); -} - static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb) { return sb->s_fs_info; @@ -244,15 +170,6 @@ static struct backing_dev_info shmem_backing_dev_info __read_mostly = { static LIST_HEAD(shmem_swaplist); static DEFINE_MUTEX(shmem_swaplist_mutex); -static void shmem_free_blocks(struct inode *inode, long pages) -{ - struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); - if (sbinfo->max_blocks) { - percpu_counter_add(&sbinfo->used_blocks, -pages); - inode->i_blocks -= pages*BLOCKS_PER_PAGE; - } -} - static int shmem_reserve_inode(struct super_block *sb) { struct shmem_sb_info *sbinfo = SHMEM_SB(sb); @@ -279,7 +196,7 @@ static void shmem_free_inode(struct super_block *sb) } /** - * shmem_recalc_inode - recalculate the size of an inode + * shmem_recalc_inode - recalculate the block usage of an inode * @inode: inode to recalc * * We 
have to calculate the free blocks since the mm can drop @@ -297,474 +214,297 @@ static void shmem_recalc_inode(struct inode *inode) freed = info->alloced - info->swapped - inode->i_mapping->nrpages; if (freed > 0) { + struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); + if (sbinfo->max_blocks) + percpu_counter_add(&sbinfo->used_blocks, -freed); info->alloced -= freed; + inode->i_blocks -= freed * BLOCKS_PER_PAGE; shmem_unacct_blocks(info->flags, freed); - shmem_free_blocks(inode, freed); } } -/** - * shmem_swp_entry - find the swap vector position in the info structure - * @info: info structure for the inode - * @index: index of the page to find - * @page: optional page to add to the structure. Has to be preset to - * all zeros - * - * If there is no space allocated yet it will return NULL when - * page is NULL, else it will use the page for the needed block, - * setting it to NULL on return to indicate that it has been used. - * - * The swap vector is organized the following way: - * - * There are SHMEM_NR_DIRECT entries directly stored in the - * shmem_inode_info structure. So small files do not need an addional - * allocation. - * - * For pages with index > SHMEM_NR_DIRECT there is the pointer - * i_indirect which points to a page which holds in the first half - * doubly indirect blocks, in the second half triple indirect blocks: - * - * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the - * following layout (for SHMEM_NR_DIRECT == 16): - * - * i_indirect -> dir --> 16-19 - * | +-> 20-23 - * | - * +-->dir2 --> 24-27 - * | +-> 28-31 - * | +-> 32-35 - * | +-> 36-39 - * | - * +-->dir3 --> 40-43 - * +-> 44-47 - * +-> 48-51 - * +-> 52-55 +/* + * Replace item expected in radix tree by a new item, while holding tree lock. 
*/ -static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, struct page **page) -{ - unsigned long offset; - struct page **dir; - struct page *subdir; - - if (index < SHMEM_NR_DIRECT) { - shmem_swp_balance_unmap(); - return info->i_direct+index; - } - if (!info->i_indirect) { - if (page) { - info->i_indirect = *page; - *page = NULL; - } - return NULL; /* need another page */ - } - - index -= SHMEM_NR_DIRECT; - offset = index % ENTRIES_PER_PAGE; - index /= ENTRIES_PER_PAGE; - dir = shmem_dir_map(info->i_indirect); - - if (index >= ENTRIES_PER_PAGE/2) { - index -= ENTRIES_PER_PAGE/2; - dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE; - index %= ENTRIES_PER_PAGE; - subdir = *dir; - if (!subdir) { - if (page) { - *dir = *page; - *page = NULL; - } - shmem_dir_unmap(dir); - return NULL; /* need another page */ - } - shmem_dir_unmap(dir); - dir = shmem_dir_map(subdir); - } +static int shmem_radix_tree_replace(struct address_space *mapping, + pgoff_t index, void *expected, void *replacement) +{ + void **pslot; + void *item = NULL; + + VM_BUG_ON(!expected); + pslot = radix_tree_lookup_slot(&mapping->page_tree, index); + if (pslot) + item = radix_tree_deref_slot_protected(pslot, + &mapping->tree_lock); + if (item != expected) + return -ENOENT; + if (replacement) + radix_tree_replace_slot(pslot, replacement); + else + radix_tree_delete(&mapping->page_tree, index); + return 0; +} - dir += index; - subdir = *dir; - if (!subdir) { - if (!page || !(subdir = *page)) { - shmem_dir_unmap(dir); - return NULL; /* need a page */ +/* + * Like add_to_page_cache_locked, but error if expected item has gone. 
+ */ +static int shmem_add_to_page_cache(struct page *page, + struct address_space *mapping, + pgoff_t index, gfp_t gfp, void *expected) +{ + int error = 0; + + VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON(!PageSwapBacked(page)); + + if (!expected) + error = radix_tree_preload(gfp & GFP_RECLAIM_MASK); + if (!error) { + page_cache_get(page); + page->mapping = mapping; + page->index = index; + + spin_lock_irq(&mapping->tree_lock); + if (!expected) + error = radix_tree_insert(&mapping->page_tree, + index, page); + else + error = shmem_radix_tree_replace(mapping, index, + expected, page); + if (!error) { + mapping->nrpages++; + __inc_zone_page_state(page, NR_FILE_PAGES); + __inc_zone_page_state(page, NR_SHMEM); + spin_unlock_irq(&mapping->tree_lock); + } else { + page->mapping = NULL; + spin_unlock_irq(&mapping->tree_lock); + page_cache_release(page); } - *dir = subdir; - *page = NULL; + if (!expected) + radix_tree_preload_end(); } - shmem_dir_unmap(dir); - return shmem_swp_map(subdir) + offset; + if (error) + mem_cgroup_uncharge_cache_page(page); + return error; } -static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, unsigned long value) +/* + * Like delete_from_page_cache, but substitutes swap for page. + */ +static void shmem_delete_from_page_cache(struct page *page, void *radswap) { - long incdec = value? 
1: -1; + struct address_space *mapping = page->mapping; + int error; - entry->val = value; - info->swapped += incdec; - if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT) { - struct page *page = kmap_atomic_to_page(entry); - set_page_private(page, page_private(page) + incdec); - } + spin_lock_irq(&mapping->tree_lock); + error = shmem_radix_tree_replace(mapping, page->index, page, radswap); + page->mapping = NULL; + mapping->nrpages--; + __dec_zone_page_state(page, NR_FILE_PAGES); + __dec_zone_page_state(page, NR_SHMEM); + spin_unlock_irq(&mapping->tree_lock); + page_cache_release(page); + BUG_ON(error); } -/** - * shmem_swp_alloc - get the position of the swap entry for the page. - * @info: info structure for the inode - * @index: index of the page to find - * @sgp: check and recheck i_size? skip allocation? - * @gfp: gfp mask to use for any page allocation - * - * If the entry does not exist, allocate it. +/* + * Like find_get_pages, but collecting swap entries as well as pages. */ -static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, - unsigned long index, enum sgp_type sgp, gfp_t gfp) -{ - struct inode *inode = &info->vfs_inode; - struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); - struct page *page = NULL; - swp_entry_t *entry; - - if (sgp != SGP_WRITE && - ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) - return ERR_PTR(-EINVAL); - - while (!(entry = shmem_swp_entry(info, index, &page))) { - if (sgp == SGP_READ) - return shmem_swp_map(ZERO_PAGE(0)); - /* - * Test used_blocks against 1 less max_blocks, since we have 1 data - * page (and perhaps indirect index pages) yet to allocate: - * a waste to allocate index if we cannot allocate data. 
- */ - if (sbinfo->max_blocks) { - if (percpu_counter_compare(&sbinfo->used_blocks, - sbinfo->max_blocks - 1) >= 0) - return ERR_PTR(-ENOSPC); - percpu_counter_inc(&sbinfo->used_blocks); - inode->i_blocks += BLOCKS_PER_PAGE; +static unsigned shmem_find_get_pages_and_swap(struct address_space *mapping, + pgoff_t start, unsigned int nr_pages, + struct page **pages, pgoff_t *indices) +{ + unsigned int i; + unsigned int ret; + unsigned int nr_found; + + rcu_read_lock(); +restart: + nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, + (void ***)pages, indices, start, nr_pages); + ret = 0; + for (i = 0; i < nr_found; i++) { + struct page *page; +repeat: + page = radix_tree_deref_slot((void **)pages[i]); + if (unlikely(!page)) + continue; + if (radix_tree_exception(page)) { + if (radix_tree_deref_retry(page)) + goto restart; + /* + * Otherwise, we must be storing a swap entry + * here as an exceptional entry: so return it + * without attempting to raise page count. + */ + goto export; } + if (!page_cache_get_speculative(page)) + goto repeat; - spin_unlock(&info->lock); - page = shmem_dir_alloc(gfp); - spin_lock(&info->lock); - - if (!page) { - shmem_free_blocks(inode, 1); - return ERR_PTR(-ENOMEM); - } - if (sgp != SGP_WRITE && - ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { - entry = ERR_PTR(-EINVAL); - break; + /* Has the page moved? 
*/ + if (unlikely(page != *((void **)pages[i]))) { + page_cache_release(page); + goto repeat; } - if (info->next_index <= index) - info->next_index = index + 1; - } - if (page) { - /* another task gave its page, or truncated the file */ - shmem_free_blocks(inode, 1); - shmem_dir_free(page); - } - if (info->next_index <= index && !IS_ERR(entry)) - info->next_index = index + 1; - return entry; +export: + indices[ret] = indices[i]; + pages[ret] = page; + ret++; + } + if (unlikely(!ret && nr_found)) + goto restart; + rcu_read_unlock(); + return ret; } -/** - * shmem_free_swp - free some swap entries in a directory - * @dir: pointer to the directory - * @edir: pointer after last entry of the directory - * @punch_lock: pointer to spinlock when needed for the holepunch case +/* + * Remove swap entry from radix tree, free the swap and its page cache. */ -static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir, - spinlock_t *punch_lock) -{ - spinlock_t *punch_unlock = NULL; - swp_entry_t *ptr; - int freed = 0; - - for (ptr = dir; ptr < edir; ptr++) { - if (ptr->val) { - if (unlikely(punch_lock)) { - punch_unlock = punch_lock; - punch_lock = NULL; - spin_lock(punch_unlock); - if (!ptr->val) - continue; - } - free_swap_and_cache(*ptr); - *ptr = (swp_entry_t){0}; - freed++; - } - } - if (punch_unlock) - spin_unlock(punch_unlock); - return freed; -} - -static int shmem_map_and_free_swp(struct page *subdir, int offset, - int limit, struct page ***dir, spinlock_t *punch_lock) -{ - swp_entry_t *ptr; - int freed = 0; - - ptr = shmem_swp_map(subdir); - for (; offset < limit; offset += LATENCY_LIMIT) { - int size = limit - offset; - if (size > LATENCY_LIMIT) - size = LATENCY_LIMIT; - freed += shmem_free_swp(ptr+offset, ptr+offset+size, - punch_lock); - if (need_resched()) { - shmem_swp_unmap(ptr); - if (*dir) { - shmem_dir_unmap(*dir); - *dir = NULL; - } - cond_resched(); - ptr = shmem_swp_map(subdir); - } - } - shmem_swp_unmap(ptr); - return freed; +static int 
shmem_free_swap(struct address_space *mapping, + pgoff_t index, void *radswap) +{ + int error; + + spin_lock_irq(&mapping->tree_lock); + error = shmem_radix_tree_replace(mapping, index, radswap, NULL); + spin_unlock_irq(&mapping->tree_lock); + if (!error) + free_swap_and_cache(radix_to_swp_entry(radswap)); + return error; } -static void shmem_free_pages(struct list_head *next) +/* + * Pagevec may contain swap entries, so shuffle up pages before releasing. + */ +static void shmem_pagevec_release(struct pagevec *pvec) { - struct page *page; - int freed = 0; - - do { - page = container_of(next, struct page, lru); - next = next->next; - shmem_dir_free(page); - freed++; - if (freed >= LATENCY_LIMIT) { - cond_resched(); - freed = 0; - } - } while (next); + int i, j; + + for (i = 0, j = 0; i < pagevec_count(pvec); i++) { + struct page *page = pvec->pages[i]; + if (!radix_tree_exceptional_entry(page)) + pvec->pages[j++] = page; + } + pvec->nr = j; + pagevec_release(pvec); } -void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) +/* + * Remove range of pages and swap entries from radix tree, and free them. 
+ */ +void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) { + struct address_space *mapping = inode->i_mapping; struct shmem_inode_info *info = SHMEM_I(inode); - unsigned long idx; - unsigned long size; - unsigned long limit; - unsigned long stage; - unsigned long diroff; - struct page **dir; - struct page *topdir; - struct page *middir; - struct page *subdir; - swp_entry_t *ptr; - LIST_HEAD(pages_to_free); - long nr_pages_to_free = 0; + pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + unsigned partial = lstart & (PAGE_CACHE_SIZE - 1); + pgoff_t end = (lend >> PAGE_CACHE_SHIFT); + struct pagevec pvec; + pgoff_t indices[PAGEVEC_SIZE]; long nr_swaps_freed = 0; - int offset; - int freed; - int punch_hole; - spinlock_t *needs_lock; - spinlock_t *punch_lock; - unsigned long upper_limit; + pgoff_t index; + int i; - truncate_inode_pages_range(inode->i_mapping, start, end); + BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1)); - inode->i_ctime = inode->i_mtime = CURRENT_TIME; - idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (idx >= info->next_index) - return; + pagevec_init(&pvec, 0); + index = start; + while (index <= end) { + pvec.nr = shmem_find_get_pages_and_swap(mapping, index, + min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, + pvec.pages, indices); + if (!pvec.nr) + break; + mem_cgroup_uncharge_start(); + for (i = 0; i < pagevec_count(&pvec); i++) { + struct page *page = pvec.pages[i]; - spin_lock(&info->lock); - info->flags |= SHMEM_TRUNCATE; - if (likely(end == (loff_t) -1)) { - limit = info->next_index; - upper_limit = SHMEM_MAX_INDEX; - info->next_index = idx; - needs_lock = NULL; - punch_hole = 0; - } else { - if (end + 1 >= inode->i_size) { /* we may free a little more */ - limit = (inode->i_size + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT; - upper_limit = SHMEM_MAX_INDEX; - } else { - limit = (end + 1) >> PAGE_CACHE_SHIFT; - upper_limit = limit; - } - needs_lock = &info->lock; - 
punch_hole = 1; - } + index = indices[i]; + if (index > end) + break; + + if (radix_tree_exceptional_entry(page)) { + nr_swaps_freed += !shmem_free_swap(mapping, + index, page); + continue; + } - topdir = info->i_indirect; - if (topdir && idx <= SHMEM_NR_DIRECT && !punch_hole) { - info->i_indirect = NULL; - nr_pages_to_free++; - list_add(&topdir->lru, &pages_to_free); + if (!trylock_page(page)) + continue; + if (page->mapping == mapping) { + VM_BUG_ON(PageWriteback(page)); + truncate_inode_page(mapping, page); + } + unlock_page(page); + } + shmem_pagevec_release(&pvec); + mem_cgroup_uncharge_end(); + cond_resched(); + index++; } - spin_unlock(&info->lock); - if (info->swapped && idx < SHMEM_NR_DIRECT) { - ptr = info->i_direct; - size = limit; - if (size > SHMEM_NR_DIRECT) - size = SHMEM_NR_DIRECT; - nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size, needs_lock); + if (partial) { + struct page *page = NULL; + shmem_getpage(inode, start - 1, &page, SGP_READ, NULL); + if (page) { + zero_user_segment(page, partial, PAGE_CACHE_SIZE); + set_page_dirty(page); + unlock_page(page); + page_cache_release(page); + } } - /* - * If there are no indirect blocks or we are punching a hole - * below indirect blocks, nothing to be done. - */ - if (!topdir || limit <= SHMEM_NR_DIRECT) - goto done2; + index = start; + for ( ; ; ) { + cond_resched(); + pvec.nr = shmem_find_get_pages_and_swap(mapping, index, + min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, + pvec.pages, indices); + if (!pvec.nr) { + if (index == start) + break; + index = start; + continue; + } + if (index == start && indices[0] > end) { + shmem_pagevec_release(&pvec); + break; + } + mem_cgroup_uncharge_start(); + for (i = 0; i < pagevec_count(&pvec); i++) { + struct page *page = pvec.pages[i]; - /* - * The truncation case has already dropped info->lock, and we're safe - * because i_size and next_index have already been lowered, preventing - * access beyond. 
But in the punch_hole case, we still need to take - * the lock when updating the swap directory, because there might be - * racing accesses by shmem_getpage(SGP_CACHE), shmem_unuse_inode or - * shmem_writepage. However, whenever we find we can remove a whole - * directory page (not at the misaligned start or end of the range), - * we first NULLify its pointer in the level above, and then have no - * need to take the lock when updating its contents: needs_lock and - * punch_lock (either pointing to info->lock or NULL) manage this. - */ + index = indices[i]; + if (index > end) + break; - upper_limit -= SHMEM_NR_DIRECT; - limit -= SHMEM_NR_DIRECT; - idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0; - offset = idx % ENTRIES_PER_PAGE; - idx -= offset; - - dir = shmem_dir_map(topdir); - stage = ENTRIES_PER_PAGEPAGE/2; - if (idx < ENTRIES_PER_PAGEPAGE/2) { - middir = topdir; - diroff = idx/ENTRIES_PER_PAGE; - } else { - dir += ENTRIES_PER_PAGE/2; - dir += (idx - ENTRIES_PER_PAGEPAGE/2)/ENTRIES_PER_PAGEPAGE; - while (stage <= idx) - stage += ENTRIES_PER_PAGEPAGE; - middir = *dir; - if (*dir) { - diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) % - ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE; - if (!diroff && !offset && upper_limit >= stage) { - if (needs_lock) { - spin_lock(needs_lock); - *dir = NULL; - spin_unlock(needs_lock); - needs_lock = NULL; - } else - *dir = NULL; - nr_pages_to_free++; - list_add(&middir->lru, &pages_to_free); + if (radix_tree_exceptional_entry(page)) { + nr_swaps_freed += !shmem_free_swap(mapping, + index, page); + continue; } - shmem_dir_unmap(dir); - dir = shmem_dir_map(middir); - } else { - diroff = 0; - offset = 0; - idx = stage; - } - } - for (; idx < limit; idx += ENTRIES_PER_PAGE, diroff++) { - if (unlikely(idx == stage)) { - shmem_dir_unmap(dir); - dir = shmem_dir_map(topdir) + - ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE; - while (!*dir) { - dir++; - idx += ENTRIES_PER_PAGEPAGE; - if (idx >= limit) - goto done1; - } - stage = idx + 
ENTRIES_PER_PAGEPAGE; - middir = *dir; - if (punch_hole) - needs_lock = &info->lock; - if (upper_limit >= stage) { - if (needs_lock) { - spin_lock(needs_lock); - *dir = NULL; - spin_unlock(needs_lock); - needs_lock = NULL; - } else - *dir = NULL; - nr_pages_to_free++; - list_add(&middir->lru, &pages_to_free); + lock_page(page); + if (page->mapping == mapping) { + VM_BUG_ON(PageWriteback(page)); + truncate_inode_page(mapping, page); } - shmem_dir_unmap(dir); - cond_resched(); - dir = shmem_dir_map(middir); - diroff = 0; - } - punch_lock = needs_lock; - subdir = dir[diroff]; - if (subdir && !offset && upper_limit-idx >= ENTRIES_PER_PAGE) { - if (needs_lock) { - spin_lock(needs_lock); - dir[diroff] = NULL; - spin_unlock(needs_lock); - punch_lock = NULL; - } else - dir[diroff] = NULL; - nr_pages_to_free++; - list_add(&subdir->lru, &pages_to_free); - } - if (subdir && page_private(subdir) /* has swap entries */) { - size = limit - idx; - if (size > ENTRIES_PER_PAGE) - size = ENTRIES_PER_PAGE; - freed = shmem_map_and_free_swp(subdir, - offset, size, &dir, punch_lock); - if (!dir) - dir = shmem_dir_map(middir); - nr_swaps_freed += freed; - if (offset || punch_lock) { - spin_lock(&info->lock); - set_page_private(subdir, - page_private(subdir) - freed); - spin_unlock(&info->lock); - } else - BUG_ON(page_private(subdir) != freed); + unlock_page(page); } - offset = 0; - } -done1: - shmem_dir_unmap(dir); -done2: - if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) { - /* - * Call truncate_inode_pages again: racing shmem_unuse_inode - * may have swizzled a page in from swap since - * truncate_pagecache or generic_delete_inode did it, before we - * lowered next_index. Also, though shmem_getpage checks - * i_size before adding to cache, no recheck after: so fix the - * narrow window there too. 
- */ - truncate_inode_pages_range(inode->i_mapping, start, end); + shmem_pagevec_release(&pvec); + mem_cgroup_uncharge_end(); + index++; } spin_lock(&info->lock); - info->flags &= ~SHMEM_TRUNCATE; info->swapped -= nr_swaps_freed; - if (nr_pages_to_free) - shmem_free_blocks(inode, nr_pages_to_free); shmem_recalc_inode(inode); spin_unlock(&info->lock); - /* - * Empty swap vector directory pages to be freed? - */ - if (!list_empty(&pages_to_free)) { - pages_to_free.prev->next = NULL; - shmem_free_pages(pages_to_free.next); - } + inode->i_ctime = inode->i_mtime = CURRENT_TIME; } EXPORT_SYMBOL_GPL(shmem_truncate_range); @@ -780,37 +520,7 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr) if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { loff_t oldsize = inode->i_size; loff_t newsize = attr->ia_size; - struct page *page = NULL; - if (newsize < oldsize) { - /* - * If truncating down to a partial page, then - * if that page is already allocated, hold it - * in memory until the truncation is over, so - * truncate_partial_page cannot miss it were - * it assigned to swap. - */ - if (newsize & (PAGE_CACHE_SIZE-1)) { - (void) shmem_getpage(inode, - newsize >> PAGE_CACHE_SHIFT, - &page, SGP_READ, NULL); - if (page) - unlock_page(page); - } - /* - * Reset SHMEM_PAGEIN flag so that shmem_truncate can - * detect if any pages might have been added to cache - * after truncate_inode_pages. But we needn't bother - * if it's being fully truncated to zero-length: the - * nrpages check is efficient enough in that case. 
- */ - if (newsize) { - struct shmem_inode_info *info = SHMEM_I(inode); - spin_lock(&info->lock); - info->flags &= ~SHMEM_PAGEIN; - spin_unlock(&info->lock); - } - } if (newsize != oldsize) { i_size_write(inode, newsize); inode->i_ctime = inode->i_mtime = CURRENT_TIME; @@ -822,8 +532,6 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr) /* unmap again to remove racily COWed private pages */ unmap_mapping_range(inode->i_mapping, holebegin, 0, 1); } - if (page) - page_cache_release(page); } setattr_copy(inode, attr); @@ -848,7 +556,8 @@ static void shmem_evict_inode(struct inode *inode) list_del_init(&info->swaplist); mutex_unlock(&shmem_swaplist_mutex); } - } + } else + kfree(info->symlink); list_for_each_entry_safe(xattr, nxattr, &info->xattr_list, list) { kfree(xattr->name); @@ -859,106 +568,27 @@ static void shmem_evict_inode(struct inode *inode) end_writeback(inode); } -static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir) -{ - swp_entry_t *ptr; - - for (ptr = dir; ptr < edir; ptr++) { - if (ptr->val == entry.val) - return ptr - dir; - } - return -1; -} - -static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page) +/* + * If swap found in inode, free it and move page from swapcache to filecache. 
+ */ +static int shmem_unuse_inode(struct shmem_inode_info *info, + swp_entry_t swap, struct page *page) { - struct address_space *mapping; - unsigned long idx; - unsigned long size; - unsigned long limit; - unsigned long stage; - struct page **dir; - struct page *subdir; - swp_entry_t *ptr; - int offset; + struct address_space *mapping = info->vfs_inode.i_mapping; + void *radswap; + pgoff_t index; int error; - idx = 0; - ptr = info->i_direct; - spin_lock(&info->lock); - if (!info->swapped) { - list_del_init(&info->swaplist); - goto lost2; - } - limit = info->next_index; - size = limit; - if (size > SHMEM_NR_DIRECT) - size = SHMEM_NR_DIRECT; - offset = shmem_find_swp(entry, ptr, ptr+size); - if (offset >= 0) { - shmem_swp_balance_unmap(); - goto found; - } - if (!info->i_indirect) - goto lost2; - - dir = shmem_dir_map(info->i_indirect); - stage = SHMEM_NR_DIRECT + ENTRIES_PER_PAGEPAGE/2; - - for (idx = SHMEM_NR_DIRECT; idx < limit; idx += ENTRIES_PER_PAGE, dir++) { - if (unlikely(idx == stage)) { - shmem_dir_unmap(dir-1); - if (cond_resched_lock(&info->lock)) { - /* check it has not been truncated */ - if (limit > info->next_index) { - limit = info->next_index; - if (idx >= limit) - goto lost2; - } - } - dir = shmem_dir_map(info->i_indirect) + - ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE; - while (!*dir) { - dir++; - idx += ENTRIES_PER_PAGEPAGE; - if (idx >= limit) - goto lost1; - } - stage = idx + ENTRIES_PER_PAGEPAGE; - subdir = *dir; - shmem_dir_unmap(dir); - dir = shmem_dir_map(subdir); - } - subdir = *dir; - if (subdir && page_private(subdir)) { - ptr = shmem_swp_map(subdir); - size = limit - idx; - if (size > ENTRIES_PER_PAGE) - size = ENTRIES_PER_PAGE; - offset = shmem_find_swp(entry, ptr, ptr+size); - shmem_swp_unmap(ptr); - if (offset >= 0) { - shmem_dir_unmap(dir); - ptr = shmem_swp_map(subdir); - goto found; - } - } - } -lost1: - shmem_dir_unmap(dir-1); -lost2: - spin_unlock(&info->lock); - return 0; -found: - idx += offset; - ptr += offset; + radswap 
= swp_to_radix_entry(swap); + index = radix_tree_locate_item(&mapping->page_tree, radswap); + if (index == -1) + return 0; /* * Move _head_ to start search for next from here. * But be careful: shmem_evict_inode checks list_empty without taking * mutex, and there's an instant in list_move_tail when info->swaplist - * would appear empty, if it were the only one on shmem_swaplist. We - * could avoid doing it if inode NULL; or use this minor optimization. + * would appear empty, if it were the only one on shmem_swaplist. */ if (shmem_swaplist.next != &info->swaplist) list_move_tail(&shmem_swaplist, &info->swaplist); @@ -968,29 +598,34 @@ found: * but also to hold up shmem_evict_inode(): so inode cannot be freed * beneath us (pagelock doesn't help until the page is in pagecache). */ - mapping = info->vfs_inode.i_mapping; - error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT); + error = shmem_add_to_page_cache(page, mapping, index, + GFP_NOWAIT, radswap); /* which does mem_cgroup_uncharge_cache_page on error */ if (error != -ENOMEM) { + /* + * Truncation and eviction use free_swap_and_cache(), which + * only does trylock page: if we raced, best clean up here. + */ delete_from_swap_cache(page); set_page_dirty(page); - info->flags |= SHMEM_PAGEIN; - shmem_swp_set(info, ptr, 0); - swap_free(entry); + if (!error) { + spin_lock(&info->lock); + info->swapped--; + spin_unlock(&info->lock); + swap_free(swap); + } error = 1; /* not an error, but entry was found */ } - shmem_swp_unmap(ptr); - spin_unlock(&info->lock); return error; } /* - * shmem_unuse() search for an eventually swapped out shmem page. + * Search through swapped inodes to find and replace swap by page. 
*/ -int shmem_unuse(swp_entry_t entry, struct page *page) +int shmem_unuse(swp_entry_t swap, struct page *page) { - struct list_head *p, *next; + struct list_head *this, *next; struct shmem_inode_info *info; int found = 0; int error; @@ -999,32 +634,25 @@ int shmem_unuse(swp_entry_t entry, struct page *page) * Charge page using GFP_KERNEL while we can wait, before taking * the shmem_swaplist_mutex which might hold up shmem_writepage(). * Charged back to the user (not to caller) when swap account is used. - * add_to_page_cache() will be called with GFP_NOWAIT. */ error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); if (error) goto out; - /* - * Try to preload while we can wait, to not make a habit of - * draining atomic reserves; but don't latch on to this cpu, - * it's okay if sometimes we get rescheduled after this. - */ - error = radix_tree_preload(GFP_KERNEL); - if (error) - goto uncharge; - radix_tree_preload_end(); + /* No radix_tree_preload: swap entry keeps a place for page in tree */ mutex_lock(&shmem_swaplist_mutex); - list_for_each_safe(p, next, &shmem_swaplist) { - info = list_entry(p, struct shmem_inode_info, swaplist); - found = shmem_unuse_inode(info, entry, page); + list_for_each_safe(this, next, &shmem_swaplist) { + info = list_entry(this, struct shmem_inode_info, swaplist); + if (info->swapped) + found = shmem_unuse_inode(info, swap, page); + else + list_del_init(&info->swaplist); cond_resched(); if (found) break; } mutex_unlock(&shmem_swaplist_mutex); -uncharge: if (!found) mem_cgroup_uncharge_cache_page(page); if (found < 0) @@ -1041,10 +669,10 @@ out: static int shmem_writepage(struct page *page, struct writeback_control *wbc) { struct shmem_inode_info *info; - swp_entry_t *entry, swap; struct address_space *mapping; - unsigned long index; struct inode *inode; + swp_entry_t swap; + pgoff_t index; BUG_ON(!PageLocked(page)); mapping = page->mapping; @@ -1073,50 +701,32 @@ static int shmem_writepage(struct page *page, struct 
writeback_control *wbc) /* * Add inode to shmem_unuse()'s list of swapped-out inodes, - * if it's not already there. Do it now because we cannot take - * mutex while holding spinlock, and must do so before the page - * is moved to swap cache, when its pagelock no longer protects + * if it's not already there. Do it now before the page is + * moved to swap cache, when its pagelock no longer protects * the inode from eviction. But don't unlock the mutex until - * we've taken the spinlock, because shmem_unuse_inode() will - * prune a !swapped inode from the swaplist under both locks. + * we've incremented swapped, because shmem_unuse_inode() will + * prune a !swapped inode from the swaplist under this mutex. */ mutex_lock(&shmem_swaplist_mutex); if (list_empty(&info->swaplist)) list_add_tail(&info->swaplist, &shmem_swaplist); - spin_lock(&info->lock); - mutex_unlock(&shmem_swaplist_mutex); - - if (index >= info->next_index) { - BUG_ON(!(info->flags & SHMEM_TRUNCATE)); - goto unlock; - } - entry = shmem_swp_entry(info, index, NULL); - if (entry->val) { - WARN_ON_ONCE(1); /* Still happens? Tell us about it! */ - free_swap_and_cache(*entry); - shmem_swp_set(info, entry, 0); - } - shmem_recalc_inode(inode); - if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) { - delete_from_page_cache(page); - shmem_swp_set(info, entry, swap.val); - shmem_swp_unmap(entry); swap_shmem_alloc(swap); + shmem_delete_from_page_cache(page, swp_to_radix_entry(swap)); + + spin_lock(&info->lock); + info->swapped++; + shmem_recalc_inode(inode); spin_unlock(&info->lock); + + mutex_unlock(&shmem_swaplist_mutex); BUG_ON(page_mapped(page)); swap_writepage(page, wbc); return 0; } - shmem_swp_unmap(entry); -unlock: - spin_unlock(&info->lock); - /* - * add_to_swap_cache() doesn't return -EEXIST, so we can safely - * clear SWAP_HAS_CACHE flag. 
- */ + mutex_unlock(&shmem_swaplist_mutex); swapcache_free(swap, NULL); redirty: set_page_dirty(page); @@ -1153,35 +763,33 @@ static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo) } #endif /* CONFIG_TMPFS */ -static struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp, - struct shmem_inode_info *info, unsigned long idx) +static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp, + struct shmem_inode_info *info, pgoff_t index) { struct mempolicy mpol, *spol; struct vm_area_struct pvma; - struct page *page; spol = mpol_cond_copy(&mpol, - mpol_shared_policy_lookup(&info->policy, idx)); + mpol_shared_policy_lookup(&info->policy, index)); /* Create a pseudo vma that just contains the policy */ pvma.vm_start = 0; - pvma.vm_pgoff = idx; + pvma.vm_pgoff = index; pvma.vm_ops = NULL; pvma.vm_policy = spol; - page = swapin_readahead(entry, gfp, &pvma, 0); - return page; + return swapin_readahead(swap, gfp, &pvma, 0); } static struct page *shmem_alloc_page(gfp_t gfp, - struct shmem_inode_info *info, unsigned long idx) + struct shmem_inode_info *info, pgoff_t index) { struct vm_area_struct pvma; /* Create a pseudo vma that just contains the policy */ pvma.vm_start = 0; - pvma.vm_pgoff = idx; + pvma.vm_pgoff = index; pvma.vm_ops = NULL; - pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx); + pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index); /* * alloc_page_vma() will drop the shared policy reference @@ -1190,19 +798,19 @@ static struct page *shmem_alloc_page(gfp_t gfp, } #else /* !CONFIG_NUMA */ #ifdef CONFIG_TMPFS -static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *p) +static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol) { } #endif /* CONFIG_TMPFS */ -static inline struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp, - struct shmem_inode_info *info, unsigned long idx) +static inline struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp, + struct shmem_inode_info *info, 
pgoff_t index) { - return swapin_readahead(entry, gfp, NULL, 0); + return swapin_readahead(swap, gfp, NULL, 0); } static inline struct page *shmem_alloc_page(gfp_t gfp, - struct shmem_inode_info *info, unsigned long idx) + struct shmem_inode_info *info, pgoff_t index) { return alloc_page(gfp); } @@ -1222,243 +830,190 @@ static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo) * vm. If we swap it in we mark it dirty since we also free the swap * entry since a page cannot live in both the swap and page cache */ -static int shmem_getpage_gfp(struct inode *inode, pgoff_t idx, +static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type) { struct address_space *mapping = inode->i_mapping; - struct shmem_inode_info *info = SHMEM_I(inode); + struct shmem_inode_info *info; struct shmem_sb_info *sbinfo; struct page *page; - struct page *prealloc_page = NULL; - swp_entry_t *entry; swp_entry_t swap; int error; - int ret; + int once = 0; - if (idx >= SHMEM_MAX_INDEX) + if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT)) return -EFBIG; repeat: - page = find_lock_page(mapping, idx); - if (page) { + swap.val = 0; + page = find_lock_page(mapping, index); + if (radix_tree_exceptional_entry(page)) { + swap = radix_to_swp_entry(page); + page = NULL; + } + + if (sgp != SGP_WRITE && + ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { + error = -EINVAL; + goto failed; + } + + if (page || (sgp == SGP_READ && !swap.val)) { /* * Once we can get the page lock, it must be uptodate: * if there were an error in reading back from swap, * the page would not be inserted into the filecache. */ - BUG_ON(!PageUptodate(page)); - goto done; + BUG_ON(page && !PageUptodate(page)); + *pagep = page; + return 0; } /* - * Try to preload while we can wait, to not make a habit of - * draining atomic reserves; but don't latch on to this cpu. 
+ * Fast cache lookup did not find it: + * bring it back from swap or allocate. */ - error = radix_tree_preload(gfp & GFP_RECLAIM_MASK); - if (error) - goto out; - radix_tree_preload_end(); - - if (sgp != SGP_READ && !prealloc_page) { - prealloc_page = shmem_alloc_page(gfp, info, idx); - if (prealloc_page) { - SetPageSwapBacked(prealloc_page); - if (mem_cgroup_cache_charge(prealloc_page, - current->mm, GFP_KERNEL)) { - page_cache_release(prealloc_page); - prealloc_page = NULL; - } - } - } - - spin_lock(&info->lock); - shmem_recalc_inode(inode); - entry = shmem_swp_alloc(info, idx, sgp, gfp); - if (IS_ERR(entry)) { - spin_unlock(&info->lock); - error = PTR_ERR(entry); - goto out; - } - swap = *entry; + info = SHMEM_I(inode); + sbinfo = SHMEM_SB(inode->i_sb); if (swap.val) { /* Look it up and read it in.. */ page = lookup_swap_cache(swap); if (!page) { - shmem_swp_unmap(entry); - spin_unlock(&info->lock); /* here we actually do the io */ if (fault_type) *fault_type |= VM_FAULT_MAJOR; - page = shmem_swapin(swap, gfp, info, idx); + page = shmem_swapin(swap, gfp, info, index); if (!page) { - spin_lock(&info->lock); - entry = shmem_swp_alloc(info, idx, sgp, gfp); - if (IS_ERR(entry)) - error = PTR_ERR(entry); - else { - if (entry->val == swap.val) - error = -ENOMEM; - shmem_swp_unmap(entry); - } - spin_unlock(&info->lock); - if (error) - goto out; - goto repeat; + error = -ENOMEM; + goto failed; } - wait_on_page_locked(page); - page_cache_release(page); - goto repeat; } /* We have to do this with page locked to prevent races */ - if (!trylock_page(page)) { - shmem_swp_unmap(entry); - spin_unlock(&info->lock); - wait_on_page_locked(page); - page_cache_release(page); - goto repeat; - } - if (PageWriteback(page)) { - shmem_swp_unmap(entry); - spin_unlock(&info->lock); - wait_on_page_writeback(page); - unlock_page(page); - page_cache_release(page); - goto repeat; - } + lock_page(page); if (!PageUptodate(page)) { - shmem_swp_unmap(entry); - spin_unlock(&info->lock); - 
unlock_page(page); - page_cache_release(page); error = -EIO; - goto out; + goto failed; } - - error = add_to_page_cache_locked(page, mapping, - idx, GFP_NOWAIT); - if (error) { - shmem_swp_unmap(entry); - spin_unlock(&info->lock); - if (error == -ENOMEM) { - /* - * reclaim from proper memory cgroup and - * call memcg's OOM if needed. - */ - error = mem_cgroup_shmem_charge_fallback( - page, current->mm, gfp); - if (error) { - unlock_page(page); - page_cache_release(page); - goto out; - } - } - unlock_page(page); - page_cache_release(page); - goto repeat; + wait_on_page_writeback(page); + + /* Someone may have already done it for us */ + if (page->mapping) { + if (page->mapping == mapping && + page->index == index) + goto done; + error = -EEXIST; + goto failed; } - info->flags |= SHMEM_PAGEIN; - shmem_swp_set(info, entry, 0); - shmem_swp_unmap(entry); - delete_from_swap_cache(page); + error = mem_cgroup_cache_charge(page, current->mm, + gfp & GFP_RECLAIM_MASK); + if (!error) + error = shmem_add_to_page_cache(page, mapping, index, + gfp, swp_to_radix_entry(swap)); + if (error) + goto failed; + + spin_lock(&info->lock); + info->swapped--; + shmem_recalc_inode(inode); spin_unlock(&info->lock); + + delete_from_swap_cache(page); set_page_dirty(page); swap_free(swap); - } else if (sgp == SGP_READ) { - shmem_swp_unmap(entry); - page = find_get_page(mapping, idx); - if (page && !trylock_page(page)) { - spin_unlock(&info->lock); - wait_on_page_locked(page); - page_cache_release(page); - goto repeat; + } else { + if (shmem_acct_block(info->flags)) { + error = -ENOSPC; + goto failed; } - spin_unlock(&info->lock); - - } else if (prealloc_page) { - shmem_swp_unmap(entry); - sbinfo = SHMEM_SB(inode->i_sb); if (sbinfo->max_blocks) { if (percpu_counter_compare(&sbinfo->used_blocks, - sbinfo->max_blocks) >= 0 || - shmem_acct_block(info->flags)) - goto nospace; + sbinfo->max_blocks) >= 0) { + error = -ENOSPC; + goto unacct; + } percpu_counter_inc(&sbinfo->used_blocks); - 
inode->i_blocks += BLOCKS_PER_PAGE; - } else if (shmem_acct_block(info->flags)) - goto nospace; - - page = prealloc_page; - prealloc_page = NULL; - - entry = shmem_swp_alloc(info, idx, sgp, gfp); - if (IS_ERR(entry)) - error = PTR_ERR(entry); - else { - swap = *entry; - shmem_swp_unmap(entry); } - ret = error || swap.val; - if (ret) - mem_cgroup_uncharge_cache_page(page); - else - ret = add_to_page_cache_lru(page, mapping, - idx, GFP_NOWAIT); - /* - * At add_to_page_cache_lru() failure, - * uncharge will be done automatically. - */ - if (ret) { - shmem_unacct_blocks(info->flags, 1); - shmem_free_blocks(inode, 1); - spin_unlock(&info->lock); - page_cache_release(page); - if (error) - goto out; - goto repeat; + + page = shmem_alloc_page(gfp, info, index); + if (!page) { + error = -ENOMEM; + goto decused; } - info->flags |= SHMEM_PAGEIN; + SetPageSwapBacked(page); + __set_page_locked(page); + error = mem_cgroup_cache_charge(page, current->mm, + gfp & GFP_RECLAIM_MASK); + if (!error) + error = shmem_add_to_page_cache(page, mapping, index, + gfp, NULL); + if (error) + goto decused; + lru_cache_add_anon(page); + + spin_lock(&info->lock); info->alloced++; + inode->i_blocks += BLOCKS_PER_PAGE; + shmem_recalc_inode(inode); spin_unlock(&info->lock); + clear_highpage(page); flush_dcache_page(page); SetPageUptodate(page); if (sgp == SGP_DIRTY) set_page_dirty(page); - - } else { - spin_unlock(&info->lock); - error = -ENOMEM; - goto out; } done: - *pagep = page; - error = 0; -out: - if (prealloc_page) { - mem_cgroup_uncharge_cache_page(prealloc_page); - page_cache_release(prealloc_page); + /* Perhaps the file has been truncated since we checked */ + if (sgp != SGP_WRITE && + ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { + error = -EINVAL; + goto trunc; } - return error; + *pagep = page; + return 0; -nospace: /* - * Perhaps the page was brought in from swap between find_lock_page - * and taking info->lock? 
We allow for that at add_to_page_cache_lru, - * but must also avoid reporting a spurious ENOSPC while working on a - * full tmpfs. + * Error recovery. */ - page = find_get_page(mapping, idx); +trunc: + ClearPageDirty(page); + delete_from_page_cache(page); + spin_lock(&info->lock); + info->alloced--; + inode->i_blocks -= BLOCKS_PER_PAGE; spin_unlock(&info->lock); +decused: + if (sbinfo->max_blocks) + percpu_counter_add(&sbinfo->used_blocks, -1); +unacct: + shmem_unacct_blocks(info->flags, 1); +failed: + if (swap.val && error != -EINVAL) { + struct page *test = find_get_page(mapping, index); + if (test && !radix_tree_exceptional_entry(test)) + page_cache_release(test); + /* Have another try if the entry has changed */ + if (test != swp_to_radix_entry(swap)) + error = -EEXIST; + } if (page) { + unlock_page(page); page_cache_release(page); + } + if (error == -ENOSPC && !once++) { + info = SHMEM_I(inode); + spin_lock(&info->lock); + shmem_recalc_inode(inode); + spin_unlock(&info->lock); goto repeat; } - error = -ENOSPC; - goto out; + if (error == -EEXIST) + goto repeat; + return error; } static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) @@ -1467,9 +1022,6 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) int error; int ret = VM_FAULT_LOCKED; - if (((loff_t)vmf->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) - return VM_FAULT_SIGBUS; - error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret); if (error) return ((error == -ENOMEM) ? 
VM_FAULT_OOM : VM_FAULT_SIGBUS); @@ -1482,20 +1034,20 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) } #ifdef CONFIG_NUMA -static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new) +static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol) { - struct inode *i = vma->vm_file->f_path.dentry->d_inode; - return mpol_set_shared_policy(&SHMEM_I(i)->policy, vma, new); + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol); } static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, unsigned long addr) { - struct inode *i = vma->vm_file->f_path.dentry->d_inode; - unsigned long idx; + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + pgoff_t index; - idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - return mpol_shared_policy_lookup(&SHMEM_I(i)->policy, idx); + index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index); } #endif @@ -1593,7 +1145,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode #ifdef CONFIG_TMPFS static const struct inode_operations shmem_symlink_inode_operations; -static const struct inode_operations shmem_symlink_inline_operations; +static const struct inode_operations shmem_short_symlink_operations; static int shmem_write_begin(struct file *file, struct address_space *mapping, @@ -1626,7 +1178,8 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_ { struct inode *inode = filp->f_path.dentry->d_inode; struct address_space *mapping = inode->i_mapping; - unsigned long index, offset; + pgoff_t index; + unsigned long offset; enum sgp_type sgp = SGP_READ; /* @@ -1642,7 +1195,8 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_ for (;;) { struct page *page = NULL; - unsigned long end_index, nr, ret; + pgoff_t 
end_index; + unsigned long nr, ret; loff_t i_size = i_size_read(inode); end_index = i_size >> PAGE_CACHE_SHIFT; @@ -1880,8 +1434,9 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_namelen = NAME_MAX; if (sbinfo->max_blocks) { buf->f_blocks = sbinfo->max_blocks; - buf->f_bavail = buf->f_bfree = - sbinfo->max_blocks - percpu_counter_sum(&sbinfo->used_blocks); + buf->f_bavail = + buf->f_bfree = sbinfo->max_blocks - + percpu_counter_sum(&sbinfo->used_blocks); } if (sbinfo->max_inodes) { buf->f_files = sbinfo->max_inodes; @@ -2055,10 +1610,13 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s info = SHMEM_I(inode); inode->i_size = len-1; - if (len <= SHMEM_SYMLINK_INLINE_LEN) { - /* do it inline */ - memcpy(info->inline_symlink, symname, len); - inode->i_op = &shmem_symlink_inline_operations; + if (len <= SHORT_SYMLINK_LEN) { + info->symlink = kmemdup(symname, len, GFP_KERNEL); + if (!info->symlink) { + iput(inode); + return -ENOMEM; + } + inode->i_op = &shmem_short_symlink_operations; } else { error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL); if (error) { @@ -2081,17 +1639,17 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s return 0; } -static void *shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd) +static void *shmem_follow_short_symlink(struct dentry *dentry, struct nameidata *nd) { - nd_set_link(nd, SHMEM_I(dentry->d_inode)->inline_symlink); + nd_set_link(nd, SHMEM_I(dentry->d_inode)->symlink); return NULL; } static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd) { struct page *page = NULL; - int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL); - nd_set_link(nd, res ? ERR_PTR(res) : kmap(page)); + int error = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL); + nd_set_link(nd, error ? 
ERR_PTR(error) : kmap(page)); if (page) unlock_page(page); return page; @@ -2202,7 +1760,6 @@ out: return err; } - static const struct xattr_handler *shmem_xattr_handlers[] = { #ifdef CONFIG_TMPFS_POSIX_ACL &generic_acl_access_handler, @@ -2332,9 +1889,9 @@ static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size) } #endif /* CONFIG_TMPFS_XATTR */ -static const struct inode_operations shmem_symlink_inline_operations = { +static const struct inode_operations shmem_short_symlink_operations = { .readlink = generic_readlink, - .follow_link = shmem_follow_link_inline, + .follow_link = shmem_follow_short_symlink, #ifdef CONFIG_TMPFS_XATTR .setxattr = shmem_setxattr, .getxattr = shmem_getxattr, @@ -2534,8 +2091,7 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data) if (config.max_inodes < inodes) goto out; /* - * Those tests also disallow limited->unlimited while any are in - * use, so i_blocks will always be zero when max_blocks is zero; + * Those tests disallow limited->unlimited while any are in use; * but we must separately disallow unlimited->limited, because * in that case we have no record of how much is already in use. 
*/ @@ -2627,7 +2183,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) goto failed; sbinfo->free_inodes = sbinfo->max_inodes; - sb->s_maxbytes = SHMEM_MAX_BYTES; + sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; sb->s_magic = TMPFS_MAGIC; @@ -2662,14 +2218,14 @@ static struct kmem_cache *shmem_inode_cachep; static struct inode *shmem_alloc_inode(struct super_block *sb) { - struct shmem_inode_info *p; - p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL); - if (!p) + struct shmem_inode_info *info; + info = kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL); + if (!info) return NULL; - return &p->vfs_inode; + return &info->vfs_inode; } -static void shmem_i_callback(struct rcu_head *head) +static void shmem_destroy_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); INIT_LIST_HEAD(&inode->i_dentry); @@ -2678,29 +2234,26 @@ static void shmem_i_callback(struct rcu_head *head) static void shmem_destroy_inode(struct inode *inode) { - if ((inode->i_mode & S_IFMT) == S_IFREG) { - /* only struct inode is valid if it's an inline symlink */ + if ((inode->i_mode & S_IFMT) == S_IFREG) mpol_free_shared_policy(&SHMEM_I(inode)->policy); - } - call_rcu(&inode->i_rcu, shmem_i_callback); + call_rcu(&inode->i_rcu, shmem_destroy_callback); } -static void init_once(void *foo) +static void shmem_init_inode(void *foo) { - struct shmem_inode_info *p = (struct shmem_inode_info *) foo; - - inode_init_once(&p->vfs_inode); + struct shmem_inode_info *info = foo; + inode_init_once(&info->vfs_inode); } -static int init_inodecache(void) +static int shmem_init_inodecache(void) { shmem_inode_cachep = kmem_cache_create("shmem_inode_cache", sizeof(struct shmem_inode_info), - 0, SLAB_PANIC, init_once); + 0, SLAB_PANIC, shmem_init_inode); return 0; } -static void destroy_inodecache(void) +static void shmem_destroy_inodecache(void) { 
kmem_cache_destroy(shmem_inode_cachep); } @@ -2797,21 +2350,20 @@ static const struct vm_operations_struct shmem_vm_ops = { #endif }; - static struct dentry *shmem_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { return mount_nodev(fs_type, flags, data, shmem_fill_super); } -static struct file_system_type tmpfs_fs_type = { +static struct file_system_type shmem_fs_type = { .owner = THIS_MODULE, .name = "tmpfs", .mount = shmem_mount, .kill_sb = kill_litter_super, }; -int __init init_tmpfs(void) +int __init shmem_init(void) { int error; @@ -2819,18 +2371,18 @@ int __init init_tmpfs(void) if (error) goto out4; - error = init_inodecache(); + error = shmem_init_inodecache(); if (error) goto out3; - error = register_filesystem(&tmpfs_fs_type); + error = register_filesystem(&shmem_fs_type); if (error) { printk(KERN_ERR "Could not register tmpfs\n"); goto out2; } - shm_mnt = vfs_kern_mount(&tmpfs_fs_type, MS_NOUSER, - tmpfs_fs_type.name, NULL); + shm_mnt = vfs_kern_mount(&shmem_fs_type, MS_NOUSER, + shmem_fs_type.name, NULL); if (IS_ERR(shm_mnt)) { error = PTR_ERR(shm_mnt); printk(KERN_ERR "Could not kern_mount tmpfs\n"); @@ -2839,9 +2391,9 @@ int __init init_tmpfs(void) return 0; out1: - unregister_filesystem(&tmpfs_fs_type); + unregister_filesystem(&shmem_fs_type); out2: - destroy_inodecache(); + shmem_destroy_inodecache(); out3: bdi_destroy(&shmem_backing_dev_info); out4: @@ -2849,45 +2401,6 @@ out4: return error; } -#ifdef CONFIG_CGROUP_MEM_RES_CTLR -/** - * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file - * @inode: the inode to be searched - * @pgoff: the offset to be searched - * @pagep: the pointer for the found page to be stored - * @ent: the pointer for the found swap entry to be stored - * - * If a page is found, refcount of it is incremented. Callers should handle - * these refcount. 
- */ -void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, - struct page **pagep, swp_entry_t *ent) -{ - swp_entry_t entry = { .val = 0 }, *ptr; - struct page *page = NULL; - struct shmem_inode_info *info = SHMEM_I(inode); - - if ((pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) - goto out; - - spin_lock(&info->lock); - ptr = shmem_swp_entry(info, pgoff, NULL); -#ifdef CONFIG_SWAP - if (ptr && ptr->val) { - entry.val = ptr->val; - page = find_get_page(&swapper_space, entry.val); - } else -#endif - page = find_get_page(inode->i_mapping, pgoff); - if (ptr) - shmem_swp_unmap(ptr); - spin_unlock(&info->lock); -out: - *pagep = page; - *ent = entry; -} -#endif - #else /* !CONFIG_SHMEM */ /* @@ -2901,23 +2414,23 @@ out: #include <linux/ramfs.h> -static struct file_system_type tmpfs_fs_type = { +static struct file_system_type shmem_fs_type = { .name = "tmpfs", .mount = ramfs_mount, .kill_sb = kill_litter_super, }; -int __init init_tmpfs(void) +int __init shmem_init(void) { - BUG_ON(register_filesystem(&tmpfs_fs_type) != 0); + BUG_ON(register_filesystem(&shmem_fs_type) != 0); - shm_mnt = kern_mount(&tmpfs_fs_type); + shm_mnt = kern_mount(&shmem_fs_type); BUG_ON(IS_ERR(shm_mnt)); return 0; } -int shmem_unuse(swp_entry_t entry, struct page *page) +int shmem_unuse(swp_entry_t swap, struct page *page) { return 0; } @@ -2927,43 +2440,17 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user) return 0; } -void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) +void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) { - truncate_inode_pages_range(inode->i_mapping, start, end); + truncate_inode_pages_range(inode->i_mapping, lstart, lend); } EXPORT_SYMBOL_GPL(shmem_truncate_range); -#ifdef CONFIG_CGROUP_MEM_RES_CTLR -/** - * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file - * @inode: the inode to be searched - * @pgoff: the offset to be searched - * @pagep: the pointer for the found 
page to be stored - * @ent: the pointer for the found swap entry to be stored - * - * If a page is found, refcount of it is incremented. Callers should handle - * these refcount. - */ -void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, - struct page **pagep, swp_entry_t *ent) -{ - struct page *page = NULL; - - if ((pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) - goto out; - page = find_get_page(inode->i_mapping, pgoff); -out: - *pagep = page; - *ent = (swp_entry_t){ .val = 0 }; -} -#endif - #define shmem_vm_ops generic_file_vm_ops #define shmem_file_operations ramfs_file_operations #define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev) #define shmem_acct_size(flags, size) 0 #define shmem_unacct_size(flags, size) do {} while (0) -#define SHMEM_MAX_BYTES MAX_LFS_FILESIZE #endif /* CONFIG_SHMEM */ @@ -2987,7 +2474,7 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags if (IS_ERR(shm_mnt)) return (void *)shm_mnt; - if (size < 0 || size > SHMEM_MAX_BYTES) + if (size < 0 || size > MAX_LFS_FILESIZE) return ERR_PTR(-EINVAL); if (shmem_acct_size(flags, size)) diff --git a/mm/swapfile.c b/mm/swapfile.c index 1b8c33907242..17bc224bce68 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1924,20 +1924,24 @@ static unsigned long read_swap_header(struct swap_info_struct *p, /* * Find out how many pages are allowed for a single swap - * device. There are two limiting factors: 1) the number of - * bits for the swap offset in the swp_entry_t type and - * 2) the number of bits in the a swap pte as defined by - * the different architectures. In order to find the - * largest possible bit mask a swap entry with swap type 0 + * device. There are three limiting factors: 1) the number + * of bits for the swap offset in the swp_entry_t type, and + * 2) the number of bits in the swap pte as defined by the + * different architectures, and 3) the number of free bits + * in an exceptional radix_tree entry.
In order to find the + * largest possible bit mask, a swap entry with swap type 0 * and swap offset ~0UL is created, encoded to a swap pte, - * decoded to a swp_entry_t again and finally the swap + * decoded to a swp_entry_t again, and finally the swap * offset is extracted. This will mask all the bits from * the initial ~0UL mask that can't be encoded in either * the swp_entry_t or the architecture definition of a - * swap pte. + * swap pte. Then the same is done for a radix_tree entry. */ maxpages = swp_offset(pte_to_swp_entry( - swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1; + swp_entry_to_pte(swp_entry(0, ~0UL)))); + maxpages = swp_offset(radix_to_swp_entry( + swp_to_radix_entry(swp_entry(0, maxpages)))) + 1; + if (maxpages > swap_header->info.last_page) { maxpages = swap_header->info.last_page + 1; /* p->max is an unsigned int: don't overflow it */ diff --git a/mm/truncate.c b/mm/truncate.c index 232eb2736a79..b40ac6d4e86e 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -336,6 +336,14 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, unsigned long count = 0; int i; + /* + * Note: this function may get called on a shmem/tmpfs mapping: + * pagevec_lookup() might then return 0 prematurely (because it + * got a gangful of swap entries); but it's hardly worth worrying + * about - it can rarely have anything to free from such a mapping + * (most pages are dirty), and already skips over any difficulties. 
+ */ + pagevec_init(&pvec, 0); while (index <= end && pagevec_lookup(&pvec, mapping, index, min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 6d8ef4a3a9b5..8b2d37b59c9e 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -128,34 +128,34 @@ unsigned long long get_msr(int cpu, off_t offset) void print_header(void) { if (show_pkg) - fprintf(stderr, "pkg "); + fprintf(stderr, "pk"); if (show_core) - fprintf(stderr, "core"); + fprintf(stderr, " cr"); if (show_cpu) fprintf(stderr, " CPU"); if (do_nhm_cstates) - fprintf(stderr, " %%c0 "); + fprintf(stderr, " %%c0 "); if (has_aperf) - fprintf(stderr, " GHz"); + fprintf(stderr, " GHz"); fprintf(stderr, " TSC"); if (do_nhm_cstates) - fprintf(stderr, " %%c1 "); + fprintf(stderr, " %%c1"); if (do_nhm_cstates) - fprintf(stderr, " %%c3 "); + fprintf(stderr, " %%c3"); if (do_nhm_cstates) - fprintf(stderr, " %%c6 "); + fprintf(stderr, " %%c6"); if (do_snb_cstates) - fprintf(stderr, " %%c7 "); + fprintf(stderr, " %%c7"); if (do_snb_cstates) - fprintf(stderr, " %%pc2 "); + fprintf(stderr, " %%pc2"); if (do_nhm_cstates) - fprintf(stderr, " %%pc3 "); + fprintf(stderr, " %%pc3"); if (do_nhm_cstates) - fprintf(stderr, " %%pc6 "); + fprintf(stderr, " %%pc6"); if (do_snb_cstates) - fprintf(stderr, " %%pc7 "); + fprintf(stderr, " %%pc7"); if (extra_msr_offset) - fprintf(stderr, " MSR 0x%x ", extra_msr_offset); + fprintf(stderr, " MSR 0x%x ", extra_msr_offset); putc('\n', stderr); } @@ -194,14 +194,14 @@ void print_cnt(struct counters *p) /* topology columns, print blanks on 1st (average) line */ if (p == cnt_average) { if (show_pkg) - fprintf(stderr, " "); + fprintf(stderr, " "); if (show_core) fprintf(stderr, " "); if (show_cpu) fprintf(stderr, " "); } else { if (show_pkg) - fprintf(stderr, "%4d", p->pkg); + fprintf(stderr, "%d", p->pkg); if (show_core) fprintf(stderr, "%4d", p->core); if 
(show_cpu) @@ -241,22 +241,22 @@ void print_cnt(struct counters *p) if (!skip_c1) fprintf(stderr, "%7.2f", 100.0 * p->c1/p->tsc); else - fprintf(stderr, " ****"); + fprintf(stderr, " ****"); } if (do_nhm_cstates) - fprintf(stderr, "%7.2f", 100.0 * p->c3/p->tsc); + fprintf(stderr, " %6.2f", 100.0 * p->c3/p->tsc); if (do_nhm_cstates) - fprintf(stderr, "%7.2f", 100.0 * p->c6/p->tsc); + fprintf(stderr, " %6.2f", 100.0 * p->c6/p->tsc); if (do_snb_cstates) - fprintf(stderr, "%7.2f", 100.0 * p->c7/p->tsc); + fprintf(stderr, " %6.2f", 100.0 * p->c7/p->tsc); if (do_snb_cstates) - fprintf(stderr, "%7.2f", 100.0 * p->pc2/p->tsc); + fprintf(stderr, " %5.2f", 100.0 * p->pc2/p->tsc); if (do_nhm_cstates) - fprintf(stderr, "%7.2f", 100.0 * p->pc3/p->tsc); + fprintf(stderr, " %5.2f", 100.0 * p->pc3/p->tsc); if (do_nhm_cstates) - fprintf(stderr, "%7.2f", 100.0 * p->pc6/p->tsc); + fprintf(stderr, " %5.2f", 100.0 * p->pc6/p->tsc); if (do_snb_cstates) - fprintf(stderr, "%7.2f", 100.0 * p->pc7/p->tsc); + fprintf(stderr, " %5.2f", 100.0 * p->pc7/p->tsc); if (extra_msr_offset) fprintf(stderr, " 0x%016llx", p->extra_msr); putc('\n', stderr); diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c index 2618ef2ba31f..33c5c7ee148f 100644 --- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c @@ -137,7 +137,6 @@ void cmdline(int argc, char **argv) void validate_cpuid(void) { unsigned int eax, ebx, ecx, edx, max_level; - char brand[16]; unsigned int fms, family, model, stepping; eax = ebx = ecx = edx = 0; @@ -160,8 +159,8 @@ void validate_cpuid(void) model += ((fms >> 16) & 0xf) << 4; if (verbose > 1) - printf("CPUID %s %d levels family:model:stepping " - "0x%x:%x:%x (%d:%d:%d)\n", brand, max_level, + printf("CPUID %d levels family:model:stepping " + "0x%x:%x:%x (%d:%d:%d)\n", max_level, family, model, stepping, family, 
model, stepping); if (!(edx & (1 << 5))) { |