From 7e36e2f5355ab87f8946041d044b34cda01e2077 Mon Sep 17 00:00:00 2001 From: Rod Whitby Date: Tue, 1 Apr 2008 10:53:23 +0100 Subject: [ARM] 4874/2: ixp4xx: Add support for the Freecom FSG-3 board The Freecom-FSG3 is a small network-attached-storage device with the following feature set: * Intel IXP422 * 4MB Flash (ixp4xx flash driver) * 64MB RAM * 4 USB 2.0 host ports (ehci and ohci drivers) * 1 WAN (eth1) and 3 LAN (eth0) ethernet ports * Supported by the open source ixp4xx ethernet driver * Via VT6421 disk controller (libata and sata-via drivers) * Internal hard disk (PATA supported, SATA not yet supported) * External SATA port (not yet supported) * ISL1208 RTC chip * Winbond 83782 temp sensor and fan controller * MiniPCI slot The ixp4xx_defconfig is also updated to support this device (the leds-fsg driver is to be submitted separately via the leds tree after this initial support is merged, as it depends on header gpio defines). Signed-off-by: Rod Whitby Signed-off-by: Russell King --- include/asm-arm/arch-ixp4xx/fsg.h | 50 ++++++++++++++++++++++++++++++++++ include/asm-arm/arch-ixp4xx/hardware.h | 1 + include/asm-arm/arch-ixp4xx/irqs.h | 7 +++++ 3 files changed, 58 insertions(+) create mode 100644 include/asm-arm/arch-ixp4xx/fsg.h (limited to 'include') diff --git a/include/asm-arm/arch-ixp4xx/fsg.h b/include/asm-arm/arch-ixp4xx/fsg.h new file mode 100644 index 000000000000..c0100cc7981c --- /dev/null +++ b/include/asm-arm/arch-ixp4xx/fsg.h @@ -0,0 +1,50 @@ +/* + * include/asm-arm/arch-ixp4xx/fsg.h + * + * Freecom FSG-3 platform specific definitions + * + * Author: Rod Whitby + * Author: Tomasz Chmielewski + * Maintainers: http://www.nslu2-linux.org + * + * Based on coyote.h by + * Copyright 2004 (c) MontaVista, Software, Inc. + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#ifndef __ASM_ARCH_HARDWARE_H__ +#error "Do not include this directly, instead #include " +#endif + +#define FSG_SDA_PIN 12 +#define FSG_SCL_PIN 13 + +/* + * FSG PCI IRQs + */ +#define FSG_PCI_MAX_DEV 3 +#define FSG_PCI_IRQ_LINES 3 + + +/* PCI controller GPIO to IRQ pin mappings */ +#define FSG_PCI_INTA_PIN 6 +#define FSG_PCI_INTB_PIN 7 +#define FSG_PCI_INTC_PIN 5 + +/* Buttons */ + +#define FSG_SB_GPIO 4 /* sync button */ +#define FSG_RB_GPIO 9 /* reset button */ +#define FSG_UB_GPIO 10 /* usb button */ + +/* LEDs */ + +#define FSG_LED_WLAN_BIT 0 +#define FSG_LED_WAN_BIT 1 +#define FSG_LED_SATA_BIT 2 +#define FSG_LED_USB_BIT 4 +#define FSG_LED_RING_BIT 5 +#define FSG_LED_SYNC_BIT 7 diff --git a/include/asm-arm/arch-ixp4xx/hardware.h b/include/asm-arm/arch-ixp4xx/hardware.h index 73e8dc36f6a4..fa723a627854 100644 --- a/include/asm-arm/arch-ixp4xx/hardware.h +++ b/include/asm-arm/arch-ixp4xx/hardware.h @@ -45,5 +45,6 @@ #include "nslu2.h" #include "nas100d.h" #include "dsmg600.h" +#include "fsg.h" #endif /* _ASM_ARCH_HARDWARE_H */ diff --git a/include/asm-arm/arch-ixp4xx/irqs.h b/include/asm-arm/arch-ixp4xx/irqs.h index 11801605047b..674af4a84147 100644 --- a/include/asm-arm/arch-ixp4xx/irqs.h +++ b/include/asm-arm/arch-ixp4xx/irqs.h @@ -128,4 +128,11 @@ #define IRQ_DSMG600_PCI_INTE IRQ_IXP4XX_GPIO7 #define IRQ_DSMG600_PCI_INTF IRQ_IXP4XX_GPIO6 +/* + * Freecom FSG-3 Board IRQs + */ +#define IRQ_FSG_PCI_INTA IRQ_IXP4XX_GPIO6 +#define IRQ_FSG_PCI_INTB IRQ_IXP4XX_GPIO7 +#define IRQ_FSG_PCI_INTC IRQ_IXP4XX_GPIO5 + #endif -- cgit v1.2.3 From e055d5bff318845f99c0fbf93245767fab8dce88 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 22 Apr 2008 01:43:27 +0100 Subject: [ARM] 5015/1: arm: remove ARCH_CO285 Trying to compile a kerel for ARCH_CO285 fails with the following error: <-- snip --> ... CC arch/arm/mach-footbridge/dc21285.o /home/bunk/linux/kernel-2.6/git/linux-2.6/arch/arm/mach-footbridge/dc21285.c: In function 'dc21285_base_address': /home/bunk/linux/kernel-2.6/git/linux-2.6/arch/arm/mach-footbridge/dc21285.c:54: error: 'PCICFG0_BASE' undeclared (first use in this function) /home/bunk/linux/kernel-2.6/git/linux-2.6/arch/arm/mach-footbridge/dc21285.c:54: error: (Each undeclared identifier is reported only once /home/bunk/linux/kernel-2.6/git/linux-2.6/arch/arm/mach-footbridge/dc21285.c:54: error: for each function it appears in.) /home/bunk/linux/kernel-2.6/git/linux-2.6/arch/arm/mach-footbridge/dc21285.c:57: error: 'PCICFG1_BASE' undeclared (first use in this function) /home/bunk/linux/kernel-2.6/git/linux-2.6/arch/arm/mach-footbridge/dc21285.c: In function 'dc21285_scan_bus': /home/bunk/linux/kernel-2.6/git/linux-2.6/arch/arm/mach-footbridge/dc21285.c:286: error: implicit declaration of function 'pci_scan_bus' ... make[2]: *** [arch/arm/mach-footbridge/dc21285.o] Error 1 <-- snip --> This does not seem to be a recent breakage. The ARCH_CO285 support is old - kernel 2.2.0 contains first traces of it, an it seems to have been pretty complete in later 2.2 kernels. Since it seems to be completely dead code now this patch therefore removes it. Signed-off-by: Adrian Bunk Signed-off-by: Russell King --- arch/arm/Kconfig | 10 +-------- arch/arm/Makefile | 2 -- arch/arm/mach-footbridge/Makefile | 2 -- arch/arm/mach-footbridge/co285.c | 39 --------------------------------- arch/arm/mach-footbridge/common.c | 21 ------------------ arch/arm/mach-footbridge/ebsa285-leds.c | 2 +- arch/arm/mach-footbridge/time.c | 3 +-- include/asm-arm/arch-ebsa285/hardware.h | 26 ---------------------- include/asm-arm/arch-ebsa285/memory.h | 19 ---------------- include/asm-arm/arch-ebsa285/vmalloc.h | 4 ---- 10 files changed, 3 insertions(+), 125 deletions(-) delete mode 100644 arch/arm/mach-footbridge/co285.c (limited to 'include') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 4039a133006e..ab78d5c421fd 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -232,14 +232,6 @@ config ARCH_CLPS711X help Support for Cirrus Logic 711x/721x based boards. -config ARCH_CO285 - bool "Co-EBSA285" - select FOOTBRIDGE - select FOOTBRIDGE_ADDIN - select HAVE_IDE - help - Support for Intel's EBSA285 companion chip. - config ARCH_EBSA110 bool "EBSA-110" select ISA @@ -784,7 +776,7 @@ source "mm/Kconfig" config LEDS bool "Timer and CPU usage LEDs" - depends on ARCH_CDB89712 || ARCH_CO285 || ARCH_EBSA110 || \ + depends on ARCH_CDB89712 || ARCH_EBSA110 || \ ARCH_EBSA285 || ARCH_IMX || ARCH_INTEGRATOR || \ ARCH_LUBBOCK || MACH_MAINSTONE || ARCH_NETWINDER || \ ARCH_OMAP || ARCH_P720T || ARCH_PXA_IDP || \ diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 1a4649667ec8..79b8ca3400e0 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -100,8 +100,6 @@ textofs-y := 0x00008000 incdir-$(CONFIG_ARCH_CLPS7500) := cl7500 machine-$(CONFIG_FOOTBRIDGE) := footbridge incdir-$(CONFIG_FOOTBRIDGE) := ebsa285 - machine-$(CONFIG_ARCH_CO285) := footbridge - incdir-$(CONFIG_ARCH_CO285) := ebsa285 machine-$(CONFIG_ARCH_SHARK) := shark machine-$(CONFIG_ARCH_SA1100) := sa1100 ifeq ($(CONFIG_ARCH_SA1100),y) diff --git a/arch/arm/mach-footbridge/Makefile b/arch/arm/mach-footbridge/Makefile index 0694ad6b6476..32f8609e4f85 100644 --- a/arch/arm/mach-footbridge/Makefile +++ b/arch/arm/mach-footbridge/Makefile @@ -14,12 +14,10 @@ pci-$(CONFIG_ARCH_EBSA285_HOST) += ebsa285-pci.o pci-$(CONFIG_ARCH_NETWINDER) += netwinder-pci.o pci-$(CONFIG_ARCH_PERSONAL_SERVER) += personal-pci.o -leds-$(CONFIG_ARCH_CO285) += ebsa285-leds.o leds-$(CONFIG_ARCH_EBSA285) += ebsa285-leds.o leds-$(CONFIG_ARCH_NETWINDER) += netwinder-leds.o obj-$(CONFIG_ARCH_CATS) += cats-hw.o isa-timer.o -obj-$(CONFIG_ARCH_CO285) += co285.o dc21285-timer.o obj-$(CONFIG_ARCH_EBSA285) += ebsa285.o dc21285-timer.o obj-$(CONFIG_ARCH_NETWINDER) += netwinder-hw.o isa-timer.o obj-$(CONFIG_ARCH_PERSONAL_SERVER) += personal.o dc21285-timer.o diff --git a/arch/arm/mach-footbridge/co285.c b/arch/arm/mach-footbridge/co285.c deleted file mode 100644 index 4545576ad8d9..000000000000 --- a/arch/arm/mach-footbridge/co285.c +++ /dev/null @@ -1,39 +0,0 @@ -/* - * linux/arch/arm/mach-footbridge/co285.c - * - * CO285 machine fixup - */ -#include - -#include -#include - -#include - -#include "common.h" - -static void __init -fixup_coebsa285(struct machine_desc *desc, struct tag *tags, - char **cmdline, struct meminfo *mi) -{ - extern unsigned long boot_memory_end; - extern char boot_command_line[]; - - mi->nr_banks = 1; - mi->bank[0].start = PHYS_OFFSET; - mi->bank[0].size = boot_memory_end; - mi->bank[0].node = 0; - - *cmdline = boot_command_line; -} - -MACHINE_START(CO285, "co-EBSA285") - /* Maintainer: Mark van Doesburg */ - .phys_io = DC21285_ARMCSR_BASE, - .io_pg_offst = ((0x7cf00000) >> 18) & 0xfffc, - .fixup = fixup_coebsa285, - .map_io = footbridge_map_io, - .init_irq = footbridge_init_irq, - .timer = &footbridge_timer, -MACHINE_END - diff --git a/arch/arm/mach-footbridge/common.c b/arch/arm/mach-footbridge/common.c index ef29fc34ce65..b08ab507c052 100644 --- a/arch/arm/mach-footbridge/common.c +++ b/arch/arm/mach-footbridge/common.c @@ -177,25 +177,6 @@ static struct map_desc ebsa285_host_io_desc[] __initdata = { #endif }; -/* - * The CO-ebsa285 mapping. - */ -static struct map_desc co285_io_desc[] __initdata = { -#ifdef CONFIG_ARCH_CO285 - { - .virtual = PCIO_BASE, - .pfn = __phys_to_pfn(DC21285_PCI_IO), - .length = PCIO_SIZE, - .type = MT_DEVICE, - }, { - .virtual = PCIMEM_BASE, - .pfn = __phys_to_pfn(DC21285_PCI_MEM), - .length = PCIMEM_SIZE, - .type = MT_DEVICE, - }, -#endif -}; - void __init footbridge_map_io(void) { /* @@ -208,8 +189,6 @@ void __init footbridge_map_io(void) * Now, work out what we've got to map in addition on this * platform. */ - if (machine_is_co285()) - iotable_init(co285_io_desc, ARRAY_SIZE(co285_io_desc)); if (footbridge_cfn_mode()) iotable_init(ebsa285_host_io_desc, ARRAY_SIZE(ebsa285_host_io_desc)); } diff --git a/arch/arm/mach-footbridge/ebsa285-leds.c b/arch/arm/mach-footbridge/ebsa285-leds.c index a64e22226515..09c1fbc51876 100644 --- a/arch/arm/mach-footbridge/ebsa285-leds.c +++ b/arch/arm/mach-footbridge/ebsa285-leds.c @@ -128,7 +128,7 @@ static void ebsa285_leds_event(led_event_t evt) static int __init leds_init(void) { - if (machine_is_ebsa285() || machine_is_co285()) + if (machine_is_ebsa285()) leds_event = ebsa285_leds_event; leds_event(led_start); diff --git a/arch/arm/mach-footbridge/time.c b/arch/arm/mach-footbridge/time.c index 5d02e95dede3..d5cfcda385d6 100644 --- a/arch/arm/mach-footbridge/time.c +++ b/arch/arm/mach-footbridge/time.c @@ -115,8 +115,7 @@ static int set_isa_cmos_time(void) void __init isa_rtc_init(void) { - if (machine_is_co285() || - machine_is_personal_server()) + if (machine_is_personal_server()) /* * Add-in 21285s shouldn't access the RTC */ diff --git a/include/asm-arm/arch-ebsa285/hardware.h b/include/asm-arm/arch-ebsa285/hardware.h index daad8ee2d194..74610c2c63d4 100644 --- a/include/asm-arm/arch-ebsa285/hardware.h +++ b/include/asm-arm/arch-ebsa285/hardware.h @@ -14,7 +14,6 @@ #include -#ifdef CONFIG_ARCH_FOOTBRIDGE /* Virtual Physical Size * 0xff800000 0x40000000 1MB X-Bus * 0xff000000 0x7c000000 1MB PCI I/O space @@ -50,31 +49,6 @@ #define PCIMEM_SIZE 0x01000000 #define PCIMEM_BASE 0xf0000000 -#elif defined(CONFIG_ARCH_CO285) -/* - * This is the COEBSA285 cut-down mapping - */ -#define PCIMEM_SIZE 0x80000000 -#define PCIMEM_BASE 0x80000000 - -#define WFLUSH_SIZE 0x01000000 -#define WFLUSH_BASE 0x7d000000 - -#define ARMCSR_SIZE 0x00100000 -#define ARMCSR_BASE 0x7cf00000 - -#define XBUS_SIZE 0x00020000 -#define XBUS_BASE 0x7cee0000 - -#define PCIO_SIZE 0x00010000 -#define PCIO_BASE 0x7ced0000 - -#else - -#error "Undefined footbridge architecture" - -#endif - #define XBUS_LEDS ((volatile unsigned char *)(XBUS_BASE + 0x12000)) #define XBUS_LED_AMBER (1 << 0) #define XBUS_LED_GREEN (1 << 1) diff --git a/include/asm-arm/arch-ebsa285/memory.h b/include/asm-arm/arch-ebsa285/memory.h index cbd7ae64bcc9..9019a3bf5ab9 100644 --- a/include/asm-arm/arch-ebsa285/memory.h +++ b/include/asm-arm/arch-ebsa285/memory.h @@ -42,8 +42,6 @@ extern unsigned long __bus_to_virt(unsigned long); #endif -#if defined(CONFIG_ARCH_FOOTBRIDGE) - /* Task size and page offset at 3GB */ #define TASK_SIZE UL(0xbf000000) #define PAGE_OFFSET UL(0xc0000000) @@ -53,23 +51,6 @@ extern unsigned long __bus_to_virt(unsigned long); */ #define FLUSH_BASE 0xf9000000 -#elif defined(CONFIG_ARCH_CO285) - -/* Task size and page offset at 1.5GB */ -#define TASK_SIZE UL(0x5f000000) -#define PAGE_OFFSET UL(0x60000000) - -/* - * Cache flushing area. - */ -#define FLUSH_BASE 0x7e000000 - -#else - -#error "Undefined footbridge architecture" - -#endif - /* * Physical DRAM offset. */ diff --git a/include/asm-arm/arch-ebsa285/vmalloc.h b/include/asm-arm/arch-ebsa285/vmalloc.h index 02598200997d..e487d7e8c8a6 100644 --- a/include/asm-arm/arch-ebsa285/vmalloc.h +++ b/include/asm-arm/arch-ebsa285/vmalloc.h @@ -7,8 +7,4 @@ */ -#ifdef CONFIG_ARCH_FOOTBRIDGE #define VMALLOC_END (PAGE_OFFSET + 0x30000000) -#else -#define VMALLOC_END (PAGE_OFFSET + 0x20000000) -#endif -- cgit v1.2.3 From 205bee6ad804d7034773b5978c74dde495df2301 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 20 Apr 2008 13:57:26 +0100 Subject: [ARM] dyntick: Remove obsolete and unused ARM dyntick support dyntick is superseded by the clocksource/clockevent infrastructure, using the NO_HZ configuration option. No one implements dyntick on ARM anymore, so it's pointless keeping it around. Remove dyntick support. Signed-off-by: Russell King --- arch/arm/Kconfig | 21 ------ arch/arm/configs/at91cap9adk_defconfig | 1 - arch/arm/configs/at91rm9200dk_defconfig | 1 - arch/arm/configs/at91rm9200ek_defconfig | 1 - arch/arm/configs/at91sam9260ek_defconfig | 1 - arch/arm/configs/at91sam9261ek_defconfig | 1 - arch/arm/configs/at91sam9263ek_defconfig | 1 - arch/arm/configs/at91sam9rlek_defconfig | 1 - arch/arm/configs/ateb9200_defconfig | 1 - arch/arm/configs/cm_x270_defconfig | 1 - arch/arm/configs/collie_defconfig | 1 - arch/arm/configs/corgi_defconfig | 1 - arch/arm/configs/ecbat91_defconfig | 1 - arch/arm/configs/em_x270_defconfig | 1 - arch/arm/configs/ep93xx_defconfig | 1 - arch/arm/configs/eseries_pxa_defconfig | 1 - arch/arm/configs/iop13xx_defconfig | 1 - arch/arm/configs/iop32x_defconfig | 1 - arch/arm/configs/iop33x_defconfig | 1 - arch/arm/configs/ixp2000_defconfig | 1 - arch/arm/configs/ixp23xx_defconfig | 1 - arch/arm/configs/kafa_defconfig | 1 - arch/arm/configs/kb9202_defconfig | 1 - arch/arm/configs/ks8695_defconfig | 1 - arch/arm/configs/lpd270_defconfig | 1 - arch/arm/configs/lpd7a404_defconfig | 1 - arch/arm/configs/netx_defconfig | 1 - arch/arm/configs/onearm_defconfig | 1 - arch/arm/configs/picotux200_defconfig | 1 - arch/arm/configs/pnx4008_defconfig | 1 - arch/arm/configs/realview-smp_defconfig | 1 - arch/arm/configs/realview_defconfig | 1 - arch/arm/configs/rpc_defconfig | 1 - arch/arm/configs/s3c2410_defconfig | 1 - arch/arm/configs/sam9_l9260_defconfig | 1 - arch/arm/configs/spitz_defconfig | 1 - arch/arm/configs/tct_hammer_defconfig | 1 - arch/arm/configs/trizeps4_defconfig | 1 - arch/arm/configs/versatile_defconfig | 1 - arch/arm/kernel/process.c | 4 +- arch/arm/kernel/time.c | 120 ------------------------------- arch/arm/mach-omap1/pm.c | 7 -- arch/arm/mach-omap2/pm.c | 7 -- include/asm-arm/dyntick.h | 6 -- include/asm-arm/hw_irq.h | 11 --- include/asm-arm/mach/time.h | 22 ------ 46 files changed, 1 insertion(+), 235 deletions(-) delete mode 100644 include/asm-arm/dyntick.h (limited to 'include') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index b786e68914d4..167824e24edc 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -703,27 +703,6 @@ config PREEMPT Say Y here if you are building a kernel for a desktop, embedded or real-time system. Say N if you are unsure. -config NO_IDLE_HZ - bool "Dynamic tick timer" - depends on !GENERIC_CLOCKEVENTS - help - Select this option if you want to disable continuous timer ticks - and have them programmed to occur as required. This option saves - power as the system can remain in idle state for longer. - - By default dynamic tick is disabled during the boot, and can be - manually enabled with: - - echo 1 > /sys/devices/system/timer/timer0/dyn_tick - - Alternatively, if you want dynamic tick automatically enabled - during boot, pass "dyntick=enable" via the kernel command string. - - Please note that dynamic tick may affect the accuracy of - timekeeping on some platforms depending on the implementation. - Currently at least OMAP, PXA2xx and SA11x0 platforms are known - to have accurate timekeeping with dynamic tick. - config HZ int default 128 if ARCH_L7200 diff --git a/arch/arm/configs/at91cap9adk_defconfig b/arch/arm/configs/at91cap9adk_defconfig index e32e73648129..b8e73e95598d 100644 --- a/arch/arm/configs/at91cap9adk_defconfig +++ b/arch/arm/configs/at91cap9adk_defconfig @@ -213,7 +213,6 @@ CONFIG_CPU_CP15_MMU=y # # CONFIG_TICK_ONESHOT is not set # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 CONFIG_AEABI=y CONFIG_OABI_COMPAT=y diff --git a/arch/arm/configs/at91rm9200dk_defconfig b/arch/arm/configs/at91rm9200dk_defconfig index 2dbbbc3d4ac3..868fb7b9530b 100644 --- a/arch/arm/configs/at91rm9200dk_defconfig +++ b/arch/arm/configs/at91rm9200dk_defconfig @@ -169,7 +169,6 @@ CONFIG_AT91_CF=y # Kernel Features # # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set CONFIG_SELECT_MEMORY_MODEL=y CONFIG_FLATMEM_MANUAL=y diff --git a/arch/arm/configs/at91rm9200ek_defconfig b/arch/arm/configs/at91rm9200ek_defconfig index 6e994f7820c6..de43fc675616 100644 --- a/arch/arm/configs/at91rm9200ek_defconfig +++ b/arch/arm/configs/at91rm9200ek_defconfig @@ -160,7 +160,6 @@ CONFIG_ISA_DMA_API=y # Kernel Features # # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set CONFIG_SELECT_MEMORY_MODEL=y CONFIG_FLATMEM_MANUAL=y diff --git a/arch/arm/configs/at91sam9260ek_defconfig b/arch/arm/configs/at91sam9260ek_defconfig index f659c938473f..2011adfa6758 100644 --- a/arch/arm/configs/at91sam9260ek_defconfig +++ b/arch/arm/configs/at91sam9260ek_defconfig @@ -220,7 +220,6 @@ CONFIG_CPU_CP15_MMU=y # # CONFIG_TICK_ONESHOT is not set # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/at91sam9261ek_defconfig b/arch/arm/configs/at91sam9261ek_defconfig index 3802e85f7483..4049768962d2 100644 --- a/arch/arm/configs/at91sam9261ek_defconfig +++ b/arch/arm/configs/at91sam9261ek_defconfig @@ -213,7 +213,6 @@ CONFIG_CPU_CP15_MMU=y # # CONFIG_TICK_ONESHOT is not set # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/at91sam9263ek_defconfig b/arch/arm/configs/at91sam9263ek_defconfig index 32a0d74e0c89..fa1c5aecb5a8 100644 --- a/arch/arm/configs/at91sam9263ek_defconfig +++ b/arch/arm/configs/at91sam9263ek_defconfig @@ -213,7 +213,6 @@ CONFIG_CPU_CP15_MMU=y # # CONFIG_TICK_ONESHOT is not set # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/at91sam9rlek_defconfig b/arch/arm/configs/at91sam9rlek_defconfig index 98e6746d02be..d8ec5f9ca6ec 100644 --- a/arch/arm/configs/at91sam9rlek_defconfig +++ b/arch/arm/configs/at91sam9rlek_defconfig @@ -211,7 +211,6 @@ CONFIG_CPU_CP15_MMU=y # # CONFIG_TICK_ONESHOT is not set # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/ateb9200_defconfig b/arch/arm/configs/ateb9200_defconfig index d846a492e5ce..85c80f723d8e 100644 --- a/arch/arm/configs/ateb9200_defconfig +++ b/arch/arm/configs/ateb9200_defconfig @@ -171,7 +171,6 @@ CONFIG_AT91_CF=m # Kernel Features # CONFIG_PREEMPT=y -CONFIG_NO_IDLE_HZ=y CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/cm_x270_defconfig b/arch/arm/configs/cm_x270_defconfig index 5cab08397ae7..33b201c3b309 100644 --- a/arch/arm/configs/cm_x270_defconfig +++ b/arch/arm/configs/cm_x270_defconfig @@ -194,7 +194,6 @@ CONFIG_PCI_HOST_ITE8152=y # # CONFIG_TICK_ONESHOT is not set # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/collie_defconfig b/arch/arm/configs/collie_defconfig index 4264e273202d..f7622e658163 100644 --- a/arch/arm/configs/collie_defconfig +++ b/arch/arm/configs/collie_defconfig @@ -166,7 +166,6 @@ CONFIG_PCMCIA_SA1100=y # Kernel Features # # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set CONFIG_ARCH_DISCONTIGMEM_ENABLE=y diff --git a/arch/arm/configs/corgi_defconfig b/arch/arm/configs/corgi_defconfig index e8980a9bb893..9b8748a8d9dd 100644 --- a/arch/arm/configs/corgi_defconfig +++ b/arch/arm/configs/corgi_defconfig @@ -165,7 +165,6 @@ CONFIG_PCMCIA_PXA2XX=y # Kernel Features # CONFIG_PREEMPT=y -# CONFIG_NO_IDLE_HZ is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set CONFIG_SELECT_MEMORY_MODEL=y CONFIG_FLATMEM_MANUAL=y diff --git a/arch/arm/configs/ecbat91_defconfig b/arch/arm/configs/ecbat91_defconfig index 90ed214e3673..cfeb817ad21a 100644 --- a/arch/arm/configs/ecbat91_defconfig +++ b/arch/arm/configs/ecbat91_defconfig @@ -230,7 +230,6 @@ CONFIG_AT91_CF=y # # CONFIG_TICK_ONESHOT is not set CONFIG_PREEMPT=y -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/em_x270_defconfig b/arch/arm/configs/em_x270_defconfig index 6bea0901bdf0..d3114c23603b 100644 --- a/arch/arm/configs/em_x270_defconfig +++ b/arch/arm/configs/em_x270_defconfig @@ -197,7 +197,6 @@ CONFIG_XSCALE_PMU=y # # CONFIG_TICK_ONESHOT is not set # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 CONFIG_AEABI=y CONFIG_OABI_COMPAT=y diff --git a/arch/arm/configs/ep93xx_defconfig b/arch/arm/configs/ep93xx_defconfig index 24a701ab33e5..21aa013793c6 100644 --- a/arch/arm/configs/ep93xx_defconfig +++ b/arch/arm/configs/ep93xx_defconfig @@ -184,7 +184,6 @@ CONFIG_ARM_AMBA=y # Kernel Features # # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/eseries_pxa_defconfig b/arch/arm/configs/eseries_pxa_defconfig index ed487b90dbed..493ecee24f94 100644 --- a/arch/arm/configs/eseries_pxa_defconfig +++ b/arch/arm/configs/eseries_pxa_defconfig @@ -251,7 +251,6 @@ CONFIG_PCMCIA_PXA2XX=m # Kernel Features # # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 CONFIG_AEABI=y CONFIG_OABI_COMPAT=y diff --git a/arch/arm/configs/iop13xx_defconfig b/arch/arm/configs/iop13xx_defconfig index 988b4d13e76f..482e57061053 100644 --- a/arch/arm/configs/iop13xx_defconfig +++ b/arch/arm/configs/iop13xx_defconfig @@ -197,7 +197,6 @@ CONFIG_PCI_LEGACY=y # # CONFIG_TICK_ONESHOT is not set # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/iop32x_defconfig b/arch/arm/configs/iop32x_defconfig index 83f40d4041a6..8612f58e1056 100644 --- a/arch/arm/configs/iop32x_defconfig +++ b/arch/arm/configs/iop32x_defconfig @@ -201,7 +201,6 @@ CONFIG_PCI_LEGACY=y # # CONFIG_TICK_ONESHOT is not set # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/iop33x_defconfig b/arch/arm/configs/iop33x_defconfig index 917afb5ccfac..8b0098d19d08 100644 --- a/arch/arm/configs/iop33x_defconfig +++ b/arch/arm/configs/iop33x_defconfig @@ -197,7 +197,6 @@ CONFIG_PCI_LEGACY=y # # CONFIG_TICK_ONESHOT is not set # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/ixp2000_defconfig b/arch/arm/configs/ixp2000_defconfig index f8f9793b526f..84680db6c615 100644 --- a/arch/arm/configs/ixp2000_defconfig +++ b/arch/arm/configs/ixp2000_defconfig @@ -184,7 +184,6 @@ CONFIG_PCI=y # Kernel Features # # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/ixp23xx_defconfig b/arch/arm/configs/ixp23xx_defconfig index 27cf022dd807..4a2f7b2372db 100644 --- a/arch/arm/configs/ixp23xx_defconfig +++ b/arch/arm/configs/ixp23xx_defconfig @@ -180,7 +180,6 @@ CONFIG_PCI=y # Kernel Features # # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/kafa_defconfig b/arch/arm/configs/kafa_defconfig index ae51a40db6f9..6dd95a2c8d5d 100644 --- a/arch/arm/configs/kafa_defconfig +++ b/arch/arm/configs/kafa_defconfig @@ -162,7 +162,6 @@ CONFIG_CPU_TLB_V4WBI=y # Kernel Features # CONFIG_PREEMPT=y -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/kb9202_defconfig b/arch/arm/configs/kb9202_defconfig index c16537d9d67a..8e74c66f239d 100644 --- a/arch/arm/configs/kb9202_defconfig +++ b/arch/arm/configs/kb9202_defconfig @@ -126,7 +126,6 @@ CONFIG_ISA_DMA_API=y # # Kernel Features # -# CONFIG_NO_IDLE_HZ is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set CONFIG_FLATMEM=y CONFIG_FLAT_NODE_MEM_MAP=y diff --git a/arch/arm/configs/ks8695_defconfig b/arch/arm/configs/ks8695_defconfig index 8ab21a0719e9..6077f2cb88e4 100644 --- a/arch/arm/configs/ks8695_defconfig +++ b/arch/arm/configs/ks8695_defconfig @@ -174,7 +174,6 @@ CONFIG_PCCARD_NONSTATIC=y # Kernel Features # # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/lpd270_defconfig b/arch/arm/configs/lpd270_defconfig index a3bf5833b87a..1a38d8e3fe66 100644 --- a/arch/arm/configs/lpd270_defconfig +++ b/arch/arm/configs/lpd270_defconfig @@ -173,7 +173,6 @@ CONFIG_XSCALE_PMU=y # Kernel Features # # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/lpd7a404_defconfig b/arch/arm/configs/lpd7a404_defconfig index 46a0f7fe1fa5..7a2e932da1c6 100644 --- a/arch/arm/configs/lpd7a404_defconfig +++ b/arch/arm/configs/lpd7a404_defconfig @@ -148,7 +148,6 @@ CONFIG_ARM_AMBA=y # Kernel Features # CONFIG_PREEMPT=y -# CONFIG_NO_IDLE_HZ is not set # CONFIG_AEABI is not set CONFIG_ARCH_DISCONTIGMEM_ENABLE=y CONFIG_SELECT_MEMORY_MODEL=y diff --git a/arch/arm/configs/netx_defconfig b/arch/arm/configs/netx_defconfig index 57f32f39d0ff..0884f2370c3a 100644 --- a/arch/arm/configs/netx_defconfig +++ b/arch/arm/configs/netx_defconfig @@ -154,7 +154,6 @@ CONFIG_ARM_AMBA=y # Kernel Features # CONFIG_PREEMPT=y -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/onearm_defconfig b/arch/arm/configs/onearm_defconfig index 650a248613e5..418ca2febbe3 100644 --- a/arch/arm/configs/onearm_defconfig +++ b/arch/arm/configs/onearm_defconfig @@ -202,7 +202,6 @@ CONFIG_AT91_CF=y # Kernel Features # # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/picotux200_defconfig b/arch/arm/configs/picotux200_defconfig index 95a22f512805..14826f0dabde 100644 --- a/arch/arm/configs/picotux200_defconfig +++ b/arch/arm/configs/picotux200_defconfig @@ -201,7 +201,6 @@ CONFIG_ARM_THUMB=y # Kernel Features # # CONFIG_PREEMPT is not set -CONFIG_NO_IDLE_HZ=y CONFIG_HZ=100 CONFIG_AEABI=y CONFIG_OABI_COMPAT=y diff --git a/arch/arm/configs/pnx4008_defconfig b/arch/arm/configs/pnx4008_defconfig index b5e11aa2e290..811b8f60d19d 100644 --- a/arch/arm/configs/pnx4008_defconfig +++ b/arch/arm/configs/pnx4008_defconfig @@ -151,7 +151,6 @@ CONFIG_ARM_THUMB=y # Kernel Features # CONFIG_PREEMPT=y -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/realview-smp_defconfig b/arch/arm/configs/realview-smp_defconfig index fc39ba1a89f3..0c09b23167ec 100644 --- a/arch/arm/configs/realview-smp_defconfig +++ b/arch/arm/configs/realview-smp_defconfig @@ -177,7 +177,6 @@ CONFIG_NR_CPUS=4 CONFIG_HOTPLUG_CPU=y CONFIG_LOCAL_TIMERS=y # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/realview_defconfig b/arch/arm/configs/realview_defconfig index accbf529ce5b..907e54344dad 100644 --- a/arch/arm/configs/realview_defconfig +++ b/arch/arm/configs/realview_defconfig @@ -126,7 +126,6 @@ CONFIG_ISA_DMA_API=y # # Kernel Features # -# CONFIG_NO_IDLE_HZ is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set CONFIG_FLATMEM=y CONFIG_FLAT_NODE_MEM_MAP=y diff --git a/arch/arm/configs/rpc_defconfig b/arch/arm/configs/rpc_defconfig index 5ddecb9ddf01..f62d1817d2c6 100644 --- a/arch/arm/configs/rpc_defconfig +++ b/arch/arm/configs/rpc_defconfig @@ -190,7 +190,6 @@ CONFIG_ISA_DMA_API=y # # CONFIG_TICK_ONESHOT is not set # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/s3c2410_defconfig b/arch/arm/configs/s3c2410_defconfig index f8a1645b3d4a..6d03763773bf 100644 --- a/arch/arm/configs/s3c2410_defconfig +++ b/arch/arm/configs/s3c2410_defconfig @@ -246,7 +246,6 @@ CONFIG_ISA=y # Kernel Features # # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=200 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/sam9_l9260_defconfig b/arch/arm/configs/sam9_l9260_defconfig index 484dc9739dfc..8688362bcf7b 100644 --- a/arch/arm/configs/sam9_l9260_defconfig +++ b/arch/arm/configs/sam9_l9260_defconfig @@ -211,7 +211,6 @@ CONFIG_ARM_THUMB=y # # CONFIG_TICK_ONESHOT is not set CONFIG_PREEMPT=y -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/spitz_defconfig b/arch/arm/configs/spitz_defconfig index aa7a01179500..7d59fb1f1cea 100644 --- a/arch/arm/configs/spitz_defconfig +++ b/arch/arm/configs/spitz_defconfig @@ -164,7 +164,6 @@ CONFIG_PCMCIA_PXA2XX=y # Kernel Features # CONFIG_PREEMPT=y -# CONFIG_NO_IDLE_HZ is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set CONFIG_SELECT_MEMORY_MODEL=y CONFIG_FLATMEM_MANUAL=y diff --git a/arch/arm/configs/tct_hammer_defconfig b/arch/arm/configs/tct_hammer_defconfig index 576b8339f0d6..07dfb98df4f0 100644 --- a/arch/arm/configs/tct_hammer_defconfig +++ b/arch/arm/configs/tct_hammer_defconfig @@ -247,7 +247,6 @@ CONFIG_ARM_THUMB=y # # CONFIG_TICK_ONESHOT is not set # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=200 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/trizeps4_defconfig b/arch/arm/configs/trizeps4_defconfig index 6db6392806f9..8b7a431a8bfc 100644 --- a/arch/arm/configs/trizeps4_defconfig +++ b/arch/arm/configs/trizeps4_defconfig @@ -195,7 +195,6 @@ CONFIG_PCMCIA_PXA2XX=y # Kernel Features # CONFIG_PREEMPT=y -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 CONFIG_AEABI=y CONFIG_OABI_COMPAT=y diff --git a/arch/arm/configs/versatile_defconfig b/arch/arm/configs/versatile_defconfig index 48dca69addae..8355f88f7292 100644 --- a/arch/arm/configs/versatile_defconfig +++ b/arch/arm/configs/versatile_defconfig @@ -151,7 +151,6 @@ CONFIG_ARM_AMBA=y # Kernel Features # # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 46bf2ede6128..199b3680118b 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -133,10 +133,8 @@ static void default_idle(void) cpu_relax(); else { local_irq_disable(); - if (!need_resched()) { - timer_dyn_reprogram(); + if (!need_resched()) arch_idle(); - } local_irq_enable(); } } diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index b5867eca1d0b..cc5145b28e7f 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -365,108 +365,6 @@ static struct sysdev_class timer_sysclass = { .resume = timer_resume, }; -#ifdef CONFIG_NO_IDLE_HZ -static int timer_dyn_tick_enable(void) -{ - struct dyn_tick_timer *dyn_tick = system_timer->dyn_tick; - unsigned long flags; - int ret = -ENODEV; - - if (dyn_tick) { - spin_lock_irqsave(&dyn_tick->lock, flags); - ret = 0; - if (!(dyn_tick->state & DYN_TICK_ENABLED)) { - ret = dyn_tick->enable(); - - if (ret == 0) - dyn_tick->state |= DYN_TICK_ENABLED; - } - spin_unlock_irqrestore(&dyn_tick->lock, flags); - } - - return ret; -} - -static int timer_dyn_tick_disable(void) -{ - struct dyn_tick_timer *dyn_tick = system_timer->dyn_tick; - unsigned long flags; - int ret = -ENODEV; - - if (dyn_tick) { - spin_lock_irqsave(&dyn_tick->lock, flags); - ret = 0; - if (dyn_tick->state & DYN_TICK_ENABLED) { - ret = dyn_tick->disable(); - - if (ret == 0) - dyn_tick->state &= ~DYN_TICK_ENABLED; - } - spin_unlock_irqrestore(&dyn_tick->lock, flags); - } - - return ret; -} - -/* - * Reprogram the system timer for at least the calculated time interval. - * This function should be called from the idle thread with IRQs disabled, - * immediately before sleeping. - */ -void timer_dyn_reprogram(void) -{ - struct dyn_tick_timer *dyn_tick = system_timer->dyn_tick; - unsigned long next, seq, flags; - - if (!dyn_tick) - return; - - spin_lock_irqsave(&dyn_tick->lock, flags); - if (dyn_tick->state & DYN_TICK_ENABLED) { - next = next_timer_interrupt(); - do { - seq = read_seqbegin(&xtime_lock); - dyn_tick->reprogram(next - jiffies); - } while (read_seqretry(&xtime_lock, seq)); - } - spin_unlock_irqrestore(&dyn_tick->lock, flags); -} - -static ssize_t timer_show_dyn_tick(struct sys_device *dev, char *buf) -{ - return sprintf(buf, "%i\n", - (system_timer->dyn_tick->state & DYN_TICK_ENABLED) >> 1); -} - -static ssize_t timer_set_dyn_tick(struct sys_device *dev, const char *buf, - size_t count) -{ - unsigned int enable = simple_strtoul(buf, NULL, 2); - - if (enable) - timer_dyn_tick_enable(); - else - timer_dyn_tick_disable(); - - return count; -} -static SYSDEV_ATTR(dyn_tick, 0644, timer_show_dyn_tick, timer_set_dyn_tick); - -/* - * dyntick=enable|disable - */ -static char dyntick_str[4] __initdata = ""; - -static int __init dyntick_setup(char *str) -{ - if (str) - strlcpy(dyntick_str, str, sizeof(dyntick_str)); - return 1; -} - -__setup("dyntick=", dyntick_setup); -#endif - static int __init timer_init_sysfs(void) { int ret = sysdev_class_register(&timer_sysclass); @@ -475,19 +373,6 @@ static int __init timer_init_sysfs(void) ret = sysdev_register(&system_timer->dev); } -#ifdef CONFIG_NO_IDLE_HZ - if (ret == 0 && system_timer->dyn_tick) { - ret = sysdev_create_file(&system_timer->dev, &attr_dyn_tick); - - /* - * Turn on dynamic tick after calibrate delay - * for correct bogomips - */ - if (ret == 0 && dyntick_str[0] == 'e') - ret = timer_dyn_tick_enable(); - } -#endif - return ret; } @@ -500,10 +385,5 @@ void __init time_init(void) system_timer->offset = dummy_gettimeoffset; #endif system_timer->init(); - -#ifdef CONFIG_NO_IDLE_HZ - if (system_timer->dyn_tick) - spin_lock_init(&system_timer->dyn_tick->lock); -#endif } diff --git a/arch/arm/mach-omap1/pm.c b/arch/arm/mach-omap1/pm.c index e6c64e10b7ec..742f79e73bd7 100644 --- a/arch/arm/mach-omap1/pm.c +++ b/arch/arm/mach-omap1/pm.c @@ -116,13 +116,6 @@ void omap_pm_idle(void) return; } - /* - * Since an interrupt may set up a timer, we don't want to - * reprogram the hardware timer with interrupts enabled. - * Re-enable interrupts only after returning from idle. - */ - timer_dyn_reprogram(); - #ifdef CONFIG_OMAP_MPU_TIMER #warning Enable 32kHz OS timer in order to allow sleep states in idle use_idlect1 = use_idlect1 & ~(1 << 9); diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c index aad781dcf1b1..d6c9de82ca0c 100644 --- a/arch/arm/mach-omap2/pm.c +++ b/arch/arm/mach-omap2/pm.c @@ -57,13 +57,6 @@ void omap2_pm_idle(void) return; } - /* - * Since an interrupt may set up a timer, we don't want to - * reprogram the hardware timer with interrupts enabled. - * Re-enable interrupts only after returning from idle. - */ - timer_dyn_reprogram(); - omap2_sram_idle(); local_fiq_enable(); local_irq_enable(); diff --git a/include/asm-arm/dyntick.h b/include/asm-arm/dyntick.h deleted file mode 100644 index 19fab2d2b760..000000000000 --- a/include/asm-arm/dyntick.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASMARM_DYNTICK_H -#define _ASMARM_DYNTICK_H - -#include - -#endif /* _ASMARM_DYNTICK_H */ diff --git a/include/asm-arm/hw_irq.h b/include/asm-arm/hw_irq.h index 98d594a973d6..f1a08a500604 100644 --- a/include/asm-arm/hw_irq.h +++ b/include/asm-arm/hw_irq.h @@ -6,15 +6,4 @@ #include -#if defined(CONFIG_NO_IDLE_HZ) -# include -# define handle_dynamic_tick(action) \ - if (!(action->flags & IRQF_TIMER) && system_timer->dyn_tick) { \ - write_seqlock(&xtime_lock); \ - if (system_timer->dyn_tick->state & DYN_TICK_ENABLED) \ - system_timer->dyn_tick->handler(irq, NULL); \ - write_sequnlock(&xtime_lock); \ - } -#endif - #endif diff --git a/include/asm-arm/mach/time.h b/include/asm-arm/mach/time.h index 5dc357013b79..2fd36ea0130d 100644 --- a/include/asm-arm/mach/time.h +++ b/include/asm-arm/mach/time.h @@ -41,30 +41,8 @@ struct sys_timer { #ifndef CONFIG_GENERIC_TIME unsigned long (*offset)(void); #endif - -#ifdef CONFIG_NO_IDLE_HZ - struct dyn_tick_timer *dyn_tick; -#endif -}; - -#ifdef CONFIG_NO_IDLE_HZ - -#define DYN_TICK_ENABLED (1 << 1) - -struct dyn_tick_timer { - spinlock_t lock; - unsigned int state; /* Current state */ - int (*enable)(void); /* Enables dynamic tick */ - int (*disable)(void); /* Disables dynamic tick */ - void (*reprogram)(unsigned long); /* Reprograms the timer */ - int (*handler)(int, void *); }; -void timer_dyn_reprogram(void); -#else -#define timer_dyn_reprogram() do { } while (0) -#endif - extern struct sys_timer *system_timer; extern void timer_tick(void); -- cgit v1.2.3 From 1c7d06d419dbe82c76fbb4d3e1fa61b2da2dc00b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 30 Apr 2008 22:12:05 +0200 Subject: revert: thread_info.h change temporarily revert parts of "signals: x86 TS_RESTORE_SIGMASK". Signed-off-by: Ingo Molnar --- include/asm-x86/thread_info_32.h | 13 ++----------- include/asm-x86/thread_info_64.h | 13 ++----------- 2 files changed, 4 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/asm-x86/thread_info_32.h b/include/asm-x86/thread_info_32.h index b6338829d1a8..531859962096 100644 --- a/include/asm-x86/thread_info_32.h +++ b/include/asm-x86/thread_info_32.h @@ -131,6 +131,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SYSCALL_EMU 5 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 6 /* syscall auditing active */ #define TIF_SECCOMP 7 /* secure computing */ +#define TIF_RESTORE_SIGMASK 8 /* restore signal mask in do_signal() */ #define TIF_HRTICK_RESCHED 9 /* reprogram hrtick timer */ #define TIF_MEMDIE 16 #define TIF_DEBUG 17 /* uses debug registers */ @@ -150,6 +151,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) #define _TIF_HRTICK_RESCHED (1 << TIF_HRTICK_RESCHED) #define _TIF_DEBUG (1 << TIF_DEBUG) #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) @@ -186,20 +188,9 @@ static inline struct thread_info *current_thread_info(void) this quantum (SMP) */ #define TS_POLLING 0x0002 /* True if in idle loop and not sleeping */ -#define TS_RESTORE_SIGMASK 0x0004 /* restore signal mask in do_signal() */ #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) -#ifndef __ASSEMBLY__ -#define HAVE_SET_RESTORE_SIGMASK 1 -static inline void set_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - ti->status |= TS_RESTORE_SIGMASK; - set_bit(TIF_SIGPENDING, &ti->flags); -} -#endif /* !__ASSEMBLY__ */ - #endif /* __KERNEL__ */ #endif /* _ASM_THREAD_INFO_H */ diff --git a/include/asm-x86/thread_info_64.h b/include/asm-x86/thread_info_64.h index cb69f70abba1..ed664e874dec 100644 --- a/include/asm-x86/thread_info_64.h +++ b/include/asm-x86/thread_info_64.h @@ -109,6 +109,7 @@ static inline struct thread_info *stack_thread_info(void) #define TIF_IRET 5 /* force IRET */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ +#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */ #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ #define TIF_HRTICK_RESCHED 11 /* reprogram hrtick timer */ /* 16 free */ @@ -132,6 +133,7 @@ static inline struct thread_info *stack_thread_info(void) #define _TIF_IRET (1 << TIF_IRET) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) #define _TIF_HRTICK_RESCHED (1 << TIF_HRTICK_RESCHED) #define _TIF_IA32 (1 << TIF_IA32) @@ -176,20 +178,9 @@ static inline struct thread_info *stack_thread_info(void) #define TS_COMPAT 0x0002 /* 32bit syscall active */ #define TS_POLLING 0x0004 /* true if in idle loop and not sleeping */ -#define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */ #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) -#ifndef __ASSEMBLY__ -#define HAVE_SET_RESTORE_SIGMASK 1 -static inline void set_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - ti->status |= TS_RESTORE_SIGMASK; - set_bit(TIF_SIGPENDING, &ti->flags); -} -#endif /* !__ASSEMBLY__ */ - #endif /* __KERNEL__ */ #endif /* _ASM_THREAD_INFO_H */ -- cgit v1.2.3 From 2052e8d40ad58b1d364f900e70edfda62caa0874 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 12 May 2008 15:43:41 +0200 Subject: x86: merge thread_info.h Simple merge of both thread_info_32.h and thread_info_64.h into thread_info.h. Comments for #ifndef __ASM_THREAD_INFO_H and #ifdef __KERNEL__ are the same. Signed-off-by: Christoph Lameter Signed-off-by: Ingo Molnar --- include/asm-x86/thread_info.h | 366 ++++++++++++++++++++++++++++++++++++++- include/asm-x86/thread_info_32.h | 196 --------------------- include/asm-x86/thread_info_64.h | 186 -------------------- 3 files changed, 364 insertions(+), 384 deletions(-) delete mode 100644 include/asm-x86/thread_info_32.h delete mode 100644 include/asm-x86/thread_info_64.h (limited to 'include') diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index 77244f17993f..d59384ac0ba6 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -1,10 +1,372 @@ +/* thread_info.h: low-level thread information + * + * Copyright (C) 2002 David Howells (dhowells@redhat.com) + * - Incorporating suggestions made by Linus Torvalds and Dave Miller + */ + #ifndef _ASM_X86_THREAD_INFO_H +#define _ASM_X86_THREAD_INFO_H + #ifdef CONFIG_X86_32 -# include "thread_info_32.h" +#include +#include + +#ifndef __ASSEMBLY__ +#include +#endif + +/* + * low level task data that entry.S needs immediate access to + * - this struct should fit entirely inside of one cache line + * - this struct shares the supervisor stack pages + * - if the contents of this structure are changed, + * the assembly constants must also be changed + */ +#ifndef __ASSEMBLY__ + +struct thread_info { + struct task_struct *task; /* main task structure */ + struct exec_domain *exec_domain; /* execution domain */ + unsigned long flags; /* low level flags */ + unsigned long status; /* thread-synchronous flags */ + __u32 cpu; /* current CPU */ + int preempt_count; /* 0 => preemptable, + <0 => BUG */ + mm_segment_t addr_limit; /* thread address space: + 0-0xBFFFFFFF user-thread + 0-0xFFFFFFFF kernel-thread + */ + void *sysenter_return; + struct restart_block restart_block; + unsigned long previous_esp; /* ESP of the previous stack in + case of nested (IRQ) stacks + */ + __u8 supervisor_stack[0]; +}; + +#else /* !__ASSEMBLY__ */ + +#include + +#endif + +#define PREEMPT_ACTIVE 0x10000000 +#ifdef CONFIG_4KSTACKS +#define THREAD_SIZE (4096) #else -# include "thread_info_64.h" +#define THREAD_SIZE (8192) #endif +#define STACK_WARN (THREAD_SIZE/8) +/* + * macros/functions for gaining access to the thread information structure + * + * preempt_count needs to be 1 initially, until the scheduler is functional. + */ +#ifndef __ASSEMBLY__ + +#define INIT_THREAD_INFO(tsk) \ +{ \ + .task = &tsk, \ + .exec_domain = &default_exec_domain, \ + .flags = 0, \ + .cpu = 0, \ + .preempt_count = 1, \ + .addr_limit = KERNEL_DS, \ + .restart_block = { \ + .fn = do_no_restart_syscall, \ + }, \ +} + +#define init_thread_info (init_thread_union.thread_info) +#define init_stack (init_thread_union.stack) + + +/* how to get the current stack pointer from C */ +register unsigned long current_stack_pointer asm("esp") __used; + +/* how to get the thread information struct from C */ +static inline struct thread_info *current_thread_info(void) +{ + return (struct thread_info *) + (current_stack_pointer & ~(THREAD_SIZE - 1)); +} + +/* thread information allocation */ +#ifdef CONFIG_DEBUG_STACK_USAGE +#define alloc_thread_info(tsk) ((struct thread_info *) \ + __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(THREAD_SIZE))) +#else +#define alloc_thread_info(tsk) ((struct thread_info *) \ + __get_free_pages(GFP_KERNEL, get_order(THREAD_SIZE))) +#endif + +#else /* !__ASSEMBLY__ */ + +/* how to get the thread information struct from ASM */ +#define GET_THREAD_INFO(reg) \ + movl $-THREAD_SIZE, reg; \ + andl %esp, reg + +/* use this one if reg already contains %esp */ +#define GET_THREAD_INFO_WITH_ESP(reg) \ + andl $-THREAD_SIZE, reg + +#endif + +/* + * thread information flags + * - these are process state flags that various + * assembly files may need to access + * - pending work-to-be-done flags are in LSW + * - other flags in MSW + */ +#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ +#define TIF_SIGPENDING 1 /* signal pending */ +#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ +#define TIF_SINGLESTEP 3 /* restore singlestep on return to + user mode */ +#define TIF_IRET 4 /* return with iret */ +#define TIF_SYSCALL_EMU 5 /* syscall emulation active */ +#define TIF_SYSCALL_AUDIT 6 /* syscall auditing active */ +#define TIF_SECCOMP 7 /* secure computing */ +#define TIF_RESTORE_SIGMASK 8 /* restore signal mask in do_signal() */ +#define TIF_HRTICK_RESCHED 9 /* reprogram hrtick timer */ +#define TIF_MEMDIE 16 +#define TIF_DEBUG 17 /* uses debug registers */ +#define TIF_IO_BITMAP 18 /* uses I/O bitmap */ +#define TIF_FREEZE 19 /* is freezing for suspend */ +#define TIF_NOTSC 20 /* TSC is not accessible in userland */ +#define TIF_FORCED_TF 21 /* true if TF in eflags artificially */ +#define TIF_DEBUGCTLMSR 22 /* uses thread_struct.debugctlmsr */ +#define TIF_DS_AREA_MSR 23 /* uses thread_struct.ds_area_msr */ +#define TIF_BTS_TRACE_TS 24 /* record scheduling event timestamps */ + +#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) +#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) +#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) +#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) +#define _TIF_IRET (1 << TIF_IRET) +#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) +#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) +#define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) +#define _TIF_HRTICK_RESCHED (1 << TIF_HRTICK_RESCHED) +#define _TIF_DEBUG (1 << TIF_DEBUG) +#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) +#define _TIF_FREEZE (1 << TIF_FREEZE) +#define _TIF_NOTSC (1 << TIF_NOTSC) +#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) +#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) +#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) +#define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS) + +/* work to do on interrupt/exception return */ +#define _TIF_WORK_MASK \ + (0x0000FFFF & ~(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ + _TIF_SECCOMP | _TIF_SYSCALL_EMU)) +/* work to do on any return to u-space */ +#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP) + +/* flags to check in __switch_to() */ +#define _TIF_WORK_CTXSW \ + (_TIF_IO_BITMAP | _TIF_NOTSC | _TIF_DEBUGCTLMSR | \ + _TIF_DS_AREA_MSR | _TIF_BTS_TRACE_TS) +#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW +#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW | _TIF_DEBUG) + + +/* + * Thread-synchronous status. + * + * This is different from the flags in that nobody else + * ever touches our thread-synchronous status, so we don't + * have to worry about atomic accesses. + */ +#define TS_USEDFPU 0x0001 /* FPU was used by this task + this quantum (SMP) */ +#define TS_POLLING 0x0002 /* True if in idle loop + and not sleeping */ + +#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) + +#else /* X86_32 */ + +#include +#include +#include + +/* + * low level task data that entry.S needs immediate access to + * - this struct should fit entirely inside of one cache line + * - this struct shares the supervisor stack pages + */ +#ifndef __ASSEMBLY__ +struct task_struct; +struct exec_domain; +#include + +struct thread_info { + struct task_struct *task; /* main task structure */ + struct exec_domain *exec_domain; /* execution domain */ + __u32 flags; /* low level flags */ + __u32 status; /* thread synchronous flags */ + __u32 cpu; /* current CPU */ + int preempt_count; /* 0 => preemptable, + <0 => BUG */ + mm_segment_t addr_limit; + struct restart_block restart_block; +#ifdef CONFIG_IA32_EMULATION + void __user *sysenter_return; +#endif +}; +#endif + +/* + * macros/functions for gaining access to the thread information structure + * preempt_count needs to be 1 initially, until the scheduler is functional. + */ +#ifndef __ASSEMBLY__ +#define INIT_THREAD_INFO(tsk) \ +{ \ + .task = &tsk, \ + .exec_domain = &default_exec_domain, \ + .flags = 0, \ + .cpu = 0, \ + .preempt_count = 1, \ + .addr_limit = KERNEL_DS, \ + .restart_block = { \ + .fn = do_no_restart_syscall, \ + }, \ +} + +#define init_thread_info (init_thread_union.thread_info) +#define init_stack (init_thread_union.stack) + +static inline struct thread_info *current_thread_info(void) +{ + struct thread_info *ti; + ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE); + return ti; +} + +/* do not use in interrupt context */ +static inline struct thread_info *stack_thread_info(void) +{ + struct thread_info *ti; + asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1))); + return ti; +} + +/* thread information allocation */ +#ifdef CONFIG_DEBUG_STACK_USAGE +#define THREAD_FLAGS (GFP_KERNEL | __GFP_ZERO) +#else +#define THREAD_FLAGS GFP_KERNEL +#endif + +#define alloc_thread_info(tsk) \ + ((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER)) + +#else /* !__ASSEMBLY__ */ + +/* how to get the thread information struct from ASM */ +#define GET_THREAD_INFO(reg) \ + movq %gs:pda_kernelstack,reg ; \ + subq $(THREAD_SIZE-PDA_STACKOFFSET),reg + +#endif + +/* + * thread information flags + * - these are process state flags that various assembly files + * may need to access + * - pending work-to-be-done flags are in LSW + * - other flags in MSW + * Warning: layout of LSW is hardcoded in entry.S + */ +#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ +#define TIF_SIGPENDING 2 /* signal pending */ +#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ +#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ +#define TIF_IRET 5 /* force IRET */ +#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ +#define TIF_SECCOMP 8 /* secure computing */ +#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */ +#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ +#define TIF_HRTICK_RESCHED 11 /* reprogram hrtick timer */ +/* 16 free */ +#define TIF_IA32 17 /* 32bit process */ +#define TIF_FORK 18 /* ret_from_fork */ +#define TIF_ABI_PENDING 19 +#define TIF_MEMDIE 20 +#define TIF_DEBUG 21 /* uses debug registers */ +#define TIF_IO_BITMAP 22 /* uses I/O bitmap */ +#define TIF_FREEZE 23 /* is freezing for suspend */ +#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ +#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ +#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ +#define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */ +#define TIF_NOTSC 28 /* TSC is not accessible in userland */ + +#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) +#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) +#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) +#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) +#define _TIF_IRET (1 << TIF_IRET) +#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) +#define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) +#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) +#define _TIF_HRTICK_RESCHED (1 << TIF_HRTICK_RESCHED) +#define _TIF_IA32 (1 << TIF_IA32) +#define _TIF_FORK (1 << TIF_FORK) +#define _TIF_ABI_PENDING (1 << TIF_ABI_PENDING) +#define _TIF_DEBUG (1 << TIF_DEBUG) +#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) +#define _TIF_FREEZE (1 << TIF_FREEZE) +#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) +#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) +#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) +#define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS) +#define _TIF_NOTSC (1 << TIF_NOTSC) + +/* work to do on interrupt/exception return */ +#define _TIF_WORK_MASK \ + (0x0000FFFF & \ + ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP|_TIF_SECCOMP)) +/* work to do on any return to user space */ +#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP) + +#define _TIF_DO_NOTIFY_MASK \ + (_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED) + +/* flags to check in __switch_to() */ +#define _TIF_WORK_CTXSW \ + (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS|_TIF_NOTSC) +#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW +#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) + +#define PREEMPT_ACTIVE 0x10000000 + +/* + * Thread-synchronous status. + * + * This is different from the flags in that nobody else + * ever touches our thread-synchronous status, so we don't + * have to worry about atomic accesses. + */ +#define TS_USEDFPU 0x0001 /* FPU was used by this task + this quantum (SMP) */ +#define TS_COMPAT 0x0002 /* 32bit syscall active */ +#define TS_POLLING 0x0004 /* true if in idle loop + and not sleeping */ + +#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) + +#endif /* !X86_32 */ + + #ifndef __ASSEMBLY__ extern void arch_task_cache_init(void); extern void free_thread_info(struct thread_info *ti); diff --git a/include/asm-x86/thread_info_32.h b/include/asm-x86/thread_info_32.h deleted file mode 100644 index 531859962096..000000000000 --- a/include/asm-x86/thread_info_32.h +++ /dev/null @@ -1,196 +0,0 @@ -/* thread_info.h: i386 low-level thread information - * - * Copyright (C) 2002 David Howells (dhowells@redhat.com) - * - Incorporating suggestions made by Linus Torvalds and Dave Miller - */ - -#ifndef _ASM_THREAD_INFO_H -#define _ASM_THREAD_INFO_H - -#ifdef __KERNEL__ - -#include -#include - -#ifndef __ASSEMBLY__ -#include -#endif - -/* - * low level task data that entry.S needs immediate access to - * - this struct should fit entirely inside of one cache line - * - this struct shares the supervisor stack pages - * - if the contents of this structure are changed, - * the assembly constants must also be changed - */ -#ifndef __ASSEMBLY__ - -struct thread_info { - struct task_struct *task; /* main task structure */ - struct exec_domain *exec_domain; /* execution domain */ - unsigned long flags; /* low level flags */ - unsigned long status; /* thread-synchronous flags */ - __u32 cpu; /* current CPU */ - int preempt_count; /* 0 => preemptable, - <0 => BUG */ - mm_segment_t addr_limit; /* thread address space: - 0-0xBFFFFFFF user-thread - 0-0xFFFFFFFF kernel-thread - */ - void *sysenter_return; - struct restart_block restart_block; - unsigned long previous_esp; /* ESP of the previous stack in - case of nested (IRQ) stacks - */ - __u8 supervisor_stack[0]; -}; - -#else /* !__ASSEMBLY__ */ - -#include - -#endif - -#define PREEMPT_ACTIVE 0x10000000 -#ifdef CONFIG_4KSTACKS -#define THREAD_SIZE (4096) -#else -#define THREAD_SIZE (8192) -#endif - -#define STACK_WARN (THREAD_SIZE/8) -/* - * macros/functions for gaining access to the thread information structure - * - * preempt_count needs to be 1 initially, until the scheduler is functional. - */ -#ifndef __ASSEMBLY__ - -#define INIT_THREAD_INFO(tsk) \ -{ \ - .task = &tsk, \ - .exec_domain = &default_exec_domain, \ - .flags = 0, \ - .cpu = 0, \ - .preempt_count = 1, \ - .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ -} - -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - - -/* how to get the current stack pointer from C */ -register unsigned long current_stack_pointer asm("esp") __used; - -/* how to get the thread information struct from C */ -static inline struct thread_info *current_thread_info(void) -{ - return (struct thread_info *) - (current_stack_pointer & ~(THREAD_SIZE - 1)); -} - -/* thread information allocation */ -#ifdef CONFIG_DEBUG_STACK_USAGE -#define alloc_thread_info(tsk) ((struct thread_info *) \ - __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(THREAD_SIZE))) -#else -#define alloc_thread_info(tsk) ((struct thread_info *) \ - __get_free_pages(GFP_KERNEL, get_order(THREAD_SIZE))) -#endif - -#else /* !__ASSEMBLY__ */ - -/* how to get the thread information struct from ASM */ -#define GET_THREAD_INFO(reg) \ - movl $-THREAD_SIZE, reg; \ - andl %esp, reg - -/* use this one if reg already contains %esp */ -#define GET_THREAD_INFO_WITH_ESP(reg) \ - andl $-THREAD_SIZE, reg - -#endif - -/* - * thread information flags - * - these are process state flags that various - * assembly files may need to access - * - pending work-to-be-done flags are in LSW - * - other flags in MSW - */ -#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ -#define TIF_SIGPENDING 1 /* signal pending */ -#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ -#define TIF_SINGLESTEP 3 /* restore singlestep on return to - user mode */ -#define TIF_IRET 4 /* return with iret */ -#define TIF_SYSCALL_EMU 5 /* syscall emulation active */ -#define TIF_SYSCALL_AUDIT 6 /* syscall auditing active */ -#define TIF_SECCOMP 7 /* secure computing */ -#define TIF_RESTORE_SIGMASK 8 /* restore signal mask in do_signal() */ -#define TIF_HRTICK_RESCHED 9 /* reprogram hrtick timer */ -#define TIF_MEMDIE 16 -#define TIF_DEBUG 17 /* uses debug registers */ -#define TIF_IO_BITMAP 18 /* uses I/O bitmap */ -#define TIF_FREEZE 19 /* is freezing for suspend */ -#define TIF_NOTSC 20 /* TSC is not accessible in userland */ -#define TIF_FORCED_TF 21 /* true if TF in eflags artificially */ -#define TIF_DEBUGCTLMSR 22 /* uses thread_struct.debugctlmsr */ -#define TIF_DS_AREA_MSR 23 /* uses thread_struct.ds_area_msr */ -#define TIF_BTS_TRACE_TS 24 /* record scheduling event timestamps */ - -#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) -#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) -#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) -#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) -#define _TIF_IRET (1 << TIF_IRET) -#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) -#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) -#define _TIF_SECCOMP (1 << TIF_SECCOMP) -#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) -#define _TIF_HRTICK_RESCHED (1 << TIF_HRTICK_RESCHED) -#define _TIF_DEBUG (1 << TIF_DEBUG) -#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) -#define _TIF_FREEZE (1 << TIF_FREEZE) -#define _TIF_NOTSC (1 << TIF_NOTSC) -#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) -#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) -#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) -#define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS) - -/* work to do on interrupt/exception return */ -#define _TIF_WORK_MASK \ - (0x0000FFFF & ~(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ - _TIF_SECCOMP | _TIF_SYSCALL_EMU)) -/* work to do on any return to u-space */ -#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP) - -/* flags to check in __switch_to() */ -#define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP | _TIF_NOTSC | _TIF_DEBUGCTLMSR | \ - _TIF_DS_AREA_MSR | _TIF_BTS_TRACE_TS) -#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW -#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW | _TIF_DEBUG) - - -/* - * Thread-synchronous status. - * - * This is different from the flags in that nobody else - * ever touches our thread-synchronous status, so we don't - * have to worry about atomic accesses. - */ -#define TS_USEDFPU 0x0001 /* FPU was used by this task - this quantum (SMP) */ -#define TS_POLLING 0x0002 /* True if in idle loop - and not sleeping */ - -#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) - -#endif /* __KERNEL__ */ - -#endif /* _ASM_THREAD_INFO_H */ diff --git a/include/asm-x86/thread_info_64.h b/include/asm-x86/thread_info_64.h deleted file mode 100644 index ed664e874dec..000000000000 --- a/include/asm-x86/thread_info_64.h +++ /dev/null @@ -1,186 +0,0 @@ -/* thread_info.h: x86_64 low-level thread information - * - * Copyright (C) 2002 David Howells (dhowells@redhat.com) - * - Incorporating suggestions made by Linus Torvalds and Dave Miller - */ - -#ifndef _ASM_THREAD_INFO_H -#define _ASM_THREAD_INFO_H - -#ifdef __KERNEL__ - -#include -#include -#include - -/* - * low level task data that entry.S needs immediate access to - * - this struct should fit entirely inside of one cache line - * - this struct shares the supervisor stack pages - */ -#ifndef __ASSEMBLY__ -struct task_struct; -struct exec_domain; -#include - -struct thread_info { - struct task_struct *task; /* main task structure */ - struct exec_domain *exec_domain; /* execution domain */ - __u32 flags; /* low level flags */ - __u32 status; /* thread synchronous flags */ - __u32 cpu; /* current CPU */ - int preempt_count; /* 0 => preemptable, - <0 => BUG */ - mm_segment_t addr_limit; - struct restart_block restart_block; -#ifdef CONFIG_IA32_EMULATION - void __user *sysenter_return; -#endif -}; -#endif - -/* - * macros/functions for gaining access to the thread information structure - * preempt_count needs to be 1 initially, until the scheduler is functional. - */ -#ifndef __ASSEMBLY__ -#define INIT_THREAD_INFO(tsk) \ -{ \ - .task = &tsk, \ - .exec_domain = &default_exec_domain, \ - .flags = 0, \ - .cpu = 0, \ - .preempt_count = 1, \ - .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ -} - -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - -static inline struct thread_info *current_thread_info(void) -{ - struct thread_info *ti; - ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE); - return ti; -} - -/* do not use in interrupt context */ -static inline struct thread_info *stack_thread_info(void) -{ - struct thread_info *ti; - asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1))); - return ti; -} - -/* thread information allocation */ -#ifdef CONFIG_DEBUG_STACK_USAGE -#define THREAD_FLAGS (GFP_KERNEL | __GFP_ZERO) -#else -#define THREAD_FLAGS GFP_KERNEL -#endif - -#define alloc_thread_info(tsk) \ - ((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER)) - -#else /* !__ASSEMBLY__ */ - -/* how to get the thread information struct from ASM */ -#define GET_THREAD_INFO(reg) \ - movq %gs:pda_kernelstack,reg ; \ - subq $(THREAD_SIZE-PDA_STACKOFFSET),reg - -#endif - -/* - * thread information flags - * - these are process state flags that various assembly files - * may need to access - * - pending work-to-be-done flags are in LSW - * - other flags in MSW - * Warning: layout of LSW is hardcoded in entry.S - */ -#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ -#define TIF_SIGPENDING 2 /* signal pending */ -#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ -#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ -#define TIF_IRET 5 /* force IRET */ -#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ -#define TIF_SECCOMP 8 /* secure computing */ -#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */ -#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ -#define TIF_HRTICK_RESCHED 11 /* reprogram hrtick timer */ -/* 16 free */ -#define TIF_IA32 17 /* 32bit process */ -#define TIF_FORK 18 /* ret_from_fork */ -#define TIF_ABI_PENDING 19 -#define TIF_MEMDIE 20 -#define TIF_DEBUG 21 /* uses debug registers */ -#define TIF_IO_BITMAP 22 /* uses I/O bitmap */ -#define TIF_FREEZE 23 /* is freezing for suspend */ -#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ -#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ -#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ -#define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */ -#define TIF_NOTSC 28 /* TSC is not accessible in userland */ - -#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) -#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) -#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) -#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) -#define _TIF_IRET (1 << TIF_IRET) -#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) -#define _TIF_SECCOMP (1 << TIF_SECCOMP) -#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) -#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) -#define _TIF_HRTICK_RESCHED (1 << TIF_HRTICK_RESCHED) -#define _TIF_IA32 (1 << TIF_IA32) -#define _TIF_FORK (1 << TIF_FORK) -#define _TIF_ABI_PENDING (1 << TIF_ABI_PENDING) -#define _TIF_DEBUG (1 << TIF_DEBUG) -#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) -#define _TIF_FREEZE (1 << TIF_FREEZE) -#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) -#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) -#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) -#define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS) -#define _TIF_NOTSC (1 << TIF_NOTSC) - -/* work to do on interrupt/exception return */ -#define _TIF_WORK_MASK \ - (0x0000FFFF & \ - ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP|_TIF_SECCOMP)) -/* work to do on any return to user space */ -#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP) - -#define _TIF_DO_NOTIFY_MASK \ - (_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED) - -/* flags to check in __switch_to() */ -#define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS|_TIF_NOTSC) -#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW -#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) - -#define PREEMPT_ACTIVE 0x10000000 - -/* - * Thread-synchronous status. - * - * This is different from the flags in that nobody else - * ever touches our thread-synchronous status, so we don't - * have to worry about atomic accesses. - */ -#define TS_USEDFPU 0x0001 /* FPU was used by this task - this quantum (SMP) */ -#define TS_COMPAT 0x0002 /* 32bit syscall active */ -#define TS_POLLING 0x0004 /* true if in idle loop - and not sleeping */ - -#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) - -#endif /* __KERNEL__ */ - -#endif /* _ASM_THREAD_INFO_H */ -- cgit v1.2.3 From 12a638e13c68bbe187782518dab375f4fa800fc4 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 28 Apr 2008 18:52:33 -0700 Subject: x86: threadinfo: common include files Move shared includes to a common area in thread_info.h Adds asm/types.h for x86_64 and linux/compiler.h for x86_32. Not needed but we can avoid some ifdeffing and it simplifies later joining. Signed-off-by: Christoph Lameter Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/thread_info.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index d59384ac0ba6..d2dc1a3b5d65 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -7,9 +7,11 @@ #ifndef _ASM_X86_THREAD_INFO_H #define _ASM_X86_THREAD_INFO_H -#ifdef CONFIG_X86_32 #include #include +#include + +#ifdef CONFIG_X86_32 #ifndef __ASSEMBLY__ #include @@ -192,8 +194,6 @@ static inline struct thread_info *current_thread_info(void) #else /* X86_32 */ -#include -#include #include /* -- cgit v1.2.3 From f2ea3b1d4d7ab66d86da57899993282f3deb1f74 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 28 Apr 2008 18:52:34 -0700 Subject: x86: threadinfo: merge thread sync state definitions Merge both. x86_64 has an additional TS_COMPAT that is harmless for 32 bit. Signed-off-by: Christoph Lameter Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/thread_info.h | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index d2dc1a3b5d65..4b91f59de8fc 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -177,21 +177,6 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW | _TIF_DEBUG) - -/* - * Thread-synchronous status. - * - * This is different from the flags in that nobody else - * ever touches our thread-synchronous status, so we don't - * have to worry about atomic accesses. - */ -#define TS_USEDFPU 0x0001 /* FPU was used by this task - this quantum (SMP) */ -#define TS_POLLING 0x0002 /* True if in idle loop - and not sleeping */ - -#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) - #else /* X86_32 */ #include @@ -349,6 +334,8 @@ static inline struct thread_info *stack_thread_info(void) #define PREEMPT_ACTIVE 0x10000000 +#endif /* !X86_32 */ + /* * Thread-synchronous status. * @@ -358,15 +345,12 @@ static inline struct thread_info *stack_thread_info(void) */ #define TS_USEDFPU 0x0001 /* FPU was used by this task this quantum (SMP) */ -#define TS_COMPAT 0x0002 /* 32bit syscall active */ +#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ #define TS_POLLING 0x0004 /* true if in idle loop and not sleeping */ #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) -#endif /* !X86_32 */ - - #ifndef __ASSEMBLY__ extern void arch_task_cache_init(void); extern void free_thread_info(struct thread_info *ti); -- cgit v1.2.3 From 006c484bb3d9547e82a33a09668c9b54b912c8fb Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 28 Apr 2008 18:52:35 -0700 Subject: x86: common thread_info definitions Merge the thread_info definition into one structure definition for both arches. The __u32 is equal to unsigned long for 32 bit. sysenter_return is used both for the IA32 emulation for 64 and x86_32. Avoid complicated #ifdef by simply always including it. Signed-off-by: Christoph Lameter Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/thread_info.h | 55 ++++++++++--------------------------------- 1 file changed, 12 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index 4b91f59de8fc..71b0880d80b5 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -11,47 +11,42 @@ #include #include -#ifdef CONFIG_X86_32 - -#ifndef __ASSEMBLY__ -#include -#endif - /* * low level task data that entry.S needs immediate access to * - this struct should fit entirely inside of one cache line * - this struct shares the supervisor stack pages - * - if the contents of this structure are changed, - * the assembly constants must also be changed */ #ifndef __ASSEMBLY__ +struct task_struct; +struct exec_domain; +#include struct thread_info { struct task_struct *task; /* main task structure */ struct exec_domain *exec_domain; /* execution domain */ - unsigned long flags; /* low level flags */ - unsigned long status; /* thread-synchronous flags */ + __u32 flags; /* low level flags */ + __u32 status; /* thread synchronous flags */ __u32 cpu; /* current CPU */ - int preempt_count; /* 0 => preemptable, + int preempt_count; /* 0 => preemptable, <0 => BUG */ - mm_segment_t addr_limit; /* thread address space: - 0-0xBFFFFFFF user-thread - 0-0xFFFFFFFF kernel-thread - */ - void *sysenter_return; + mm_segment_t addr_limit; struct restart_block restart_block; + void __user *sysenter_return; +#ifdef CONFIG_X86_32 unsigned long previous_esp; /* ESP of the previous stack in case of nested (IRQ) stacks */ __u8 supervisor_stack[0]; +#endif }; - #else /* !__ASSEMBLY__ */ #include #endif +#ifdef CONFIG_X86_32 + #define PREEMPT_ACTIVE 0x10000000 #ifdef CONFIG_4KSTACKS #define THREAD_SIZE (4096) @@ -181,32 +176,6 @@ static inline struct thread_info *current_thread_info(void) #include -/* - * low level task data that entry.S needs immediate access to - * - this struct should fit entirely inside of one cache line - * - this struct shares the supervisor stack pages - */ -#ifndef __ASSEMBLY__ -struct task_struct; -struct exec_domain; -#include - -struct thread_info { - struct task_struct *task; /* main task structure */ - struct exec_domain *exec_domain; /* execution domain */ - __u32 flags; /* low level flags */ - __u32 status; /* thread synchronous flags */ - __u32 cpu; /* current CPU */ - int preempt_count; /* 0 => preemptable, - <0 => BUG */ - mm_segment_t addr_limit; - struct restart_block restart_block; -#ifdef CONFIG_IA32_EMULATION - void __user *sysenter_return; -#endif -}; -#endif - /* * macros/functions for gaining access to the thread information structure * preempt_count needs to be 1 initially, until the scheduler is functional. -- cgit v1.2.3 From 3351cc03c0762353225a79507e38db4c1e656d52 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 28 Apr 2008 18:52:36 -0700 Subject: x86: threadinfo: merge INIT_THREAD_INFO Both definitions are the same. So move to common x86 area. Signed-off-by: Christoph Lameter Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/thread_info.h | 49 +++++++++++++++---------------------------- 1 file changed, 17 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index 71b0880d80b5..8cd52d4bfb0e 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -39,6 +39,23 @@ struct thread_info { __u8 supervisor_stack[0]; #endif }; + +#define INIT_THREAD_INFO(tsk) \ +{ \ + .task = &tsk, \ + .exec_domain = &default_exec_domain, \ + .flags = 0, \ + .cpu = 0, \ + .preempt_count = 1, \ + .addr_limit = KERNEL_DS, \ + .restart_block = { \ + .fn = do_no_restart_syscall, \ + }, \ +} + +#define init_thread_info (init_thread_union.thread_info) +#define init_stack (init_thread_union.stack) + #else /* !__ASSEMBLY__ */ #include @@ -62,22 +79,6 @@ struct thread_info { */ #ifndef __ASSEMBLY__ -#define INIT_THREAD_INFO(tsk) \ -{ \ - .task = &tsk, \ - .exec_domain = &default_exec_domain, \ - .flags = 0, \ - .cpu = 0, \ - .preempt_count = 1, \ - .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ -} - -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* how to get the current stack pointer from C */ register unsigned long current_stack_pointer asm("esp") __used; @@ -181,22 +182,6 @@ static inline struct thread_info *current_thread_info(void) * preempt_count needs to be 1 initially, until the scheduler is functional. */ #ifndef __ASSEMBLY__ -#define INIT_THREAD_INFO(tsk) \ -{ \ - .task = &tsk, \ - .exec_domain = &default_exec_domain, \ - .flags = 0, \ - .cpu = 0, \ - .preempt_count = 1, \ - .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ -} - -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - static inline struct thread_info *current_thread_info(void) { struct thread_info *ti; -- cgit v1.2.3 From 24e2de6e28a453cd114b06215df2f9931cd0c342 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 28 Apr 2008 18:52:37 -0700 Subject: x86: thread_info: PREEMPT_ACTIVE Same for both 32 and 64 bit so simply put it in to the common area. Signed-off-by: Christoph Lameter Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/thread_info.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index 8cd52d4bfb0e..f8d5cf516ac4 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -62,9 +62,10 @@ struct thread_info { #endif +#define PREEMPT_ACTIVE 0x10000000 + #ifdef CONFIG_X86_32 -#define PREEMPT_ACTIVE 0x10000000 #ifdef CONFIG_4KSTACKS #define THREAD_SIZE (4096) #else @@ -286,8 +287,6 @@ static inline struct thread_info *stack_thread_info(void) #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) -#define PREEMPT_ACTIVE 0x10000000 - #endif /* !X86_32 */ /* -- cgit v1.2.3 From e57549b017552f7a493b366f5ccd4781801083e4 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 28 Apr 2008 18:52:38 -0700 Subject: x86: thread_info: merge TIF_ flags. Both TIF lists are essentially the same. x86_32 also has TIF_SYSCALL_EMU which must be undefined for the 64 bit case. Signed-off-by: Christoph Lameter Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/thread_info.h | 162 ++++++++++++++++-------------------------- 1 file changed, 61 insertions(+), 101 deletions(-) (limited to 'include') diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index f8d5cf516ac4..983743a14b99 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -62,6 +62,67 @@ struct thread_info { #endif +/* + * thread information flags + * - these are process state flags that various assembly files + * may need to access + * - pending work-to-be-done flags are in LSW + * - other flags in MSW + * Warning: layout of LSW is hardcoded in entry.S + */ +#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ +#define TIF_SIGPENDING 2 /* signal pending */ +#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ +#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ +#define TIF_IRET 5 /* force IRET */ +#ifdef CONFIG_X86_32 +#define TIF_SYSCALL_EMU 6 /* syscall emulation active */ +#endif +#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ +#define TIF_SECCOMP 8 /* secure computing */ +#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */ +#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ +#define TIF_HRTICK_RESCHED 11 /* reprogram hrtick timer */ +#define TIF_NOTSC 16 /* TSC is not accessible in userland */ +#define TIF_IA32 17 /* 32bit process */ +#define TIF_FORK 18 /* ret_from_fork */ +#define TIF_ABI_PENDING 19 +#define TIF_MEMDIE 20 +#define TIF_DEBUG 21 /* uses debug registers */ +#define TIF_IO_BITMAP 22 /* uses I/O bitmap */ +#define TIF_FREEZE 23 /* is freezing for suspend */ +#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ +#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ +#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ +#define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */ + +#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) +#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) +#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) +#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) +#define _TIF_IRET (1 << TIF_IRET) +#ifdef CONFIG_X86_32 +#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) +#else +#define _TIF_SYSCALL_EMU 0 +#endif +#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) +#define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) +#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) +#define _TIF_HRTICK_RESCHED (1 << TIF_HRTICK_RESCHED) +#define _TIF_NOTSC (1 << TIF_NOTSC) +#define _TIF_IA32 (1 << TIF_IA32) +#define _TIF_FORK (1 << TIF_FORK) +#define _TIF_ABI_PENDING (1 << TIF_ABI_PENDING) +#define _TIF_DEBUG (1 << TIF_DEBUG) +#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) +#define _TIF_FREEZE (1 << TIF_FREEZE) +#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) +#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) +#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) +#define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS) + #define PREEMPT_ACTIVE 0x10000000 #ifdef CONFIG_X86_32 @@ -113,53 +174,6 @@ static inline struct thread_info *current_thread_info(void) #endif -/* - * thread information flags - * - these are process state flags that various - * assembly files may need to access - * - pending work-to-be-done flags are in LSW - * - other flags in MSW - */ -#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ -#define TIF_SIGPENDING 1 /* signal pending */ -#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ -#define TIF_SINGLESTEP 3 /* restore singlestep on return to - user mode */ -#define TIF_IRET 4 /* return with iret */ -#define TIF_SYSCALL_EMU 5 /* syscall emulation active */ -#define TIF_SYSCALL_AUDIT 6 /* syscall auditing active */ -#define TIF_SECCOMP 7 /* secure computing */ -#define TIF_RESTORE_SIGMASK 8 /* restore signal mask in do_signal() */ -#define TIF_HRTICK_RESCHED 9 /* reprogram hrtick timer */ -#define TIF_MEMDIE 16 -#define TIF_DEBUG 17 /* uses debug registers */ -#define TIF_IO_BITMAP 18 /* uses I/O bitmap */ -#define TIF_FREEZE 19 /* is freezing for suspend */ -#define TIF_NOTSC 20 /* TSC is not accessible in userland */ -#define TIF_FORCED_TF 21 /* true if TF in eflags artificially */ -#define TIF_DEBUGCTLMSR 22 /* uses thread_struct.debugctlmsr */ -#define TIF_DS_AREA_MSR 23 /* uses thread_struct.ds_area_msr */ -#define TIF_BTS_TRACE_TS 24 /* record scheduling event timestamps */ - -#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) -#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) -#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) -#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) -#define _TIF_IRET (1 << TIF_IRET) -#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) -#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) -#define _TIF_SECCOMP (1 << TIF_SECCOMP) -#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) -#define _TIF_HRTICK_RESCHED (1 << TIF_HRTICK_RESCHED) -#define _TIF_DEBUG (1 << TIF_DEBUG) -#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) -#define _TIF_FREEZE (1 << TIF_FREEZE) -#define _TIF_NOTSC (1 << TIF_NOTSC) -#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) -#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) -#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) -#define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS) - /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK \ (0x0000FFFF & ~(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ @@ -217,60 +231,6 @@ static inline struct thread_info *stack_thread_info(void) #endif -/* - * thread information flags - * - these are process state flags that various assembly files - * may need to access - * - pending work-to-be-done flags are in LSW - * - other flags in MSW - * Warning: layout of LSW is hardcoded in entry.S - */ -#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ -#define TIF_SIGPENDING 2 /* signal pending */ -#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ -#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ -#define TIF_IRET 5 /* force IRET */ -#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ -#define TIF_SECCOMP 8 /* secure computing */ -#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */ -#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ -#define TIF_HRTICK_RESCHED 11 /* reprogram hrtick timer */ -/* 16 free */ -#define TIF_IA32 17 /* 32bit process */ -#define TIF_FORK 18 /* ret_from_fork */ -#define TIF_ABI_PENDING 19 -#define TIF_MEMDIE 20 -#define TIF_DEBUG 21 /* uses debug registers */ -#define TIF_IO_BITMAP 22 /* uses I/O bitmap */ -#define TIF_FREEZE 23 /* is freezing for suspend */ -#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ -#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ -#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ -#define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */ -#define TIF_NOTSC 28 /* TSC is not accessible in userland */ - -#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) -#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) -#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) -#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) -#define _TIF_IRET (1 << TIF_IRET) -#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) -#define _TIF_SECCOMP (1 << TIF_SECCOMP) -#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) -#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) -#define _TIF_HRTICK_RESCHED (1 << TIF_HRTICK_RESCHED) -#define _TIF_IA32 (1 << TIF_IA32) -#define _TIF_FORK (1 << TIF_FORK) -#define _TIF_ABI_PENDING (1 << TIF_ABI_PENDING) -#define _TIF_DEBUG (1 << TIF_DEBUG) -#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) -#define _TIF_FREEZE (1 << TIF_FREEZE) -#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) -#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) -#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) -#define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS) -#define _TIF_NOTSC (1 << TIF_NOTSC) - /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK \ (0x0000FFFF & \ -- cgit v1.2.3 From 00c1bb133cf351fa3904b00a48a9cf535d018de6 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 28 Apr 2008 18:52:39 -0700 Subject: x86: thread_info: merge tif masks The TIF masks are basically the same. x86_32 also has _TIF_SYSCALL_EMU which is zero for the 64 bit case. The tif masks become the same. x86_64 has an additional _TIF_DONOTIFY_MASK. Does not hurt for the 32 bit case since it is only used in x86_64 arch code. Signed-off-by: Christoph Lameter Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/thread_info.h | 51 ++++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index 983743a14b99..b7cd41308e56 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -123,6 +123,27 @@ struct thread_info { #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) #define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS) +/* work to do on interrupt/exception return */ +#define _TIF_WORK_MASK \ + (0x0000FFFF & \ + ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP| \ + _TIF_SECCOMP|_TIF_SYSCALL_EMU)) + +/* work to do on any return to user space */ +#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP) + +#define _TIF_DO_NOTIFY_MASK \ + (_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED) + +/* flags to check in __switch_to() */ +#define _TIF_WORK_CTXSW \ + (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS| \ + _TIF_NOTSC) + +#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW +#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) + + #define PREEMPT_ACTIVE 0x10000000 #ifdef CONFIG_X86_32 @@ -174,20 +195,6 @@ static inline struct thread_info *current_thread_info(void) #endif -/* work to do on interrupt/exception return */ -#define _TIF_WORK_MASK \ - (0x0000FFFF & ~(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ - _TIF_SECCOMP | _TIF_SYSCALL_EMU)) -/* work to do on any return to u-space */ -#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP) - -/* flags to check in __switch_to() */ -#define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP | _TIF_NOTSC | _TIF_DEBUGCTLMSR | \ - _TIF_DS_AREA_MSR | _TIF_BTS_TRACE_TS) -#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW -#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW | _TIF_DEBUG) - #else /* X86_32 */ #include @@ -231,22 +238,6 @@ static inline struct thread_info *stack_thread_info(void) #endif -/* work to do on interrupt/exception return */ -#define _TIF_WORK_MASK \ - (0x0000FFFF & \ - ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP|_TIF_SECCOMP)) -/* work to do on any return to user space */ -#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP) - -#define _TIF_DO_NOTIFY_MASK \ - (_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED) - -/* flags to check in __switch_to() */ -#define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS|_TIF_NOTSC) -#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW -#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) - #endif /* !X86_32 */ /* -- cgit v1.2.3 From b84200b3a0fafa167185201319940d8df62a8c7b Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 28 Apr 2008 18:52:40 -0700 Subject: x86: thread_info: merge thread_info allocation Make them similar so that both use THREAD_ORDER and THREAD_FLAGS and have a THREAD_SIZE definition that is setup in asm/page_xx.h Signed-off-by: Christoph Lameter Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/page_32.h | 8 ++++++++ include/asm-x86/thread_info.h | 37 +++++++++++-------------------------- 2 files changed, 19 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h index 424e82f8ae27..50b33ebcf851 100644 --- a/include/asm-x86/page_32.h +++ b/include/asm-x86/page_32.h @@ -13,6 +13,14 @@ */ #define __PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) +#ifdef CONFIG_4KSTACKS +#define THREAD_ORDER 0 +#else +#define THREAD_ORDER 1 +#endif +#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) + + #ifdef CONFIG_X86_PAE #define __PHYSICAL_MASK_SHIFT 36 #define __VIRTUAL_MASK_SHIFT 32 diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index b7cd41308e56..348f0e0faa3b 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -132,6 +132,7 @@ struct thread_info { /* work to do on any return to user space */ #define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP) +/* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ (_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED) @@ -143,18 +144,21 @@ struct thread_info { #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) - #define PREEMPT_ACTIVE 0x10000000 -#ifdef CONFIG_X86_32 - -#ifdef CONFIG_4KSTACKS -#define THREAD_SIZE (4096) +/* thread information allocation */ +#ifdef CONFIG_DEBUG_STACK_USAGE +#define THREAD_FLAGS (GFP_KERNEL | __GFP_ZERO) #else -#define THREAD_SIZE (8192) +#define THREAD_FLAGS GFP_KERNEL #endif -#define STACK_WARN (THREAD_SIZE/8) +#define alloc_thread_info(tsk) \ + ((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER)) + +#ifdef CONFIG_X86_32 + +#define STACK_WARN (THREAD_SIZE/8) /* * macros/functions for gaining access to the thread information structure * @@ -173,15 +177,6 @@ static inline struct thread_info *current_thread_info(void) (current_stack_pointer & ~(THREAD_SIZE - 1)); } -/* thread information allocation */ -#ifdef CONFIG_DEBUG_STACK_USAGE -#define alloc_thread_info(tsk) ((struct thread_info *) \ - __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(THREAD_SIZE))) -#else -#define alloc_thread_info(tsk) ((struct thread_info *) \ - __get_free_pages(GFP_KERNEL, get_order(THREAD_SIZE))) -#endif - #else /* !__ASSEMBLY__ */ /* how to get the thread information struct from ASM */ @@ -219,16 +214,6 @@ static inline struct thread_info *stack_thread_info(void) return ti; } -/* thread information allocation */ -#ifdef CONFIG_DEBUG_STACK_USAGE -#define THREAD_FLAGS (GFP_KERNEL | __GFP_ZERO) -#else -#define THREAD_FLAGS GFP_KERNEL -#endif - -#define alloc_thread_info(tsk) \ - ((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER)) - #else /* !__ASSEMBLY__ */ /* how to get the thread information struct from ASM */ -- cgit v1.2.3 From 8a6c160a2a13d82c75a50af7282b906cce948df5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 30 Apr 2008 22:13:44 +0200 Subject: x86: redo thread_info.h change redo Roland's "signals: x86 TS_RESTORE_SIGMASK" ontop of the unified thread_info.h file. Signed-off-by: Ingo Molnar --- include/asm-x86/thread_info.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index 348f0e0faa3b..74481b72ae0f 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -80,7 +80,6 @@ struct thread_info { #endif #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ -#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */ #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ #define TIF_HRTICK_RESCHED 11 /* reprogram hrtick timer */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ @@ -108,7 +107,6 @@ struct thread_info { #endif #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) -#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) #define _TIF_HRTICK_RESCHED (1 << TIF_HRTICK_RESCHED) #define _TIF_NOTSC (1 << TIF_NOTSC) @@ -237,9 +235,20 @@ static inline struct thread_info *stack_thread_info(void) #define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ #define TS_POLLING 0x0004 /* true if in idle loop and not sleeping */ +#define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */ #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) +#ifndef __ASSEMBLY__ +#define HAVE_SET_RESTORE_SIGMASK 1 +static inline void set_restore_sigmask(void) +{ + struct thread_info *ti = current_thread_info(); + ti->status |= TS_RESTORE_SIGMASK; + set_bit(TIF_SIGPENDING, &ti->flags); +} +#endif /* !__ASSEMBLY__ */ + #ifndef __ASSEMBLY__ extern void arch_task_cache_init(void); extern void free_thread_info(struct thread_info *ti); -- cgit v1.2.3 From 6859a8402945cf1d74af75a2e1aa4e327a506ab4 Mon Sep 17 00:00:00 2001 From: Alan Mayer Date: Wed, 26 Mar 2008 16:11:31 -0500 Subject: x86: resize NR_IRQS for large machines On machines with very large numbers of cpus, tables that are dimensioned by NR_IRQS get very large, especially the irq_desc table. They are also very sparsely used. When the cpu count is > MAX_IO_APICS, use MAX_IO_APICS to set NR_IRQS, otherwise use NR_CPUS. Signed-off-by: Alan Mayer Reviewed-by: Christoph Lameter Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/irq_64.h | 6 ++++++ include/linux/kernel_stat.h | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86/irq_64.h b/include/asm-x86/irq_64.h index 083d35a62c94..7608176590b6 100644 --- a/include/asm-x86/irq_64.h +++ b/include/asm-x86/irq_64.h @@ -10,6 +10,8 @@ * */ +#include + #define TIMER_IRQ 0 /* @@ -31,7 +33,11 @@ #define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#if NR_CPUS < MAX_IO_APICS #define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) +#else +#define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) +#endif #define NR_IRQ_VECTORS NR_IRQS static inline int irq_canonicalize(int irq) diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index e8ffce898bf9..cf9f40a91c9c 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -1,11 +1,11 @@ #ifndef _LINUX_KERNEL_STAT_H #define _LINUX_KERNEL_STAT_H -#include #include #include #include #include +#include #include /* -- cgit v1.2.3 From 2e0884362d1fe36ef2d673d763d6ce35e2044e66 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 2 May 2008 19:00:30 +0200 Subject: x86: move common declarations to hw_irq.h Move the common declarations from hw_irq_32/64 into hw_irq.h Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/asm-x86/hw_irq.h | 81 +++++++++++++++++++++++++++++++++++++++++++++ include/asm-x86/hw_irq_32.h | 61 ---------------------------------- include/asm-x86/hw_irq_64.h | 68 ------------------------------------- 3 files changed, 81 insertions(+), 129 deletions(-) (limited to 'include') diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h index bf025399d939..38af08ed43cf 100644 --- a/include/asm-x86/hw_irq.h +++ b/include/asm-x86/hw_irq.h @@ -1,5 +1,86 @@ +#ifndef _ASM_HW_IRQ_H +#define _ASM_HW_IRQ_H + +/* + * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar + * + * moved some of the old arch/i386/kernel/irq.h to here. VY + * + * IRQ/IPI changes taken from work by Thomas Radke + * + * + * hacked by Andi Kleen for x86-64. + * unified by tglx + */ + +#define NMI_VECTOR 0x02 + +#ifndef __ASSEMBLY__ + +#include +#include +#include + +#include +#include +#include + +#define platform_legacy_irq(irq) ((irq) < 16) + +/* Interrupt handlers registered during init_IRQ */ +extern void apic_timer_interrupt(void); +extern void error_interrupt(void); +extern void spurious_interrupt(void); +extern void thermal_interrupt(void); +extern void reschedule_interrupt(void); + +extern void invalidate_interrupt(void); +extern void invalidate_interrupt0(void); +extern void invalidate_interrupt1(void); +extern void invalidate_interrupt2(void); +extern void invalidate_interrupt3(void); +extern void invalidate_interrupt4(void); +extern void invalidate_interrupt5(void); +extern void invalidate_interrupt6(void); +extern void invalidate_interrupt7(void); + +extern void irq_move_cleanup_interrupt(void); +extern void threshold_interrupt(void); + +extern void call_function_interrupt(void); + +/* PIC specific functions */ +extern void disable_8259A_irq(unsigned int irq); +extern void enable_8259A_irq(unsigned int irq); +extern int i8259A_irq_pending(unsigned int irq); +extern void make_8259A_irq(unsigned int irq); +extern void init_8259A(int aeoi); + +/* IOAPIC */ +#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs)) +extern unsigned long io_apic_irqs; + +extern void init_VISWS_APIC_irqs(void); +extern void setup_IO_APIC(void); +extern void disable_IO_APIC(void); +extern void print_IO_APIC(void); +extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); +extern void setup_ioapic_dest(void); + +/* IPI functions */ +extern void send_IPI_self(int vector); +extern void send_IPI(int dest, int vector); + +/* Statistics */ +extern atomic_t irq_err_count; +extern atomic_t irq_mis_count; + +#endif /* !ASSEMBLY_ */ + #ifdef CONFIG_X86_32 # include "hw_irq_32.h" #else # include "hw_irq_64.h" #endif + +#endif diff --git a/include/asm-x86/hw_irq_32.h b/include/asm-x86/hw_irq_32.h index ea88054e03f3..89fca5af5958 100644 --- a/include/asm-x86/hw_irq_32.h +++ b/include/asm-x86/hw_irq_32.h @@ -1,66 +1,5 @@ -#ifndef _ASM_HW_IRQ_H -#define _ASM_HW_IRQ_H - -/* - * linux/include/asm/hw_irq.h - * - * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar - * - * moved some of the old arch/i386/kernel/irq.h to here. VY - * - * IRQ/IPI changes taken from work by Thomas Radke - * - */ - -#include -#include -#include -#include - -#define NMI_VECTOR 0x02 - -/* - * Various low-level irq details needed by irq.c, process.c, - * time.c, io_apic.c and smp.c - * - * Interrupt entry/exit code at both C and assembly level - */ extern void (*const interrupt[NR_IRQS])(void); -#ifdef CONFIG_SMP -void reschedule_interrupt(void); -void invalidate_interrupt(void); -void call_function_interrupt(void); -#endif - -#ifdef CONFIG_X86_LOCAL_APIC -void apic_timer_interrupt(void); -void error_interrupt(void); -void spurious_interrupt(void); -void thermal_interrupt(void); -#define platform_legacy_irq(irq) ((irq) < 16) -#endif - -void disable_8259A_irq(unsigned int irq); -void enable_8259A_irq(unsigned int irq); -int i8259A_irq_pending(unsigned int irq); -void make_8259A_irq(unsigned int irq); -void init_8259A(int aeoi); -void send_IPI_self(int vector); -void init_VISWS_APIC_irqs(void); -void setup_IO_APIC(void); -void disable_IO_APIC(void); -void print_IO_APIC(void); -int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); -void send_IPI(int dest, int vector); -void setup_ioapic_dest(void); - -extern unsigned long io_apic_irqs; - -extern atomic_t irq_err_count; -extern atomic_t irq_mis_count; -#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs)) -#endif /* _ASM_HW_IRQ_H */ diff --git a/include/asm-x86/hw_irq_64.h b/include/asm-x86/hw_irq_64.h index 0062ef390f67..28674576e9fe 100644 --- a/include/asm-x86/hw_irq_64.h +++ b/include/asm-x86/hw_irq_64.h @@ -1,28 +1,3 @@ -#ifndef _ASM_HW_IRQ_H -#define _ASM_HW_IRQ_H - -/* - * linux/include/asm/hw_irq.h - * - * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar - * - * moved some of the old arch/i386/kernel/irq.h to here. VY - * - * IRQ/IPI changes taken from work by Thomas Radke - * - * - * hacked by Andi Kleen for x86-64. - */ - -#ifndef __ASSEMBLY__ -#include -#include -#include -#include -#include -#endif - -#define NMI_VECTOR 0x02 /* * IDT vectors usable for external interrupt sources start * at 0x20: @@ -96,25 +71,6 @@ #ifndef __ASSEMBLY__ -/* Interrupt handlers registered during init_IRQ */ -void apic_timer_interrupt(void); -void spurious_interrupt(void); -void error_interrupt(void); -void reschedule_interrupt(void); -void call_function_interrupt(void); -void irq_move_cleanup_interrupt(void); -void invalidate_interrupt0(void); -void invalidate_interrupt1(void); -void invalidate_interrupt2(void); -void invalidate_interrupt3(void); -void invalidate_interrupt4(void); -void invalidate_interrupt5(void); -void invalidate_interrupt6(void); -void invalidate_interrupt7(void); -void thermal_interrupt(void); -void threshold_interrupt(void); -void i8254_timer_resume(void); - typedef int vector_irq_t[NR_VECTORS]; DECLARE_PER_CPU(vector_irq_t, vector_irq); extern void __setup_vector_irq(int cpu); @@ -127,29 +83,9 @@ extern spinlock_t vector_lock; * Interrupt entry/exit code at both C and assembly level */ -extern void disable_8259A_irq(unsigned int irq); -extern void enable_8259A_irq(unsigned int irq); -extern int i8259A_irq_pending(unsigned int irq); -extern void make_8259A_irq(unsigned int irq); -extern void init_8259A(int aeoi); -extern void send_IPI_self(int vector); -extern void init_VISWS_APIC_irqs(void); -extern void setup_IO_APIC(void); extern void enable_IO_APIC(void); -extern void disable_IO_APIC(void); -extern void print_IO_APIC(void); -extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); -extern void send_IPI(int dest, int vector); -extern void setup_ioapic_dest(void); extern void native_init_IRQ(void); -extern unsigned long io_apic_irqs; - -extern atomic_t irq_err_count; -extern atomic_t irq_mis_count; - -#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs)) - #include #define IRQ_NAME2(nr) nr##_interrupt(void) @@ -166,8 +102,4 @@ extern atomic_t irq_mis_count; "push $~(" #nr ") ; " \ "jmp common_interrupt"); -#define platform_legacy_irq(irq) ((irq) < 16) - #endif - -#endif /* _ASM_HW_IRQ_H */ -- cgit v1.2.3 From 9b7dc567d03d74a1fbae84e88949b6a60d922d82 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 2 May 2008 20:10:09 +0200 Subject: x86: unify interrupt vector defines The interrupt vector defines are copied 4 times around with minimal differences. Move them all into asm-x86/irq_vectors.h Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_32.S | 2 +- arch/x86/kernel/vmiclock_32.c | 3 +- arch/x86/mach-visws/visws_apic.c | 3 +- include/asm-x86/hw_irq.h | 12 +- include/asm-x86/hw_irq_64.h | 71 --------- include/asm-x86/irq_32.h | 2 +- include/asm-x86/irq_64.h | 29 +--- include/asm-x86/irq_vectors.h | 168 ++++++++++++++++++++++ include/asm-x86/mach-default/irq_vectors.h | 96 ------------- include/asm-x86/mach-default/irq_vectors_limits.h | 16 --- include/asm-x86/mach-visws/irq_vectors.h | 62 -------- include/asm-x86/mach-voyager/irq_vectors.h | 79 ---------- 12 files changed, 184 insertions(+), 359 deletions(-) create mode 100644 include/asm-x86/irq_vectors.h delete mode 100644 include/asm-x86/mach-default/irq_vectors.h delete mode 100644 include/asm-x86/mach-default/irq_vectors_limits.h delete mode 100644 include/asm-x86/mach-visws/irq_vectors.h delete mode 100644 include/asm-x86/mach-voyager/irq_vectors.h (limited to 'include') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 2a609dc3271c..0c7f64b99e17 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -51,7 +51,7 @@ #include #include #include -#include "irq_vectors.h" +#include /* * We use macros for low-level operations which need to be overridden diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index a2b030780aa9..ba7d19e102b1 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -33,8 +33,7 @@ #include #include #include - -#include +#include #define VMI_ONESHOT (VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL | vmi_get_alarm_wiring()) #define VMI_PERIODIC (VMI_ALARM_IS_PERIODIC | VMI_CYCLES_REAL | vmi_get_alarm_wiring()) diff --git a/arch/x86/mach-visws/visws_apic.c b/arch/x86/mach-visws/visws_apic.c index cef9cb1d15ac..d8b2cfd85d92 100644 --- a/arch/x86/mach-visws/visws_apic.c +++ b/arch/x86/mach-visws/visws_apic.c @@ -21,10 +21,9 @@ #include #include #include +#include #include "cobalt.h" -#include "irq_vectors.h" - static DEFINE_SPINLOCK(cobalt_lock); diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h index 38af08ed43cf..a8c5e8bdaa49 100644 --- a/include/asm-x86/hw_irq.h +++ b/include/asm-x86/hw_irq.h @@ -13,7 +13,7 @@ * unified by tglx */ -#define NMI_VECTOR 0x02 +#include #ifndef __ASSEMBLY__ @@ -75,6 +75,16 @@ extern void send_IPI(int dest, int vector); extern atomic_t irq_err_count; extern atomic_t irq_mis_count; +/* Voyager functions */ +extern asmlinkage void vic_cpi_interrupt(void); +extern asmlinkage void vic_sys_interrupt(void); +extern asmlinkage void vic_cmn_interrupt(void); +extern asmlinkage void qic_timer_interrupt(void); +extern asmlinkage void qic_invalidate_interrupt(void); +extern asmlinkage void qic_reschedule_interrupt(void); +extern asmlinkage void qic_enable_irq_interrupt(void); +extern asmlinkage void qic_call_function_interrupt(void); + #endif /* !ASSEMBLY_ */ #ifdef CONFIG_X86_32 diff --git a/include/asm-x86/hw_irq_64.h b/include/asm-x86/hw_irq_64.h index 28674576e9fe..98c9d494a711 100644 --- a/include/asm-x86/hw_irq_64.h +++ b/include/asm-x86/hw_irq_64.h @@ -1,74 +1,3 @@ -/* - * IDT vectors usable for external interrupt sources start - * at 0x20: - */ -#define FIRST_EXTERNAL_VECTOR 0x20 - -#define IA32_SYSCALL_VECTOR 0x80 - - -/* Reserve the lowest usable priority level 0x20 - 0x2f for triggering - * cleanup after irq migration. - */ -#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR - -/* - * Vectors 0x30-0x3f are used for ISA interrupts. - */ -#define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR + 0x10) -#define IRQ1_VECTOR (IRQ0_VECTOR + 1) -#define IRQ2_VECTOR (IRQ0_VECTOR + 2) -#define IRQ3_VECTOR (IRQ0_VECTOR + 3) -#define IRQ4_VECTOR (IRQ0_VECTOR + 4) -#define IRQ5_VECTOR (IRQ0_VECTOR + 5) -#define IRQ6_VECTOR (IRQ0_VECTOR + 6) -#define IRQ7_VECTOR (IRQ0_VECTOR + 7) -#define IRQ8_VECTOR (IRQ0_VECTOR + 8) -#define IRQ9_VECTOR (IRQ0_VECTOR + 9) -#define IRQ10_VECTOR (IRQ0_VECTOR + 10) -#define IRQ11_VECTOR (IRQ0_VECTOR + 11) -#define IRQ12_VECTOR (IRQ0_VECTOR + 12) -#define IRQ13_VECTOR (IRQ0_VECTOR + 13) -#define IRQ14_VECTOR (IRQ0_VECTOR + 14) -#define IRQ15_VECTOR (IRQ0_VECTOR + 15) - -/* - * Special IRQ vectors used by the SMP architecture, 0xf0-0xff - * - * some of the following vectors are 'rare', they are merged - * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. - * TLB, reschedule and local APIC vectors are performance-critical. - */ -#define SPURIOUS_APIC_VECTOR 0xff -#define ERROR_APIC_VECTOR 0xfe -#define RESCHEDULE_VECTOR 0xfd -#define CALL_FUNCTION_VECTOR 0xfc -/* fb free - please don't readd KDB here because it's useless - (hint - think what a NMI bit does to a vector) */ -#define THERMAL_APIC_VECTOR 0xfa -#define THRESHOLD_APIC_VECTOR 0xf9 -/* f8 free */ -#define INVALIDATE_TLB_VECTOR_END 0xf7 -#define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ - -#define NUM_INVALIDATE_TLB_VECTORS 8 - -/* - * Local APIC timer IRQ vector is on a different priority level, - * to work around the 'lost local interrupt if more than 2 IRQ - * sources per level' errata. - */ -#define LOCAL_TIMER_VECTOR 0xef - -/* - * First APIC vector available to drivers: (vectors 0x30-0xee) - * we start at 0x41 to spread out vectors evenly between priority - * levels. (0x80 is the syscall vector) - */ -#define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ - - #ifndef __ASSEMBLY__ typedef int vector_irq_t[NR_VECTORS]; diff --git a/include/asm-x86/irq_32.h b/include/asm-x86/irq_32.h index 0b79f3185243..c5c7542f79ad 100644 --- a/include/asm-x86/irq_32.h +++ b/include/asm-x86/irq_32.h @@ -12,7 +12,7 @@ #include /* include comes from machine specific directory */ -#include "irq_vectors.h" +#include #include static inline int irq_canonicalize(int irq) diff --git a/include/asm-x86/irq_64.h b/include/asm-x86/irq_64.h index 7608176590b6..3037ec667bfb 100644 --- a/include/asm-x86/irq_64.h +++ b/include/asm-x86/irq_64.h @@ -11,34 +11,7 @@ */ #include - -#define TIMER_IRQ 0 - -/* - * 16 8259A IRQ's, 208 potential APIC interrupt sources. - * Right now the APIC is mostly only used for SMP. - * 256 vectors is an architectural limit. (we can have - * more than 256 devices theoretically, but they will - * have to use shared interrupts) - * Since vectors 0x00-0x1f are used/reserved for the CPU, - * the usable vector space is 0x20-0xff (224 vectors) - */ - -/* - * The maximum number of vectors supported by x86_64 processors - * is limited to 256. For processors other than x86_64, NR_VECTORS - * should be changed accordingly. - */ -#define NR_VECTORS 256 - -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ - -#if NR_CPUS < MAX_IO_APICS -#define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) -#else -#define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) -#endif -#define NR_IRQ_VECTORS NR_IRQS +#include static inline int irq_canonicalize(int irq) { diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h new file mode 100644 index 000000000000..daceaaf0a3a4 --- /dev/null +++ b/include/asm-x86/irq_vectors.h @@ -0,0 +1,168 @@ +#ifndef _ASM_IRQ_VECTORS_H +#define _ASM_IRQ_VECTORS_H + +#include + +#define NMI_VECTOR 0x02 + +/* + * IDT vectors usable for external interrupt sources start + * at 0x20: + */ +#define FIRST_EXTERNAL_VECTOR 0x20 + +#ifdef CONFIG_X86_32 +# define SYSCALL_VECTOR 0x80 +#else +# define IA32_SYSCALL_VECTOR 0x80 +#endif + +/* + * Vectors 0x20-0x2f are used for ISA interrupts on 32 bit. + * + * Reserve the lowest usable priority level 0x20 - 0x2f for triggering + * cleanup after irq migration on 64 bit. + */ +#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR + +/* + * Vectors 0x30-0x3f are used for ISA interrupts on 64 bit + */ +#define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR + 0x10) +#define IRQ1_VECTOR (IRQ0_VECTOR + 1) +#define IRQ2_VECTOR (IRQ0_VECTOR + 2) +#define IRQ3_VECTOR (IRQ0_VECTOR + 3) +#define IRQ4_VECTOR (IRQ0_VECTOR + 4) +#define IRQ5_VECTOR (IRQ0_VECTOR + 5) +#define IRQ6_VECTOR (IRQ0_VECTOR + 6) +#define IRQ7_VECTOR (IRQ0_VECTOR + 7) +#define IRQ8_VECTOR (IRQ0_VECTOR + 8) +#define IRQ9_VECTOR (IRQ0_VECTOR + 9) +#define IRQ10_VECTOR (IRQ0_VECTOR + 10) +#define IRQ11_VECTOR (IRQ0_VECTOR + 11) +#define IRQ12_VECTOR (IRQ0_VECTOR + 12) +#define IRQ13_VECTOR (IRQ0_VECTOR + 13) +#define IRQ14_VECTOR (IRQ0_VECTOR + 14) +#define IRQ15_VECTOR (IRQ0_VECTOR + 15) + +/* + * Special IRQ vectors used by the SMP architecture, 0xf0-0xff + * + * some of the following vectors are 'rare', they are merged + * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. + * TLB, reschedule and local APIC vectors are performance-critical. + * + * Vectors 0xf0-0xfa are free (reserved for future Linux use). + */ +#ifdef CONFIG_X86_32 + +# define SPURIOUS_APIC_VECTOR 0xff +# define ERROR_APIC_VECTOR 0xfe +# define INVALIDATE_TLB_VECTOR 0xfd +# define RESCHEDULE_VECTOR 0xfc +# define CALL_FUNCTION_VECTOR 0xfb +# define THERMAL_APIC_VECTOR 0xf0 + +#else + +#define SPURIOUS_APIC_VECTOR 0xff +#define ERROR_APIC_VECTOR 0xfe +#define RESCHEDULE_VECTOR 0xfd +#define CALL_FUNCTION_VECTOR 0xfc +#define THERMAL_APIC_VECTOR 0xfa +#define THRESHOLD_APIC_VECTOR 0xf9 +#define INVALIDATE_TLB_VECTOR_END 0xf7 +#define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ + +#define NUM_INVALIDATE_TLB_VECTORS 8 + +#endif + +/* + * Local APIC timer IRQ vector is on a different priority level, + * to work around the 'lost local interrupt if more than 2 IRQ + * sources per level' errata. + */ +#define LOCAL_TIMER_VECTOR 0xef + +/* + * First APIC vector available to drivers: (vectors 0x30-0xee) we + * start at 0x31(0x41) to spread out vectors evenly between priority + * levels. (0x80 is the syscall vector) + */ +#ifdef CONFIG_X86_32 +# define FIRST_DEVICE_VECTOR 0x31 +#else +# define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) +#endif + +#define FIRST_SYSTEM_VECTOR 0xef + +#define NR_VECTORS 256 + +#define FPU_IRQ 13 + +#define FIRST_VM86_IRQ 3 +#define LAST_VM86_IRQ 15 +#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) + +#if !defined(CONFIG_X86_VISWS) && !defined(CONFIG_X86_VOYAGER) + +# if defined(CONFIG_X86_IO_APIC) || defined(CONFIG_PARAVIRT) + +# define NR_IRQS 224 + +# if (224 >= 32 * NR_CPUS) +# define NR_IRQ_VECTORS NR_IRQS +# else +# define NR_IRQ_VECTORS (32 * NR_CPUS) +# endif + +# else /* IO_APIC || PARAVIRT */ + +# define NR_IRQS 16 +# define NR_IRQ_VECTORS NR_IRQS + +# endif + +#else /* !VISWS && !VOYAGER */ + +# define NR_IRQS 224 +# define NR_IRQ_VECTORS NR_IRQS + +#endif /* VISWS */ + +/* Voyager specific defines */ +/* These define the CPIs we use in linux */ +#define VIC_CPI_LEVEL0 0 +#define VIC_CPI_LEVEL1 1 +/* now the fake CPIs */ +#define VIC_TIMER_CPI 2 +#define VIC_INVALIDATE_CPI 3 +#define VIC_RESCHEDULE_CPI 4 +#define VIC_ENABLE_IRQ_CPI 5 +#define VIC_CALL_FUNCTION_CPI 6 + +/* Now the QIC CPIs: Since we don't need the two initial levels, + * these are 2 less than the VIC CPIs */ +#define QIC_CPI_OFFSET 1 +#define QIC_TIMER_CPI (VIC_TIMER_CPI - QIC_CPI_OFFSET) +#define QIC_INVALIDATE_CPI (VIC_INVALIDATE_CPI - QIC_CPI_OFFSET) +#define QIC_RESCHEDULE_CPI (VIC_RESCHEDULE_CPI - QIC_CPI_OFFSET) +#define QIC_ENABLE_IRQ_CPI (VIC_ENABLE_IRQ_CPI - QIC_CPI_OFFSET) +#define QIC_CALL_FUNCTION_CPI (VIC_CALL_FUNCTION_CPI - QIC_CPI_OFFSET) + +#define VIC_START_FAKE_CPI VIC_TIMER_CPI +#define VIC_END_FAKE_CPI VIC_CALL_FUNCTION_CPI + +/* this is the SYS_INT CPI. */ +#define VIC_SYS_INT 8 +#define VIC_CMN_INT 15 + +/* This is the boot CPI for alternate processors. It gets overwritten + * by the above once the system has activated all available processors */ +#define VIC_CPU_BOOT_CPI VIC_CPI_LEVEL0 +#define VIC_CPU_BOOT_ERRATA_CPI (VIC_CPI_LEVEL0 + 8) + + +#endif /* _ASM_IRQ_VECTORS_H */ diff --git a/include/asm-x86/mach-default/irq_vectors.h b/include/asm-x86/mach-default/irq_vectors.h deleted file mode 100644 index 881c63ca61ad..000000000000 --- a/include/asm-x86/mach-default/irq_vectors.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * This file should contain #defines for all of the interrupt vector - * numbers used by this architecture. - * - * In addition, there are some standard defines: - * - * FIRST_EXTERNAL_VECTOR: - * The first free place for external interrupts - * - * SYSCALL_VECTOR: - * The IRQ vector a syscall makes the user to kernel transition - * under. - * - * TIMER_IRQ: - * The IRQ number the timer interrupt comes in at. - * - * NR_IRQS: - * The total number of interrupt vectors (including all the - * architecture specific interrupts) needed. - * - */ -#ifndef _ASM_IRQ_VECTORS_H -#define _ASM_IRQ_VECTORS_H - -/* - * IDT vectors usable for external interrupt sources start - * at 0x20: - */ -#define FIRST_EXTERNAL_VECTOR 0x20 - -#define SYSCALL_VECTOR 0x80 - -/* - * Vectors 0x20-0x2f are used for ISA interrupts. - */ - -/* - * Special IRQ vectors used by the SMP architecture, 0xf0-0xff - * - * some of the following vectors are 'rare', they are merged - * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. - * TLB, reschedule and local APIC vectors are performance-critical. - * - * Vectors 0xf0-0xfa are free (reserved for future Linux use). - */ -#define SPURIOUS_APIC_VECTOR 0xff -#define ERROR_APIC_VECTOR 0xfe -#define INVALIDATE_TLB_VECTOR 0xfd -#define RESCHEDULE_VECTOR 0xfc -#define CALL_FUNCTION_VECTOR 0xfb - -#define THERMAL_APIC_VECTOR 0xf0 -/* - * Local APIC timer IRQ vector is on a different priority level, - * to work around the 'lost local interrupt if more than 2 IRQ - * sources per level' errata. - */ -#define LOCAL_TIMER_VECTOR 0xef - -/* - * First APIC vector available to drivers: (vectors 0x30-0xee) - * we start at 0x31 to spread out vectors evenly between priority - * levels. (0x80 is the syscall vector) - */ -#define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef - -#define TIMER_IRQ 0 - -/* - * 16 8259A IRQ's, 208 potential APIC interrupt sources. - * Right now the APIC is mostly only used for SMP. - * 256 vectors is an architectural limit. (we can have - * more than 256 devices theoretically, but they will - * have to use shared interrupts) - * Since vectors 0x00-0x1f are used/reserved for the CPU, - * the usable vector space is 0x20-0xff (224 vectors) - */ - -/* - * The maximum number of vectors supported by i386 processors - * is limited to 256. For processors other than i386, NR_VECTORS - * should be changed accordingly. - */ -#define NR_VECTORS 256 - -#include "irq_vectors_limits.h" - -#define FPU_IRQ 13 - -#define FIRST_VM86_IRQ 3 -#define LAST_VM86_IRQ 15 -#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) - - -#endif /* _ASM_IRQ_VECTORS_H */ diff --git a/include/asm-x86/mach-default/irq_vectors_limits.h b/include/asm-x86/mach-default/irq_vectors_limits.h deleted file mode 100644 index a90c7a60109f..000000000000 --- a/include/asm-x86/mach-default/irq_vectors_limits.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef _ASM_IRQ_VECTORS_LIMITS_H -#define _ASM_IRQ_VECTORS_LIMITS_H - -#if defined(CONFIG_X86_IO_APIC) || defined(CONFIG_PARAVIRT) -#define NR_IRQS 224 -# if (224 >= 32 * NR_CPUS) -# define NR_IRQ_VECTORS NR_IRQS -# else -# define NR_IRQ_VECTORS (32 * NR_CPUS) -# endif -#else -#define NR_IRQS 16 -#define NR_IRQ_VECTORS NR_IRQS -#endif - -#endif /* _ASM_IRQ_VECTORS_LIMITS_H */ diff --git a/include/asm-x86/mach-visws/irq_vectors.h b/include/asm-x86/mach-visws/irq_vectors.h deleted file mode 100644 index cb572d8db505..000000000000 --- a/include/asm-x86/mach-visws/irq_vectors.h +++ /dev/null @@ -1,62 +0,0 @@ -#ifndef _ASM_IRQ_VECTORS_H -#define _ASM_IRQ_VECTORS_H - -/* - * IDT vectors usable for external interrupt sources start - * at 0x20: - */ -#define FIRST_EXTERNAL_VECTOR 0x20 - -#define SYSCALL_VECTOR 0x80 - -/* - * Vectors 0x20-0x2f are used for ISA interrupts. - */ - -/* - * Special IRQ vectors used by the SMP architecture, 0xf0-0xff - * - * some of the following vectors are 'rare', they are merged - * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. - * TLB, reschedule and local APIC vectors are performance-critical. - * - * Vectors 0xf0-0xfa are free (reserved for future Linux use). - */ -#define SPURIOUS_APIC_VECTOR 0xff -#define ERROR_APIC_VECTOR 0xfe -#define INVALIDATE_TLB_VECTOR 0xfd -#define RESCHEDULE_VECTOR 0xfc -#define CALL_FUNCTION_VECTOR 0xfb - -#define THERMAL_APIC_VECTOR 0xf0 -/* - * Local APIC timer IRQ vector is on a different priority level, - * to work around the 'lost local interrupt if more than 2 IRQ - * sources per level' errata. - */ -#define LOCAL_TIMER_VECTOR 0xef - -/* - * First APIC vector available to drivers: (vectors 0x30-0xee) - * we start at 0x31 to spread out vectors evenly between priority - * levels. (0x80 is the syscall vector) - */ -#define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef - -#define TIMER_IRQ 0 - -/* - * IRQ definitions - */ -#define NR_VECTORS 256 -#define NR_IRQS 224 -#define NR_IRQ_VECTORS NR_IRQS - -#define FPU_IRQ 13 - -#define FIRST_VM86_IRQ 3 -#define LAST_VM86_IRQ 15 -#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) - -#endif /* _ASM_IRQ_VECTORS_H */ diff --git a/include/asm-x86/mach-voyager/irq_vectors.h b/include/asm-x86/mach-voyager/irq_vectors.h deleted file mode 100644 index 165421f5821c..000000000000 --- a/include/asm-x86/mach-voyager/irq_vectors.h +++ /dev/null @@ -1,79 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8 -*- */ - -/* Copyright (C) 2002 - * - * Author: James.Bottomley@HansenPartnership.com - * - * linux/arch/i386/voyager/irq_vectors.h - * - * This file provides definitions for the VIC and QIC CPIs - */ - -#ifndef _ASM_IRQ_VECTORS_H -#define _ASM_IRQ_VECTORS_H - -/* - * IDT vectors usable for external interrupt sources start - * at 0x20: - */ -#define FIRST_EXTERNAL_VECTOR 0x20 - -#define SYSCALL_VECTOR 0x80 - -/* - * Vectors 0x20-0x2f are used for ISA interrupts. - */ - -/* These define the CPIs we use in linux */ -#define VIC_CPI_LEVEL0 0 -#define VIC_CPI_LEVEL1 1 -/* now the fake CPIs */ -#define VIC_TIMER_CPI 2 -#define VIC_INVALIDATE_CPI 3 -#define VIC_RESCHEDULE_CPI 4 -#define VIC_ENABLE_IRQ_CPI 5 -#define VIC_CALL_FUNCTION_CPI 6 - -/* Now the QIC CPIs: Since we don't need the two initial levels, - * these are 2 less than the VIC CPIs */ -#define QIC_CPI_OFFSET 1 -#define QIC_TIMER_CPI (VIC_TIMER_CPI - QIC_CPI_OFFSET) -#define QIC_INVALIDATE_CPI (VIC_INVALIDATE_CPI - QIC_CPI_OFFSET) -#define QIC_RESCHEDULE_CPI (VIC_RESCHEDULE_CPI - QIC_CPI_OFFSET) -#define QIC_ENABLE_IRQ_CPI (VIC_ENABLE_IRQ_CPI - QIC_CPI_OFFSET) -#define QIC_CALL_FUNCTION_CPI (VIC_CALL_FUNCTION_CPI - QIC_CPI_OFFSET) - -#define VIC_START_FAKE_CPI VIC_TIMER_CPI -#define VIC_END_FAKE_CPI VIC_CALL_FUNCTION_CPI - -/* this is the SYS_INT CPI. */ -#define VIC_SYS_INT 8 -#define VIC_CMN_INT 15 - -/* This is the boot CPI for alternate processors. It gets overwritten - * by the above once the system has activated all available processors */ -#define VIC_CPU_BOOT_CPI VIC_CPI_LEVEL0 -#define VIC_CPU_BOOT_ERRATA_CPI (VIC_CPI_LEVEL0 + 8) - -#define NR_VECTORS 256 -#define NR_IRQS 224 -#define NR_IRQ_VECTORS NR_IRQS - -#define FPU_IRQ 13 - -#define FIRST_VM86_IRQ 3 -#define LAST_VM86_IRQ 15 -#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) - -#ifndef __ASSEMBLY__ -extern asmlinkage void vic_cpi_interrupt(void); -extern asmlinkage void vic_sys_interrupt(void); -extern asmlinkage void vic_cmn_interrupt(void); -extern asmlinkage void qic_timer_interrupt(void); -extern asmlinkage void qic_invalidate_interrupt(void); -extern asmlinkage void qic_reschedule_interrupt(void); -extern asmlinkage void qic_enable_irq_interrupt(void); -extern asmlinkage void qic_call_function_interrupt(void); -#endif /* !__ASSEMBLY__ */ - -#endif /* _ASM_IRQ_VECTORS_H */ -- cgit v1.2.3 From 0bc471d93051a19545257909bc2ed2ad3b389b54 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 2 May 2008 21:55:12 +0200 Subject: x86: move BUILD_IRQ macro magic to i8259_64.c i8259_64.c is the only place which uses those macros. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/i8259_64.c | 14 ++++++++++++++ include/asm-x86/hw_irq_64.h | 14 -------------- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c index fa57a1568508..c4ae4769ce67 100644 --- a/arch/x86/kernel/i8259_64.c +++ b/arch/x86/kernel/i8259_64.c @@ -34,6 +34,20 @@ * interrupt-controller happy. */ +#define IRQ_NAME2(nr) nr##_interrupt(void) +#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) + +/* + * SMP has a few special interrupts for IPI messages + */ + +#define BUILD_IRQ(nr) \ + asmlinkage void IRQ_NAME(nr); \ + asm("\n.p2align\n" \ + "IRQ" #nr "_interrupt:\n\t" \ + "push $~(" #nr ") ; " \ + "jmp common_interrupt"); + #define BI(x,y) \ BUILD_IRQ(x##y) diff --git a/include/asm-x86/hw_irq_64.h b/include/asm-x86/hw_irq_64.h index 98c9d494a711..9305f7456a7f 100644 --- a/include/asm-x86/hw_irq_64.h +++ b/include/asm-x86/hw_irq_64.h @@ -17,18 +17,4 @@ extern void native_init_IRQ(void); #include -#define IRQ_NAME2(nr) nr##_interrupt(void) -#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) - -/* - * SMP has a few special interrupts for IPI messages - */ - -#define BUILD_IRQ(nr) \ - asmlinkage void IRQ_NAME(nr); \ - asm("\n.p2align\n" \ - "IRQ" #nr "_interrupt:\n\t" \ - "push $~(" #nr ") ; " \ - "jmp common_interrupt"); - #endif -- cgit v1.2.3 From 97e7b6f54c0d66586a658e985630cd63040311fb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 2 May 2008 22:02:25 +0200 Subject: x86: unify apic interrupt function declarations Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/asm-x86/hw_irq.h | 4 ++++ include/asm-x86/hw_irq_64.h | 3 --- include/asm-x86/irq_64.h | 2 ++ 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h index a8c5e8bdaa49..cdb09d77af03 100644 --- a/include/asm-x86/hw_irq.h +++ b/include/asm-x86/hw_irq.h @@ -67,6 +67,10 @@ extern void print_IO_APIC(void); extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); extern void setup_ioapic_dest(void); +#ifdef CONFIG_X86_64 +extern void enable_IO_APIC(void); +#endif + /* IPI functions */ extern void send_IPI_self(int vector); extern void send_IPI(int dest, int vector); diff --git a/include/asm-x86/hw_irq_64.h b/include/asm-x86/hw_irq_64.h index 9305f7456a7f..428785b3634b 100644 --- a/include/asm-x86/hw_irq_64.h +++ b/include/asm-x86/hw_irq_64.h @@ -12,9 +12,6 @@ extern spinlock_t vector_lock; * Interrupt entry/exit code at both C and assembly level */ -extern void enable_IO_APIC(void); -extern void native_init_IRQ(void); - #include #endif diff --git a/include/asm-x86/irq_64.h b/include/asm-x86/irq_64.h index 3037ec667bfb..1ef233d15dc4 100644 --- a/include/asm-x86/irq_64.h +++ b/include/asm-x86/irq_64.h @@ -27,4 +27,6 @@ extern void fixup_irqs(cpumask_t map); #define __ARCH_HAS_DO_SOFTIRQ 1 +extern void native_init_IRQ(void); + #endif /* _ASM_IRQ_H */ -- cgit v1.2.3 From 22dc12d1f694b9af88e616ab758ff90c69d0fc83 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 2 May 2008 22:10:39 +0200 Subject: x86: unify hwirq.h Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/asm-x86/hw_irq.h | 11 +++++++---- include/asm-x86/hw_irq_32.h | 5 ----- include/asm-x86/hw_irq_64.h | 17 ----------------- 3 files changed, 7 insertions(+), 26 deletions(-) delete mode 100644 include/asm-x86/hw_irq_32.h delete mode 100644 include/asm-x86/hw_irq_64.h (limited to 'include') diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h index cdb09d77af03..1db2dff1ef49 100644 --- a/include/asm-x86/hw_irq.h +++ b/include/asm-x86/hw_irq.h @@ -89,12 +89,15 @@ extern asmlinkage void qic_reschedule_interrupt(void); extern asmlinkage void qic_enable_irq_interrupt(void); extern asmlinkage void qic_call_function_interrupt(void); -#endif /* !ASSEMBLY_ */ - #ifdef CONFIG_X86_32 -# include "hw_irq_32.h" +extern void (*const interrupt[NR_IRQS])(void); #else -# include "hw_irq_64.h" +typedef int vector_irq_t[NR_VECTORS]; +DECLARE_PER_CPU(vector_irq_t, vector_irq); +extern void __setup_vector_irq(int cpu); +extern spinlock_t vector_lock; #endif +#endif /* !ASSEMBLY_ */ + #endif diff --git a/include/asm-x86/hw_irq_32.h b/include/asm-x86/hw_irq_32.h deleted file mode 100644 index 89fca5af5958..000000000000 --- a/include/asm-x86/hw_irq_32.h +++ /dev/null @@ -1,5 +0,0 @@ - -extern void (*const interrupt[NR_IRQS])(void); - - - diff --git a/include/asm-x86/hw_irq_64.h b/include/asm-x86/hw_irq_64.h deleted file mode 100644 index 428785b3634b..000000000000 --- a/include/asm-x86/hw_irq_64.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef __ASSEMBLY__ - -typedef int vector_irq_t[NR_VECTORS]; -DECLARE_PER_CPU(vector_irq_t, vector_irq); -extern void __setup_vector_irq(int cpu); -extern spinlock_t vector_lock; - -/* - * Various low-level irq details needed by irq.c, process.c, - * time.c, io_apic.c and smp.c - * - * Interrupt entry/exit code at both C and assembly level - */ - -#include - -#endif -- cgit v1.2.3 From 22067d4501bfb47c8ca34144f68fad734178914d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 2 May 2008 22:14:44 +0200 Subject: x86: unify irq.h Not much difference in those files. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/asm-x86/irq.h | 51 +++++++++++++++++++++++++++++++++++++++++++++--- include/asm-x86/irq_32.h | 51 ------------------------------------------------ include/asm-x86/irq_64.h | 32 ------------------------------ 3 files changed, 48 insertions(+), 86 deletions(-) delete mode 100644 include/asm-x86/irq_32.h delete mode 100644 include/asm-x86/irq_64.h (limited to 'include') diff --git a/include/asm-x86/irq.h b/include/asm-x86/irq.h index 7ba905465a53..1a2925757317 100644 --- a/include/asm-x86/irq.h +++ b/include/asm-x86/irq.h @@ -1,5 +1,50 @@ -#ifdef CONFIG_X86_32 -# include "irq_32.h" +#ifndef _ASM_IRQ_H +#define _ASM_IRQ_H +/* + * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar + * + * IRQ/IPI changes taken from work by Thomas Radke + * + */ + +#include +#include + +static inline int irq_canonicalize(int irq) +{ + return ((irq == 2) ? 9 : irq); +} + +#ifdef CONFIG_X86_LOCAL_APIC +# define ARCH_HAS_NMI_WATCHDOG +#endif + +#ifdef CONFIG_4KSTACKS + extern void irq_ctx_init(int cpu); + extern void irq_ctx_exit(int cpu); +# define __ARCH_HAS_DO_SOFTIRQ #else -# include "irq_64.h" +# define irq_ctx_init(cpu) do { } while (0) +# define irq_ctx_exit(cpu) do { } while (0) +# ifdef CONFIG_X86_64 +# define __ARCH_HAS_DO_SOFTIRQ +# endif +#endif + +#ifdef CONFIG_IRQBALANCE +extern int irqbalance_disable(char *str); +#endif + +#ifdef CONFIG_HOTPLUG_CPU +#include +extern void fixup_irqs(cpumask_t map); #endif + +extern unsigned int do_IRQ(struct pt_regs *regs); +extern void init_IRQ(void); +extern void native_init_IRQ(void); + +/* Interrupt vector management */ +extern DECLARE_BITMAP(used_vectors, NR_VECTORS); + +#endif /* _ASM_IRQ_H */ diff --git a/include/asm-x86/irq_32.h b/include/asm-x86/irq_32.h deleted file mode 100644 index c5c7542f79ad..000000000000 --- a/include/asm-x86/irq_32.h +++ /dev/null @@ -1,51 +0,0 @@ -#ifndef _ASM_IRQ_H -#define _ASM_IRQ_H - -/* - * linux/include/asm/irq.h - * - * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar - * - * IRQ/IPI changes taken from work by Thomas Radke - * - */ - -#include -/* include comes from machine specific directory */ -#include -#include - -static inline int irq_canonicalize(int irq) -{ - return ((irq == 2) ? 9 : irq); -} - -#ifdef CONFIG_X86_LOCAL_APIC -# define ARCH_HAS_NMI_WATCHDOG /* See include/linux/nmi.h */ -#endif - -#ifdef CONFIG_4KSTACKS - extern void irq_ctx_init(int cpu); - extern void irq_ctx_exit(int cpu); -# define __ARCH_HAS_DO_SOFTIRQ -#else -# define irq_ctx_init(cpu) do { } while (0) -# define irq_ctx_exit(cpu) do { } while (0) -#endif - -#ifdef CONFIG_IRQBALANCE -extern int irqbalance_disable(char *str); -#endif - -#ifdef CONFIG_HOTPLUG_CPU -extern void fixup_irqs(cpumask_t map); -#endif - -unsigned int do_IRQ(struct pt_regs *regs); -void init_IRQ(void); -void __init native_init_IRQ(void); - -/* Interrupt vector management */ -extern DECLARE_BITMAP(used_vectors, NR_VECTORS); - -#endif /* _ASM_IRQ_H */ diff --git a/include/asm-x86/irq_64.h b/include/asm-x86/irq_64.h deleted file mode 100644 index 1ef233d15dc4..000000000000 --- a/include/asm-x86/irq_64.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef _ASM_IRQ_H -#define _ASM_IRQ_H - -/* - * linux/include/asm/irq.h - * - * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar - * - * IRQ/IPI changes taken from work by Thomas Radke - * - */ - -#include -#include - -static inline int irq_canonicalize(int irq) -{ - return ((irq == 2) ? 9 : irq); -} - -#define ARCH_HAS_NMI_WATCHDOG /* See include/linux/nmi.h */ - -#ifdef CONFIG_HOTPLUG_CPU -#include -extern void fixup_irqs(cpumask_t map); -#endif - -#define __ARCH_HAS_DO_SOFTIRQ 1 - -extern void native_init_IRQ(void); - -#endif /* _ASM_IRQ_H */ -- cgit v1.2.3 From 88a83350bc1955fa9d8fca059e19bc360fcef2ba Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 2 May 2008 23:19:26 +0200 Subject: x86: declare setup_apic_routing Global functions need a prototype. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/asm-x86/genapic_64.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h index 1de931b263ce..0f8504627c41 100644 --- a/include/asm-x86/genapic_64.h +++ b/include/asm-x86/genapic_64.h @@ -44,4 +44,6 @@ DECLARE_PER_CPU(int, x2apic_extra_bits); extern void uv_cpu_init(void); extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip); +extern void setup_apic_routing(void); + #endif -- cgit v1.2.3 From 1a331957efd214fc3a84f70956dfaec65e70c031 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 3 May 2008 00:30:50 +0200 Subject: x86: move eisa_set_level_irq declaration to header Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 2 -- include/asm-x86/hw_irq.h | 3 +++ 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index c49ebcc6c41e..671591fcc61d 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -507,8 +507,6 @@ int acpi_register_gsi(u32 gsi, int triggering, int polarity) * Make sure all (legacy) PCI IRQs are set as level-triggered. */ if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) { - extern void eisa_set_level_irq(unsigned int irq); - if (triggering == ACPI_LEVEL_SENSITIVE) eisa_set_level_irq(gsi); } diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h index 1db2dff1ef49..1428b41dcbb9 100644 --- a/include/asm-x86/hw_irq.h +++ b/include/asm-x86/hw_irq.h @@ -79,6 +79,9 @@ extern void send_IPI(int dest, int vector); extern atomic_t irq_err_count; extern atomic_t irq_mis_count; +/* EISA */ +extern void eisa_set_level_irq(unsigned int irq); + /* Voyager functions */ extern asmlinkage void vic_cpi_interrupt(void); extern asmlinkage void vic_sys_interrupt(void); -- cgit v1.2.3 From 305b92a2323eeaa4b481f409d54f778dd7e21a46 Mon Sep 17 00:00:00 2001 From: Alan Mayer Date: Tue, 15 Apr 2008 15:36:56 -0500 Subject: x86: change FIRST_SYSTEM_VECTOR to a variable The SGI UV system needs several more system vectors than a vanilla x86_64 system. Rather than burden the other archs with extra system vectors that they don't use, change FIRST_SYSTEM_VECTOR to a variable, so that it can be dynamic. Signed-off-by: Alan Mayer Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic_32.c | 14 +++++++------- arch/x86/kernel/i8259_64.c | 30 +++++++++++++++--------------- arch/x86/kernel/io_apic_32.c | 8 ++++++-- arch/x86/kernel/io_apic_64.c | 6 +++++- include/asm-x86/desc.h | 22 ++++++++++++++++++++++ include/asm-x86/irq_vectors.h | 2 -- 6 files changed, 55 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 4b99b1bdeb6c..807158e4b5d0 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1351,13 +1351,13 @@ void __init smp_intr_init(void) * The reschedule interrupt is a CPU-to-CPU reschedule-helper * IPI, driven by wakeup. */ - set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); + alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); /* IPI for invalidation */ - set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); + alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); /* IPI for generic function call */ - set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); + alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); } #endif @@ -1370,15 +1370,15 @@ void __init apic_intr_init(void) smp_intr_init(); #endif /* self generated IPI for local APIC timer */ - set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); + alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); /* IPI vectors for APIC spurious and error interrupts */ - set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); - set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); + alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); + alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); /* thermal monitor LVT interrupt */ #ifdef CONFIG_X86_MCE_P4THERMAL - set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); + alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); #endif } diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c index c4ae4769ce67..1870e0e86559 100644 --- a/arch/x86/kernel/i8259_64.c +++ b/arch/x86/kernel/i8259_64.c @@ -493,33 +493,33 @@ void __init native_init_IRQ(void) * The reschedule interrupt is a CPU-to-CPU reschedule-helper * IPI, driven by wakeup. */ - set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); + alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); /* IPIs for invalidation */ - set_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); - set_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); - set_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); - set_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); - set_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); - set_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); - set_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); - set_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); /* IPI for generic function call */ - set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); + alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); /* Low priority IPI to cleanup after moving an irq */ set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); #endif - set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); - set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); + alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); + alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); /* self generated IPI for local APIC timer */ - set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); + alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); /* IPI vectors for APIC spurious and error interrupts */ - set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); - set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); + alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); + alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); if (!acpi_ioapic) setup_irq(2, &irq2); diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index a40d54fc1fdd..0ae4a9d00ce8 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -83,6 +83,10 @@ int mp_irq_entries; static int disable_timer_pin_1 __initdata; +int first_system_vector = 0xfe; + +char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; + /* * Rough estimation of how many shared IRQs there are, can * be changed anytime. @@ -1176,7 +1180,7 @@ static int __assign_irq_vector(int irq) offset = current_offset; next: vector += 8; - if (vector >= FIRST_SYSTEM_VECTOR) { + if (vector >= first_system_vector) { offset = (offset + 1) % 8; vector = FIRST_DEVICE_VECTOR + offset; } @@ -2269,7 +2273,7 @@ void __init setup_IO_APIC(void) int i; /* Reserve all the system vectors. */ - for (i = FIRST_SYSTEM_VECTOR; i < NR_VECTORS; i++) + for (i = first_system_vector; i < NR_VECTORS; i++) set_bit(i, used_vectors); enable_IO_APIC(); diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index ef1a8dfcc529..f1e1ae3e5c7d 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -82,6 +82,10 @@ struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = { static int assign_irq_vector(int irq, cpumask_t mask); +int first_system_vector = 0xfe; + +char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; + #define __apicdebuginit __init int sis_apic_bug; /* not actually supported, dummy for compile */ @@ -730,7 +734,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask) offset = current_offset; next: vector += 8; - if (vector >= FIRST_SYSTEM_VECTOR) { + if (vector >= first_system_vector) { /* If we run out of vectors on large boxen, must share them. */ offset = (offset + 1) % 8; vector = FIRST_DEVICE_VECTOR + offset; diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h index 268a012bcd79..b3875d4b4fab 100644 --- a/include/asm-x86/desc.h +++ b/include/asm-x86/desc.h @@ -311,6 +311,28 @@ static inline void set_intr_gate(unsigned int n, void *addr) _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); } +#define SYS_VECTOR_FREE 0 +#define SYS_VECTOR_ALLOCED 1 + +extern int first_system_vector; +extern char system_vectors[]; + +static inline void alloc_system_vector(int vector) +{ + if (system_vectors[vector] == SYS_VECTOR_FREE) { + system_vectors[vector] = SYS_VECTOR_ALLOCED; + if (first_system_vector > vector) + first_system_vector = vector; + } else + BUG(); +} + +static inline void alloc_intr_gate(unsigned int n, void *addr) +{ + alloc_system_vector(n); + set_intr_gate(n, addr); +} + /* * This routine sets up an interrupt gate at directory privilege level 3. */ diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h index daceaaf0a3a4..3cb6d8c77b39 100644 --- a/include/asm-x86/irq_vectors.h +++ b/include/asm-x86/irq_vectors.h @@ -96,8 +96,6 @@ # define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) #endif -#define FIRST_SYSTEM_VECTOR 0xef - #define NR_VECTORS 256 #define FPU_IRQ 13 -- cgit v1.2.3 From 988f7b5789ccf5cfed14c72e28573a49f0cb4809 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Tue, 18 Mar 2008 17:00:22 -0700 Subject: x86: PAT export resource_wc in pci sysfs For the ranges with IORESOURCE_PREFETCH, export a new resource_wc interface in pci /sysfs along with resource (which is uncached). Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Acked-by: Jesse Barnes Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- Documentation/filesystems/sysfs-pci.txt | 1 + drivers/pci/pci-sysfs.c | 84 ++++++++++++++++++++++++--------- include/linux/pci.h | 1 + 3 files changed, 64 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/sysfs-pci.txt b/Documentation/filesystems/sysfs-pci.txt index 5daa2aaec2c5..68ef48839c04 100644 --- a/Documentation/filesystems/sysfs-pci.txt +++ b/Documentation/filesystems/sysfs-pci.txt @@ -36,6 +36,7 @@ files, each with their own function. local_cpus nearby CPU mask (cpumask, ro) resource PCI resource host addresses (ascii, ro) resource0..N PCI resource N, if present (binary, mmap) + resource0_wc..N_wc PCI WC map resource N, if prefetchable (binary, mmap) rom PCI ROM resource, if present (binary, ro) subsystem_device PCI subsystem device (ascii, ro) subsystem_vendor PCI subsystem vendor (ascii, ro) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 271d41cc05ab..9ec7d3977a82 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -489,13 +489,14 @@ pci_mmap_legacy_mem(struct kobject *kobj, struct bin_attribute *attr, * @kobj: kobject for mapping * @attr: struct bin_attribute for the file being mapped * @vma: struct vm_area_struct passed into the mmap + * @write_combine: 1 for write_combine mapping * * Use the regular PCI mapping routines to map a PCI resource into userspace. * FIXME: write combining? maybe automatic for prefetchable regions? */ static int pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr, - struct vm_area_struct *vma) + struct vm_area_struct *vma, int write_combine) { struct pci_dev *pdev = to_pci_dev(container_of(kobj, struct device, kobj)); @@ -518,7 +519,21 @@ pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr, vma->vm_pgoff += start >> PAGE_SHIFT; mmap_type = res->flags & IORESOURCE_MEM ? pci_mmap_mem : pci_mmap_io; - return pci_mmap_page_range(pdev, vma, mmap_type, 0); + return pci_mmap_page_range(pdev, vma, mmap_type, write_combine); +} + +static int +pci_mmap_resource_uc(struct kobject *kobj, struct bin_attribute *attr, + struct vm_area_struct *vma) +{ + return pci_mmap_resource(kobj, attr, vma, 0); +} + +static int +pci_mmap_resource_wc(struct kobject *kobj, struct bin_attribute *attr, + struct vm_area_struct *vma) +{ + return pci_mmap_resource(kobj, attr, vma, 1); } /** @@ -541,9 +556,46 @@ pci_remove_resource_files(struct pci_dev *pdev) sysfs_remove_bin_file(&pdev->dev.kobj, res_attr); kfree(res_attr); } + + res_attr = pdev->res_attr_wc[i]; + if (res_attr) { + sysfs_remove_bin_file(&pdev->dev.kobj, res_attr); + kfree(res_attr); + } } } +static int pci_create_attr(struct pci_dev *pdev, int num, int write_combine) +{ + /* allocate attribute structure, piggyback attribute name */ + int name_len = write_combine ? 13 : 10; + struct bin_attribute *res_attr; + int retval; + + res_attr = kzalloc(sizeof(*res_attr) + name_len, GFP_ATOMIC); + if (res_attr) { + char *res_attr_name = (char *)(res_attr + 1); + + if (write_combine) { + pdev->res_attr_wc[num] = res_attr; + sprintf(res_attr_name, "resource%d_wc", num); + res_attr->mmap = pci_mmap_resource_wc; + } else { + pdev->res_attr[num] = res_attr; + sprintf(res_attr_name, "resource%d", num); + res_attr->mmap = pci_mmap_resource_uc; + } + res_attr->attr.name = res_attr_name; + res_attr->attr.mode = S_IRUSR | S_IWUSR; + res_attr->size = pci_resource_len(pdev, num); + res_attr->private = &pdev->resource[num]; + retval = sysfs_create_bin_file(&pdev->dev.kobj, res_attr); + } else + retval = -ENOMEM; + + return retval; +} + /** * pci_create_resource_files - create resource files in sysfs for @dev * @dev: dev in question @@ -557,31 +609,19 @@ static int pci_create_resource_files(struct pci_dev *pdev) /* Expose the PCI resources from this device as files */ for (i = 0; i < PCI_ROM_RESOURCE; i++) { - struct bin_attribute *res_attr; /* skip empty resources */ if (!pci_resource_len(pdev, i)) continue; - /* allocate attribute structure, piggyback attribute name */ - res_attr = kzalloc(sizeof(*res_attr) + 10, GFP_ATOMIC); - if (res_attr) { - char *res_attr_name = (char *)(res_attr + 1); - - pdev->res_attr[i] = res_attr; - sprintf(res_attr_name, "resource%d", i); - res_attr->attr.name = res_attr_name; - res_attr->attr.mode = S_IRUSR | S_IWUSR; - res_attr->size = pci_resource_len(pdev, i); - res_attr->mmap = pci_mmap_resource; - res_attr->private = &pdev->resource[i]; - retval = sysfs_create_bin_file(&pdev->dev.kobj, res_attr); - if (retval) { - pci_remove_resource_files(pdev); - return retval; - } - } else { - return -ENOMEM; + retval = pci_create_attr(pdev, i, 0); + /* for prefetchable resources, create a WC mappable file */ + if (!retval && pdev->resource[i].flags & IORESOURCE_PREFETCH) + retval = pci_create_attr(pdev, i, 1); + + if (retval) { + pci_remove_resource_files(pdev); + return retval; } } return 0; diff --git a/include/linux/pci.h b/include/linux/pci.h index 509159bcd4e7..d18b1dd49fab 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -206,6 +206,7 @@ struct pci_dev { struct bin_attribute *rom_attr; /* attribute descriptor for sysfs ROM entry */ int rom_attr_enabled; /* has display of the rom attribute been enabled? */ struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */ + struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */ #ifdef CONFIG_PCI_MSI struct list_head msi_list; #endif -- cgit v1.2.3 From f8096f92b87d81c55ed63964d27baa9ce5ffe508 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 22 Apr 2008 16:27:29 +0100 Subject: x86: separate cmpxchg8b checking from PAE checking .. allowing the former to be use in non-PAE kernels, too. Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig.cpu | 4 ++++ include/asm-x86/required-features.h | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 2ad6301849a1..3d22bb8175b4 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -399,6 +399,10 @@ config X86_TSC def_bool y depends on ((MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64 +config X86_CMPXCHG64 + def_bool y + depends on X86_PAE || X86_64 + # this should be set for all -march=.. options where the compiler # generates cmov. config X86_CMOV diff --git a/include/asm-x86/required-features.h b/include/asm-x86/required-features.h index 7400d3ad75c6..8c387198ca88 100644 --- a/include/asm-x86/required-features.h +++ b/include/asm-x86/required-features.h @@ -19,9 +19,13 @@ #if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64) # define NEED_PAE (1<<(X86_FEATURE_PAE & 31)) -# define NEED_CX8 (1<<(X86_FEATURE_CX8 & 31)) #else # define NEED_PAE 0 +#endif + +#ifdef CONFIG_X86_CMPXCHG64 +# define NEED_CX8 (1<<(X86_FEATURE_CX8 & 31)) +#else # define NEED_CX8 0 #endif -- cgit v1.2.3 From aa134f1b09df6beaa4d031a50d5fda1f3cebce6c Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Tue, 8 Apr 2008 10:49:03 +0200 Subject: x86: iommu: use symbolic constants, not hardcoded numbers Move symbolic constants into gart.h, and use them instead of hardcoded constant. Signed-off-by: Pavel Machek Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-gart_64.c | 10 +++++----- drivers/char/agp/amd64-agp.c | 25 +++---------------------- include/asm-x86/gart.h | 21 +++++++++++++++++++++ 3 files changed, 29 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index c07455d1695f..bffcf455c857 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -598,13 +598,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info) dev = k8_northbridges[i]; gatt_reg = __pa(gatt) >> 12; gatt_reg <<= 4; - pci_write_config_dword(dev, 0x98, gatt_reg); - pci_read_config_dword(dev, 0x90, &ctl); + pci_write_config_dword(dev, AMD64_GARTTABLEBASE, gatt_reg); + pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); - ctl |= 1; - ctl &= ~((1<<4) | (1<<5)); + ctl |= GARTEN; + ctl &= ~(DISGARTCPU | DISGARTIO); - pci_write_config_dword(dev, 0x90, ctl); + pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl); } flush_gart(); diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index d8200ac8f8cb..25d64224cdbb 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -16,28 +16,9 @@ #include /* PAGE_SIZE */ #include #include +#include #include "agp.h" -/* PTE bits. */ -#define GPTE_VALID 1 -#define GPTE_COHERENT 2 - -/* Aperture control register bits. */ -#define GARTEN (1<<0) -#define DISGARTCPU (1<<4) -#define DISGARTIO (1<<5) - -/* GART cache control register bits. */ -#define INVGART (1<<0) -#define GARTPTEERR (1<<1) - -/* K8 On-cpu GART registers */ -#define AMD64_GARTAPERTURECTL 0x90 -#define AMD64_GARTAPERTUREBASE 0x94 -#define AMD64_GARTTABLEBASE 0x98 -#define AMD64_GARTCACHECTL 0x9c -#define AMD64_GARTEN (1<<0) - /* NVIDIA K8 registers */ #define NVIDIA_X86_64_0_APBASE 0x10 #define NVIDIA_X86_64_1_APBASE1 0x50 @@ -165,7 +146,7 @@ static int amd64_fetch_size(void) * In a multiprocessor x86-64 system, this function gets * called once for each CPU. */ -static u64 amd64_configure (struct pci_dev *hammer, u64 gatt_table) +static u64 amd64_configure(struct pci_dev *hammer, u64 gatt_table) { u64 aperturebase; u32 tmp; @@ -181,7 +162,7 @@ static u64 amd64_configure (struct pci_dev *hammer, u64 gatt_table) addr >>= 12; tmp = (u32) addr<<4; tmp &= ~0xf; - pci_write_config_dword (hammer, AMD64_GARTTABLEBASE, tmp); + pci_write_config_dword(hammer, AMD64_GARTTABLEBASE, tmp); /* Enable GART translation for this hammer. */ pci_read_config_dword(hammer, AMD64_GARTAPERTURECTL, &tmp); diff --git a/include/asm-x86/gart.h b/include/asm-x86/gart.h index 90958ed993fa..248e5778e928 100644 --- a/include/asm-x86/gart.h +++ b/include/asm-x86/gart.h @@ -5,6 +5,7 @@ extern void pci_iommu_shutdown(void); extern void no_iommu_init(void); extern int force_iommu, no_iommu; extern int iommu_detected; +extern int agp_amd64_init(void); #ifdef CONFIG_GART_IOMMU extern void gart_iommu_init(void); extern void gart_iommu_shutdown(void); @@ -31,4 +32,24 @@ static inline void gart_iommu_shutdown(void) #endif +/* PTE bits. */ +#define GPTE_VALID 1 +#define GPTE_COHERENT 2 + +/* Aperture control register bits. */ +#define GARTEN (1<<0) +#define DISGARTCPU (1<<4) +#define DISGARTIO (1<<5) + +/* GART cache control register bits. */ +#define INVGART (1<<0) +#define GARTPTEERR (1<<1) + +/* K8 On-cpu GART registers */ +#define AMD64_GARTAPERTURECTL 0x90 +#define AMD64_GARTAPERTUREBASE 0x94 +#define AMD64_GARTTABLEBASE 0x98 +#define AMD64_GARTCACHECTL 0x9c +#define AMD64_GARTEN (1<<0) + #endif -- cgit v1.2.3 From 3bb6fbf9969a8bbe4892968659239273d092e78a Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Tue, 15 Apr 2008 12:43:57 +0200 Subject: x86 gart: factor out common code Cleanup gart handling on amd64 a bit: move common code into enable_gart_translation , and use symbolic register names where appropriate. Signed-off-by: Pavel Machek Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-gart_64.c | 23 ++++++----------------- drivers/char/agp/amd64-agp.c | 29 +++++++++-------------------- include/asm-x86/gart.h | 17 +++++++++++++++++ 3 files changed, 32 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index bffcf455c857..1f99b62ff616 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -533,8 +533,8 @@ static __init unsigned read_aperture(struct pci_dev *dev, u32 *size) unsigned aper_size = 0, aper_base_32, aper_order; u64 aper_base; - pci_read_config_dword(dev, 0x94, &aper_base_32); - pci_read_config_dword(dev, 0x90, &aper_order); + pci_read_config_dword(dev, AMD64_GARTAPERTUREBASE, &aper_base_32); + pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &aper_order); aper_order = (aper_order >> 1) & 7; aper_base = aper_base_32 & 0x7fff; @@ -592,19 +592,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info) agp_gatt_table = gatt; for (i = 0; i < num_k8_northbridges; i++) { - u32 gatt_reg; - u32 ctl; - dev = k8_northbridges[i]; - gatt_reg = __pa(gatt) >> 12; - gatt_reg <<= 4; - pci_write_config_dword(dev, AMD64_GARTTABLEBASE, gatt_reg); - pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); - - ctl |= GARTEN; - ctl &= ~(DISGARTCPU | DISGARTIO); - - pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl); + enable_gart_translation(dev, __pa(gatt)); } flush_gart(); @@ -648,11 +637,11 @@ void gart_iommu_shutdown(void) u32 ctl; dev = k8_northbridges[i]; - pci_read_config_dword(dev, 0x90, &ctl); + pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); - ctl &= ~1; + ctl &= ~GARTEN; - pci_write_config_dword(dev, 0x90, ctl); + pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl); } } diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index 9c24470a8252..e3c7ea07f57c 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -150,25 +150,14 @@ static u64 amd64_configure(struct pci_dev *hammer, u64 gatt_table) { u64 aperturebase; u32 tmp; - u64 addr, aper_base; + u64 aper_base; /* Address to map to */ - pci_read_config_dword (hammer, AMD64_GARTAPERTUREBASE, &tmp); + pci_read_config_dword(hammer, AMD64_GARTAPERTUREBASE, &tmp); aperturebase = tmp << 25; aper_base = (aperturebase & PCI_BASE_ADDRESS_MEM_MASK); - /* address of the mappings table */ - addr = (u64) gatt_table; - addr >>= 12; - tmp = (u32) addr<<4; - tmp &= ~0xf; - pci_write_config_dword(hammer, AMD64_GARTTABLEBASE, tmp); - - /* Enable GART translation for this hammer. */ - pci_read_config_dword(hammer, AMD64_GARTAPERTURECTL, &tmp); - tmp |= GARTEN; - tmp &= ~(DISGARTCPU | DISGARTIO); - pci_write_config_dword(hammer, AMD64_GARTAPERTURECTL, tmp); + enable_gart_translation(hammer, gatt_table); return aper_base; } @@ -207,9 +196,9 @@ static void amd64_cleanup(void) for (i = 0; i < num_k8_northbridges; i++) { struct pci_dev *dev = k8_northbridges[i]; /* disable gart translation */ - pci_read_config_dword (dev, AMD64_GARTAPERTURECTL, &tmp); + pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &tmp); tmp &= ~AMD64_GARTEN; - pci_write_config_dword (dev, AMD64_GARTAPERTURECTL, tmp); + pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, tmp); } } @@ -289,9 +278,9 @@ static __devinit int fix_northbridge(struct pci_dev *nb, struct pci_dev *agp, u32 nb_order, nb_base; u16 apsize; - pci_read_config_dword(nb, 0x90, &nb_order); + pci_read_config_dword(nb, AMD64_GARTAPERTURECTL, &nb_order); nb_order = (nb_order >> 1) & 7; - pci_read_config_dword(nb, 0x94, &nb_base); + pci_read_config_dword(nb, AMD64_GARTAPERTUREBASE, &nb_base); nb_aper = nb_base << 25; if (aperture_valid(nb_aper, (32*1024*1024)<> 25); + pci_write_config_dword(nb, AMD64_GARTAPERTURECTL, order << 1); + pci_write_config_dword(nb, AMD64_GARTAPERTUREBASE, aper >> 25); return 0; } diff --git a/include/asm-x86/gart.h b/include/asm-x86/gart.h index 248e5778e928..6f22786d2f0c 100644 --- a/include/asm-x86/gart.h +++ b/include/asm-x86/gart.h @@ -52,4 +52,21 @@ static inline void gart_iommu_shutdown(void) #define AMD64_GARTCACHECTL 0x9c #define AMD64_GARTEN (1<<0) +static inline void enable_gart_translation(struct pci_dev *dev, u64 addr) +{ + u32 tmp, ctl; + + /* address of the mappings table */ + addr >>= 12; + tmp = (u32) addr<<4; + tmp &= ~0xf; + pci_write_config_dword(dev, AMD64_GARTTABLEBASE, tmp); + + /* Enable GART translation for this hammer. */ + pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); + ctl |= GARTEN; + ctl &= ~(DISGARTCPU | DISGARTIO); + pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl); +} + #endif -- cgit v1.2.3 From 7a10174eeafe737f3ccfcece5bdff749c3b044e0 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Mon, 28 Apr 2008 17:30:37 +1000 Subject: [POWERPC] Define and use TLF_RESTORE_SIGMASK Replace TIF_RESTORE_SIGMASK with TLF_RESTORE_SIGMASK and define our own set_restore_sigmask() function. This saves the costly SMP-safe set_bit operation, which we do not need for the sigmask flag since TIF_SIGPENDING always has to be set too. Signed-off-by: Roland McGrath Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/entry_32.S | 4 ++-- arch/powerpc/kernel/signal.c | 12 ++++++------ arch/powerpc/kernel/signal_32.c | 2 +- arch/ppc/kernel/entry.S | 4 ++-- include/asm-powerpc/thread_info.h | 17 +++++++++++++---- 5 files changed, 24 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 0c8614d9875c..3a05e9f93d42 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -668,7 +668,7 @@ user_exc_return: /* r10 contains MSR_KERNEL here */ /* Check current_thread_info()->flags */ rlwinm r9,r1,0,0,(31-THREAD_SHIFT) lwz r9,TI_FLAGS(r9) - andi. r0,r9,(_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK|_TIF_NEED_RESCHED) + andi. r0,r9,_TIF_USER_WORK_MASK bne do_work restore_user: @@ -925,7 +925,7 @@ recheck: lwz r9,TI_FLAGS(r9) andi. r0,r9,_TIF_NEED_RESCHED bne- do_resched - andi. r0,r9,_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK + andi. r0,r9,_TIF_USER_WORK_MASK beq restore_user do_user_signal: /* r10 contains MSR_KERNEL here */ ori r10,r10,MSR_EE diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index a65a44fbe523..ad55488939c3 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -120,7 +120,7 @@ int do_signal(sigset_t *oldset, struct pt_regs *regs) int ret; int is32 = is_32bit_task(); - if (test_thread_flag(TIF_RESTORE_SIGMASK)) + if (current_thread_info()->local_flags & _TLF_RESTORE_SIGMASK) oldset = ¤t->saved_sigmask; else if (!oldset) oldset = ¤t->blocked; @@ -131,9 +131,10 @@ int do_signal(sigset_t *oldset, struct pt_regs *regs) check_syscall_restart(regs, &ka, signr > 0); if (signr <= 0) { + struct thread_info *ti = current_thread_info(); /* No signal to deliver -- put the saved sigmask back */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) { - clear_thread_flag(TIF_RESTORE_SIGMASK); + if (ti->local_flags & _TLF_RESTORE_SIGMASK) { + ti->local_flags &= ~_TLF_RESTORE_SIGMASK; sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); } return 0; /* no signals delivered */ @@ -169,10 +170,9 @@ int do_signal(sigset_t *oldset, struct pt_regs *regs) /* * A signal was successfully delivered; the saved sigmask is in - * its frame, and we can clear the TIF_RESTORE_SIGMASK flag. + * its frame, and we can clear the TLF_RESTORE_SIGMASK flag. */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) - clear_thread_flag(TIF_RESTORE_SIGMASK); + current_thread_info()->local_flags &= ~_TLF_RESTORE_SIGMASK; } return ret; diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index ad6943468ee9..4ae16d179803 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -243,7 +243,7 @@ long sys_sigsuspend(old_sigset_t mask) current->state = TASK_INTERRUPTIBLE; schedule(); - set_thread_flag(TIF_RESTORE_SIGMASK); + set_restore_sigmask(); return -ERESTARTNOHAND; } diff --git a/arch/ppc/kernel/entry.S b/arch/ppc/kernel/entry.S index 5f3a5d068a5c..fcd830a292e2 100644 --- a/arch/ppc/kernel/entry.S +++ b/arch/ppc/kernel/entry.S @@ -647,7 +647,7 @@ user_exc_return: /* r10 contains MSR_KERNEL here */ /* Check current_thread_info()->flags */ rlwinm r9,r1,0,0,18 lwz r9,TI_FLAGS(r9) - andi. r0,r9,(_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK|_TIF_NEED_RESCHED) + andi. r0,r9,_TIF_USER_WORK_MASK bne do_work restore_user: @@ -898,7 +898,7 @@ recheck: lwz r9,TI_FLAGS(r9) andi. r0,r9,_TIF_NEED_RESCHED bne- do_resched - andi. r0,r9,_TIF_SIGPENDING + andi. r0,r9,_TIF_USER_WORK_MASK beq restore_user do_user_signal: /* r10 contains MSR_KERNEL here */ ori r10,r10,MSR_EE diff --git a/include/asm-powerpc/thread_info.h b/include/asm-powerpc/thread_info.h index d030f5ce39ad..e079e81051fd 100644 --- a/include/asm-powerpc/thread_info.h +++ b/include/asm-powerpc/thread_info.h @@ -116,7 +116,6 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SECCOMP 10 /* secure computing */ #define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */ #define TIF_NOERROR 12 /* Force successful syscall return */ -#define TIF_RESTORE_SIGMASK 13 /* Restore signal mask in do_signal */ #define TIF_FREEZE 14 /* Freezing for suspend */ #define TIF_RUNLATCH 15 /* Is the runlatch enabled? */ #define TIF_ABI_PENDING 16 /* 32/64 bit switch needed */ @@ -134,21 +133,31 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SECCOMP (1<local_flags |= _TLF_RESTORE_SIGMASK; + set_bit(TIF_SIGPENDING, &ti->flags); +} +#endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ -- cgit v1.2.3 From 663276b7c6340e755ed62bed67a2b96f4fc3d513 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Wed, 30 Apr 2008 20:44:53 +1000 Subject: [POWERPC] Set lower flag bits in regs->trap to indicate debug level exception We use the low bits of regs->trap as flag bits. We already indicate critical and machine check level exceptions via this mechanism. Extend it to indicate debug level exceptions. Signed-off-by: Kumar Gala Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/head_booke.h | 2 +- include/asm-powerpc/ptrace.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index aefafc6330c9..721faef87095 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -272,7 +272,7 @@ label: /* continue normal handling for a critical exception... */ \ 2: mfspr r4,SPRN_DBSR; \ addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, debug_transfer_to_handler, ret_from_debug_exc) + EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, debug_transfer_to_handler, ret_from_debug_exc) #define DEBUG_CRIT_EXCEPTION \ START_EXCEPTION(DebugCrit); \ diff --git a/include/asm-powerpc/ptrace.h b/include/asm-powerpc/ptrace.h index 39023dde1cc4..38d87e5e569d 100644 --- a/include/asm-powerpc/ptrace.h +++ b/include/asm-powerpc/ptrace.h @@ -119,6 +119,7 @@ extern int ptrace_put_reg(struct task_struct *task, int regno, #ifndef __powerpc64__ #define IS_CRITICAL_EXC(regs) (((regs)->trap & 2) != 0) #define IS_MCHECK_EXC(regs) (((regs)->trap & 4) != 0) +#define IS_DEBUG_EXC(regs) (((regs)->trap & 8) != 0) #endif /* ! __powerpc64__ */ #define TRAP(regs) ((regs)->trap & ~0xF) #ifdef __powerpc64__ -- cgit v1.2.3 From 5f25f06529ecb4b20efc7ba00de599f5b9f4b63c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 8 May 2008 14:27:07 +1000 Subject: [POWERPC] Move declaration of init_bootmem_done into system.h ... instead of having an extern declaration in a .c file. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/powerpc/mm/init_32.c | 3 +-- include/asm-powerpc/system.h | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 1952b4d3fa7f..45418590b6a9 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -43,6 +43,7 @@ #include #include #include +#include #include "mmu_decl.h" @@ -76,8 +77,6 @@ void MMU_init(void); /* XXX should be in current.h -- paulus */ extern struct task_struct *current_set[NR_CPUS]; -extern int init_bootmem_done; - /* * this tells the system to map all of ram with the segregs * (i.e. page tables) instead of the bats. diff --git a/include/asm-powerpc/system.h b/include/asm-powerpc/system.h index 2b6559a6d113..df781adac6dd 100644 --- a/include/asm-powerpc/system.h +++ b/include/asm-powerpc/system.h @@ -190,6 +190,7 @@ extern struct task_struct *_switch(struct thread_struct *prev, extern unsigned int rtas_data; extern int mem_init_done; /* set on boot once kmalloc can be called */ +extern int init_bootmem_done; /* set on !NUMA once bootmem is available */ extern unsigned long memory_limit; extern unsigned long klimit; -- cgit v1.2.3 From 572fb578de59efaaa8d197b0183db43b1128a06e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 8 May 2008 14:27:08 +1000 Subject: [POWERPC] Move declaration of tce variables into mmu-hash64.h ... instead of having extern declarations in a .c file. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/powerpc/mm/hash_utils_64.c | 2 -- include/asm-powerpc/mmu-hash64.h | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 1a4b4b36b0c6..05f955f8df35 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -509,8 +509,6 @@ void __init htab_initialize(void) unsigned long base = 0, size = 0, limit; int i; - extern unsigned long tce_alloc_start, tce_alloc_end; - DBG(" -> htab_initialize()\n"); /* Initialize segment sizes */ diff --git a/include/asm-powerpc/mmu-hash64.h b/include/asm-powerpc/mmu-hash64.h index 0dff76776044..f35d1e164da8 100644 --- a/include/asm-powerpc/mmu-hash64.h +++ b/include/asm-powerpc/mmu-hash64.h @@ -181,6 +181,7 @@ extern int mmu_io_psize; extern int mmu_kernel_ssize; extern int mmu_highuser_ssize; extern u16 mmu_slb_size; +extern unsigned long tce_alloc_start, tce_alloc_end; /* * If the processor supports 64k normal pages but not 64k cache -- cgit v1.2.3 From 7d9e793463d854073f5c0dedea991a5b63336d6a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 8 May 2008 14:27:13 +1000 Subject: [POWERPC] Add a declaration for xmon() Usually we call xmon() via debugger(), so this could be static. Sometimes when debugging it's nice to be able to call xmon() directly though, so add a declaration. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- include/asm-powerpc/xmon.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-powerpc/xmon.h b/include/asm-powerpc/xmon.h index 88320a05f0a8..81477f24a188 100644 --- a/include/asm-powerpc/xmon.h +++ b/include/asm-powerpc/xmon.h @@ -15,6 +15,8 @@ #ifdef CONFIG_XMON extern void xmon_setup(void); extern void xmon_register_spus(struct list_head *list); +struct pt_regs; +extern int xmon(struct pt_regs *excp); #else static inline void xmon_setup(void) { }; static inline void xmon_register_spus(struct list_head *list) { }; -- cgit v1.2.3 From af3b74df1b2fa4dbfb0534818167f6393f5ae7c7 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 8 May 2008 14:27:15 +1000 Subject: [POWERPC] Move xmon_irq() declaration into xmon.h The typdef for irqreturn_t was moved into its own header a while back, so there's no reason we can't move xmon_irq() into xmon.h now. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/chrp/setup.c | 7 ------- arch/powerpc/platforms/powermac/pic.c | 8 +------- include/asm-powerpc/xmon.h | 3 +++ 3 files changed, 4 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index 116babbaaf81..1ba7ce5aafae 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -63,13 +63,6 @@ static struct mpic *chrp_mpic; DEFINE_PER_CPU(struct timer_list, heartbeat_timer); unsigned long event_scan_interval; -/* - * XXX this should be in xmon.h, but putting it there means xmon.h - * has to include (to get irqreturn_t), which - * causes all sorts of problems. -- paulus - */ -extern irqreturn_t xmon_irq(int, void *); - extern unsigned long loops_per_jiffy; /* To be replaced by RTAS when available */ diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index 829b8b02527b..6d149ae8ffa7 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c @@ -34,16 +34,10 @@ #include #include #include +#include #include "pmac.h" -/* - * XXX this should be in xmon.h, but putting it there means xmon.h - * has to include (to get irqreturn_t), which - * causes all sorts of problems. -- paulus - */ -extern irqreturn_t xmon_irq(int, void *); - #ifdef CONFIG_PPC32 struct pmac_irq_hw { unsigned int event; diff --git a/include/asm-powerpc/xmon.h b/include/asm-powerpc/xmon.h index 81477f24a188..d83da3cbd085 100644 --- a/include/asm-powerpc/xmon.h +++ b/include/asm-powerpc/xmon.h @@ -12,11 +12,14 @@ #ifdef __KERNEL__ +#include + #ifdef CONFIG_XMON extern void xmon_setup(void); extern void xmon_register_spus(struct list_head *list); struct pt_regs; extern int xmon(struct pt_regs *excp); +extern irqreturn_t xmon_irq(int, void *); #else static inline void xmon_setup(void) { }; static inline void xmon_register_spus(struct list_head *list) { }; -- cgit v1.2.3 From 1c8950ff87de950a3b6ccfb26650fc0a56278836 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 8 May 2008 14:27:17 +1000 Subject: [POWERPC] Make cpus_in_xmon static and remove extern mess from hvc_console.c This is a little messier than I'd like because xmon.h only exists on powerpc and we can't have a static inline and an extern declaration visible at the same time. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/powerpc/xmon/xmon.c | 7 ++++++- drivers/char/hvc_console.c | 8 +------- drivers/char/hvc_console.h | 10 ++++++++++ include/asm-powerpc/xmon.h | 4 ++++ 4 files changed, 21 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 95f24c9822b1..6726da07c065 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -54,7 +54,7 @@ #define skipbl xmon_skipbl #ifdef CONFIG_SMP -cpumask_t cpus_in_xmon = CPU_MASK_NONE; +static cpumask_t cpus_in_xmon = CPU_MASK_NONE; static unsigned long xmon_taken = 1; static int xmon_owner; static int xmon_gate; @@ -327,6 +327,11 @@ static void release_output_lock(void) { xmon_speaker = 0; } + +int cpus_are_in_xmon(void) +{ + return !cpus_empty(cpus_in_xmon); +} #endif static int xmon_core(struct pt_regs *regs, int fromipi) diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c index 44160d5ebca0..2f9759d625cc 100644 --- a/drivers/char/hvc_console.c +++ b/drivers/char/hvc_console.c @@ -675,12 +675,6 @@ static int hvc_poll(struct hvc_struct *hp) return poll_mask; } -#if defined(CONFIG_XMON) && defined(CONFIG_SMP) -extern cpumask_t cpus_in_xmon; -#else -static const cpumask_t cpus_in_xmon = CPU_MASK_NONE; -#endif - /* * This kthread is either polling or interrupt driven. This is determined by * calling hvc_poll() who determines whether a console adapter support @@ -698,7 +692,7 @@ static int khvcd(void *unused) hvc_kicked = 0; try_to_freeze(); wmb(); - if (cpus_empty(cpus_in_xmon)) { + if (!cpus_are_in_xmon()) { spin_lock(&hvc_structs_lock); list_for_each_entry(hp, &hvc_structs, next) { poll_mask |= hvc_poll(hp); diff --git a/drivers/char/hvc_console.h b/drivers/char/hvc_console.h index 8c59818050e6..42ffb17e15df 100644 --- a/drivers/char/hvc_console.h +++ b/drivers/char/hvc_console.h @@ -60,4 +60,14 @@ extern struct hvc_struct * __devinit hvc_alloc(uint32_t vtermno, int irq, /* remove a vterm from hvc tty operation (modele_exit or hotplug remove) */ extern int __devexit hvc_remove(struct hvc_struct *hp); + +#if defined(CONFIG_XMON) && defined(CONFIG_SMP) +#include +#else +static inline int cpus_are_in_xmon(void) +{ + return 0; +} +#endif + #endif // HVC_CONSOLE_H diff --git a/include/asm-powerpc/xmon.h b/include/asm-powerpc/xmon.h index d83da3cbd085..5eb8e599e5cc 100644 --- a/include/asm-powerpc/xmon.h +++ b/include/asm-powerpc/xmon.h @@ -25,5 +25,9 @@ static inline void xmon_setup(void) { }; static inline void xmon_register_spus(struct list_head *list) { }; #endif +#if defined(CONFIG_XMON) && defined(CONFIG_SMP) +extern int cpus_are_in_xmon(void); +#endif + #endif /* __KERNEL __ */ #endif /* __ASM_POWERPC_XMON_H */ -- cgit v1.2.3 From 1c21a2937b1f342a8f5d580203c3396557d53b6e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 8 May 2008 14:27:19 +1000 Subject: [POWERPC] Fix sparse warnings in arch/powerpc/kernel Make a few things static in lparcfg.c Make init and exit routines static in rtas_flash.c Make things static in rtas_pci.c Make some functions static in rtas.c Make fops static in rtas-proc.c Remove unneeded extern for do_gtod in smp.c Make clocksource_init() static in time.c Make last_tick_len and ticklen_to_xs static in time.c Move the declaration of the pvr per-cpu into smp.h Make kexec_smp_down() and kexec_stack static in machine_kexec_64.c Don't return void in arch_teardown_msi_irqs() in msi.c Move declaration of GregorianDay()into asm/time.h Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/lparcfg.c | 6 +++--- arch/powerpc/kernel/machine_kexec_64.c | 4 ++-- arch/powerpc/kernel/msi.c | 2 +- arch/powerpc/kernel/rtas-proc.c | 14 +++++++------- arch/powerpc/kernel/rtas.c | 6 +++--- arch/powerpc/kernel/rtas_flash.c | 4 ++-- arch/powerpc/kernel/rtas_pci.c | 4 ++-- arch/powerpc/kernel/smp.c | 4 ---- arch/powerpc/kernel/time.c | 10 +++++----- arch/powerpc/platforms/maple/time.c | 2 -- include/asm-powerpc/smp.h | 2 ++ include/asm-powerpc/time.h | 1 + 12 files changed, 28 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index 1e656b43ad7f..827a5726a035 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -573,7 +573,7 @@ static int lparcfg_open(struct inode *inode, struct file *file) return single_open(file, lparcfg_data, NULL); } -const struct file_operations lparcfg_fops = { +static const struct file_operations lparcfg_fops = { .owner = THIS_MODULE, .read = seq_read, .write = lparcfg_write, @@ -581,7 +581,7 @@ const struct file_operations lparcfg_fops = { .release = single_release, }; -int __init lparcfg_init(void) +static int __init lparcfg_init(void) { struct proc_dir_entry *ent; mode_t mode = S_IRUSR | S_IRGRP | S_IROTH; @@ -601,7 +601,7 @@ int __init lparcfg_init(void) return 0; } -void __exit lparcfg_cleanup(void) +static void __exit lparcfg_cleanup(void) { if (proc_ppc64_lparcfg) remove_proc_entry("lparcfg", proc_ppc64_lparcfg->parent); diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 704375bda73a..631dfd614b21 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -158,7 +158,7 @@ void kexec_copy_flush(struct kimage *image) * on calling the interrupts, but we would like to call it off irq level * so that the interrupt controller is clean. */ -void kexec_smp_down(void *arg) +static void kexec_smp_down(void *arg) { if (ppc_md.kexec_cpu_down) ppc_md.kexec_cpu_down(0, 1); @@ -249,7 +249,7 @@ static void kexec_prepare_cpus(void) * We could use a smaller stack if we don't care about anything using * current, but that audit has not been performed. */ -union thread_union kexec_stack +static union thread_union kexec_stack __attribute__((__section__(".data.init_task"))) = { }; /* Our assembly helper, in kexec_stub.S */ diff --git a/arch/powerpc/kernel/msi.c b/arch/powerpc/kernel/msi.c index c62d1012c013..3bb7d3dd28be 100644 --- a/arch/powerpc/kernel/msi.c +++ b/arch/powerpc/kernel/msi.c @@ -34,5 +34,5 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) void arch_teardown_msi_irqs(struct pci_dev *dev) { - return ppc_md.teardown_msi_irqs(dev); + ppc_md.teardown_msi_irqs(dev); } diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index f9c6abc84a94..1be9fe38bcb5 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -160,7 +160,7 @@ static int sensors_open(struct inode *inode, struct file *file) return single_open(file, ppc_rtas_sensors_show, NULL); } -const struct file_operations ppc_rtas_sensors_operations = { +static const struct file_operations ppc_rtas_sensors_operations = { .open = sensors_open, .read = seq_read, .llseek = seq_lseek, @@ -172,7 +172,7 @@ static int poweron_open(struct inode *inode, struct file *file) return single_open(file, ppc_rtas_poweron_show, NULL); } -const struct file_operations ppc_rtas_poweron_operations = { +static const struct file_operations ppc_rtas_poweron_operations = { .open = poweron_open, .read = seq_read, .llseek = seq_lseek, @@ -185,7 +185,7 @@ static int progress_open(struct inode *inode, struct file *file) return single_open(file, ppc_rtas_progress_show, NULL); } -const struct file_operations ppc_rtas_progress_operations = { +static const struct file_operations ppc_rtas_progress_operations = { .open = progress_open, .read = seq_read, .llseek = seq_lseek, @@ -198,7 +198,7 @@ static int clock_open(struct inode *inode, struct file *file) return single_open(file, ppc_rtas_clock_show, NULL); } -const struct file_operations ppc_rtas_clock_operations = { +static const struct file_operations ppc_rtas_clock_operations = { .open = clock_open, .read = seq_read, .llseek = seq_lseek, @@ -211,7 +211,7 @@ static int tone_freq_open(struct inode *inode, struct file *file) return single_open(file, ppc_rtas_tone_freq_show, NULL); } -const struct file_operations ppc_rtas_tone_freq_operations = { +static const struct file_operations ppc_rtas_tone_freq_operations = { .open = tone_freq_open, .read = seq_read, .llseek = seq_lseek, @@ -224,7 +224,7 @@ static int tone_volume_open(struct inode *inode, struct file *file) return single_open(file, ppc_rtas_tone_volume_show, NULL); } -const struct file_operations ppc_rtas_tone_volume_operations = { +static const struct file_operations ppc_rtas_tone_volume_operations = { .open = tone_volume_open, .read = seq_read, .llseek = seq_lseek, @@ -237,7 +237,7 @@ static int rmo_buf_open(struct inode *inode, struct file *file) return single_open(file, ppc_rtas_rmo_buf_show, NULL); } -const struct file_operations ppc_rtas_rmo_buf_ops = { +static const struct file_operations ppc_rtas_rmo_buf_ops = { .open = rmo_buf_open, .read = seq_read, .llseek = seq_lseek, diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 34843c318419..2a60bd3e3afa 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -340,8 +340,8 @@ int rtas_get_error_log_max(void) EXPORT_SYMBOL(rtas_get_error_log_max); -char rtas_err_buf[RTAS_ERROR_LOG_MAX]; -int rtas_last_error_token; +static char rtas_err_buf[RTAS_ERROR_LOG_MAX]; +static int rtas_last_error_token; /** Return a copy of the detailed error text associated with the * most recent failed call to rtas. Because the error text @@ -484,7 +484,7 @@ unsigned int rtas_busy_delay(int status) } EXPORT_SYMBOL(rtas_busy_delay); -int rtas_error_rc(int rtas_rc) +static int rtas_error_rc(int rtas_rc) { int rc; diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index 0a5e22b22729..09ded5c424a9 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -731,7 +731,7 @@ static const struct file_operations validate_flash_operations = { .release = validate_flash_release, }; -int __init rtas_flash_init(void) +static int __init rtas_flash_init(void) { int rc; @@ -817,7 +817,7 @@ cleanup: return rc; } -void __exit rtas_flash_cleanup(void) +static void __exit rtas_flash_cleanup(void) { rtas_flash_term_hook = NULL; diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index 3ab88a9dc70d..589a2797eac2 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c @@ -155,12 +155,12 @@ static int rtas_pci_write_config(struct pci_bus *bus, return PCIBIOS_DEVICE_NOT_FOUND; } -struct pci_ops rtas_pci_ops = { +static struct pci_ops rtas_pci_ops = { .read = rtas_pci_read_config, .write = rtas_pci_write_config, }; -int is_python(struct device_node *dev) +static int is_python(struct device_node *dev) { const char *model = of_get_property(dev, "model", NULL); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 1457aa0a08f1..ba7989ffaeee 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -365,12 +365,8 @@ void smp_call_function_interrupt(void) } } -extern struct gettimeofday_struct do_gtod; - struct thread_info *current_set[NR_CPUS]; -DECLARE_PER_CPU(unsigned int, pvr); - static void __devinit smp_store_cpu_info(int id) { per_cpu(pvr, id) = mfspr(SPRN_PVR); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 73401e83739a..c73fc33aa817 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -129,7 +129,7 @@ static unsigned long __initdata iSeries_recal_titan; static signed long __initdata iSeries_recal_tb; /* Forward declaration is only needed for iSereis compiles */ -void __init clocksource_init(void); +static void __init clocksource_init(void); #endif #define XSEC_PER_SEC (1024*1024) @@ -150,8 +150,8 @@ u64 tb_to_xs; unsigned tb_to_us; #define TICKLEN_SCALE NTP_SCALE_SHIFT -u64 last_tick_len; /* units are ns / 2^TICKLEN_SCALE */ -u64 ticklen_to_xs; /* 0.64 fraction */ +static u64 last_tick_len; /* units are ns / 2^TICKLEN_SCALE */ +static u64 ticklen_to_xs; /* 0.64 fraction */ /* If last_tick_len corresponds to about 1/HZ seconds, then last_tick_len << TICKLEN_SHIFT will be about 2^63. */ @@ -164,7 +164,7 @@ static u64 tb_to_ns_scale __read_mostly; static unsigned tb_to_ns_shift __read_mostly; static unsigned long boot_tb __read_mostly; -struct gettimeofday_struct do_gtod; +static struct gettimeofday_struct do_gtod; extern struct timezone sys_tz; static long timezone_offset; @@ -832,7 +832,7 @@ void update_vsyscall_tz(void) ++vdso_data->tb_update_count; } -void __init clocksource_init(void) +static void __init clocksource_init(void) { struct clocksource *clock; diff --git a/arch/powerpc/platforms/maple/time.c b/arch/powerpc/platforms/maple/time.c index 9f7579b38c72..53bca132fb48 100644 --- a/arch/powerpc/platforms/maple/time.c +++ b/arch/powerpc/platforms/maple/time.c @@ -41,8 +41,6 @@ #define DBG(x...) #endif -extern void GregorianDay(struct rtc_time * tm); - static int maple_rtc_addr; static int maple_clock_read(int addr) diff --git a/include/asm-powerpc/smp.h b/include/asm-powerpc/smp.h index 505f35bacaa9..1cd43e3d94fb 100644 --- a/include/asm-powerpc/smp.h +++ b/include/asm-powerpc/smp.h @@ -37,6 +37,8 @@ extern void cpu_die(void); extern void smp_send_debugger_break(int cpu); extern void smp_message_recv(int); +DECLARE_PER_CPU(unsigned int, pvr); + #ifdef CONFIG_HOTPLUG_CPU extern void fixup_irqs(cpumask_t map); int generic_cpu_disable(void); diff --git a/include/asm-powerpc/time.h b/include/asm-powerpc/time.h index ce5de6e0e690..febd581ec9b0 100644 --- a/include/asm-powerpc/time.h +++ b/include/asm-powerpc/time.h @@ -33,6 +33,7 @@ extern unsigned tb_to_us; struct rtc_time; extern void to_tm(int tim, struct rtc_time * tm); +extern void GregorianDay(struct rtc_time *tm); extern time_t last_rtc_update; extern void generic_calibrate_decr(void); -- cgit v1.2.3 From a560643e21e1ac10f7398b45111aecdd7f47a4a5 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 14 May 2008 14:30:48 +1000 Subject: [POWERPC] Defer processing of interrupts when the CPU wakes from sleep mode This provides a way to defer processing of an interrupt that wakes the processor out of sleep mode. On 32-bit platforms that use an interrupt to wake the processor, we have to have interrupts enabled in hardware at the point where we go to sleep, otherwise the processor will never wake up. However, because interrupts are logically disabled at this point, we don't want to process the interrupt straight away. This is handled by setting the _TLF_SLEEPING flag. When we get an interrupt and _TLF_SLEEPING is set, we firstly clear the MSR_EE (external interrupt enable) bit in the saved MSR value, and secondly we then return to the address in the link register, like we do for _TLF_NAPPING, but without actually handling the interrupt. Note that this is handled somewhat differently on powerbooks, so this new code will only be used on non-Apple machines. Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/entry_32.S | 8 ++++++++ include/asm-powerpc/thread_info.h | 4 +++- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 3a05e9f93d42..888a364043a8 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -147,6 +147,7 @@ transfer_to_handler: lwz r12,TI_LOCAL_FLAGS(r9) mtcrf 0x01,r12 bt- 31-TLF_NAPPING,4f + bt- 31-TLF_SLEEPING,7f #endif /* CONFIG_6xx */ .globl transfer_to_handler_cont transfer_to_handler_cont: @@ -164,6 +165,13 @@ transfer_to_handler_cont: 4: rlwinm r12,r12,0,~_TLF_NAPPING stw r12,TI_LOCAL_FLAGS(r9) b power_save_6xx_restore + +7: rlwinm r12,r12,0,~_TLF_SLEEPING + stw r12,TI_LOCAL_FLAGS(r9) + lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */ + rlwinm r9,r9,0,~MSR_EE + lwz r12,_LINK(r11) /* and return to address in LR */ + b fast_exception_return #endif /* diff --git a/include/asm-powerpc/thread_info.h b/include/asm-powerpc/thread_info.h index e079e81051fd..b705c2a7651a 100644 --- a/include/asm-powerpc/thread_info.h +++ b/include/asm-powerpc/thread_info.h @@ -144,9 +144,11 @@ static inline struct thread_info *current_thread_info(void) /* Bits in local_flags */ /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */ #define TLF_NAPPING 0 /* idle thread enabled NAP mode */ -#define TLF_RESTORE_SIGMASK 1 /* Restore signal mask in do_signal */ +#define TLF_SLEEPING 1 /* suspend code enabled SLEEP mode */ +#define TLF_RESTORE_SIGMASK 2 /* Restore signal mask in do_signal */ #define _TLF_NAPPING (1 << TLF_NAPPING) +#define _TLF_SLEEPING (1 << TLF_SLEEPING) #define _TLF_RESTORE_SIGMASK (1 << TLF_RESTORE_SIGMASK) #ifndef __ASSEMBLY__ -- cgit v1.2.3 From 0b2cca804ee40495bc92449c3e22f3c3a3f2977a Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 29 Apr 2008 01:38:13 +1000 Subject: [POWERPC] Add 6xx-style HID0_SLEEP support. This adds a function to put a 6xx/7xx/7xxx/83xx family CPU into sleep mode, and return after an interrupt has occurred. It expects to be called with interrupts disabled, and returns with interrupts disabled. Interrupts are enabled while the processor is asleep, but the interrupt that wakes the processor is not handled; it is still pending when this function returns. Signed-off-by: Scott Wood Signed-off-by: Guennadi Liakhovetski Signed-off-by: Paul Mackerras --- arch/powerpc/sysdev/6xx-suspend.S | 52 +++++++++++++++++++++++++++++++++++++++ arch/powerpc/sysdev/Makefile | 4 +++ include/asm-powerpc/mpc6xx.h | 6 +++++ 3 files changed, 62 insertions(+) create mode 100644 arch/powerpc/sysdev/6xx-suspend.S create mode 100644 include/asm-powerpc/mpc6xx.h (limited to 'include') diff --git a/arch/powerpc/sysdev/6xx-suspend.S b/arch/powerpc/sysdev/6xx-suspend.S new file mode 100644 index 000000000000..21cda085d926 --- /dev/null +++ b/arch/powerpc/sysdev/6xx-suspend.S @@ -0,0 +1,52 @@ +/* + * Enter and leave sleep state on chips with 6xx-style HID0 + * power management bits, which don't leave sleep state via reset. + * + * Author: Scott Wood + * + * Copyright (c) 2006-2007 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include +#include +#include +#include + +_GLOBAL(mpc6xx_enter_standby) + mflr r4 + + mfspr r5, SPRN_HID0 + rlwinm r5, r5, 0, ~(HID0_DOZE | HID0_NAP) + oris r5, r5, HID0_SLEEP@h + mtspr SPRN_HID0, r5 + isync + + lis r5, ret_from_standby@h + ori r5, r5, ret_from_standby@l + mtlr r5 + + rlwinm r5, r1, 0, 0, 31-THREAD_SHIFT + lwz r6, TI_LOCAL_FLAGS(r5) + ori r6, r6, _TLF_SLEEPING + stw r6, TI_LOCAL_FLAGS(r5) + + mfmsr r5 + ori r5, r5, MSR_EE + oris r5, r5, MSR_POW@h + sync + mtmsr r5 + isync + +1: b 1b + +ret_from_standby: + mfspr r5, SPRN_HID0 + rlwinm r5, r5, 0, ~HID0_SLEEP + mtspr SPRN_HID0, r5 + + mtlr r4 + blr diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index 6d386d0071a0..2cc50520a698 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -44,3 +44,7 @@ obj-$(CONFIG_PPC_DCR) += dcr.o obj-$(CONFIG_8xx) += mpc8xx_pic.o cpm1.o obj-$(CONFIG_UCODE_PATCH) += micropatch.o endif + +ifeq ($(CONFIG_SUSPEND),y) +obj-$(CONFIG_6xx) += 6xx-suspend.o +endif diff --git a/include/asm-powerpc/mpc6xx.h b/include/asm-powerpc/mpc6xx.h new file mode 100644 index 000000000000..effc2291beb2 --- /dev/null +++ b/include/asm-powerpc/mpc6xx.h @@ -0,0 +1,6 @@ +#ifndef __ASM_POWERPC_MPC6xx_H +#define __ASM_POWERPC_MPC6xx_H + +void mpc6xx_enter_standby(void); + +#endif -- cgit v1.2.3 From 7bc228b1ef71f395aeb89bdf81bf95556b08b374 Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Thu, 15 May 2008 05:16:44 +1000 Subject: [POWERPC] Get most of ioctl.h content from Now that allows overriding of the most commonly changed macros, take advantage of that. Signed-off-by: Robert P. J. Day Signed-off-by: Paul Mackerras --- include/asm-powerpc/ioctl.h | 58 +-------------------------------------------- 1 file changed, 1 insertion(+), 57 deletions(-) (limited to 'include') diff --git a/include/asm-powerpc/ioctl.h b/include/asm-powerpc/ioctl.h index 8eb99848c402..57d68304218b 100644 --- a/include/asm-powerpc/ioctl.h +++ b/include/asm-powerpc/ioctl.h @@ -1,69 +1,13 @@ #ifndef _ASM_POWERPC_IOCTL_H #define _ASM_POWERPC_IOCTL_H - -/* - * this was copied from the alpha as it's a bit cleaner there. - * -- Cort - */ - -#define _IOC_NRBITS 8 -#define _IOC_TYPEBITS 8 #define _IOC_SIZEBITS 13 #define _IOC_DIRBITS 3 -#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1) -#define _IOC_TYPEMASK ((1 << _IOC_TYPEBITS)-1) -#define _IOC_SIZEMASK ((1 << _IOC_SIZEBITS)-1) -#define _IOC_DIRMASK ((1 << _IOC_DIRBITS)-1) - -#define _IOC_NRSHIFT 0 -#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS) -#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS) -#define _IOC_DIRSHIFT (_IOC_SIZESHIFT+_IOC_SIZEBITS) - -/* - * Direction bits _IOC_NONE could be 0, but OSF/1 gives it a bit. - * And this turns out useful to catch old ioctl numbers in header - * files for us. - */ #define _IOC_NONE 1U #define _IOC_READ 2U #define _IOC_WRITE 4U -#define _IOC(dir,type,nr,size) \ - (((dir) << _IOC_DIRSHIFT) | \ - ((type) << _IOC_TYPESHIFT) | \ - ((nr) << _IOC_NRSHIFT) | \ - ((size) << _IOC_SIZESHIFT)) - -/* provoke compile error for invalid uses of size argument */ -extern unsigned int __invalid_size_argument_for_IOC; -#define _IOC_TYPECHECK(t) \ - ((sizeof(t) == sizeof(t[1]) && \ - sizeof(t) < (1 << _IOC_SIZEBITS)) ? \ - sizeof(t) : __invalid_size_argument_for_IOC) - -/* used to create numbers */ -#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0) -#define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),(_IOC_TYPECHECK(size))) -#define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(size))) -#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(size))) -#define _IOR_BAD(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size)) -#define _IOW_BAD(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),sizeof(size)) -#define _IOWR_BAD(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size)) - -/* used to decode them.. */ -#define _IOC_DIR(nr) (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK) -#define _IOC_TYPE(nr) (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK) -#define _IOC_NR(nr) (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK) -#define _IOC_SIZE(nr) (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK) - -/* various drivers, such as the pcmcia stuff, need these... */ -#define IOC_IN (_IOC_WRITE << _IOC_DIRSHIFT) -#define IOC_OUT (_IOC_READ << _IOC_DIRSHIFT) -#define IOC_INOUT ((_IOC_WRITE|_IOC_READ) << _IOC_DIRSHIFT) -#define IOCSIZE_MASK (_IOC_SIZEMASK << _IOC_SIZESHIFT) -#define IOCSIZE_SHIFT (_IOC_SIZESHIFT) +#include #endif /* _ASM_POWERPC_IOCTL_H */ -- cgit v1.2.3 From 09e67ca2c523544e6b38aa570a5f62a0cf20b87b Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 16 May 2008 11:57:45 +1000 Subject: [POWERPC] Move of_device_get_modalias to drivers/of Commit 140b932f8cb6cced10b96860651a198b1b89cbb9 ("Create modalias file in sysfs for of_platform bus") needs this to avoid breaking the sparc builds. Just move the code and add whitespace around some binary operators. Signed-off-by: Stephen Rothwell Acked-by: David S. Miller Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/of_device.c | 48 ----------------------------------------- drivers/of/device.c | 48 +++++++++++++++++++++++++++++++++++++++++ include/asm-powerpc/of_device.h | 2 -- include/linux/of_device.h | 3 +++ 4 files changed, 51 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/of_device.c b/arch/powerpc/kernel/of_device.c index 5748ddb47d9f..e9be908f199b 100644 --- a/arch/powerpc/kernel/of_device.c +++ b/arch/powerpc/kernel/of_device.c @@ -89,54 +89,6 @@ struct of_device *of_device_alloc(struct device_node *np, } EXPORT_SYMBOL(of_device_alloc); -ssize_t of_device_get_modalias(struct of_device *ofdev, - char *str, ssize_t len) -{ - const char *compat; - int cplen, i; - ssize_t tsize, csize, repend; - - /* Name & Type */ - csize = snprintf(str, len, "of:N%sT%s", - ofdev->node->name, ofdev->node->type); - - /* Get compatible property if any */ - compat = of_get_property(ofdev->node, "compatible", &cplen); - if (!compat) - return csize; - - /* Find true end (we tolerate multiple \0 at the end */ - for (i=(cplen-1); i>=0 && !compat[i]; i--) - cplen--; - if (!cplen) - return csize; - cplen++; - - /* Check space (need cplen+1 chars including final \0) */ - tsize = csize + cplen; - repend = tsize; - - if (csize>=len) /* @ the limit, all is already filled */ - return tsize; - - if (tsize>=len) { /* limit compat list */ - cplen = len-csize-1; - repend = len; - } - - /* Copy and do char replacement */ - memcpy(&str[csize+1], compat, cplen); - for (i=csize; idev); } EXPORT_SYMBOL(of_device_unregister); + +ssize_t of_device_get_modalias(struct of_device *ofdev, + char *str, ssize_t len) +{ + const char *compat; + int cplen, i; + ssize_t tsize, csize, repend; + + /* Name & Type */ + csize = snprintf(str, len, "of:N%sT%s", + ofdev->node->name, ofdev->node->type); + + /* Get compatible property if any */ + compat = of_get_property(ofdev->node, "compatible", &cplen); + if (!compat) + return csize; + + /* Find true end (we tolerate multiple \0 at the end */ + for (i = (cplen - 1); i >= 0 && !compat[i]; i--) + cplen--; + if (!cplen) + return csize; + cplen++; + + /* Check space (need cplen+1 chars including final \0) */ + tsize = csize + cplen; + repend = tsize; + + if (csize >= len) /* @ the limit, all is already filled */ + return tsize; + + if (tsize >= len) { /* limit compat list */ + cplen = len - csize - 1; + repend = len; + } + + /* Copy and do char replacement */ + memcpy(&str[csize + 1], compat, cplen); + for (i = csize; i < repend; i++) { + char c = str[i]; + if (c == '\0') + str[i] = 'C'; + else if (c == ' ') + str[i] = '_'; + } + + return tsize; +} diff --git a/include/asm-powerpc/of_device.h b/include/asm-powerpc/of_device.h index 6526e139a463..3c123990ca2e 100644 --- a/include/asm-powerpc/of_device.h +++ b/include/asm-powerpc/of_device.h @@ -21,8 +21,6 @@ extern struct of_device *of_device_alloc(struct device_node *np, const char *bus_id, struct device *parent); -extern ssize_t of_device_get_modalias(struct of_device *ofdev, - char *str, ssize_t len); extern int of_device_uevent(struct device *dev, struct kobj_uevent_env *env); diff --git a/include/linux/of_device.h b/include/linux/of_device.h index afe338217d91..d3a74e00a3e1 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -24,4 +24,7 @@ static inline void of_device_free(struct of_device *dev) of_release_dev(&dev->dev); } +extern ssize_t of_device_get_modalias(struct of_device *ofdev, + char *str, ssize_t len); + #endif /* _LINUX_OF_DEVICE_H */ -- cgit v1.2.3 From dedd4915af40cff6614707c50dcae43d17beadec Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 17 May 2008 08:28:33 +0200 Subject: bitops: fix build in struct thread_info we can move flags from u32 to natural size - assembly code uses offsetof. Signed-off-by: Ingo Molnar --- include/asm-x86/thread_info.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index 74481b72ae0f..25d710532561 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -24,10 +24,10 @@ struct exec_domain; struct thread_info { struct task_struct *task; /* main task structure */ struct exec_domain *exec_domain; /* execution domain */ - __u32 flags; /* low level flags */ + unsigned long flags; /* low level flags */ __u32 status; /* thread synchronous flags */ __u32 cpu; /* current CPU */ - int preempt_count; /* 0 => preemptable, + int preempt_count; /* 0 => preemptable, <0 => BUG */ mm_segment_t addr_limit; struct restart_block restart_block; -- cgit v1.2.3 From 8b09dee67f484e9b42114b1a1f068e080fd7aa56 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:21:05 +0200 Subject: rcupreempt: remove duplicate prototypes rcu_batches_completed and rcu_patches_completed_bh are both declared in rcuclassic.h and rcupreempt.h. This patch removes the extra prototypes for them from rcupdate.h. rcu_batches_completed_bh is defined as a static inline in the rcupreempt.h header file. Trying to export this as EXPORT_SYMBOL_GPL causes linking problems with the powerpc linker. There's no need to export a static inlined function. Modules must be compiled with the same type of RCU implementation as the kernel they are for. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/rcupdate.h | 2 -- kernel/rcupreempt.c | 2 -- 2 files changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d42dbec06083..ec2fc5b32646 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -224,8 +224,6 @@ extern void call_rcu_bh(struct rcu_head *head, /* Exported common interfaces */ extern void synchronize_rcu(void); extern void rcu_barrier(void); -extern long rcu_batches_completed(void); -extern long rcu_batches_completed_bh(void); /* Internal to kernel */ extern void rcu_init(void); diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index e1cdf196a515..5e02b7740702 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c @@ -217,8 +217,6 @@ long rcu_batches_completed(void) } EXPORT_SYMBOL_GPL(rcu_batches_completed); -EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); - void __rcu_read_lock(void) { int idx; -- cgit v1.2.3 From 4446a36ff8c74ac3b32feb009b651048e129c6af Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 12 May 2008 21:21:05 +0200 Subject: rcu: add call_rcu_sched() Fourth cut of patch to provide the call_rcu_sched(). This is again to synchronize_sched() as call_rcu() is to synchronize_rcu(). Should be fine for experimental and -rt use, but not ready for inclusion. With some luck, I will be able to tell Andrew to come out of hiding on the next round. Passes multi-day rcutorture sessions with concurrent CPU hotplugging. Fixes since the first version include a bug that could result in indefinite blocking (spotted by Gautham Shenoy), better resiliency against CPU-hotplug operations, and other minor fixes. Fixes since the second version include reworking grace-period detection to avoid deadlocks that could happen when running concurrently with CPU hotplug, adding Mathieu's fix to avoid the softlockup messages, as well as Mathieu's fix to allow use earlier in boot. Fixes since the third version include a wrong-CPU bug spotted by Andrew, getting rid of the obsolete synchronize_kernel API that somehow snuck back in, merging spin_unlock() and local_irq_restore() in a few places, commenting the code that checks for quiescent states based on interrupting from user-mode execution or the idle loop, removing some inline attributes, and some code-style changes. Known/suspected shortcomings: o I still do not entirely trust the sleep/wakeup logic. Next step will be to use a private snapshot of the CPU online mask in rcu_sched_grace_period() -- if the CPU wasn't there at the start of the grace period, we don't need to hear from it. And the bit about accounting for changes in online CPUs inside of rcu_sched_grace_period() is ugly anyway. o It might be good for rcu_sched_grace_period() to invoke resched_cpu() when a given CPU wasn't responding quickly, but resched_cpu() is declared static... This patch also fixes a long-standing bug in the earlier preemptable-RCU implementation of synchronize_rcu() that could result in loss of concurrent external changes to a task's CPU affinity mask. I still cannot remember who reported this... Signed-off-by: Paul E. McKenney Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/rcuclassic.h | 3 + include/linux/rcupdate.h | 22 +++ include/linux/rcupreempt.h | 42 ++++- init/main.c | 1 + kernel/rcupdate.c | 20 +-- kernel/rcupreempt.c | 414 ++++++++++++++++++++++++++++++++++++++++----- 6 files changed, 434 insertions(+), 68 deletions(-) (limited to 'include') diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h index b3aa05baab8a..8c774905dcfe 100644 --- a/include/linux/rcuclassic.h +++ b/include/linux/rcuclassic.h @@ -151,7 +151,10 @@ extern struct lockdep_map rcu_lock_map; #define __synchronize_sched() synchronize_rcu() +#define call_rcu_sched(head, func) call_rcu(head, func) + extern void __rcu_init(void); +#define rcu_init_sched() do { } while (0) extern void rcu_check_callbacks(int cpu, int user); extern void rcu_restart_cpu(int cpu); diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index ec2fc5b32646..411969cb5243 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -40,6 +40,7 @@ #include #include #include +#include /** * struct rcu_head - callback structure for use with RCU @@ -168,6 +169,27 @@ struct rcu_head { (p) = (v); \ }) +/* Infrastructure to implement the synchronize_() primitives. */ + +struct rcu_synchronize { + struct rcu_head head; + struct completion completion; +}; + +extern void wakeme_after_rcu(struct rcu_head *head); + +#define synchronize_rcu_xxx(name, func) \ +void name(void) \ +{ \ + struct rcu_synchronize rcu; \ + \ + init_completion(&rcu.completion); \ + /* Will wake me after RCU finished. */ \ + func(&rcu.head, wakeme_after_rcu); \ + /* Wait for it. */ \ + wait_for_completion(&rcu.completion); \ +} + /** * synchronize_sched - block until all CPUs have exited any non-preemptive * kernel code sequences. diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h index 8a05c7e20bc4..f04b64eca636 100644 --- a/include/linux/rcupreempt.h +++ b/include/linux/rcupreempt.h @@ -40,10 +40,39 @@ #include #include -#define rcu_qsctr_inc(cpu) +struct rcu_dyntick_sched { + int dynticks; + int dynticks_snap; + int sched_qs; + int sched_qs_snap; + int sched_dynticks_snap; +}; + +DECLARE_PER_CPU(struct rcu_dyntick_sched, rcu_dyntick_sched); + +static inline void rcu_qsctr_inc(int cpu) +{ + struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); + + rdssp->sched_qs++; +} #define rcu_bh_qsctr_inc(cpu) #define call_rcu_bh(head, rcu) call_rcu(head, rcu) +/** + * call_rcu_sched - Queue RCU callback for invocation after sched grace period. + * @head: structure to be used for queueing the RCU updates. + * @func: actual update function to be invoked after the grace period + * + * The update function will be invoked some time after a full + * synchronize_sched()-style grace period elapses, in other words after + * all currently executing preempt-disabled sections of code (including + * hardirq handlers, NMI handlers, and local_irq_save() blocks) have + * completed. + */ +extern void call_rcu_sched(struct rcu_head *head, + void (*func)(struct rcu_head *head)); + extern void __rcu_read_lock(void) __acquires(RCU); extern void __rcu_read_unlock(void) __releases(RCU); extern int rcu_pending(int cpu); @@ -55,6 +84,7 @@ extern int rcu_needs_cpu(int cpu); extern void __synchronize_sched(void); extern void __rcu_init(void); +extern void rcu_init_sched(void); extern void rcu_check_callbacks(int cpu, int user); extern void rcu_restart_cpu(int cpu); extern long rcu_batches_completed(void); @@ -81,20 +111,20 @@ extern struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu); struct softirq_action; #ifdef CONFIG_NO_HZ -DECLARE_PER_CPU(long, dynticks_progress_counter); +DECLARE_PER_CPU(struct rcu_dyntick_sched, rcu_dyntick_sched); static inline void rcu_enter_nohz(void) { smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ - __get_cpu_var(dynticks_progress_counter)++; - WARN_ON(__get_cpu_var(dynticks_progress_counter) & 0x1); + __get_cpu_var(rcu_dyntick_sched).dynticks++; + WARN_ON(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1); } static inline void rcu_exit_nohz(void) { - __get_cpu_var(dynticks_progress_counter)++; smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ - WARN_ON(!(__get_cpu_var(dynticks_progress_counter) & 0x1)); + __get_cpu_var(rcu_dyntick_sched).dynticks++; + WARN_ON(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1)); } #else /* CONFIG_NO_HZ */ diff --git a/init/main.c b/init/main.c index f7fb20021d48..a9cc3e0803de 100644 --- a/init/main.c +++ b/init/main.c @@ -758,6 +758,7 @@ static void __init do_initcalls(void) */ static void __init do_basic_setup(void) { + rcu_init_sched(); /* needed by module_init stage. */ /* drivers will send hotplug events */ init_workqueues(); usermodehelper_init(); diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index c09605f8d16c..a4e329d92883 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -39,18 +39,12 @@ #include #include #include -#include #include #include #include #include #include -struct rcu_synchronize { - struct rcu_head head; - struct completion completion; -}; - static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; static atomic_t rcu_barrier_cpu_count; static DEFINE_MUTEX(rcu_barrier_mutex); @@ -60,7 +54,7 @@ static struct completion rcu_barrier_completion; * Awaken the corresponding synchronize_rcu() instance now that a * grace period has elapsed. */ -static void wakeme_after_rcu(struct rcu_head *head) +void wakeme_after_rcu(struct rcu_head *head) { struct rcu_synchronize *rcu; @@ -77,17 +71,7 @@ static void wakeme_after_rcu(struct rcu_head *head) * sections are delimited by rcu_read_lock() and rcu_read_unlock(), * and may be nested. */ -void synchronize_rcu(void) -{ - struct rcu_synchronize rcu; - - init_completion(&rcu.completion); - /* Will wake me after RCU finished */ - call_rcu(&rcu.head, wakeme_after_rcu); - - /* Wait for it */ - wait_for_completion(&rcu.completion); -} +synchronize_rcu_xxx(synchronize_rcu, call_rcu) EXPORT_SYMBOL_GPL(synchronize_rcu); static void rcu_barrier_callback(struct rcu_head *notused) diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index 5e02b7740702..aaa7976bd85f 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -87,9 +88,14 @@ struct rcu_data { struct rcu_head **nexttail; struct rcu_head *waitlist[GP_STAGES]; struct rcu_head **waittail[GP_STAGES]; - struct rcu_head *donelist; + struct rcu_head *donelist; /* from waitlist & waitschedlist */ struct rcu_head **donetail; long rcu_flipctr[2]; + struct rcu_head *nextschedlist; + struct rcu_head **nextschedtail; + struct rcu_head *waitschedlist; + struct rcu_head **waitschedtail; + int rcu_sched_sleeping; #ifdef CONFIG_RCU_TRACE struct rcupreempt_trace trace; #endif /* #ifdef CONFIG_RCU_TRACE */ @@ -131,11 +137,24 @@ enum rcu_try_flip_states { rcu_try_flip_waitmb_state, }; +/* + * States for rcu_ctrlblk.rcu_sched_sleep. + */ + +enum rcu_sched_sleep_states { + rcu_sched_not_sleeping, /* Not sleeping, callbacks need GP. */ + rcu_sched_sleep_prep, /* Thinking of sleeping, rechecking. */ + rcu_sched_sleeping, /* Sleeping, awaken if GP needed. */ +}; + struct rcu_ctrlblk { spinlock_t fliplock; /* Protect state-machine transitions. */ long completed; /* Number of last completed batch. */ enum rcu_try_flip_states rcu_try_flip_state; /* The current state of the rcu state machine */ + spinlock_t schedlock; /* Protect rcu_sched sleep state. */ + enum rcu_sched_sleep_states sched_sleep; /* rcu_sched state. */ + wait_queue_head_t sched_wq; /* Place for rcu_sched to sleep. */ }; static DEFINE_PER_CPU(struct rcu_data, rcu_data); @@ -143,8 +162,12 @@ static struct rcu_ctrlblk rcu_ctrlblk = { .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock), .completed = 0, .rcu_try_flip_state = rcu_try_flip_idle_state, + .schedlock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.schedlock), + .sched_sleep = rcu_sched_not_sleeping, + .sched_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rcu_ctrlblk.sched_wq), }; +static struct task_struct *rcu_sched_grace_period_task; #ifdef CONFIG_RCU_TRACE static char *rcu_try_flip_state_names[] = @@ -207,6 +230,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_mb_flag_values, rcu_mb_flag) */ #define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace)); +#define RCU_SCHED_BATCH_TIME (HZ / 50) + /* * Return the number of RCU batches processed thus far. Useful * for debug and statistics. @@ -411,32 +436,34 @@ static void __rcu_advance_callbacks(struct rcu_data *rdp) } } -#ifdef CONFIG_NO_HZ +DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_dyntick_sched, rcu_dyntick_sched) = { + .dynticks = 1, +}; -DEFINE_PER_CPU(long, dynticks_progress_counter) = 1; -static DEFINE_PER_CPU(long, rcu_dyntick_snapshot); +#ifdef CONFIG_NO_HZ static DEFINE_PER_CPU(int, rcu_update_flag); /** * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI. * * If the CPU was idle with dynamic ticks active, this updates the - * dynticks_progress_counter to let the RCU handling know that the + * rcu_dyntick_sched.dynticks to let the RCU handling know that the * CPU is active. */ void rcu_irq_enter(void) { int cpu = smp_processor_id(); + struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); if (per_cpu(rcu_update_flag, cpu)) per_cpu(rcu_update_flag, cpu)++; /* * Only update if we are coming from a stopped ticks mode - * (dynticks_progress_counter is even). + * (rcu_dyntick_sched.dynticks is even). */ if (!in_interrupt() && - (per_cpu(dynticks_progress_counter, cpu) & 0x1) == 0) { + (rdssp->dynticks & 0x1) == 0) { /* * The following might seem like we could have a race * with NMI/SMIs. But this really isn't a problem. @@ -459,12 +486,12 @@ void rcu_irq_enter(void) * RCU read-side critical sections on this CPU would * have already completed. */ - per_cpu(dynticks_progress_counter, cpu)++; + rdssp->dynticks++; /* * The following memory barrier ensures that any * rcu_read_lock() primitives in the irq handler * are seen by other CPUs to follow the above - * increment to dynticks_progress_counter. This is + * increment to rcu_dyntick_sched.dynticks. This is * required in order for other CPUs to correctly * determine when it is safe to advance the RCU * grace-period state machine. @@ -472,7 +499,7 @@ void rcu_irq_enter(void) smp_mb(); /* see above block comment. */ /* * Since we can't determine the dynamic tick mode from - * the dynticks_progress_counter after this routine, + * the rcu_dyntick_sched.dynticks after this routine, * we use a second flag to acknowledge that we came * from an idle state with ticks stopped. */ @@ -480,7 +507,7 @@ void rcu_irq_enter(void) /* * If we take an NMI/SMI now, they will also increment * the rcu_update_flag, and will not update the - * dynticks_progress_counter on exit. That is for + * rcu_dyntick_sched.dynticks on exit. That is for * this IRQ to do. */ } @@ -490,12 +517,13 @@ void rcu_irq_enter(void) * rcu_irq_exit - Called from exiting Hard irq context. * * If the CPU was idle with dynamic ticks active, update the - * dynticks_progress_counter to put let the RCU handling be + * rcu_dyntick_sched.dynticks to put let the RCU handling be * aware that the CPU is going back to idle with no ticks. */ void rcu_irq_exit(void) { int cpu = smp_processor_id(); + struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); /* * rcu_update_flag is set if we interrupted the CPU @@ -503,7 +531,7 @@ void rcu_irq_exit(void) * Once this occurs, we keep track of interrupt nesting * because a NMI/SMI could also come in, and we still * only want the IRQ that started the increment of the - * dynticks_progress_counter to be the one that modifies + * rcu_dyntick_sched.dynticks to be the one that modifies * it on exit. */ if (per_cpu(rcu_update_flag, cpu)) { @@ -515,28 +543,29 @@ void rcu_irq_exit(void) /* * If an NMI/SMI happens now we are still - * protected by the dynticks_progress_counter being odd. + * protected by the rcu_dyntick_sched.dynticks being odd. */ /* * The following memory barrier ensures that any * rcu_read_unlock() primitives in the irq handler * are seen by other CPUs to preceed the following - * increment to dynticks_progress_counter. This + * increment to rcu_dyntick_sched.dynticks. This * is required in order for other CPUs to determine * when it is safe to advance the RCU grace-period * state machine. */ smp_mb(); /* see above block comment. */ - per_cpu(dynticks_progress_counter, cpu)++; - WARN_ON(per_cpu(dynticks_progress_counter, cpu) & 0x1); + rdssp->dynticks++; + WARN_ON(rdssp->dynticks & 0x1); } } static void dyntick_save_progress_counter(int cpu) { - per_cpu(rcu_dyntick_snapshot, cpu) = - per_cpu(dynticks_progress_counter, cpu); + struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); + + rdssp->dynticks_snap = rdssp->dynticks; } static inline int @@ -544,9 +573,10 @@ rcu_try_flip_waitack_needed(int cpu) { long curr; long snap; + struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); - curr = per_cpu(dynticks_progress_counter, cpu); - snap = per_cpu(rcu_dyntick_snapshot, cpu); + curr = rdssp->dynticks; + snap = rdssp->dynticks_snap; smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ /* @@ -580,9 +610,10 @@ rcu_try_flip_waitmb_needed(int cpu) { long curr; long snap; + struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); - curr = per_cpu(dynticks_progress_counter, cpu); - snap = per_cpu(rcu_dyntick_snapshot, cpu); + curr = rdssp->dynticks; + snap = rdssp->dynticks_snap; smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ /* @@ -609,14 +640,86 @@ rcu_try_flip_waitmb_needed(int cpu) return 1; } +static void dyntick_save_progress_counter_sched(int cpu) +{ + struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); + + rdssp->sched_dynticks_snap = rdssp->dynticks; +} + +static int rcu_qsctr_inc_needed_dyntick(int cpu) +{ + long curr; + long snap; + struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); + + curr = rdssp->dynticks; + snap = rdssp->sched_dynticks_snap; + smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ + + /* + * If the CPU remained in dynticks mode for the entire time + * and didn't take any interrupts, NMIs, SMIs, or whatever, + * then it cannot be in the middle of an rcu_read_lock(), so + * the next rcu_read_lock() it executes must use the new value + * of the counter. Therefore, this CPU has been in a quiescent + * state the entire time, and we don't need to wait for it. + */ + + if ((curr == snap) && ((curr & 0x1) == 0)) + return 0; + + /* + * If the CPU passed through or entered a dynticks idle phase with + * no active irq handlers, then, as above, this CPU has already + * passed through a quiescent state. + */ + + if ((curr - snap) > 2 || (snap & 0x1) == 0) + return 0; + + /* We need this CPU to go through a quiescent state. */ + + return 1; +} + #else /* !CONFIG_NO_HZ */ -# define dyntick_save_progress_counter(cpu) do { } while (0) -# define rcu_try_flip_waitack_needed(cpu) (1) -# define rcu_try_flip_waitmb_needed(cpu) (1) +# define dyntick_save_progress_counter(cpu) do { } while (0) +# define rcu_try_flip_waitack_needed(cpu) (1) +# define rcu_try_flip_waitmb_needed(cpu) (1) + +# define dyntick_save_progress_counter_sched(cpu) do { } while (0) +# define rcu_qsctr_inc_needed_dyntick(cpu) (1) #endif /* CONFIG_NO_HZ */ +static void save_qsctr_sched(int cpu) +{ + struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); + + rdssp->sched_qs_snap = rdssp->sched_qs; +} + +static inline int rcu_qsctr_inc_needed(int cpu) +{ + struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); + + /* + * If there has been a quiescent state, no more need to wait + * on this CPU. + */ + + if (rdssp->sched_qs != rdssp->sched_qs_snap) { + smp_mb(); /* force ordering with cpu entering schedule(). */ + return 0; + } + + /* We need this CPU to go through a quiescent state. */ + + return 1; +} + /* * Get here when RCU is idle. Decide whether we need to * move out of idle state, and return non-zero if so. @@ -819,6 +922,26 @@ void rcu_check_callbacks(int cpu, int user) unsigned long flags; struct rcu_data *rdp = RCU_DATA_CPU(cpu); + /* + * If this CPU took its interrupt from user mode or from the + * idle loop, and this is not a nested interrupt, then + * this CPU has to have exited all prior preept-disable + * sections of code. So increment the counter to note this. + * + * The memory barrier is needed to handle the case where + * writes from a preempt-disable section of code get reordered + * into schedule() by this CPU's write buffer. So the memory + * barrier makes sure that the rcu_qsctr_inc() is seen by other + * CPUs to happen after any such write. + */ + + if (user || + (idle_cpu(cpu) && !in_softirq() && + hardirq_count() <= (1 << HARDIRQ_SHIFT))) { + smp_mb(); /* Guard against aggressive schedule(). */ + rcu_qsctr_inc(cpu); + } + rcu_check_mb(cpu); if (rcu_ctrlblk.completed == rdp->completed) rcu_try_flip(); @@ -869,6 +992,8 @@ void rcu_offline_cpu(int cpu) struct rcu_head *list = NULL; unsigned long flags; struct rcu_data *rdp = RCU_DATA_CPU(cpu); + struct rcu_head *schedlist = NULL; + struct rcu_head **schedtail = &schedlist; struct rcu_head **tail = &list; /* @@ -882,6 +1007,11 @@ void rcu_offline_cpu(int cpu) rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i], list, tail); rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail); + rcu_offline_cpu_enqueue(rdp->waitschedlist, rdp->waitschedtail, + schedlist, schedtail); + rcu_offline_cpu_enqueue(rdp->nextschedlist, rdp->nextschedtail, + schedlist, schedtail); + rdp->rcu_sched_sleeping = 0; spin_unlock_irqrestore(&rdp->lock, flags); rdp->waitlistcount = 0; @@ -916,22 +1046,40 @@ void rcu_offline_cpu(int cpu) * fix. */ - local_irq_save(flags); + local_irq_save(flags); /* disable preempt till we know what lock. */ rdp = RCU_DATA_ME(); spin_lock(&rdp->lock); *rdp->nexttail = list; if (list) rdp->nexttail = tail; + *rdp->nextschedtail = schedlist; + if (schedlist) + rdp->nextschedtail = schedtail; spin_unlock_irqrestore(&rdp->lock, flags); } void __devinit rcu_online_cpu(int cpu) { unsigned long flags; + struct rcu_data *rdp; spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags); cpu_set(cpu, rcu_cpu_online_map); spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags); + + /* + * The rcu_sched grace-period processing might have bypassed + * this CPU, given that it was not in the rcu_cpu_online_map + * when the grace-period scan started. This means that the + * grace-period task might sleep. So make sure that if this + * should happen, the first callback posted to this CPU will + * wake up the grace-period task if need be. + */ + + rdp = RCU_DATA_CPU(cpu); + spin_lock_irqsave(&rdp->lock, flags); + rdp->rcu_sched_sleeping = 1; + spin_unlock_irqrestore(&rdp->lock, flags); } #else /* #ifdef CONFIG_HOTPLUG_CPU */ @@ -986,31 +1134,196 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) *rdp->nexttail = head; rdp->nexttail = &head->next; RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp); - spin_unlock(&rdp->lock); - local_irq_restore(flags); + spin_unlock_irqrestore(&rdp->lock, flags); } EXPORT_SYMBOL_GPL(call_rcu); +void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +{ + unsigned long flags; + struct rcu_data *rdp; + int wake_gp = 0; + + head->func = func; + head->next = NULL; + local_irq_save(flags); + rdp = RCU_DATA_ME(); + spin_lock(&rdp->lock); + *rdp->nextschedtail = head; + rdp->nextschedtail = &head->next; + if (rdp->rcu_sched_sleeping) { + + /* Grace-period processing might be sleeping... */ + + rdp->rcu_sched_sleeping = 0; + wake_gp = 1; + } + spin_unlock_irqrestore(&rdp->lock, flags); + if (wake_gp) { + + /* Wake up grace-period processing, unless someone beat us. */ + + spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags); + if (rcu_ctrlblk.sched_sleep != rcu_sched_sleeping) + wake_gp = 0; + rcu_ctrlblk.sched_sleep = rcu_sched_not_sleeping; + spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags); + if (wake_gp) + wake_up_interruptible(&rcu_ctrlblk.sched_wq); + } +} +EXPORT_SYMBOL_GPL(call_rcu_sched); + /* * Wait until all currently running preempt_disable() code segments * (including hardware-irq-disable segments) complete. Note that * in -rt this does -not- necessarily result in all currently executing * interrupt -handlers- having completed. */ -void __synchronize_sched(void) +synchronize_rcu_xxx(__synchronize_sched, call_rcu_sched) +EXPORT_SYMBOL_GPL(__synchronize_sched); + +/* + * kthread function that manages call_rcu_sched grace periods. + */ +static int rcu_sched_grace_period(void *arg) { - cpumask_t oldmask; + int couldsleep; /* might sleep after current pass. */ + int couldsleepnext = 0; /* might sleep after next pass. */ int cpu; + unsigned long flags; + struct rcu_data *rdp; + int ret; - if (sched_getaffinity(0, &oldmask) < 0) - oldmask = cpu_possible_map; - for_each_online_cpu(cpu) { - sched_setaffinity(0, &cpumask_of_cpu(cpu)); - schedule(); - } - sched_setaffinity(0, &oldmask); + /* + * Each pass through the following loop handles one + * rcu_sched grace period cycle. + */ + do { + /* Save each CPU's current state. */ + + for_each_online_cpu(cpu) { + dyntick_save_progress_counter_sched(cpu); + save_qsctr_sched(cpu); + } + + /* + * Sleep for about an RCU grace-period's worth to + * allow better batching and to consume less CPU. + */ + schedule_timeout_interruptible(RCU_SCHED_BATCH_TIME); + + /* + * If there was nothing to do last time, prepare to + * sleep at the end of the current grace period cycle. + */ + couldsleep = couldsleepnext; + couldsleepnext = 1; + if (couldsleep) { + spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags); + rcu_ctrlblk.sched_sleep = rcu_sched_sleep_prep; + spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags); + } + + /* + * Wait on each CPU in turn to have either visited + * a quiescent state or been in dynticks-idle mode. + */ + for_each_online_cpu(cpu) { + while (rcu_qsctr_inc_needed(cpu) && + rcu_qsctr_inc_needed_dyntick(cpu)) { + /* resched_cpu(cpu); @@@ */ + schedule_timeout_interruptible(1); + } + } + + /* Advance callbacks for each CPU. */ + + for_each_online_cpu(cpu) { + + rdp = RCU_DATA_CPU(cpu); + spin_lock_irqsave(&rdp->lock, flags); + + /* + * We are running on this CPU irq-disabled, so no + * CPU can go offline until we re-enable irqs. + * The current CPU might have already gone + * offline (between the for_each_offline_cpu and + * the spin_lock_irqsave), but in that case all its + * callback lists will be empty, so no harm done. + * + * Advance the callbacks! We share normal RCU's + * donelist, since callbacks are invoked the + * same way in either case. + */ + if (rdp->waitschedlist != NULL) { + *rdp->donetail = rdp->waitschedlist; + rdp->donetail = rdp->waitschedtail; + + /* + * Next rcu_check_callbacks() will + * do the required raise_softirq(). + */ + } + if (rdp->nextschedlist != NULL) { + rdp->waitschedlist = rdp->nextschedlist; + rdp->waitschedtail = rdp->nextschedtail; + couldsleep = 0; + couldsleepnext = 0; + } else { + rdp->waitschedlist = NULL; + rdp->waitschedtail = &rdp->waitschedlist; + } + rdp->nextschedlist = NULL; + rdp->nextschedtail = &rdp->nextschedlist; + + /* Mark sleep intention. */ + + rdp->rcu_sched_sleeping = couldsleep; + + spin_unlock_irqrestore(&rdp->lock, flags); + } + + /* If we saw callbacks on the last scan, go deal with them. */ + + if (!couldsleep) + continue; + + /* Attempt to block... */ + + spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags); + if (rcu_ctrlblk.sched_sleep != rcu_sched_sleep_prep) { + + /* + * Someone posted a callback after we scanned. + * Go take care of it. + */ + spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags); + couldsleepnext = 0; + continue; + } + + /* Block until the next person posts a callback. */ + + rcu_ctrlblk.sched_sleep = rcu_sched_sleeping; + spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags); + ret = 0; + __wait_event_interruptible(rcu_ctrlblk.sched_wq, + rcu_ctrlblk.sched_sleep != rcu_sched_sleeping, + ret); + + /* + * Signals would prevent us from sleeping, and we cannot + * do much with them in any case. So flush them. + */ + if (ret) + flush_signals(current); + couldsleepnext = 0; + + } while (!kthread_should_stop()); + + return (0); } -EXPORT_SYMBOL_GPL(__synchronize_sched); /* * Check to see if any future RCU-related work will need to be done @@ -1027,7 +1340,9 @@ int rcu_needs_cpu(int cpu) return (rdp->donelist != NULL || !!rdp->waitlistcount || - rdp->nextlist != NULL); + rdp->nextlist != NULL || + rdp->nextschedlist != NULL || + rdp->waitschedlist != NULL); } int rcu_pending(int cpu) @@ -1038,7 +1353,9 @@ int rcu_pending(int cpu) if (rdp->donelist != NULL || !!rdp->waitlistcount || - rdp->nextlist != NULL) + rdp->nextlist != NULL || + rdp->nextschedlist != NULL || + rdp->waitschedlist != NULL) return 1; /* The RCU core needs an acknowledgement from this CPU. */ @@ -1105,6 +1422,11 @@ void __init __rcu_init(void) rdp->donetail = &rdp->donelist; rdp->rcu_flipctr[0] = 0; rdp->rcu_flipctr[1] = 0; + rdp->nextschedlist = NULL; + rdp->nextschedtail = &rdp->nextschedlist; + rdp->waitschedlist = NULL; + rdp->waitschedtail = &rdp->waitschedlist; + rdp->rcu_sched_sleeping = 0; } register_cpu_notifier(&rcu_nb); @@ -1127,11 +1449,15 @@ void __init __rcu_init(void) } /* - * Deprecated, use synchronize_rcu() or synchronize_sched() instead. + * Late-boot-time RCU initialization that must wait until after scheduler + * has been initialized. */ -void synchronize_kernel(void) +void __init rcu_init_sched(void) { - synchronize_rcu(); + rcu_sched_grace_period_task = kthread_run(rcu_sched_grace_period, + NULL, + "rcu_sched_grace_period"); + WARN_ON(IS_ERR(rcu_sched_grace_period_task)); } #ifdef CONFIG_RCU_TRACE -- cgit v1.2.3 From 70f12f848d3e981479b4f6f751e73c14f7c13e5b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 12 May 2008 21:21:05 +0200 Subject: rcu: add rcu_barrier_sched() and rcu_barrier_bh() Add rcu_barrier_sched() and rcu_barrier_bh(). With these in place, rcutorture no longer gives the occasional oops when repeatedly starting and stopping torturing rcu_bh. Also adds the API needed to flush out pre-existing call_rcu_sched() callbacks. Signed-off-by: Paul E. McKenney Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/rcupdate.h | 2 ++ kernel/rcupdate.c | 55 ++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 51 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 411969cb5243..e8b4039cfb2f 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -246,6 +246,8 @@ extern void call_rcu_bh(struct rcu_head *head, /* Exported common interfaces */ extern void synchronize_rcu(void); extern void rcu_barrier(void); +extern void rcu_barrier_bh(void); +extern void rcu_barrier_sched(void); /* Internal to kernel */ extern void rcu_init(void); diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index a4e329d92883..4a74b8d48d90 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -45,6 +45,12 @@ #include #include +enum rcu_barrier { + RCU_BARRIER_STD, + RCU_BARRIER_BH, + RCU_BARRIER_SCHED, +}; + static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; static atomic_t rcu_barrier_cpu_count; static DEFINE_MUTEX(rcu_barrier_mutex); @@ -83,19 +89,30 @@ static void rcu_barrier_callback(struct rcu_head *notused) /* * Called with preemption disabled, and from cross-cpu IRQ context. */ -static void rcu_barrier_func(void *notused) +static void rcu_barrier_func(void *type) { int cpu = smp_processor_id(); struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu); atomic_inc(&rcu_barrier_cpu_count); - call_rcu(head, rcu_barrier_callback); + switch ((enum rcu_barrier)type) { + case RCU_BARRIER_STD: + call_rcu(head, rcu_barrier_callback); + break; + case RCU_BARRIER_BH: + call_rcu_bh(head, rcu_barrier_callback); + break; + case RCU_BARRIER_SCHED: + call_rcu_sched(head, rcu_barrier_callback); + break; + } } -/** - * rcu_barrier - Wait until all the in-flight RCUs are complete. +/* + * Orchestrate the specified type of RCU barrier, waiting for all + * RCU callbacks of the specified type to complete. */ -void rcu_barrier(void) +static void _rcu_barrier(enum rcu_barrier type) { BUG_ON(in_interrupt()); /* Take cpucontrol mutex to protect against CPU hotplug */ @@ -111,13 +128,39 @@ void rcu_barrier(void) * until all the callbacks are queued. */ rcu_read_lock(); - on_each_cpu(rcu_barrier_func, NULL, 0, 1); + on_each_cpu(rcu_barrier_func, (void *)type, 0, 1); rcu_read_unlock(); wait_for_completion(&rcu_barrier_completion); mutex_unlock(&rcu_barrier_mutex); } + +/** + * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete. + */ +void rcu_barrier(void) +{ + _rcu_barrier(RCU_BARRIER_STD); +} EXPORT_SYMBOL_GPL(rcu_barrier); +/** + * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete. + */ +void rcu_barrier_bh(void) +{ + _rcu_barrier(RCU_BARRIER_BH); +} +EXPORT_SYMBOL_GPL(rcu_barrier_bh); + +/** + * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks. + */ +void rcu_barrier_sched(void) +{ + _rcu_barrier(RCU_BARRIER_SCHED); +} +EXPORT_SYMBOL_GPL(rcu_barrier_sched); + void __init rcu_init(void) { __rcu_init(); -- cgit v1.2.3 From 82524746c27fa418c250a56dd7606b9d3fc79826 Mon Sep 17 00:00:00 2001 From: Franck Bui-Huu Date: Mon, 12 May 2008 21:21:05 +0200 Subject: rcu: split list.h and move rcu-protected lists into rculist.h Move rcu-protected lists from list.h into a new header file rculist.h. This is done because list are a very used primitive structure all over the kernel and it's currently impossible to include other header files in this list.h without creating some circular dependencies. For example, list.h implements rcu-protected list and uses rcu_dereference() without including rcupdate.h. It actually compiles because users of rcu_dereference() are macros. Others RCU functions could be used too but aren't probably because of this. Therefore this patch creates rculist.h which includes rcupdates without to many changes/troubles. Signed-off-by: Franck Bui-Huu Acked-by: Paul E. McKenney Acked-by: Josh Triplett Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- arch/ia64/sn/kernel/irq.c | 1 + crypto/async_tx/async_tx.c | 1 + drivers/infiniband/hw/ipath/ipath_verbs.c | 1 + drivers/infiniband/hw/ipath/ipath_verbs_mcast.c | 3 +- drivers/net/macvlan.c | 2 +- include/linux/dcache.h | 1 + include/linux/list.h | 367 ---------------------- include/linux/rculist.h | 396 ++++++++++++++++++++++++ kernel/pid.c | 1 + lib/textsearch.c | 1 + net/802/psnap.c | 1 + net/8021q/vlan.c | 1 + net/bridge/br_fdb.c | 1 + net/bridge/br_stp.c | 1 + net/netlabel/netlabel_domainhash.c | 3 +- 15 files changed, 409 insertions(+), 372 deletions(-) create mode 100644 include/linux/rculist.h (limited to 'include') diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index 53351c3cd7b1..96c31b4180c3 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c index c6e772fc5ccd..095c798d3170 100644 --- a/crypto/async_tx/async_tx.c +++ b/crypto/async_tx/async_tx.c @@ -23,6 +23,7 @@ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. * */ +#include #include #include diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index e0ec540042bf..5d830d87ebca 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "ipath_kernel.h" #include "ipath_verbs.h" diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c index 9e5abf9c309d..d73e32232879 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c @@ -31,8 +31,7 @@ * SOFTWARE. */ -#include -#include +#include #include "ipath_verbs.h" diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index c36a03ae9bfb..860d75d81f82 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 2a6639407c80..1f5cebf10a23 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -3,6 +3,7 @@ #include #include +#include #include #include #include diff --git a/include/linux/list.h b/include/linux/list.h index 08cf4f651889..139ec41d9c2e 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -84,65 +84,6 @@ static inline void list_add_tail(struct list_head *new, struct list_head *head) __list_add(new, head->prev, head); } -/* - * Insert a new entry between two known consecutive entries. - * - * This is only for internal list manipulation where we know - * the prev/next entries already! - */ -static inline void __list_add_rcu(struct list_head * new, - struct list_head * prev, struct list_head * next) -{ - new->next = next; - new->prev = prev; - smp_wmb(); - next->prev = new; - prev->next = new; -} - -/** - * list_add_rcu - add a new entry to rcu-protected list - * @new: new entry to be added - * @head: list head to add it after - * - * Insert a new entry after the specified head. - * This is good for implementing stacks. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as list_add_rcu() - * or list_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * list_for_each_entry_rcu(). - */ -static inline void list_add_rcu(struct list_head *new, struct list_head *head) -{ - __list_add_rcu(new, head, head->next); -} - -/** - * list_add_tail_rcu - add a new entry to rcu-protected list - * @new: new entry to be added - * @head: list head to add it before - * - * Insert a new entry before the specified head. - * This is useful for implementing queues. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as list_add_tail_rcu() - * or list_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * list_for_each_entry_rcu(). - */ -static inline void list_add_tail_rcu(struct list_head *new, - struct list_head *head) -{ - __list_add_rcu(new, head->prev, head); -} - /* * Delete a list entry by making the prev/next entries * point to each other. @@ -173,36 +114,6 @@ static inline void list_del(struct list_head *entry) extern void list_del(struct list_head *entry); #endif -/** - * list_del_rcu - deletes entry from list without re-initialization - * @entry: the element to delete from the list. - * - * Note: list_empty() on entry does not return true after this, - * the entry is in an undefined state. It is useful for RCU based - * lockfree traversal. - * - * In particular, it means that we can not poison the forward - * pointers that may still be used for walking the list. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as list_del_rcu() - * or list_add_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * list_for_each_entry_rcu(). - * - * Note that the caller is not permitted to immediately free - * the newly deleted entry. Instead, either synchronize_rcu() - * or call_rcu() must be used to defer freeing until an RCU - * grace period has elapsed. - */ -static inline void list_del_rcu(struct list_head *entry) -{ - __list_del(entry->prev, entry->next); - entry->prev = LIST_POISON2; -} - /** * list_replace - replace old entry by new one * @old : the element to be replaced @@ -226,25 +137,6 @@ static inline void list_replace_init(struct list_head *old, INIT_LIST_HEAD(old); } -/** - * list_replace_rcu - replace old entry by new one - * @old : the element to be replaced - * @new : the new element to insert - * - * The @old entry will be replaced with the @new entry atomically. - * Note: @old should not be empty. - */ -static inline void list_replace_rcu(struct list_head *old, - struct list_head *new) -{ - new->next = old->next; - new->prev = old->prev; - smp_wmb(); - new->next->prev = new; - new->prev->next = new; - old->prev = LIST_POISON2; -} - /** * list_del_init - deletes entry from list and reinitialize it. * @entry: the element to delete from the list. @@ -368,62 +260,6 @@ static inline void list_splice_init(struct list_head *list, } } -/** - * list_splice_init_rcu - splice an RCU-protected list into an existing list. - * @list: the RCU-protected list to splice - * @head: the place in the list to splice the first list into - * @sync: function to sync: synchronize_rcu(), synchronize_sched(), ... - * - * @head can be RCU-read traversed concurrently with this function. - * - * Note that this function blocks. - * - * Important note: the caller must take whatever action is necessary to - * prevent any other updates to @head. In principle, it is possible - * to modify the list as soon as sync() begins execution. - * If this sort of thing becomes necessary, an alternative version - * based on call_rcu() could be created. But only if -really- - * needed -- there is no shortage of RCU API members. - */ -static inline void list_splice_init_rcu(struct list_head *list, - struct list_head *head, - void (*sync)(void)) -{ - struct list_head *first = list->next; - struct list_head *last = list->prev; - struct list_head *at = head->next; - - if (list_empty(head)) - return; - - /* "first" and "last" tracking list, so initialize it. */ - - INIT_LIST_HEAD(list); - - /* - * At this point, the list body still points to the source list. - * Wait for any readers to finish using the list before splicing - * the list body into the new list. Any new readers will see - * an empty list. - */ - - sync(); - - /* - * Readers are finished with the source list, so perform splice. - * The order is important if the new list is global and accessible - * to concurrent RCU readers. Note that RCU readers are not - * permitted to traverse the prev pointers without excluding - * this function. - */ - - last->next = at; - smp_wmb(); - head->next = first; - first->prev = head; - at->prev = last; -} - /** * list_entry - get the struct for this entry * @ptr: the &struct list_head pointer. @@ -629,57 +465,6 @@ static inline void list_splice_init_rcu(struct list_head *list, &pos->member != (head); \ pos = n, n = list_entry(n->member.prev, typeof(*n), member)) -/** - * list_for_each_rcu - iterate over an rcu-protected list - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - * - * This list-traversal primitive may safely run concurrently with - * the _rcu list-mutation primitives such as list_add_rcu() - * as long as the traversal is guarded by rcu_read_lock(). - */ -#define list_for_each_rcu(pos, head) \ - for (pos = rcu_dereference((head)->next); \ - prefetch(pos->next), pos != (head); \ - pos = rcu_dereference(pos->next)) - -#define __list_for_each_rcu(pos, head) \ - for (pos = rcu_dereference((head)->next); \ - pos != (head); \ - pos = rcu_dereference(pos->next)) - -/** - * list_for_each_entry_rcu - iterate over rcu list of given type - * @pos: the type * to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - * - * This list-traversal primitive may safely run concurrently with - * the _rcu list-mutation primitives such as list_add_rcu() - * as long as the traversal is guarded by rcu_read_lock(). - */ -#define list_for_each_entry_rcu(pos, head, member) \ - for (pos = list_entry(rcu_dereference((head)->next), typeof(*pos), member); \ - prefetch(pos->member.next), &pos->member != (head); \ - pos = list_entry(rcu_dereference(pos->member.next), typeof(*pos), member)) - - -/** - * list_for_each_continue_rcu - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - * - * Iterate over an rcu-protected list, continuing after current point. - * - * This list-traversal primitive may safely run concurrently with - * the _rcu list-mutation primitives such as list_add_rcu() - * as long as the traversal is guarded by rcu_read_lock(). - */ -#define list_for_each_continue_rcu(pos, head) \ - for ((pos) = rcu_dereference((pos)->next); \ - prefetch((pos)->next), (pos) != (head); \ - (pos) = rcu_dereference((pos)->next)) - /* * Double linked lists with a single pointer list head. * Mostly useful for hash tables where the two pointer list head is @@ -730,31 +515,6 @@ static inline void hlist_del(struct hlist_node *n) n->pprev = LIST_POISON2; } -/** - * hlist_del_rcu - deletes entry from hash list without re-initialization - * @n: the element to delete from the hash list. - * - * Note: list_unhashed() on entry does not return true after this, - * the entry is in an undefined state. It is useful for RCU based - * lockfree traversal. - * - * In particular, it means that we can not poison the forward - * pointers that may still be used for walking the hash list. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as hlist_add_head_rcu() - * or hlist_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * hlist_for_each_entry(). - */ -static inline void hlist_del_rcu(struct hlist_node *n) -{ - __hlist_del(n); - n->pprev = LIST_POISON2; -} - static inline void hlist_del_init(struct hlist_node *n) { if (!hlist_unhashed(n)) { @@ -763,27 +523,6 @@ static inline void hlist_del_init(struct hlist_node *n) } } -/** - * hlist_replace_rcu - replace old entry by new one - * @old : the element to be replaced - * @new : the new element to insert - * - * The @old entry will be replaced with the @new entry atomically. - */ -static inline void hlist_replace_rcu(struct hlist_node *old, - struct hlist_node *new) -{ - struct hlist_node *next = old->next; - - new->next = next; - new->pprev = old->pprev; - smp_wmb(); - if (next) - new->next->pprev = &new->next; - *new->pprev = new; - old->pprev = LIST_POISON2; -} - static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) { struct hlist_node *first = h->first; @@ -794,38 +533,6 @@ static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) n->pprev = &h->first; } - -/** - * hlist_add_head_rcu - * @n: the element to add to the hash list. - * @h: the list to add to. - * - * Description: - * Adds the specified element to the specified hlist, - * while permitting racing traversals. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as hlist_add_head_rcu() - * or hlist_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * hlist_for_each_entry_rcu(), used to prevent memory-consistency - * problems on Alpha CPUs. Regardless of the type of CPU, the - * list-traversal primitive must be guarded by rcu_read_lock(). - */ -static inline void hlist_add_head_rcu(struct hlist_node *n, - struct hlist_head *h) -{ - struct hlist_node *first = h->first; - n->next = first; - n->pprev = &h->first; - smp_wmb(); - if (first) - first->pprev = &n->next; - h->first = n; -} - /* next must be != NULL */ static inline void hlist_add_before(struct hlist_node *n, struct hlist_node *next) @@ -847,63 +554,6 @@ static inline void hlist_add_after(struct hlist_node *n, next->next->pprev = &next->next; } -/** - * hlist_add_before_rcu - * @n: the new element to add to the hash list. - * @next: the existing element to add the new element before. - * - * Description: - * Adds the specified element to the specified hlist - * before the specified node while permitting racing traversals. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as hlist_add_head_rcu() - * or hlist_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * hlist_for_each_entry_rcu(), used to prevent memory-consistency - * problems on Alpha CPUs. - */ -static inline void hlist_add_before_rcu(struct hlist_node *n, - struct hlist_node *next) -{ - n->pprev = next->pprev; - n->next = next; - smp_wmb(); - next->pprev = &n->next; - *(n->pprev) = n; -} - -/** - * hlist_add_after_rcu - * @prev: the existing element to add the new element after. - * @n: the new element to add to the hash list. - * - * Description: - * Adds the specified element to the specified hlist - * after the specified node while permitting racing traversals. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as hlist_add_head_rcu() - * or hlist_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * hlist_for_each_entry_rcu(), used to prevent memory-consistency - * problems on Alpha CPUs. - */ -static inline void hlist_add_after_rcu(struct hlist_node *prev, - struct hlist_node *n) -{ - n->next = prev->next; - n->pprev = &prev->next; - smp_wmb(); - prev->next = n; - if (n->next) - n->next->pprev = &n->next; -} - #define hlist_entry(ptr, type, member) container_of(ptr,type,member) #define hlist_for_each(pos, head) \ @@ -964,21 +614,4 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ pos = n) -/** - * hlist_for_each_entry_rcu - iterate over rcu list of given type - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the hlist_node within the struct. - * - * This list-traversal primitive may safely run concurrently with - * the _rcu list-mutation primitives such as hlist_add_head_rcu() - * as long as the traversal is guarded by rcu_read_lock(). - */ -#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ - for (pos = rcu_dereference((head)->first); \ - pos && ({ prefetch(pos->next); 1;}) && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = rcu_dereference(pos->next)) - #endif diff --git a/include/linux/rculist.h b/include/linux/rculist.h new file mode 100644 index 000000000000..aa9b3eb15683 --- /dev/null +++ b/include/linux/rculist.h @@ -0,0 +1,396 @@ +#ifndef _LINUX_RCULIST_H +#define _LINUX_RCULIST_H + +#ifdef __KERNEL__ + +/* + * RCU-protected list version + */ +#include + +/* + * Insert a new entry between two known consecutive entries. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static inline void __list_add_rcu(struct list_head *new, + struct list_head *prev, struct list_head *next) +{ + new->next = next; + new->prev = prev; + smp_wmb(); + next->prev = new; + prev->next = new; +} + +/** + * list_add_rcu - add a new entry to rcu-protected list + * @new: new entry to be added + * @head: list head to add it after + * + * Insert a new entry after the specified head. + * This is good for implementing stacks. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as list_add_rcu() + * or list_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * list_for_each_entry_rcu(). + */ +static inline void list_add_rcu(struct list_head *new, struct list_head *head) +{ + __list_add_rcu(new, head, head->next); +} + +/** + * list_add_tail_rcu - add a new entry to rcu-protected list + * @new: new entry to be added + * @head: list head to add it before + * + * Insert a new entry before the specified head. + * This is useful for implementing queues. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as list_add_tail_rcu() + * or list_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * list_for_each_entry_rcu(). + */ +static inline void list_add_tail_rcu(struct list_head *new, + struct list_head *head) +{ + __list_add_rcu(new, head->prev, head); +} + +/** + * list_del_rcu - deletes entry from list without re-initialization + * @entry: the element to delete from the list. + * + * Note: list_empty() on entry does not return true after this, + * the entry is in an undefined state. It is useful for RCU based + * lockfree traversal. + * + * In particular, it means that we can not poison the forward + * pointers that may still be used for walking the list. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as list_del_rcu() + * or list_add_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * list_for_each_entry_rcu(). + * + * Note that the caller is not permitted to immediately free + * the newly deleted entry. Instead, either synchronize_rcu() + * or call_rcu() must be used to defer freeing until an RCU + * grace period has elapsed. + */ +static inline void list_del_rcu(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + entry->prev = LIST_POISON2; +} + +/** + * list_replace_rcu - replace old entry by new one + * @old : the element to be replaced + * @new : the new element to insert + * + * The @old entry will be replaced with the @new entry atomically. + * Note: @old should not be empty. + */ +static inline void list_replace_rcu(struct list_head *old, + struct list_head *new) +{ + new->next = old->next; + new->prev = old->prev; + smp_wmb(); + new->next->prev = new; + new->prev->next = new; + old->prev = LIST_POISON2; +} + +/** + * list_splice_init_rcu - splice an RCU-protected list into an existing list. + * @list: the RCU-protected list to splice + * @head: the place in the list to splice the first list into + * @sync: function to sync: synchronize_rcu(), synchronize_sched(), ... + * + * @head can be RCU-read traversed concurrently with this function. + * + * Note that this function blocks. + * + * Important note: the caller must take whatever action is necessary to + * prevent any other updates to @head. In principle, it is possible + * to modify the list as soon as sync() begins execution. + * If this sort of thing becomes necessary, an alternative version + * based on call_rcu() could be created. But only if -really- + * needed -- there is no shortage of RCU API members. + */ +static inline void list_splice_init_rcu(struct list_head *list, + struct list_head *head, + void (*sync)(void)) +{ + struct list_head *first = list->next; + struct list_head *last = list->prev; + struct list_head *at = head->next; + + if (list_empty(head)) + return; + + /* "first" and "last" tracking list, so initialize it. */ + + INIT_LIST_HEAD(list); + + /* + * At this point, the list body still points to the source list. + * Wait for any readers to finish using the list before splicing + * the list body into the new list. Any new readers will see + * an empty list. + */ + + sync(); + + /* + * Readers are finished with the source list, so perform splice. + * The order is important if the new list is global and accessible + * to concurrent RCU readers. Note that RCU readers are not + * permitted to traverse the prev pointers without excluding + * this function. + */ + + last->next = at; + smp_wmb(); + head->next = first; + first->prev = head; + at->prev = last; +} + +/** + * list_for_each_rcu - iterate over an rcu-protected list + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as list_add_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define list_for_each_rcu(pos, head) \ + for (pos = (head)->next; \ + prefetch(rcu_dereference(pos)->next), pos != (head); \ + pos = pos->next) + +#define __list_for_each_rcu(pos, head) \ + for (pos = (head)->next; \ + rcu_dereference(pos) != (head); \ + pos = pos->next) + +/** + * list_for_each_safe_rcu + * @pos: the &struct list_head to use as a loop cursor. + * @n: another &struct list_head to use as temporary storage + * @head: the head for your list. + * + * Iterate over an rcu-protected list, safe against removal of list entry. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as list_add_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define list_for_each_safe_rcu(pos, n, head) \ + for (pos = (head)->next; \ + n = rcu_dereference(pos)->next, pos != (head); \ + pos = n) + +/** + * list_for_each_entry_rcu - iterate over rcu list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as list_add_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define list_for_each_entry_rcu(pos, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member); \ + prefetch(rcu_dereference(pos)->member.next), \ + &pos->member != (head); \ + pos = list_entry(pos->member.next, typeof(*pos), member)) + + +/** + * list_for_each_continue_rcu + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + * + * Iterate over an rcu-protected list, continuing after current point. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as list_add_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define list_for_each_continue_rcu(pos, head) \ + for ((pos) = (pos)->next; \ + prefetch(rcu_dereference((pos))->next), (pos) != (head); \ + (pos) = (pos)->next) + +/** + * hlist_del_rcu - deletes entry from hash list without re-initialization + * @n: the element to delete from the hash list. + * + * Note: list_unhashed() on entry does not return true after this, + * the entry is in an undefined state. It is useful for RCU based + * lockfree traversal. + * + * In particular, it means that we can not poison the forward + * pointers that may still be used for walking the hash list. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry(). + */ +static inline void hlist_del_rcu(struct hlist_node *n) +{ + __hlist_del(n); + n->pprev = LIST_POISON2; +} + +/** + * hlist_replace_rcu - replace old entry by new one + * @old : the element to be replaced + * @new : the new element to insert + * + * The @old entry will be replaced with the @new entry atomically. + */ +static inline void hlist_replace_rcu(struct hlist_node *old, + struct hlist_node *new) +{ + struct hlist_node *next = old->next; + + new->next = next; + new->pprev = old->pprev; + smp_wmb(); + if (next) + new->next->pprev = &new->next; + *new->pprev = new; + old->pprev = LIST_POISON2; +} + +/** + * hlist_add_head_rcu + * @n: the element to add to the hash list. + * @h: the list to add to. + * + * Description: + * Adds the specified element to the specified hlist, + * while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. Regardless of the type of CPU, the + * list-traversal primitive must be guarded by rcu_read_lock(). + */ +static inline void hlist_add_head_rcu(struct hlist_node *n, + struct hlist_head *h) +{ + struct hlist_node *first = h->first; + n->next = first; + n->pprev = &h->first; + smp_wmb(); + if (first) + first->pprev = &n->next; + h->first = n; +} + +/** + * hlist_add_before_rcu + * @n: the new element to add to the hash list. + * @next: the existing element to add the new element before. + * + * Description: + * Adds the specified element to the specified hlist + * before the specified node while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. + */ +static inline void hlist_add_before_rcu(struct hlist_node *n, + struct hlist_node *next) +{ + n->pprev = next->pprev; + n->next = next; + smp_wmb(); + next->pprev = &n->next; + *(n->pprev) = n; +} + +/** + * hlist_add_after_rcu + * @prev: the existing element to add the new element after. + * @n: the new element to add to the hash list. + * + * Description: + * Adds the specified element to the specified hlist + * after the specified node while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. + */ +static inline void hlist_add_after_rcu(struct hlist_node *prev, + struct hlist_node *n) +{ + n->next = prev->next; + n->pprev = &prev->next; + smp_wmb(); + prev->next = n; + if (n->next) + n->next->pprev = &n->next; +} + +/** + * hlist_for_each_entry_rcu - iterate over rcu list of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as hlist_add_head_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ + for (pos = (head)->first; \ + rcu_dereference(pos) && ({ prefetch(pos->next); 1; }) && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ + pos = pos->next) + +#endif /* __KERNEL__ */ +#endif diff --git a/kernel/pid.c b/kernel/pid.c index 20d59fa2d493..30bd5d4b2ac7 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/lib/textsearch.c b/lib/textsearch.c index be8bda3862f5..a3e500ad51d7 100644 --- a/lib/textsearch.c +++ b/lib/textsearch.c @@ -97,6 +97,7 @@ #include #include #include +#include #include #include #include diff --git a/net/802/psnap.c b/net/802/psnap.c index 31128cb92a23..ea4643931446 100644 --- a/net/802/psnap.c +++ b/net/802/psnap.c @@ -20,6 +20,7 @@ #include #include #include +#include static LIST_HEAD(snap_list); static DEFINE_SPINLOCK(snap_lock); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 2a739adaa92b..e7ddbfa0e02f 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 72c5976a5ce3..142060f02054 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index e38034aa56f5..9e96ffcd29a3 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -13,6 +13,7 @@ * 2 of the License, or (at your option) any later version. */ #include +#include #include "br_private.h" #include "br_private_stp.h" diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 02c2f7c0b255..643c032a3a57 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -30,8 +30,7 @@ */ #include -#include -#include +#include #include #include #include -- cgit v1.2.3 From 10aa9d2cf9878757b003023d33ff90a37aa3044b Mon Sep 17 00:00:00 2001 From: Franck Bui-Huu Date: Mon, 12 May 2008 21:21:06 +0200 Subject: rculist.h: use the rcu API Make almost all list mutation primitives use rcu_assign_pointer(). The main point of this being readability improvement. Signed-off-by: Franck Bui-Huu Cc: "Paul E. McKenney" Cc: Josh Triplett Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/rculist.h | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index aa9b3eb15683..8d2c81fccfe5 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -7,6 +7,7 @@ * RCU-protected list version */ #include +#include /* * Insert a new entry between two known consecutive entries. @@ -19,9 +20,8 @@ static inline void __list_add_rcu(struct list_head *new, { new->next = next; new->prev = prev; - smp_wmb(); + rcu_assign_pointer(prev->next, new); next->prev = new; - prev->next = new; } /** @@ -110,9 +110,8 @@ static inline void list_replace_rcu(struct list_head *old, { new->next = old->next; new->prev = old->prev; - smp_wmb(); + rcu_assign_pointer(new->prev->next, new); new->next->prev = new; - new->prev->next = new; old->prev = LIST_POISON2; } @@ -166,8 +165,7 @@ static inline void list_splice_init_rcu(struct list_head *list, */ last->next = at; - smp_wmb(); - head->next = first; + rcu_assign_pointer(head->next, first); first->prev = head; at->prev = last; } @@ -280,10 +278,9 @@ static inline void hlist_replace_rcu(struct hlist_node *old, new->next = next; new->pprev = old->pprev; - smp_wmb(); + rcu_assign_pointer(*new->pprev, new); if (next) new->next->pprev = &new->next; - *new->pprev = new; old->pprev = LIST_POISON2; } @@ -310,12 +307,12 @@ static inline void hlist_add_head_rcu(struct hlist_node *n, struct hlist_head *h) { struct hlist_node *first = h->first; + n->next = first; n->pprev = &h->first; - smp_wmb(); + rcu_assign_pointer(h->first, n); if (first) first->pprev = &n->next; - h->first = n; } /** @@ -341,9 +338,8 @@ static inline void hlist_add_before_rcu(struct hlist_node *n, { n->pprev = next->pprev; n->next = next; - smp_wmb(); + rcu_assign_pointer(*(n->pprev), n); next->pprev = &n->next; - *(n->pprev) = n; } /** @@ -369,8 +365,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, { n->next = prev->next; n->pprev = &prev->next; - smp_wmb(); - prev->next = n; + rcu_assign_pointer(prev->next, n); if (n->next) n->next->pprev = &n->next; } -- cgit v1.2.3 From 78b0e0e9b27b62c4b22f05a147f7a80fa58b1ae3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 12 May 2008 21:21:06 +0200 Subject: RCU, rculist.h: fix list iterators RCU list iterators: should prefetch ever be optimised out with no side-effects, the current version will lose the barrier completely. Pointed-out-by: Linus Torvalds Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- include/linux/rculist.h | 48 +++++++++++++++--------------------------------- 1 file changed, 15 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 8d2c81fccfe5..b0f39be08b6c 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -180,31 +180,14 @@ static inline void list_splice_init_rcu(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_rcu(pos, head) \ - for (pos = (head)->next; \ - prefetch(rcu_dereference(pos)->next), pos != (head); \ - pos = pos->next) + for (pos = rcu_dereference((head)->next); \ + prefetch(pos->next), pos != (head); \ + pos = rcu_dereference(pos->next)) #define __list_for_each_rcu(pos, head) \ - for (pos = (head)->next; \ - rcu_dereference(pos) != (head); \ - pos = pos->next) - -/** - * list_for_each_safe_rcu - * @pos: the &struct list_head to use as a loop cursor. - * @n: another &struct list_head to use as temporary storage - * @head: the head for your list. - * - * Iterate over an rcu-protected list, safe against removal of list entry. - * - * This list-traversal primitive may safely run concurrently with - * the _rcu list-mutation primitives such as list_add_rcu() - * as long as the traversal is guarded by rcu_read_lock(). - */ -#define list_for_each_safe_rcu(pos, n, head) \ - for (pos = (head)->next; \ - n = rcu_dereference(pos)->next, pos != (head); \ - pos = n) + for (pos = rcu_dereference((head)->next); \ + pos != (head); \ + pos = rcu_dereference(pos->next)) /** * list_for_each_entry_rcu - iterate over rcu list of given type @@ -217,10 +200,9 @@ static inline void list_splice_init_rcu(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_entry_rcu(pos, head, member) \ - for (pos = list_entry((head)->next, typeof(*pos), member); \ - prefetch(rcu_dereference(pos)->member.next), \ - &pos->member != (head); \ - pos = list_entry(pos->member.next, typeof(*pos), member)) + for (pos = list_entry(rcu_dereference((head)->next), typeof(*pos), member); \ + prefetch(pos->member.next), &pos->member != (head); \ + pos = list_entry(rcu_dereference(pos->member.next), typeof(*pos), member)) /** @@ -235,9 +217,9 @@ static inline void list_splice_init_rcu(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_continue_rcu(pos, head) \ - for ((pos) = (pos)->next; \ - prefetch(rcu_dereference((pos))->next), (pos) != (head); \ - (pos) = (pos)->next) + for ((pos) = rcu_dereference((pos)->next); \ + prefetch((pos)->next), (pos) != (head); \ + (pos) = rcu_dereference((pos)->next)) /** * hlist_del_rcu - deletes entry from hash list without re-initialization @@ -382,10 +364,10 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, * as long as the traversal is guarded by rcu_read_lock(). */ #define hlist_for_each_entry_rcu(tpos, pos, head, member) \ - for (pos = (head)->first; \ - rcu_dereference(pos) && ({ prefetch(pos->next); 1; }) && \ + for (pos = rcu_dereference((head)->first); \ + pos && ({ prefetch(pos->next); 1; }) && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ - pos = pos->next) + pos = rcu_dereference(pos->next)) #endif /* __KERNEL__ */ #endif -- cgit v1.2.3 From eeec12bf7b7d80d1c9cae5aae0dff7e2f928c64b Mon Sep 17 00:00:00 2001 From: Graeme Gregory Date: Wed, 30 Apr 2008 19:27:40 +0200 Subject: [ALSA] soc - DAPM - add hook to read state of DAPM widget This adds a hook to read the power state of a DAPM widget, I use this in the gta02 driver to expose certain DAPM widgets in the mixer for ease of audio routing. Signed-off-by: Graeme Gregory Signed-off-by: Mark Brown Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- include/sound/soc-dapm.h | 2 ++ sound/soc/soc-dapm.c | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index a105b01e06d5..40cc695b69b6 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -223,6 +223,8 @@ int snd_soc_dapm_sys_add(struct device *dev); /* dapm audio endpoint control */ int snd_soc_dapm_set_endpoint(struct snd_soc_codec *codec, char *pin, int status); +int snd_soc_dapm_get_endpoint_status(struct snd_soc_codec *codec, + char *pin); int snd_soc_dapm_sync_endpoints(struct snd_soc_codec *codec); /* dapm widget types */ diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index af3326c63504..9fd5ee818e89 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -1342,6 +1342,29 @@ int snd_soc_dapm_set_endpoint(struct snd_soc_codec *codec, } EXPORT_SYMBOL_GPL(snd_soc_dapm_set_endpoint); +/** + * snd_soc_dapm_get_endpoint_status - get audio endpoint status + * @codec: audio codec + * @endpoint: audio signal endpoint (or start point) + * + * Get audio endpoint status - connected or disconnected. + * + * Returns status + */ +int snd_soc_dapm_get_endpoint_status(struct snd_soc_codec *codec, + char *endpoint) +{ + struct snd_soc_dapm_widget *w; + + list_for_each_entry(w, &codec->dapm_widgets, list) { + if (!strcmp(w->name, endpoint)) + return w->connected; + } + + return 0; +} +EXPORT_SYMBOL_GPL(snd_soc_dapm_get_endpoint); + /** * snd_soc_dapm_free - free dapm resources * @socdev: SoC device -- cgit v1.2.3 From 4ba1327ab8ce179c40862f3dedb4ebaaa491d737 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 13 May 2008 14:51:19 +0200 Subject: [ALSA] soc - DAPM - Add bulk control registration Most SoC drivers cut'n'paste a loop iterating over an array to register their DAPM controls. Provide a function they can call instead. Signed-off-by: Mark Brown Cc: Graeme Gregory Cc: Frank Mandarino Cc: Jarkko Nikula Cc: Richard Purdie Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- include/sound/soc-dapm.h | 3 +++ sound/soc/soc-dapm.c | 27 +++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index 40cc695b69b6..1f30616afe71 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -205,6 +205,9 @@ int snd_soc_dapm_put_enum_double(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol); int snd_soc_dapm_new_control(struct snd_soc_codec *codec, const struct snd_soc_dapm_widget *widget); +int snd_soc_dapm_new_controls(struct snd_soc_codec *codec, + const struct snd_soc_dapm_widget *widget, + int num); /* dapm path setup */ int snd_soc_dapm_connect_input(struct snd_soc_codec *codec, diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index c60200ccde60..811d65271012 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -1233,6 +1233,33 @@ int snd_soc_dapm_new_control(struct snd_soc_codec *codec, } EXPORT_SYMBOL_GPL(snd_soc_dapm_new_control); +/** + * snd_soc_dapm_new_controls - create new dapm controls + * @codec: audio codec + * @widget: widget array + * @num: number of widgets + * + * Creates new DAPM controls based upon the templates. + * + * Returns 0 for success else error. + */ +int snd_soc_dapm_new_controls(struct snd_soc_codec *codec, + const struct snd_soc_dapm_widget *widget, + int num) +{ + int i, ret; + + for (i = 0; i < num; i++) { + ret = snd_soc_dapm_new_control(codec, widget); + if (ret < 0) + return ret; + widget++; + } + return 0; +} +EXPORT_SYMBOL_GPL(snd_soc_dapm_new_controls); + + /** * snd_soc_dapm_stream_event - send a stream event to the dapm core * @codec: audio codec -- cgit v1.2.3 From 105f1c28442301237d20b05a3d52d9987614016f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 13 May 2008 14:52:19 +0200 Subject: [ALSA] soc - DAPM - Bulk route registration ASoC codecs and machine drivers that use DAPM routes all cut'n'paste a loop iterating over a null terminated array of routes. Factor out this into a bulk registration function, improving the error reporting for most users, and deprecate the old API to help out of tree users pick up the changes. Signed-off-by: Mark Brown Cc: Graeme Gregory Cc: Frank Mandarino Cc: Jarkko Nikula Cc: Richard Purdie Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- include/sound/soc-dapm.h | 17 +++++++++++- sound/soc/soc-dapm.c | 72 ++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 73 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index 1f30616afe71..bf4cf0c1d37f 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -193,6 +193,7 @@ struct snd_soc_dapm_widget; enum snd_soc_dapm_type; struct snd_soc_dapm_path; struct snd_soc_dapm_pin; +struct snd_soc_dapm_route; /* dapm controls */ int snd_soc_dapm_put_volsw(struct snd_kcontrol *kcontrol, @@ -210,10 +211,12 @@ int snd_soc_dapm_new_controls(struct snd_soc_codec *codec, int num); /* dapm path setup */ -int snd_soc_dapm_connect_input(struct snd_soc_codec *codec, +int __deprecated snd_soc_dapm_connect_input(struct snd_soc_codec *codec, const char *sink_name, const char *control_name, const char *src_name); int snd_soc_dapm_new_widgets(struct snd_soc_codec *codec); void snd_soc_dapm_free(struct snd_soc_device *socdev); +int snd_soc_dapm_add_routes(struct snd_soc_codec *codec, + const struct snd_soc_dapm_route *route, int num); /* dapm events */ int snd_soc_dapm_stream_event(struct snd_soc_codec *codec, char *stream, @@ -250,6 +253,18 @@ enum snd_soc_dapm_type { snd_soc_dapm_post, /* machine specific post widget - exec last */ }; +/* + * DAPM audio route definition. + * + * Defines an audio route originating at source via control and finishing + * at sink. + */ +struct snd_soc_dapm_route { + const char *sink; + const char *control; + const char *source; +}; + /* dapm audio path between two widgets */ struct snd_soc_dapm_path { char *name; diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 811d65271012..1ef6d94b8357 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -841,21 +841,8 @@ int snd_soc_dapm_sync_endpoints(struct snd_soc_codec *codec) } EXPORT_SYMBOL_GPL(snd_soc_dapm_sync_endpoints); -/** - * snd_soc_dapm_connect_input - connect dapm widgets - * @codec: audio codec - * @sink: name of target widget - * @control: mixer control name - * @source: name of source name - * - * Connects 2 dapm widgets together via a named audio path. The sink is - * the widget receiving the audio signal, whilst the source is the sender - * of the audio signal. - * - * Returns 0 for success else error. - */ -int snd_soc_dapm_connect_input(struct snd_soc_codec *codec, const char *sink, - const char * control, const char *source) +static int snd_soc_dapm_add_route(struct snd_soc_codec *codec, + const char *sink, const char *control, const char *source) { struct snd_soc_dapm_path *path; struct snd_soc_dapm_widget *wsource = NULL, *wsink = NULL, *w; @@ -957,8 +944,63 @@ err: kfree(path); return ret; } + +/** + * snd_soc_dapm_connect_input - connect dapm widgets + * @codec: audio codec + * @sink: name of target widget + * @control: mixer control name + * @source: name of source name + * + * Connects 2 dapm widgets together via a named audio path. The sink is + * the widget receiving the audio signal, whilst the source is the sender + * of the audio signal. + * + * This function has been deprecated in favour of snd_soc_dapm_add_routes(). + * + * Returns 0 for success else error. + */ +int snd_soc_dapm_connect_input(struct snd_soc_codec *codec, const char *sink, + const char *control, const char *source) +{ + return snd_soc_dapm_add_route(codec, sink, control, source); +} EXPORT_SYMBOL_GPL(snd_soc_dapm_connect_input); +/** + * snd_soc_dapm_add_routes - Add routes between DAPM widgets + * @codec: codec + * @route: audio routes + * @num: number of routes + * + * Connects 2 dapm widgets together via a named audio path. The sink is + * the widget receiving the audio signal, whilst the source is the sender + * of the audio signal. + * + * Returns 0 for success else error. On error all resources can be freed + * with a call to snd_soc_card_free(). + */ +int snd_soc_dapm_add_routes(struct snd_soc_codec *codec, + const struct snd_soc_dapm_route *route, int num) +{ + int i, ret; + + for (i = 0; i < num; i++) { + ret = snd_soc_dapm_add_route(codec, route->sink, + route->control, route->source); + if (ret < 0) { + printk(KERN_ERR "Failed to add route %s->%s\n", + route->source, + route->sink); + return ret; + } + route++; + } + + return 0; +} +EXPORT_SYMBOL_GPL(snd_soc_dapm_add_routes); + /** * snd_soc_dapm_new_widgets - add new dapm widgets * @codec: audio codec -- cgit v1.2.3 From 1ef6ab75c7deef931d6308af282ed7d8e480e77f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 19 May 2008 12:31:55 +0200 Subject: [ALSA] ASoC: Make CPU and codec DAI operations have same type The CPU and codec DAI operations differ only in the presence of the digital mute operation for the codec so they may as well be the same type. Signed-off-by: Mark Brown Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- include/sound/soc.h | 30 ++++++------------------------ 1 file changed, 6 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index d3c8c033dff8..73accbcfbd2d 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -272,9 +272,9 @@ struct snd_soc_ops { int (*trigger)(struct snd_pcm_substream *, int); }; -/* ASoC codec DAI ops */ -struct snd_soc_codec_ops { - /* codec DAI clocking configuration */ +/* ASoC DAI ops */ +struct snd_soc_dai_ops { + /* DAI clocking configuration */ int (*set_sysclk)(struct snd_soc_codec_dai *codec_dai, int clk_id, unsigned int freq, int dir); int (*set_pll)(struct snd_soc_codec_dai *codec_dai, @@ -282,7 +282,7 @@ struct snd_soc_codec_ops { int (*set_clkdiv)(struct snd_soc_codec_dai *codec_dai, int div_id, int div); - /* CPU DAI format configuration */ + /* DAI format configuration */ int (*set_fmt)(struct snd_soc_codec_dai *codec_dai, unsigned int fmt); int (*set_tdm_slot)(struct snd_soc_codec_dai *codec_dai, @@ -293,24 +293,6 @@ struct snd_soc_codec_ops { int (*digital_mute)(struct snd_soc_codec_dai *, int mute); }; -/* ASoC cpu DAI ops */ -struct snd_soc_cpu_ops { - /* CPU DAI clocking configuration */ - int (*set_sysclk)(struct snd_soc_cpu_dai *cpu_dai, - int clk_id, unsigned int freq, int dir); - int (*set_clkdiv)(struct snd_soc_cpu_dai *cpu_dai, - int div_id, int div); - int (*set_pll)(struct snd_soc_cpu_dai *cpu_dai, - int pll_id, unsigned int freq_in, unsigned int freq_out); - - /* CPU DAI format configuration */ - int (*set_fmt)(struct snd_soc_cpu_dai *cpu_dai, - unsigned int fmt); - int (*set_tdm_slot)(struct snd_soc_cpu_dai *cpu_dai, - unsigned int mask, int slots); - int (*set_tristate)(struct snd_soc_cpu_dai *, int tristate); -}; - /* SoC Codec DAI */ struct snd_soc_codec_dai { char *name; @@ -328,7 +310,7 @@ struct snd_soc_codec_dai { /* ops */ struct snd_soc_ops ops; - struct snd_soc_codec_ops dai_ops; + struct snd_soc_dai_ops dai_ops; /* DAI private data */ void *private_data; @@ -352,7 +334,7 @@ struct snd_soc_cpu_dai { /* ops */ struct snd_soc_ops ops; - struct snd_soc_cpu_ops dai_ops; + struct snd_soc_dai_ops dai_ops; /* DAI capabilities */ struct snd_soc_pcm_stream capture; -- cgit v1.2.3 From 1a189b97190d3f0f8cf0379a799d3555b2d648bb Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 13 Apr 2008 21:41:55 +0100 Subject: [ARM] pxa: Add bare bones PWM API Signed-off-by: Russell King --- arch/arm/Kconfig | 3 +++ include/linux/pwm.h | 31 +++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 include/linux/pwm.h (limited to 'include') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index b786e68914d4..c274dbb89a83 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -22,6 +22,9 @@ config ARM Europe. There is an ARM Linux project with a web page at . +config HAVE_PWM + bool + config SYS_SUPPORTS_APM_EMULATION bool diff --git a/include/linux/pwm.h b/include/linux/pwm.h new file mode 100644 index 000000000000..3945f803d514 --- /dev/null +++ b/include/linux/pwm.h @@ -0,0 +1,31 @@ +#ifndef __LINUX_PWM_H +#define __LINUX_PWM_H + +struct pwm_device; + +/* + * pwm_request - request a PWM device + */ +struct pwm_device *pwm_request(int pwm_id, const char *label); + +/* + * pwm_free - free a PWM device + */ +void pwm_free(struct pwm_device *pwm); + +/* + * pwm_config - change a PWM device configuration + */ +int pwm_config(struct pwm_device *pwm, int duty_ns, int period_ns); + +/* + * pwm_enable - start a PWM output toggling + */ +int pwm_enable(struct pwm_device *pwm); + +/* + * pwm_disable - stop a PWM output toggling + */ +void pwm_disable(struct pwm_device *pwm); + +#endif /* __ASM_ARCH_PWM_H */ -- cgit v1.2.3 From 284d115ec9b70d7c38752d10ad393a198db07a4b Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 20 Apr 2008 17:32:16 +0100 Subject: [ARM] pxa: separate PXA25x and PXA27x UDC register definitions The PXA25x and PXA27x USB device controller register definitions are different. Currently, they live side by side in pxa-regs.h, but only one set is available depending on the setting of PXA25x or PXA27x. This means that if we build to support both PXA25x and PXA27x, the PXA27x definitions are unavailable, even to PXA27x specific code. Remove these definitions from pxa-regs.h, and place them in separate files. Include these files where appropriate. Note: according to the dependencies in drivers/usb/gadget/Kconfig, we do not support the UDC on PXA27x nor PXA3xx CPUs, so remove the platform devices from pxa27x.c and pxa3xx.c. Signed-off-by: Russell King --- arch/arm/mach-pxa/em-x270.c | 1 + arch/arm/mach-pxa/pxa27x.c | 2 +- arch/arm/mach-pxa/pxa3xx.c | 2 +- drivers/usb/gadget/pxa2xx_udc.c | 10 +- include/asm-arm/arch-pxa/pxa-regs.h | 412 ---------------------------------- include/asm-arm/arch-pxa/pxa25x-udc.h | 163 ++++++++++++++ include/asm-arm/arch-pxa/pxa27x-udc.h | 256 +++++++++++++++++++++ 7 files changed, 430 insertions(+), 416 deletions(-) create mode 100644 include/asm-arm/arch-pxa/pxa25x-udc.h create mode 100644 include/asm-arm/arch-pxa/pxa27x-udc.h (limited to 'include') diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c index edc4f07a230d..1269ac991505 100644 --- a/arch/arm/mach-pxa/em-x270.c +++ b/arch/arm/mach-pxa/em-x270.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include diff --git a/arch/arm/mach-pxa/pxa27x.c b/arch/arm/mach-pxa/pxa27x.c index 7e945836e129..cdaf573e0f17 100644 --- a/arch/arm/mach-pxa/pxa27x.c +++ b/arch/arm/mach-pxa/pxa27x.c @@ -353,7 +353,7 @@ void __init pxa_set_i2c_power_info(struct i2c_pxa_platform_data *info) } static struct platform_device *devices[] __initdata = { - &pxa_device_udc, +/* &pxa_device_udc, The UDC driver is PXA25x only */ &pxa_device_ffuart, &pxa_device_btuart, &pxa_device_stuart, diff --git a/arch/arm/mach-pxa/pxa3xx.c b/arch/arm/mach-pxa/pxa3xx.c index 644550bfa330..7fbe78649dad 100644 --- a/arch/arm/mach-pxa/pxa3xx.c +++ b/arch/arm/mach-pxa/pxa3xx.c @@ -520,7 +520,7 @@ void __init pxa3xx_init_irq(void) */ static struct platform_device *devices[] __initdata = { - &pxa_device_udc, +/* &pxa_device_udc, The UDC driver is PXA25x only */ &pxa_device_ffuart, &pxa_device_btuart, &pxa_device_stuart, diff --git a/drivers/usb/gadget/pxa2xx_udc.c b/drivers/usb/gadget/pxa2xx_udc.c index 08f699b1fc57..63db96adc0b0 100644 --- a/drivers/usb/gadget/pxa2xx_udc.c +++ b/drivers/usb/gadget/pxa2xx_udc.c @@ -46,19 +46,25 @@ #include #include #include +#include #include #include #include -#include #include #include #include -#include #include #include +/* + * This driver is PXA25x only. Grab the right register definitions. + */ +#ifdef CONFIG_ARCH_PXA +#include +#endif + #include diff --git a/include/asm-arm/arch-pxa/pxa-regs.h b/include/asm-arm/arch-pxa/pxa-regs.h index 4b2ea1e95c57..68d742877308 100644 --- a/include/asm-arm/arch-pxa/pxa-regs.h +++ b/include/asm-arm/arch-pxa/pxa-regs.h @@ -599,418 +599,6 @@ #define SMC_REG_BASE __REG(0x40500500) /* Secondary Modem Codec */ -/* - * USB Device Controller - * PXA25x and PXA27x USB device controller registers are different. - */ -#if defined(CONFIG_PXA25x) - -#define UDC_RES1 __REG(0x40600004) /* UDC Undocumented - Reserved1 */ -#define UDC_RES2 __REG(0x40600008) /* UDC Undocumented - Reserved2 */ -#define UDC_RES3 __REG(0x4060000C) /* UDC Undocumented - Reserved3 */ - -#define UDCCR __REG(0x40600000) /* UDC Control Register */ -#define UDCCR_UDE (1 << 0) /* UDC enable */ -#define UDCCR_UDA (1 << 1) /* UDC active */ -#define UDCCR_RSM (1 << 2) /* Device resume */ -#define UDCCR_RESIR (1 << 3) /* Resume interrupt request */ -#define UDCCR_SUSIR (1 << 4) /* Suspend interrupt request */ -#define UDCCR_SRM (1 << 5) /* Suspend/resume interrupt mask */ -#define UDCCR_RSTIR (1 << 6) /* Reset interrupt request */ -#define UDCCR_REM (1 << 7) /* Reset interrupt mask */ - -#define UDCCS0 __REG(0x40600010) /* UDC Endpoint 0 Control/Status Register */ -#define UDCCS0_OPR (1 << 0) /* OUT packet ready */ -#define UDCCS0_IPR (1 << 1) /* IN packet ready */ -#define UDCCS0_FTF (1 << 2) /* Flush Tx FIFO */ -#define UDCCS0_DRWF (1 << 3) /* Device remote wakeup feature */ -#define UDCCS0_SST (1 << 4) /* Sent stall */ -#define UDCCS0_FST (1 << 5) /* Force stall */ -#define UDCCS0_RNE (1 << 6) /* Receive FIFO no empty */ -#define UDCCS0_SA (1 << 7) /* Setup active */ - -/* Bulk IN - Endpoint 1,6,11 */ -#define UDCCS1 __REG(0x40600014) /* UDC Endpoint 1 (IN) Control/Status Register */ -#define UDCCS6 __REG(0x40600028) /* UDC Endpoint 6 (IN) Control/Status Register */ -#define UDCCS11 __REG(0x4060003C) /* UDC Endpoint 11 (IN) Control/Status Register */ - -#define UDCCS_BI_TFS (1 << 0) /* Transmit FIFO service */ -#define UDCCS_BI_TPC (1 << 1) /* Transmit packet complete */ -#define UDCCS_BI_FTF (1 << 2) /* Flush Tx FIFO */ -#define UDCCS_BI_TUR (1 << 3) /* Transmit FIFO underrun */ -#define UDCCS_BI_SST (1 << 4) /* Sent stall */ -#define UDCCS_BI_FST (1 << 5) /* Force stall */ -#define UDCCS_BI_TSP (1 << 7) /* Transmit short packet */ - -/* Bulk OUT - Endpoint 2,7,12 */ -#define UDCCS2 __REG(0x40600018) /* UDC Endpoint 2 (OUT) Control/Status Register */ -#define UDCCS7 __REG(0x4060002C) /* UDC Endpoint 7 (OUT) Control/Status Register */ -#define UDCCS12 __REG(0x40600040) /* UDC Endpoint 12 (OUT) Control/Status Register */ - -#define UDCCS_BO_RFS (1 << 0) /* Receive FIFO service */ -#define UDCCS_BO_RPC (1 << 1) /* Receive packet complete */ -#define UDCCS_BO_DME (1 << 3) /* DMA enable */ -#define UDCCS_BO_SST (1 << 4) /* Sent stall */ -#define UDCCS_BO_FST (1 << 5) /* Force stall */ -#define UDCCS_BO_RNE (1 << 6) /* Receive FIFO not empty */ -#define UDCCS_BO_RSP (1 << 7) /* Receive short packet */ - -/* Isochronous IN - Endpoint 3,8,13 */ -#define UDCCS3 __REG(0x4060001C) /* UDC Endpoint 3 (IN) Control/Status Register */ -#define UDCCS8 __REG(0x40600030) /* UDC Endpoint 8 (IN) Control/Status Register */ -#define UDCCS13 __REG(0x40600044) /* UDC Endpoint 13 (IN) Control/Status Register */ - -#define UDCCS_II_TFS (1 << 0) /* Transmit FIFO service */ -#define UDCCS_II_TPC (1 << 1) /* Transmit packet complete */ -#define UDCCS_II_FTF (1 << 2) /* Flush Tx FIFO */ -#define UDCCS_II_TUR (1 << 3) /* Transmit FIFO underrun */ -#define UDCCS_II_TSP (1 << 7) /* Transmit short packet */ - -/* Isochronous OUT - Endpoint 4,9,14 */ -#define UDCCS4 __REG(0x40600020) /* UDC Endpoint 4 (OUT) Control/Status Register */ -#define UDCCS9 __REG(0x40600034) /* UDC Endpoint 9 (OUT) Control/Status Register */ -#define UDCCS14 __REG(0x40600048) /* UDC Endpoint 14 (OUT) Control/Status Register */ - -#define UDCCS_IO_RFS (1 << 0) /* Receive FIFO service */ -#define UDCCS_IO_RPC (1 << 1) /* Receive packet complete */ -#define UDCCS_IO_ROF (1 << 2) /* Receive overflow */ -#define UDCCS_IO_DME (1 << 3) /* DMA enable */ -#define UDCCS_IO_RNE (1 << 6) /* Receive FIFO not empty */ -#define UDCCS_IO_RSP (1 << 7) /* Receive short packet */ - -/* Interrupt IN - Endpoint 5,10,15 */ -#define UDCCS5 __REG(0x40600024) /* UDC Endpoint 5 (Interrupt) Control/Status Register */ -#define UDCCS10 __REG(0x40600038) /* UDC Endpoint 10 (Interrupt) Control/Status Register */ -#define UDCCS15 __REG(0x4060004C) /* UDC Endpoint 15 (Interrupt) Control/Status Register */ - -#define UDCCS_INT_TFS (1 << 0) /* Transmit FIFO service */ -#define UDCCS_INT_TPC (1 << 1) /* Transmit packet complete */ -#define UDCCS_INT_FTF (1 << 2) /* Flush Tx FIFO */ -#define UDCCS_INT_TUR (1 << 3) /* Transmit FIFO underrun */ -#define UDCCS_INT_SST (1 << 4) /* Sent stall */ -#define UDCCS_INT_FST (1 << 5) /* Force stall */ -#define UDCCS_INT_TSP (1 << 7) /* Transmit short packet */ - -#define UFNRH __REG(0x40600060) /* UDC Frame Number Register High */ -#define UFNRL __REG(0x40600064) /* UDC Frame Number Register Low */ -#define UBCR2 __REG(0x40600068) /* UDC Byte Count Reg 2 */ -#define UBCR4 __REG(0x4060006c) /* UDC Byte Count Reg 4 */ -#define UBCR7 __REG(0x40600070) /* UDC Byte Count Reg 7 */ -#define UBCR9 __REG(0x40600074) /* UDC Byte Count Reg 9 */ -#define UBCR12 __REG(0x40600078) /* UDC Byte Count Reg 12 */ -#define UBCR14 __REG(0x4060007c) /* UDC Byte Count Reg 14 */ -#define UDDR0 __REG(0x40600080) /* UDC Endpoint 0 Data Register */ -#define UDDR1 __REG(0x40600100) /* UDC Endpoint 1 Data Register */ -#define UDDR2 __REG(0x40600180) /* UDC Endpoint 2 Data Register */ -#define UDDR3 __REG(0x40600200) /* UDC Endpoint 3 Data Register */ -#define UDDR4 __REG(0x40600400) /* UDC Endpoint 4 Data Register */ -#define UDDR5 __REG(0x406000A0) /* UDC Endpoint 5 Data Register */ -#define UDDR6 __REG(0x40600600) /* UDC Endpoint 6 Data Register */ -#define UDDR7 __REG(0x40600680) /* UDC Endpoint 7 Data Register */ -#define UDDR8 __REG(0x40600700) /* UDC Endpoint 8 Data Register */ -#define UDDR9 __REG(0x40600900) /* UDC Endpoint 9 Data Register */ -#define UDDR10 __REG(0x406000C0) /* UDC Endpoint 10 Data Register */ -#define UDDR11 __REG(0x40600B00) /* UDC Endpoint 11 Data Register */ -#define UDDR12 __REG(0x40600B80) /* UDC Endpoint 12 Data Register */ -#define UDDR13 __REG(0x40600C00) /* UDC Endpoint 13 Data Register */ -#define UDDR14 __REG(0x40600E00) /* UDC Endpoint 14 Data Register */ -#define UDDR15 __REG(0x406000E0) /* UDC Endpoint 15 Data Register */ - -#define UICR0 __REG(0x40600050) /* UDC Interrupt Control Register 0 */ - -#define UICR0_IM0 (1 << 0) /* Interrupt mask ep 0 */ -#define UICR0_IM1 (1 << 1) /* Interrupt mask ep 1 */ -#define UICR0_IM2 (1 << 2) /* Interrupt mask ep 2 */ -#define UICR0_IM3 (1 << 3) /* Interrupt mask ep 3 */ -#define UICR0_IM4 (1 << 4) /* Interrupt mask ep 4 */ -#define UICR0_IM5 (1 << 5) /* Interrupt mask ep 5 */ -#define UICR0_IM6 (1 << 6) /* Interrupt mask ep 6 */ -#define UICR0_IM7 (1 << 7) /* Interrupt mask ep 7 */ - -#define UICR1 __REG(0x40600054) /* UDC Interrupt Control Register 1 */ - -#define UICR1_IM8 (1 << 0) /* Interrupt mask ep 8 */ -#define UICR1_IM9 (1 << 1) /* Interrupt mask ep 9 */ -#define UICR1_IM10 (1 << 2) /* Interrupt mask ep 10 */ -#define UICR1_IM11 (1 << 3) /* Interrupt mask ep 11 */ -#define UICR1_IM12 (1 << 4) /* Interrupt mask ep 12 */ -#define UICR1_IM13 (1 << 5) /* Interrupt mask ep 13 */ -#define UICR1_IM14 (1 << 6) /* Interrupt mask ep 14 */ -#define UICR1_IM15 (1 << 7) /* Interrupt mask ep 15 */ - -#define USIR0 __REG(0x40600058) /* UDC Status Interrupt Register 0 */ - -#define USIR0_IR0 (1 << 0) /* Interrupt request ep 0 */ -#define USIR0_IR1 (1 << 1) /* Interrupt request ep 1 */ -#define USIR0_IR2 (1 << 2) /* Interrupt request ep 2 */ -#define USIR0_IR3 (1 << 3) /* Interrupt request ep 3 */ -#define USIR0_IR4 (1 << 4) /* Interrupt request ep 4 */ -#define USIR0_IR5 (1 << 5) /* Interrupt request ep 5 */ -#define USIR0_IR6 (1 << 6) /* Interrupt request ep 6 */ -#define USIR0_IR7 (1 << 7) /* Interrupt request ep 7 */ - -#define USIR1 __REG(0x4060005C) /* UDC Status Interrupt Register 1 */ - -#define USIR1_IR8 (1 << 0) /* Interrupt request ep 8 */ -#define USIR1_IR9 (1 << 1) /* Interrupt request ep 9 */ -#define USIR1_IR10 (1 << 2) /* Interrupt request ep 10 */ -#define USIR1_IR11 (1 << 3) /* Interrupt request ep 11 */ -#define USIR1_IR12 (1 << 4) /* Interrupt request ep 12 */ -#define USIR1_IR13 (1 << 5) /* Interrupt request ep 13 */ -#define USIR1_IR14 (1 << 6) /* Interrupt request ep 14 */ -#define USIR1_IR15 (1 << 7) /* Interrupt request ep 15 */ - -#elif defined(CONFIG_PXA27x) - -#define UDCCR __REG(0x40600000) /* UDC Control Register */ -#define UDCCR_OEN (1 << 31) /* On-the-Go Enable */ -#define UDCCR_AALTHNP (1 << 30) /* A-device Alternate Host Negotiation - Protocol Port Support */ -#define UDCCR_AHNP (1 << 29) /* A-device Host Negotiation Protocol - Support */ -#define UDCCR_BHNP (1 << 28) /* B-device Host Negotiation Protocol - Enable */ -#define UDCCR_DWRE (1 << 16) /* Device Remote Wake-up Enable */ -#define UDCCR_ACN (0x03 << 11) /* Active UDC configuration Number */ -#define UDCCR_ACN_S 11 -#define UDCCR_AIN (0x07 << 8) /* Active UDC interface Number */ -#define UDCCR_AIN_S 8 -#define UDCCR_AAISN (0x07 << 5) /* Active UDC Alternate Interface - Setting Number */ -#define UDCCR_AAISN_S 5 -#define UDCCR_SMAC (1 << 4) /* Switch Endpoint Memory to Active - Configuration */ -#define UDCCR_EMCE (1 << 3) /* Endpoint Memory Configuration - Error */ -#define UDCCR_UDR (1 << 2) /* UDC Resume */ -#define UDCCR_UDA (1 << 1) /* UDC Active */ -#define UDCCR_UDE (1 << 0) /* UDC Enable */ - -#define UDCICR0 __REG(0x40600004) /* UDC Interrupt Control Register0 */ -#define UDCICR1 __REG(0x40600008) /* UDC Interrupt Control Register1 */ -#define UDCICR_FIFOERR (1 << 1) /* FIFO Error interrupt for EP */ -#define UDCICR_PKTCOMPL (1 << 0) /* Packet Complete interrupt for EP */ - -#define UDC_INT_FIFOERROR (0x2) -#define UDC_INT_PACKETCMP (0x1) - -#define UDCICR_INT(n,intr) (((intr) & 0x03) << (((n) & 0x0F) * 2)) -#define UDCICR1_IECC (1 << 31) /* IntEn - Configuration Change */ -#define UDCICR1_IESOF (1 << 30) /* IntEn - Start of Frame */ -#define UDCICR1_IERU (1 << 29) /* IntEn - Resume */ -#define UDCICR1_IESU (1 << 28) /* IntEn - Suspend */ -#define UDCICR1_IERS (1 << 27) /* IntEn - Reset */ - -#define UDCISR0 __REG(0x4060000C) /* UDC Interrupt Status Register 0 */ -#define UDCISR1 __REG(0x40600010) /* UDC Interrupt Status Register 1 */ -#define UDCISR_INT(n,intr) (((intr) & 0x03) << (((n) & 0x0F) * 2)) -#define UDCISR1_IRCC (1 << 31) /* IntReq - Configuration Change */ -#define UDCISR1_IRSOF (1 << 30) /* IntReq - Start of Frame */ -#define UDCISR1_IRRU (1 << 29) /* IntReq - Resume */ -#define UDCISR1_IRSU (1 << 28) /* IntReq - Suspend */ -#define UDCISR1_IRRS (1 << 27) /* IntReq - Reset */ - -#define UDCFNR __REG(0x40600014) /* UDC Frame Number Register */ -#define UDCOTGICR __REG(0x40600018) /* UDC On-The-Go interrupt control */ -#define UDCOTGICR_IESF (1 << 24) /* OTG SET_FEATURE command recvd */ -#define UDCOTGICR_IEXR (1 << 17) /* Extra Transciever Interrupt - Rising Edge Interrupt Enable */ -#define UDCOTGICR_IEXF (1 << 16) /* Extra Transciever Interrupt - Falling Edge Interrupt Enable */ -#define UDCOTGICR_IEVV40R (1 << 9) /* OTG Vbus Valid 4.0V Rising Edge - Interrupt Enable */ -#define UDCOTGICR_IEVV40F (1 << 8) /* OTG Vbus Valid 4.0V Falling Edge - Interrupt Enable */ -#define UDCOTGICR_IEVV44R (1 << 7) /* OTG Vbus Valid 4.4V Rising Edge - Interrupt Enable */ -#define UDCOTGICR_IEVV44F (1 << 6) /* OTG Vbus Valid 4.4V Falling Edge - Interrupt Enable */ -#define UDCOTGICR_IESVR (1 << 5) /* OTG Session Valid Rising Edge - Interrupt Enable */ -#define UDCOTGICR_IESVF (1 << 4) /* OTG Session Valid Falling Edge - Interrupt Enable */ -#define UDCOTGICR_IESDR (1 << 3) /* OTG A-Device SRP Detect Rising - Edge Interrupt Enable */ -#define UDCOTGICR_IESDF (1 << 2) /* OTG A-Device SRP Detect Falling - Edge Interrupt Enable */ -#define UDCOTGICR_IEIDR (1 << 1) /* OTG ID Change Rising Edge - Interrupt Enable */ -#define UDCOTGICR_IEIDF (1 << 0) /* OTG ID Change Falling Edge - Interrupt Enable */ - -#define UP2OCR __REG(0x40600020) /* USB Port 2 Output Control register */ - -#define UP2OCR_CPVEN (1 << 0) /* Charge Pump Vbus Enable */ -#define UP2OCR_CPVPE (1 << 1) /* Charge Pump Vbus Pulse Enable */ -#define UP2OCR_DPPDE (1 << 2) /* Host Port 2 Transceiver D+ Pull Down Enable */ -#define UP2OCR_DMPDE (1 << 3) /* Host Port 2 Transceiver D- Pull Down Enable */ -#define UP2OCR_DPPUE (1 << 4) /* Host Port 2 Transceiver D+ Pull Up Enable */ -#define UP2OCR_DMPUE (1 << 5) /* Host Port 2 Transceiver D- Pull Up Enable */ -#define UP2OCR_DPPUBE (1 << 6) /* Host Port 2 Transceiver D+ Pull Up Bypass Enable */ -#define UP2OCR_DMPUBE (1 << 7) /* Host Port 2 Transceiver D- Pull Up Bypass Enable */ -#define UP2OCR_EXSP (1 << 8) /* External Transceiver Speed Control */ -#define UP2OCR_EXSUS (1 << 9) /* External Transceiver Speed Enable */ -#define UP2OCR_IDON (1 << 10) /* OTG ID Read Enable */ -#define UP2OCR_HXS (1 << 16) /* Host Port 2 Transceiver Output Select */ -#define UP2OCR_HXOE (1 << 17) /* Host Port 2 Transceiver Output Enable */ -#define UP2OCR_SEOS (1 << 24) /* Single-Ended Output Select */ - -#define UDCCSN(x) __REG2(0x40600100, (x) << 2) -#define UDCCSR0 __REG(0x40600100) /* UDC Control/Status register - Endpoint 0 */ -#define UDCCSR0_SA (1 << 7) /* Setup Active */ -#define UDCCSR0_RNE (1 << 6) /* Receive FIFO Not Empty */ -#define UDCCSR0_FST (1 << 5) /* Force Stall */ -#define UDCCSR0_SST (1 << 4) /* Sent Stall */ -#define UDCCSR0_DME (1 << 3) /* DMA Enable */ -#define UDCCSR0_FTF (1 << 2) /* Flush Transmit FIFO */ -#define UDCCSR0_IPR (1 << 1) /* IN Packet Ready */ -#define UDCCSR0_OPC (1 << 0) /* OUT Packet Complete */ - -#define UDCCSRA __REG(0x40600104) /* UDC Control/Status register - Endpoint A */ -#define UDCCSRB __REG(0x40600108) /* UDC Control/Status register - Endpoint B */ -#define UDCCSRC __REG(0x4060010C) /* UDC Control/Status register - Endpoint C */ -#define UDCCSRD __REG(0x40600110) /* UDC Control/Status register - Endpoint D */ -#define UDCCSRE __REG(0x40600114) /* UDC Control/Status register - Endpoint E */ -#define UDCCSRF __REG(0x40600118) /* UDC Control/Status register - Endpoint F */ -#define UDCCSRG __REG(0x4060011C) /* UDC Control/Status register - Endpoint G */ -#define UDCCSRH __REG(0x40600120) /* UDC Control/Status register - Endpoint H */ -#define UDCCSRI __REG(0x40600124) /* UDC Control/Status register - Endpoint I */ -#define UDCCSRJ __REG(0x40600128) /* UDC Control/Status register - Endpoint J */ -#define UDCCSRK __REG(0x4060012C) /* UDC Control/Status register - Endpoint K */ -#define UDCCSRL __REG(0x40600130) /* UDC Control/Status register - Endpoint L */ -#define UDCCSRM __REG(0x40600134) /* UDC Control/Status register - Endpoint M */ -#define UDCCSRN __REG(0x40600138) /* UDC Control/Status register - Endpoint N */ -#define UDCCSRP __REG(0x4060013C) /* UDC Control/Status register - Endpoint P */ -#define UDCCSRQ __REG(0x40600140) /* UDC Control/Status register - Endpoint Q */ -#define UDCCSRR __REG(0x40600144) /* UDC Control/Status register - Endpoint R */ -#define UDCCSRS __REG(0x40600148) /* UDC Control/Status register - Endpoint S */ -#define UDCCSRT __REG(0x4060014C) /* UDC Control/Status register - Endpoint T */ -#define UDCCSRU __REG(0x40600150) /* UDC Control/Status register - Endpoint U */ -#define UDCCSRV __REG(0x40600154) /* UDC Control/Status register - Endpoint V */ -#define UDCCSRW __REG(0x40600158) /* UDC Control/Status register - Endpoint W */ -#define UDCCSRX __REG(0x4060015C) /* UDC Control/Status register - Endpoint X */ - -#define UDCCSR_DPE (1 << 9) /* Data Packet Error */ -#define UDCCSR_FEF (1 << 8) /* Flush Endpoint FIFO */ -#define UDCCSR_SP (1 << 7) /* Short Packet Control/Status */ -#define UDCCSR_BNE (1 << 6) /* Buffer Not Empty (IN endpoints) */ -#define UDCCSR_BNF (1 << 6) /* Buffer Not Full (OUT endpoints) */ -#define UDCCSR_FST (1 << 5) /* Force STALL */ -#define UDCCSR_SST (1 << 4) /* Sent STALL */ -#define UDCCSR_DME (1 << 3) /* DMA Enable */ -#define UDCCSR_TRN (1 << 2) /* Tx/Rx NAK */ -#define UDCCSR_PC (1 << 1) /* Packet Complete */ -#define UDCCSR_FS (1 << 0) /* FIFO needs service */ - -#define UDCBCN(x) __REG2(0x40600200, (x)<<2) -#define UDCBCR0 __REG(0x40600200) /* Byte Count Register - EP0 */ -#define UDCBCRA __REG(0x40600204) /* Byte Count Register - EPA */ -#define UDCBCRB __REG(0x40600208) /* Byte Count Register - EPB */ -#define UDCBCRC __REG(0x4060020C) /* Byte Count Register - EPC */ -#define UDCBCRD __REG(0x40600210) /* Byte Count Register - EPD */ -#define UDCBCRE __REG(0x40600214) /* Byte Count Register - EPE */ -#define UDCBCRF __REG(0x40600218) /* Byte Count Register - EPF */ -#define UDCBCRG __REG(0x4060021C) /* Byte Count Register - EPG */ -#define UDCBCRH __REG(0x40600220) /* Byte Count Register - EPH */ -#define UDCBCRI __REG(0x40600224) /* Byte Count Register - EPI */ -#define UDCBCRJ __REG(0x40600228) /* Byte Count Register - EPJ */ -#define UDCBCRK __REG(0x4060022C) /* Byte Count Register - EPK */ -#define UDCBCRL __REG(0x40600230) /* Byte Count Register - EPL */ -#define UDCBCRM __REG(0x40600234) /* Byte Count Register - EPM */ -#define UDCBCRN __REG(0x40600238) /* Byte Count Register - EPN */ -#define UDCBCRP __REG(0x4060023C) /* Byte Count Register - EPP */ -#define UDCBCRQ __REG(0x40600240) /* Byte Count Register - EPQ */ -#define UDCBCRR __REG(0x40600244) /* Byte Count Register - EPR */ -#define UDCBCRS __REG(0x40600248) /* Byte Count Register - EPS */ -#define UDCBCRT __REG(0x4060024C) /* Byte Count Register - EPT */ -#define UDCBCRU __REG(0x40600250) /* Byte Count Register - EPU */ -#define UDCBCRV __REG(0x40600254) /* Byte Count Register - EPV */ -#define UDCBCRW __REG(0x40600258) /* Byte Count Register - EPW */ -#define UDCBCRX __REG(0x4060025C) /* Byte Count Register - EPX */ - -#define UDCDN(x) __REG2(0x40600300, (x)<<2) -#define PHYS_UDCDN(x) (0x40600300 + ((x)<<2)) -#define PUDCDN(x) (volatile u32 *)(io_p2v(PHYS_UDCDN((x)))) -#define UDCDR0 __REG(0x40600300) /* Data Register - EP0 */ -#define UDCDRA __REG(0x40600304) /* Data Register - EPA */ -#define UDCDRB __REG(0x40600308) /* Data Register - EPB */ -#define UDCDRC __REG(0x4060030C) /* Data Register - EPC */ -#define UDCDRD __REG(0x40600310) /* Data Register - EPD */ -#define UDCDRE __REG(0x40600314) /* Data Register - EPE */ -#define UDCDRF __REG(0x40600318) /* Data Register - EPF */ -#define UDCDRG __REG(0x4060031C) /* Data Register - EPG */ -#define UDCDRH __REG(0x40600320) /* Data Register - EPH */ -#define UDCDRI __REG(0x40600324) /* Data Register - EPI */ -#define UDCDRJ __REG(0x40600328) /* Data Register - EPJ */ -#define UDCDRK __REG(0x4060032C) /* Data Register - EPK */ -#define UDCDRL __REG(0x40600330) /* Data Register - EPL */ -#define UDCDRM __REG(0x40600334) /* Data Register - EPM */ -#define UDCDRN __REG(0x40600338) /* Data Register - EPN */ -#define UDCDRP __REG(0x4060033C) /* Data Register - EPP */ -#define UDCDRQ __REG(0x40600340) /* Data Register - EPQ */ -#define UDCDRR __REG(0x40600344) /* Data Register - EPR */ -#define UDCDRS __REG(0x40600348) /* Data Register - EPS */ -#define UDCDRT __REG(0x4060034C) /* Data Register - EPT */ -#define UDCDRU __REG(0x40600350) /* Data Register - EPU */ -#define UDCDRV __REG(0x40600354) /* Data Register - EPV */ -#define UDCDRW __REG(0x40600358) /* Data Register - EPW */ -#define UDCDRX __REG(0x4060035C) /* Data Register - EPX */ - -#define UDCCN(x) __REG2(0x40600400, (x)<<2) -#define UDCCRA __REG(0x40600404) /* Configuration register EPA */ -#define UDCCRB __REG(0x40600408) /* Configuration register EPB */ -#define UDCCRC __REG(0x4060040C) /* Configuration register EPC */ -#define UDCCRD __REG(0x40600410) /* Configuration register EPD */ -#define UDCCRE __REG(0x40600414) /* Configuration register EPE */ -#define UDCCRF __REG(0x40600418) /* Configuration register EPF */ -#define UDCCRG __REG(0x4060041C) /* Configuration register EPG */ -#define UDCCRH __REG(0x40600420) /* Configuration register EPH */ -#define UDCCRI __REG(0x40600424) /* Configuration register EPI */ -#define UDCCRJ __REG(0x40600428) /* Configuration register EPJ */ -#define UDCCRK __REG(0x4060042C) /* Configuration register EPK */ -#define UDCCRL __REG(0x40600430) /* Configuration register EPL */ -#define UDCCRM __REG(0x40600434) /* Configuration register EPM */ -#define UDCCRN __REG(0x40600438) /* Configuration register EPN */ -#define UDCCRP __REG(0x4060043C) /* Configuration register EPP */ -#define UDCCRQ __REG(0x40600440) /* Configuration register EPQ */ -#define UDCCRR __REG(0x40600444) /* Configuration register EPR */ -#define UDCCRS __REG(0x40600448) /* Configuration register EPS */ -#define UDCCRT __REG(0x4060044C) /* Configuration register EPT */ -#define UDCCRU __REG(0x40600450) /* Configuration register EPU */ -#define UDCCRV __REG(0x40600454) /* Configuration register EPV */ -#define UDCCRW __REG(0x40600458) /* Configuration register EPW */ -#define UDCCRX __REG(0x4060045C) /* Configuration register EPX */ - -#define UDCCONR_CN (0x03 << 25) /* Configuration Number */ -#define UDCCONR_CN_S (25) -#define UDCCONR_IN (0x07 << 22) /* Interface Number */ -#define UDCCONR_IN_S (22) -#define UDCCONR_AISN (0x07 << 19) /* Alternate Interface Number */ -#define UDCCONR_AISN_S (19) -#define UDCCONR_EN (0x0f << 15) /* Endpoint Number */ -#define UDCCONR_EN_S (15) -#define UDCCONR_ET (0x03 << 13) /* Endpoint Type: */ -#define UDCCONR_ET_S (13) -#define UDCCONR_ET_INT (0x03 << 13) /* Interrupt */ -#define UDCCONR_ET_BULK (0x02 << 13) /* Bulk */ -#define UDCCONR_ET_ISO (0x01 << 13) /* Isochronous */ -#define UDCCONR_ET_NU (0x00 << 13) /* Not used */ -#define UDCCONR_ED (1 << 12) /* Endpoint Direction */ -#define UDCCONR_MPS (0x3ff << 2) /* Maximum Packet Size */ -#define UDCCONR_MPS_S (2) -#define UDCCONR_DE (1 << 1) /* Double Buffering Enable */ -#define UDCCONR_EE (1 << 0) /* Endpoint Enable */ - - -#define UDC_INT_FIFOERROR (0x2) -#define UDC_INT_PACKETCMP (0x1) - -#define UDC_FNR_MASK (0x7ff) - -#define UDCCSR_WR_MASK (UDCCSR_DME|UDCCSR_FST) -#define UDC_BCR_MASK (0x3ff) -#endif - /* * Fast Infrared Communication Port */ diff --git a/include/asm-arm/arch-pxa/pxa25x-udc.h b/include/asm-arm/arch-pxa/pxa25x-udc.h new file mode 100644 index 000000000000..840305916b6d --- /dev/null +++ b/include/asm-arm/arch-pxa/pxa25x-udc.h @@ -0,0 +1,163 @@ +#ifndef _ASM_ARCH_PXA25X_UDC_H +#define _ASM_ARCH_PXA25X_UDC_H + +#ifdef _ASM_ARCH_PXA27X_UDC_H +#error You can't include both PXA25x and PXA27x UDC support +#endif + +#define UDC_RES1 __REG(0x40600004) /* UDC Undocumented - Reserved1 */ +#define UDC_RES2 __REG(0x40600008) /* UDC Undocumented - Reserved2 */ +#define UDC_RES3 __REG(0x4060000C) /* UDC Undocumented - Reserved3 */ + +#define UDCCR __REG(0x40600000) /* UDC Control Register */ +#define UDCCR_UDE (1 << 0) /* UDC enable */ +#define UDCCR_UDA (1 << 1) /* UDC active */ +#define UDCCR_RSM (1 << 2) /* Device resume */ +#define UDCCR_RESIR (1 << 3) /* Resume interrupt request */ +#define UDCCR_SUSIR (1 << 4) /* Suspend interrupt request */ +#define UDCCR_SRM (1 << 5) /* Suspend/resume interrupt mask */ +#define UDCCR_RSTIR (1 << 6) /* Reset interrupt request */ +#define UDCCR_REM (1 << 7) /* Reset interrupt mask */ + +#define UDCCS0 __REG(0x40600010) /* UDC Endpoint 0 Control/Status Register */ +#define UDCCS0_OPR (1 << 0) /* OUT packet ready */ +#define UDCCS0_IPR (1 << 1) /* IN packet ready */ +#define UDCCS0_FTF (1 << 2) /* Flush Tx FIFO */ +#define UDCCS0_DRWF (1 << 3) /* Device remote wakeup feature */ +#define UDCCS0_SST (1 << 4) /* Sent stall */ +#define UDCCS0_FST (1 << 5) /* Force stall */ +#define UDCCS0_RNE (1 << 6) /* Receive FIFO no empty */ +#define UDCCS0_SA (1 << 7) /* Setup active */ + +/* Bulk IN - Endpoint 1,6,11 */ +#define UDCCS1 __REG(0x40600014) /* UDC Endpoint 1 (IN) Control/Status Register */ +#define UDCCS6 __REG(0x40600028) /* UDC Endpoint 6 (IN) Control/Status Register */ +#define UDCCS11 __REG(0x4060003C) /* UDC Endpoint 11 (IN) Control/Status Register */ + +#define UDCCS_BI_TFS (1 << 0) /* Transmit FIFO service */ +#define UDCCS_BI_TPC (1 << 1) /* Transmit packet complete */ +#define UDCCS_BI_FTF (1 << 2) /* Flush Tx FIFO */ +#define UDCCS_BI_TUR (1 << 3) /* Transmit FIFO underrun */ +#define UDCCS_BI_SST (1 << 4) /* Sent stall */ +#define UDCCS_BI_FST (1 << 5) /* Force stall */ +#define UDCCS_BI_TSP (1 << 7) /* Transmit short packet */ + +/* Bulk OUT - Endpoint 2,7,12 */ +#define UDCCS2 __REG(0x40600018) /* UDC Endpoint 2 (OUT) Control/Status Register */ +#define UDCCS7 __REG(0x4060002C) /* UDC Endpoint 7 (OUT) Control/Status Register */ +#define UDCCS12 __REG(0x40600040) /* UDC Endpoint 12 (OUT) Control/Status Register */ + +#define UDCCS_BO_RFS (1 << 0) /* Receive FIFO service */ +#define UDCCS_BO_RPC (1 << 1) /* Receive packet complete */ +#define UDCCS_BO_DME (1 << 3) /* DMA enable */ +#define UDCCS_BO_SST (1 << 4) /* Sent stall */ +#define UDCCS_BO_FST (1 << 5) /* Force stall */ +#define UDCCS_BO_RNE (1 << 6) /* Receive FIFO not empty */ +#define UDCCS_BO_RSP (1 << 7) /* Receive short packet */ + +/* Isochronous IN - Endpoint 3,8,13 */ +#define UDCCS3 __REG(0x4060001C) /* UDC Endpoint 3 (IN) Control/Status Register */ +#define UDCCS8 __REG(0x40600030) /* UDC Endpoint 8 (IN) Control/Status Register */ +#define UDCCS13 __REG(0x40600044) /* UDC Endpoint 13 (IN) Control/Status Register */ + +#define UDCCS_II_TFS (1 << 0) /* Transmit FIFO service */ +#define UDCCS_II_TPC (1 << 1) /* Transmit packet complete */ +#define UDCCS_II_FTF (1 << 2) /* Flush Tx FIFO */ +#define UDCCS_II_TUR (1 << 3) /* Transmit FIFO underrun */ +#define UDCCS_II_TSP (1 << 7) /* Transmit short packet */ + +/* Isochronous OUT - Endpoint 4,9,14 */ +#define UDCCS4 __REG(0x40600020) /* UDC Endpoint 4 (OUT) Control/Status Register */ +#define UDCCS9 __REG(0x40600034) /* UDC Endpoint 9 (OUT) Control/Status Register */ +#define UDCCS14 __REG(0x40600048) /* UDC Endpoint 14 (OUT) Control/Status Register */ + +#define UDCCS_IO_RFS (1 << 0) /* Receive FIFO service */ +#define UDCCS_IO_RPC (1 << 1) /* Receive packet complete */ +#define UDCCS_IO_ROF (1 << 2) /* Receive overflow */ +#define UDCCS_IO_DME (1 << 3) /* DMA enable */ +#define UDCCS_IO_RNE (1 << 6) /* Receive FIFO not empty */ +#define UDCCS_IO_RSP (1 << 7) /* Receive short packet */ + +/* Interrupt IN - Endpoint 5,10,15 */ +#define UDCCS5 __REG(0x40600024) /* UDC Endpoint 5 (Interrupt) Control/Status Register */ +#define UDCCS10 __REG(0x40600038) /* UDC Endpoint 10 (Interrupt) Control/Status Register */ +#define UDCCS15 __REG(0x4060004C) /* UDC Endpoint 15 (Interrupt) Control/Status Register */ + +#define UDCCS_INT_TFS (1 << 0) /* Transmit FIFO service */ +#define UDCCS_INT_TPC (1 << 1) /* Transmit packet complete */ +#define UDCCS_INT_FTF (1 << 2) /* Flush Tx FIFO */ +#define UDCCS_INT_TUR (1 << 3) /* Transmit FIFO underrun */ +#define UDCCS_INT_SST (1 << 4) /* Sent stall */ +#define UDCCS_INT_FST (1 << 5) /* Force stall */ +#define UDCCS_INT_TSP (1 << 7) /* Transmit short packet */ + +#define UFNRH __REG(0x40600060) /* UDC Frame Number Register High */ +#define UFNRL __REG(0x40600064) /* UDC Frame Number Register Low */ +#define UBCR2 __REG(0x40600068) /* UDC Byte Count Reg 2 */ +#define UBCR4 __REG(0x4060006c) /* UDC Byte Count Reg 4 */ +#define UBCR7 __REG(0x40600070) /* UDC Byte Count Reg 7 */ +#define UBCR9 __REG(0x40600074) /* UDC Byte Count Reg 9 */ +#define UBCR12 __REG(0x40600078) /* UDC Byte Count Reg 12 */ +#define UBCR14 __REG(0x4060007c) /* UDC Byte Count Reg 14 */ +#define UDDR0 __REG(0x40600080) /* UDC Endpoint 0 Data Register */ +#define UDDR1 __REG(0x40600100) /* UDC Endpoint 1 Data Register */ +#define UDDR2 __REG(0x40600180) /* UDC Endpoint 2 Data Register */ +#define UDDR3 __REG(0x40600200) /* UDC Endpoint 3 Data Register */ +#define UDDR4 __REG(0x40600400) /* UDC Endpoint 4 Data Register */ +#define UDDR5 __REG(0x406000A0) /* UDC Endpoint 5 Data Register */ +#define UDDR6 __REG(0x40600600) /* UDC Endpoint 6 Data Register */ +#define UDDR7 __REG(0x40600680) /* UDC Endpoint 7 Data Register */ +#define UDDR8 __REG(0x40600700) /* UDC Endpoint 8 Data Register */ +#define UDDR9 __REG(0x40600900) /* UDC Endpoint 9 Data Register */ +#define UDDR10 __REG(0x406000C0) /* UDC Endpoint 10 Data Register */ +#define UDDR11 __REG(0x40600B00) /* UDC Endpoint 11 Data Register */ +#define UDDR12 __REG(0x40600B80) /* UDC Endpoint 12 Data Register */ +#define UDDR13 __REG(0x40600C00) /* UDC Endpoint 13 Data Register */ +#define UDDR14 __REG(0x40600E00) /* UDC Endpoint 14 Data Register */ +#define UDDR15 __REG(0x406000E0) /* UDC Endpoint 15 Data Register */ + +#define UICR0 __REG(0x40600050) /* UDC Interrupt Control Register 0 */ + +#define UICR0_IM0 (1 << 0) /* Interrupt mask ep 0 */ +#define UICR0_IM1 (1 << 1) /* Interrupt mask ep 1 */ +#define UICR0_IM2 (1 << 2) /* Interrupt mask ep 2 */ +#define UICR0_IM3 (1 << 3) /* Interrupt mask ep 3 */ +#define UICR0_IM4 (1 << 4) /* Interrupt mask ep 4 */ +#define UICR0_IM5 (1 << 5) /* Interrupt mask ep 5 */ +#define UICR0_IM6 (1 << 6) /* Interrupt mask ep 6 */ +#define UICR0_IM7 (1 << 7) /* Interrupt mask ep 7 */ + +#define UICR1 __REG(0x40600054) /* UDC Interrupt Control Register 1 */ + +#define UICR1_IM8 (1 << 0) /* Interrupt mask ep 8 */ +#define UICR1_IM9 (1 << 1) /* Interrupt mask ep 9 */ +#define UICR1_IM10 (1 << 2) /* Interrupt mask ep 10 */ +#define UICR1_IM11 (1 << 3) /* Interrupt mask ep 11 */ +#define UICR1_IM12 (1 << 4) /* Interrupt mask ep 12 */ +#define UICR1_IM13 (1 << 5) /* Interrupt mask ep 13 */ +#define UICR1_IM14 (1 << 6) /* Interrupt mask ep 14 */ +#define UICR1_IM15 (1 << 7) /* Interrupt mask ep 15 */ + +#define USIR0 __REG(0x40600058) /* UDC Status Interrupt Register 0 */ + +#define USIR0_IR0 (1 << 0) /* Interrupt request ep 0 */ +#define USIR0_IR1 (1 << 1) /* Interrupt request ep 1 */ +#define USIR0_IR2 (1 << 2) /* Interrupt request ep 2 */ +#define USIR0_IR3 (1 << 3) /* Interrupt request ep 3 */ +#define USIR0_IR4 (1 << 4) /* Interrupt request ep 4 */ +#define USIR0_IR5 (1 << 5) /* Interrupt request ep 5 */ +#define USIR0_IR6 (1 << 6) /* Interrupt request ep 6 */ +#define USIR0_IR7 (1 << 7) /* Interrupt request ep 7 */ + +#define USIR1 __REG(0x4060005C) /* UDC Status Interrupt Register 1 */ + +#define USIR1_IR8 (1 << 0) /* Interrupt request ep 8 */ +#define USIR1_IR9 (1 << 1) /* Interrupt request ep 9 */ +#define USIR1_IR10 (1 << 2) /* Interrupt request ep 10 */ +#define USIR1_IR11 (1 << 3) /* Interrupt request ep 11 */ +#define USIR1_IR12 (1 << 4) /* Interrupt request ep 12 */ +#define USIR1_IR13 (1 << 5) /* Interrupt request ep 13 */ +#define USIR1_IR14 (1 << 6) /* Interrupt request ep 14 */ +#define USIR1_IR15 (1 << 7) /* Interrupt request ep 15 */ + +#endif diff --git a/include/asm-arm/arch-pxa/pxa27x-udc.h b/include/asm-arm/arch-pxa/pxa27x-udc.h new file mode 100644 index 000000000000..9cf0b1f88112 --- /dev/null +++ b/include/asm-arm/arch-pxa/pxa27x-udc.h @@ -0,0 +1,256 @@ +#ifndef _ASM_ARCH_PXA27X_UDC_H +#define _ASM_ARCH_PXA27X_UDC_H + +#ifdef _ASM_ARCH_PXA25X_UDC_H +#error You can't include both PXA25x and PXA27x UDC support +#endif + +#define UDCCR __REG(0x40600000) /* UDC Control Register */ +#define UDCCR_OEN (1 << 31) /* On-the-Go Enable */ +#define UDCCR_AALTHNP (1 << 30) /* A-device Alternate Host Negotiation + Protocol Port Support */ +#define UDCCR_AHNP (1 << 29) /* A-device Host Negotiation Protocol + Support */ +#define UDCCR_BHNP (1 << 28) /* B-device Host Negotiation Protocol + Enable */ +#define UDCCR_DWRE (1 << 16) /* Device Remote Wake-up Enable */ +#define UDCCR_ACN (0x03 << 11) /* Active UDC configuration Number */ +#define UDCCR_ACN_S 11 +#define UDCCR_AIN (0x07 << 8) /* Active UDC interface Number */ +#define UDCCR_AIN_S 8 +#define UDCCR_AAISN (0x07 << 5) /* Active UDC Alternate Interface + Setting Number */ +#define UDCCR_AAISN_S 5 +#define UDCCR_SMAC (1 << 4) /* Switch Endpoint Memory to Active + Configuration */ +#define UDCCR_EMCE (1 << 3) /* Endpoint Memory Configuration + Error */ +#define UDCCR_UDR (1 << 2) /* UDC Resume */ +#define UDCCR_UDA (1 << 1) /* UDC Active */ +#define UDCCR_UDE (1 << 0) /* UDC Enable */ + +#define UDCICR0 __REG(0x40600004) /* UDC Interrupt Control Register0 */ +#define UDCICR1 __REG(0x40600008) /* UDC Interrupt Control Register1 */ +#define UDCICR_FIFOERR (1 << 1) /* FIFO Error interrupt for EP */ +#define UDCICR_PKTCOMPL (1 << 0) /* Packet Complete interrupt for EP */ + +#define UDC_INT_FIFOERROR (0x2) +#define UDC_INT_PACKETCMP (0x1) + +#define UDCICR_INT(n,intr) (((intr) & 0x03) << (((n) & 0x0F) * 2)) +#define UDCICR1_IECC (1 << 31) /* IntEn - Configuration Change */ +#define UDCICR1_IESOF (1 << 30) /* IntEn - Start of Frame */ +#define UDCICR1_IERU (1 << 29) /* IntEn - Resume */ +#define UDCICR1_IESU (1 << 28) /* IntEn - Suspend */ +#define UDCICR1_IERS (1 << 27) /* IntEn - Reset */ + +#define UDCISR0 __REG(0x4060000C) /* UDC Interrupt Status Register 0 */ +#define UDCISR1 __REG(0x40600010) /* UDC Interrupt Status Register 1 */ +#define UDCISR_INT(n,intr) (((intr) & 0x03) << (((n) & 0x0F) * 2)) +#define UDCISR1_IRCC (1 << 31) /* IntReq - Configuration Change */ +#define UDCISR1_IRSOF (1 << 30) /* IntReq - Start of Frame */ +#define UDCISR1_IRRU (1 << 29) /* IntReq - Resume */ +#define UDCISR1_IRSU (1 << 28) /* IntReq - Suspend */ +#define UDCISR1_IRRS (1 << 27) /* IntReq - Reset */ + +#define UDCFNR __REG(0x40600014) /* UDC Frame Number Register */ +#define UDCOTGICR __REG(0x40600018) /* UDC On-The-Go interrupt control */ +#define UDCOTGICR_IESF (1 << 24) /* OTG SET_FEATURE command recvd */ +#define UDCOTGICR_IEXR (1 << 17) /* Extra Transciever Interrupt + Rising Edge Interrupt Enable */ +#define UDCOTGICR_IEXF (1 << 16) /* Extra Transciever Interrupt + Falling Edge Interrupt Enable */ +#define UDCOTGICR_IEVV40R (1 << 9) /* OTG Vbus Valid 4.0V Rising Edge + Interrupt Enable */ +#define UDCOTGICR_IEVV40F (1 << 8) /* OTG Vbus Valid 4.0V Falling Edge + Interrupt Enable */ +#define UDCOTGICR_IEVV44R (1 << 7) /* OTG Vbus Valid 4.4V Rising Edge + Interrupt Enable */ +#define UDCOTGICR_IEVV44F (1 << 6) /* OTG Vbus Valid 4.4V Falling Edge + Interrupt Enable */ +#define UDCOTGICR_IESVR (1 << 5) /* OTG Session Valid Rising Edge + Interrupt Enable */ +#define UDCOTGICR_IESVF (1 << 4) /* OTG Session Valid Falling Edge + Interrupt Enable */ +#define UDCOTGICR_IESDR (1 << 3) /* OTG A-Device SRP Detect Rising + Edge Interrupt Enable */ +#define UDCOTGICR_IESDF (1 << 2) /* OTG A-Device SRP Detect Falling + Edge Interrupt Enable */ +#define UDCOTGICR_IEIDR (1 << 1) /* OTG ID Change Rising Edge + Interrupt Enable */ +#define UDCOTGICR_IEIDF (1 << 0) /* OTG ID Change Falling Edge + Interrupt Enable */ + +#define UP2OCR __REG(0x40600020) /* USB Port 2 Output Control register */ + +#define UP2OCR_CPVEN (1 << 0) /* Charge Pump Vbus Enable */ +#define UP2OCR_CPVPE (1 << 1) /* Charge Pump Vbus Pulse Enable */ +#define UP2OCR_DPPDE (1 << 2) /* Host Port 2 Transceiver D+ Pull Down Enable */ +#define UP2OCR_DMPDE (1 << 3) /* Host Port 2 Transceiver D- Pull Down Enable */ +#define UP2OCR_DPPUE (1 << 4) /* Host Port 2 Transceiver D+ Pull Up Enable */ +#define UP2OCR_DMPUE (1 << 5) /* Host Port 2 Transceiver D- Pull Up Enable */ +#define UP2OCR_DPPUBE (1 << 6) /* Host Port 2 Transceiver D+ Pull Up Bypass Enable */ +#define UP2OCR_DMPUBE (1 << 7) /* Host Port 2 Transceiver D- Pull Up Bypass Enable */ +#define UP2OCR_EXSP (1 << 8) /* External Transceiver Speed Control */ +#define UP2OCR_EXSUS (1 << 9) /* External Transceiver Speed Enable */ +#define UP2OCR_IDON (1 << 10) /* OTG ID Read Enable */ +#define UP2OCR_HXS (1 << 16) /* Host Port 2 Transceiver Output Select */ +#define UP2OCR_HXOE (1 << 17) /* Host Port 2 Transceiver Output Enable */ +#define UP2OCR_SEOS (1 << 24) /* Single-Ended Output Select */ + +#define UDCCSN(x) __REG2(0x40600100, (x) << 2) +#define UDCCSR0 __REG(0x40600100) /* UDC Control/Status register - Endpoint 0 */ +#define UDCCSR0_SA (1 << 7) /* Setup Active */ +#define UDCCSR0_RNE (1 << 6) /* Receive FIFO Not Empty */ +#define UDCCSR0_FST (1 << 5) /* Force Stall */ +#define UDCCSR0_SST (1 << 4) /* Sent Stall */ +#define UDCCSR0_DME (1 << 3) /* DMA Enable */ +#define UDCCSR0_FTF (1 << 2) /* Flush Transmit FIFO */ +#define UDCCSR0_IPR (1 << 1) /* IN Packet Ready */ +#define UDCCSR0_OPC (1 << 0) /* OUT Packet Complete */ + +#define UDCCSRA __REG(0x40600104) /* UDC Control/Status register - Endpoint A */ +#define UDCCSRB __REG(0x40600108) /* UDC Control/Status register - Endpoint B */ +#define UDCCSRC __REG(0x4060010C) /* UDC Control/Status register - Endpoint C */ +#define UDCCSRD __REG(0x40600110) /* UDC Control/Status register - Endpoint D */ +#define UDCCSRE __REG(0x40600114) /* UDC Control/Status register - Endpoint E */ +#define UDCCSRF __REG(0x40600118) /* UDC Control/Status register - Endpoint F */ +#define UDCCSRG __REG(0x4060011C) /* UDC Control/Status register - Endpoint G */ +#define UDCCSRH __REG(0x40600120) /* UDC Control/Status register - Endpoint H */ +#define UDCCSRI __REG(0x40600124) /* UDC Control/Status register - Endpoint I */ +#define UDCCSRJ __REG(0x40600128) /* UDC Control/Status register - Endpoint J */ +#define UDCCSRK __REG(0x4060012C) /* UDC Control/Status register - Endpoint K */ +#define UDCCSRL __REG(0x40600130) /* UDC Control/Status register - Endpoint L */ +#define UDCCSRM __REG(0x40600134) /* UDC Control/Status register - Endpoint M */ +#define UDCCSRN __REG(0x40600138) /* UDC Control/Status register - Endpoint N */ +#define UDCCSRP __REG(0x4060013C) /* UDC Control/Status register - Endpoint P */ +#define UDCCSRQ __REG(0x40600140) /* UDC Control/Status register - Endpoint Q */ +#define UDCCSRR __REG(0x40600144) /* UDC Control/Status register - Endpoint R */ +#define UDCCSRS __REG(0x40600148) /* UDC Control/Status register - Endpoint S */ +#define UDCCSRT __REG(0x4060014C) /* UDC Control/Status register - Endpoint T */ +#define UDCCSRU __REG(0x40600150) /* UDC Control/Status register - Endpoint U */ +#define UDCCSRV __REG(0x40600154) /* UDC Control/Status register - Endpoint V */ +#define UDCCSRW __REG(0x40600158) /* UDC Control/Status register - Endpoint W */ +#define UDCCSRX __REG(0x4060015C) /* UDC Control/Status register - Endpoint X */ + +#define UDCCSR_DPE (1 << 9) /* Data Packet Error */ +#define UDCCSR_FEF (1 << 8) /* Flush Endpoint FIFO */ +#define UDCCSR_SP (1 << 7) /* Short Packet Control/Status */ +#define UDCCSR_BNE (1 << 6) /* Buffer Not Empty (IN endpoints) */ +#define UDCCSR_BNF (1 << 6) /* Buffer Not Full (OUT endpoints) */ +#define UDCCSR_FST (1 << 5) /* Force STALL */ +#define UDCCSR_SST (1 << 4) /* Sent STALL */ +#define UDCCSR_DME (1 << 3) /* DMA Enable */ +#define UDCCSR_TRN (1 << 2) /* Tx/Rx NAK */ +#define UDCCSR_PC (1 << 1) /* Packet Complete */ +#define UDCCSR_FS (1 << 0) /* FIFO needs service */ + +#define UDCBCN(x) __REG2(0x40600200, (x)<<2) +#define UDCBCR0 __REG(0x40600200) /* Byte Count Register - EP0 */ +#define UDCBCRA __REG(0x40600204) /* Byte Count Register - EPA */ +#define UDCBCRB __REG(0x40600208) /* Byte Count Register - EPB */ +#define UDCBCRC __REG(0x4060020C) /* Byte Count Register - EPC */ +#define UDCBCRD __REG(0x40600210) /* Byte Count Register - EPD */ +#define UDCBCRE __REG(0x40600214) /* Byte Count Register - EPE */ +#define UDCBCRF __REG(0x40600218) /* Byte Count Register - EPF */ +#define UDCBCRG __REG(0x4060021C) /* Byte Count Register - EPG */ +#define UDCBCRH __REG(0x40600220) /* Byte Count Register - EPH */ +#define UDCBCRI __REG(0x40600224) /* Byte Count Register - EPI */ +#define UDCBCRJ __REG(0x40600228) /* Byte Count Register - EPJ */ +#define UDCBCRK __REG(0x4060022C) /* Byte Count Register - EPK */ +#define UDCBCRL __REG(0x40600230) /* Byte Count Register - EPL */ +#define UDCBCRM __REG(0x40600234) /* Byte Count Register - EPM */ +#define UDCBCRN __REG(0x40600238) /* Byte Count Register - EPN */ +#define UDCBCRP __REG(0x4060023C) /* Byte Count Register - EPP */ +#define UDCBCRQ __REG(0x40600240) /* Byte Count Register - EPQ */ +#define UDCBCRR __REG(0x40600244) /* Byte Count Register - EPR */ +#define UDCBCRS __REG(0x40600248) /* Byte Count Register - EPS */ +#define UDCBCRT __REG(0x4060024C) /* Byte Count Register - EPT */ +#define UDCBCRU __REG(0x40600250) /* Byte Count Register - EPU */ +#define UDCBCRV __REG(0x40600254) /* Byte Count Register - EPV */ +#define UDCBCRW __REG(0x40600258) /* Byte Count Register - EPW */ +#define UDCBCRX __REG(0x4060025C) /* Byte Count Register - EPX */ + +#define UDCDN(x) __REG2(0x40600300, (x)<<2) +#define PHYS_UDCDN(x) (0x40600300 + ((x)<<2)) +#define PUDCDN(x) (volatile u32 *)(io_p2v(PHYS_UDCDN((x)))) +#define UDCDR0 __REG(0x40600300) /* Data Register - EP0 */ +#define UDCDRA __REG(0x40600304) /* Data Register - EPA */ +#define UDCDRB __REG(0x40600308) /* Data Register - EPB */ +#define UDCDRC __REG(0x4060030C) /* Data Register - EPC */ +#define UDCDRD __REG(0x40600310) /* Data Register - EPD */ +#define UDCDRE __REG(0x40600314) /* Data Register - EPE */ +#define UDCDRF __REG(0x40600318) /* Data Register - EPF */ +#define UDCDRG __REG(0x4060031C) /* Data Register - EPG */ +#define UDCDRH __REG(0x40600320) /* Data Register - EPH */ +#define UDCDRI __REG(0x40600324) /* Data Register - EPI */ +#define UDCDRJ __REG(0x40600328) /* Data Register - EPJ */ +#define UDCDRK __REG(0x4060032C) /* Data Register - EPK */ +#define UDCDRL __REG(0x40600330) /* Data Register - EPL */ +#define UDCDRM __REG(0x40600334) /* Data Register - EPM */ +#define UDCDRN __REG(0x40600338) /* Data Register - EPN */ +#define UDCDRP __REG(0x4060033C) /* Data Register - EPP */ +#define UDCDRQ __REG(0x40600340) /* Data Register - EPQ */ +#define UDCDRR __REG(0x40600344) /* Data Register - EPR */ +#define UDCDRS __REG(0x40600348) /* Data Register - EPS */ +#define UDCDRT __REG(0x4060034C) /* Data Register - EPT */ +#define UDCDRU __REG(0x40600350) /* Data Register - EPU */ +#define UDCDRV __REG(0x40600354) /* Data Register - EPV */ +#define UDCDRW __REG(0x40600358) /* Data Register - EPW */ +#define UDCDRX __REG(0x4060035C) /* Data Register - EPX */ + +#define UDCCN(x) __REG2(0x40600400, (x)<<2) +#define UDCCRA __REG(0x40600404) /* Configuration register EPA */ +#define UDCCRB __REG(0x40600408) /* Configuration register EPB */ +#define UDCCRC __REG(0x4060040C) /* Configuration register EPC */ +#define UDCCRD __REG(0x40600410) /* Configuration register EPD */ +#define UDCCRE __REG(0x40600414) /* Configuration register EPE */ +#define UDCCRF __REG(0x40600418) /* Configuration register EPF */ +#define UDCCRG __REG(0x4060041C) /* Configuration register EPG */ +#define UDCCRH __REG(0x40600420) /* Configuration register EPH */ +#define UDCCRI __REG(0x40600424) /* Configuration register EPI */ +#define UDCCRJ __REG(0x40600428) /* Configuration register EPJ */ +#define UDCCRK __REG(0x4060042C) /* Configuration register EPK */ +#define UDCCRL __REG(0x40600430) /* Configuration register EPL */ +#define UDCCRM __REG(0x40600434) /* Configuration register EPM */ +#define UDCCRN __REG(0x40600438) /* Configuration register EPN */ +#define UDCCRP __REG(0x4060043C) /* Configuration register EPP */ +#define UDCCRQ __REG(0x40600440) /* Configuration register EPQ */ +#define UDCCRR __REG(0x40600444) /* Configuration register EPR */ +#define UDCCRS __REG(0x40600448) /* Configuration register EPS */ +#define UDCCRT __REG(0x4060044C) /* Configuration register EPT */ +#define UDCCRU __REG(0x40600450) /* Configuration register EPU */ +#define UDCCRV __REG(0x40600454) /* Configuration register EPV */ +#define UDCCRW __REG(0x40600458) /* Configuration register EPW */ +#define UDCCRX __REG(0x4060045C) /* Configuration register EPX */ + +#define UDCCONR_CN (0x03 << 25) /* Configuration Number */ +#define UDCCONR_CN_S (25) +#define UDCCONR_IN (0x07 << 22) /* Interface Number */ +#define UDCCONR_IN_S (22) +#define UDCCONR_AISN (0x07 << 19) /* Alternate Interface Number */ +#define UDCCONR_AISN_S (19) +#define UDCCONR_EN (0x0f << 15) /* Endpoint Number */ +#define UDCCONR_EN_S (15) +#define UDCCONR_ET (0x03 << 13) /* Endpoint Type: */ +#define UDCCONR_ET_S (13) +#define UDCCONR_ET_INT (0x03 << 13) /* Interrupt */ +#define UDCCONR_ET_BULK (0x02 << 13) /* Bulk */ +#define UDCCONR_ET_ISO (0x01 << 13) /* Isochronous */ +#define UDCCONR_ET_NU (0x00 << 13) /* Not used */ +#define UDCCONR_ED (1 << 12) /* Endpoint Direction */ +#define UDCCONR_MPS (0x3ff << 2) /* Maximum Packet Size */ +#define UDCCONR_MPS_S (2) +#define UDCCONR_DE (1 << 1) /* Double Buffering Enable */ +#define UDCCONR_EE (1 << 0) /* Endpoint Enable */ + + +#define UDC_INT_FIFOERROR (0x2) +#define UDC_INT_PACKETCMP (0x1) + +#define UDC_FNR_MASK (0x7ff) + +#define UDCCSR_WR_MASK (UDCCSR_DME|UDCCSR_FST) +#define UDC_BCR_MASK (0x3ff) + +#endif -- cgit v1.2.3 From 0be9898adb6f58fee44f0fec0bbc0eac997ea9eb Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 19 May 2008 12:31:28 +0200 Subject: [ALSA] ASoC: Clarify API for bias configuration Currently the ASoC core configures the bias levels in the system using a callback on codecs and machines called 'dapm_event', passing it PCI style power levels as SNDRV_CTL_POWER_ constants. This is more obscure than it needs to be and has caused confusion to driver authors, especially given that DAPM is also performing power management. Address this by renaming the callback function to 'set_bias_level' and using constants explicitly representing the off, standby, pre-on and on states which DAPM transitions through. Also unexport the API for setting bias level: there are currently no in-tree users of this API other than the core itself and it is likely that the core would need to be extended to cater for any users. Signed-off-by: Mark Brown Cc: Jarkko Nikula Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- include/sound/soc-dapm.h | 3 ++- include/sound/soc.h | 28 +++++++++++++++++++++++---- sound/soc/codecs/tlv320aic3x.c | 26 ++++++++++++------------- sound/soc/codecs/wm8731.c | 28 +++++++++++++-------------- sound/soc/codecs/wm8750.c | 36 +++++++++++++++++----------------- sound/soc/codecs/wm8753.c | 36 +++++++++++++++++----------------- sound/soc/codecs/wm9712.c | 28 +++++++++++++-------------- sound/soc/codecs/wm9713.c | 26 ++++++++++++------------- sound/soc/soc-core.c | 25 ++++++++++++------------ sound/soc/soc-dapm.c | 44 ++++++++++++++++++++---------------------- 10 files changed, 150 insertions(+), 130 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index bf4cf0c1d37f..f8223fae5804 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -221,7 +221,8 @@ int snd_soc_dapm_add_routes(struct snd_soc_codec *codec, /* dapm events */ int snd_soc_dapm_stream_event(struct snd_soc_codec *codec, char *stream, int event); -int snd_soc_dapm_device_event(struct snd_soc_device *socdev, int event); +int snd_soc_dapm_set_bias_level(struct snd_soc_device *socdev, + enum snd_soc_bias_level level); /* dapm sys fs - used by the core */ int snd_soc_dapm_sys_add(struct device *dev); diff --git a/include/sound/soc.h b/include/sound/soc.h index 73accbcfbd2d..bca9538d9e50 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -102,6 +102,24 @@ .get = xhandler_get, .put = xhandler_put, \ .private_value = (unsigned long)&xenum } +/* + * Bias levels + * + * @ON: Bias is fully on for audio playback and capture operations. + * @PREPARE: Prepare for audio operations. Called before DAPM switching for + * stream start and stop operations. + * @STANDBY: Low power standby state when no playback/capture operations are + * in progress. NOTE: The transition time between STANDBY and ON + * should be as fast as possible and no longer than 10ms. + * @OFF: Power Off. No restrictions on transition times. + */ +enum snd_soc_bias_level { + SND_SOC_BIAS_ON, + SND_SOC_BIAS_PREPARE, + SND_SOC_BIAS_STANDBY, + SND_SOC_BIAS_OFF, +}; + /* * Digital Audio Interface (DAI) types */ @@ -356,7 +374,8 @@ struct snd_soc_codec { struct mutex mutex; /* callbacks */ - int (*dapm_event)(struct snd_soc_codec *codec, int event); + int (*set_bias_level)(struct snd_soc_codec *, + enum snd_soc_bias_level level); /* runtime */ struct snd_card *card; @@ -378,8 +397,8 @@ struct snd_soc_codec { /* dapm */ struct list_head dapm_widgets; struct list_head dapm_paths; - unsigned int dapm_state; - unsigned int suspend_dapm_state; + enum snd_soc_bias_level bias_level; + enum snd_soc_bias_level suspend_bias_level; struct delayed_work delayed_work; /* codec DAI's */ @@ -449,7 +468,8 @@ struct snd_soc_machine { int (*resume_post)(struct platform_device *pdev); /* callbacks */ - int (*dapm_event)(struct snd_soc_machine *, int event); + int (*set_bias_level)(struct snd_soc_machine *, + enum snd_soc_bias_level level); /* CPU <--> Codec DAI links */ struct snd_soc_dai_link *dai_link; diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c index cb8365ac0c02..dc8a38d9e53a 100644 --- a/sound/soc/codecs/tlv320aic3x.c +++ b/sound/soc/codecs/tlv320aic3x.c @@ -847,13 +847,14 @@ static int aic3x_set_dai_fmt(struct snd_soc_codec_dai *codec_dai, return 0; } -static int aic3x_dapm_event(struct snd_soc_codec *codec, int event) +static int aic3x_set_bias_level(struct snd_soc_codec *codec, + enum snd_soc_bias_level level) { struct aic3x_priv *aic3x = codec->private_data; u8 reg; - switch (event) { - case SNDRV_CTL_POWER_D0: + switch (level) { + case SND_SOC_BIAS_ON: /* all power is driven by DAPM system */ if (aic3x->master) { /* enable pll */ @@ -862,10 +863,9 @@ static int aic3x_dapm_event(struct snd_soc_codec *codec, int event) reg | PLL_ENABLE); } break; - case SNDRV_CTL_POWER_D1: - case SNDRV_CTL_POWER_D2: + case SND_SOC_BIAS_PREPARE: break; - case SNDRV_CTL_POWER_D3hot: + case SND_SOC_BIAS_STANDBY: /* * all power is driven by DAPM system, * so output power is safe if bypass was set @@ -877,7 +877,7 @@ static int aic3x_dapm_event(struct snd_soc_codec *codec, int event) reg & ~PLL_ENABLE); } break; - case SNDRV_CTL_POWER_D3cold: + case SND_SOC_BIAS_OFF: /* force all power off */ reg = aic3x_read_reg_cache(codec, LINE1L_2_LADC_CTRL); aic3x_write(codec, LINE1L_2_LADC_CTRL, reg & ~LADC_PWR_ON); @@ -913,7 +913,7 @@ static int aic3x_dapm_event(struct snd_soc_codec *codec, int event) } break; } - codec->dapm_state = event; + codec->bias_level = level; return 0; } @@ -979,7 +979,7 @@ static int aic3x_suspend(struct platform_device *pdev, pm_message_t state) struct snd_soc_device *socdev = platform_get_drvdata(pdev); struct snd_soc_codec *codec = socdev->codec; - aic3x_dapm_event(codec, SNDRV_CTL_POWER_D3cold); + aic3x_set_bias_level(codec, SND_SOC_BIAS_OFF); return 0; } @@ -999,7 +999,7 @@ static int aic3x_resume(struct platform_device *pdev) codec->hw_write(codec->control_data, data, 2); } - aic3x_dapm_event(codec, codec->suspend_dapm_state); + aic3x_set_bias_level(codec, codec->suspend_bias_level); return 0; } @@ -1018,7 +1018,7 @@ static int aic3x_init(struct snd_soc_device *socdev) codec->owner = THIS_MODULE; codec->read = aic3x_read_reg_cache; codec->write = aic3x_write; - codec->dapm_event = aic3x_dapm_event; + codec->set_bias_level = aic3x_set_bias_level; codec->dai = &aic3x_dai; codec->num_dai = 1; codec->reg_cache_size = sizeof(aic3x_reg); @@ -1100,7 +1100,7 @@ static int aic3x_init(struct snd_soc_device *socdev) aic3x_write(codec, LINE2R_2_MONOLOPM_VOL, DEFAULT_VOL); /* off, with power on */ - aic3x_dapm_event(codec, SNDRV_CTL_POWER_D3hot); + aic3x_set_bias_level(codec, SND_SOC_BIAS_STANDBY); /* setup GPIO functions */ aic3x_write(codec, AIC3X_GPIO1_REG, (setup->gpio_func[0] & 0xf) << 4); @@ -1271,7 +1271,7 @@ static int aic3x_remove(struct platform_device *pdev) /* power down chip */ if (codec->control_data) - aic3x_dapm_event(codec, SNDRV_CTL_POWER_D3); + aic3x_set_bias_level(codec, SND_SOC_BIAS_OFF); snd_soc_free_pcms(socdev); snd_soc_dapm_free(socdev); diff --git a/sound/soc/codecs/wm8731.c b/sound/soc/codecs/wm8731.c index 0cf9265fca8f..0f28aa4bcccb 100644 --- a/sound/soc/codecs/wm8731.c +++ b/sound/soc/codecs/wm8731.c @@ -435,29 +435,29 @@ static int wm8731_set_dai_fmt(struct snd_soc_codec_dai *codec_dai, return 0; } -static int wm8731_dapm_event(struct snd_soc_codec *codec, int event) +static int wm8731_set_bias_level(struct snd_soc_codec *codec, + enum snd_soc_bias_level level) { u16 reg = wm8731_read_reg_cache(codec, WM8731_PWR) & 0xff7f; - switch (event) { - case SNDRV_CTL_POWER_D0: /* full On */ + switch (level) { + case SND_SOC_BIAS_ON: /* vref/mid, osc on, dac unmute */ wm8731_write(codec, WM8731_PWR, reg); break; - case SNDRV_CTL_POWER_D1: /* partial On */ - case SNDRV_CTL_POWER_D2: /* partial On */ + case SND_SOC_BIAS_PREPARE: break; - case SNDRV_CTL_POWER_D3hot: /* Off, with power */ + case SND_SOC_BIAS_STANDBY: /* everything off except vref/vmid, */ wm8731_write(codec, WM8731_PWR, reg | 0x0040); break; - case SNDRV_CTL_POWER_D3cold: /* Off, without power */ + case SND_SOC_BIAS_OFF: /* everything off, dac mute, inactive */ wm8731_write(codec, WM8731_ACTIVE, 0x0); wm8731_write(codec, WM8731_PWR, 0xffff); break; } - codec->dapm_state = event; + codec->bias_level = level; return 0; } @@ -503,7 +503,7 @@ static int wm8731_suspend(struct platform_device *pdev, pm_message_t state) struct snd_soc_codec *codec = socdev->codec; wm8731_write(codec, WM8731_ACTIVE, 0x0); - wm8731_dapm_event(codec, SNDRV_CTL_POWER_D3cold); + wm8731_set_bias_level(codec, SND_SOC_BIAS_OFF); return 0; } @@ -521,8 +521,8 @@ static int wm8731_resume(struct platform_device *pdev) data[1] = cache[i] & 0x00ff; codec->hw_write(codec->control_data, data, 2); } - wm8731_dapm_event(codec, SNDRV_CTL_POWER_D3hot); - wm8731_dapm_event(codec, codec->suspend_dapm_state); + wm8731_set_bias_level(codec, SND_SOC_BIAS_STANDBY); + wm8731_set_bias_level(codec, codec->suspend_bias_level); return 0; } @@ -539,7 +539,7 @@ static int wm8731_init(struct snd_soc_device *socdev) codec->owner = THIS_MODULE; codec->read = wm8731_read_reg_cache; codec->write = wm8731_write; - codec->dapm_event = wm8731_dapm_event; + codec->set_bias_level = wm8731_set_bias_level; codec->dai = &wm8731_dai; codec->num_dai = 1; codec->reg_cache_size = sizeof(wm8731_reg); @@ -557,7 +557,7 @@ static int wm8731_init(struct snd_soc_device *socdev) } /* power on device */ - wm8731_dapm_event(codec, SNDRV_CTL_POWER_D3hot); + wm8731_set_bias_level(codec, SND_SOC_BIAS_STANDBY); /* set the update bits */ reg = wm8731_read_reg_cache(codec, WM8731_LOUT1V); @@ -730,7 +730,7 @@ static int wm8731_remove(struct platform_device *pdev) struct snd_soc_codec *codec = socdev->codec; if (codec->control_data) - wm8731_dapm_event(codec, SNDRV_CTL_POWER_D3cold); + wm8731_set_bias_level(codec, SND_SOC_BIAS_OFF); snd_soc_free_pcms(socdev); snd_soc_dapm_free(socdev); diff --git a/sound/soc/codecs/wm8750.c b/sound/soc/codecs/wm8750.c index 16cd5d4d5ad9..62423f4493b0 100644 --- a/sound/soc/codecs/wm8750.c +++ b/sound/soc/codecs/wm8750.c @@ -686,29 +686,29 @@ static int wm8750_mute(struct snd_soc_codec_dai *dai, int mute) return 0; } -static int wm8750_dapm_event(struct snd_soc_codec *codec, int event) +static int wm8750_set_bias_level(struct snd_soc_codec *codec, + enum snd_soc_bias_level level) { u16 pwr_reg = wm8750_read_reg_cache(codec, WM8750_PWR1) & 0xfe3e; - switch (event) { - case SNDRV_CTL_POWER_D0: /* full On */ + switch (level) { + case SND_SOC_BIAS_ON: /* set vmid to 50k and unmute dac */ wm8750_write(codec, WM8750_PWR1, pwr_reg | 0x00c0); break; - case SNDRV_CTL_POWER_D1: /* partial On */ - case SNDRV_CTL_POWER_D2: /* partial On */ + case SND_SOC_BIAS_PREPARE: /* set vmid to 5k for quick power up */ wm8750_write(codec, WM8750_PWR1, pwr_reg | 0x01c1); break; - case SNDRV_CTL_POWER_D3hot: /* Off, with power */ + case SND_SOC_BIAS_STANDBY: /* mute dac and set vmid to 500k, enable VREF */ wm8750_write(codec, WM8750_PWR1, pwr_reg | 0x0141); break; - case SNDRV_CTL_POWER_D3cold: /* Off, without power */ + case SND_SOC_BIAS_OFF: wm8750_write(codec, WM8750_PWR1, 0x0001); break; } - codec->dapm_state = event; + codec->bias_level = level; return 0; } @@ -748,7 +748,7 @@ static void wm8750_work(struct work_struct *work) { struct snd_soc_codec *codec = container_of(work, struct snd_soc_codec, delayed_work.work); - wm8750_dapm_event(codec, codec->dapm_state); + wm8750_set_bias_level(codec, codec->bias_level); } static int wm8750_suspend(struct platform_device *pdev, pm_message_t state) @@ -756,7 +756,7 @@ static int wm8750_suspend(struct platform_device *pdev, pm_message_t state) struct snd_soc_device *socdev = platform_get_drvdata(pdev); struct snd_soc_codec *codec = socdev->codec; - wm8750_dapm_event(codec, SNDRV_CTL_POWER_D3cold); + wm8750_set_bias_level(codec, SND_SOC_BIAS_OFF); return 0; } @@ -777,12 +777,12 @@ static int wm8750_resume(struct platform_device *pdev) codec->hw_write(codec->control_data, data, 2); } - wm8750_dapm_event(codec, SNDRV_CTL_POWER_D3hot); + wm8750_set_bias_level(codec, SND_SOC_BIAS_STANDBY); /* charge wm8750 caps */ - if (codec->suspend_dapm_state == SNDRV_CTL_POWER_D0) { - wm8750_dapm_event(codec, SNDRV_CTL_POWER_D2); - codec->dapm_state = SNDRV_CTL_POWER_D0; + if (codec->suspend_bias_level == SND_SOC_BIAS_ON) { + wm8750_set_bias_level(codec, SND_SOC_BIAS_PREPARE); + codec->bias_level = SND_SOC_BIAS_ON; schedule_delayed_work(&codec->delayed_work, msecs_to_jiffies(1000)); } @@ -803,7 +803,7 @@ static int wm8750_init(struct snd_soc_device *socdev) codec->owner = THIS_MODULE; codec->read = wm8750_read_reg_cache; codec->write = wm8750_write; - codec->dapm_event = wm8750_dapm_event; + codec->set_bias_level = wm8750_set_bias_level; codec->dai = &wm8750_dai; codec->num_dai = 1; codec->reg_cache_size = sizeof(wm8750_reg); @@ -821,8 +821,8 @@ static int wm8750_init(struct snd_soc_device *socdev) } /* charge output caps */ - wm8750_dapm_event(codec, SNDRV_CTL_POWER_D2); - codec->dapm_state = SNDRV_CTL_POWER_D3hot; + wm8750_set_bias_level(codec, SND_SOC_BIAS_PREPARE); + codec->bias_level = SND_SOC_BIAS_STANDBY; schedule_delayed_work(&codec->delayed_work, msecs_to_jiffies(1000)); /* set the update bits */ @@ -1021,7 +1021,7 @@ static int wm8750_remove(struct platform_device *pdev) struct snd_soc_codec *codec = socdev->codec; if (codec->control_data) - wm8750_dapm_event(codec, SNDRV_CTL_POWER_D3cold); + wm8750_set_bias_level(codec, SND_SOC_BIAS_OFF); run_delayed_work(&codec->delayed_work); snd_soc_free_pcms(socdev); snd_soc_dapm_free(socdev); diff --git a/sound/soc/codecs/wm8753.c b/sound/soc/codecs/wm8753.c index fb41826c4c4c..9032b0c07c86 100644 --- a/sound/soc/codecs/wm8753.c +++ b/sound/soc/codecs/wm8753.c @@ -1274,29 +1274,29 @@ static int wm8753_mute(struct snd_soc_codec_dai *dai, int mute) return 0; } -static int wm8753_dapm_event(struct snd_soc_codec *codec, int event) +static int wm8753_set_bias_level(struct snd_soc_codec *codec, + enum snd_soc_bias_level level) { u16 pwr_reg = wm8753_read_reg_cache(codec, WM8753_PWR1) & 0xfe3e; - switch (event) { - case SNDRV_CTL_POWER_D0: /* full On */ + switch (level) { + case SND_SOC_BIAS_ON: /* set vmid to 50k and unmute dac */ wm8753_write(codec, WM8753_PWR1, pwr_reg | 0x00c0); break; - case SNDRV_CTL_POWER_D1: /* partial On */ - case SNDRV_CTL_POWER_D2: /* partial On */ + case SND_SOC_BIAS_PREPARE: /* set vmid to 5k for quick power up */ wm8753_write(codec, WM8753_PWR1, pwr_reg | 0x01c1); break; - case SNDRV_CTL_POWER_D3hot: /* Off, with power */ + case SND_SOC_BIAS_STANDBY: /* mute dac and set vmid to 500k, enable VREF */ wm8753_write(codec, WM8753_PWR1, pwr_reg | 0x0141); break; - case SNDRV_CTL_POWER_D3cold: /* Off, without power */ + case SND_SOC_BIAS_OFF: wm8753_write(codec, WM8753_PWR1, 0x0001); break; } - codec->dapm_state = event; + codec->bias_level = level; return 0; } @@ -1500,7 +1500,7 @@ static void wm8753_work(struct work_struct *work) { struct snd_soc_codec *codec = container_of(work, struct snd_soc_codec, delayed_work.work); - wm8753_dapm_event(codec, codec->dapm_state); + wm8753_set_bias_level(codec, codec->bias_level); } static int wm8753_suspend(struct platform_device *pdev, pm_message_t state) @@ -1512,7 +1512,7 @@ static int wm8753_suspend(struct platform_device *pdev, pm_message_t state) if (!codec->card) return 0; - wm8753_dapm_event(codec, SNDRV_CTL_POWER_D3cold); + wm8753_set_bias_level(codec, SND_SOC_BIAS_OFF); return 0; } @@ -1537,12 +1537,12 @@ static int wm8753_resume(struct platform_device *pdev) codec->hw_write(codec->control_data, data, 2); } - wm8753_dapm_event(codec, SNDRV_CTL_POWER_D3hot); + wm8753_set_bias_level(codec, SND_SOC_BIAS_STANDBY); /* charge wm8753 caps */ - if (codec->suspend_dapm_state == SNDRV_CTL_POWER_D0) { - wm8753_dapm_event(codec, SNDRV_CTL_POWER_D2); - codec->dapm_state = SNDRV_CTL_POWER_D0; + if (codec->suspend_bias_level == SND_SOC_BIAS_ON) { + wm8753_set_bias_level(codec, SND_SOC_BIAS_PREPARE); + codec->bias_level = SND_SOC_BIAS_ON; schedule_delayed_work(&codec->delayed_work, msecs_to_jiffies(caps_charge)); } @@ -1563,7 +1563,7 @@ static int wm8753_init(struct snd_soc_device *socdev) codec->owner = THIS_MODULE; codec->read = wm8753_read_reg_cache; codec->write = wm8753_write; - codec->dapm_event = wm8753_dapm_event; + codec->set_bias_level = wm8753_set_bias_level; codec->dai = wm8753_dai; codec->num_dai = 2; codec->reg_cache_size = sizeof(wm8753_reg); @@ -1584,8 +1584,8 @@ static int wm8753_init(struct snd_soc_device *socdev) } /* charge output caps */ - wm8753_dapm_event(codec, SNDRV_CTL_POWER_D2); - codec->dapm_state = SNDRV_CTL_POWER_D3hot; + wm8753_set_bias_level(codec, SND_SOC_BIAS_PREPARE); + codec->bias_level = SND_SOC_BIAS_STANDBY; schedule_delayed_work(&codec->delayed_work, msecs_to_jiffies(caps_charge)); @@ -1792,7 +1792,7 @@ static int wm8753_remove(struct platform_device *pdev) struct snd_soc_codec *codec = socdev->codec; if (codec->control_data) - wm8753_dapm_event(codec, SNDRV_CTL_POWER_D3cold); + wm8753_set_bias_level(codec, SND_SOC_BIAS_OFF); run_delayed_work(&codec->delayed_work); snd_soc_free_pcms(socdev); snd_soc_dapm_free(socdev); diff --git a/sound/soc/codecs/wm9712.c b/sound/soc/codecs/wm9712.c index 89efe40c7c33..e26cfcf0b4fc 100644 --- a/sound/soc/codecs/wm9712.c +++ b/sound/soc/codecs/wm9712.c @@ -571,23 +571,23 @@ struct snd_soc_codec_dai wm9712_dai[] = { }; EXPORT_SYMBOL_GPL(wm9712_dai); -static int wm9712_dapm_event(struct snd_soc_codec *codec, int event) +static int wm9712_set_bias_level(struct snd_soc_codec *codec, + enum snd_soc_bias_level level) { - switch (event) { - case SNDRV_CTL_POWER_D0: /* full On */ - case SNDRV_CTL_POWER_D1: /* partial On */ - case SNDRV_CTL_POWER_D2: /* partial On */ + switch (level) { + case SND_SOC_BIAS_ON: + case SND_SOC_BIAS_PREPARE: break; - case SNDRV_CTL_POWER_D3hot: /* Off, with power */ + case SND_SOC_BIAS_STANDBY: ac97_write(codec, AC97_POWERDOWN, 0x0000); break; - case SNDRV_CTL_POWER_D3cold: /* Off, without power */ + case SND_SOC_BIAS_OFF: /* disable everything including AC link */ ac97_write(codec, AC97_EXTENDED_MSTATUS, 0xffff); ac97_write(codec, AC97_POWERDOWN, 0xffff); break; } - codec->dapm_state = event; + codec->bias_level = level; return 0; } @@ -615,7 +615,7 @@ static int wm9712_soc_suspend(struct platform_device *pdev, struct snd_soc_device *socdev = platform_get_drvdata(pdev); struct snd_soc_codec *codec = socdev->codec; - wm9712_dapm_event(codec, SNDRV_CTL_POWER_D3cold); + wm9712_set_bias_level(codec, SND_SOC_BIAS_OFF); return 0; } @@ -632,7 +632,7 @@ static int wm9712_soc_resume(struct platform_device *pdev) return ret; } - wm9712_dapm_event(codec, SNDRV_CTL_POWER_D3hot); + wm9712_set_bias_level(codec, SND_SOC_BIAS_STANDBY); if (ret == 0) { /* Sync reg_cache with the hardware after cold reset */ @@ -644,8 +644,8 @@ static int wm9712_soc_resume(struct platform_device *pdev) } } - if (codec->suspend_dapm_state == SNDRV_CTL_POWER_D0) - wm9712_dapm_event(codec, SNDRV_CTL_POWER_D0); + if (codec->suspend_bias_level == SND_SOC_BIAS_ON) + wm9712_set_bias_level(codec, SND_SOC_BIAS_ON); return ret; } @@ -679,7 +679,7 @@ static int wm9712_soc_probe(struct platform_device *pdev) codec->num_dai = ARRAY_SIZE(wm9712_dai); codec->write = ac97_write; codec->read = ac97_read; - codec->dapm_event = wm9712_dapm_event; + codec->set_bias_level = wm9712_set_bias_level; INIT_LIST_HEAD(&codec->dapm_widgets); INIT_LIST_HEAD(&codec->dapm_paths); @@ -703,7 +703,7 @@ static int wm9712_soc_probe(struct platform_device *pdev) /* set alc mux to none */ ac97_write(codec, AC97_VIDEO, ac97_read(codec, AC97_VIDEO) | 0x3000); - wm9712_dapm_event(codec, SNDRV_CTL_POWER_D3hot); + wm9712_set_bias_level(codec, SND_SOC_BIAS_STANDBY); wm9712_add_controls(codec); wm9712_add_widgets(codec); ret = snd_soc_register_card(socdev); diff --git a/sound/soc/codecs/wm9713.c b/sound/soc/codecs/wm9713.c index 9e6b2fd7262b..4863636e9d56 100644 --- a/sound/soc/codecs/wm9713.c +++ b/sound/soc/codecs/wm9713.c @@ -1094,33 +1094,33 @@ int wm9713_reset(struct snd_soc_codec *codec, int try_warm) } EXPORT_SYMBOL_GPL(wm9713_reset); -static int wm9713_dapm_event(struct snd_soc_codec *codec, int event) +static int wm9713_set_bias_level(struct snd_soc_codec *codec, + enum snd_soc_bias_level level) { u16 reg; - switch (event) { - case SNDRV_CTL_POWER_D0: /* full On */ + switch (level) { + case SND_SOC_BIAS_ON: /* enable thermal shutdown */ reg = ac97_read(codec, AC97_EXTENDED_MID) & 0x1bff; ac97_write(codec, AC97_EXTENDED_MID, reg); break; - case SNDRV_CTL_POWER_D1: /* partial On */ - case SNDRV_CTL_POWER_D2: /* partial On */ + case SND_SOC_BIAS_PREPARE: break; - case SNDRV_CTL_POWER_D3hot: /* Off, with power */ + case SND_SOC_BIAS_STANDBY: /* enable master bias and vmid */ reg = ac97_read(codec, AC97_EXTENDED_MID) & 0x3bff; ac97_write(codec, AC97_EXTENDED_MID, reg); ac97_write(codec, AC97_POWERDOWN, 0x0000); break; - case SNDRV_CTL_POWER_D3cold: /* Off, without power */ + case SND_SOC_BIAS_OFF: /* disable everything including AC link */ ac97_write(codec, AC97_EXTENDED_MID, 0xffff); ac97_write(codec, AC97_EXTENDED_MSTATUS, 0xffff); ac97_write(codec, AC97_POWERDOWN, 0xffff); break; } - codec->dapm_state = event; + codec->bias_level = level; return 0; } @@ -1157,7 +1157,7 @@ static int wm9713_soc_resume(struct platform_device *pdev) return ret; } - wm9713_dapm_event(codec, SNDRV_CTL_POWER_D3hot); + wm9713_set_bias_level(codec, SND_SOC_BIAS_STANDBY); /* do we need to re-start the PLL ? */ if (wm9713->pll_out) @@ -1173,8 +1173,8 @@ static int wm9713_soc_resume(struct platform_device *pdev) } } - if (codec->suspend_dapm_state == SNDRV_CTL_POWER_D0) - wm9713_dapm_event(codec, SNDRV_CTL_POWER_D0); + if (codec->suspend_bias_level == SND_SOC_BIAS_ON) + wm9713_set_bias_level(codec, SND_SOC_BIAS_ON); return ret; } @@ -1213,7 +1213,7 @@ static int wm9713_soc_probe(struct platform_device *pdev) codec->num_dai = ARRAY_SIZE(wm9713_dai); codec->write = ac97_write; codec->read = ac97_read; - codec->dapm_event = wm9713_dapm_event; + codec->set_bias_level = wm9713_set_bias_level; INIT_LIST_HEAD(&codec->dapm_widgets); INIT_LIST_HEAD(&codec->dapm_paths); @@ -1235,7 +1235,7 @@ static int wm9713_soc_probe(struct platform_device *pdev) goto reset_err; } - wm9713_dapm_event(codec, SNDRV_CTL_POWER_D3hot); + wm9713_set_bias_level(codec, SND_SOC_BIAS_STANDBY); /* unmute the adc - move to kcontrol */ reg = ac97_read(codec, AC97_CD) & 0x7fff; diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 0318d8abe3e8..a05b3450aee8 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -283,12 +283,12 @@ static void close_delayed_work(struct work_struct *work) /* are we waiting on this codec DAI stream */ if (codec_dai->pop_wait == 1) { - /* power down the codec to D1 if no longer active */ + /* Reduce power if no longer active */ if (codec->active == 0) { dbg("pop wq D1 %s %s\n", codec->name, codec_dai->playback.stream_name); - snd_soc_dapm_device_event(socdev, - SNDRV_CTL_POWER_D1); + snd_soc_dapm_set_bias_level(socdev, + SND_SOC_BIAS_PREPARE); } codec_dai->pop_wait = 0; @@ -296,12 +296,12 @@ static void close_delayed_work(struct work_struct *work) codec_dai->playback.stream_name, SND_SOC_DAPM_STREAM_STOP); - /* power down the codec power domain if no longer active */ + /* Fall into standby if no longer active */ if (codec->active == 0) { dbg("pop wq D3 %s %s\n", codec->name, codec_dai->playback.stream_name); - snd_soc_dapm_device_event(socdev, - SNDRV_CTL_POWER_D3hot); + snd_soc_dapm_set_bias_level(socdev, + SND_SOC_BIAS_STANDBY); } } } @@ -361,8 +361,8 @@ static int soc_codec_close(struct snd_pcm_substream *substream) SND_SOC_DAPM_STREAM_STOP); if (codec->active == 0 && codec_dai->pop_wait == 0) - snd_soc_dapm_device_event(socdev, - SNDRV_CTL_POWER_D3hot); + snd_soc_dapm_set_bias_level(socdev, + SND_SOC_BIAS_STANDBY); } mutex_unlock(&pcm_mutex); @@ -435,9 +435,10 @@ static int soc_pcm_prepare(struct snd_pcm_substream *substream) } } else { /* no delayed work - do we need to power up codec */ - if (codec->dapm_state != SNDRV_CTL_POWER_D0) { + if (codec->bias_level != SND_SOC_BIAS_ON) { - snd_soc_dapm_device_event(socdev, SNDRV_CTL_POWER_D1); + snd_soc_dapm_set_bias_level(socdev, + SND_SOC_BIAS_PREPARE); if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) snd_soc_dapm_stream_event(codec, @@ -448,7 +449,7 @@ static int soc_pcm_prepare(struct snd_pcm_substream *substream) codec_dai->capture.stream_name, SND_SOC_DAPM_STREAM_START); - snd_soc_dapm_device_event(socdev, SNDRV_CTL_POWER_D0); + snd_soc_dapm_set_bias_level(socdev, SND_SOC_BIAS_ON); if (codec_dai->dai_ops.digital_mute) codec_dai->dai_ops.digital_mute(codec_dai, 0); @@ -658,7 +659,7 @@ static int soc_suspend(struct platform_device *pdev, pm_message_t state) /* close any waiting streams and save state */ run_delayed_work(&socdev->delayed_work); - codec->suspend_dapm_state = codec->dapm_state; + codec->suspend_bias_level = codec->bias_level; for(i = 0; i < codec->num_dai; i++) { char *stream = codec->dai[i].playback.stream_name; diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 8a3192bcee78..728f3ac2f304 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -763,21 +763,18 @@ static ssize_t dapm_widget_show(struct device *dev, } } - switch(codec->dapm_state){ - case SNDRV_CTL_POWER_D0: - state = "D0"; + switch (codec->bias_level) { + case SND_SOC_BIAS_ON: + state = "On"; break; - case SNDRV_CTL_POWER_D1: - state = "D1"; + case SND_SOC_BIAS_PREPARE: + state = "Prepare"; break; - case SNDRV_CTL_POWER_D2: - state = "D2"; + case SND_SOC_BIAS_STANDBY: + state = "Standby"; break; - case SNDRV_CTL_POWER_D3hot: - state = "D3hot"; - break; - case SNDRV_CTL_POWER_D3cold: - state = "D3cold"; + case SND_SOC_BIAS_OFF: + state = "Off"; break; } count += sprintf(buf + count, "PM State: %s\n", state); @@ -1358,27 +1355,28 @@ int snd_soc_dapm_stream_event(struct snd_soc_codec *codec, EXPORT_SYMBOL_GPL(snd_soc_dapm_stream_event); /** - * snd_soc_dapm_device_event - send a device event to the dapm core + * snd_soc_dapm_set_bias_level - set the bias level for the system * @socdev: audio device - * @event: device event + * @level: level to configure * - * Sends a device event to the dapm core. The core then makes any - * necessary machine or codec power changes.. + * Configure the bias (power) levels for the SoC audio device. * * Returns 0 for success else error. */ -int snd_soc_dapm_device_event(struct snd_soc_device *socdev, int event) +int snd_soc_dapm_set_bias_level(struct snd_soc_device *socdev, + enum snd_soc_bias_level level) { struct snd_soc_codec *codec = socdev->codec; struct snd_soc_machine *machine = socdev->machine; + int ret = 0; - if (machine->dapm_event) - machine->dapm_event(machine, event); - if (codec->dapm_event) - codec->dapm_event(codec, event); - return 0; + if (machine->set_bias_level) + ret = machine->set_bias_level(machine, level); + if (ret == 0 && codec->set_bias_level) + ret = codec->set_bias_level(codec, level); + + return ret; } -EXPORT_SYMBOL_GPL(snd_soc_dapm_device_event); /** * snd_soc_dapm_set_endpoint - set audio endpoint status -- cgit v1.2.3 From 0abbc78a0137fee60ef092f0b20a3d3d7e7e0cc2 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Tue, 20 May 2008 16:27:17 +0200 Subject: x86, aperture_64: use symbolic constants Factor-out common aperture_valid code. Signed-off-by: Pavel Machek Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/aperture_64.c | 23 +---------------------- drivers/char/agp/amd64-agp.c | 22 ++++------------------ include/asm-x86/gart.h | 24 ++++++++++++++++++++++++ 3 files changed, 29 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 02f4dbaa4df4..5373f7834d8a 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -109,27 +109,6 @@ static u32 __init allocate_aperture(void) return (u32)__pa(p); } -static int __init aperture_valid(u64 aper_base, u32 aper_size, u32 min_size) -{ - if (!aper_base) - return 0; - - if (aper_base + aper_size > 0x100000000UL) { - printk(KERN_ERR "Aperture beyond 4GB. Ignoring.\n"); - return 0; - } - if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) { - printk(KERN_ERR "Aperture pointing to e820 RAM. Ignoring.\n"); - return 0; - } - if (aper_size < min_size) { - printk(KERN_ERR "Aperture too small (%d MB) than (%d MB)\n", - aper_size>>20, min_size>>20); - return 0; - } - - return 1; -} /* Find a PCI capability */ static __u32 __init find_cap(int bus, int slot, int func, int cap) @@ -344,7 +323,7 @@ out: if (gart_fix_e820 && !fix && aper_enabled) { if (!e820_all_mapped(aper_base, aper_base + aper_size, E820_RESERVED)) { - /* reserved it, so we can resuse it in second kernel */ + /* reserve it, so we can reuse it in second kernel */ printk(KERN_INFO "update e820 for GART\n"); add_memory_region(aper_base, aper_size, E820_RESERVED); update_e820(); diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index e3c7ea07f57c..f5af65ac8c78 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -228,24 +228,10 @@ static const struct agp_bridge_driver amd_8151_driver = { }; /* Some basic sanity checks for the aperture. */ -static int __devinit aperture_valid(u64 aper, u32 size) +static int __devinit agp_aperture_valid(u64 aper, u32 size) { - if (aper == 0) { - printk(KERN_ERR PFX "No aperture\n"); + if (!aperture_valid(aper, size, 32*1024*1024)) return 0; - } - if ((u64)aper + size > 0x100000000ULL) { - printk(KERN_ERR PFX "Aperture out of bounds\n"); - return 0; - } - if (e820_any_mapped(aper, aper + size, E820_RAM)) { - printk(KERN_ERR PFX "Aperture pointing to RAM\n"); - return 0; - } - if (size < 32*1024*1024) { - printk(KERN_ERR PFX "Aperture too small (%d MB)\n", size>>20); - return 0; - } /* Request the Aperture. This catches cases when someone else already put a mapping in there - happens with some very broken BIOS @@ -282,7 +268,7 @@ static __devinit int fix_northbridge(struct pci_dev *nb, struct pci_dev *agp, nb_order = (nb_order >> 1) & 7; pci_read_config_dword(nb, AMD64_GARTAPERTUREBASE, &nb_base); nb_aper = nb_base << 25; - if (aperture_valid(nb_aper, (32*1024*1024)< + extern void pci_iommu_shutdown(void); extern void no_iommu_init(void); extern int force_iommu, no_iommu; @@ -69,4 +71,26 @@ static inline void enable_gart_translation(struct pci_dev *dev, u64 addr) pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl); } +static inline int aperture_valid(u64 aper_base, u32 aper_size, u32 min_size) +{ + if (!aper_base) + return 0; + + if (aper_base + aper_size > 0x100000000ULL) { + printk(KERN_ERR "Aperture beyond 4GB. Ignoring.\n"); + return 0; + } + if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) { + printk(KERN_ERR "Aperture pointing to e820 RAM. Ignoring.\n"); + return 0; + } + if (aper_size < min_size) { + printk(KERN_ERR "Aperture too small (%d MB) than (%d MB)\n", + aper_size>>20, min_size>>20); + return 0; + } + + return 1; +} + #endif -- cgit v1.2.3 From bedd78ca786c1d18c2a2785c7e40593dc9c9870f Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 20 Apr 2008 12:31:29 +0100 Subject: [RTC] remove old ARM rtc library code Now that all drivers using it are gone, remove the old ARM RTC library. Acked-by: Alessandro Zummo Signed-off-by: Russell King --- arch/arm/common/Makefile | 1 - arch/arm/common/rtctime.c | 434 ---------------------------------------------- include/asm-arm/rtc.h | 43 ----- 3 files changed, 478 deletions(-) delete mode 100644 arch/arm/common/rtctime.c delete mode 100644 include/asm-arm/rtc.h (limited to 'include') diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile index 3d0b9fa42f84..325e4b6a6afb 100644 --- a/arch/arm/common/Makefile +++ b/arch/arm/common/Makefile @@ -2,7 +2,6 @@ # Makefile for the linux kernel. # -obj-y += rtctime.o obj-$(CONFIG_ARM_GIC) += gic.o obj-$(CONFIG_ARM_VIC) += vic.o obj-$(CONFIG_ICST525) += icst525.o diff --git a/arch/arm/common/rtctime.c b/arch/arm/common/rtctime.c deleted file mode 100644 index aa8f7739c822..000000000000 --- a/arch/arm/common/rtctime.c +++ /dev/null @@ -1,434 +0,0 @@ -/* - * linux/arch/arm/common/rtctime.c - * - * Copyright (C) 2003 Deep Blue Solutions Ltd. - * Based on sa1100-rtc.c, Nils Faerber, CIH, Nicolas Pitre. - * Based on rtc.c by Paul Gortmaker - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -static DECLARE_WAIT_QUEUE_HEAD(rtc_wait); -static struct fasync_struct *rtc_async_queue; - -/* - * rtc_lock protects rtc_irq_data - */ -static DEFINE_SPINLOCK(rtc_lock); -static unsigned long rtc_irq_data; - -/* - * rtc_sem protects rtc_inuse and rtc_ops - */ -static DEFINE_MUTEX(rtc_mutex); -static unsigned long rtc_inuse; -static struct rtc_ops *rtc_ops; - -#define rtc_epoch 1900UL - -/* - * Calculate the next alarm time given the requested alarm time mask - * and the current time. - */ -void rtc_next_alarm_time(struct rtc_time *next, struct rtc_time *now, struct rtc_time *alrm) -{ - unsigned long next_time; - unsigned long now_time; - - next->tm_year = now->tm_year; - next->tm_mon = now->tm_mon; - next->tm_mday = now->tm_mday; - next->tm_hour = alrm->tm_hour; - next->tm_min = alrm->tm_min; - next->tm_sec = alrm->tm_sec; - - rtc_tm_to_time(now, &now_time); - rtc_tm_to_time(next, &next_time); - - if (next_time < now_time) { - /* Advance one day */ - next_time += 60 * 60 * 24; - rtc_time_to_tm(next_time, next); - } -} -EXPORT_SYMBOL(rtc_next_alarm_time); - -static inline int rtc_arm_read_time(struct rtc_ops *ops, struct rtc_time *tm) -{ - memset(tm, 0, sizeof(struct rtc_time)); - return ops->read_time(tm); -} - -static inline int rtc_arm_set_time(struct rtc_ops *ops, struct rtc_time *tm) -{ - int ret; - - ret = rtc_valid_tm(tm); - if (ret == 0) - ret = ops->set_time(tm); - - return ret; -} - -static inline int rtc_arm_read_alarm(struct rtc_ops *ops, struct rtc_wkalrm *alrm) -{ - int ret = -EINVAL; - if (ops->read_alarm) { - memset(alrm, 0, sizeof(struct rtc_wkalrm)); - ret = ops->read_alarm(alrm); - } - return ret; -} - -static inline int rtc_arm_set_alarm(struct rtc_ops *ops, struct rtc_wkalrm *alrm) -{ - int ret = -EINVAL; - if (ops->set_alarm) - ret = ops->set_alarm(alrm); - return ret; -} - -void rtc_update(unsigned long num, unsigned long events) -{ - spin_lock(&rtc_lock); - rtc_irq_data = (rtc_irq_data + (num << 8)) | events; - spin_unlock(&rtc_lock); - - wake_up_interruptible(&rtc_wait); - kill_fasync(&rtc_async_queue, SIGIO, POLL_IN); -} -EXPORT_SYMBOL(rtc_update); - - -static ssize_t -rtc_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) -{ - DECLARE_WAITQUEUE(wait, current); - unsigned long data; - ssize_t ret; - - if (count < sizeof(unsigned long)) - return -EINVAL; - - add_wait_queue(&rtc_wait, &wait); - do { - __set_current_state(TASK_INTERRUPTIBLE); - - spin_lock_irq(&rtc_lock); - data = rtc_irq_data; - rtc_irq_data = 0; - spin_unlock_irq(&rtc_lock); - - if (data != 0) { - ret = 0; - break; - } - if (file->f_flags & O_NONBLOCK) { - ret = -EAGAIN; - break; - } - if (signal_pending(current)) { - ret = -ERESTARTSYS; - break; - } - schedule(); - } while (1); - set_current_state(TASK_RUNNING); - remove_wait_queue(&rtc_wait, &wait); - - if (ret == 0) { - ret = put_user(data, (unsigned long __user *)buf); - if (ret == 0) - ret = sizeof(unsigned long); - } - return ret; -} - -static unsigned int rtc_poll(struct file *file, poll_table *wait) -{ - unsigned long data; - - poll_wait(file, &rtc_wait, wait); - - spin_lock_irq(&rtc_lock); - data = rtc_irq_data; - spin_unlock_irq(&rtc_lock); - - return data != 0 ? POLLIN | POLLRDNORM : 0; -} - -static int rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, - unsigned long arg) -{ - struct rtc_ops *ops = file->private_data; - struct rtc_time tm; - struct rtc_wkalrm alrm; - void __user *uarg = (void __user *)arg; - int ret = -EINVAL; - - switch (cmd) { - case RTC_ALM_READ: - ret = rtc_arm_read_alarm(ops, &alrm); - if (ret) - break; - ret = copy_to_user(uarg, &alrm.time, sizeof(tm)); - if (ret) - ret = -EFAULT; - break; - - case RTC_ALM_SET: - ret = copy_from_user(&alrm.time, uarg, sizeof(tm)); - if (ret) { - ret = -EFAULT; - break; - } - alrm.enabled = 0; - alrm.pending = 0; - alrm.time.tm_mday = -1; - alrm.time.tm_mon = -1; - alrm.time.tm_year = -1; - alrm.time.tm_wday = -1; - alrm.time.tm_yday = -1; - alrm.time.tm_isdst = -1; - ret = rtc_arm_set_alarm(ops, &alrm); - break; - - case RTC_RD_TIME: - ret = rtc_arm_read_time(ops, &tm); - if (ret) - break; - ret = copy_to_user(uarg, &tm, sizeof(tm)); - if (ret) - ret = -EFAULT; - break; - - case RTC_SET_TIME: - if (!capable(CAP_SYS_TIME)) { - ret = -EACCES; - break; - } - ret = copy_from_user(&tm, uarg, sizeof(tm)); - if (ret) { - ret = -EFAULT; - break; - } - ret = rtc_arm_set_time(ops, &tm); - break; - - case RTC_EPOCH_SET: -#ifndef rtc_epoch - /* - * There were no RTC clocks before 1900. - */ - if (arg < 1900) { - ret = -EINVAL; - break; - } - if (!capable(CAP_SYS_TIME)) { - ret = -EACCES; - break; - } - rtc_epoch = arg; - ret = 0; -#endif - break; - - case RTC_EPOCH_READ: - ret = put_user(rtc_epoch, (unsigned long __user *)uarg); - break; - - case RTC_WKALM_SET: - ret = copy_from_user(&alrm, uarg, sizeof(alrm)); - if (ret) { - ret = -EFAULT; - break; - } - ret = rtc_arm_set_alarm(ops, &alrm); - break; - - case RTC_WKALM_RD: - ret = rtc_arm_read_alarm(ops, &alrm); - if (ret) - break; - ret = copy_to_user(uarg, &alrm, sizeof(alrm)); - if (ret) - ret = -EFAULT; - break; - - default: - if (ops->ioctl) - ret = ops->ioctl(cmd, arg); - break; - } - return ret; -} - -static int rtc_open(struct inode *inode, struct file *file) -{ - int ret; - - mutex_lock(&rtc_mutex); - - if (rtc_inuse) { - ret = -EBUSY; - } else if (!rtc_ops || !try_module_get(rtc_ops->owner)) { - ret = -ENODEV; - } else { - file->private_data = rtc_ops; - - ret = rtc_ops->open ? rtc_ops->open() : 0; - if (ret == 0) { - spin_lock_irq(&rtc_lock); - rtc_irq_data = 0; - spin_unlock_irq(&rtc_lock); - - rtc_inuse = 1; - } - } - mutex_unlock(&rtc_mutex); - - return ret; -} - -static int rtc_release(struct inode *inode, struct file *file) -{ - struct rtc_ops *ops = file->private_data; - - if (ops->release) - ops->release(); - - spin_lock_irq(&rtc_lock); - rtc_irq_data = 0; - spin_unlock_irq(&rtc_lock); - - module_put(rtc_ops->owner); - rtc_inuse = 0; - - return 0; -} - -static int rtc_fasync(int fd, struct file *file, int on) -{ - return fasync_helper(fd, file, on, &rtc_async_queue); -} - -static const struct file_operations rtc_fops = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .read = rtc_read, - .poll = rtc_poll, - .ioctl = rtc_ioctl, - .open = rtc_open, - .release = rtc_release, - .fasync = rtc_fasync, -}; - -static struct miscdevice rtc_miscdev = { - .minor = RTC_MINOR, - .name = "rtc", - .fops = &rtc_fops, -}; - - -static int rtc_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) -{ - struct rtc_ops *ops = data; - struct rtc_wkalrm alrm; - struct rtc_time tm; - char *p = page; - - if (rtc_arm_read_time(ops, &tm) == 0) { - p += sprintf(p, - "rtc_time\t: %02d:%02d:%02d\n" - "rtc_date\t: %04d-%02d-%02d\n" - "rtc_epoch\t: %04lu\n", - tm.tm_hour, tm.tm_min, tm.tm_sec, - tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, - rtc_epoch); - } - - if (rtc_arm_read_alarm(ops, &alrm) == 0) { - p += sprintf(p, "alrm_time\t: "); - if ((unsigned int)alrm.time.tm_hour <= 24) - p += sprintf(p, "%02d:", alrm.time.tm_hour); - else - p += sprintf(p, "**:"); - if ((unsigned int)alrm.time.tm_min <= 59) - p += sprintf(p, "%02d:", alrm.time.tm_min); - else - p += sprintf(p, "**:"); - if ((unsigned int)alrm.time.tm_sec <= 59) - p += sprintf(p, "%02d\n", alrm.time.tm_sec); - else - p += sprintf(p, "**\n"); - - p += sprintf(p, "alrm_date\t: "); - if ((unsigned int)alrm.time.tm_year <= 200) - p += sprintf(p, "%04d-", alrm.time.tm_year + 1900); - else - p += sprintf(p, "****-"); - if ((unsigned int)alrm.time.tm_mon <= 11) - p += sprintf(p, "%02d-", alrm.time.tm_mon + 1); - else - p += sprintf(p, "**-"); - if ((unsigned int)alrm.time.tm_mday <= 31) - p += sprintf(p, "%02d\n", alrm.time.tm_mday); - else - p += sprintf(p, "**\n"); - p += sprintf(p, "alrm_wakeup\t: %s\n", - alrm.enabled ? "yes" : "no"); - p += sprintf(p, "alrm_pending\t: %s\n", - alrm.pending ? "yes" : "no"); - } - - if (ops->proc) - p += ops->proc(p); - - return p - page; -} - -int register_rtc(struct rtc_ops *ops) -{ - int ret = -EBUSY; - - mutex_lock(&rtc_mutex); - if (rtc_ops == NULL) { - rtc_ops = ops; - - ret = misc_register(&rtc_miscdev); - if (ret == 0) - create_proc_read_entry("driver/rtc", 0, NULL, - rtc_read_proc, ops); - } - mutex_unlock(&rtc_mutex); - - return ret; -} -EXPORT_SYMBOL(register_rtc); - -void unregister_rtc(struct rtc_ops *rtc) -{ - mutex_lock(&rtc_mutex); - if (rtc == rtc_ops) { - remove_proc_entry("driver/rtc", NULL); - misc_deregister(&rtc_miscdev); - rtc_ops = NULL; - } - mutex_unlock(&rtc_mutex); -} -EXPORT_SYMBOL(unregister_rtc); diff --git a/include/asm-arm/rtc.h b/include/asm-arm/rtc.h deleted file mode 100644 index 1a5c9232a91e..000000000000 --- a/include/asm-arm/rtc.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * linux/include/asm-arm/rtc.h - * - * Copyright (C) 2003 Deep Blue Solutions Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#ifndef ASMARM_RTC_H -#define ASMARM_RTC_H - -struct module; - -struct rtc_ops { - struct module *owner; - int (*open)(void); - void (*release)(void); - int (*ioctl)(unsigned int, unsigned long); - - int (*read_time)(struct rtc_time *); - int (*set_time)(struct rtc_time *); - int (*read_alarm)(struct rtc_wkalrm *); - int (*set_alarm)(struct rtc_wkalrm *); - int (*proc)(char *buf); -}; - -void rtc_next_alarm_time(struct rtc_time *, struct rtc_time *, struct rtc_time *); -void rtc_update(unsigned long, unsigned long); -int register_rtc(struct rtc_ops *); -void unregister_rtc(struct rtc_ops *); - -static inline int rtc_periodic_alarm(struct rtc_time *tm) -{ - return (tm->tm_year == -1) || - ((unsigned)tm->tm_mon >= 12) || - ((unsigned)(tm->tm_mday - 1) >= 31) || - ((unsigned)tm->tm_hour > 23) || - ((unsigned)tm->tm_min > 59) || - ((unsigned)tm->tm_sec > 59); -} - -#endif -- cgit v1.2.3 From 3843fc2575e3389f4f0ad0420a720240a5746a5d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 9 May 2008 12:05:57 +0100 Subject: xen: remove support for non-PAE 32-bit Non-PAE operation has been deprecated in Xen for a while, and is rarely tested or used. xen-unstable has now officially dropped non-PAE support. Since Xen/pvops' non-PAE support has also been broken for a while, we may as well completely drop it altogether. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/xen/Kconfig | 2 +- arch/x86/xen/enlighten.c | 51 ++++++++++++++++------------------------------ arch/x86/xen/mmu.c | 19 ++--------------- arch/x86/xen/mmu.h | 24 ++++++---------------- arch/x86/xen/xen-head.S | 4 ---- include/asm-x86/xen/page.h | 4 ---- 6 files changed, 27 insertions(+), 77 deletions(-) (limited to 'include') diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 2e641be2737e..525b108411bd 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -6,7 +6,7 @@ config XEN bool "Xen guest support" select PARAVIRT depends on X86_32 - depends on X86_CMPXCHG && X86_TSC && !(X86_VISWS || X86_VOYAGER) + depends on X86_CMPXCHG && X86_TSC && X86_PAE && !(X86_VISWS || X86_VOYAGER) help This is the Linux Xen port. Enabling this will allow the kernel to boot in a paravirtualized environment under the diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index c8a56e457d61..a05b7721bc81 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -785,38 +785,35 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) static __init void xen_pagetable_setup_start(pgd_t *base) { pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base; + int i; /* special set_pte for pagetable initialization */ pv_mmu_ops.set_pte = xen_set_pte_init; init_mm.pgd = base; /* - * copy top-level of Xen-supplied pagetable into place. For - * !PAE we can use this as-is, but for PAE it is a stand-in - * while we copy the pmd pages. + * copy top-level of Xen-supplied pagetable into place. This + * is a stand-in while we copy the pmd pages. */ memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t)); - if (PTRS_PER_PMD > 1) { - int i; - /* - * For PAE, need to allocate new pmds, rather than - * share Xen's, since Xen doesn't like pmd's being - * shared between address spaces. - */ - for (i = 0; i < PTRS_PER_PGD; i++) { - if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) { - pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); + /* + * For PAE, need to allocate new pmds, rather than + * share Xen's, since Xen doesn't like pmd's being + * shared between address spaces. + */ + for (i = 0; i < PTRS_PER_PGD; i++) { + if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) { + pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); - memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]), - PAGE_SIZE); + memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]), + PAGE_SIZE); - make_lowmem_page_readonly(pmd); + make_lowmem_page_readonly(pmd); - set_pgd(&base[i], __pgd(1 + __pa(pmd))); - } else - pgd_clear(&base[i]); - } + set_pgd(&base[i], __pgd(1 + __pa(pmd))); + } else + pgd_clear(&base[i]); } /* make sure zero_page is mapped RO so we can use it in pagetables */ @@ -873,17 +870,7 @@ static __init void xen_pagetable_setup_done(pgd_t *base) /* Actually pin the pagetable down, but we can't set PG_pinned yet because the page structures don't exist yet. */ - { - unsigned level; - -#ifdef CONFIG_X86_PAE - level = MMUEXT_PIN_L3_TABLE; -#else - level = MMUEXT_PIN_L2_TABLE; -#endif - - pin_pagetable_pfn(level, PFN_DOWN(__pa(base))); - } + pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base))); } /* This is called once we have the cpu_possible_map */ @@ -1093,7 +1080,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .make_pte = xen_make_pte, .make_pgd = xen_make_pgd, -#ifdef CONFIG_X86_PAE .set_pte_atomic = xen_set_pte_atomic, .set_pte_present = xen_set_pte_at, .set_pud = xen_set_pud, @@ -1102,7 +1088,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .make_pmd = xen_make_pmd, .pmd_val = xen_pmd_val, -#endif /* PAE */ .activate_mm = xen_activate_mm, .dup_mmap = xen_dup_mmap, diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 126766d43aea..07c2653ec335 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -222,7 +222,7 @@ pmdval_t xen_pmd_val(pmd_t pmd) ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT; return ret; } -#ifdef CONFIG_X86_PAE + void xen_set_pud(pud_t *ptr, pud_t val) { struct multicall_space mcs; @@ -272,12 +272,6 @@ pmd_t xen_make_pmd(pmdval_t pmd) return native_make_pmd(pmd); } -#else /* !PAE */ -void xen_set_pte(pte_t *ptep, pte_t pte) -{ - *ptep = pte; -} -#endif /* CONFIG_X86_PAE */ /* (Yet another) pagetable walker. This one is intended for pinning a @@ -430,8 +424,6 @@ static int pin_page(struct page *page, enum pt_level level) read-only, and can be pinned. */ void xen_pgd_pin(pgd_t *pgd) { - unsigned level; - xen_mc_batch(); if (pgd_walk(pgd, pin_page, TASK_SIZE)) { @@ -441,14 +433,7 @@ void xen_pgd_pin(pgd_t *pgd) xen_mc_batch(); } -#ifdef CONFIG_X86_PAE - level = MMUEXT_PIN_L3_TABLE; -#else - level = MMUEXT_PIN_L2_TABLE; -#endif - - xen_do_pin(level, PFN_DOWN(__pa(pgd))); - + xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd))); xen_mc_issue(0); } diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index b5e189b1519d..5fe961caffd4 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h @@ -37,14 +37,13 @@ void xen_exit_mmap(struct mm_struct *mm); void xen_pgd_pin(pgd_t *pgd); //void xen_pgd_unpin(pgd_t *pgd); -#ifdef CONFIG_X86_PAE -unsigned long long xen_pte_val(pte_t); -unsigned long long xen_pmd_val(pmd_t); -unsigned long long xen_pgd_val(pgd_t); +pteval_t xen_pte_val(pte_t); +pmdval_t xen_pmd_val(pmd_t); +pgdval_t xen_pgd_val(pgd_t); -pte_t xen_make_pte(unsigned long long); -pmd_t xen_make_pmd(unsigned long long); -pgd_t xen_make_pgd(unsigned long long); +pte_t xen_make_pte(pteval_t); +pmd_t xen_make_pmd(pmdval_t); +pgd_t xen_make_pgd(pgdval_t); void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval); @@ -53,15 +52,4 @@ void xen_set_pud(pud_t *ptr, pud_t val); void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); void xen_pmd_clear(pmd_t *pmdp); - -#else -unsigned long xen_pte_val(pte_t); -unsigned long xen_pmd_val(pmd_t); -unsigned long xen_pgd_val(pgd_t); - -pte_t xen_make_pte(unsigned long); -pmd_t xen_make_pmd(unsigned long); -pgd_t xen_make_pgd(unsigned long); -#endif - #endif /* _XEN_MMU_H */ diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 288d587ce73c..2ab5f42f34d8 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -30,11 +30,7 @@ ENTRY(hypercall_page) ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long startup_xen) ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long hypercall_page) ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") -#ifdef CONFIG_X86_PAE ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") -#else - ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "no") -#endif ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") #endif /*CONFIG_XEN */ diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h index baf3a4dce28c..e11f24038b1d 100644 --- a/include/asm-x86/xen/page.h +++ b/include/asm-x86/xen/page.h @@ -150,13 +150,9 @@ static inline pte_t __pte_ma(pteval_t x) return (pte_t) { .pte = x }; } -#ifdef CONFIG_X86_PAE #define pmd_val_ma(v) ((v).pmd) #define pud_val_ma(v) ((v).pgd.pgd) #define __pmd_ma(x) ((pmd_t) { (x) } ) -#else /* !X86_PAE */ -#define pmd_val_ma(v) ((v).pud.pgd.pgd) -#endif /* CONFIG_X86_PAE */ #define pgd_val_ma(x) ((x).pgd) -- cgit v1.2.3 From 2ca7633dc73405ee775a2e9b7961b47a38bc882d Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Sun, 11 May 2008 10:40:47 +1000 Subject: [POWERPC] Tweak VDSO linker script to avoid upsetting old binutils This works around bugs in older binutils' objcopy. The placement of these sections does not really matter, but it confused the buggy old BFD libraries. Signed-off-by: Roland McGrath Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/vdso64/vdso64.lds.S | 6 +++--- include/asm-powerpc/ppc_asm.h | 6 ++++++ 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S index 932b3fdb34b9..7d6585f90277 100644 --- a/arch/powerpc/kernel/vdso64/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -43,15 +43,15 @@ SECTIONS .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } .rodata1 : { *(.rodata1) } + .dynamic : { *(.dynamic) } :text :dynamic + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr .eh_frame : { KEEP (*(.eh_frame)) } :text .gcc_except_table : { *(.gcc_except_table) } + .rela.dyn ALIGN(8) : { *(.rela.dyn) } .opd ALIGN(8) : { KEEP (*(.opd)) } .got ALIGN(8) : { *(.got .toc) } - .rela.dyn ALIGN(8) : { *(.rela.dyn) } - - .dynamic : { *(.dynamic) } :text :dynamic _end = .; PROVIDE(end = .); diff --git a/include/asm-powerpc/ppc_asm.h b/include/asm-powerpc/ppc_asm.h index 2dbd4e7884fa..ef96bfd4ef4c 100644 --- a/include/asm-powerpc/ppc_asm.h +++ b/include/asm-powerpc/ppc_asm.h @@ -356,6 +356,12 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601) #define toreal(rd) #define fromreal(rd) +/* + * We use addis to ensure compatibility with the "classic" ppc versions of + * these macros, which use rs = 0 to get the tophys offset in rd, rather than + * converting the address in r0, and so this version has to do that too + * (i.e. set register rd to 0 when rs == 0). + */ #define tophys(rd,rs) \ addis rd,rs,0 -- cgit v1.2.3 From 475ca391b490a683d66bf19999a8a7a24913f139 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Thu, 22 May 2008 06:59:23 +1000 Subject: [POWERPC] mpic: Deal with bogus NIRQ in Feature Reporting Register Some chips (like the SoCs from Freescale) report the wrong value in NIRQ and this causes issues if its doesn't match or exceed the value of irq_count. Add a flag that board code can set to just use irq_count instead of FRR[NIRQ]. Eventually we'll add a device tree property with the number of sources. Signed-off-by: Kumar Gala Signed-off-by: Paul Mackerras --- arch/powerpc/sysdev/mpic.c | 9 ++++++--- include/asm-powerpc/mpic.h | 2 ++ 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 8619f2a3f1f6..466e2183e86c 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -1144,9 +1144,12 @@ struct mpic * __init mpic_alloc(struct device_node *node, mpic->num_cpus = ((greg_feature & MPIC_GREG_FEATURE_LAST_CPU_MASK) >> MPIC_GREG_FEATURE_LAST_CPU_SHIFT) + 1; if (isu_size == 0) - mpic->num_sources = - ((greg_feature & MPIC_GREG_FEATURE_LAST_SRC_MASK) - >> MPIC_GREG_FEATURE_LAST_SRC_SHIFT) + 1; + if (flags & MPIC_BROKEN_FRR_NIRQS) + mpic->num_sources = mpic->irq_count; + else + mpic->num_sources = + ((greg_feature & MPIC_GREG_FEATURE_LAST_SRC_MASK) + >> MPIC_GREG_FEATURE_LAST_SRC_SHIFT) + 1; /* Map the per-CPU registers */ for (i = 0; i < mpic->num_cpus; i++) { diff --git a/include/asm-powerpc/mpic.h b/include/asm-powerpc/mpic.h index 943c5a3fac8a..6802570f4240 100644 --- a/include/asm-powerpc/mpic.h +++ b/include/asm-powerpc/mpic.h @@ -353,6 +353,8 @@ struct mpic #define MPIC_ENABLE_MCK 0x00000200 /* Disable bias among target selection, spread interrupts evenly */ #define MPIC_NO_BIAS 0x00000400 +/* Ignore NIRQS as reported by FRR */ +#define MPIC_BROKEN_FRR_NIRQS 0x00000800 /* MPIC HW modification ID */ #define MPIC_REGSET_MASK 0xf0000000 -- cgit v1.2.3 From bd3bff9e20f454b242d979ec2f9a4dca0d5fa06f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:41 +0200 Subject: sched: add latency tracer callbacks to the scheduler add 3 lightweight callbacks to the tracer backend. zero impact if tracing is turned off. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 26 ++++++++++++++++++++++++++ kernel/sched.c | 3 +++ 2 files changed, 29 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5395a6176f4b..717cab8a0c83 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2117,6 +2117,32 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) } #endif +#ifdef CONFIG_CONTEXT_SWITCH_TRACER +extern void +ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next); +#else +static inline void +ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) +{ +} +#endif + +#ifdef CONFIG_SCHED_TRACER +extern void +ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr); +extern void +ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr); +#else +static inline void +ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) +{ +} +static inline void +ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr) +{ +} +#endif + extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); extern long sched_getaffinity(pid_t pid, cpumask_t *mask); diff --git a/kernel/sched.c b/kernel/sched.c index cfa222a91539..463dcdb36ef8 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2467,6 +2467,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) out_activate: #endif /* CONFIG_SMP */ + ftrace_wake_up_task(p, rq->curr); schedstat_inc(p, se.nr_wakeups); if (sync) schedstat_inc(p, se.nr_wakeups_sync); @@ -2611,6 +2612,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) p->sched_class->task_new(rq, p); inc_nr_running(rq); } + ftrace_wake_up_new_task(p, rq->curr); check_preempt_curr(rq, p); #ifdef CONFIG_SMP if (p->sched_class->task_wake_up) @@ -2783,6 +2785,7 @@ context_switch(struct rq *rq, struct task_struct *prev, struct mm_struct *mm, *oldmm; prepare_task_switch(rq, prev, next); + ftrace_ctx_switch(prev, next); mm = next->mm; oldmm = prev->active_mm; /* -- cgit v1.2.3 From 7c731e0a495e25e79dc1e9e68772a67a55721a65 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:41 +0200 Subject: ftrace: make the task state char-string visible to all The tracer wants to be able to convert the state number into a user visible character. This patch pulls that conversion string out the scheduler into the header. This way if it were to ever change, other parts of the kernel will know. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 2 ++ kernel/sched.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 717cab8a0c83..6e26f1fdbfe2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2237,6 +2237,8 @@ static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p) } #endif /* CONFIG_MM_OWNER */ +#define TASK_STATE_TO_CHAR_STR "RSDTtZX" + #endif /* __KERNEL__ */ #endif diff --git a/kernel/sched.c b/kernel/sched.c index 463dcdb36ef8..73e600852365 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5729,7 +5729,7 @@ out_unlock: return retval; } -static const char stat_nam[] = "RSDTtZX"; +static const char stat_nam[] = TASK_STATE_TO_CHAR_STR; void sched_show_task(struct task_struct *p) { -- cgit v1.2.3 From 502825282e6f79c975a644afc124432ec1744de4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:41 +0200 Subject: ftrace: add preempt_enable/disable notrace macros The tracer may need to call preempt_enable and disable functions for time keeping and such. The trace gets ugly when we see these functions show up for all traces. To make the output cleaner this patch adds preempt_enable_notrace and preempt_disable_notrace to be used by tracer (and debugging) functions. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/preempt.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 23f0c54175cd..36b03d50bf40 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -52,6 +52,34 @@ do { \ preempt_check_resched(); \ } while (0) +/* For debugging and tracer internals only! */ +#define add_preempt_count_notrace(val) \ + do { preempt_count() += (val); } while (0) +#define sub_preempt_count_notrace(val) \ + do { preempt_count() -= (val); } while (0) +#define inc_preempt_count_notrace() add_preempt_count_notrace(1) +#define dec_preempt_count_notrace() sub_preempt_count_notrace(1) + +#define preempt_disable_notrace() \ +do { \ + inc_preempt_count_notrace(); \ + barrier(); \ +} while (0) + +#define preempt_enable_no_resched_notrace() \ +do { \ + barrier(); \ + dec_preempt_count_notrace(); \ +} while (0) + +/* preempt_check_resched is OK to trace */ +#define preempt_enable_notrace() \ +do { \ + preempt_enable_no_resched_notrace(); \ + barrier(); \ + preempt_check_resched(); \ +} while (0) + #else #define preempt_disable() do { } while (0) @@ -59,6 +87,10 @@ do { \ #define preempt_enable() do { } while (0) #define preempt_check_resched() do { } while (0) +#define preempt_disable_notrace() do { } while (0) +#define preempt_enable_no_resched_notrace() do { } while (0) +#define preempt_enable_notrace() do { } while (0) + #endif #ifdef CONFIG_PREEMPT_NOTIFIERS -- cgit v1.2.3 From ffdc1a09ae7e2cbd714a446ee38a27f625b5f1c8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:41 +0200 Subject: tracing: add notrace to linkage.h notrace signals that a function should not be traced. Most of the time this is used by tracers to annotate code that cannot be traced - it's in a volatile state (such as in user vdso context or NMI context) or it's in the tracer internals. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/linkage.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 2119610b24f8..14f329c64ba8 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -3,6 +3,8 @@ #include +#define notrace __attribute__((no_instrument_function)) + #ifdef __cplusplus #define CPP_ASMLINKAGE extern "C" #else -- cgit v1.2.3 From 23adec554a7648f99c8acc0caf49c66320cd2b84 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:41 +0200 Subject: x86: add notrace annotations to vsyscall. Add the notrace annotations to the vsyscall functions - there we are not in kernel context yet, so the tracer function cannot (and must not) be called. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/vsyscall_64.c | 3 ++- arch/x86/vdso/vclock_gettime.c | 15 ++++++++------- arch/x86/vdso/vgetcpu.c | 3 ++- include/asm-x86/vsyscall.h | 3 ++- 4 files changed, 14 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 61efa2f7d564..4063dfa2a02d 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -42,7 +42,8 @@ #include #include -#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) +#define __vsyscall(nr) \ + __attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace #define __syscall_clobber "r11","cx","memory" /* diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 23476c2ebfc4..5cb8f754c52d 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -23,7 +23,7 @@ #define gtod vdso_vsyscall_gtod_data -static long vdso_fallback_gettime(long clock, struct timespec *ts) +notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) { long ret; asm("syscall" : "=a" (ret) : @@ -31,7 +31,7 @@ static long vdso_fallback_gettime(long clock, struct timespec *ts) return ret; } -static inline long vgetns(void) +notrace static inline long vgetns(void) { long v; cycles_t (*vread)(void); @@ -40,7 +40,7 @@ static inline long vgetns(void) return (v * gtod->clock.mult) >> gtod->clock.shift; } -static noinline int do_realtime(struct timespec *ts) +notrace static noinline int do_realtime(struct timespec *ts) { unsigned long seq, ns; do { @@ -54,7 +54,8 @@ static noinline int do_realtime(struct timespec *ts) } /* Copy of the version in kernel/time.c which we cannot directly access */ -static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec) +notrace static void +vset_normalized_timespec(struct timespec *ts, long sec, long nsec) { while (nsec >= NSEC_PER_SEC) { nsec -= NSEC_PER_SEC; @@ -68,7 +69,7 @@ static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec) ts->tv_nsec = nsec; } -static noinline int do_monotonic(struct timespec *ts) +notrace static noinline int do_monotonic(struct timespec *ts) { unsigned long seq, ns, secs; do { @@ -82,7 +83,7 @@ static noinline int do_monotonic(struct timespec *ts) return 0; } -int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) +notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) { if (likely(gtod->sysctl_enabled && gtod->clock.vread)) switch (clock) { @@ -96,7 +97,7 @@ int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) int clock_gettime(clockid_t, struct timespec *) __attribute__((weak, alias("__vdso_clock_gettime"))); -int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) +notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) { long ret; if (likely(gtod->sysctl_enabled && gtod->clock.vread)) { diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c index c8097f17f8a9..9fbc6b20026b 100644 --- a/arch/x86/vdso/vgetcpu.c +++ b/arch/x86/vdso/vgetcpu.c @@ -13,7 +13,8 @@ #include #include "vextern.h" -long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) +notrace long +__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) { unsigned int p; diff --git a/include/asm-x86/vsyscall.h b/include/asm-x86/vsyscall.h index 17b3700949bf..6b66ff905af0 100644 --- a/include/asm-x86/vsyscall.h +++ b/include/asm-x86/vsyscall.h @@ -24,7 +24,8 @@ enum vsyscall_num { ((unused, __section__ (".vsyscall_gtod_data"),aligned(16))) #define __section_vsyscall_clock __attribute__ \ ((unused, __section__ (".vsyscall_clock"),aligned(16))) -#define __vsyscall_fn __attribute__ ((unused,__section__(".vsyscall_fn"))) +#define __vsyscall_fn \ + __attribute__ ((unused, __section__(".vsyscall_fn"))) notrace #define VGETCPU_RDTSCP 1 #define VGETCPU_LSL 2 -- cgit v1.2.3 From 16444a8a40d4c7b4f6de34af0cae1f76a4f6c901 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: add basic support for gcc profiler instrumentation If CONFIG_FTRACE is selected and /proc/sys/kernel/ftrace_enabled is set to a non-zero value the ftrace routine will be called everytime we enter a kernel function that is not marked with the "notrace" attribute. The ftrace routine will then call a registered function if a function happens to be registered. [ This code has been highly hacked by Steven Rostedt and Ingo Molnar, so don't blame Arnaldo for all of this ;-) ] Update: It is now possible to register more than one ftrace function. If only one ftrace function is registered, that will be the function that ftrace calls directly. If more than one function is registered, then ftrace will call a function that will loop through the functions to call. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- Makefile | 4 ++ arch/x86/Kconfig | 1 + arch/x86/kernel/entry_32.S | 27 +++++++++ arch/x86/kernel/entry_64.S | 37 ++++++++++++ include/linux/ftrace.h | 38 +++++++++++++ kernel/Makefile | 1 + kernel/trace/Kconfig | 5 ++ kernel/trace/Makefile | 3 + kernel/trace/ftrace.c | 138 +++++++++++++++++++++++++++++++++++++++++++++ lib/Kconfig.debug | 2 + 10 files changed, 256 insertions(+) create mode 100644 include/linux/ftrace.h create mode 100644 kernel/trace/Kconfig create mode 100644 kernel/trace/Makefile create mode 100644 kernel/trace/ftrace.c (limited to 'include') diff --git a/Makefile b/Makefile index 20b32351906b..b4a273f19b52 100644 --- a/Makefile +++ b/Makefile @@ -528,6 +528,10 @@ KBUILD_CFLAGS += -g KBUILD_AFLAGS += -gdwarf-2 endif +ifdef CONFIG_FTRACE +KBUILD_CFLAGS += -pg +endif + # We trigger additional mismatches with less inlining ifdef CONFIG_DEBUG_SECTION_MISMATCH KBUILD_CFLAGS += $(call cc-option, -fno-inline-functions-called-once) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fe361ae7ef2f..c742dfeb0dbe 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -23,6 +23,7 @@ config X86 select HAVE_OPROFILE select HAVE_KPROBES select HAVE_KRETPROBES + select HAVE_FTRACE select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) select HAVE_ARCH_KGDB if !X86_VOYAGER diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 2a609dc3271c..f47b9b5440d2 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1109,6 +1109,33 @@ ENDPROC(xen_failsafe_callback) #endif /* CONFIG_XEN */ +#ifdef CONFIG_FTRACE +ENTRY(mcount) + cmpl $ftrace_stub, ftrace_trace_function + jnz trace + +.globl ftrace_stub +ftrace_stub: + ret + + /* taken from glibc */ +trace: + pushl %eax + pushl %ecx + pushl %edx + movl 0xc(%esp), %eax + movl 0x4(%ebp), %edx + + call *ftrace_trace_function + + popl %edx + popl %ecx + popl %eax + + jmp ftrace_stub +END(mcount) +#endif + .section .rodata,"a" #include "syscall_table_32.S" diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 556a8df522a7..f046e0c64883 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -54,6 +54,43 @@ .code64 +#ifdef CONFIG_FTRACE +ENTRY(mcount) + cmpq $ftrace_stub, ftrace_trace_function + jnz trace +.globl ftrace_stub +ftrace_stub: + retq + +trace: + /* taken from glibc */ + subq $0x38, %rsp + movq %rax, (%rsp) + movq %rcx, 8(%rsp) + movq %rdx, 16(%rsp) + movq %rsi, 24(%rsp) + movq %rdi, 32(%rsp) + movq %r8, 40(%rsp) + movq %r9, 48(%rsp) + + movq 0x38(%rsp), %rdi + movq 8(%rbp), %rsi + + call *ftrace_trace_function + + movq 48(%rsp), %r9 + movq 40(%rsp), %r8 + movq 32(%rsp), %rdi + movq 24(%rsp), %rsi + movq 16(%rsp), %rdx + movq 8(%rsp), %rcx + movq (%rsp), %rax + addq $0x38, %rsp + + jmp ftrace_stub +END(mcount) +#endif + #ifndef CONFIG_PREEMPT #define retint_kernel retint_restore_args #endif diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h new file mode 100644 index 000000000000..b96ef14c249a --- /dev/null +++ b/include/linux/ftrace.h @@ -0,0 +1,38 @@ +#ifndef _LINUX_FTRACE_H +#define _LINUX_FTRACE_H + +#ifdef CONFIG_FTRACE + +#include + +#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) +#define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1)) +#define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2)) + +typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); + +struct ftrace_ops { + ftrace_func_t func; + struct ftrace_ops *next; +}; + +/* + * The ftrace_ops must be a static and should also + * be read_mostly. These functions do modify read_mostly variables + * so use them sparely. Never free an ftrace_op or modify the + * next pointer after it has been registered. Even after unregistering + * it, the next pointer may still be used internally. + */ +int register_ftrace_function(struct ftrace_ops *ops); +int unregister_ftrace_function(struct ftrace_ops *ops); +void clear_ftrace_function(void); + +extern void ftrace_stub(unsigned long a0, unsigned long a1); +extern void mcount(void); + +#else /* !CONFIG_FTRACE */ +# define register_ftrace_function(ops) do { } while (0) +# define unregister_ftrace_function(ops) do { } while (0) +# define clear_ftrace_function(ops) do { } while (0) +#endif /* CONFIG_FTRACE */ +#endif /* _LINUX_FTRACE_H */ diff --git a/kernel/Makefile b/kernel/Makefile index 1c9938addb9d..fa05f6d8bdbf 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -69,6 +69,7 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o obj-$(CONFIG_MARKERS) += marker.o obj-$(CONFIG_LATENCYTOP) += latencytop.o +obj-$(CONFIG_FTRACE) += trace/ ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig new file mode 100644 index 000000000000..8185c91417bc --- /dev/null +++ b/kernel/trace/Kconfig @@ -0,0 +1,5 @@ +# +# Architectures that offer an FTRACE implementation should select HAVE_FTRACE: +# +config HAVE_FTRACE + bool diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile new file mode 100644 index 000000000000..bf4fd215a6a9 --- /dev/null +++ b/kernel/trace/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_FTRACE) += libftrace.o + +libftrace-y := ftrace.o diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c new file mode 100644 index 000000000000..b6a80b98a3fb --- /dev/null +++ b/kernel/trace/ftrace.c @@ -0,0 +1,138 @@ +/* + * Infrastructure for profiling code inserted by 'gcc -pg'. + * + * Copyright (C) 2007-2008 Steven Rostedt + * Copyright (C) 2004-2008 Ingo Molnar + * + * Originally ported from the -rt patch by: + * Copyright (C) 2007 Arnaldo Carvalho de Melo + * + * Based on code in the latency_tracer, that is: + * + * Copyright (C) 2004-2006 Ingo Molnar + * Copyright (C) 2004 William Lee Irwin III + */ + +#include +#include + +static DEFINE_SPINLOCK(ftrace_func_lock); +static struct ftrace_ops ftrace_list_end __read_mostly = +{ + .func = ftrace_stub, +}; + +static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end; +ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; + +/* mcount is defined per arch in assembly */ +EXPORT_SYMBOL(mcount); + +notrace void ftrace_list_func(unsigned long ip, unsigned long parent_ip) +{ + struct ftrace_ops *op = ftrace_list; + + /* in case someone actually ports this to alpha! */ + read_barrier_depends(); + + while (op != &ftrace_list_end) { + /* silly alpha */ + read_barrier_depends(); + op->func(ip, parent_ip); + op = op->next; + }; +} + +/** + * register_ftrace_function - register a function for profiling + * @ops - ops structure that holds the function for profiling. + * + * Register a function to be called by all functions in the + * kernel. + * + * Note: @ops->func and all the functions it calls must be labeled + * with "notrace", otherwise it will go into a + * recursive loop. + */ +int register_ftrace_function(struct ftrace_ops *ops) +{ + unsigned long flags; + + spin_lock_irqsave(&ftrace_func_lock, flags); + ops->next = ftrace_list; + /* + * We are entering ops into the ftrace_list but another + * CPU might be walking that list. We need to make sure + * the ops->next pointer is valid before another CPU sees + * the ops pointer included into the ftrace_list. + */ + smp_wmb(); + ftrace_list = ops; + /* + * For one func, simply call it directly. + * For more than one func, call the chain. + */ + if (ops->next == &ftrace_list_end) + ftrace_trace_function = ops->func; + else + ftrace_trace_function = ftrace_list_func; + spin_unlock_irqrestore(&ftrace_func_lock, flags); + + return 0; +} + +/** + * unregister_ftrace_function - unresgister a function for profiling. + * @ops - ops structure that holds the function to unregister + * + * Unregister a function that was added to be called by ftrace profiling. + */ +int unregister_ftrace_function(struct ftrace_ops *ops) +{ + unsigned long flags; + struct ftrace_ops **p; + int ret = 0; + + spin_lock_irqsave(&ftrace_func_lock, flags); + + /* + * If we are the only function, then the ftrace pointer is + * pointing directly to that function. + */ + if (ftrace_list == ops && ops->next == &ftrace_list_end) { + ftrace_trace_function = ftrace_stub; + ftrace_list = &ftrace_list_end; + goto out; + } + + for (p = &ftrace_list; *p != &ftrace_list_end; p = &(*p)->next) + if (*p == ops) + break; + + if (*p != ops) { + ret = -1; + goto out; + } + + *p = (*p)->next; + + /* If we only have one func left, then call that directly */ + if (ftrace_list->next == &ftrace_list_end) + ftrace_trace_function = ftrace_list->func; + + out: + spin_unlock_irqrestore(&ftrace_func_lock, flags); + + return 0; +} + +/** + * clear_ftrace_function - reset the ftrace function + * + * This NULLs the ftrace function and in essence stops + * tracing. There may be lag + */ +void clear_ftrace_function(void) +{ + ftrace_trace_function = ftrace_stub; +} diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d2099f41aa1e..d8b6279a9b42 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -634,6 +634,8 @@ config LATENCYTOP Enable this option if you want to use the LatencyTOP tool to find out which userspace is blocking on what kernel operations. +source kernel/trace/Kconfig + config PROVIDE_OHCI1394_DMA_INIT bool "Remote debugging over FireWire early on boot" depends on PCI && X86 -- cgit v1.2.3 From 352ad25aa4a189c667cb2af333948d34692a2d27 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: tracer for scheduler wakeup latency This patch adds the tracer that tracks the wakeup latency of the highest priority waking task. "wakeup" is added to /debugfs/tracing/available_tracers Also added to /debugfs/tracing tracing_max_latency holds the current max latency for the wakeup wakeup_thresh if set to other than zero, a log will be recorded for every wakeup that takes longer than the number entered in here (usecs for all counters) (deletes previous trace) Examples: (with ftrace_enabled = 0) ============ preemption latency trace v1.1.5 on 2.6.24-rc8 Signed-off-by: Ingo Molnar -------------------------------------------------------------------- latency: 26 us, #2/2, CPU#1 | (M:rt VP:0, KP:0, SP:0 HP:0 #P:2) ----------------- | task: migration/0-3 (uid:0 nice:-5 policy:1 rt_prio:99) ----------------- _------=> CPU# / _-----=> irqs-off | / _----=> need-resched || / _---=> hardirq/softirq ||| / _--=> preempt-depth |||| / ||||| delay cmd pid ||||| time | caller \ / ||||| \ | / quilt-8551 0d..3 0us+: wake_up_process+0x15/0x17 (sched_exec+0xc9/0x100 ) quilt-8551 0d..4 26us : sched_switch_callback+0x73/0x81 (schedule+0x483/0x6d5 ) vim:ft=help ============ (with ftrace_enabled = 1) ============ preemption latency trace v1.1.5 on 2.6.24-rc8 -------------------------------------------------------------------- latency: 36 us, #45/45, CPU#0 | (M:rt VP:0, KP:0, SP:0 HP:0 #P:2) ----------------- | task: migration/1-5 (uid:0 nice:-5 policy:1 rt_prio:99) ----------------- _------=> CPU# / _-----=> irqs-off | / _----=> need-resched || / _---=> hardirq/softirq ||| / _--=> preempt-depth |||| / ||||| delay cmd pid ||||| time | caller \ / ||||| \ | / bash-10653 1d..3 0us : wake_up_process+0x15/0x17 (sched_exec+0xc9/0x100 ) bash-10653 1d..3 1us : try_to_wake_up+0x271/0x2e7 (sub_preempt_count+0xc/0x7a ) bash-10653 1d..2 2us : try_to_wake_up+0x296/0x2e7 (update_rq_clock+0x9/0x20 ) bash-10653 1d..2 2us : update_rq_clock+0x1e/0x20 (__update_rq_clock+0xc/0x90 ) bash-10653 1d..2 3us : __update_rq_clock+0x1b/0x90 (sched_clock+0x9/0x29 ) bash-10653 1d..2 4us : try_to_wake_up+0x2a6/0x2e7 (activate_task+0xc/0x3f ) bash-10653 1d..2 4us : activate_task+0x2d/0x3f (enqueue_task+0xe/0x66 ) bash-10653 1d..2 5us : enqueue_task+0x5b/0x66 (enqueue_task_rt+0x9/0x3c ) bash-10653 1d..2 6us : try_to_wake_up+0x2ba/0x2e7 (check_preempt_wakeup+0x12/0x99 ) [...] bash-10653 1d..5 33us : tracing_record_cmdline+0xcf/0xd4 (_spin_unlock+0x9/0x33 ) bash-10653 1d..5 34us : _spin_unlock+0x19/0x33 (sub_preempt_count+0xc/0x7a ) bash-10653 1d..4 35us : wakeup_sched_switch+0x65/0x2ff (_spin_lock_irqsave+0xc/0xa9 ) bash-10653 1d..4 35us : _spin_lock_irqsave+0x19/0xa9 (add_preempt_count+0xe/0x77 ) bash-10653 1d..4 36us : sched_switch_callback+0x73/0x81 (schedule+0x483/0x6d5 ) vim:ft=help ============ The [...] was added here to not waste your email box space. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 23 ++- kernel/trace/Kconfig | 13 ++ kernel/trace/Makefile | 1 + kernel/trace/trace_sched_wakeup.c | 310 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 343 insertions(+), 4 deletions(-) create mode 100644 kernel/trace/trace_sched_wakeup.c (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b96ef14c249a..db8a5e7abe41 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -5,10 +5,6 @@ #include -#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) -#define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1)) -#define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2)) - typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); struct ftrace_ops { @@ -35,4 +31,23 @@ extern void mcount(void); # define unregister_ftrace_function(ops) do { } while (0) # define clear_ftrace_function(ops) do { } while (0) #endif /* CONFIG_FTRACE */ + + +#ifdef CONFIG_FRAME_POINTER +/* TODO: need to fix this for ARM */ +# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) +# define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1)) +# define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2)) +# define CALLER_ADDR3 ((unsigned long)__builtin_return_address(3)) +# define CALLER_ADDR4 ((unsigned long)__builtin_return_address(4)) +# define CALLER_ADDR5 ((unsigned long)__builtin_return_address(5)) +#else +# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) +# define CALLER_ADDR1 0UL +# define CALLER_ADDR2 0UL +# define CALLER_ADDR3 0UL +# define CALLER_ADDR4 0UL +# define CALLER_ADDR5 0UL +#endif + #endif /* _LINUX_FTRACE_H */ diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 5d6aa92866cd..892ecc94a82b 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -4,6 +4,9 @@ config HAVE_FTRACE bool +config TRACER_MAX_TRACE + bool + config TRACING bool select DEBUG_FS @@ -23,6 +26,16 @@ config FTRACE (the bootup default), then the overhead of the instructions is very small and not measurable even in micro-benchmarks. +config SCHED_TRACER + bool "Scheduling Latency Tracer" + depends on DEBUG_KERNEL + select TRACING + select CONTEXT_SWITCH_TRACER + select TRACER_MAX_TRACE + help + This tracer tracks the latency of the highest priority task + to be scheduled in, starting from the point it has woken up. + config CONTEXT_SWITCH_TRACER bool "Trace process context switches" depends on DEBUG_KERNEL diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 6b54ceb7f16e..5508cdb19aea 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -3,5 +3,6 @@ obj-$(CONFIG_FTRACE) += libftrace.o obj-$(CONFIG_TRACING) += trace.o obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o obj-$(CONFIG_FTRACE) += trace_functions.o +obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o libftrace-y := ftrace.o diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c new file mode 100644 index 000000000000..7c3ccefcf4c3 --- /dev/null +++ b/kernel/trace/trace_sched_wakeup.c @@ -0,0 +1,310 @@ +/* + * trace task wakeup timings + * + * Copyright (C) 2007-2008 Steven Rostedt + * Copyright (C) 2008 Ingo Molnar + * + * Based on code from the latency_tracer, that is: + * + * Copyright (C) 2004-2006 Ingo Molnar + * Copyright (C) 2004 William Lee Irwin III + */ +#include +#include +#include +#include +#include +#include + +#include "trace.h" + +static struct trace_array *wakeup_trace; +static int __read_mostly tracer_enabled; + +static struct task_struct *wakeup_task; +static int wakeup_cpu; +static unsigned wakeup_prio = -1; + +static DEFINE_SPINLOCK(wakeup_lock); + +static void notrace __wakeup_reset(struct trace_array *tr); + +/* + * Should this new latency be reported/recorded? + */ +static int notrace report_latency(cycle_t delta) +{ + if (tracing_thresh) { + if (delta < tracing_thresh) + return 0; + } else { + if (delta <= tracing_max_latency) + return 0; + } + return 1; +} + +void notrace +wakeup_sched_switch(struct task_struct *prev, struct task_struct *next) +{ + unsigned long latency = 0, t0 = 0, t1 = 0; + struct trace_array *tr = wakeup_trace; + struct trace_array_cpu *data; + cycle_t T0, T1, delta; + unsigned long flags; + long disabled; + int cpu; + + if (unlikely(!tracer_enabled)) + return; + + /* + * When we start a new trace, we set wakeup_task to NULL + * and then set tracer_enabled = 1. We want to make sure + * that another CPU does not see the tracer_enabled = 1 + * and the wakeup_task with an older task, that might + * actually be the same as next. + */ + smp_rmb(); + + if (next != wakeup_task) + return; + + /* The task we are waitng for is waking up */ + data = tr->data[wakeup_cpu]; + + /* disable local data, not wakeup_cpu data */ + cpu = raw_smp_processor_id(); + disabled = atomic_inc_return(&tr->data[cpu]->disabled); + if (likely(disabled != 1)) + goto out; + + spin_lock_irqsave(&wakeup_lock, flags); + + /* We could race with grabbing wakeup_lock */ + if (unlikely(!tracer_enabled || next != wakeup_task)) + goto out_unlock; + + ftrace(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags); + + /* + * usecs conversion is slow so we try to delay the conversion + * as long as possible: + */ + T0 = data->preempt_timestamp; + T1 = now(cpu); + delta = T1-T0; + + if (!report_latency(delta)) + goto out_unlock; + + latency = nsecs_to_usecs(delta); + + tracing_max_latency = delta; + t0 = nsecs_to_usecs(T0); + t1 = nsecs_to_usecs(T1); + + update_max_tr(tr, wakeup_task, wakeup_cpu); + + if (tracing_thresh) { + printk(KERN_INFO "(%16s-%-5d|#%d): %lu us wakeup latency " + "violates %lu us threshold.\n" + " => started at timestamp %lu: ", + wakeup_task->comm, wakeup_task->pid, + raw_smp_processor_id(), + latency, nsecs_to_usecs(tracing_thresh), t0); + } else { + printk(KERN_INFO "(%16s-%-5d|#%d): new %lu us maximum " + "wakeup latency.\n => started at timestamp %lu: ", + wakeup_task->comm, wakeup_task->pid, + cpu, latency, t0); + } + + printk(KERN_CONT " ended at timestamp %lu: ", t1); + dump_stack(); + t1 = nsecs_to_usecs(now(cpu)); + printk(KERN_CONT " dump-end timestamp %lu\n\n", t1); + +out_unlock: + __wakeup_reset(tr); + spin_unlock_irqrestore(&wakeup_lock, flags); +out: + atomic_dec(&tr->data[cpu]->disabled); +} + +static void notrace __wakeup_reset(struct trace_array *tr) +{ + struct trace_array_cpu *data; + int cpu; + + assert_spin_locked(&wakeup_lock); + + for_each_possible_cpu(cpu) { + data = tr->data[cpu]; + tracing_reset(data); + } + + wakeup_cpu = -1; + wakeup_prio = -1; + + if (wakeup_task) + put_task_struct(wakeup_task); + + wakeup_task = NULL; +} + +static void notrace wakeup_reset(struct trace_array *tr) +{ + unsigned long flags; + + spin_lock_irqsave(&wakeup_lock, flags); + __wakeup_reset(tr); + spin_unlock_irqrestore(&wakeup_lock, flags); +} + +static notrace void +wakeup_check_start(struct trace_array *tr, struct task_struct *p, + struct task_struct *curr) +{ + int cpu = smp_processor_id(); + unsigned long flags; + long disabled; + + if (likely(!rt_task(p)) || + p->prio >= wakeup_prio || + p->prio >= curr->prio) + return; + + disabled = atomic_inc_return(&tr->data[cpu]->disabled); + if (unlikely(disabled != 1)) + goto out; + + /* interrupts should be off from try_to_wake_up */ + spin_lock(&wakeup_lock); + + /* check for races. */ + if (!tracer_enabled || p->prio >= wakeup_prio) + goto out_locked; + + /* reset the trace */ + __wakeup_reset(tr); + + wakeup_cpu = task_cpu(p); + wakeup_prio = p->prio; + + wakeup_task = p; + get_task_struct(wakeup_task); + + local_save_flags(flags); + + tr->data[wakeup_cpu]->preempt_timestamp = now(cpu); + ftrace(tr, tr->data[wakeup_cpu], CALLER_ADDR1, CALLER_ADDR2, flags); + +out_locked: + spin_unlock(&wakeup_lock); +out: + atomic_dec(&tr->data[cpu]->disabled); +} + +notrace void +ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) +{ + if (likely(!tracer_enabled)) + return; + + wakeup_check_start(wakeup_trace, wakee, curr); +} + +notrace void +ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr) +{ + if (likely(!tracer_enabled)) + return; + + wakeup_check_start(wakeup_trace, wakee, curr); +} + +static notrace void start_wakeup_tracer(struct trace_array *tr) +{ + wakeup_reset(tr); + + /* + * Don't let the tracer_enabled = 1 show up before + * the wakeup_task is reset. This may be overkill since + * wakeup_reset does a spin_unlock after setting the + * wakeup_task to NULL, but I want to be safe. + * This is a slow path anyway. + */ + smp_wmb(); + + tracer_enabled = 1; + + return; +} + +static notrace void stop_wakeup_tracer(struct trace_array *tr) +{ + tracer_enabled = 0; +} + +static notrace void wakeup_tracer_init(struct trace_array *tr) +{ + wakeup_trace = tr; + + if (tr->ctrl) + start_wakeup_tracer(tr); +} + +static notrace void wakeup_tracer_reset(struct trace_array *tr) +{ + if (tr->ctrl) { + stop_wakeup_tracer(tr); + /* make sure we put back any tasks we are tracing */ + wakeup_reset(tr); + } +} + +static void wakeup_tracer_ctrl_update(struct trace_array *tr) +{ + if (tr->ctrl) + start_wakeup_tracer(tr); + else + stop_wakeup_tracer(tr); +} + +static void notrace wakeup_tracer_open(struct trace_iterator *iter) +{ + /* stop the trace while dumping */ + if (iter->tr->ctrl) + stop_wakeup_tracer(iter->tr); +} + +static void notrace wakeup_tracer_close(struct trace_iterator *iter) +{ + /* forget about any processes we were recording */ + if (iter->tr->ctrl) + start_wakeup_tracer(iter->tr); +} + +static struct tracer wakeup_tracer __read_mostly = +{ + .name = "wakeup", + .init = wakeup_tracer_init, + .reset = wakeup_tracer_reset, + .open = wakeup_tracer_open, + .close = wakeup_tracer_close, + .ctrl_update = wakeup_tracer_ctrl_update, + .print_max = 1, +}; + +__init static int init_wakeup_tracer(void) +{ + int ret; + + ret = register_tracer(&wakeup_tracer); + if (ret) + return ret; + + return 0; +} +device_initcall(init_wakeup_tracer); -- cgit v1.2.3 From 81d68a96a39844853b37f20cc8282d9b65b78ef3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: trace irq disabled critical timings This patch adds latency tracing for critical timings (how long interrupts are disabled for). "irqsoff" is added to /debugfs/tracing/available_tracers Note: tracing_max_latency also holds the max latency for irqsoff (in usecs). (default to large number so one must start latency tracing) tracing_thresh threshold (in usecs) to always print out if irqs off is detected to be longer than stated here. If irq_thresh is non-zero, then max_irq_latency is ignored. Here's an example of a trace with ftrace_enabled = 0 ======= preemption latency trace v1.1.5 on 2.6.24-rc7 Signed-off-by: Ingo Molnar -------------------------------------------------------------------- latency: 100 us, #3/3, CPU#1 | (M:rt VP:0, KP:0, SP:0 HP:0 #P:2) ----------------- | task: swapper-0 (uid:0 nice:0 policy:0 rt_prio:0) ----------------- => started at: _spin_lock_irqsave+0x2a/0xb7 => ended at: _spin_unlock_irqrestore+0x32/0x5f _------=> CPU# / _-----=> irqs-off | / _----=> need-resched || / _---=> hardirq/softirq ||| / _--=> preempt-depth |||| / ||||| delay cmd pid ||||| time | caller \ / ||||| \ | / swapper-0 1d.s3 0us+: _spin_lock_irqsave+0x2a/0xb7 (e1000_update_stats+0x47/0x64c [e1000]) swapper-0 1d.s3 100us : _spin_unlock_irqrestore+0x32/0x5f (e1000_update_stats+0x641/0x64c [e1000]) swapper-0 1d.s3 100us : trace_hardirqs_on_caller+0x75/0x89 (_spin_unlock_irqrestore+0x32/0x5f) vim:ft=help ======= And this is a trace with ftrace_enabled == 1 ======= preemption latency trace v1.1.5 on 2.6.24-rc7 -------------------------------------------------------------------- latency: 102 us, #12/12, CPU#1 | (M:rt VP:0, KP:0, SP:0 HP:0 #P:2) ----------------- | task: swapper-0 (uid:0 nice:0 policy:0 rt_prio:0) ----------------- => started at: _spin_lock_irqsave+0x2a/0xb7 => ended at: _spin_unlock_irqrestore+0x32/0x5f _------=> CPU# / _-----=> irqs-off | / _----=> need-resched || / _---=> hardirq/softirq ||| / _--=> preempt-depth |||| / ||||| delay cmd pid ||||| time | caller \ / ||||| \ | / swapper-0 1dNs3 0us+: _spin_lock_irqsave+0x2a/0xb7 (e1000_update_stats+0x47/0x64c [e1000]) swapper-0 1dNs3 46us : e1000_read_phy_reg+0x16/0x225 [e1000] (e1000_update_stats+0x5e2/0x64c [e1000]) swapper-0 1dNs3 46us : e1000_swfw_sync_acquire+0x10/0x99 [e1000] (e1000_read_phy_reg+0x49/0x225 [e1000]) swapper-0 1dNs3 46us : e1000_get_hw_eeprom_semaphore+0x12/0xa6 [e1000] (e1000_swfw_sync_acquire+0x36/0x99 [e1000]) swapper-0 1dNs3 47us : __const_udelay+0x9/0x47 (e1000_read_phy_reg+0x116/0x225 [e1000]) swapper-0 1dNs3 47us+: __delay+0x9/0x50 (__const_udelay+0x45/0x47) swapper-0 1dNs3 97us : preempt_schedule+0xc/0x84 (__delay+0x4e/0x50) swapper-0 1dNs3 98us : e1000_swfw_sync_release+0xc/0x55 [e1000] (e1000_read_phy_reg+0x211/0x225 [e1000]) swapper-0 1dNs3 99us+: e1000_put_hw_eeprom_semaphore+0x9/0x35 [e1000] (e1000_swfw_sync_release+0x50/0x55 [e1000]) swapper-0 1dNs3 101us : _spin_unlock_irqrestore+0xe/0x5f (e1000_update_stats+0x641/0x64c [e1000]) swapper-0 1dNs3 102us : _spin_unlock_irqrestore+0x32/0x5f (e1000_update_stats+0x641/0x64c [e1000]) swapper-0 1dNs3 102us : trace_hardirqs_on_caller+0x75/0x89 (_spin_unlock_irqrestore+0x32/0x5f) vim:ft=help ======= Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_64.c | 3 + arch/x86/lib/Makefile | 1 + arch/x86/lib/thunk_32.S | 47 +++++ arch/x86/lib/thunk_64.S | 19 +- include/asm-x86/irqflags.h | 24 +-- include/linux/ftrace.h | 8 + include/linux/irqflags.h | 12 +- kernel/fork.c | 2 +- kernel/lockdep.c | 23 ++- kernel/printk.c | 2 + kernel/trace/Kconfig | 18 ++ kernel/trace/Makefile | 1 + kernel/trace/trace_irqsoff.c | 402 +++++++++++++++++++++++++++++++++++++++++++ 13 files changed, 531 insertions(+), 31 deletions(-) create mode 100644 arch/x86/lib/thunk_32.S create mode 100644 kernel/trace/trace_irqsoff.c (limited to 'include') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e2319f39988b..dd349c92f051 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -165,7 +165,10 @@ void cpu_idle(void) */ local_irq_disable(); enter_idle(); + /* Don't trace irqs off for idle */ + stop_critical_timings(); idle(); + start_critical_timings(); /* In many cases the interrupt that ended idle has already called exit_idle. But some idle loops can be woken up without interrupt. */ diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 76f60f52a885..84aa2883fe15 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_SMP) := msr-on-cpu.o lib-y := delay_$(BITS).o +lib-y += thunk_$(BITS).o lib-y += usercopy_$(BITS).o getuser_$(BITS).o putuser_$(BITS).o lib-y += memcpy_$(BITS).o diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S new file mode 100644 index 000000000000..650b11e00ecc --- /dev/null +++ b/arch/x86/lib/thunk_32.S @@ -0,0 +1,47 @@ +/* + * Trampoline to trace irqs off. (otherwise CALLER_ADDR1 might crash) + * Copyright 2008 by Steven Rostedt, Red Hat, Inc + * (inspired by Andi Kleen's thunk_64.S) + * Subject to the GNU public license, v.2. No warranty of any kind. + */ + + #include + +#define ARCH_TRACE_IRQS_ON \ + pushl %eax; \ + pushl %ecx; \ + pushl %edx; \ + call trace_hardirqs_on; \ + popl %edx; \ + popl %ecx; \ + popl %eax; + +#define ARCH_TRACE_IRQS_OFF \ + pushl %eax; \ + pushl %ecx; \ + pushl %edx; \ + call trace_hardirqs_off; \ + popl %edx; \ + popl %ecx; \ + popl %eax; + +#ifdef CONFIG_TRACE_IRQFLAGS + /* put return address in eax (arg1) */ + .macro thunk_ra name,func + .globl \name +\name: + pushl %eax + pushl %ecx + pushl %edx + /* Place EIP in the arg1 */ + movl 3*4(%esp), %eax + call \func + popl %edx + popl %ecx + popl %eax + ret + .endm + + thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller + thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller +#endif diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S index e009251d4e9f..bf9a7d5a5428 100644 --- a/arch/x86/lib/thunk_64.S +++ b/arch/x86/lib/thunk_64.S @@ -2,6 +2,7 @@ * Save registers before calling assembly functions. This avoids * disturbance of register allocation in some inline assembly constructs. * Copyright 2001,2002 by Andi Kleen, SuSE Labs. + * Added trace_hardirqs callers - Copyright 2007 Steven Rostedt, Red Hat, Inc. * Subject to the GNU public license, v.2. No warranty of any kind. */ @@ -42,8 +43,22 @@ #endif #ifdef CONFIG_TRACE_IRQFLAGS - thunk trace_hardirqs_on_thunk,trace_hardirqs_on - thunk trace_hardirqs_off_thunk,trace_hardirqs_off + /* put return address in rdi (arg1) */ + .macro thunk_ra name,func + .globl \name +\name: + CFI_STARTPROC + SAVE_ARGS + /* SAVE_ARGS pushs 9 elements */ + /* the next element would be the rip */ + movq 9*8(%rsp), %rdi + call \func + jmp restore + CFI_ENDPROC + .endm + + thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller + thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC diff --git a/include/asm-x86/irqflags.h b/include/asm-x86/irqflags.h index c242527f970e..24d71b1eb189 100644 --- a/include/asm-x86/irqflags.h +++ b/include/asm-x86/irqflags.h @@ -179,8 +179,6 @@ static inline void trace_hardirqs_fixup(void) * have a reliable stack. x86_64 only. */ #define SWAPGS_UNSAFE_STACK swapgs -#define ARCH_TRACE_IRQS_ON call trace_hardirqs_on_thunk -#define ARCH_TRACE_IRQS_OFF call trace_hardirqs_off_thunk #define ARCH_LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk #define ARCH_LOCKDEP_SYS_EXIT_IRQ \ TRACE_IRQS_ON; \ @@ -192,24 +190,6 @@ static inline void trace_hardirqs_fixup(void) TRACE_IRQS_OFF; #else -#define ARCH_TRACE_IRQS_ON \ - pushl %eax; \ - pushl %ecx; \ - pushl %edx; \ - call trace_hardirqs_on; \ - popl %edx; \ - popl %ecx; \ - popl %eax; - -#define ARCH_TRACE_IRQS_OFF \ - pushl %eax; \ - pushl %ecx; \ - pushl %edx; \ - call trace_hardirqs_off; \ - popl %edx; \ - popl %ecx; \ - popl %eax; - #define ARCH_LOCKDEP_SYS_EXIT \ pushl %eax; \ pushl %ecx; \ @@ -223,8 +203,8 @@ static inline void trace_hardirqs_fixup(void) #endif #ifdef CONFIG_TRACE_IRQFLAGS -# define TRACE_IRQS_ON ARCH_TRACE_IRQS_ON -# define TRACE_IRQS_OFF ARCH_TRACE_IRQS_OFF +# define TRACE_IRQS_ON call trace_hardirqs_on_thunk; +# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk; #else # define TRACE_IRQS_ON # define TRACE_IRQS_OFF diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index db8a5e7abe41..0a20445dcbcc 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -50,4 +50,12 @@ extern void mcount(void); # define CALLER_ADDR5 0UL #endif +#ifdef CONFIG_IRQSOFF_TRACER + extern void notrace time_hardirqs_on(unsigned long a0, unsigned long a1); + extern void notrace time_hardirqs_off(unsigned long a0, unsigned long a1); +#else +# define time_hardirqs_on(a0, a1) do { } while (0) +# define time_hardirqs_off(a0, a1) do { } while (0) +#endif + #endif /* _LINUX_FTRACE_H */ diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index e600c4e9b8c5..5b711d4e9fd9 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -12,10 +12,10 @@ #define _LINUX_TRACE_IRQFLAGS_H #ifdef CONFIG_TRACE_IRQFLAGS - extern void trace_hardirqs_on(void); - extern void trace_hardirqs_off(void); extern void trace_softirqs_on(unsigned long ip); extern void trace_softirqs_off(unsigned long ip); + extern void trace_hardirqs_on(void); + extern void trace_hardirqs_off(void); # define trace_hardirq_context(p) ((p)->hardirq_context) # define trace_softirq_context(p) ((p)->softirq_context) # define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled) @@ -41,6 +41,14 @@ # define INIT_TRACE_IRQFLAGS #endif +#ifdef CONFIG_IRQSOFF_TRACER + extern void stop_critical_timings(void); + extern void start_critical_timings(void); +#else +# define stop_critical_timings() do { } while (0) +# define start_critical_timings() do { } while (0) +#endif + #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT #include diff --git a/kernel/fork.c b/kernel/fork.c index 19908b26cf80..d66d676dc362 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -909,7 +909,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, rt_mutex_init_task(p); -#ifdef CONFIG_TRACE_IRQFLAGS +#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_LOCKDEP) DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 81a4e4a3f087..e21924365ea3 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -39,6 +39,7 @@ #include #include #include +#include #include @@ -982,7 +983,7 @@ check_noncircular(struct lock_class *source, unsigned int depth) return 1; } -#ifdef CONFIG_TRACE_IRQFLAGS +#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) /* * Forwards and backwards subgraph searching, for the purposes of * proving that two subgraphs can be connected by a new dependency @@ -1680,7 +1681,7 @@ valid_state(struct task_struct *curr, struct held_lock *this, static int mark_lock(struct task_struct *curr, struct held_lock *this, enum lock_usage_bit new_bit); -#ifdef CONFIG_TRACE_IRQFLAGS +#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) /* * print irq inversion bug: @@ -2013,11 +2014,13 @@ void early_boot_irqs_on(void) /* * Hardirqs will be enabled: */ -void trace_hardirqs_on(void) +void notrace trace_hardirqs_on_caller(unsigned long a0) { struct task_struct *curr = current; unsigned long ip; + time_hardirqs_on(CALLER_ADDR0, a0); + if (unlikely(!debug_locks || current->lockdep_recursion)) return; @@ -2055,16 +2058,23 @@ void trace_hardirqs_on(void) curr->hardirq_enable_event = ++curr->irq_events; debug_atomic_inc(&hardirqs_on_events); } +EXPORT_SYMBOL(trace_hardirqs_on_caller); +void notrace trace_hardirqs_on(void) +{ + trace_hardirqs_on_caller(CALLER_ADDR0); +} EXPORT_SYMBOL(trace_hardirqs_on); /* * Hardirqs were disabled: */ -void trace_hardirqs_off(void) +void notrace trace_hardirqs_off_caller(unsigned long a0) { struct task_struct *curr = current; + time_hardirqs_off(CALLER_ADDR0, a0); + if (unlikely(!debug_locks || current->lockdep_recursion)) return; @@ -2082,7 +2092,12 @@ void trace_hardirqs_off(void) } else debug_atomic_inc(&redundant_hardirqs_off); } +EXPORT_SYMBOL(trace_hardirqs_off_caller); +void notrace trace_hardirqs_off(void) +{ + trace_hardirqs_off_caller(CALLER_ADDR0); +} EXPORT_SYMBOL(trace_hardirqs_off); /* diff --git a/kernel/printk.c b/kernel/printk.c index 8fb01c32aa3b..ae7d5b9e535d 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1041,7 +1041,9 @@ void release_console_sem(void) _log_end = log_end; con_start = log_end; /* Flush */ spin_unlock(&logbuf_lock); + stop_critical_timings(); /* don't trace print latency */ call_console_drivers(_con_start, _log_end); + start_critical_timings(); local_irq_restore(flags); } console_locked = 0; diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 892ecc94a82b..896df1cf6adc 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -26,6 +26,24 @@ config FTRACE (the bootup default), then the overhead of the instructions is very small and not measurable even in micro-benchmarks. +config IRQSOFF_TRACER + bool "Interrupts-off Latency Tracer" + default n + depends on TRACE_IRQFLAGS_SUPPORT + depends on GENERIC_TIME + select TRACE_IRQFLAGS + select TRACING + select TRACER_MAX_TRACE + help + This option measures the time spent in irqs-off critical + sections, with microsecond accuracy. + + The default measurement method is a maximum search, which is + disabled by default and can be runtime (re-)started + via: + + echo 0 > /debugfs/tracing/tracing_max_latency + config SCHED_TRACER bool "Scheduling Latency Tracer" depends on DEBUG_KERNEL diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 5508cdb19aea..46be8647fb65 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -3,6 +3,7 @@ obj-$(CONFIG_FTRACE) += libftrace.o obj-$(CONFIG_TRACING) += trace.o obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o obj-$(CONFIG_FTRACE) += trace_functions.o +obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o libftrace-y := ftrace.o diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c new file mode 100644 index 000000000000..a9131b0cf1a5 --- /dev/null +++ b/kernel/trace/trace_irqsoff.c @@ -0,0 +1,402 @@ +/* + * trace irqs off criticall timings + * + * Copyright (C) 2007-2008 Steven Rostedt + * Copyright (C) 2008 Ingo Molnar + * + * From code in the latency_tracer, that is: + * + * Copyright (C) 2004-2006 Ingo Molnar + * Copyright (C) 2004 William Lee Irwin III + */ +#include +#include +#include +#include +#include +#include + +#include "trace.h" + +static struct trace_array *irqsoff_trace __read_mostly; +static int tracer_enabled __read_mostly; + +/* + * Sequence count - we record it when starting a measurement and + * skip the latency if the sequence has changed - some other section + * did a maximum and could disturb our measurement with serial console + * printouts, etc. Truly coinciding maximum latencies should be rare + * and what happens together happens separately as well, so this doesnt + * decrease the validity of the maximum found: + */ +static __cacheline_aligned_in_smp unsigned long max_sequence; + +#ifdef CONFIG_FTRACE +/* + * irqsoff uses its own tracer function to keep the overhead down: + */ +static void notrace +irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) +{ + struct trace_array *tr = irqsoff_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + + if (likely(!tracer_enabled)) + return; + + local_save_flags(flags); + + if (!irqs_disabled_flags(flags)) + return; + + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) + ftrace(tr, data, ip, parent_ip, flags); + + atomic_dec(&data->disabled); +} + +static struct ftrace_ops trace_ops __read_mostly = +{ + .func = irqsoff_tracer_call, +}; +#endif /* CONFIG_FTRACE */ + +/* + * Should this new latency be reported/recorded? + */ +static int notrace report_latency(cycle_t delta) +{ + if (tracing_thresh) { + if (delta < tracing_thresh) + return 0; + } else { + if (delta <= tracing_max_latency) + return 0; + } + return 1; +} + +static void notrace +check_critical_timing(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long parent_ip, + int cpu) +{ + unsigned long latency, t0, t1; + cycle_t T0, T1, T2, delta; + unsigned long flags; + + /* + * usecs conversion is slow so we try to delay the conversion + * as long as possible: + */ + T0 = data->preempt_timestamp; + T1 = now(cpu); + delta = T1-T0; + + local_save_flags(flags); + + if (!report_latency(delta)) + goto out; + + ftrace(tr, data, CALLER_ADDR0, parent_ip, flags); + /* + * Update the timestamp, because the trace entry above + * might change it (it can only get larger so the latency + * is fair to be reported): + */ + T2 = now(cpu); + + delta = T2-T0; + + latency = nsecs_to_usecs(delta); + + if (data->critical_sequence != max_sequence) + goto out; + + tracing_max_latency = delta; + t0 = nsecs_to_usecs(T0); + t1 = nsecs_to_usecs(T1); + + data->critical_end = parent_ip; + + update_max_tr_single(tr, current, cpu); + + if (tracing_thresh) + printk(KERN_INFO "(%16s-%-5d|#%d): %lu us critical section " + "violates %lu us threshold.\n" + " => started at timestamp %lu: ", + current->comm, current->pid, + raw_smp_processor_id(), + latency, nsecs_to_usecs(tracing_thresh), t0); + else + printk(KERN_INFO "(%16s-%-5d|#%d):" + " new %lu us maximum-latency " + "critical section.\n => started at timestamp %lu: ", + current->comm, current->pid, + raw_smp_processor_id(), + latency, t0); + + print_symbol(KERN_CONT "<%s>\n", data->critical_start); + printk(KERN_CONT " => ended at timestamp %lu: ", t1); + print_symbol(KERN_CONT "<%s>\n", data->critical_end); + dump_stack(); + t1 = nsecs_to_usecs(now(cpu)); + printk(KERN_CONT " => dump-end timestamp %lu\n\n", t1); + + max_sequence++; + +out: + data->critical_sequence = max_sequence; + data->preempt_timestamp = now(cpu); + tracing_reset(data); + ftrace(tr, data, CALLER_ADDR0, parent_ip, flags); +} + +static inline void notrace +start_critical_timing(unsigned long ip, unsigned long parent_ip) +{ + int cpu; + struct trace_array *tr = irqsoff_trace; + struct trace_array_cpu *data; + unsigned long flags; + + if (likely(!tracer_enabled)) + return; + + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + + if (unlikely(!data) || unlikely(!data->trace) || + data->critical_start || atomic_read(&data->disabled)) + return; + + atomic_inc(&data->disabled); + + data->critical_sequence = max_sequence; + data->preempt_timestamp = now(cpu); + data->critical_start = parent_ip; + tracing_reset(data); + + local_save_flags(flags); + ftrace(tr, data, ip, parent_ip, flags); + + atomic_dec(&data->disabled); +} + +static inline void notrace +stop_critical_timing(unsigned long ip, unsigned long parent_ip) +{ + int cpu; + struct trace_array *tr = irqsoff_trace; + struct trace_array_cpu *data; + unsigned long flags; + + if (likely(!tracer_enabled)) + return; + + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + + if (unlikely(!data) || unlikely(!data->trace) || + !data->critical_start || atomic_read(&data->disabled)) + return; + + atomic_inc(&data->disabled); + local_save_flags(flags); + ftrace(tr, data, ip, parent_ip, flags); + check_critical_timing(tr, data, parent_ip, cpu); + data->critical_start = 0; + atomic_dec(&data->disabled); +} + +void notrace start_critical_timings(void) +{ + unsigned long flags; + + local_save_flags(flags); + + if (irqs_disabled_flags(flags)) + start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); +} + +void notrace stop_critical_timings(void) +{ + unsigned long flags; + + local_save_flags(flags); + + if (irqs_disabled_flags(flags)) + stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); +} + +#ifdef CONFIG_PROVE_LOCKING +void notrace time_hardirqs_on(unsigned long a0, unsigned long a1) +{ + unsigned long flags; + + local_save_flags(flags); + + if (irqs_disabled_flags(flags)) + stop_critical_timing(a0, a1); +} + +void notrace time_hardirqs_off(unsigned long a0, unsigned long a1) +{ + unsigned long flags; + + local_save_flags(flags); + + if (irqs_disabled_flags(flags)) + start_critical_timing(a0, a1); +} + +#else /* !CONFIG_PROVE_LOCKING */ + +/* + * Stubs: + */ + +void early_boot_irqs_off(void) +{ +} + +void early_boot_irqs_on(void) +{ +} + +void trace_softirqs_on(unsigned long ip) +{ +} + +void trace_softirqs_off(unsigned long ip) +{ +} + +inline void print_irqtrace_events(struct task_struct *curr) +{ +} + +/* + * We are only interested in hardirq on/off events: + */ +void notrace trace_hardirqs_on(void) +{ + unsigned long flags; + + local_save_flags(flags); + + if (irqs_disabled_flags(flags)) + stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); +} +EXPORT_SYMBOL(trace_hardirqs_on); + +void notrace trace_hardirqs_off(void) +{ + unsigned long flags; + + local_save_flags(flags); + + if (irqs_disabled_flags(flags)) + start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); +} +EXPORT_SYMBOL(trace_hardirqs_off); + +void notrace trace_hardirqs_on_caller(unsigned long caller_addr) +{ + unsigned long flags; + + local_save_flags(flags); + + if (irqs_disabled_flags(flags)) + stop_critical_timing(CALLER_ADDR0, caller_addr); +} +EXPORT_SYMBOL(trace_hardirqs_on_caller); + +void notrace trace_hardirqs_off_caller(unsigned long caller_addr) +{ + unsigned long flags; + + local_save_flags(flags); + + if (irqs_disabled_flags(flags)) + start_critical_timing(CALLER_ADDR0, caller_addr); +} +EXPORT_SYMBOL(trace_hardirqs_off_caller); + +#endif /* CONFIG_PROVE_LOCKING */ + +static void start_irqsoff_tracer(struct trace_array *tr) +{ + tracer_enabled = 1; + register_ftrace_function(&trace_ops); +} + +static void stop_irqsoff_tracer(struct trace_array *tr) +{ + unregister_ftrace_function(&trace_ops); + tracer_enabled = 0; +} + +static void irqsoff_tracer_init(struct trace_array *tr) +{ + irqsoff_trace = tr; + /* make sure that the tracer is visibel */ + smp_wmb(); + + if (tr->ctrl) + start_irqsoff_tracer(tr); +} + +static void irqsoff_tracer_reset(struct trace_array *tr) +{ + if (tr->ctrl) + stop_irqsoff_tracer(tr); +} + +static void irqsoff_tracer_ctrl_update(struct trace_array *tr) +{ + if (tr->ctrl) + start_irqsoff_tracer(tr); + else + stop_irqsoff_tracer(tr); +} + +static void notrace irqsoff_tracer_open(struct trace_iterator *iter) +{ + /* stop the trace while dumping */ + if (iter->tr->ctrl) + stop_irqsoff_tracer(iter->tr); +} + +static void notrace irqsoff_tracer_close(struct trace_iterator *iter) +{ + if (iter->tr->ctrl) + start_irqsoff_tracer(iter->tr); +} + +static struct tracer irqsoff_tracer __read_mostly = +{ + .name = "irqsoff", + .init = irqsoff_tracer_init, + .reset = irqsoff_tracer_reset, + .open = irqsoff_tracer_open, + .close = irqsoff_tracer_close, + .ctrl_update = irqsoff_tracer_ctrl_update, + .print_max = 1, +}; + +__init static int init_irqsoff_tracer(void) +{ + register_tracer(&irqsoff_tracer); + + return 0; +} +device_initcall(init_irqsoff_tracer); -- cgit v1.2.3 From 6cd8a4bb2f97527a9ceb30bc77ea4e959c6a95e3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: trace preempt off critical timings Add preempt off timings. A lot of kernel core code is taken from the RT patch latency trace that was written by Ingo Molnar. This adds "preemptoff" and "preemptirqsoff" to /debugfs/tracing/available_tracers Now instead of just tracing irqs off, preemption off can be selected to be recorded. When this is selected, it shares the same files as irqs off timings. One can either trace preemption off, irqs off, or one or the other off. By echoing "preemptoff" into /debugfs/tracing/current_tracer, recording of preempt off only is performed. "irqsoff" will only record the time irqs are disabled, but "preemptirqsoff" will take the total time irqs or preemption are disabled. Runtime switching of these options is now supported by simpling echoing in the appropriate trace name into /debugfs/tracing/current_tracer. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_32.c | 3 + include/linux/ftrace.h | 8 ++ include/linux/irqflags.h | 3 +- include/linux/preempt.h | 2 +- kernel/sched.c | 24 +++++- kernel/trace/Kconfig | 25 ++++++ kernel/trace/Makefile | 1 + kernel/trace/trace_irqsoff.c | 184 +++++++++++++++++++++++++++++++------------ 8 files changed, 197 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index f8476dfbb60d..a30aa1f2607a 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -185,7 +185,10 @@ void cpu_idle(void) local_irq_disable(); __get_cpu_var(irq_stat).idle_timestamp = jiffies; + /* Don't trace irqs off for idle */ + stop_critical_timings(); idle(); + start_critical_timings(); } tick_nohz_restart_sched_tick(); preempt_enable_no_resched(); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0a20445dcbcc..740c97dcf9cb 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -58,4 +58,12 @@ extern void mcount(void); # define time_hardirqs_off(a0, a1) do { } while (0) #endif +#ifdef CONFIG_PREEMPT_TRACER + extern void notrace trace_preempt_on(unsigned long a0, unsigned long a1); + extern void notrace trace_preempt_off(unsigned long a0, unsigned long a1); +#else +# define trace_preempt_on(a0, a1) do { } while (0) +# define trace_preempt_off(a0, a1) do { } while (0) +#endif + #endif /* _LINUX_FTRACE_H */ diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 5b711d4e9fd9..2b1c2e58566e 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -41,7 +41,8 @@ # define INIT_TRACE_IRQFLAGS #endif -#ifdef CONFIG_IRQSOFF_TRACER +#if defined(CONFIG_IRQSOFF_TRACER) || \ + defined(CONFIG_PREEMPT_TRACER) extern void stop_critical_timings(void); extern void start_critical_timings(void); #else diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 36b03d50bf40..72b1a10a59b6 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -10,7 +10,7 @@ #include #include -#ifdef CONFIG_DEBUG_PREEMPT +#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER) extern void add_preempt_count(int val); extern void sub_preempt_count(int val); #else diff --git a/kernel/sched.c b/kernel/sched.c index 73e600852365..328494e28df2 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -70,6 +70,7 @@ #include #include #include +#include #include #include @@ -4365,26 +4366,44 @@ void scheduler_tick(void) #endif } -#if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT) +#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ + defined(CONFIG_PREEMPT_TRACER)) + +static inline unsigned long get_parent_ip(unsigned long addr) +{ + if (in_lock_functions(addr)) { + addr = CALLER_ADDR2; + if (in_lock_functions(addr)) + addr = CALLER_ADDR3; + } + return addr; +} void __kprobes add_preempt_count(int val) { +#ifdef CONFIG_DEBUG_PREEMPT /* * Underflow? */ if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) return; +#endif preempt_count() += val; +#ifdef CONFIG_DEBUG_PREEMPT /* * Spinlock count overflowing soon? */ DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK - 10); +#endif + if (preempt_count() == val) + trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); } EXPORT_SYMBOL(add_preempt_count); void __kprobes sub_preempt_count(int val) { +#ifdef CONFIG_DEBUG_PREEMPT /* * Underflow? */ @@ -4396,7 +4415,10 @@ void __kprobes sub_preempt_count(int val) if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && !(preempt_count() & PREEMPT_MASK))) return; +#endif + if (preempt_count() == val) + trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); preempt_count() -= val; } EXPORT_SYMBOL(sub_preempt_count); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 896df1cf6adc..6430016b98e8 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -44,6 +44,31 @@ config IRQSOFF_TRACER echo 0 > /debugfs/tracing/tracing_max_latency + (Note that kernel size and overhead increases with this option + enabled. This option and the preempt-off timing option can be + used together or separately.) + +config PREEMPT_TRACER + bool "Preemption-off Latency Tracer" + default n + depends on GENERIC_TIME + depends on PREEMPT + select TRACING + select TRACER_MAX_TRACE + help + This option measures the time spent in preemption off critical + sections, with microsecond accuracy. + + The default measurement method is a maximum search, which is + disabled by default and can be runtime (re-)started + via: + + echo 0 > /debugfs/tracing/tracing_max_latency + + (Note that kernel size and overhead increases with this option + enabled. This option and the irqs-off timing option can be + used together or separately.) + config SCHED_TRACER bool "Scheduling Latency Tracer" depends on DEBUG_KERNEL diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 46be8647fb65..3fec653d6533 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_TRACING) += trace.o obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o obj-$(CONFIG_FTRACE) += trace_functions.o obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o +obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o libftrace-y := ftrace.o diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index a9131b0cf1a5..8b1231633dc5 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -21,6 +21,36 @@ static struct trace_array *irqsoff_trace __read_mostly; static int tracer_enabled __read_mostly; +static DEFINE_PER_CPU(int, tracing_cpu); + +enum { + TRACER_IRQS_OFF = (1 << 1), + TRACER_PREEMPT_OFF = (1 << 2), +}; + +static int trace_type __read_mostly; + +#ifdef CONFIG_PREEMPT_TRACER +static inline int notrace +preempt_trace(void) +{ + return ((trace_type & TRACER_PREEMPT_OFF) && preempt_count()); +} +#else +# define preempt_trace() (0) +#endif + +#ifdef CONFIG_IRQSOFF_TRACER +static inline int notrace +irq_trace(void) +{ + return ((trace_type & TRACER_IRQS_OFF) && + irqs_disabled()); +} +#else +# define irq_trace() (0) +#endif + /* * Sequence count - we record it when starting a measurement and * skip the latency if the sequence has changed - some other section @@ -44,14 +74,11 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) long disabled; int cpu; - if (likely(!tracer_enabled)) + if (likely(!__get_cpu_var(tracing_cpu))) return; local_save_flags(flags); - if (!irqs_disabled_flags(flags)) - return; - cpu = raw_smp_processor_id(); data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); @@ -171,23 +198,29 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip) if (likely(!tracer_enabled)) return; + if (__get_cpu_var(tracing_cpu)) + return; + cpu = raw_smp_processor_id(); data = tr->data[cpu]; if (unlikely(!data) || unlikely(!data->trace) || - data->critical_start || atomic_read(&data->disabled)) + atomic_read(&data->disabled)) return; atomic_inc(&data->disabled); data->critical_sequence = max_sequence; data->preempt_timestamp = now(cpu); - data->critical_start = parent_ip; + data->critical_start = parent_ip ? : ip; tracing_reset(data); local_save_flags(flags); + ftrace(tr, data, ip, parent_ip, flags); + __get_cpu_var(tracing_cpu) = 1; + atomic_dec(&data->disabled); } @@ -199,7 +232,13 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip) struct trace_array_cpu *data; unsigned long flags; - if (likely(!tracer_enabled)) + /* Always clear the tracing cpu on stopping the trace */ + if (unlikely(__get_cpu_var(tracing_cpu))) + __get_cpu_var(tracing_cpu) = 0; + else + return; + + if (!tracer_enabled) return; cpu = raw_smp_processor_id(); @@ -212,49 +251,35 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip) atomic_inc(&data->disabled); local_save_flags(flags); ftrace(tr, data, ip, parent_ip, flags); - check_critical_timing(tr, data, parent_ip, cpu); + check_critical_timing(tr, data, parent_ip ? : ip, cpu); data->critical_start = 0; atomic_dec(&data->disabled); } +/* start and stop critical timings used to for stoppage (in idle) */ void notrace start_critical_timings(void) { - unsigned long flags; - - local_save_flags(flags); - - if (irqs_disabled_flags(flags)) + if (preempt_trace() || irq_trace()) start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } void notrace stop_critical_timings(void) { - unsigned long flags; - - local_save_flags(flags); - - if (irqs_disabled_flags(flags)) + if (preempt_trace() || irq_trace()) stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } +#ifdef CONFIG_IRQSOFF_TRACER #ifdef CONFIG_PROVE_LOCKING void notrace time_hardirqs_on(unsigned long a0, unsigned long a1) { - unsigned long flags; - - local_save_flags(flags); - - if (irqs_disabled_flags(flags)) + if (!preempt_trace() && irq_trace()) stop_critical_timing(a0, a1); } void notrace time_hardirqs_off(unsigned long a0, unsigned long a1) { - unsigned long flags; - - local_save_flags(flags); - - if (irqs_disabled_flags(flags)) + if (!preempt_trace() && irq_trace()) start_critical_timing(a0, a1); } @@ -289,49 +314,46 @@ inline void print_irqtrace_events(struct task_struct *curr) */ void notrace trace_hardirqs_on(void) { - unsigned long flags; - - local_save_flags(flags); - - if (irqs_disabled_flags(flags)) + if (!preempt_trace() && irq_trace()) stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } EXPORT_SYMBOL(trace_hardirqs_on); void notrace trace_hardirqs_off(void) { - unsigned long flags; - - local_save_flags(flags); - - if (irqs_disabled_flags(flags)) + if (!preempt_trace() && irq_trace()) start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } EXPORT_SYMBOL(trace_hardirqs_off); void notrace trace_hardirqs_on_caller(unsigned long caller_addr) { - unsigned long flags; - - local_save_flags(flags); - - if (irqs_disabled_flags(flags)) + if (!preempt_trace() && irq_trace()) stop_critical_timing(CALLER_ADDR0, caller_addr); } EXPORT_SYMBOL(trace_hardirqs_on_caller); void notrace trace_hardirqs_off_caller(unsigned long caller_addr) { - unsigned long flags; - - local_save_flags(flags); - - if (irqs_disabled_flags(flags)) + if (!preempt_trace() && irq_trace()) start_critical_timing(CALLER_ADDR0, caller_addr); } EXPORT_SYMBOL(trace_hardirqs_off_caller); #endif /* CONFIG_PROVE_LOCKING */ +#endif /* CONFIG_IRQSOFF_TRACER */ + +#ifdef CONFIG_PREEMPT_TRACER +void notrace trace_preempt_on(unsigned long a0, unsigned long a1) +{ + stop_critical_timing(a0, a1); +} + +void notrace trace_preempt_off(unsigned long a0, unsigned long a1) +{ + start_critical_timing(a0, a1); +} +#endif /* CONFIG_PREEMPT_TRACER */ static void start_irqsoff_tracer(struct trace_array *tr) { @@ -345,7 +367,7 @@ static void stop_irqsoff_tracer(struct trace_array *tr) tracer_enabled = 0; } -static void irqsoff_tracer_init(struct trace_array *tr) +static void __irqsoff_tracer_init(struct trace_array *tr) { irqsoff_trace = tr; /* make sure that the tracer is visibel */ @@ -382,6 +404,13 @@ static void notrace irqsoff_tracer_close(struct trace_iterator *iter) start_irqsoff_tracer(iter->tr); } +#ifdef CONFIG_IRQSOFF_TRACER +static void irqsoff_tracer_init(struct trace_array *tr) +{ + trace_type = TRACER_IRQS_OFF; + + __irqsoff_tracer_init(tr); +} static struct tracer irqsoff_tracer __read_mostly = { .name = "irqsoff", @@ -392,10 +421,65 @@ static struct tracer irqsoff_tracer __read_mostly = .ctrl_update = irqsoff_tracer_ctrl_update, .print_max = 1, }; +# define register_irqsoff(trace) register_tracer(&trace) +#else +# define register_irqsoff(trace) do { } while (0) +#endif + +#ifdef CONFIG_PREEMPT_TRACER +static void preemptoff_tracer_init(struct trace_array *tr) +{ + trace_type = TRACER_PREEMPT_OFF; + + __irqsoff_tracer_init(tr); +} + +static struct tracer preemptoff_tracer __read_mostly = +{ + .name = "preemptoff", + .init = preemptoff_tracer_init, + .reset = irqsoff_tracer_reset, + .open = irqsoff_tracer_open, + .close = irqsoff_tracer_close, + .ctrl_update = irqsoff_tracer_ctrl_update, + .print_max = 1, +}; +# define register_preemptoff(trace) register_tracer(&trace) +#else +# define register_preemptoff(trace) do { } while (0) +#endif + +#if defined(CONFIG_IRQSOFF_TRACER) && \ + defined(CONFIG_PREEMPT_TRACER) + +static void preemptirqsoff_tracer_init(struct trace_array *tr) +{ + trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF; + + __irqsoff_tracer_init(tr); +} + +static struct tracer preemptirqsoff_tracer __read_mostly = +{ + .name = "preemptirqsoff", + .init = preemptirqsoff_tracer_init, + .reset = irqsoff_tracer_reset, + .open = irqsoff_tracer_open, + .close = irqsoff_tracer_close, + .ctrl_update = irqsoff_tracer_ctrl_update, + .print_max = 1, +}; + +# define register_preemptirqsoff(trace) register_tracer(&trace) +#else +# define register_preemptirqsoff(trace) do { } while (0) +#endif __init static int init_irqsoff_tracer(void) { - register_tracer(&irqsoff_tracer); + register_irqsoff(irqsoff_tracer); + register_preemptoff(preemptoff_tracer); + register_preemptirqsoff(preemptirqsoff_tracer); return 0; } -- cgit v1.2.3 From 3d0833953e1b98b79ddf491dd49229eef9baeac1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: dynamic enabling/disabling of function calls This patch adds a feature to dynamically replace the ftrace code with the jmps to allow a kernel with ftrace configured to run as fast as it can without it configured. The way this works, is on bootup (if ftrace is enabled), a ftrace function is registered to record the instruction pointer of all places that call the function. Later, if there's still any code to patch, a kthread is awoken (rate limited to at most once a second) that performs a stop_machine, and replaces all the code that was called with a jmp over the call to ftrace. It only replaces what was found the previous time. Typically the system reaches equilibrium quickly after bootup and there's no code patching needed at all. e.g. call ftrace /* 5 bytes */ is replaced with jmp 3f /* jmp is 2 bytes and we jump 3 forward */ 3: When we want to enable ftrace for function tracing, the IP recording is removed, and stop_machine is called again to replace all the locations of that were recorded back to the call of ftrace. When it is disabled, we replace the code back to the jmp. Allocation is done by the kthread. If the ftrace recording function is called, and we don't have any record slots available, then we simply skip that call. Once a second a new page (if needed) is allocated for recording new ftrace function calls. A large batch is allocated at boot up to get most of the calls there. Because we do this via stop_machine, we don't have to worry about another CPU executing a ftrace call as we modify it. But we do need to worry about NMI's so all functions that might be called via nmi must be annotated with notrace_nmi. When this code is configured in, the NMI code will not call notrace. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/ftrace.c | 237 +++++++++++++++++++++++++++++++ include/linux/ftrace.h | 18 +++ kernel/trace/Kconfig | 17 +++ kernel/trace/ftrace.c | 356 ++++++++++++++++++++++++++++++++++++++++++----- 5 files changed, 597 insertions(+), 32 deletions(-) create mode 100644 arch/x86/kernel/ftrace.c (limited to 'include') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5e618c3b4720..e142091524b0 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -56,6 +56,7 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi_$(BITS).o obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o +obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c new file mode 100644 index 000000000000..5dd58136ef02 --- /dev/null +++ b/arch/x86/kernel/ftrace.c @@ -0,0 +1,237 @@ +/* + * Code for replacing ftrace calls with jumps. + * + * Copyright (C) 2007-2008 Steven Rostedt + * + * Thanks goes to Ingo Molnar, for suggesting the idea. + * Mathieu Desnoyers, for suggesting postponing the modifications. + * Arjan van de Ven, for keeping me straight, and explaining to me + * the dangers of modifying code on the run. + */ + +#include +#include +#include +#include +#include +#include + +#define CALL_BACK 5 + +#define JMPFWD 0x03eb + +static unsigned short ftrace_jmp = JMPFWD; + +struct ftrace_record { + struct dyn_ftrace rec; + int failed; +} __attribute__((packed)); + +struct ftrace_page { + struct ftrace_page *next; + int index; + struct ftrace_record records[]; +} __attribute__((packed)); + +#define ENTRIES_PER_PAGE \ + ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct ftrace_record)) + +/* estimate from running different kernels */ +#define NR_TO_INIT 10000 + +#define MCOUNT_ADDR ((long)(&mcount)) + +union ftrace_code_union { + char code[5]; + struct { + char e8; + int offset; + } __attribute__((packed)); +}; + +static struct ftrace_page *ftrace_pages_start; +static struct ftrace_page *ftrace_pages; + +notrace struct dyn_ftrace *ftrace_alloc_shutdown_node(unsigned long ip) +{ + struct ftrace_record *rec; + unsigned short save; + + ip -= CALL_BACK; + save = *(short *)ip; + + /* If this was already converted, skip it */ + if (save == JMPFWD) + return NULL; + + if (ftrace_pages->index == ENTRIES_PER_PAGE) { + if (!ftrace_pages->next) + return NULL; + ftrace_pages = ftrace_pages->next; + } + + rec = &ftrace_pages->records[ftrace_pages->index++]; + + return &rec->rec; +} + +static int notrace +ftrace_modify_code(unsigned long ip, unsigned char *old_code, + unsigned char *new_code) +{ + unsigned short old = *(unsigned short *)old_code; + unsigned short new = *(unsigned short *)new_code; + unsigned short replaced; + int faulted = 0; + + /* + * Note: Due to modules and __init, code can + * disappear and change, we need to protect against faulting + * as well as code changing. + * + * No real locking needed, this code is run through + * kstop_machine. + */ + asm volatile ( + "1: lock\n" + " cmpxchg %w3, (%2)\n" + "2:\n" + ".section .fixup, \"ax\"\n" + " movl $1, %0\n" + "3: jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) + : "=r"(faulted), "=a"(replaced) + : "r"(ip), "r"(new), "0"(faulted), "a"(old) + : "memory"); + sync_core(); + + if (replaced != old) + faulted = 2; + + return faulted; +} + +static int notrace ftrace_calc_offset(long ip) +{ + return (int)(MCOUNT_ADDR - ip); +} + +notrace void ftrace_code_disable(struct dyn_ftrace *rec) +{ + unsigned long ip; + union ftrace_code_union save; + struct ftrace_record *r = + container_of(rec, struct ftrace_record, rec); + + ip = rec->ip; + + save.e8 = 0xe8; + save.offset = ftrace_calc_offset(ip); + + /* move the IP back to the start of the call */ + ip -= CALL_BACK; + + r->failed = ftrace_modify_code(ip, save.code, (char *)&ftrace_jmp); +} + +static void notrace ftrace_replace_code(int saved) +{ + unsigned char *new = NULL, *old = NULL; + struct ftrace_record *rec; + struct ftrace_page *pg; + unsigned long ip; + int i; + + if (saved) + old = (char *)&ftrace_jmp; + else + new = (char *)&ftrace_jmp; + + for (pg = ftrace_pages_start; pg; pg = pg->next) { + for (i = 0; i < pg->index; i++) { + union ftrace_code_union calc; + rec = &pg->records[i]; + + /* don't modify code that has already faulted */ + if (rec->failed) + continue; + + ip = rec->rec.ip; + + calc.e8 = 0xe8; + calc.offset = ftrace_calc_offset(ip); + + if (saved) + new = calc.code; + else + old = calc.code; + + ip -= CALL_BACK; + + rec->failed = ftrace_modify_code(ip, old, new); + } + } + +} + +notrace void ftrace_startup_code(void) +{ + ftrace_replace_code(1); +} + +notrace void ftrace_shutdown_code(void) +{ + ftrace_replace_code(0); +} + +notrace void ftrace_shutdown_replenish(void) +{ + if (ftrace_pages->next) + return; + + /* allocate another page */ + ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL); +} + +notrace int ftrace_shutdown_arch_init(void) +{ + struct ftrace_page *pg; + int cnt; + int i; + + /* allocate a few pages */ + ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL); + if (!ftrace_pages_start) + return -1; + + /* + * Allocate a few more pages. + * + * TODO: have some parser search vmlinux before + * final linking to find all calls to ftrace. + * Then we can: + * a) know how many pages to allocate. + * and/or + * b) set up the table then. + * + * The dynamic code is still necessary for + * modules. + */ + + pg = ftrace_pages = ftrace_pages_start; + + cnt = NR_TO_INIT / ENTRIES_PER_PAGE; + + for (i = 0; i < cnt; i++) { + pg->next = (void *)get_zeroed_page(GFP_KERNEL); + + /* If we fail, we'll try later anyway */ + if (!pg->next) + break; + + pg = pg->next; + } + + return 0; +} diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 740c97dcf9cb..90dbc0ee2046 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -32,6 +32,24 @@ extern void mcount(void); # define clear_ftrace_function(ops) do { } while (0) #endif /* CONFIG_FTRACE */ +#ifdef CONFIG_DYNAMIC_FTRACE +# define FTRACE_HASHBITS 10 +# define FTRACE_HASHSIZE (1< +#include +#include +#include +#include +#include #include +#include +#include +#include + +#include "trace.h" -static DEFINE_SPINLOCK(ftrace_func_lock); +static DEFINE_SPINLOCK(ftrace_lock); static struct ftrace_ops ftrace_list_end __read_mostly = { .func = ftrace_stub, @@ -44,21 +53,21 @@ notrace void ftrace_list_func(unsigned long ip, unsigned long parent_ip) } /** - * register_ftrace_function - register a function for profiling - * @ops - ops structure that holds the function for profiling. - * - * Register a function to be called by all functions in the - * kernel. + * clear_ftrace_function - reset the ftrace function * - * Note: @ops->func and all the functions it calls must be labeled - * with "notrace", otherwise it will go into a - * recursive loop. + * This NULLs the ftrace function and in essence stops + * tracing. There may be lag */ -int register_ftrace_function(struct ftrace_ops *ops) +void clear_ftrace_function(void) { - unsigned long flags; + ftrace_trace_function = ftrace_stub; +} + +static int notrace __register_ftrace_function(struct ftrace_ops *ops) +{ + /* Should never be called by interrupts */ + spin_lock(&ftrace_lock); - spin_lock_irqsave(&ftrace_func_lock, flags); ops->next = ftrace_list; /* * We are entering ops into the ftrace_list but another @@ -68,6 +77,7 @@ int register_ftrace_function(struct ftrace_ops *ops) */ smp_wmb(); ftrace_list = ops; + /* * For one func, simply call it directly. * For more than one func, call the chain. @@ -76,28 +86,22 @@ int register_ftrace_function(struct ftrace_ops *ops) ftrace_trace_function = ops->func; else ftrace_trace_function = ftrace_list_func; - spin_unlock_irqrestore(&ftrace_func_lock, flags); + + spin_unlock(&ftrace_lock); return 0; } -/** - * unregister_ftrace_function - unresgister a function for profiling. - * @ops - ops structure that holds the function to unregister - * - * Unregister a function that was added to be called by ftrace profiling. - */ -int unregister_ftrace_function(struct ftrace_ops *ops) +static int notrace __unregister_ftrace_function(struct ftrace_ops *ops) { - unsigned long flags; struct ftrace_ops **p; int ret = 0; - spin_lock_irqsave(&ftrace_func_lock, flags); + spin_lock(&ftrace_lock); /* - * If we are the only function, then the ftrace pointer is - * pointing directly to that function. + * If we are removing the last function, then simply point + * to the ftrace_stub. */ if (ftrace_list == ops && ops->next == &ftrace_list_end) { ftrace_trace_function = ftrace_stub; @@ -117,22 +121,310 @@ int unregister_ftrace_function(struct ftrace_ops *ops) *p = (*p)->next; /* If we only have one func left, then call that directly */ - if (ftrace_list->next == &ftrace_list_end) + if (ftrace_list == &ftrace_list_end || + ftrace_list->next == &ftrace_list_end) ftrace_trace_function = ftrace_list->func; out: - spin_unlock_irqrestore(&ftrace_func_lock, flags); + spin_unlock(&ftrace_lock); + + return ret; +} + +#ifdef CONFIG_DYNAMIC_FTRACE + +static struct hlist_head ftrace_hash[FTRACE_HASHSIZE]; + +static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu); + +static DEFINE_SPINLOCK(ftrace_shutdown_lock); +static DEFINE_MUTEX(ftraced_lock); + +static int ftraced_trigger; +static int ftraced_suspend; + +static int ftrace_record_suspend; + +static inline int +notrace ftrace_ip_in_hash(unsigned long ip, unsigned long key) +{ + struct dyn_ftrace *p; + struct hlist_node *t; + int found = 0; + + hlist_for_each_entry(p, t, &ftrace_hash[key], node) { + if (p->ip == ip) { + found = 1; + break; + } + } + + return found; +} + +static inline void notrace +ftrace_add_hash(struct dyn_ftrace *node, unsigned long key) +{ + hlist_add_head(&node->node, &ftrace_hash[key]); +} + +static void notrace +ftrace_record_ip(unsigned long ip, unsigned long parent_ip) +{ + struct dyn_ftrace *node; + unsigned long flags; + unsigned long key; + int resched; + int atomic; + + resched = need_resched(); + preempt_disable_notrace(); + + /* We simply need to protect against recursion */ + __get_cpu_var(ftrace_shutdown_disable_cpu)++; + if (__get_cpu_var(ftrace_shutdown_disable_cpu) != 1) + goto out; + + if (unlikely(ftrace_record_suspend)) + goto out; + + key = hash_long(ip, FTRACE_HASHBITS); + + WARN_ON_ONCE(key >= FTRACE_HASHSIZE); + + if (ftrace_ip_in_hash(ip, key)) + goto out; + + atomic = irqs_disabled(); + + spin_lock_irqsave(&ftrace_shutdown_lock, flags); + + /* This ip may have hit the hash before the lock */ + if (ftrace_ip_in_hash(ip, key)) + goto out_unlock; + + /* + * There's a slight race that the ftraced will update the + * hash and reset here. The arch alloc is responsible + * for seeing if the IP has already changed, and if + * it has, the alloc will fail. + */ + node = ftrace_alloc_shutdown_node(ip); + if (!node) + goto out_unlock; + + node->ip = ip; + + ftrace_add_hash(node, key); + + ftraced_trigger = 1; + + out_unlock: + spin_unlock_irqrestore(&ftrace_shutdown_lock, flags); + out: + __get_cpu_var(ftrace_shutdown_disable_cpu)--; + + /* prevent recursion with scheduler */ + if (resched) + preempt_enable_no_resched_notrace(); + else + preempt_enable_notrace(); +} + +static struct ftrace_ops ftrace_shutdown_ops __read_mostly = +{ + .func = ftrace_record_ip, +}; + + +static int notrace __ftrace_modify_code(void *data) +{ + void (*func)(void) = data; + + func(); + return 0; +} + +static void notrace ftrace_run_startup_code(void) +{ + stop_machine_run(__ftrace_modify_code, ftrace_startup_code, NR_CPUS); +} + +static void notrace ftrace_run_shutdown_code(void) +{ + stop_machine_run(__ftrace_modify_code, ftrace_shutdown_code, NR_CPUS); +} + +static void notrace ftrace_startup(void) +{ + mutex_lock(&ftraced_lock); + ftraced_suspend++; + if (ftraced_suspend != 1) + goto out; + __unregister_ftrace_function(&ftrace_shutdown_ops); + + ftrace_run_startup_code(); + out: + mutex_unlock(&ftraced_lock); +} + +static void notrace ftrace_shutdown(void) +{ + mutex_lock(&ftraced_lock); + ftraced_suspend--; + if (ftraced_suspend) + goto out; + + ftrace_run_shutdown_code(); + + __register_ftrace_function(&ftrace_shutdown_ops); + out: + mutex_unlock(&ftraced_lock); +} + +static cycle_t ftrace_update_time; +static unsigned long ftrace_update_cnt; +unsigned long ftrace_update_tot_cnt; + +static int notrace __ftrace_update_code(void *ignore) +{ + struct dyn_ftrace *p; + struct hlist_head head; + struct hlist_node *t; + cycle_t start, stop; + int i; + + /* Don't be calling ftrace ops now */ + __unregister_ftrace_function(&ftrace_shutdown_ops); + + start = now(raw_smp_processor_id()); + ftrace_update_cnt = 0; + + /* No locks needed, the machine is stopped! */ + for (i = 0; i < FTRACE_HASHSIZE; i++) { + if (hlist_empty(&ftrace_hash[i])) + continue; + + head = ftrace_hash[i]; + INIT_HLIST_HEAD(&ftrace_hash[i]); + + /* all CPUS are stopped, we are safe to modify code */ + hlist_for_each_entry(p, t, &head, node) { + ftrace_code_disable(p); + ftrace_update_cnt++; + } + + } + + stop = now(raw_smp_processor_id()); + ftrace_update_time = stop - start; + ftrace_update_tot_cnt += ftrace_update_cnt; + + __register_ftrace_function(&ftrace_shutdown_ops); return 0; } +static void notrace ftrace_update_code(void) +{ + stop_machine_run(__ftrace_update_code, NULL, NR_CPUS); +} + +static int notrace ftraced(void *ignore) +{ + unsigned long usecs; + + set_current_state(TASK_INTERRUPTIBLE); + + while (!kthread_should_stop()) { + + /* check once a second */ + schedule_timeout(HZ); + + mutex_lock(&ftraced_lock); + if (ftraced_trigger && !ftraced_suspend) { + ftrace_record_suspend++; + ftrace_update_code(); + usecs = nsecs_to_usecs(ftrace_update_time); + if (ftrace_update_tot_cnt > 100000) { + ftrace_update_tot_cnt = 0; + pr_info("hm, dftrace overflow: %lu change%s" + " (%lu total) in %lu usec%s\n", + ftrace_update_cnt, + ftrace_update_cnt != 1 ? "s" : "", + ftrace_update_tot_cnt, + usecs, usecs != 1 ? "s" : ""); + WARN_ON_ONCE(1); + } + ftraced_trigger = 0; + ftrace_record_suspend--; + } + mutex_unlock(&ftraced_lock); + + ftrace_shutdown_replenish(); + + set_current_state(TASK_INTERRUPTIBLE); + } + __set_current_state(TASK_RUNNING); + return 0; +} + +static int __init notrace ftrace_shutdown_init(void) +{ + struct task_struct *p; + int ret; + + ret = ftrace_shutdown_arch_init(); + if (ret) + return ret; + + p = kthread_run(ftraced, NULL, "ftraced"); + if (IS_ERR(p)) + return -1; + + __register_ftrace_function(&ftrace_shutdown_ops); + + return 0; +} + +core_initcall(ftrace_shutdown_init); +#else +# define ftrace_startup() do { } while (0) +# define ftrace_shutdown() do { } while (0) +#endif /* CONFIG_DYNAMIC_FTRACE */ + /** - * clear_ftrace_function - reset the ftrace function + * register_ftrace_function - register a function for profiling + * @ops - ops structure that holds the function for profiling. * - * This NULLs the ftrace function and in essence stops - * tracing. There may be lag + * Register a function to be called by all functions in the + * kernel. + * + * Note: @ops->func and all the functions it calls must be labeled + * with "notrace", otherwise it will go into a + * recursive loop. */ -void clear_ftrace_function(void) +int register_ftrace_function(struct ftrace_ops *ops) { - ftrace_trace_function = ftrace_stub; + ftrace_startup(); + + return __register_ftrace_function(ops); +} + +/** + * unregister_ftrace_function - unresgister a function for profiling. + * @ops - ops structure that holds the function to unregister + * + * Unregister a function that was added to be called by ftrace profiling. + */ +int unregister_ftrace_function(struct ftrace_ops *ops) +{ + int ret; + + ret = __unregister_ftrace_function(ops); + + if (ftrace_list == &ftrace_list_end) + ftrace_shutdown(); + + return ret; } -- cgit v1.2.3 From b0fc494fae96a7089f3651cb451f461c7291244c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: add ftrace_enabled sysctl to disable mcount function This patch adds back the sysctl ftrace_enabled. This time it is defaulted to on, if DYNAMIC_FTRACE is configured. When ftrace_enabled is disabled, the ftrace function is set to the stub return. If DYNAMIC_FTRACE is also configured, on ftrace_enabled = 0, the registered ftrace functions will all be set to jmps, but no more new calls to ftrace recording (used to find the ftrace calling sites) will be called. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 6 +++ kernel/sysctl.c | 11 +++++ kernel/trace/ftrace.c | 125 ++++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 124 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 90dbc0ee2046..ccd8537dbdb7 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -5,6 +5,12 @@ #include +extern int ftrace_enabled; +extern int +ftrace_enable_sysctl(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos); + typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); struct ftrace_ops { diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 29116652dca8..efaf7c5500e9 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -455,6 +456,16 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, +#ifdef CONFIG_FTRACE + { + .ctl_name = CTL_UNNUMBERED, + .procname = "ftrace_enabled", + .data = &ftrace_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &ftrace_enable_sysctl, + }, +#endif #ifdef CONFIG_KMOD { .ctl_name = KERN_MODPROBE, diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index d1ae2ba25274..d3de37299ba4 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -20,12 +20,24 @@ #include #include #include +#include #include #include #include "trace.h" +#ifdef CONFIG_DYNAMIC_FTRACE +# define FTRACE_ENABLED_INIT 1 +#else +# define FTRACE_ENABLED_INIT 0 +#endif + +int ftrace_enabled = FTRACE_ENABLED_INIT; +static int last_ftrace_enabled = FTRACE_ENABLED_INIT; + static DEFINE_SPINLOCK(ftrace_lock); +static DEFINE_MUTEX(ftrace_sysctl_lock); + static struct ftrace_ops ftrace_list_end __read_mostly = { .func = ftrace_stub, @@ -78,14 +90,16 @@ static int notrace __register_ftrace_function(struct ftrace_ops *ops) smp_wmb(); ftrace_list = ops; - /* - * For one func, simply call it directly. - * For more than one func, call the chain. - */ - if (ops->next == &ftrace_list_end) - ftrace_trace_function = ops->func; - else - ftrace_trace_function = ftrace_list_func; + if (ftrace_enabled) { + /* + * For one func, simply call it directly. + * For more than one func, call the chain. + */ + if (ops->next == &ftrace_list_end) + ftrace_trace_function = ops->func; + else + ftrace_trace_function = ftrace_list_func; + } spin_unlock(&ftrace_lock); @@ -120,10 +134,12 @@ static int notrace __unregister_ftrace_function(struct ftrace_ops *ops) *p = (*p)->next; - /* If we only have one func left, then call that directly */ - if (ftrace_list == &ftrace_list_end || - ftrace_list->next == &ftrace_list_end) - ftrace_trace_function = ftrace_list->func; + if (ftrace_enabled) { + /* If we only have one func left, then call that directly */ + if (ftrace_list == &ftrace_list_end || + ftrace_list->next == &ftrace_list_end) + ftrace_trace_function = ftrace_list->func; + } out: spin_unlock(&ftrace_lock); @@ -263,7 +279,8 @@ static void notrace ftrace_startup(void) goto out; __unregister_ftrace_function(&ftrace_shutdown_ops); - ftrace_run_startup_code(); + if (ftrace_enabled) + ftrace_run_startup_code(); out: mutex_unlock(&ftraced_lock); } @@ -275,13 +292,32 @@ static void notrace ftrace_shutdown(void) if (ftraced_suspend) goto out; - ftrace_run_shutdown_code(); + if (ftrace_enabled) + ftrace_run_shutdown_code(); __register_ftrace_function(&ftrace_shutdown_ops); out: mutex_unlock(&ftraced_lock); } +static void notrace ftrace_startup_sysctl(void) +{ + mutex_lock(&ftraced_lock); + /* ftraced_suspend is true if we want ftrace running */ + if (ftraced_suspend) + ftrace_run_startup_code(); + mutex_unlock(&ftraced_lock); +} + +static void notrace ftrace_shutdown_sysctl(void) +{ + mutex_lock(&ftraced_lock); + /* ftraced_suspend is true if ftrace is running */ + if (ftraced_suspend) + ftrace_run_shutdown_code(); + mutex_unlock(&ftraced_lock); +} + static cycle_t ftrace_update_time; static unsigned long ftrace_update_cnt; unsigned long ftrace_update_tot_cnt; @@ -341,8 +377,9 @@ static int notrace ftraced(void *ignore) /* check once a second */ schedule_timeout(HZ); + mutex_lock(&ftrace_sysctl_lock); mutex_lock(&ftraced_lock); - if (ftraced_trigger && !ftraced_suspend) { + if (ftrace_enabled && ftraced_trigger && !ftraced_suspend) { ftrace_record_suspend++; ftrace_update_code(); usecs = nsecs_to_usecs(ftrace_update_time); @@ -360,6 +397,7 @@ static int notrace ftraced(void *ignore) ftrace_record_suspend--; } mutex_unlock(&ftraced_lock); + mutex_unlock(&ftrace_sysctl_lock); ftrace_shutdown_replenish(); @@ -389,8 +427,10 @@ static int __init notrace ftrace_shutdown_init(void) core_initcall(ftrace_shutdown_init); #else -# define ftrace_startup() do { } while (0) -# define ftrace_shutdown() do { } while (0) +# define ftrace_startup() do { } while (0) +# define ftrace_shutdown() do { } while (0) +# define ftrace_startup_sysctl() do { } while (0) +# define ftrace_shutdown_sysctl() do { } while (0) #endif /* CONFIG_DYNAMIC_FTRACE */ /** @@ -406,9 +446,15 @@ core_initcall(ftrace_shutdown_init); */ int register_ftrace_function(struct ftrace_ops *ops) { + int ret; + + mutex_lock(&ftrace_sysctl_lock); ftrace_startup(); - return __register_ftrace_function(ops); + ret = __register_ftrace_function(ops); + mutex_unlock(&ftrace_sysctl_lock); + + return ret; } /** @@ -421,10 +467,53 @@ int unregister_ftrace_function(struct ftrace_ops *ops) { int ret; + mutex_lock(&ftrace_sysctl_lock); ret = __unregister_ftrace_function(ops); if (ftrace_list == &ftrace_list_end) ftrace_shutdown(); + mutex_unlock(&ftrace_sysctl_lock); + + return ret; +} + +notrace int +ftrace_enable_sysctl(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int ret; + + mutex_lock(&ftrace_sysctl_lock); + + ret = proc_dointvec(table, write, filp, buffer, lenp, ppos); + + if (ret || !write || (last_ftrace_enabled == ftrace_enabled)) + goto out; + + last_ftrace_enabled = ftrace_enabled; + + if (ftrace_enabled) { + + ftrace_startup_sysctl(); + + /* we are starting ftrace again */ + if (ftrace_list != &ftrace_list_end) { + if (ftrace_list->next == &ftrace_list_end) + ftrace_trace_function = ftrace_list->func; + else + ftrace_trace_function = ftrace_list_func; + } + + } else { + /* stopping ftrace calls (just send to ftrace_stub) */ + ftrace_trace_function = ftrace_stub; + + ftrace_shutdown_sysctl(); + } + + out: + mutex_unlock(&ftrace_sysctl_lock); return ret; } -- cgit v1.2.3 From dfa60aba04dae7833d75b2e2be124bb7cfb8239f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: use nops instead of jmp This patch patches the call to mcount with nops instead of a jmp over the mcount call. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/alternative.c | 4 ++-- arch/x86/kernel/ftrace.c | 40 ++++++++++++++++++++++++---------------- include/asm-x86/alternative.h | 2 ++ 3 files changed, 28 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 65c7857a90dd..de240ba2e288 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -143,7 +143,7 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { #ifdef CONFIG_X86_64 extern char __vsyscall_0; -static inline const unsigned char*const * find_nop_table(void) +const unsigned char *const *find_nop_table(void) { return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || boot_cpu_data.x86 < 6 ? k8_nops : p6_nops; @@ -162,7 +162,7 @@ static const struct nop { { -1, NULL } }; -static const unsigned char*const * find_nop_table(void) +const unsigned char *const *find_nop_table(void) { const unsigned char *const *noptable = intel_nops; int i; diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 5dd58136ef02..2e060c58b860 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -16,11 +16,12 @@ #include #include -#define CALL_BACK 5 +#include -#define JMPFWD 0x03eb +#define CALL_BACK 5 -static unsigned short ftrace_jmp = JMPFWD; +/* Long is fine, even if it is only 4 bytes ;-) */ +static long *ftrace_nop; struct ftrace_record { struct dyn_ftrace rec; @@ -55,13 +56,13 @@ static struct ftrace_page *ftrace_pages; notrace struct dyn_ftrace *ftrace_alloc_shutdown_node(unsigned long ip) { struct ftrace_record *rec; - unsigned short save; + unsigned long save; ip -= CALL_BACK; - save = *(short *)ip; + save = *(long *)ip; /* If this was already converted, skip it */ - if (save == JMPFWD) + if (save == *ftrace_nop) return NULL; if (ftrace_pages->index == ENTRIES_PER_PAGE) { @@ -79,9 +80,10 @@ static int notrace ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code) { - unsigned short old = *(unsigned short *)old_code; - unsigned short new = *(unsigned short *)new_code; - unsigned short replaced; + unsigned replaced; + unsigned old = *(unsigned *)old_code; /* 4 bytes */ + unsigned new = *(unsigned *)new_code; /* 4 bytes */ + unsigned char newch = new_code[4]; int faulted = 0; /* @@ -94,7 +96,9 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, */ asm volatile ( "1: lock\n" - " cmpxchg %w3, (%2)\n" + " cmpxchg %3, (%2)\n" + " jnz 2f\n" + " movb %b4, 4(%2)\n" "2:\n" ".section .fixup, \"ax\"\n" " movl $1, %0\n" @@ -102,11 +106,12 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, ".previous\n" _ASM_EXTABLE(1b, 3b) : "=r"(faulted), "=a"(replaced) - : "r"(ip), "r"(new), "0"(faulted), "a"(old) + : "r"(ip), "r"(new), "r"(newch), + "0"(faulted), "a"(old) : "memory"); sync_core(); - if (replaced != old) + if (replaced != old && replaced != new) faulted = 2; return faulted; @@ -132,7 +137,7 @@ notrace void ftrace_code_disable(struct dyn_ftrace *rec) /* move the IP back to the start of the call */ ip -= CALL_BACK; - r->failed = ftrace_modify_code(ip, save.code, (char *)&ftrace_jmp); + r->failed = ftrace_modify_code(ip, save.code, (char *)ftrace_nop); } static void notrace ftrace_replace_code(int saved) @@ -144,9 +149,9 @@ static void notrace ftrace_replace_code(int saved) int i; if (saved) - old = (char *)&ftrace_jmp; + old = (char *)ftrace_nop; else - new = (char *)&ftrace_jmp; + new = (char *)ftrace_nop; for (pg = ftrace_pages_start; pg; pg = pg->next) { for (i = 0; i < pg->index; i++) { @@ -194,12 +199,15 @@ notrace void ftrace_shutdown_replenish(void) ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL); } -notrace int ftrace_shutdown_arch_init(void) +notrace int __init ftrace_shutdown_arch_init(void) { + const unsigned char *const *noptable = find_nop_table(); struct ftrace_page *pg; int cnt; int i; + ftrace_nop = (unsigned long *)noptable[CALL_BACK]; + /* allocate a few pages */ ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL); if (!ftrace_pages_start) diff --git a/include/asm-x86/alternative.h b/include/asm-x86/alternative.h index 1f6a9ca10126..f6aa18eadf71 100644 --- a/include/asm-x86/alternative.h +++ b/include/asm-x86/alternative.h @@ -72,6 +72,8 @@ static inline void alternatives_smp_module_del(struct module *mod) {} static inline void alternatives_smp_switch(int smp) {} #endif /* CONFIG_SMP */ +const unsigned char *const *find_nop_table(void); + /* * Alternative instructions for different CPU types or capabilities. * -- cgit v1.2.3 From 3c1720f00bb619302ba19d55986ab565e74d06db Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: move memory management out of arch code This patch moves the memory management of the ftrace records out of the arch code and into the generic code making the arch code simpler. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/ftrace.c | 183 ++++++++--------------------------------------- include/linux/ftrace.h | 18 +++-- kernel/trace/ftrace.c | 154 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 192 insertions(+), 163 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 2e060c58b860..b69795efa226 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -23,25 +23,6 @@ /* Long is fine, even if it is only 4 bytes ;-) */ static long *ftrace_nop; -struct ftrace_record { - struct dyn_ftrace rec; - int failed; -} __attribute__((packed)); - -struct ftrace_page { - struct ftrace_page *next; - int index; - struct ftrace_record records[]; -} __attribute__((packed)); - -#define ENTRIES_PER_PAGE \ - ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct ftrace_record)) - -/* estimate from running different kernels */ -#define NR_TO_INIT 10000 - -#define MCOUNT_ADDR ((long)(&mcount)) - union ftrace_code_union { char code[5]; struct { @@ -50,33 +31,41 @@ union ftrace_code_union { } __attribute__((packed)); }; -static struct ftrace_page *ftrace_pages_start; -static struct ftrace_page *ftrace_pages; - -notrace struct dyn_ftrace *ftrace_alloc_shutdown_node(unsigned long ip) +notrace int ftrace_ip_converted(unsigned long ip) { - struct ftrace_record *rec; unsigned long save; ip -= CALL_BACK; save = *(long *)ip; - /* If this was already converted, skip it */ - if (save == *ftrace_nop) - return NULL; + return save == *ftrace_nop; +} - if (ftrace_pages->index == ENTRIES_PER_PAGE) { - if (!ftrace_pages->next) - return NULL; - ftrace_pages = ftrace_pages->next; - } +static int notrace ftrace_calc_offset(long ip, long addr) +{ + return (int)(addr - ip); +} - rec = &ftrace_pages->records[ftrace_pages->index++]; +notrace unsigned char *ftrace_nop_replace(void) +{ + return (char *)ftrace_nop; +} + +notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) +{ + static union ftrace_code_union calc; - return &rec->rec; + calc.e8 = 0xe8; + calc.offset = ftrace_calc_offset(ip, addr); + + /* + * No locking needed, this must be called via kstop_machine + * which in essence is like running on a uniprocessor machine. + */ + return calc.code; } -static int notrace +notrace int ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code) { @@ -86,6 +75,9 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char newch = new_code[4]; int faulted = 0; + /* move the IP back to the start of the call */ + ip -= CALL_BACK; + /* * Note: Due to modules and __init, code can * disappear and change, we need to protect against faulting @@ -117,129 +109,12 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, return faulted; } -static int notrace ftrace_calc_offset(long ip) -{ - return (int)(MCOUNT_ADDR - ip); -} - -notrace void ftrace_code_disable(struct dyn_ftrace *rec) -{ - unsigned long ip; - union ftrace_code_union save; - struct ftrace_record *r = - container_of(rec, struct ftrace_record, rec); - - ip = rec->ip; - - save.e8 = 0xe8; - save.offset = ftrace_calc_offset(ip); - - /* move the IP back to the start of the call */ - ip -= CALL_BACK; - - r->failed = ftrace_modify_code(ip, save.code, (char *)ftrace_nop); -} - -static void notrace ftrace_replace_code(int saved) -{ - unsigned char *new = NULL, *old = NULL; - struct ftrace_record *rec; - struct ftrace_page *pg; - unsigned long ip; - int i; - - if (saved) - old = (char *)ftrace_nop; - else - new = (char *)ftrace_nop; - - for (pg = ftrace_pages_start; pg; pg = pg->next) { - for (i = 0; i < pg->index; i++) { - union ftrace_code_union calc; - rec = &pg->records[i]; - - /* don't modify code that has already faulted */ - if (rec->failed) - continue; - - ip = rec->rec.ip; - - calc.e8 = 0xe8; - calc.offset = ftrace_calc_offset(ip); - - if (saved) - new = calc.code; - else - old = calc.code; - - ip -= CALL_BACK; - - rec->failed = ftrace_modify_code(ip, old, new); - } - } - -} - -notrace void ftrace_startup_code(void) -{ - ftrace_replace_code(1); -} - -notrace void ftrace_shutdown_code(void) -{ - ftrace_replace_code(0); -} - -notrace void ftrace_shutdown_replenish(void) -{ - if (ftrace_pages->next) - return; - - /* allocate another page */ - ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL); -} - -notrace int __init ftrace_shutdown_arch_init(void) +int __init ftrace_dyn_arch_init(void) { const unsigned char *const *noptable = find_nop_table(); - struct ftrace_page *pg; - int cnt; - int i; ftrace_nop = (unsigned long *)noptable[CALL_BACK]; - /* allocate a few pages */ - ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL); - if (!ftrace_pages_start) - return -1; - - /* - * Allocate a few more pages. - * - * TODO: have some parser search vmlinux before - * final linking to find all calls to ftrace. - * Then we can: - * a) know how many pages to allocate. - * and/or - * b) set up the table then. - * - * The dynamic code is still necessary for - * modules. - */ - - pg = ftrace_pages = ftrace_pages_start; - - cnt = NR_TO_INIT / ENTRIES_PER_PAGE; - - for (i = 0; i < cnt; i++) { - pg->next = (void *)get_zeroed_page(GFP_KERNEL); - - /* If we fail, we'll try later anyway */ - if (!pg->next) - break; - - pg = pg->next; - } - return 0; } + diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ccd8537dbdb7..d509ad6c9cb8 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -42,19 +42,23 @@ extern void mcount(void); # define FTRACE_HASHBITS 10 # define FTRACE_HASHSIZE (1<node, &ftrace_hash[key]); } +static notrace struct dyn_ftrace *ftrace_alloc_shutdown_node(unsigned long ip) +{ + /* If this was already converted, skip it */ + if (ftrace_ip_converted(ip)) + return NULL; + + if (ftrace_pages->index == ENTRIES_PER_PAGE) { + if (!ftrace_pages->next) + return NULL; + ftrace_pages = ftrace_pages->next; + } + + return &ftrace_pages->records[ftrace_pages->index++]; +} + static void notrace ftrace_record_ip(unsigned long ip, unsigned long parent_ip) { @@ -252,6 +282,62 @@ static struct ftrace_ops ftrace_shutdown_ops __read_mostly = .func = ftrace_record_ip, }; +#define MCOUNT_ADDR ((long)(&mcount)) + +static void notrace ftrace_replace_code(int saved) +{ + unsigned char *new = NULL, *old = NULL; + struct dyn_ftrace *rec; + struct ftrace_page *pg; + unsigned long ip; + int failed; + int i; + + if (saved) + old = ftrace_nop_replace(); + else + new = ftrace_nop_replace(); + + for (pg = ftrace_pages_start; pg; pg = pg->next) { + for (i = 0; i < pg->index; i++) { + rec = &pg->records[i]; + + /* don't modify code that has already faulted */ + if (rec->flags & FTRACE_FL_FAILED) + continue; + + ip = rec->ip; + + if (saved) + new = ftrace_call_replace(ip, MCOUNT_ADDR); + else + old = ftrace_call_replace(ip, MCOUNT_ADDR); + + failed = ftrace_modify_code(ip, old, new); + if (failed) + rec->flags |= FTRACE_FL_FAILED; + } + } +} + +static notrace void ftrace_startup_code(void) +{ + ftrace_replace_code(1); +} + +static notrace void ftrace_shutdown_code(void) +{ + ftrace_replace_code(0); +} + +static notrace void ftrace_shutdown_replenish(void) +{ + if (ftrace_pages->next) + return; + + /* allocate another page */ + ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL); +} static int notrace __ftrace_modify_code(void *data) { @@ -261,6 +347,23 @@ static int notrace __ftrace_modify_code(void *data) return 0; } +static notrace void +ftrace_code_disable(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long ip; + unsigned char *nop, *call; + int failed; + + ip = rec->ip; + + nop = ftrace_nop_replace(); + call = ftrace_call_replace(ip, addr); + + failed = ftrace_modify_code(ip, call, nop); + if (failed) + rec->flags |= FTRACE_FL_FAILED; +} + static void notrace ftrace_run_startup_code(void) { stop_machine_run(__ftrace_modify_code, ftrace_startup_code, NR_CPUS); @@ -346,7 +449,7 @@ static int notrace __ftrace_update_code(void *ignore) /* all CPUS are stopped, we are safe to modify code */ hlist_for_each_entry(p, t, &head, node) { - ftrace_code_disable(p); + ftrace_code_disable(p, MCOUNT_ADDR); ftrace_update_cnt++; } @@ -407,12 +510,59 @@ static int notrace ftraced(void *ignore) return 0; } +static int __init ftrace_dyn_table_alloc(void) +{ + struct ftrace_page *pg; + int cnt; + int i; + int ret; + + ret = ftrace_dyn_arch_init(); + if (ret) + return ret; + + /* allocate a few pages */ + ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL); + if (!ftrace_pages_start) + return -1; + + /* + * Allocate a few more pages. + * + * TODO: have some parser search vmlinux before + * final linking to find all calls to ftrace. + * Then we can: + * a) know how many pages to allocate. + * and/or + * b) set up the table then. + * + * The dynamic code is still necessary for + * modules. + */ + + pg = ftrace_pages = ftrace_pages_start; + + cnt = NR_TO_INIT / ENTRIES_PER_PAGE; + + for (i = 0; i < cnt; i++) { + pg->next = (void *)get_zeroed_page(GFP_KERNEL); + + /* If we fail, we'll try later anyway */ + if (!pg->next) + break; + + pg = pg->next; + } + + return 0; +} + static int __init notrace ftrace_shutdown_init(void) { struct task_struct *p; int ret; - ret = ftrace_shutdown_arch_init(); + ret = ftrace_dyn_table_alloc(); if (ret) return ret; -- cgit v1.2.3 From d61f82d06672f57fca410da6f7fffd15867db622 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: use dynamic patching for updating mcount calls This patch replaces the indirect call to the mcount function pointer with a direct call that will be patched by the dynamic ftrace routines. On boot up, the mcount function calls the ftace_stub function. When the dynamic ftrace code is initialized, the ftrace_stub is replaced with a call to the ftrace_record_ip, which records the instruction pointers of the locations that call it. Later, the ftraced daemon will call kstop_machine and patch all the locations to nops. When a ftrace is enabled, the original calls to mcount will now be set top call ftrace_caller, which will do a direct call to the registered ftrace function. This direct call is also patched when the function that should be called is updated. All patching is performed by a kstop_machine routine to prevent any type of race conditions that is associated with modifying code on the fly. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/entry_32.S | 47 +++++++++++- arch/x86/kernel/entry_64.S | 67 ++++++++++++++++- arch/x86/kernel/ftrace.c | 41 +++++++++- include/linux/ftrace.h | 7 +- kernel/trace/ftrace.c | 183 ++++++++++++++++++++++++++------------------- 5 files changed, 261 insertions(+), 84 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index f47b9b5440d2..e6517ce0b824 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1110,10 +1110,50 @@ ENDPROC(xen_failsafe_callback) #endif /* CONFIG_XEN */ #ifdef CONFIG_FTRACE +#ifdef CONFIG_DYNAMIC_FTRACE + +ENTRY(mcount) + pushl %eax + pushl %ecx + pushl %edx + movl 0xc(%esp), %eax + +.globl mcount_call +mcount_call: + call ftrace_stub + + popl %edx + popl %ecx + popl %eax + + ret +END(mcount) + +ENTRY(ftrace_caller) + pushl %eax + pushl %ecx + pushl %edx + movl 0xc(%esp), %eax + movl 0x4(%ebp), %edx + +.globl ftrace_call +ftrace_call: + call ftrace_stub + + popl %edx + popl %ecx + popl %eax + +.globl ftrace_stub +ftrace_stub: + ret +END(ftrace_caller) + +#else /* ! CONFIG_DYNAMIC_FTRACE */ + ENTRY(mcount) cmpl $ftrace_stub, ftrace_trace_function jnz trace - .globl ftrace_stub ftrace_stub: ret @@ -1126,7 +1166,7 @@ trace: movl 0xc(%esp), %eax movl 0x4(%ebp), %edx - call *ftrace_trace_function + call *ftrace_trace_function popl %edx popl %ecx @@ -1134,7 +1174,8 @@ trace: jmp ftrace_stub END(mcount) -#endif +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_FTRACE */ .section .rodata,"a" #include "syscall_table_32.S" diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index f046e0c64883..fe25e5febca3 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -55,6 +55,70 @@ .code64 #ifdef CONFIG_FTRACE +#ifdef CONFIG_DYNAMIC_FTRACE +ENTRY(mcount) + + subq $0x38, %rsp + movq %rax, (%rsp) + movq %rcx, 8(%rsp) + movq %rdx, 16(%rsp) + movq %rsi, 24(%rsp) + movq %rdi, 32(%rsp) + movq %r8, 40(%rsp) + movq %r9, 48(%rsp) + + movq 0x38(%rsp), %rdi + +.globl mcount_call +mcount_call: + call ftrace_stub + + movq 48(%rsp), %r9 + movq 40(%rsp), %r8 + movq 32(%rsp), %rdi + movq 24(%rsp), %rsi + movq 16(%rsp), %rdx + movq 8(%rsp), %rcx + movq (%rsp), %rax + addq $0x38, %rsp + + retq +END(mcount) + +ENTRY(ftrace_caller) + + /* taken from glibc */ + subq $0x38, %rsp + movq %rax, (%rsp) + movq %rcx, 8(%rsp) + movq %rdx, 16(%rsp) + movq %rsi, 24(%rsp) + movq %rdi, 32(%rsp) + movq %r8, 40(%rsp) + movq %r9, 48(%rsp) + + movq 0x38(%rsp), %rdi + movq 8(%rbp), %rsi + +.globl ftrace_call +ftrace_call: + call ftrace_stub + + movq 48(%rsp), %r9 + movq 40(%rsp), %r8 + movq 32(%rsp), %rdi + movq 24(%rsp), %rsi + movq 16(%rsp), %rdx + movq 8(%rsp), %rcx + movq (%rsp), %rax + addq $0x38, %rsp + +.globl ftrace_stub +ftrace_stub: + retq +END(ftrace_caller) + +#else /* ! CONFIG_DYNAMIC_FTRACE */ ENTRY(mcount) cmpq $ftrace_stub, ftrace_trace_function jnz trace @@ -89,7 +153,8 @@ trace: jmp ftrace_stub END(mcount) -#endif +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_FTRACE */ #ifndef CONFIG_PREEMPT #define retint_kernel retint_restore_args diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index b69795efa226..9f44623e0072 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -109,10 +109,49 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, return faulted; } -int __init ftrace_dyn_arch_init(void) +notrace int ftrace_update_ftrace_func(ftrace_func_t func) +{ + unsigned long ip = (unsigned long)(&ftrace_call); + unsigned char old[5], *new; + int ret; + + ip += CALL_BACK; + + memcpy(old, &ftrace_call, 5); + new = ftrace_call_replace(ip, (unsigned long)func); + ret = ftrace_modify_code(ip, old, new); + + return ret; +} + +notrace int ftrace_mcount_set(unsigned long *data) +{ + unsigned long ip = (long)(&mcount_call); + unsigned long *addr = data; + unsigned char old[5], *new; + + /* ip is at the location, but modify code will subtact this */ + ip += CALL_BACK; + + /* + * Replace the mcount stub with a pointer to the + * ip recorder function. + */ + memcpy(old, &mcount_call, 5); + new = ftrace_call_replace(ip, *addr); + *addr = ftrace_modify_code(ip, old, new); + + return 0; +} + +int __init ftrace_dyn_arch_init(void *data) { const unsigned char *const *noptable = find_nop_table(); + /* This is running in kstop_machine */ + + ftrace_mcount_set(data); + ftrace_nop = (unsigned long *)noptable[CALL_BACK]; return 0; diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index d509ad6c9cb8..b0dd0093058f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -56,9 +56,14 @@ struct dyn_ftrace { extern int ftrace_ip_converted(unsigned long ip); extern unsigned char *ftrace_nop_replace(void); extern unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr); -extern int ftrace_dyn_arch_init(void); +extern int ftrace_dyn_arch_init(void *data); +extern int ftrace_mcount_set(unsigned long *data); extern int ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code); +extern int ftrace_update_ftrace_func(ftrace_func_t func); +extern void ftrace_caller(void); +extern void ftrace_call(void); +extern void mcount_call(void); #endif #ifdef CONFIG_FRAME_POINTER diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f6d9af3bf66b..88544f9bc0ed 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -26,14 +26,8 @@ #include "trace.h" -#ifdef CONFIG_DYNAMIC_FTRACE -# define FTRACE_ENABLED_INIT 1 -#else -# define FTRACE_ENABLED_INIT 0 -#endif - -int ftrace_enabled = FTRACE_ENABLED_INIT; -static int last_ftrace_enabled = FTRACE_ENABLED_INIT; +int ftrace_enabled; +static int last_ftrace_enabled; static DEFINE_SPINLOCK(ftrace_lock); static DEFINE_MUTEX(ftrace_sysctl_lock); @@ -149,6 +143,14 @@ static int notrace __unregister_ftrace_function(struct ftrace_ops *ops) #ifdef CONFIG_DYNAMIC_FTRACE +enum { + FTRACE_ENABLE_CALLS = (1 << 0), + FTRACE_DISABLE_CALLS = (1 << 1), + FTRACE_UPDATE_TRACE_FUNC = (1 << 2), + FTRACE_ENABLE_MCOUNT = (1 << 3), + FTRACE_DISABLE_MCOUNT = (1 << 4), +}; + static struct hlist_head ftrace_hash[FTRACE_HASHSIZE]; static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu); @@ -199,12 +201,8 @@ ftrace_add_hash(struct dyn_ftrace *node, unsigned long key) hlist_add_head(&node->node, &ftrace_hash[key]); } -static notrace struct dyn_ftrace *ftrace_alloc_shutdown_node(unsigned long ip) +static notrace struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) { - /* If this was already converted, skip it */ - if (ftrace_ip_converted(ip)) - return NULL; - if (ftrace_pages->index == ENTRIES_PER_PAGE) { if (!ftrace_pages->next) return NULL; @@ -215,7 +213,7 @@ static notrace struct dyn_ftrace *ftrace_alloc_shutdown_node(unsigned long ip) } static void notrace -ftrace_record_ip(unsigned long ip, unsigned long parent_ip) +ftrace_record_ip(unsigned long ip) { struct dyn_ftrace *node; unsigned long flags; @@ -223,6 +221,9 @@ ftrace_record_ip(unsigned long ip, unsigned long parent_ip) int resched; int atomic; + if (!ftrace_enabled) + return; + resched = need_resched(); preempt_disable_notrace(); @@ -251,11 +252,12 @@ ftrace_record_ip(unsigned long ip, unsigned long parent_ip) /* * There's a slight race that the ftraced will update the - * hash and reset here. The arch alloc is responsible - * for seeing if the IP has already changed, and if - * it has, the alloc will fail. + * hash and reset here. If it is already converted, skip it. */ - node = ftrace_alloc_shutdown_node(ip); + if (ftrace_ip_converted(ip)) + goto out_unlock; + + node = ftrace_alloc_dyn_node(ip); if (!node) goto out_unlock; @@ -277,11 +279,7 @@ ftrace_record_ip(unsigned long ip, unsigned long parent_ip) preempt_enable_notrace(); } -static struct ftrace_ops ftrace_shutdown_ops __read_mostly = -{ - .func = ftrace_record_ip, -}; - +#define FTRACE_ADDR ((long)(&ftrace_caller)) #define MCOUNT_ADDR ((long)(&mcount)) static void notrace ftrace_replace_code(int saved) @@ -309,9 +307,9 @@ static void notrace ftrace_replace_code(int saved) ip = rec->ip; if (saved) - new = ftrace_call_replace(ip, MCOUNT_ADDR); + new = ftrace_call_replace(ip, FTRACE_ADDR); else - old = ftrace_call_replace(ip, MCOUNT_ADDR); + old = ftrace_call_replace(ip, FTRACE_ADDR); failed = ftrace_modify_code(ip, old, new); if (failed) @@ -320,16 +318,6 @@ static void notrace ftrace_replace_code(int saved) } } -static notrace void ftrace_startup_code(void) -{ - ftrace_replace_code(1); -} - -static notrace void ftrace_shutdown_code(void) -{ - ftrace_replace_code(0); -} - static notrace void ftrace_shutdown_replenish(void) { if (ftrace_pages->next) @@ -339,16 +327,8 @@ static notrace void ftrace_shutdown_replenish(void) ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL); } -static int notrace __ftrace_modify_code(void *data) -{ - void (*func)(void) = data; - - func(); - return 0; -} - static notrace void -ftrace_code_disable(struct dyn_ftrace *rec, unsigned long addr) +ftrace_code_disable(struct dyn_ftrace *rec) { unsigned long ip; unsigned char *nop, *call; @@ -357,67 +337,113 @@ ftrace_code_disable(struct dyn_ftrace *rec, unsigned long addr) ip = rec->ip; nop = ftrace_nop_replace(); - call = ftrace_call_replace(ip, addr); + call = ftrace_call_replace(ip, MCOUNT_ADDR); failed = ftrace_modify_code(ip, call, nop); if (failed) rec->flags |= FTRACE_FL_FAILED; } -static void notrace ftrace_run_startup_code(void) +static int notrace __ftrace_modify_code(void *data) { - stop_machine_run(__ftrace_modify_code, ftrace_startup_code, NR_CPUS); + unsigned long addr; + int *command = data; + + if (*command & FTRACE_ENABLE_CALLS) + ftrace_replace_code(1); + else if (*command & FTRACE_DISABLE_CALLS) + ftrace_replace_code(0); + + if (*command & FTRACE_UPDATE_TRACE_FUNC) + ftrace_update_ftrace_func(ftrace_trace_function); + + if (*command & FTRACE_ENABLE_MCOUNT) { + addr = (unsigned long)ftrace_record_ip; + ftrace_mcount_set(&addr); + } else if (*command & FTRACE_DISABLE_MCOUNT) { + addr = (unsigned long)ftrace_stub; + ftrace_mcount_set(&addr); + } + + return 0; } -static void notrace ftrace_run_shutdown_code(void) +static void notrace ftrace_run_update_code(int command) { - stop_machine_run(__ftrace_modify_code, ftrace_shutdown_code, NR_CPUS); + stop_machine_run(__ftrace_modify_code, &command, NR_CPUS); } +static ftrace_func_t saved_ftrace_func; + static void notrace ftrace_startup(void) { + int command = 0; + mutex_lock(&ftraced_lock); ftraced_suspend++; - if (ftraced_suspend != 1) + if (ftraced_suspend == 1) + command |= FTRACE_ENABLE_CALLS; + + if (saved_ftrace_func != ftrace_trace_function) { + saved_ftrace_func = ftrace_trace_function; + command |= FTRACE_UPDATE_TRACE_FUNC; + } + + if (!command || !ftrace_enabled) goto out; - __unregister_ftrace_function(&ftrace_shutdown_ops); - if (ftrace_enabled) - ftrace_run_startup_code(); + ftrace_run_update_code(command); out: mutex_unlock(&ftraced_lock); } static void notrace ftrace_shutdown(void) { + int command = 0; + mutex_lock(&ftraced_lock); ftraced_suspend--; - if (ftraced_suspend) - goto out; + if (!ftraced_suspend) + command |= FTRACE_DISABLE_CALLS; - if (ftrace_enabled) - ftrace_run_shutdown_code(); + if (saved_ftrace_func != ftrace_trace_function) { + saved_ftrace_func = ftrace_trace_function; + command |= FTRACE_UPDATE_TRACE_FUNC; + } - __register_ftrace_function(&ftrace_shutdown_ops); + if (!command || !ftrace_enabled) + goto out; + + ftrace_run_update_code(command); out: mutex_unlock(&ftraced_lock); } static void notrace ftrace_startup_sysctl(void) { + int command = FTRACE_ENABLE_MCOUNT; + mutex_lock(&ftraced_lock); + /* Force update next time */ + saved_ftrace_func = NULL; /* ftraced_suspend is true if we want ftrace running */ if (ftraced_suspend) - ftrace_run_startup_code(); + command |= FTRACE_ENABLE_CALLS; + + ftrace_run_update_code(command); mutex_unlock(&ftraced_lock); } static void notrace ftrace_shutdown_sysctl(void) { + int command = FTRACE_DISABLE_MCOUNT; + mutex_lock(&ftraced_lock); /* ftraced_suspend is true if ftrace is running */ if (ftraced_suspend) - ftrace_run_shutdown_code(); + command |= FTRACE_DISABLE_CALLS; + + ftrace_run_update_code(command); mutex_unlock(&ftraced_lock); } @@ -430,11 +456,13 @@ static int notrace __ftrace_update_code(void *ignore) struct dyn_ftrace *p; struct hlist_head head; struct hlist_node *t; + int save_ftrace_enabled; cycle_t start, stop; int i; - /* Don't be calling ftrace ops now */ - __unregister_ftrace_function(&ftrace_shutdown_ops); + /* Don't be recording funcs now */ + save_ftrace_enabled = ftrace_enabled; + ftrace_enabled = 0; start = now(raw_smp_processor_id()); ftrace_update_cnt = 0; @@ -449,7 +477,7 @@ static int notrace __ftrace_update_code(void *ignore) /* all CPUS are stopped, we are safe to modify code */ hlist_for_each_entry(p, t, &head, node) { - ftrace_code_disable(p, MCOUNT_ADDR); + ftrace_code_disable(p); ftrace_update_cnt++; } @@ -459,7 +487,7 @@ static int notrace __ftrace_update_code(void *ignore) ftrace_update_time = stop - start; ftrace_update_tot_cnt += ftrace_update_cnt; - __register_ftrace_function(&ftrace_shutdown_ops); + ftrace_enabled = save_ftrace_enabled; return 0; } @@ -515,11 +543,6 @@ static int __init ftrace_dyn_table_alloc(void) struct ftrace_page *pg; int cnt; int i; - int ret; - - ret = ftrace_dyn_arch_init(); - if (ret) - return ret; /* allocate a few pages */ ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL); @@ -557,11 +580,19 @@ static int __init ftrace_dyn_table_alloc(void) return 0; } -static int __init notrace ftrace_shutdown_init(void) +static int __init notrace ftrace_dynamic_init(void) { struct task_struct *p; + unsigned long addr; int ret; + addr = (unsigned long)ftrace_record_ip; + stop_machine_run(ftrace_dyn_arch_init, &addr, NR_CPUS); + + /* ftrace_dyn_arch_init places the return code in addr */ + if (addr) + return addr; + ret = ftrace_dyn_table_alloc(); if (ret) return ret; @@ -570,12 +601,12 @@ static int __init notrace ftrace_shutdown_init(void) if (IS_ERR(p)) return -1; - __register_ftrace_function(&ftrace_shutdown_ops); + last_ftrace_enabled = ftrace_enabled = 1; return 0; } -core_initcall(ftrace_shutdown_init); +core_initcall(ftrace_dynamic_init); #else # define ftrace_startup() do { } while (0) # define ftrace_shutdown() do { } while (0) @@ -599,9 +630,8 @@ int register_ftrace_function(struct ftrace_ops *ops) int ret; mutex_lock(&ftrace_sysctl_lock); - ftrace_startup(); - ret = __register_ftrace_function(ops); + ftrace_startup(); mutex_unlock(&ftrace_sysctl_lock); return ret; @@ -619,10 +649,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops) mutex_lock(&ftrace_sysctl_lock); ret = __unregister_ftrace_function(ops); - - if (ftrace_list == &ftrace_list_end) - ftrace_shutdown(); - + ftrace_shutdown(); mutex_unlock(&ftrace_sysctl_lock); return ret; -- cgit v1.2.3 From 5072c59fd45e9976d02ee6f18c7336ef97623cbc Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: add filter select functions to trace This patch adds two files to the debugfs system: /debugfs/tracing/available_filter_functions and /debugfs/tracing/set_ftrace_filter The available_filter_functions lists all functions that has been recorded by the ftraced that has called the ftrace_record_ip function. This is to allow users to see what functions have been converted to nops and can be enabled for tracing. To enable functions, simply echo the names (whitespace delimited) into set_ftrace_filter. Simple wildcards are also allowed. echo 'scheduler' > /debugfs/tracing/set_ftrace_filter Will have only the scheduler be activated when tracing is enabled. echo 'sched_*' > /debugfs/tracing/set_ftrace_filter Will have only the functions starting with 'sched_' be activated. echo '*lock' > /debugfs/tracing/set_ftrace_filter Will have only functions ending with 'lock' be activated. echo '*lock*' > /debugfs/tracing/set_ftrace_filter Will have only functions with 'lock' in its name be activated. Note: 'sched*lock' will not work. The only wildcards that are allowed is an asterisk and the beginning and or end of the string passed in. Multiple names can be passed in with whitespace delimited: echo 'scheduler *lock *acpi*' > /debugfs/tracing/set_ftrace_filter is also the same as: echo 'scheduler' > /debugfs/tracing/set_ftrace_filter echo '*lock' >> /debugfs/tracing/set_ftrace_filter echo '*acpi*' >> /debugfs/tracing/set_ftrace_filter Appending does just that. It appends to the list. To disable all filters simply echo an empty line in: echo > /debugfs/tracing/set_ftrace_filter Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 4 +- kernel/trace/ftrace.c | 527 +++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 513 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b0dd0093058f..f5911d2d42c3 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -43,7 +43,9 @@ extern void mcount(void); # define FTRACE_HASHSIZE (1< #include #include +#include +#include #include #include #include -#include +#include #include #include +#include #include #include "trace.h" @@ -151,12 +154,15 @@ enum { FTRACE_DISABLE_MCOUNT = (1 << 4), }; +static int ftrace_filtered; + static struct hlist_head ftrace_hash[FTRACE_HASHSIZE]; static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu); static DEFINE_SPINLOCK(ftrace_shutdown_lock); static DEFINE_MUTEX(ftraced_lock); +static DEFINE_MUTEX(ftrace_filter_lock); struct ftrace_page { struct ftrace_page *next; @@ -282,16 +288,82 @@ ftrace_record_ip(unsigned long ip) #define FTRACE_ADDR ((long)(&ftrace_caller)) #define MCOUNT_ADDR ((long)(&mcount)) -static void notrace ftrace_replace_code(int saved) +static void notrace +__ftrace_replace_code(struct dyn_ftrace *rec, + unsigned char *old, unsigned char *new, int enable) +{ + unsigned long ip; + int failed; + + ip = rec->ip; + + if (ftrace_filtered && enable) { + unsigned long fl; + /* + * If filtering is on: + * + * If this record is set to be filtered and + * is enabled then do nothing. + * + * If this record is set to be filtered and + * it is not enabled, enable it. + * + * If this record is not set to be filtered + * and it is not enabled do nothing. + * + * If this record is not set to be filtered and + * it is enabled, disable it. + */ + fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_ENABLED); + + if ((fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) || + (fl == 0)) + return; + + /* + * If it is enabled disable it, + * otherwise enable it! + */ + if (fl == FTRACE_FL_ENABLED) { + /* swap new and old */ + new = old; + old = ftrace_call_replace(ip, FTRACE_ADDR); + rec->flags &= ~FTRACE_FL_ENABLED; + } else { + new = ftrace_call_replace(ip, FTRACE_ADDR); + rec->flags |= FTRACE_FL_ENABLED; + } + } else { + + if (enable) + new = ftrace_call_replace(ip, FTRACE_ADDR); + else + old = ftrace_call_replace(ip, FTRACE_ADDR); + + if (enable) { + if (rec->flags & FTRACE_FL_ENABLED) + return; + rec->flags |= FTRACE_FL_ENABLED; + } else { + if (!(rec->flags & FTRACE_FL_ENABLED)) + return; + rec->flags &= ~FTRACE_FL_ENABLED; + } + } + + failed = ftrace_modify_code(ip, old, new); + if (failed) + rec->flags |= FTRACE_FL_FAILED; +} + +static void notrace ftrace_replace_code(int enable) { unsigned char *new = NULL, *old = NULL; struct dyn_ftrace *rec; struct ftrace_page *pg; - unsigned long ip; - int failed; int i; - if (saved) + if (enable) old = ftrace_nop_replace(); else new = ftrace_nop_replace(); @@ -304,16 +376,7 @@ static void notrace ftrace_replace_code(int saved) if (rec->flags & FTRACE_FL_FAILED) continue; - ip = rec->ip; - - if (saved) - new = ftrace_call_replace(ip, FTRACE_ADDR); - else - old = ftrace_call_replace(ip, FTRACE_ADDR); - - failed = ftrace_modify_code(ip, old, new); - if (failed) - rec->flags |= FTRACE_FL_FAILED; + __ftrace_replace_code(rec, old, new, enable); } } } @@ -580,6 +643,436 @@ static int __init ftrace_dyn_table_alloc(void) return 0; } +enum { + FTRACE_ITER_FILTER = (1 << 0), + FTRACE_ITER_CONT = (1 << 1), +}; + +#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ + +struct ftrace_iterator { + loff_t pos; + struct ftrace_page *pg; + unsigned idx; + unsigned flags; + unsigned char buffer[FTRACE_BUFF_MAX+1]; + unsigned buffer_idx; + unsigned filtered; +}; + +static void notrace * +t_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct ftrace_iterator *iter = m->private; + struct dyn_ftrace *rec = NULL; + + (*pos)++; + + retry: + if (iter->idx >= iter->pg->index) { + if (iter->pg->next) { + iter->pg = iter->pg->next; + iter->idx = 0; + goto retry; + } + } else { + rec = &iter->pg->records[iter->idx++]; + if ((rec->flags & FTRACE_FL_FAILED) || + ((iter->flags & FTRACE_ITER_FILTER) && + !(rec->flags & FTRACE_FL_FILTER))) { + rec = NULL; + goto retry; + } + } + + iter->pos = *pos; + + return rec; +} + +static void *t_start(struct seq_file *m, loff_t *pos) +{ + struct ftrace_iterator *iter = m->private; + void *p = NULL; + loff_t l = -1; + + if (*pos != iter->pos) { + for (p = t_next(m, p, &l); p && l < *pos; p = t_next(m, p, &l)) + ; + } else { + l = *pos; + p = t_next(m, p, &l); + } + + return p; +} + +static void t_stop(struct seq_file *m, void *p) +{ +} + +static int t_show(struct seq_file *m, void *v) +{ + struct dyn_ftrace *rec = v; + char str[KSYM_SYMBOL_LEN]; + + if (!rec) + return 0; + + kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); + + seq_printf(m, "%s\n", str); + + return 0; +} + +static struct seq_operations show_ftrace_seq_ops = { + .start = t_start, + .next = t_next, + .stop = t_stop, + .show = t_show, +}; + +static int notrace +ftrace_avail_open(struct inode *inode, struct file *file) +{ + struct ftrace_iterator *iter; + int ret; + + iter = kzalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return -ENOMEM; + + iter->pg = ftrace_pages_start; + iter->pos = -1; + + ret = seq_open(file, &show_ftrace_seq_ops); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = iter; + } else + kfree(iter); + + return ret; +} + +int ftrace_avail_release(struct inode *inode, struct file *file) +{ + struct seq_file *m = (struct seq_file *)file->private_data; + struct ftrace_iterator *iter = m->private; + + seq_release(inode, file); + kfree(iter); + return 0; +} + +static void notrace ftrace_filter_reset(void) +{ + struct ftrace_page *pg; + struct dyn_ftrace *rec; + unsigned i; + + /* keep kstop machine from running */ + preempt_disable(); + ftrace_filtered = 0; + pg = ftrace_pages_start; + while (pg) { + for (i = 0; i < pg->index; i++) { + rec = &pg->records[i]; + if (rec->flags & FTRACE_FL_FAILED) + continue; + rec->flags &= ~FTRACE_FL_FILTER; + } + pg = pg->next; + } + preempt_enable(); +} + +static int notrace +ftrace_filter_open(struct inode *inode, struct file *file) +{ + struct ftrace_iterator *iter; + int ret = 0; + + iter = kzalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return -ENOMEM; + + mutex_lock(&ftrace_filter_lock); + if ((file->f_mode & FMODE_WRITE) && + !(file->f_flags & O_APPEND)) + ftrace_filter_reset(); + + if (file->f_mode & FMODE_READ) { + iter->pg = ftrace_pages_start; + iter->pos = -1; + iter->flags = FTRACE_ITER_FILTER; + + ret = seq_open(file, &show_ftrace_seq_ops); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = iter; + } else + kfree(iter); + } else + file->private_data = iter; + mutex_unlock(&ftrace_filter_lock); + + return ret; +} + +static ssize_t notrace +ftrace_filter_read(struct file *file, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + if (file->f_mode & FMODE_READ) + return seq_read(file, ubuf, cnt, ppos); + else + return -EPERM; +} + +static loff_t notrace +ftrace_filter_lseek(struct file *file, loff_t offset, int origin) +{ + loff_t ret; + + if (file->f_mode & FMODE_READ) + ret = seq_lseek(file, offset, origin); + else + file->f_pos = ret = 1; + + return ret; +} + +enum { + MATCH_FULL, + MATCH_FRONT_ONLY, + MATCH_MIDDLE_ONLY, + MATCH_END_ONLY, +}; + +static void notrace +ftrace_match(unsigned char *buff, int len) +{ + char str[KSYM_SYMBOL_LEN]; + char *search = NULL; + struct ftrace_page *pg; + struct dyn_ftrace *rec; + int type = MATCH_FULL; + unsigned i, match = 0, search_len = 0; + + for (i = 0; i < len; i++) { + if (buff[i] == '*') { + if (!i) { + search = buff + i + 1; + type = MATCH_END_ONLY; + search_len = len - (i + 1); + } else { + if (type == MATCH_END_ONLY) { + type = MATCH_MIDDLE_ONLY; + } else { + match = i; + type = MATCH_FRONT_ONLY; + } + buff[i] = 0; + break; + } + } + } + + /* keep kstop machine from running */ + preempt_disable(); + ftrace_filtered = 1; + pg = ftrace_pages_start; + while (pg) { + for (i = 0; i < pg->index; i++) { + int matched = 0; + char *ptr; + + rec = &pg->records[i]; + if (rec->flags & FTRACE_FL_FAILED) + continue; + kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); + switch (type) { + case MATCH_FULL: + if (strcmp(str, buff) == 0) + matched = 1; + break; + case MATCH_FRONT_ONLY: + if (memcmp(str, buff, match) == 0) + matched = 1; + break; + case MATCH_MIDDLE_ONLY: + if (strstr(str, search)) + matched = 1; + break; + case MATCH_END_ONLY: + ptr = strstr(str, search); + if (ptr && (ptr[search_len] == 0)) + matched = 1; + break; + } + if (matched) + rec->flags |= FTRACE_FL_FILTER; + } + pg = pg->next; + } + preempt_enable(); +} + +static ssize_t notrace +ftrace_filter_write(struct file *file, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct ftrace_iterator *iter; + char ch; + size_t read = 0; + ssize_t ret; + + if (!cnt || cnt < 0) + return 0; + + mutex_lock(&ftrace_filter_lock); + + if (file->f_mode & FMODE_READ) { + struct seq_file *m = file->private_data; + iter = m->private; + } else + iter = file->private_data; + + if (!*ppos) { + iter->flags &= ~FTRACE_ITER_CONT; + iter->buffer_idx = 0; + } + + ret = get_user(ch, ubuf++); + if (ret) + goto out; + read++; + cnt--; + + if (!(iter->flags & ~FTRACE_ITER_CONT)) { + /* skip white space */ + while (cnt && isspace(ch)) { + ret = get_user(ch, ubuf++); + if (ret) + goto out; + read++; + cnt--; + } + + + if (isspace(ch)) { + file->f_pos += read; + ret = read; + goto out; + } + + iter->buffer_idx = 0; + } + + while (cnt && !isspace(ch)) { + if (iter->buffer_idx < FTRACE_BUFF_MAX) + iter->buffer[iter->buffer_idx++] = ch; + else { + ret = -EINVAL; + goto out; + } + ret = get_user(ch, ubuf++); + if (ret) + goto out; + read++; + cnt--; + } + + if (isspace(ch)) { + iter->filtered++; + iter->buffer[iter->buffer_idx] = 0; + ftrace_match(iter->buffer, iter->buffer_idx); + iter->buffer_idx = 0; + } else + iter->flags |= FTRACE_ITER_CONT; + + + file->f_pos += read; + + ret = read; + out: + mutex_unlock(&ftrace_filter_lock); + + return ret; +} + +static int notrace +ftrace_filter_release(struct inode *inode, struct file *file) +{ + struct seq_file *m = (struct seq_file *)file->private_data; + struct ftrace_iterator *iter; + + mutex_lock(&ftrace_filter_lock); + if (file->f_mode & FMODE_READ) { + iter = m->private; + + seq_release(inode, file); + } else + iter = file->private_data; + + if (iter->buffer_idx) { + iter->filtered++; + iter->buffer[iter->buffer_idx] = 0; + ftrace_match(iter->buffer, iter->buffer_idx); + } + + mutex_lock(&ftrace_sysctl_lock); + mutex_lock(&ftraced_lock); + if (iter->filtered && ftraced_suspend && ftrace_enabled) + ftrace_run_update_code(FTRACE_ENABLE_CALLS); + mutex_unlock(&ftraced_lock); + mutex_unlock(&ftrace_sysctl_lock); + + kfree(iter); + mutex_unlock(&ftrace_filter_lock); + return 0; +} + +static struct file_operations ftrace_avail_fops = { + .open = ftrace_avail_open, + .read = seq_read, + .llseek = seq_lseek, + .release = ftrace_avail_release, +}; + +static struct file_operations ftrace_filter_fops = { + .open = ftrace_filter_open, + .read = ftrace_filter_read, + .write = ftrace_filter_write, + .llseek = ftrace_filter_lseek, + .release = ftrace_filter_release, +}; + +static __init int ftrace_init_debugfs(void) +{ + struct dentry *d_tracer; + struct dentry *entry; + + d_tracer = tracing_init_dentry(); + + entry = debugfs_create_file("available_filter_functions", 0444, + d_tracer, NULL, &ftrace_avail_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'available_filter_functions' entry\n"); + + entry = debugfs_create_file("set_ftrace_filter", 0644, d_tracer, + NULL, &ftrace_filter_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'set_ftrace_filter' entry\n"); + return 0; +} + +fs_initcall(ftrace_init_debugfs); + static int __init notrace ftrace_dynamic_init(void) { struct task_struct *p; @@ -657,14 +1150,14 @@ int unregister_ftrace_function(struct ftrace_ops *ops) notrace int ftrace_enable_sysctl(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + struct file *file, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; mutex_lock(&ftrace_sysctl_lock); - ret = proc_dointvec(table, write, filp, buffer, lenp, ppos); + ret = proc_dointvec(table, write, file, buffer, lenp, ppos); if (ret || !write || (last_ftrace_enabled == ftrace_enabled)) goto out; -- cgit v1.2.3 From f43fdad8627fec2d21df92799b254dceb66c9c3c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: fix kexec disable the tracer while kexec pulls the rug from under the old kernel. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/machine_kexec_32.c | 4 ++++ arch/x86/kernel/machine_kexec_64.c | 4 ++++ include/linux/ftrace.h | 7 +++++++ 3 files changed, 15 insertions(+) (limited to 'include') diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index d0b234c9fc31..88923fd7a6fc 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -11,6 +11,8 @@ #include #include #include +#include + #include #include #include @@ -107,6 +109,8 @@ NORET_TYPE void machine_kexec(struct kimage *image) unsigned long page_list[PAGES_NR]; void *control_page; + tracer_disable(); + /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 576a03db4511..1558fdc174f9 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -11,6 +11,8 @@ #include #include #include +#include + #include #include #include @@ -184,6 +186,8 @@ NORET_TYPE void machine_kexec(struct kimage *image) unsigned long page_list[PAGES_NR]; void *control_page; + tracer_disable(); + /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index f5911d2d42c3..a42390c1d6e1 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -68,6 +68,13 @@ extern void ftrace_call(void); extern void mcount_call(void); #endif +static inline void tracer_disable(void) +{ +#ifdef CONFIG_FTRACE + ftrace_enabled = 0; +#endif +} + #ifdef CONFIG_FRAME_POINTER /* TODO: need to fix this for ARM */ # define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) -- cgit v1.2.3 From e1c08bdd9fa73e44096e5a82c0d5928b04ab02c8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:44 +0200 Subject: ftrace: force recording Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 4 ++++ kernel/trace/ftrace.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index a42390c1d6e1..2c1670c65236 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -54,6 +54,8 @@ struct dyn_ftrace { unsigned long flags; }; +int ftrace_force_update(void); + /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); extern unsigned char *ftrace_nop_replace(void); @@ -66,6 +68,8 @@ extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); +#else +# define ftrace_force_update() do { } while (0) #endif static inline void tracer_disable(void) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 97d5cb7b7e75..4facf5ceeb86 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -146,6 +146,10 @@ static int notrace __unregister_ftrace_function(struct ftrace_ops *ops) #ifdef CONFIG_DYNAMIC_FTRACE +static struct task_struct *ftraced_task; +static DECLARE_WAIT_QUEUE_HEAD(ftraced_waiters); +static unsigned long ftraced_iteration_counter; + enum { FTRACE_ENABLE_CALLS = (1 << 0), FTRACE_DISABLE_CALLS = (1 << 1), @@ -590,9 +594,12 @@ static int notrace ftraced(void *ignore) ftraced_trigger = 0; ftrace_record_suspend--; } + ftraced_iteration_counter++; mutex_unlock(&ftraced_lock); mutex_unlock(&ftrace_sysctl_lock); + wake_up_interruptible(&ftraced_waiters); + ftrace_shutdown_replenish(); set_current_state(TASK_INTERRUPTIBLE); @@ -1050,6 +1057,49 @@ static struct file_operations ftrace_filter_fops = { .release = ftrace_filter_release, }; +/** + * ftrace_force_update - force an update to all recording ftrace functions + * + * The ftrace dynamic update daemon only wakes up once a second. + * There may be cases where an update needs to be done immediately + * for tests or internal kernel tracing to begin. This function + * wakes the daemon to do an update and will not return until the + * update is complete. + */ +int ftrace_force_update(void) +{ + unsigned long last_counter; + DECLARE_WAITQUEUE(wait, current); + int ret = 0; + + if (!ftraced_task) + return -ENODEV; + + mutex_lock(&ftraced_lock); + last_counter = ftraced_iteration_counter; + + set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(&ftraced_waiters, &wait); + + do { + mutex_unlock(&ftraced_lock); + wake_up_process(ftraced_task); + schedule(); + mutex_lock(&ftraced_lock); + if (signal_pending(current)) { + ret = -EINTR; + break; + } + set_current_state(TASK_INTERRUPTIBLE); + } while (last_counter == ftraced_iteration_counter); + + mutex_unlock(&ftraced_lock); + remove_wait_queue(&ftraced_waiters, &wait); + set_current_state(TASK_RUNNING); + + return ret; +} + static __init int ftrace_init_debugfs(void) { struct dentry *d_tracer; @@ -1095,6 +1145,7 @@ static int __init notrace ftrace_dynamic_init(void) return -1; last_ftrace_enabled = ftrace_enabled = 1; + ftraced_task = p; return 0; } -- cgit v1.2.3 From c7aafc549766b87819285d3480648fc652a47bc4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:45 +0200 Subject: ftrace: cleanups factor out code and clean it up. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 2 +- kernel/trace/ftrace.c | 8 +-- kernel/trace/trace.c | 144 ++++++++++++++++++++++++-------------- kernel/trace/trace.h | 8 ++- kernel/trace/trace_irqsoff.c | 32 ++++----- kernel/trace/trace_sched_wakeup.c | 18 ++--- kernel/trace/trace_selftest.c | 25 ++++--- 7 files changed, 134 insertions(+), 103 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 2c1670c65236..953a36d6a199 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -69,7 +69,7 @@ extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); #else -# define ftrace_force_update() do { } while (0) +# define ftrace_force_update() ({ 0; }) #endif static inline void tracer_disable(void) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4facf5ceeb86..6d4d2e86debc 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1152,10 +1152,10 @@ static int __init notrace ftrace_dynamic_init(void) core_initcall(ftrace_dynamic_init); #else -# define ftrace_startup() do { } while (0) -# define ftrace_shutdown() do { } while (0) -# define ftrace_startup_sysctl() do { } while (0) -# define ftrace_shutdown_sysctl() do { } while (0) +# define ftrace_startup() do { } while (0) +# define ftrace_shutdown() do { } while (0) +# define ftrace_startup_sysctl() do { } while (0) +# define ftrace_shutdown_sysctl() do { } while (0) #endif /* CONFIG_DYNAMIC_FTRACE */ /** diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f6d026f17dbb..61d2f0228866 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -142,12 +142,59 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) tracing_record_cmdline(current); } +void check_pages(struct trace_array_cpu *data) +{ + struct page *page, *tmp; + + BUG_ON(data->trace_pages.next->prev != &data->trace_pages); + BUG_ON(data->trace_pages.prev->next != &data->trace_pages); + + list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) { + BUG_ON(page->lru.next->prev != &page->lru); + BUG_ON(page->lru.prev->next != &page->lru); + } +} + +void *head_page(struct trace_array_cpu *data) +{ + struct page *page; + + check_pages(data); + if (list_empty(&data->trace_pages)) + return NULL; + + page = list_entry(data->trace_pages.next, struct page, lru); + BUG_ON(&page->lru == &data->trace_pages); + + return page_address(page); +} + +notrace static void +flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2) +{ + struct list_head flip_pages; + + INIT_LIST_HEAD(&flip_pages); + + tr1->trace_current = NULL; + memcpy(&tr1->trace_current_idx, &tr2->trace_current_idx, + sizeof(struct trace_array_cpu) - + offsetof(struct trace_array_cpu, trace_current_idx)); + + check_pages(tr1); + check_pages(tr2); + list_splice_init(&tr1->trace_pages, &flip_pages); + list_splice_init(&tr2->trace_pages, &tr1->trace_pages); + list_splice_init(&flip_pages, &tr2->trace_pages); + BUG_ON(!list_empty(&flip_pages)); + check_pages(tr1); + check_pages(tr2); +} + notrace void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) { struct trace_array_cpu *data; - void *save_trace; - struct list_head save_pages; int i; WARN_ON_ONCE(!irqs_disabled()); @@ -155,11 +202,7 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) /* clear out all the previous traces */ for_each_possible_cpu(i) { data = tr->data[i]; - save_trace = max_tr.data[i]->trace; - save_pages = max_tr.data[i]->trace_pages; - memcpy(max_tr.data[i], data, sizeof(*data)); - data->trace = save_trace; - data->trace_pages = save_pages; + flip_trace(max_tr.data[i], data); tracing_reset(data); } @@ -177,8 +220,6 @@ notrace void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) { struct trace_array_cpu *data = tr->data[cpu]; - void *save_trace; - struct list_head save_pages; int i; WARN_ON_ONCE(!irqs_disabled()); @@ -186,11 +227,8 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) for_each_possible_cpu(i) tracing_reset(max_tr.data[i]); - save_trace = max_tr.data[cpu]->trace; - save_pages = max_tr.data[cpu]->trace_pages; - memcpy(max_tr.data[cpu], data, sizeof(*data)); - data->trace = save_trace; - data->trace_pages = save_pages; + flip_trace(max_tr.data[cpu], data); + tracing_reset(data); __update_max_tr(tr, tsk, cpu); @@ -234,9 +272,9 @@ int register_tracer(struct tracer *type) * If we fail, we do not register this tracer. */ for_each_possible_cpu(i) { - if (!data->trace) - continue; data = tr->data[i]; + if (!head_page(data)) + continue; tracing_reset(data); } current_trace = type; @@ -298,7 +336,7 @@ void unregister_tracer(struct tracer *type) void notrace tracing_reset(struct trace_array_cpu *data) { data->trace_idx = 0; - data->trace_current = data->trace; + data->trace_current = head_page(data); data->trace_current_idx = 0; } @@ -425,26 +463,31 @@ notrace void tracing_record_cmdline(struct task_struct *tsk) } static inline notrace struct trace_entry * -tracing_get_trace_entry(struct trace_array *tr, - struct trace_array_cpu *data) +tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data) { unsigned long idx, idx_next; struct trace_entry *entry; - struct page *page; struct list_head *next; + struct page *page; data->trace_idx++; idx = data->trace_current_idx; idx_next = idx + 1; + BUG_ON(idx * TRACE_ENTRY_SIZE >= PAGE_SIZE); + entry = data->trace_current + idx * TRACE_ENTRY_SIZE; if (unlikely(idx_next >= ENTRIES_PER_PAGE)) { page = virt_to_page(data->trace_current); - if (unlikely(&page->lru == data->trace_pages.prev)) - next = data->trace_pages.next; - else - next = page->lru.next; + /* + * Roundrobin - but skip the head (which is not a real page): + */ + next = page->lru.next; + if (unlikely(next == &data->trace_pages)) + next = next->next; + BUG_ON(next == &data->trace_pages); + page = list_entry(next, struct page, lru); data->trace_current = page_address(page); idx_next = 0; @@ -456,18 +499,17 @@ tracing_get_trace_entry(struct trace_array *tr, } static inline notrace void -tracing_generic_entry_update(struct trace_entry *entry, - unsigned long flags) +tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags) { struct task_struct *tsk = current; unsigned long pc; pc = preempt_count(); - entry->idx = atomic_inc_return(&tracer_counter); - entry->preempt_count = pc & 0xff; - entry->pid = tsk->pid; - entry->t = now(raw_smp_processor_id()); + entry->idx = atomic_inc_return(&tracer_counter); + entry->preempt_count = pc & 0xff; + entry->pid = tsk->pid; + entry->t = now(raw_smp_processor_id()); entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | @@ -476,16 +518,15 @@ tracing_generic_entry_update(struct trace_entry *entry, notrace void ftrace(struct trace_array *tr, struct trace_array_cpu *data, - unsigned long ip, unsigned long parent_ip, - unsigned long flags) + unsigned long ip, unsigned long parent_ip, unsigned long flags) { struct trace_entry *entry; - entry = tracing_get_trace_entry(tr, data); + entry = tracing_get_trace_entry(tr, data); tracing_generic_entry_update(entry, flags); - entry->type = TRACE_FN; - entry->fn.ip = ip; - entry->fn.parent_ip = parent_ip; + entry->type = TRACE_FN; + entry->fn.ip = ip; + entry->fn.parent_ip = parent_ip; } notrace void @@ -496,7 +537,7 @@ tracing_sched_switch_trace(struct trace_array *tr, { struct trace_entry *entry; - entry = tracing_get_trace_entry(tr, data); + entry = tracing_get_trace_entry(tr, data); tracing_generic_entry_update(entry, flags); entry->type = TRACE_CTX; entry->ctx.prev_pid = prev->pid; @@ -540,6 +581,8 @@ trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data, } page = list_entry(iter->next_page[cpu], struct page, lru); + BUG_ON(&data->trace_pages == &page->lru); + array = page_address(page); return &array[iter->next_page_idx[cpu]]; @@ -554,7 +597,7 @@ find_next_entry(struct trace_iterator *iter, int *ent_cpu) int cpu; for_each_possible_cpu(cpu) { - if (!tr->data[cpu]->trace) + if (!head_page(tr->data[cpu])) continue; ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu); if (ent && @@ -762,7 +805,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) name = type->name; for_each_possible_cpu(cpu) { - if (tr->data[cpu]->trace) { + if (head_page(tr->data[cpu])) { total += tr->data[cpu]->trace_idx; if (tr->data[cpu]->trace_idx > tr->entries) entries += tr->entries; @@ -975,8 +1018,7 @@ static int trace_empty(struct trace_iterator *iter) for_each_possible_cpu(cpu) { data = iter->tr->data[cpu]; - if (data->trace && - data->trace_idx) + if (head_page(data) && data->trace_idx) return 0; } return 1; @@ -1576,9 +1618,9 @@ static struct tracer no_tracer __read_mostly = static int trace_alloc_page(void) { struct trace_array_cpu *data; - void *array; struct page *page, *tmp; LIST_HEAD(pages); + void *array; int i; /* first allocate a page for each CPU */ @@ -1610,14 +1652,14 @@ static int trace_alloc_page(void) for_each_possible_cpu(i) { data = global_trace.data[i]; page = list_entry(pages.next, struct page, lru); - list_del(&page->lru); + list_del_init(&page->lru); list_add_tail(&page->lru, &data->trace_pages); ClearPageLRU(page); #ifdef CONFIG_TRACER_MAX_TRACE data = max_tr.data[i]; page = list_entry(pages.next, struct page, lru); - list_del(&page->lru); + list_del_init(&page->lru); list_add_tail(&page->lru, &data->trace_pages); SetPageLRU(page); #endif @@ -1628,7 +1670,7 @@ static int trace_alloc_page(void) free_pages: list_for_each_entry_safe(page, tmp, &pages, lru) { - list_del(&page->lru); + list_del_init(&page->lru); __free_page(page); } return -ENOMEM; @@ -1654,7 +1696,6 @@ __init static int tracer_alloc_buffers(void) "for trace buffer!\n"); goto free_buffers; } - data->trace = array; /* set the array to the list */ INIT_LIST_HEAD(&data->trace_pages); @@ -1671,7 +1712,6 @@ __init static int tracer_alloc_buffers(void) "for trace buffer!\n"); goto free_buffers; } - max_tr.data[i]->trace = array; INIT_LIST_HEAD(&max_tr.data[i]->trace_pages); page = virt_to_page(array); @@ -1716,24 +1756,22 @@ __init static int tracer_alloc_buffers(void) struct page *page, *tmp; struct trace_array_cpu *data = global_trace.data[i]; - if (data && data->trace) { + if (data) { list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) { - list_del(&page->lru); + list_del_init(&page->lru); __free_page(page); } - data->trace = NULL; } #ifdef CONFIG_TRACER_MAX_TRACE data = max_tr.data[i]; - if (data && data->trace) { + if (data) { list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) { - list_del(&page->lru); + list_del_init(&page->lru); __free_page(page); } - data->trace = NULL; } #endif } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 88edbf1f6788..cc1d34b8b771 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -53,12 +53,12 @@ struct trace_entry { * the trace, etc.) */ struct trace_array_cpu { - void *trace; void *trace_current; - unsigned trace_current_idx; struct list_head trace_pages; - unsigned long trace_idx; atomic_t disabled; + /* these fields get copied into max-trace: */ + unsigned trace_current_idx; + unsigned long trace_idx; unsigned long saved_latency; unsigned long critical_start; unsigned long critical_end; @@ -216,4 +216,6 @@ extern int trace_selftest_startup_sched_switch(struct tracer *trace, #endif #endif /* CONFIG_FTRACE_STARTUP_TEST */ +extern void *head_page(struct trace_array_cpu *data); + #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 14183b8f79c5..2dfebb67fdfb 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -144,7 +144,7 @@ check_critical_timing(struct trace_array *tr, if (!report_latency(delta)) goto out; - spin_lock(&max_trace_lock); + spin_lock_irqsave(&max_trace_lock, flags); /* check if we are still the max latency */ if (!report_latency(delta)) @@ -165,32 +165,24 @@ check_critical_timing(struct trace_array *tr, update_max_tr_single(tr, current, cpu); - if (tracing_thresh) - printk(KERN_INFO "(%16s-%-5d|#%d): %lu us critical section " - "violates %lu us threshold.\n" - " => started at timestamp %lu: ", + if (tracing_thresh) { + printk(KERN_INFO "(%16s-%-5d|#%d):" + " %lu us critical section violates %lu us threshold.\n", current->comm, current->pid, raw_smp_processor_id(), - latency, nsecs_to_usecs(tracing_thresh), t0); - else + latency, nsecs_to_usecs(tracing_thresh)); + } else { printk(KERN_INFO "(%16s-%-5d|#%d):" - " new %lu us maximum-latency " - "critical section.\n => started at timestamp %lu: ", + " new %lu us maximum-latency critical section.\n", current->comm, current->pid, raw_smp_processor_id(), - latency, t0); - - print_symbol(KERN_CONT "<%s>\n", data->critical_start); - printk(KERN_CONT " => ended at timestamp %lu: ", t1); - print_symbol(KERN_CONT "<%s>\n", data->critical_end); - dump_stack(); - t1 = nsecs_to_usecs(now(cpu)); - printk(KERN_CONT " => dump-end timestamp %lu\n\n", t1); + latency); + } max_sequence++; out_unlock: - spin_unlock(&max_trace_lock); + spin_unlock_irqrestore(&max_trace_lock, flags); out: data->critical_sequence = max_sequence; @@ -216,7 +208,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip) cpu = raw_smp_processor_id(); data = tr->data[cpu]; - if (unlikely(!data) || unlikely(!data->trace) || + if (unlikely(!data) || unlikely(!head_page(data)) || atomic_read(&data->disabled)) return; @@ -256,7 +248,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip) cpu = raw_smp_processor_id(); data = tr->data[cpu]; - if (unlikely(!data) || unlikely(!data->trace) || + if (unlikely(!data) || unlikely(!head_page(data)) || !data->critical_start || atomic_read(&data->disabled)) return; diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 3d10ff01f805..688df965f3f2 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -107,24 +107,18 @@ wakeup_sched_switch(struct task_struct *prev, struct task_struct *next) update_max_tr(tr, wakeup_task, wakeup_cpu); if (tracing_thresh) { - printk(KERN_INFO "(%16s-%-5d|#%d): %lu us wakeup latency " - "violates %lu us threshold.\n" - " => started at timestamp %lu: ", + printk(KERN_INFO "(%16s-%-5d|#%d):" + " %lu us wakeup latency violates %lu us threshold.\n", wakeup_task->comm, wakeup_task->pid, raw_smp_processor_id(), - latency, nsecs_to_usecs(tracing_thresh), t0); + latency, nsecs_to_usecs(tracing_thresh)); } else { - printk(KERN_INFO "(%16s-%-5d|#%d): new %lu us maximum " - "wakeup latency.\n => started at timestamp %lu: ", + printk(KERN_INFO "(%16s-%-5d|#%d):" + " new %lu us maximum wakeup latency.\n", wakeup_task->comm, wakeup_task->pid, - cpu, latency, t0); + cpu, latency); } - printk(KERN_CONT " ended at timestamp %lu: ", t1); - dump_stack(); - t1 = nsecs_to_usecs(now(cpu)); - printk(KERN_CONT " dump-end timestamp %lu\n\n", t1); - out_unlock: __wakeup_reset(tr); spin_unlock_irqrestore(&wakeup_lock, flags); diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index ef4d3cc009f5..c01874c3b1f9 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -1,6 +1,7 @@ /* Include in trace.c */ #include +#include static inline int trace_valid_entry(struct trace_entry *entry) { @@ -15,28 +16,29 @@ static inline int trace_valid_entry(struct trace_entry *entry) static int trace_test_buffer_cpu(struct trace_array *tr, struct trace_array_cpu *data) { - struct page *page; struct trace_entry *entries; + struct page *page; int idx = 0; int i; + BUG_ON(list_empty(&data->trace_pages)); page = list_entry(data->trace_pages.next, struct page, lru); entries = page_address(page); - if (data->trace != entries) + if (head_page(data) != entries) goto failed; /* * The starting trace buffer always has valid elements, - * if any element exits. + * if any element exists. */ - entries = data->trace; + entries = head_page(data); for (i = 0; i < tr->entries; i++) { - if (i < data->trace_idx && - !trace_valid_entry(&entries[idx])) { - printk(KERN_CONT ".. invalid entry %d ", entries[idx].type); + if (i < data->trace_idx && !trace_valid_entry(&entries[idx])) { + printk(KERN_CONT ".. invalid entry %d ", + entries[idx].type); goto failed; } @@ -80,11 +82,10 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) int ret = 0; for_each_possible_cpu(cpu) { - if (!tr->data[cpu]->trace) + if (!head_page(tr->data[cpu])) continue; cnt += tr->data[cpu]->trace_idx; - printk("%d: count = %ld\n", cpu, cnt); ret = trace_test_buffer_cpu(tr, tr->data[cpu]); if (ret) @@ -117,6 +118,8 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) } /* start the tracing */ + ftrace_enabled = 1; + tr->ctrl = 1; trace->init(tr); /* Sleep for a 1/10 of a second */ @@ -124,6 +127,8 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) /* stop the tracing. */ tr->ctrl = 0; trace->ctrl_update(tr); + ftrace_enabled = 0; + /* check the trace buffer */ ret = trace_test_buffer(tr, &count); trace->reset(tr); @@ -328,7 +333,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) /* create a high prio thread */ p = kthread_run(trace_wakeup_test_thread, &isrt, "ftrace-test"); - if (!IS_ERR(p)) { + if (IS_ERR(p)) { printk(KERN_CONT "Failed to create ftrace wakeup test thread "); return -1; } -- cgit v1.2.3 From 77a2b37d227483fe52aead242652aee406c25bf0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:45 +0200 Subject: ftrace: startup tester on dynamic tracing. This patch adds a startup self test on dynamic code modification and filters. The test filters on a specific function, makes sure that no other function is traced, exectutes the function, then makes sure that the function is traced. This patch also fixes a slight bug with the ftrace selftest, where tracer_enabled was not being set. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 2 + kernel/trace/ftrace.c | 19 +++++++ kernel/trace/trace_selftest.c | 113 ++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 130 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 953a36d6a199..a842d96c6343 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -55,6 +55,7 @@ struct dyn_ftrace { }; int ftrace_force_update(void); +void ftrace_set_filter(unsigned char *buf, int len, int reset); /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); @@ -70,6 +71,7 @@ extern void ftrace_call(void); extern void mcount_call(void); #else # define ftrace_force_update() ({ 0; }) +# define ftrace_set_filter(buf, len, reset) do { } while (0) #endif static inline void tracer_disable(void) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6d4d2e86debc..5e9389faaf75 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1010,6 +1010,25 @@ ftrace_filter_write(struct file *file, const char __user *ubuf, return ret; } +/** + * ftrace_set_filter - set a function to filter on in ftrace + * @buf - the string that holds the function filter text. + * @len - the length of the string. + * @reset - non zero to reset all filters before applying this filter. + * + * Filters denote which functions should be enabled when tracing is enabled. + * If @buf is NULL and reset is set, all functions will be enabled for tracing. + */ +notrace void ftrace_set_filter(unsigned char *buf, int len, int reset) +{ + mutex_lock(&ftrace_filter_lock); + if (reset) + ftrace_filter_reset(); + if (buf) + ftrace_match(buf, len); + mutex_unlock(&ftrace_filter_lock); +} + static int notrace ftrace_filter_release(struct inode *inode, struct file *file) { diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index c01874c3b1f9..4c8a1b2d8231 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -99,6 +99,100 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) } #ifdef CONFIG_FTRACE + +#ifdef CONFIG_DYNAMIC_FTRACE + +#define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func +#define __STR(x) #x +#define STR(x) __STR(x) +static int DYN_FTRACE_TEST_NAME(void) +{ + /* used to call mcount */ + return 0; +} + +/* Test dynamic code modification and ftrace filters */ +int trace_selftest_startup_dynamic_tracing(struct tracer *trace, + struct trace_array *tr, + int (*func)(void)) +{ + unsigned long count; + int ret; + int save_ftrace_enabled = ftrace_enabled; + int save_tracer_enabled = tracer_enabled; + + /* The ftrace test PASSED */ + printk(KERN_CONT "PASSED\n"); + pr_info("Testing dynamic ftrace: "); + + /* enable tracing, and record the filter function */ + ftrace_enabled = 1; + tracer_enabled = 1; + + /* passed in by parameter to fool gcc from optimizing */ + func(); + + /* update the records */ + ret = ftrace_force_update(); + if (ret) { + printk(KERN_CONT ".. ftraced failed .. "); + return ret; + } + + /* filter only on our function */ + ftrace_set_filter(STR(DYN_FTRACE_TEST_NAME), + sizeof(STR(DYN_FTRACE_TEST_NAME)), 1); + + /* enable tracing */ + tr->ctrl = 1; + trace->init(tr); + /* Sleep for a 1/10 of a second */ + msleep(100); + + /* we should have nothing in the buffer */ + ret = trace_test_buffer(tr, &count); + if (ret) + goto out; + + if (count) { + ret = -1; + printk(KERN_CONT ".. filter did not filter .. "); + goto out; + } + + /* call our function again */ + func(); + + /* sleep again */ + msleep(100); + + /* stop the tracing. */ + tr->ctrl = 0; + trace->ctrl_update(tr); + ftrace_enabled = 0; + + /* check the trace buffer */ + ret = trace_test_buffer(tr, &count); + trace->reset(tr); + + /* we should only have one item */ + if (!ret && count != 1) { + printk(KERN_CONT ".. filter failed .."); + ret = -1; + goto out; + } + out: + ftrace_enabled = save_ftrace_enabled; + tracer_enabled = save_tracer_enabled; + + /* Enable tracing on all functions again */ + ftrace_set_filter(NULL, 0, 1); + + return ret; +} +#else +# define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; }) +#endif /* CONFIG_DYNAMIC_FTRACE */ /* * Simple verification test of ftrace function tracer. * Enable ftrace, sleep 1/10 second, and then read the trace @@ -109,8 +203,13 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) { unsigned long count; int ret; + int save_ftrace_enabled = ftrace_enabled; + int save_tracer_enabled = tracer_enabled; - /* make sure functions have been recorded */ + /* make sure msleep has been recorded */ + msleep(1); + + /* force the recorded functions to be traced */ ret = ftrace_force_update(); if (ret) { printk(KERN_CONT ".. ftraced failed .. "); @@ -119,6 +218,7 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) /* start the tracing */ ftrace_enabled = 1; + tracer_enabled = 1; tr->ctrl = 1; trace->init(tr); @@ -136,8 +236,16 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) if (!ret && !count) { printk(KERN_CONT ".. no entries found .."); ret = -1; + goto out; } + ret = trace_selftest_startup_dynamic_tracing(trace, tr, + DYN_FTRACE_TEST_NAME); + + out: + ftrace_enabled = save_ftrace_enabled; + tracer_enabled = save_tracer_enabled; + return ret; } #endif /* CONFIG_FTRACE */ @@ -415,6 +523,3 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr return ret; } #endif /* CONFIG_CONTEXT_SWITCH_TRACER */ - -#ifdef CONFIG_DYNAMIC_FTRACE -#endif /* CONFIG_DYNAMIC_FTRACE */ -- cgit v1.2.3 From 37ad508419f0fdfda7b378756eb1f35cfd26d96d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:48 +0200 Subject: ftrace - fix dynamic ftrace memory leak The ftrace dynamic function update allocates a record to store the instruction pointers that are being modified. If the modified instruction pointer fails to update, then the record is marked as failed and nothing more is done. Worse, if the modification fails, but the record ip function is still called, it will allocate a new record and try again. In just a matter of time, will this cause a serious memory leak and crash the system. This patch plugs this memory leak. When a record fails, it is included back into the pool of records to be used. Now a record may fail over and over again, but the number of allocated records will not increase. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 7 ++++--- kernel/trace/ftrace.c | 45 ++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 46 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index a842d96c6343..61e757bd2350 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -43,9 +43,10 @@ extern void mcount(void); # define FTRACE_HASHSIZE (1<node, &ftrace_hash[key]); } +static notrace void ftrace_free_rec(struct dyn_ftrace *rec) +{ + /* no locking, only called from kstop_machine */ + + rec->ip = (unsigned long)ftrace_free_records; + ftrace_free_records = rec; + rec->flags |= FTRACE_FL_FREE; +} + static notrace struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) { + struct dyn_ftrace *rec; + + /* First check for freed records */ + if (ftrace_free_records) { + rec = ftrace_free_records; + + /* todo, disable tracing altogether on this warning */ + if (unlikely(!(rec->flags & FTRACE_FL_FREE))) { + WARN_ON_ONCE(1); + ftrace_free_records = NULL; + return NULL; + } + + ftrace_free_records = (void *)rec->ip; + memset(rec, 0, sizeof(*rec)); + return rec; + } + if (ftrace_pages->index == ENTRIES_PER_PAGE) { if (!ftrace_pages->next) return NULL; @@ -356,8 +385,16 @@ __ftrace_replace_code(struct dyn_ftrace *rec, } failed = ftrace_modify_code(ip, old, new); - if (failed) - rec->flags |= FTRACE_FL_FAILED; + if (failed) { + unsigned long key; + /* It is possible that the function hasn't been converted yet */ + key = hash_long(ip, FTRACE_HASHBITS); + if (!ftrace_ip_in_hash(ip, key)) { + rec->flags |= FTRACE_FL_FAILED; + ftrace_free_rec(rec); + } + + } } static void notrace ftrace_replace_code(int enable) @@ -407,8 +444,10 @@ ftrace_code_disable(struct dyn_ftrace *rec) call = ftrace_call_replace(ip, MCOUNT_ADDR); failed = ftrace_modify_code(ip, call, nop); - if (failed) + if (failed) { rec->flags |= FTRACE_FL_FAILED; + ftrace_free_rec(rec); + } } static int notrace __ftrace_modify_code(void *data) -- cgit v1.2.3 From 4eebcc81a33fbc45e28542b50197ed7b3c486d90 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:48 +0200 Subject: ftrace: disable tracing on failure Since ftrace touches practically every function. If we detect any anomaly, we want to fully disable ftrace. This patch adds code to try shutdown ftrace as much as possible without doing any more harm is something is detected not quite correct. This only kills ftrace, this patch does have checks for other parts of the tracer (irqsoff, wakeup, etc.). Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 3 ++ kernel/trace/ftrace.c | 112 ++++++++++++++++++++++++++++++++++++++---- kernel/trace/trace_selftest.c | 4 ++ 3 files changed, 110 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 61e757bd2350..4650a3160b7f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -58,6 +58,9 @@ struct dyn_ftrace { int ftrace_force_update(void); void ftrace_set_filter(unsigned char *buf, int len, int reset); +/* totally disable ftrace - can not re-enable after this */ +void ftrace_kill(void); + /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); extern unsigned char *ftrace_nop_replace(void); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8e02aa690b2b..ff42345dd78e 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -29,9 +29,16 @@ #include "trace.h" -int ftrace_enabled; +/* ftrace_enabled is a method to turn ftrace on or off */ +int ftrace_enabled __read_mostly; static int last_ftrace_enabled; +/* + * ftrace_disabled is set when an anomaly is discovered. + * ftrace_disabled is much stronger than ftrace_enabled. + */ +static int ftrace_disabled __read_mostly; + static DEFINE_SPINLOCK(ftrace_lock); static DEFINE_MUTEX(ftrace_sysctl_lock); @@ -230,10 +237,11 @@ static notrace struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) if (ftrace_free_records) { rec = ftrace_free_records; - /* todo, disable tracing altogether on this warning */ if (unlikely(!(rec->flags & FTRACE_FL_FREE))) { WARN_ON_ONCE(1); ftrace_free_records = NULL; + ftrace_disabled = 1; + ftrace_enabled = 0; return NULL; } @@ -260,7 +268,7 @@ ftrace_record_ip(unsigned long ip) int resched; int atomic; - if (!ftrace_enabled) + if (!ftrace_enabled || ftrace_disabled) return; resched = need_resched(); @@ -485,6 +493,9 @@ static void notrace ftrace_startup(void) { int command = 0; + if (unlikely(ftrace_disabled)) + return; + mutex_lock(&ftraced_lock); ftraced_suspend++; if (ftraced_suspend == 1) @@ -507,6 +518,9 @@ static void notrace ftrace_shutdown(void) { int command = 0; + if (unlikely(ftrace_disabled)) + return; + mutex_lock(&ftraced_lock); ftraced_suspend--; if (!ftraced_suspend) @@ -529,6 +543,9 @@ static void notrace ftrace_startup_sysctl(void) { int command = FTRACE_ENABLE_MCOUNT; + if (unlikely(ftrace_disabled)) + return; + mutex_lock(&ftraced_lock); /* Force update next time */ saved_ftrace_func = NULL; @@ -544,6 +561,9 @@ static void notrace ftrace_shutdown_sysctl(void) { int command = FTRACE_DISABLE_MCOUNT; + if (unlikely(ftrace_disabled)) + return; + mutex_lock(&ftraced_lock); /* ftraced_suspend is true if ftrace is running */ if (ftraced_suspend) @@ -600,6 +620,9 @@ static int notrace __ftrace_update_code(void *ignore) static void notrace ftrace_update_code(void) { + if (unlikely(ftrace_disabled)) + return; + stop_machine_run(__ftrace_update_code, NULL, NR_CPUS); } @@ -614,6 +637,9 @@ static int notrace ftraced(void *ignore) /* check once a second */ schedule_timeout(HZ); + if (unlikely(ftrace_disabled)) + continue; + mutex_lock(&ftrace_sysctl_lock); mutex_lock(&ftraced_lock); if (ftrace_enabled && ftraced_trigger && !ftraced_suspend) { @@ -628,6 +654,7 @@ static int notrace ftraced(void *ignore) ftrace_update_cnt != 1 ? "s" : "", ftrace_update_tot_cnt, usecs, usecs != 1 ? "s" : ""); + ftrace_disabled = 1; WARN_ON_ONCE(1); } ftraced_trigger = 0; @@ -785,6 +812,9 @@ ftrace_avail_open(struct inode *inode, struct file *file) struct ftrace_iterator *iter; int ret; + if (unlikely(ftrace_disabled)) + return -ENODEV; + iter = kzalloc(sizeof(*iter), GFP_KERNEL); if (!iter) return -ENOMEM; @@ -843,6 +873,9 @@ ftrace_filter_open(struct inode *inode, struct file *file) struct ftrace_iterator *iter; int ret = 0; + if (unlikely(ftrace_disabled)) + return -ENODEV; + iter = kzalloc(sizeof(*iter), GFP_KERNEL); if (!iter) return -ENOMEM; @@ -1063,6 +1096,9 @@ ftrace_filter_write(struct file *file, const char __user *ubuf, */ notrace void ftrace_set_filter(unsigned char *buf, int len, int reset) { + if (unlikely(ftrace_disabled)) + return; + mutex_lock(&ftrace_filter_lock); if (reset) ftrace_filter_reset(); @@ -1133,7 +1169,7 @@ int ftrace_force_update(void) DECLARE_WAITQUEUE(wait, current); int ret = 0; - if (!ftraced_task) + if (unlikely(ftrace_disabled)) return -ENODEV; mutex_lock(&ftraced_lock); @@ -1142,6 +1178,11 @@ int ftrace_force_update(void) set_current_state(TASK_INTERRUPTIBLE); add_wait_queue(&ftraced_waiters, &wait); + if (unlikely(!ftraced_task)) { + ret = -ENODEV; + goto out; + } + do { mutex_unlock(&ftraced_lock); wake_up_process(ftraced_task); @@ -1154,6 +1195,7 @@ int ftrace_force_update(void) set_current_state(TASK_INTERRUPTIBLE); } while (last_counter == ftraced_iteration_counter); + out: mutex_unlock(&ftraced_lock); remove_wait_queue(&ftraced_waiters, &wait); set_current_state(TASK_RUNNING); @@ -1161,6 +1203,22 @@ int ftrace_force_update(void) return ret; } +static void ftrace_force_shutdown(void) +{ + struct task_struct *task; + int command = FTRACE_DISABLE_CALLS | FTRACE_UPDATE_TRACE_FUNC; + + mutex_lock(&ftraced_lock); + task = ftraced_task; + ftraced_task = NULL; + ftraced_suspend = -1; + ftrace_run_update_code(command); + mutex_unlock(&ftraced_lock); + + if (task) + kthread_stop(task); +} + static __init int ftrace_init_debugfs(void) { struct dentry *d_tracer; @@ -1194,21 +1252,29 @@ static int __init notrace ftrace_dynamic_init(void) stop_machine_run(ftrace_dyn_arch_init, &addr, NR_CPUS); /* ftrace_dyn_arch_init places the return code in addr */ - if (addr) - return addr; + if (addr) { + ret = (int)addr; + goto failed; + } ret = ftrace_dyn_table_alloc(); if (ret) - return ret; + goto failed; p = kthread_run(ftraced, NULL, "ftraced"); - if (IS_ERR(p)) - return -1; + if (IS_ERR(p)) { + ret = -1; + goto failed; + } last_ftrace_enabled = ftrace_enabled = 1; ftraced_task = p; return 0; + + failed: + ftrace_disabled = 1; + return ret; } core_initcall(ftrace_dynamic_init); @@ -1217,8 +1283,30 @@ core_initcall(ftrace_dynamic_init); # define ftrace_shutdown() do { } while (0) # define ftrace_startup_sysctl() do { } while (0) # define ftrace_shutdown_sysctl() do { } while (0) +# define ftrace_force_shutdown() do { } while (0) #endif /* CONFIG_DYNAMIC_FTRACE */ +/** + * ftrace_kill - totally shutdown ftrace + * + * This is a safety measure. If something was detected that seems + * wrong, calling this function will keep ftrace from doing + * any more modifications, and updates. + * used when something went wrong. + */ +void ftrace_kill(void) +{ + mutex_lock(&ftrace_sysctl_lock); + ftrace_disabled = 1; + ftrace_enabled = 0; + + clear_ftrace_function(); + mutex_unlock(&ftrace_sysctl_lock); + + /* Try to totally disable ftrace */ + ftrace_force_shutdown(); +} + /** * register_ftrace_function - register a function for profiling * @ops - ops structure that holds the function for profiling. @@ -1234,6 +1322,9 @@ int register_ftrace_function(struct ftrace_ops *ops) { int ret; + if (unlikely(ftrace_disabled)) + return -1; + mutex_lock(&ftrace_sysctl_lock); ret = __register_ftrace_function(ops); ftrace_startup(); @@ -1267,6 +1358,9 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, { int ret; + if (unlikely(ftrace_disabled)) + return -ENODEV; + mutex_lock(&ftrace_sysctl_lock); ret = proc_dointvec(table, write, file, buffer, lenp, ppos); diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index a6f1ed75f836..85715b86a342 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -248,6 +248,10 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) ftrace_enabled = save_ftrace_enabled; tracer_enabled = save_tracer_enabled; + /* kill ftrace totally if we failed */ + if (ret) + ftrace_kill(); + return ret; } #endif /* CONFIG_FTRACE */ -- cgit v1.2.3 From aeaee8a2c9cb4489f166ca0e39c568e8254faaa6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:49 +0200 Subject: ftrace: build fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4650a3160b7f..08fbef1744cc 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -58,9 +58,6 @@ struct dyn_ftrace { int ftrace_force_update(void); void ftrace_set_filter(unsigned char *buf, int len, int reset); -/* totally disable ftrace - can not re-enable after this */ -void ftrace_kill(void); - /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); extern unsigned char *ftrace_nop_replace(void); @@ -74,10 +71,13 @@ extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); #else -# define ftrace_force_update() ({ 0; }) -# define ftrace_set_filter(buf, len, reset) do { } while (0) +# define ftrace_force_update() ({ 0; }) +# define ftrace_set_filter(buf, len, reset) do { } while (0) #endif +/* totally disable ftrace - can not re-enable after this */ +void ftrace_kill(void); + static inline void tracer_disable(void) { #ifdef CONFIG_FTRACE -- cgit v1.2.3 From 86387f7ee5d3273ff4859e2c64ce656639b6ca65 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:51 +0200 Subject: ftrace: add stack tracing Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 2 + kernel/trace/Kconfig | 1 + kernel/trace/trace.c | 103 ++++++++++++++++++++++++++++++++++++++++--------- kernel/trace/trace.h | 11 ++++++ 4 files changed, 99 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 08fbef1744cc..0d3714e7110b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -93,6 +93,7 @@ static inline void tracer_disable(void) # define CALLER_ADDR3 ((unsigned long)__builtin_return_address(3)) # define CALLER_ADDR4 ((unsigned long)__builtin_return_address(4)) # define CALLER_ADDR5 ((unsigned long)__builtin_return_address(5)) +# define CALLER_ADDR6 ((unsigned long)__builtin_return_address(6)) #else # define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) # define CALLER_ADDR1 0UL @@ -100,6 +101,7 @@ static inline void tracer_disable(void) # define CALLER_ADDR3 0UL # define CALLER_ADDR4 0UL # define CALLER_ADDR5 0UL +# define CALLER_ADDR6 0UL #endif #ifdef CONFIG_IRQSOFF_TRACER diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 3f73a1710242..eb1988ed84b7 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -10,6 +10,7 @@ config TRACER_MAX_TRACE config TRACING bool select DEBUG_FS + select STACKTRACE config FTRACE bool "Kernel Function Tracer" diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 192c1354a7e0..b4b1b4fe99fd 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -28,6 +28,8 @@ #include #include +#include + #include "trace.h" unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; @@ -88,6 +90,7 @@ enum trace_type { TRACE_FN, TRACE_CTX, TRACE_WAKE, + TRACE_STACK, TRACE_SPECIAL, __TRACE_LAST_TYPE @@ -109,6 +112,7 @@ enum trace_iterator_flags { TRACE_ITER_HEX = 0x20, TRACE_ITER_BIN = 0x40, TRACE_ITER_BLOCK = 0x80, + TRACE_ITER_STACKTRACE = 0x100, }; #define TRACE_ITER_SYM_MASK \ @@ -124,10 +128,11 @@ static const char *trace_options[] = { "hex", "bin", "block", + "stacktrace", NULL }; -static unsigned trace_flags; +static unsigned trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_STACKTRACE; static DEFINE_SPINLOCK(ftrace_max_lock); @@ -657,7 +662,7 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, spin_unlock_irqrestore(&data->lock, irq_flags); if (!(trace_flags & TRACE_ITER_BLOCK)) - wake_up (&trace_wait); + wake_up(&trace_wait); } void @@ -685,13 +690,39 @@ trace_special(struct trace_array *tr, struct trace_array_cpu *data, spin_unlock_irqrestore(&data->lock, irq_flags); if (!(trace_flags & TRACE_ITER_BLOCK)) - wake_up (&trace_wait); + wake_up(&trace_wait); +} + +void __trace_stack(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long flags, + int skip) +{ + struct trace_entry *entry; + struct stack_trace trace; + + if (!(trace_flags & TRACE_ITER_STACKTRACE)) + return; + + entry = tracing_get_trace_entry(tr, data); + tracing_generic_entry_update(entry, flags); + entry->type = TRACE_STACK; + + memset(&entry->stack, 0, sizeof(entry->stack)); + + trace.nr_entries = 0; + trace.max_entries = FTRACE_STACK_ENTRIES; + trace.skip = skip; + trace.entries = entry->stack.caller; + + save_stack_trace(&trace); } void tracing_sched_switch_trace(struct trace_array *tr, struct trace_array_cpu *data, - struct task_struct *prev, struct task_struct *next, + struct task_struct *prev, + struct task_struct *next, unsigned long flags) { struct trace_entry *entry; @@ -706,16 +737,18 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->ctx.prev_state = prev->state; entry->ctx.next_pid = next->pid; entry->ctx.next_prio = next->prio; + __trace_stack(tr, data, flags, 4); spin_unlock_irqrestore(&data->lock, irq_flags); if (!(trace_flags & TRACE_ITER_BLOCK)) - wake_up (&trace_wait); + wake_up(&trace_wait); } void tracing_sched_wakeup_trace(struct trace_array *tr, struct trace_array_cpu *data, - struct task_struct *wakee, struct task_struct *curr, + struct task_struct *wakee, + struct task_struct *curr, unsigned long flags) { struct trace_entry *entry; @@ -730,6 +763,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->ctx.prev_state = curr->state; entry->ctx.next_pid = wakee->pid; entry->ctx.next_prio = wakee->prio; + __trace_stack(tr, data, flags, 5); spin_unlock_irqrestore(&data->lock, irq_flags); if (!(trace_flags & TRACE_ITER_BLOCK)) @@ -1179,6 +1213,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) unsigned long rel_usecs; char *comm; int S; + int i; if (!next_entry) next_entry = entry; @@ -1197,8 +1232,10 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) abs_usecs % 1000, rel_usecs/1000, rel_usecs % 1000); } else { - lat_print_generic(s, entry, cpu); - lat_print_timestamp(s, abs_usecs, rel_usecs); + if (entry->type != TRACE_STACK) { + lat_print_generic(s, entry, cpu); + lat_print_timestamp(s, abs_usecs, rel_usecs); + } } switch (entry->type) { case TRACE_FN: @@ -1226,6 +1263,14 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) entry->special.arg2, entry->special.arg3); break; + case TRACE_STACK: + for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { + if (i) + trace_seq_puts(s, " <= "); + seq_print_ip_sym(s, entry->stack.caller[i], sym_flags); + } + trace_seq_puts(s, "\n"); + break; default: trace_seq_printf(s, "Unknown type %d\n", entry->type); } @@ -1241,8 +1286,9 @@ static int print_trace_fmt(struct trace_iterator *iter) unsigned long long t; unsigned long secs; char *comm; - int S; int ret; + int S; + int i; entry = iter->ent; @@ -1252,15 +1298,17 @@ static int print_trace_fmt(struct trace_iterator *iter) usec_rem = do_div(t, 1000000ULL); secs = (unsigned long)t; - ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); - if (!ret) - return 0; - ret = trace_seq_printf(s, "[%02d] ", iter->cpu); - if (!ret) - return 0; - ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem); - if (!ret) - return 0; + if (entry->type != TRACE_STACK) { + ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); + if (!ret) + return 0; + ret = trace_seq_printf(s, "[%02d] ", iter->cpu); + if (!ret) + return 0; + ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem); + if (!ret) + return 0; + } switch (entry->type) { case TRACE_FN: @@ -1303,6 +1351,22 @@ static int print_trace_fmt(struct trace_iterator *iter) if (!ret) return 0; break; + case TRACE_STACK: + for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { + if (i) { + ret = trace_seq_puts(s, " <= "); + if (!ret) + return 0; + } + ret = seq_print_ip_sym(s, entry->stack.caller[i], + sym_flags); + if (!ret) + return 0; + } + ret = trace_seq_puts(s, "\n"); + if (!ret) + return 0; + break; } return 1; } @@ -1344,6 +1408,7 @@ static int print_raw_fmt(struct trace_iterator *iter) return 0; break; case TRACE_SPECIAL: + case TRACE_STACK: ret = trace_seq_printf(s, " %lx %lx %lx\n", entry->special.arg1, entry->special.arg2, @@ -1399,6 +1464,7 @@ static int print_hex_fmt(struct trace_iterator *iter) SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip); break; case TRACE_SPECIAL: + case TRACE_STACK: SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg1); SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg2); SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg3); @@ -1433,6 +1499,7 @@ static int print_bin_fmt(struct trace_iterator *iter) SEQ_PUT_FIELD_RET(s, entry->ctx.next_prio); break; case TRACE_SPECIAL: + case TRACE_STACK: SEQ_PUT_FIELD_RET(s, entry->special.arg1); SEQ_PUT_FIELD_RET(s, entry->special.arg2); SEQ_PUT_FIELD_RET(s, entry->special.arg3); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 90e0ba0f6eba..387bdcf45e28 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -34,6 +34,16 @@ struct special_entry { unsigned long arg3; }; +/* + * Stack-trace entry: + */ + +#define FTRACE_STACK_ENTRIES 5 + +struct stack_entry { + unsigned long caller[FTRACE_STACK_ENTRIES]; +}; + /* * The trace entry - the most basic unit of tracing. This is what * is printed in the end as a single line in the trace output, such as: @@ -51,6 +61,7 @@ struct trace_entry { struct ftrace_entry fn; struct ctx_switch_entry ctx; struct special_entry special; + struct stack_entry stack; }; }; -- cgit v1.2.3 From 8ac0fca4ccb355ce50471d7aa3f10f5900b28b95 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:51 +0200 Subject: ftrace: sched tracer fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 6 ------ kernel/sched.c | 2 +- kernel/trace/trace_sched_wakeup.c | 13 +++---------- 3 files changed, 4 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6e26f1fdbfe2..05744f9cb096 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2130,17 +2130,11 @@ ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) #ifdef CONFIG_SCHED_TRACER extern void ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr); -extern void -ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr); #else static inline void ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) { } -static inline void -ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr) -{ -} #endif extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); diff --git a/kernel/sched.c b/kernel/sched.c index 328494e28df2..53ab1174664f 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2613,7 +2613,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) p->sched_class->task_new(rq, p); inc_nr_running(rq); } - ftrace_wake_up_new_task(p, rq->curr); + ftrace_wake_up_task(p, rq->curr); check_preempt_curr(rq, p); #ifdef CONFIG_SMP if (p->sched_class->task_wake_up) diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 87fa7b253b57..2a012423f9d0 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -201,20 +201,13 @@ out: atomic_dec(&tr->data[cpu]->disabled); } -void -wakeup_sched_wakeup(struct task_struct *wakee, struct task_struct *curr) +void wakeup_sched_wakeup(struct task_struct *wakee, struct task_struct *curr) { if (likely(!tracer_enabled)) return; - wakeup_check_start(wakeup_trace, wakee, curr); -} - -void -ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr) -{ - if (likely(!tracer_enabled)) - return; + tracing_record_cmdline(curr); + tracing_record_cmdline(wakee); wakeup_check_start(wakeup_trace, wakee, curr); } -- cgit v1.2.3 From 4e65551905fb0300ae7e667cbaa41ee2e3f29a13 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:52 +0200 Subject: ftrace: sched tracer, trace full rbtree Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 32 ++++++++++++++++------- kernel/sched.c | 35 ++++++++++++++++++++++--- kernel/trace/trace.c | 55 ++++++++++++++++----------------------- kernel/trace/trace.h | 14 ++++++++++ kernel/trace/trace_sched_switch.c | 24 +++++++++++------ 5 files changed, 108 insertions(+), 52 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 05744f9cb096..652d380ae563 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2119,20 +2119,34 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) #ifdef CONFIG_CONTEXT_SWITCH_TRACER extern void -ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next); +ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next); +extern void +ftrace_wake_up_task(void *rq, struct task_struct *wakee, + struct task_struct *curr); +extern void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data); +extern void +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3); #else static inline void -ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) +ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next) +{ +} +static inline void +sched_trace_special(unsigned long p1, unsigned long p2, unsigned long p3) +{ +} +static inline void +ftrace_wake_up_task(void *rq, struct task_struct *wakee, + struct task_struct *curr) +{ +} +static inline void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) { } -#endif - -#ifdef CONFIG_SCHED_TRACER -extern void -ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr); -#else static inline void -ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3) { } #endif diff --git a/kernel/sched.c b/kernel/sched.c index 53ab1174664f..b9208a0e33a0 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2394,6 +2394,35 @@ static int sched_balance_self(int cpu, int flag) #endif /* CONFIG_SMP */ +#ifdef CONFIG_CONTEXT_SWITCH_TRACER + +void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) +{ + struct sched_entity *se; + struct task_struct *p; + struct rb_node *curr; + struct rq *rq = __rq; + + curr = first_fair(&rq->cfs); + if (!curr) + return; + + while (curr) { + se = rb_entry(curr, struct sched_entity, run_node); + if (!entity_is_task(se)) + continue; + + p = task_of(se); + + __trace_special(__tr, __data, + p->pid, p->se.vruntime, p->se.sum_exec_runtime); + + curr = rb_next(curr); + } +} + +#endif + /*** * try_to_wake_up - wake up a thread * @p: the to-be-woken-up thread @@ -2468,7 +2497,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) out_activate: #endif /* CONFIG_SMP */ - ftrace_wake_up_task(p, rq->curr); + ftrace_wake_up_task(rq, p, rq->curr); schedstat_inc(p, se.nr_wakeups); if (sync) schedstat_inc(p, se.nr_wakeups_sync); @@ -2613,7 +2642,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) p->sched_class->task_new(rq, p); inc_nr_running(rq); } - ftrace_wake_up_task(p, rq->curr); + ftrace_wake_up_task(rq, p, rq->curr); check_preempt_curr(rq, p); #ifdef CONFIG_SMP if (p->sched_class->task_wake_up) @@ -2786,7 +2815,7 @@ context_switch(struct rq *rq, struct task_struct *prev, struct mm_struct *mm, *oldmm; prepare_task_switch(rq, prev, next); - ftrace_ctx_switch(prev, next); + ftrace_ctx_switch(rq, prev, next); mm = next->mm; oldmm = prev->active_mm; /* diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0e4b7119e263..65173b14b914 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -66,7 +66,18 @@ static struct tracer *current_trace __read_mostly; static int max_tracer_type_len; static DEFINE_MUTEX(trace_types_lock); -static DECLARE_WAIT_QUEUE_HEAD (trace_wait); +static DECLARE_WAIT_QUEUE_HEAD(trace_wait); + +unsigned long trace_flags = TRACE_ITER_PRINT_PARENT; + +/* + * FIXME: where should this be called? + */ +void trace_wake_up(void) +{ + if (!(trace_flags & TRACE_ITER_BLOCK)) + wake_up(&trace_wait); +} #define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry)) @@ -103,18 +114,6 @@ enum trace_flag_type { TRACE_FLAG_SOFTIRQ = 0x08, }; -enum trace_iterator_flags { - TRACE_ITER_PRINT_PARENT = 0x01, - TRACE_ITER_SYM_OFFSET = 0x02, - TRACE_ITER_SYM_ADDR = 0x04, - TRACE_ITER_VERBOSE = 0x08, - TRACE_ITER_RAW = 0x10, - TRACE_ITER_HEX = 0x20, - TRACE_ITER_BIN = 0x40, - TRACE_ITER_BLOCK = 0x80, - TRACE_ITER_STACKTRACE = 0x100, -}; - #define TRACE_ITER_SYM_MASK \ (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR) @@ -132,8 +131,6 @@ static const char *trace_options[] = { NULL }; -static unsigned trace_flags = TRACE_ITER_PRINT_PARENT; - static DEFINE_SPINLOCK(ftrace_max_lock); /* @@ -660,9 +657,6 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, entry->fn.ip = ip; entry->fn.parent_ip = parent_ip; spin_unlock_irqrestore(&data->lock, irq_flags); - - if (!(trace_flags & TRACE_ITER_BLOCK)) - wake_up(&trace_wait); } void @@ -673,10 +667,14 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, trace_function(tr, data, ip, parent_ip, flags); } +#ifdef CONFIG_CONTEXT_SWITCH_TRACER + void -trace_special(struct trace_array *tr, struct trace_array_cpu *data, - unsigned long arg1, unsigned long arg2, unsigned long arg3) +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3) { + struct trace_array_cpu *data = __data; + struct trace_array *tr = __tr; struct trace_entry *entry; unsigned long irq_flags; @@ -688,11 +686,10 @@ trace_special(struct trace_array *tr, struct trace_array_cpu *data, entry->special.arg2 = arg2; entry->special.arg3 = arg3; spin_unlock_irqrestore(&data->lock, irq_flags); - - if (!(trace_flags & TRACE_ITER_BLOCK)) - wake_up(&trace_wait); } +#endif + void __trace_stack(struct trace_array *tr, struct trace_array_cpu *data, unsigned long flags, @@ -739,9 +736,6 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->ctx.next_prio = next->prio; __trace_stack(tr, data, flags, 4); spin_unlock_irqrestore(&data->lock, irq_flags); - - if (!(trace_flags & TRACE_ITER_BLOCK)) - wake_up(&trace_wait); } void @@ -765,9 +759,6 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->ctx.next_prio = wakee->prio; __trace_stack(tr, data, flags, 5); spin_unlock_irqrestore(&data->lock, irq_flags); - - if (!(trace_flags & TRACE_ITER_BLOCK)) - wake_up(&trace_wait); } #ifdef CONFIG_FTRACE @@ -1258,7 +1249,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) comm); break; case TRACE_SPECIAL: - trace_seq_printf(s, " %lx %lx %lx\n", + trace_seq_printf(s, " %ld %ld %ld\n", entry->special.arg1, entry->special.arg2, entry->special.arg3); @@ -1344,7 +1335,7 @@ static int print_trace_fmt(struct trace_iterator *iter) return 0; break; case TRACE_SPECIAL: - ret = trace_seq_printf(s, " %lx %lx %lx\n", + ret = trace_seq_printf(s, " %ld %ld %ld\n", entry->special.arg1, entry->special.arg2, entry->special.arg3); @@ -1409,7 +1400,7 @@ static int print_raw_fmt(struct trace_iterator *iter) break; case TRACE_SPECIAL: case TRACE_STACK: - ret = trace_seq_printf(s, " %lx %lx %lx\n", + ret = trace_seq_printf(s, " %ld %ld %ld\n", entry->special.arg1, entry->special.arg2, entry->special.arg3); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 387bdcf45e28..75e237475674 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -274,4 +274,18 @@ extern int trace_selftest_startup_sched_switch(struct tracer *trace, extern void *head_page(struct trace_array_cpu *data); +extern unsigned long trace_flags; + +enum trace_iterator_flags { + TRACE_ITER_PRINT_PARENT = 0x01, + TRACE_ITER_SYM_OFFSET = 0x02, + TRACE_ITER_SYM_ADDR = 0x04, + TRACE_ITER_VERBOSE = 0x08, + TRACE_ITER_RAW = 0x10, + TRACE_ITER_HEX = 0x20, + TRACE_ITER_BIN = 0x40, + TRACE_ITER_BLOCK = 0x80, + TRACE_ITER_STACKTRACE = 0x100, +}; + #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 8b1cf1a3aee0..12658b3f2b28 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -18,7 +18,7 @@ static struct trace_array *ctx_trace; static int __read_mostly tracer_enabled; static void -ctx_switch_func(struct task_struct *prev, struct task_struct *next) +ctx_switch_func(void *__rq, struct task_struct *prev, struct task_struct *next) { struct trace_array *tr = ctx_trace; struct trace_array_cpu *data; @@ -34,14 +34,17 @@ ctx_switch_func(struct task_struct *prev, struct task_struct *next) data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); - if (likely(disabled == 1)) + if (likely(disabled == 1)) { tracing_sched_switch_trace(tr, data, prev, next, flags); + ftrace_all_fair_tasks(__rq, tr, data); + } atomic_dec(&data->disabled); local_irq_restore(flags); } -static void wakeup_func(struct task_struct *wakee, struct task_struct *curr) +static void +wakeup_func(void *__rq, struct task_struct *wakee, struct task_struct *curr) { struct trace_array *tr = ctx_trace; struct trace_array_cpu *data; @@ -57,14 +60,18 @@ static void wakeup_func(struct task_struct *wakee, struct task_struct *curr) data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); - if (likely(disabled == 1)) + if (likely(disabled == 1)) { tracing_sched_wakeup_trace(tr, data, wakee, curr, flags); + ftrace_all_fair_tasks(__rq, tr, data); + } atomic_dec(&data->disabled); local_irq_restore(flags); } -void ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) +void +ftrace_ctx_switch(void *__rq, struct task_struct *prev, + struct task_struct *next) { tracing_record_cmdline(prev); @@ -72,7 +79,7 @@ void ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) * If tracer_switch_func only points to the local * switch func, it still needs the ptr passed to it. */ - ctx_switch_func(prev, next); + ctx_switch_func(__rq, prev, next); /* * Chain to the wakeup tracer (this is a NOP if disabled): @@ -81,11 +88,12 @@ void ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) } void -ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) +ftrace_wake_up_task(void *__rq, struct task_struct *wakee, + struct task_struct *curr) { tracing_record_cmdline(curr); - wakeup_func(wakee, curr); + wakeup_func(__rq, wakee, curr); /* * Chain to the wakeup tracer (this is a NOP if disabled): -- cgit v1.2.3 From 017730c11241e26577673eb9d957cfc66172ea91 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:52 +0200 Subject: ftrace: fix wakeups Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 2 ++ kernel/sched.c | 18 ++++++++++++++++++ kernel/trace/trace.c | 15 +++++++++++---- 3 files changed, 31 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 652d380ae563..a3970b563757 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -246,6 +246,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); extern void init_idle_bootup_task(struct task_struct *idle); +extern int runqueue_is_locked(void); + extern cpumask_t nohz_cpu_mask; #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) extern int select_nohz_load_balancer(int cpu); diff --git a/kernel/sched.c b/kernel/sched.c index 673b588b713b..9ca4a2e6a236 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -642,6 +642,24 @@ static inline void update_rq_clock(struct rq *rq) # define const_debug static const #endif +/** + * runqueue_is_locked + * + * Returns true if the current cpu runqueue is locked. + * This interface allows printk to be called with the runqueue lock + * held and know whether or not it is OK to wake up the klogd. + */ +int runqueue_is_locked(void) +{ + int cpu = get_cpu(); + struct rq *rq = cpu_rq(cpu); + int ret; + + ret = spin_is_locked(&rq->lock); + put_cpu(); + return ret; +} + /* * Debugging: various feature bits */ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 65173b14b914..2ca9d66aa74e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -70,12 +70,13 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); unsigned long trace_flags = TRACE_ITER_PRINT_PARENT; -/* - * FIXME: where should this be called? - */ void trace_wake_up(void) { - if (!(trace_flags & TRACE_ITER_BLOCK)) + /* + * The runqueue_is_locked() can fail, but this is the best we + * have for now: + */ + if (!(trace_flags & TRACE_ITER_BLOCK) && !runqueue_is_locked()) wake_up(&trace_wait); } @@ -657,6 +658,8 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, entry->fn.ip = ip; entry->fn.parent_ip = parent_ip; spin_unlock_irqrestore(&data->lock, irq_flags); + + trace_wake_up(); } void @@ -686,6 +689,8 @@ __trace_special(void *__tr, void *__data, entry->special.arg2 = arg2; entry->special.arg3 = arg3; spin_unlock_irqrestore(&data->lock, irq_flags); + + trace_wake_up(); } #endif @@ -759,6 +764,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->ctx.next_prio = wakee->prio; __trace_stack(tr, data, flags, 5); spin_unlock_irqrestore(&data->lock, irq_flags); + + trace_wake_up(); } #ifdef CONFIG_FTRACE -- cgit v1.2.3 From 1a3c3034336320554a3342572dae98d69e054fc7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:52 +0200 Subject: ftrace: fix __trace_special() Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 20 ++++++++++++-------- kernel/trace/trace.c | 4 ---- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index a3970b563757..5b186bed54bc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2119,6 +2119,18 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) } #endif +#ifdef CONFIG_TRACING +extern void +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3); +#else +static inline void +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3) +{ +} +#endif + #ifdef CONFIG_CONTEXT_SWITCH_TRACER extern void ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next); @@ -2126,9 +2138,6 @@ extern void ftrace_wake_up_task(void *rq, struct task_struct *wakee, struct task_struct *curr); extern void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data); -extern void -__trace_special(void *__tr, void *__data, - unsigned long arg1, unsigned long arg2, unsigned long arg3); #else static inline void ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next) @@ -2146,11 +2155,6 @@ ftrace_wake_up_task(void *rq, struct task_struct *wakee, static inline void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) { } -static inline void -__trace_special(void *__tr, void *__data, - unsigned long arg1, unsigned long arg2, unsigned long arg3) -{ -} #endif extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2ca9d66aa74e..65d2c0a61ed4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -670,8 +670,6 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, trace_function(tr, data, ip, parent_ip, flags); } -#ifdef CONFIG_CONTEXT_SWITCH_TRACER - void __trace_special(void *__tr, void *__data, unsigned long arg1, unsigned long arg2, unsigned long arg3) @@ -693,8 +691,6 @@ __trace_special(void *__tr, void *__data, trace_wake_up(); } -#endif - void __trace_stack(struct trace_array *tr, struct trace_array_cpu *data, unsigned long flags, -- cgit v1.2.3 From 88a4216c3ec4281fc7e6725cc3a3ccd01fb1aa14 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:53 +0200 Subject: ftrace: sched special Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 6 ++++++ kernel/sched_fair.c | 3 +++ kernel/trace/trace.c | 6 +++--- kernel/trace/trace_sched_switch.c | 24 ++++++++++++++++++++++++ 4 files changed, 36 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5b186bed54bc..360ca99033d2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2138,6 +2138,8 @@ extern void ftrace_wake_up_task(void *rq, struct task_struct *wakee, struct task_struct *curr); extern void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data); +extern void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); #else static inline void ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next) @@ -2155,6 +2157,10 @@ ftrace_wake_up_task(void *rq, struct task_struct *wakee, static inline void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) { } +static inline void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) +{ +} #endif extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index e24ecd39c4b8..dc1856f10795 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1061,6 +1061,8 @@ wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq, if (!(this_sd->flags & SD_WAKE_AFFINE)) return 0; + ftrace_special(__LINE__, curr->se.avg_overlap, sync); + ftrace_special(__LINE__, p->se.avg_overlap, -1); /* * If the currently running task will sleep within * a reasonable amount of time then attract this newly @@ -1238,6 +1240,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p) if (unlikely(se == pse)) return; + ftrace_special(__LINE__, p->pid, se->last_wakeup); cfs_rq_of(pse)->next = pse; /* diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3a4032492fcb..b87a26414892 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1251,7 +1251,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) comm); break; case TRACE_SPECIAL: - trace_seq_printf(s, " %ld %ld %ld\n", + trace_seq_printf(s, "# %ld %ld %ld\n", entry->special.arg1, entry->special.arg2, entry->special.arg3); @@ -1335,7 +1335,7 @@ static int print_trace_fmt(struct trace_iterator *iter) return 0; break; case TRACE_SPECIAL: - ret = trace_seq_printf(s, " %ld %ld %ld\n", + ret = trace_seq_printf(s, "# %ld %ld %ld\n", entry->special.arg1, entry->special.arg2, entry->special.arg3); @@ -1400,7 +1400,7 @@ static int print_raw_fmt(struct trace_iterator *iter) break; case TRACE_SPECIAL: case TRACE_STACK: - ret = trace_seq_printf(s, " %ld %ld %ld\n", + ret = trace_seq_printf(s, "# %ld %ld %ld\n", entry->special.arg1, entry->special.arg2, entry->special.arg3); diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 5a217e863586..bddf676914ed 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -103,6 +103,30 @@ ftrace_wake_up_task(void *__rq, struct task_struct *wakee, wakeup_sched_wakeup(wakee, curr); } +void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) +{ + struct trace_array *tr = ctx_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + + if (!tracer_enabled) + return; + + local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) + __trace_special(tr, data, arg1, arg2, arg3); + + atomic_dec(&data->disabled); + local_irq_restore(flags); +} + static void sched_switch_reset(struct trace_array *tr) { int cpu; -- cgit v1.2.3 From 86069782d62e731b4835a0cf8eb7d1d0e17cf306 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:56 +0200 Subject: x86: add a list for custom page fault handlers. Provides kernel modules a way to register custom page fault handlers. On every page fault this will call a list of registered functions. The functions may handle the fault and force do_page_fault() to return immediately. This functionality is similar to the now removed page fault notifiers. Custom page fault handlers are used by debugging and reverse engineering tools. Mmiotrace is one such tool and a patch to add it into the tree will follow. The custom page fault handlers are called earlier in do_page_fault() than the page fault notifiers were. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig.debug | 8 +++++++ arch/x86/mm/fault.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++ include/asm-x86/kdebug.h | 9 ++++++++ 3 files changed, 73 insertions(+) (limited to 'include') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index ac1e31ba4795..9431a8399844 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -168,6 +168,14 @@ config IOMMU_LEAK Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. +config PAGE_FAULT_HANDLERS + bool "Custom page fault handlers" + depends on DEBUG_KERNEL + help + Allow the use of custom page fault handlers. A kernel module may + register a function that is called on every page fault. Custom + handlers are used by some debugging and reverse engineering tools. + # # IO delay types: # diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index fd7e1798c75a..343f5c1aacc8 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -49,6 +49,60 @@ #define PF_RSVD (1<<3) #define PF_INSTR (1<<4) +#ifdef CONFIG_PAGE_FAULT_HANDLERS +static HLIST_HEAD(pf_handlers); /* protected by RCU */ +static DEFINE_SPINLOCK(pf_handlers_writer); + +void register_page_fault_handler(struct pf_handler *new_pfh) +{ + unsigned long flags; + spin_lock_irqsave(&pf_handlers_writer, flags); + hlist_add_head_rcu(&new_pfh->hlist, &pf_handlers); + spin_unlock_irqrestore(&pf_handlers_writer, flags); +} +EXPORT_SYMBOL_GPL(register_page_fault_handler); + +/** + * unregister_page_fault_handler: + * The caller must ensure @old_pfh is not in use anymore before freeing it. + * This function does not guarantee it. The list of handlers is protected by + * RCU, so you can do this by e.g. calling synchronize_rcu(). + */ +void unregister_page_fault_handler(struct pf_handler *old_pfh) +{ + unsigned long flags; + spin_lock_irqsave(&pf_handlers_writer, flags); + hlist_del_rcu(&old_pfh->hlist); + spin_unlock_irqrestore(&pf_handlers_writer, flags); +} +EXPORT_SYMBOL_GPL(unregister_page_fault_handler); +#endif + +/* returns non-zero if do_page_fault() should return */ +static int handle_custom_pf(struct pt_regs *regs, unsigned long error_code, + unsigned long address) +{ +#ifdef CONFIG_PAGE_FAULT_HANDLERS + int ret = 0; + struct pf_handler *cur; + struct hlist_node *ncur; + + if (hlist_empty(&pf_handlers)) + return 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(cur, ncur, &pf_handlers, hlist) { + ret = cur->handler(regs, error_code, address); + if (ret) + break; + } + rcu_read_unlock(); + return ret; +#else + return 0; +#endif +} + static inline int notify_page_fault(struct pt_regs *regs) { #ifdef CONFIG_KPROBES @@ -601,6 +655,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) if (notify_page_fault(regs)) return; + if (handle_custom_pf(regs, error_code, address)) + return; /* * We fault-in kernel-space virtual memory on-demand. The diff --git a/include/asm-x86/kdebug.h b/include/asm-x86/kdebug.h index 96651bb59ba1..a80f2d6cc737 100644 --- a/include/asm-x86/kdebug.h +++ b/include/asm-x86/kdebug.h @@ -35,4 +35,13 @@ extern void show_regs(struct pt_regs *regs); extern unsigned long oops_begin(void); extern void oops_end(unsigned long, struct pt_regs *, int signr); +struct pf_handler { + struct hlist_node hlist; + int (*handler)(struct pt_regs *regs, unsigned long error_code, + unsigned long address); +}; + +extern void register_page_fault_handler(struct pf_handler *new_pfh); +extern void unregister_page_fault_handler(struct pf_handler *old_pfh); + #endif -- cgit v1.2.3 From 3eefae994d9224fb7771a3ddb683868363c23510 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:21:04 +0200 Subject: ftrace: limit trace entries Currently there is no protection from the root user to use up all of memory for trace buffers. If the root user allocates too many entries, the OOM killer might start kill off all tasks. This patch adds an algorith to check the following condition: pages_requested > (freeable_memory + current_trace_buffer_pages) / 4 If the above is met then the allocation fails. The above prevents more than 1/4th of freeable memory from being used by trace buffers. To determine the freeable_memory, I made determine_dirtyable_memory in mm/page-writeback.c global. Special thanks goes to Peter Zijlstra for suggesting the above calculation. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/writeback.h | 2 ++ kernel/trace/trace.c | 38 ++++++++++++++++++++++++++++++++++++++ mm/page-writeback.c | 10 +++++++--- 3 files changed, 47 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index f462439cc288..bd91987c065f 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -105,6 +105,8 @@ extern int vm_highmem_is_dirtyable; extern int block_dump; extern int laptop_mode; +extern unsigned long determine_dirtyable_memory(void); + extern int dirty_ratio_handler(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 82ced406aacf..2824cf48cdca 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -51,6 +52,8 @@ static int trace_free_page(void); static int tracing_disabled = 1; +static unsigned long tracing_pages_allocated; + long ns2usecs(cycle_t nsec) { @@ -2591,12 +2594,41 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, } if (val > global_trace.entries) { + long pages_requested; + unsigned long freeable_pages; + + /* make sure we have enough memory before mapping */ + pages_requested = + (val + (ENTRIES_PER_PAGE-1)) / ENTRIES_PER_PAGE; + + /* account for each buffer (and max_tr) */ + pages_requested *= tracing_nr_buffers * 2; + + /* Check for overflow */ + if (pages_requested < 0) { + cnt = -ENOMEM; + goto out; + } + + freeable_pages = determine_dirtyable_memory(); + + /* we only allow to request 1/4 of useable memory */ + if (pages_requested > + ((freeable_pages + tracing_pages_allocated) / 4)) { + cnt = -ENOMEM; + goto out; + } + while (global_trace.entries < val) { if (trace_alloc_page()) { cnt = -ENOMEM; goto out; } + /* double check that we don't go over the known pages */ + if (tracing_pages_allocated > pages_requested) + break; } + } else { /* include the number of entries in val (inc of page entries) */ while (global_trace.entries > val + (ENTRIES_PER_PAGE - 1)) @@ -2776,6 +2808,7 @@ static int trace_alloc_page(void) struct page *page, *tmp; LIST_HEAD(pages); void *array; + unsigned pages_allocated = 0; int i; /* first allocate a page for each CPU */ @@ -2787,6 +2820,7 @@ static int trace_alloc_page(void) goto free_pages; } + pages_allocated++; page = virt_to_page(array); list_add(&page->lru, &pages); @@ -2798,6 +2832,7 @@ static int trace_alloc_page(void) "for trace buffer!\n"); goto free_pages; } + pages_allocated++; page = virt_to_page(array); list_add(&page->lru, &pages); #endif @@ -2819,6 +2854,7 @@ static int trace_alloc_page(void) SetPageLRU(page); #endif } + tracing_pages_allocated += pages_allocated; global_trace.entries += ENTRIES_PER_PAGE; return 0; @@ -2853,6 +2889,8 @@ static int trace_free_page(void) page = list_entry(p, struct page, lru); ClearPageLRU(page); list_del(&page->lru); + tracing_pages_allocated--; + tracing_pages_allocated--; __free_page(page); tracing_reset(data); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 789b6adbef37..b38f700825fc 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -126,8 +126,6 @@ static void background_writeout(unsigned long _min_pages); static struct prop_descriptor vm_completions; static struct prop_descriptor vm_dirties; -static unsigned long determine_dirtyable_memory(void); - /* * couple the period to the dirty_ratio: * @@ -347,7 +345,13 @@ static unsigned long highmem_dirtyable_memory(unsigned long total) #endif } -static unsigned long determine_dirtyable_memory(void) +/** + * determine_dirtyable_memory - amount of memory that may be used + * + * Returns the numebr of pages that can currently be freed and used + * by the kernel for direct mappings. + */ +unsigned long determine_dirtyable_memory(void) { unsigned long x; -- cgit v1.2.3 From dc102a8fae2d0d6bf5223fc549247f2e23959ae6 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 12 May 2008 21:21:09 +0200 Subject: Markers - remove extra format argument Denys Vlasenko : > Not in this patch, but I noticed: > > #define __trace_mark(name, call_private, format, args...) \ > do { \ > static const char __mstrtab_##name[] \ > __attribute__((section("__markers_strings"))) \ > = #name "\0" format; \ > static struct marker __mark_##name \ > __attribute__((section("__markers"), aligned(8))) = \ > { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ > 0, 0, marker_probe_cb, \ > { __mark_empty_function, NULL}, NULL }; \ > __mark_check_format(format, ## args); \ > if (unlikely(__mark_##name.state)) { \ > (*__mark_##name.call) \ > (&__mark_##name, call_private, \ > format, ## args); \ > } \ > } while (0) > > In this call: > > (*__mark_##name.call) \ > (&__mark_##name, call_private, \ > format, ## args); \ > > you make gcc allocate duplicate format string. You can use > &__mstrtab_##name[sizeof(#name)] instead since it holds the same string, > or drop ", format," above and "const char *fmt" from here: > > void (*call)(const struct marker *mdata, /* Probe wrapper */ > void *call_private, const char *fmt, ...); > > since mdata->format is the same and all callees which need it can take it there. Very good point. I actually thought about dropping it, since it would remove an unnecessary argument from the stack. And actually, since I now have the marker_probe_cb sitting between the marker site and the callbacks, there is no API change required. Thanks :) Mathieu Signed-off-by: Mathieu Desnoyers CC: Denys Vlasenko Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/marker.h | 11 +++++------ kernel/marker.c | 30 ++++++++++++++---------------- 2 files changed, 19 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/marker.h b/include/linux/marker.h index 430f6adf9762..338533abb47b 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -44,8 +44,8 @@ struct marker { */ char state; /* Marker state. */ char ptype; /* probe type : 0 : single, 1 : multi */ - void (*call)(const struct marker *mdata, /* Probe wrapper */ - void *call_private, const char *fmt, ...); + /* Probe wrapper */ + void (*call)(const struct marker *mdata, void *call_private, ...); struct marker_probe_closure single; struct marker_probe_closure *multi; } __attribute__((aligned(8))); @@ -72,8 +72,7 @@ struct marker { __mark_check_format(format, ## args); \ if (unlikely(__mark_##name.state)) { \ (*__mark_##name.call) \ - (&__mark_##name, call_private, \ - format, ## args); \ + (&__mark_##name, call_private, ## args);\ } \ } while (0) @@ -117,9 +116,9 @@ static inline void __printf(1, 2) ___mark_check_format(const char *fmt, ...) extern marker_probe_func __mark_empty_function; extern void marker_probe_cb(const struct marker *mdata, - void *call_private, const char *fmt, ...); + void *call_private, ...); extern void marker_probe_cb_noarg(const struct marker *mdata, - void *call_private, const char *fmt, ...); + void *call_private, ...); /* * Connect a probe to a marker. diff --git a/kernel/marker.c b/kernel/marker.c index b5a9fe1d50d5..1abfb923b761 100644 --- a/kernel/marker.c +++ b/kernel/marker.c @@ -55,8 +55,8 @@ static DEFINE_MUTEX(markers_mutex); struct marker_entry { struct hlist_node hlist; char *format; - void (*call)(const struct marker *mdata, /* Probe wrapper */ - void *call_private, const char *fmt, ...); + /* Probe wrapper */ + void (*call)(const struct marker *mdata, void *call_private, ...); struct marker_probe_closure single; struct marker_probe_closure *multi; int refcount; /* Number of times armed. 0 if disarmed. */ @@ -91,15 +91,13 @@ EXPORT_SYMBOL_GPL(__mark_empty_function); * marker_probe_cb Callback that prepares the variable argument list for probes. * @mdata: pointer of type struct marker * @call_private: caller site private data - * @fmt: format string * @...: Variable argument list. * * Since we do not use "typical" pointer based RCU in the 1 argument case, we * need to put a full smp_rmb() in this branch. This is why we do not use * rcu_dereference() for the pointer read. */ -void marker_probe_cb(const struct marker *mdata, void *call_private, - const char *fmt, ...) +void marker_probe_cb(const struct marker *mdata, void *call_private, ...) { va_list args; char ptype; @@ -120,8 +118,9 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, /* Must read the ptr before private data. They are not data * dependant, so we put an explicit smp_rmb() here. */ smp_rmb(); - va_start(args, fmt); - func(mdata->single.probe_private, call_private, fmt, &args); + va_start(args, call_private); + func(mdata->single.probe_private, call_private, mdata->format, + &args); va_end(args); } else { struct marker_probe_closure *multi; @@ -136,9 +135,9 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, smp_read_barrier_depends(); multi = mdata->multi; for (i = 0; multi[i].func; i++) { - va_start(args, fmt); - multi[i].func(multi[i].probe_private, call_private, fmt, - &args); + va_start(args, call_private); + multi[i].func(multi[i].probe_private, call_private, + mdata->format, &args); va_end(args); } } @@ -150,13 +149,11 @@ EXPORT_SYMBOL_GPL(marker_probe_cb); * marker_probe_cb Callback that does not prepare the variable argument list. * @mdata: pointer of type struct marker * @call_private: caller site private data - * @fmt: format string * @...: Variable argument list. * * Should be connected to markers "MARK_NOARGS". */ -void marker_probe_cb_noarg(const struct marker *mdata, - void *call_private, const char *fmt, ...) +void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...) { va_list args; /* not initialized */ char ptype; @@ -172,7 +169,8 @@ void marker_probe_cb_noarg(const struct marker *mdata, /* Must read the ptr before private data. They are not data * dependant, so we put an explicit smp_rmb() here. */ smp_rmb(); - func(mdata->single.probe_private, call_private, fmt, &args); + func(mdata->single.probe_private, call_private, mdata->format, + &args); } else { struct marker_probe_closure *multi; int i; @@ -186,8 +184,8 @@ void marker_probe_cb_noarg(const struct marker *mdata, smp_read_barrier_depends(); multi = mdata->multi; for (i = 0; multi[i].func; i++) - multi[i].func(multi[i].probe_private, call_private, fmt, - &args); + multi[i].func(multi[i].probe_private, call_private, + mdata->format, &args); } preempt_enable(); } -- cgit v1.2.3 From 0aa977f592f17004f9d1d545f2e1bb9ea71896c3 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 12 May 2008 21:21:10 +0200 Subject: Markers - define non optimized marker To support the forthcoming "immediate values" marker optimization, we must have a way to declare markers in few code paths that does not use instruction modification based enable. This will be the case of printk(), some traps and eventually lockdep instrumentation. Changelog : - Fix reversed boolean logic of "generic". Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/marker.h | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/marker.h b/include/linux/marker.h index 338533abb47b..1290653f9241 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -58,8 +58,12 @@ struct marker { * Make sure the alignment of the structure in the __markers section will * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. + * + * The "generic" argument controls which marker enabling mechanism must be used. + * If generic is true, a variable read is used. + * If generic is false, immediate values are used. */ -#define __trace_mark(name, call_private, format, args...) \ +#define __trace_mark(generic, name, call_private, format, args...) \ do { \ static const char __mstrtab_##name[] \ __attribute__((section("__markers_strings"))) \ @@ -79,7 +83,7 @@ struct marker { extern void marker_update_probe_range(struct marker *begin, struct marker *end); #else /* !CONFIG_MARKERS */ -#define __trace_mark(name, call_private, format, args...) \ +#define __trace_mark(generic, name, call_private, format, args...) \ __mark_check_format(format, ## args) static inline void marker_update_probe_range(struct marker *begin, struct marker *end) @@ -87,15 +91,30 @@ static inline void marker_update_probe_range(struct marker *begin, #endif /* CONFIG_MARKERS */ /** - * trace_mark - Marker + * trace_mark - Marker using code patching * @name: marker name, not quoted. * @format: format string * @args...: variable argument list * - * Places a marker. + * Places a marker using optimized code patching technique (imv_read()) + * to be enabled when immediate values are present. */ #define trace_mark(name, format, args...) \ - __trace_mark(name, NULL, format, ## args) + __trace_mark(0, name, NULL, format, ## args) + +/** + * _trace_mark - Marker using variable read + * @name: marker name, not quoted. + * @format: format string + * @args...: variable argument list + * + * Places a marker using a standard memory read (_imv_read()) to be + * enabled. Should be used for markers in code paths where instruction + * modification based enabling is not welcome. (__init and __exit functions, + * lockdep, some traps, printk). + */ +#define _trace_mark(name, format, args...) \ + __trace_mark(1, name, NULL, format, ## args) /** * MARK_NOARGS - Format string for a marker with no argument. -- cgit v1.2.3 From 5b82a1b08a00b2adca3d9dd9777efff40b7aaaa1 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 12 May 2008 21:21:10 +0200 Subject: Port ftrace to markers Porting ftrace to the marker infrastructure. Don't need to chain to the wakeup tracer from the sched tracer, because markers support multiple probes connected. Signed-off-by: Mathieu Desnoyers CC: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 32 ------- kernel/sched.c | 14 +++- kernel/trace/trace.h | 20 +---- kernel/trace/trace_sched_switch.c | 171 +++++++++++++++++++++++++++++++------- kernel/trace/trace_sched_wakeup.c | 106 +++++++++++++++++++++-- 5 files changed, 255 insertions(+), 88 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 360ca99033d2..c0b1c69b55ce 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2131,38 +2131,6 @@ __trace_special(void *__tr, void *__data, } #endif -#ifdef CONFIG_CONTEXT_SWITCH_TRACER -extern void -ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next); -extern void -ftrace_wake_up_task(void *rq, struct task_struct *wakee, - struct task_struct *curr); -extern void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data); -extern void -ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); -#else -static inline void -ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next) -{ -} -static inline void -sched_trace_special(unsigned long p1, unsigned long p2, unsigned long p3) -{ -} -static inline void -ftrace_wake_up_task(void *rq, struct task_struct *wakee, - struct task_struct *curr) -{ -} -static inline void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) -{ -} -static inline void -ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) -{ -} -#endif - extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); extern long sched_getaffinity(pid_t pid, cpumask_t *mask); diff --git a/kernel/sched.c b/kernel/sched.c index ad95cca4e42e..e2e985eeee78 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2500,7 +2500,9 @@ out_activate: success = 1; out_running: - ftrace_wake_up_task(rq, p, rq->curr); + trace_mark(kernel_sched_wakeup, + "pid %d state %ld ## rq %p task %p rq->curr %p", + p->pid, p->state, rq, p, rq->curr); check_preempt_curr(rq, p); p->state = TASK_RUNNING; @@ -2631,7 +2633,9 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) p->sched_class->task_new(rq, p); inc_nr_running(rq); } - ftrace_wake_up_task(rq, p, rq->curr); + trace_mark(kernel_sched_wakeup_new, + "pid %d state %ld ## rq %p task %p rq->curr %p", + p->pid, p->state, rq, p, rq->curr); check_preempt_curr(rq, p); #ifdef CONFIG_SMP if (p->sched_class->task_wake_up) @@ -2804,7 +2808,11 @@ context_switch(struct rq *rq, struct task_struct *prev, struct mm_struct *mm, *oldmm; prepare_task_switch(rq, prev, next); - ftrace_ctx_switch(rq, prev, next); + trace_mark(kernel_sched_schedule, + "prev_pid %d next_pid %d prev_state %ld " + "## rq %p prev %p next %p", + prev->pid, next->pid, prev->state, + rq, prev, next); mm = next->mm; oldmm = prev->active_mm; /* diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 8845033ab49d..f5de0601b408 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -234,25 +234,10 @@ void update_max_tr_single(struct trace_array *tr, extern cycle_t ftrace_now(int cpu); -#ifdef CONFIG_SCHED_TRACER -extern void -wakeup_sched_switch(struct task_struct *prev, struct task_struct *next); -extern void -wakeup_sched_wakeup(struct task_struct *wakee, struct task_struct *curr); -#else -static inline void -wakeup_sched_switch(struct task_struct *prev, struct task_struct *next) -{ -} -static inline void -wakeup_sched_wakeup(struct task_struct *wakee, struct task_struct *curr) -{ -} -#endif - #ifdef CONFIG_CONTEXT_SWITCH_TRACER typedef void (*tracer_switch_func_t)(void *private, + void *__rq, struct task_struct *prev, struct task_struct *next); @@ -262,9 +247,6 @@ struct tracer_switch_ops { struct tracer_switch_ops *next; }; -extern int register_tracer_switch(struct tracer_switch_ops *ops); -extern int unregister_tracer_switch(struct tracer_switch_ops *ops); - #endif /* CONFIG_CONTEXT_SWITCH_TRACER */ #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index a3376478fc2c..d25ffa5eaf2b 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -16,11 +16,14 @@ static struct trace_array *ctx_trace; static int __read_mostly tracer_enabled; +static atomic_t sched_ref; static void -ctx_switch_func(void *__rq, struct task_struct *prev, struct task_struct *next) +sched_switch_func(void *private, void *__rq, struct task_struct *prev, + struct task_struct *next) { - struct trace_array *tr = ctx_trace; + struct trace_array **ptr = private; + struct trace_array *tr = *ptr; struct trace_array_cpu *data; unsigned long flags; long disabled; @@ -41,10 +44,40 @@ ctx_switch_func(void *__rq, struct task_struct *prev, struct task_struct *next) local_irq_restore(flags); } +static notrace void +sched_switch_callback(void *probe_data, void *call_data, + const char *format, va_list *args) +{ + struct task_struct *prev; + struct task_struct *next; + struct rq *__rq; + + if (!atomic_read(&sched_ref)) + return; + + /* skip prev_pid %d next_pid %d prev_state %ld */ + (void)va_arg(*args, int); + (void)va_arg(*args, int); + (void)va_arg(*args, long); + __rq = va_arg(*args, typeof(__rq)); + prev = va_arg(*args, typeof(prev)); + next = va_arg(*args, typeof(next)); + + tracing_record_cmdline(prev); + + /* + * If tracer_switch_func only points to the local + * switch func, it still needs the ptr passed to it. + */ + sched_switch_func(probe_data, __rq, prev, next); +} + static void -wakeup_func(void *__rq, struct task_struct *wakee, struct task_struct *curr) +wakeup_func(void *private, void *__rq, struct task_struct *wakee, struct + task_struct *curr) { - struct trace_array *tr = ctx_trace; + struct trace_array **ptr = private; + struct trace_array *tr = *ptr; struct trace_array_cpu *data; unsigned long flags; long disabled; @@ -67,35 +100,29 @@ wakeup_func(void *__rq, struct task_struct *wakee, struct task_struct *curr) local_irq_restore(flags); } -void -ftrace_ctx_switch(void *__rq, struct task_struct *prev, - struct task_struct *next) +static notrace void +wake_up_callback(void *probe_data, void *call_data, + const char *format, va_list *args) { - if (unlikely(atomic_read(&trace_record_cmdline_enabled))) - tracing_record_cmdline(prev); + struct task_struct *curr; + struct task_struct *task; + struct rq *__rq; - /* - * If tracer_switch_func only points to the local - * switch func, it still needs the ptr passed to it. - */ - ctx_switch_func(__rq, prev, next); + if (likely(!tracer_enabled)) + return; - /* - * Chain to the wakeup tracer (this is a NOP if disabled): - */ - wakeup_sched_switch(prev, next); -} + /* Skip pid %d state %ld */ + (void)va_arg(*args, int); + (void)va_arg(*args, long); + /* now get the meat: "rq %p task %p rq->curr %p" */ + __rq = va_arg(*args, typeof(__rq)); + task = va_arg(*args, typeof(task)); + curr = va_arg(*args, typeof(curr)); -void -ftrace_wake_up_task(void *__rq, struct task_struct *wakee, - struct task_struct *curr) -{ - wakeup_func(__rq, wakee, curr); + tracing_record_cmdline(task); + tracing_record_cmdline(curr); - /* - * Chain to the wakeup tracer (this is a NOP if disabled): - */ - wakeup_sched_wakeup(wakee, curr); + wakeup_func(probe_data, __rq, task, curr); } void @@ -132,15 +159,95 @@ static void sched_switch_reset(struct trace_array *tr) tracing_reset(tr->data[cpu]); } +static int tracing_sched_register(void) +{ + int ret; + + ret = marker_probe_register("kernel_sched_wakeup", + "pid %d state %ld ## rq %p task %p rq->curr %p", + wake_up_callback, + &ctx_trace); + if (ret) { + pr_info("wakeup trace: Couldn't add marker" + " probe to kernel_sched_wakeup\n"); + return ret; + } + + ret = marker_probe_register("kernel_sched_wakeup_new", + "pid %d state %ld ## rq %p task %p rq->curr %p", + wake_up_callback, + &ctx_trace); + if (ret) { + pr_info("wakeup trace: Couldn't add marker" + " probe to kernel_sched_wakeup_new\n"); + goto fail_deprobe; + } + + ret = marker_probe_register("kernel_sched_schedule", + "prev_pid %d next_pid %d prev_state %ld " + "## rq %p prev %p next %p", + sched_switch_callback, + &ctx_trace); + if (ret) { + pr_info("sched trace: Couldn't add marker" + " probe to kernel_sched_schedule\n"); + goto fail_deprobe_wake_new; + } + + return ret; +fail_deprobe_wake_new: + marker_probe_unregister("kernel_sched_wakeup_new", + wake_up_callback, + &ctx_trace); +fail_deprobe: + marker_probe_unregister("kernel_sched_wakeup", + wake_up_callback, + &ctx_trace); + return ret; +} + +static void tracing_sched_unregister(void) +{ + marker_probe_unregister("kernel_sched_schedule", + sched_switch_callback, + &ctx_trace); + marker_probe_unregister("kernel_sched_wakeup_new", + wake_up_callback, + &ctx_trace); + marker_probe_unregister("kernel_sched_wakeup", + wake_up_callback, + &ctx_trace); +} + +void tracing_start_sched_switch(void) +{ + long ref; + + ref = atomic_inc_return(&sched_ref); + if (ref == 1) + tracing_sched_register(); +} + +void tracing_stop_sched_switch(void) +{ + long ref; + + ref = atomic_dec_and_test(&sched_ref); + if (ref) + tracing_sched_unregister(); +} + static void start_sched_trace(struct trace_array *tr) { sched_switch_reset(tr); atomic_inc(&trace_record_cmdline_enabled); tracer_enabled = 1; + tracing_start_sched_switch(); } static void stop_sched_trace(struct trace_array *tr) { + tracing_stop_sched_switch(); atomic_dec(&trace_record_cmdline_enabled); tracer_enabled = 0; } @@ -181,6 +288,14 @@ static struct tracer sched_switch_trace __read_mostly = __init static int init_sched_switch_trace(void) { + int ret = 0; + + if (atomic_read(&sched_ref)) + ret = tracing_sched_register(); + if (ret) { + pr_info("error registering scheduler trace\n"); + return ret; + } return register_tracer(&sched_switch_trace); } device_initcall(init_sched_switch_trace); diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 5948011006bc..5d2fb48e47f8 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "trace.h" @@ -44,11 +45,13 @@ static int report_latency(cycle_t delta) return 1; } -void -wakeup_sched_switch(struct task_struct *prev, struct task_struct *next) +static void notrace +wakeup_sched_switch(void *private, void *rq, struct task_struct *prev, + struct task_struct *next) { unsigned long latency = 0, t0 = 0, t1 = 0; - struct trace_array *tr = wakeup_trace; + struct trace_array **ptr = private; + struct trace_array *tr = *ptr; struct trace_array_cpu *data; cycle_t T0, T1, delta; unsigned long flags; @@ -113,6 +116,31 @@ out: atomic_dec(&tr->data[cpu]->disabled); } +static notrace void +sched_switch_callback(void *probe_data, void *call_data, + const char *format, va_list *args) +{ + struct task_struct *prev; + struct task_struct *next; + struct rq *__rq; + + /* skip prev_pid %d next_pid %d prev_state %ld */ + (void)va_arg(*args, int); + (void)va_arg(*args, int); + (void)va_arg(*args, long); + __rq = va_arg(*args, typeof(__rq)); + prev = va_arg(*args, typeof(prev)); + next = va_arg(*args, typeof(next)); + + tracing_record_cmdline(prev); + + /* + * If tracer_switch_func only points to the local + * switch func, it still needs the ptr passed to it. + */ + wakeup_sched_switch(probe_data, __rq, prev, next); +} + static void __wakeup_reset(struct trace_array *tr) { struct trace_array_cpu *data; @@ -188,19 +216,68 @@ out: atomic_dec(&tr->data[cpu]->disabled); } -void wakeup_sched_wakeup(struct task_struct *wakee, struct task_struct *curr) +static notrace void +wake_up_callback(void *probe_data, void *call_data, + const char *format, va_list *args) { + struct trace_array **ptr = probe_data; + struct trace_array *tr = *ptr; + struct task_struct *curr; + struct task_struct *task; + struct rq *__rq; + if (likely(!tracer_enabled)) return; + /* Skip pid %d state %ld */ + (void)va_arg(*args, int); + (void)va_arg(*args, long); + /* now get the meat: "rq %p task %p rq->curr %p" */ + __rq = va_arg(*args, typeof(__rq)); + task = va_arg(*args, typeof(task)); + curr = va_arg(*args, typeof(curr)); + + tracing_record_cmdline(task); tracing_record_cmdline(curr); - tracing_record_cmdline(wakee); - wakeup_check_start(wakeup_trace, wakee, curr); + wakeup_check_start(tr, task, curr); } static void start_wakeup_tracer(struct trace_array *tr) { + int ret; + + ret = marker_probe_register("kernel_sched_wakeup", + "pid %d state %ld ## rq %p task %p rq->curr %p", + wake_up_callback, + &wakeup_trace); + if (ret) { + pr_info("wakeup trace: Couldn't add marker" + " probe to kernel_sched_wakeup\n"); + return; + } + + ret = marker_probe_register("kernel_sched_wakeup_new", + "pid %d state %ld ## rq %p task %p rq->curr %p", + wake_up_callback, + &wakeup_trace); + if (ret) { + pr_info("wakeup trace: Couldn't add marker" + " probe to kernel_sched_wakeup_new\n"); + goto fail_deprobe; + } + + ret = marker_probe_register("kernel_sched_schedule", + "prev_pid %d next_pid %d prev_state %ld " + "## rq %p prev %p next %p", + sched_switch_callback, + &wakeup_trace); + if (ret) { + pr_info("sched trace: Couldn't add marker" + " probe to kernel_sched_schedule\n"); + goto fail_deprobe_wake_new; + } + wakeup_reset(tr); /* @@ -215,11 +292,28 @@ static void start_wakeup_tracer(struct trace_array *tr) tracer_enabled = 1; return; +fail_deprobe_wake_new: + marker_probe_unregister("kernel_sched_wakeup_new", + wake_up_callback, + &wakeup_trace); +fail_deprobe: + marker_probe_unregister("kernel_sched_wakeup", + wake_up_callback, + &wakeup_trace); } static void stop_wakeup_tracer(struct trace_array *tr) { tracer_enabled = 0; + marker_probe_unregister("kernel_sched_schedule", + sched_switch_callback, + &wakeup_trace); + marker_probe_unregister("kernel_sched_wakeup_new", + wake_up_callback, + &wakeup_trace); + marker_probe_unregister("kernel_sched_wakeup", + wake_up_callback, + &wakeup_trace); } static void wakeup_tracer_init(struct trace_array *tr) -- cgit v1.2.3 From 74f4e369fc5b52433ad824cef32d3bf1304549be Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:21:15 +0200 Subject: ftrace: stacktrace fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 8 ++++++++ kernel/semaphore.c | 2 ++ kernel/trace/trace.c | 4 ++-- kernel/trace/trace.h | 2 +- 4 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0d3714e7110b..017ab44d572a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -120,4 +120,12 @@ static inline void tracer_disable(void) # define trace_preempt_off(a0, a1) do { } while (0) #endif +#ifdef CONFIG_CONTEXT_SWITCH_TRACER +extern void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); +#else +static inline void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } +#endif + #endif /* _LINUX_FTRACE_H */ diff --git a/kernel/semaphore.c b/kernel/semaphore.c index 5c2942e768cd..1a064adab658 100644 --- a/kernel/semaphore.c +++ b/kernel/semaphore.c @@ -31,6 +31,7 @@ #include #include #include +#include static noinline void __down(struct semaphore *sem); static noinline int __down_interruptible(struct semaphore *sem); @@ -53,6 +54,7 @@ void down(struct semaphore *sem) { unsigned long flags; + ftrace_special(sem->count, 0, __LINE__); spin_lock_irqsave(&sem->lock, flags); if (likely(sem->count > 0)) sem->count--; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2824cf48cdca..3271916ff033 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -901,7 +901,7 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->ctx.next_pid = next->pid; entry->ctx.next_prio = next->prio; entry->ctx.next_state = next->state; - __trace_stack(tr, data, flags, 4); + __trace_stack(tr, data, flags, 5); __raw_spin_unlock(&data->lock); raw_local_irq_restore(irq_flags); } @@ -927,7 +927,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->ctx.next_pid = wakee->pid; entry->ctx.next_prio = wakee->prio; entry->ctx.next_state = wakee->state; - __trace_stack(tr, data, flags, 5); + __trace_stack(tr, data, flags, 6); __raw_spin_unlock(&data->lock); raw_local_irq_restore(irq_flags); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f5de0601b408..c460e85e94ed 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -51,7 +51,7 @@ struct special_entry { * Stack-trace entry: */ -#define FTRACE_STACK_ENTRIES 5 +#define FTRACE_STACK_ENTRIES 8 struct stack_entry { unsigned long caller[FTRACE_STACK_ENTRIES]; -- cgit v1.2.3 From d49dbf33f0bf8748ee3662b973eb57e60525d622 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 16 May 2008 10:41:53 +0200 Subject: ftrace: fix include file dependency Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 017ab44d572a..911d5d80b49f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -4,6 +4,7 @@ #ifdef CONFIG_FTRACE #include +#include extern int ftrace_enabled; extern int -- cgit v1.2.3 From 489f139614596cbc956a06f5e4bb41288e276fe3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 25 Feb 2008 13:38:05 +0100 Subject: ftrace: fix build bug Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 911d5d80b49f..922e23d0196f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -106,16 +106,16 @@ static inline void tracer_disable(void) #endif #ifdef CONFIG_IRQSOFF_TRACER - extern void notrace time_hardirqs_on(unsigned long a0, unsigned long a1); - extern void notrace time_hardirqs_off(unsigned long a0, unsigned long a1); + extern void time_hardirqs_on(unsigned long a0, unsigned long a1); + extern void time_hardirqs_off(unsigned long a0, unsigned long a1); #else # define time_hardirqs_on(a0, a1) do { } while (0) # define time_hardirqs_off(a0, a1) do { } while (0) #endif #ifdef CONFIG_PREEMPT_TRACER - extern void notrace trace_preempt_on(unsigned long a0, unsigned long a1); - extern void notrace trace_preempt_off(unsigned long a0, unsigned long a1); + extern void trace_preempt_on(unsigned long a0, unsigned long a1); + extern void trace_preempt_off(unsigned long a0, unsigned long a1); #else # define trace_preempt_on(a0, a1) do { } while (0) # define trace_preempt_off(a0, a1) do { } while (0) -- cgit v1.2.3 From e0eca07badc023a675a61906020b397da20f07c3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 14 May 2008 23:49:43 -0400 Subject: ftrace, POWERPC: add irqs_disabled_flags to ppc PPC doesn't have the irqs_disabled_flags needed by ftrace. This patch adds it. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-powerpc/hw_irq.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/asm-powerpc/hw_irq.h b/include/asm-powerpc/hw_irq.h index ad8c9f7fd0e3..f75a5fc64d2e 100644 --- a/include/asm-powerpc/hw_irq.h +++ b/include/asm-powerpc/hw_irq.h @@ -59,6 +59,11 @@ extern void iseries_handle_interrupts(void); get_paca()->hard_enabled = 0; \ } while(0) +static inline int irqs_disabled_flags(unsigned long flags) +{ + return flags == 0; +} + #else #if defined(CONFIG_BOOKE) @@ -113,6 +118,11 @@ static inline void local_irq_save_ptr(unsigned long *flags) #define hard_irq_enable() local_irq_enable() #define hard_irq_disable() local_irq_disable() +static inline int irqs_disabled_flags(unsigned long flags) +{ + return (flags & MSR_EE) == 0; +} + #endif /* CONFIG_PPC64 */ /* -- cgit v1.2.3 From 8b7d89d02ef3c6a7c73d6596f28cea7632850af4 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:56 +0200 Subject: x86: mmiotrace - trace memory mapped IO Mmiotrace is a tool for trapping memory mapped IO (MMIO) accesses within the kernel. It is used for debugging and especially for reverse engineering evil binary drivers. Mmiotrace works by wrapping the ioremap family of kernel functions and marking the returned pages as not present. Access to the IO memory triggers a page fault, which will be handled by mmiotrace's custom page fault handler. This will single-step the faulted instruction with the MMIO page marked as present. Access logs are directed to user space via relay and debug_fs. This page fault approach is necessary, because binary drivers have readl/writel etc. calls inlined and therefore extremely difficult to trap with with e.g. kprobes. This patch depends on the custom page fault handlers patch. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig.debug | 27 ++ arch/x86/kernel/Makefile | 2 + arch/x86/kernel/init_task.c | 1 + arch/x86/kernel/mmiotrace/Makefile | 4 + arch/x86/kernel/mmiotrace/kmmio.c | 391 ++++++++++++++++++++++ arch/x86/kernel/mmiotrace/kmmio.h | 58 ++++ arch/x86/kernel/mmiotrace/mmio-mod.c | 527 ++++++++++++++++++++++++++++++ arch/x86/kernel/mmiotrace/pf_in.c | 489 +++++++++++++++++++++++++++ arch/x86/kernel/mmiotrace/pf_in.h | 39 +++ arch/x86/kernel/mmiotrace/testmmiotrace.c | 77 +++++ include/linux/mmiotrace.h | 62 ++++ 11 files changed, 1677 insertions(+) create mode 100644 arch/x86/kernel/mmiotrace/Makefile create mode 100644 arch/x86/kernel/mmiotrace/kmmio.c create mode 100644 arch/x86/kernel/mmiotrace/kmmio.h create mode 100644 arch/x86/kernel/mmiotrace/mmio-mod.c create mode 100644 arch/x86/kernel/mmiotrace/pf_in.c create mode 100644 arch/x86/kernel/mmiotrace/pf_in.h create mode 100644 arch/x86/kernel/mmiotrace/testmmiotrace.c create mode 100644 include/linux/mmiotrace.h (limited to 'include') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 9431a8399844..7c6496e2225e 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -176,6 +176,33 @@ config PAGE_FAULT_HANDLERS register a function that is called on every page fault. Custom handlers are used by some debugging and reverse engineering tools. +config MMIOTRACE + tristate "Memory mapped IO tracing" + depends on DEBUG_KERNEL && PAGE_FAULT_HANDLERS && RELAY && DEBUG_FS + default n + help + This will build a kernel module called mmiotrace. + + Mmiotrace traces Memory Mapped I/O access and is meant for debugging + and reverse engineering. The kernel module offers wrapped + versions of the ioremap family of functions. The driver to be traced + must be modified to call these wrappers. A user space program is + required to collect the MMIO data. + + See http://nouveau.freedesktop.org/wiki/MmioTrace + If you are not helping to develop drivers, say N. + +config MMIOTRACE_TEST + tristate "Test module for mmiotrace" + depends on MMIOTRACE && m + default n + help + This is a dumb module for testing mmiotrace. It is very dangerous + as it will write garbage to IO memory starting at a given address. + However, it should be safe to use on e.g. unused portion of VRAM. + + Say N, unless you absolutely know what you are doing. + # # IO delay types: # diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 739d49acd2f1..a51ac153685e 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -79,6 +79,8 @@ obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_VM86) += vm86_32.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +obj-$(CONFIG_MMIOTRACE) += mmiotrace/ + obj-$(CONFIG_HPET_TIMER) += hpet.o obj-$(CONFIG_K8_NB) += k8.o diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c index a4f93b4120c1..027a5b6a12b2 100644 --- a/arch/x86/kernel/init_task.c +++ b/arch/x86/kernel/init_task.c @@ -15,6 +15,7 @@ static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); EXPORT_UNUSED_SYMBOL(init_mm); /* will be removed in 2.6.26 */ +EXPORT_SYMBOL_GPL(init_mm); /* * Initial thread structure. diff --git a/arch/x86/kernel/mmiotrace/Makefile b/arch/x86/kernel/mmiotrace/Makefile new file mode 100644 index 000000000000..d6905f7f981b --- /dev/null +++ b/arch/x86/kernel/mmiotrace/Makefile @@ -0,0 +1,4 @@ +obj-$(CONFIG_MMIOTRACE) += mmiotrace.o +mmiotrace-objs := pf_in.o kmmio.o mmio-mod.o + +obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/kernel/mmiotrace/kmmio.c new file mode 100644 index 000000000000..8ba48f9c91b4 --- /dev/null +++ b/arch/x86/kernel/mmiotrace/kmmio.c @@ -0,0 +1,391 @@ +/* Support for MMIO probes. + * Benfit many code from kprobes + * (C) 2002 Louis Zhuang . + * 2007 Alexander Eichner + * 2008 Pekka Paalanen + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kmmio.h" + +#define KMMIO_HASH_BITS 6 +#define KMMIO_TABLE_SIZE (1 << KMMIO_HASH_BITS) +#define KMMIO_PAGE_HASH_BITS 4 +#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS) + +struct kmmio_context { + struct kmmio_fault_page *fpage; + struct kmmio_probe *probe; + unsigned long saved_flags; + int active; +}; + +static int kmmio_page_fault(struct pt_regs *regs, unsigned long error_code, + unsigned long address); +static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, + void *args); + +static DEFINE_SPINLOCK(kmmio_lock); + +/* These are protected by kmmio_lock */ +unsigned int kmmio_count; +static unsigned int handler_registered; +static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE]; +static LIST_HEAD(kmmio_probes); + +static struct kmmio_context kmmio_ctx[NR_CPUS]; + +static struct pf_handler kmmio_pf_hook = { + .handler = kmmio_page_fault +}; + +static struct notifier_block nb_die = { + .notifier_call = kmmio_die_notifier +}; + +int init_kmmio(void) +{ + int i; + for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) + INIT_LIST_HEAD(&kmmio_page_table[i]); + + register_die_notifier(&nb_die); + return 0; +} + +void cleanup_kmmio(void) +{ + /* + * Assume the following have been already cleaned by calling + * unregister_kmmio_probe() appropriately: + * kmmio_page_table, kmmio_probes + */ + if (handler_registered) { + unregister_page_fault_handler(&kmmio_pf_hook); + synchronize_rcu(); + } + unregister_die_notifier(&nb_die); +} + +/* + * this is basically a dynamic stabbing problem: + * Could use the existing prio tree code or + * Possible better implementations: + * The Interval Skip List: A Data Structure for Finding All Intervals That + * Overlap a Point (might be simple) + * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup + */ +/* Get the kmmio at this addr (if any). You must be holding kmmio_lock. */ +static struct kmmio_probe *get_kmmio_probe(unsigned long addr) +{ + struct kmmio_probe *p; + list_for_each_entry(p, &kmmio_probes, list) { + if (addr >= p->addr && addr <= (p->addr + p->len)) + return p; + } + return NULL; +} + +static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) +{ + struct list_head *head, *tmp; + + page &= PAGE_MASK; + head = &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)]; + list_for_each(tmp, head) { + struct kmmio_fault_page *p + = list_entry(tmp, struct kmmio_fault_page, list); + if (p->page == page) + return p; + } + + return NULL; +} + +static void arm_kmmio_fault_page(unsigned long page, int *large) +{ + unsigned long address = page & PAGE_MASK; + pgd_t *pgd = pgd_offset_k(address); + pud_t *pud = pud_offset(pgd, address); + pmd_t *pmd = pmd_offset(pud, address); + pte_t *pte = pte_offset_kernel(pmd, address); + + if (pmd_large(*pmd)) { + set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_PRESENT)); + if (large) + *large = 1; + } else { + set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); + } + + __flush_tlb_one(page); +} + +static void disarm_kmmio_fault_page(unsigned long page, int *large) +{ + unsigned long address = page & PAGE_MASK; + pgd_t *pgd = pgd_offset_k(address); + pud_t *pud = pud_offset(pgd, address); + pmd_t *pmd = pmd_offset(pud, address); + pte_t *pte = pte_offset_kernel(pmd, address); + + if (large && *large) { + set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_PRESENT)); + *large = 0; + } else { + set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); + } + + __flush_tlb_one(page); +} + +/* + * Interrupts are disabled on entry as trap3 is an interrupt gate + * and they remain disabled thorough out this function. + */ +static int kmmio_handler(struct pt_regs *regs, unsigned long addr) +{ + struct kmmio_context *ctx; + int cpu; + + /* + * Preemption is now disabled to prevent process switch during + * single stepping. We can only handle one active kmmio trace + * per cpu, so ensure that we finish it before something else + * gets to run. + * + * XXX what if an interrupt occurs between returning from + * do_page_fault() and entering the single-step exception handler? + * And that interrupt triggers a kmmio trap? + */ + preempt_disable(); + cpu = smp_processor_id(); + ctx = &kmmio_ctx[cpu]; + + /* interrupts disabled and CPU-local data => atomicity guaranteed. */ + if (ctx->active) { + /* + * This avoids a deadlock with kmmio_lock. + * If this page fault really was due to kmmio trap, + * all hell breaks loose. + */ + printk(KERN_EMERG "mmiotrace: recursive probe hit on CPU %d, " + "for address %lu. Ignoring.\n", + cpu, addr); + goto no_kmmio; + } + ctx->active++; + + /* + * Acquire the kmmio lock to prevent changes affecting + * get_kmmio_fault_page() and get_kmmio_probe(), since we save their + * returned pointers. + * The lock is released in post_kmmio_handler(). + * XXX: could/should get_kmmio_*() be using RCU instead of spinlock? + */ + spin_lock(&kmmio_lock); + + ctx->fpage = get_kmmio_fault_page(addr); + if (!ctx->fpage) { + /* this page fault is not caused by kmmio */ + goto no_kmmio_locked; + } + + ctx->probe = get_kmmio_probe(addr); + ctx->saved_flags = (regs->flags & (TF_MASK|IF_MASK)); + + if (ctx->probe && ctx->probe->pre_handler) + ctx->probe->pre_handler(ctx->probe, regs, addr); + + regs->flags |= TF_MASK; + regs->flags &= ~IF_MASK; + + /* We hold lock, now we set present bit in PTE and single step. */ + disarm_kmmio_fault_page(ctx->fpage->page, NULL); + + return 1; + +no_kmmio_locked: + spin_unlock(&kmmio_lock); + ctx->active--; +no_kmmio: + preempt_enable_no_resched(); + /* page fault not handled by kmmio */ + return 0; +} + +/* + * Interrupts are disabled on entry as trap1 is an interrupt gate + * and they remain disabled thorough out this function. + * And we hold kmmio lock. + */ +static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + struct kmmio_context *ctx = &kmmio_ctx[cpu]; + + if (!ctx->active) + return 0; + + if (ctx->probe && ctx->probe->post_handler) + ctx->probe->post_handler(ctx->probe, condition, regs); + + arm_kmmio_fault_page(ctx->fpage->page, NULL); + + regs->flags &= ~TF_MASK; + regs->flags |= ctx->saved_flags; + + /* These were acquired in kmmio_handler(). */ + ctx->active--; + spin_unlock(&kmmio_lock); + preempt_enable_no_resched(); + + /* + * if somebody else is singlestepping across a probe point, flags + * will have TF set, in which case, continue the remaining processing + * of do_debug, as if this is not a probe hit. + */ + if (regs->flags & TF_MASK) + return 0; + + return 1; +} + +static int add_kmmio_fault_page(unsigned long page) +{ + struct kmmio_fault_page *f; + + page &= PAGE_MASK; + f = get_kmmio_fault_page(page); + if (f) { + f->count++; + return 0; + } + + f = kmalloc(sizeof(*f), GFP_ATOMIC); + if (!f) + return -1; + + f->count = 1; + f->page = page; + list_add(&f->list, + &kmmio_page_table[hash_long(f->page, KMMIO_PAGE_HASH_BITS)]); + + arm_kmmio_fault_page(f->page, NULL); + + return 0; +} + +static void release_kmmio_fault_page(unsigned long page) +{ + struct kmmio_fault_page *f; + + page &= PAGE_MASK; + f = get_kmmio_fault_page(page); + if (!f) + return; + + f->count--; + if (!f->count) { + disarm_kmmio_fault_page(f->page, NULL); + list_del(&f->list); + } +} + +int register_kmmio_probe(struct kmmio_probe *p) +{ + int ret = 0; + unsigned long size = 0; + + spin_lock_irq(&kmmio_lock); + kmmio_count++; + if (get_kmmio_probe(p->addr)) { + ret = -EEXIST; + goto out; + } + list_add(&p->list, &kmmio_probes); + /*printk("adding fault pages...\n");*/ + while (size < p->len) { + if (add_kmmio_fault_page(p->addr + size)) + printk(KERN_ERR "mmio: Unable to set page fault.\n"); + size += PAGE_SIZE; + } + + if (!handler_registered) { + register_page_fault_handler(&kmmio_pf_hook); + handler_registered++; + } + +out: + spin_unlock_irq(&kmmio_lock); + /* + * XXX: What should I do here? + * Here was a call to global_flush_tlb(), but it does not exist + * anymore. + */ + return ret; +} + +void unregister_kmmio_probe(struct kmmio_probe *p) +{ + unsigned long size = 0; + + spin_lock_irq(&kmmio_lock); + while (size < p->len) { + release_kmmio_fault_page(p->addr + size); + size += PAGE_SIZE; + } + list_del(&p->list); + kmmio_count--; + spin_unlock_irq(&kmmio_lock); +} + +/* + * According to 2.6.20, mainly x86_64 arch: + * This is being called from do_page_fault(), via the page fault notifier + * chain. The chain is called for both user space faults and kernel space + * faults (address >= TASK_SIZE64), except not on faults serviced by + * vmalloc_fault(). + * + * We may be in an interrupt or a critical section. Also prefecthing may + * trigger a page fault. We may be in the middle of process switch. + * The page fault hook functionality has put us inside RCU read lock. + * + * Local interrupts are disabled, so preemption cannot happen. + * Do not enable interrupts, do not sleep, and watch out for other CPUs. + */ +static int kmmio_page_fault(struct pt_regs *regs, unsigned long error_code, + unsigned long address) +{ + if (is_kmmio_active()) + if (kmmio_handler(regs, address) == 1) + return -1; + return 0; +} + +static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, + void *args) +{ + struct die_args *arg = args; + + if (val == DIE_DEBUG) + if (post_kmmio_handler(arg->err, arg->regs) == 1) + return NOTIFY_STOP; + + return NOTIFY_DONE; +} diff --git a/arch/x86/kernel/mmiotrace/kmmio.h b/arch/x86/kernel/mmiotrace/kmmio.h new file mode 100644 index 000000000000..85b7f68a3b8a --- /dev/null +++ b/arch/x86/kernel/mmiotrace/kmmio.h @@ -0,0 +1,58 @@ +#ifndef _LINUX_KMMIO_H +#define _LINUX_KMMIO_H + +#include +#include +#include +#include +#include +#include +#include + +struct kmmio_probe; +struct kmmio_fault_page; +struct pt_regs; + +typedef void (*kmmio_pre_handler_t)(struct kmmio_probe *, + struct pt_regs *, unsigned long addr); +typedef void (*kmmio_post_handler_t)(struct kmmio_probe *, + unsigned long condition, struct pt_regs *); + +struct kmmio_probe { + struct list_head list; + + /* start location of the probe point */ + unsigned long addr; + + /* length of the probe region */ + unsigned long len; + + /* Called before addr is executed. */ + kmmio_pre_handler_t pre_handler; + + /* Called after addr is executed, unless... */ + kmmio_post_handler_t post_handler; +}; + +struct kmmio_fault_page { + struct list_head list; + + /* location of the fault page */ + unsigned long page; + + int count; +}; + +/* kmmio is active by some kmmio_probes? */ +static inline int is_kmmio_active(void) +{ + extern unsigned int kmmio_count; + return kmmio_count; +} + +int init_kmmio(void); +void cleanup_kmmio(void); +int register_kmmio_probe(struct kmmio_probe *p); +void unregister_kmmio_probe(struct kmmio_probe *p); + +#endif /* _LINUX_KMMIO_H */ diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/kernel/mmiotrace/mmio-mod.c new file mode 100644 index 000000000000..73561fe85f03 --- /dev/null +++ b/arch/x86/kernel/mmiotrace/mmio-mod.c @@ -0,0 +1,527 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2005 + * Jeff Muizelaar, 2006, 2007 + * Pekka Paalanen, 2008 + * + * Derived from the read-mod example from relay-examples by Tom Zanussi. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for ISA_START_ADDRESS */ + +#include "kmmio.h" +#include "pf_in.h" + +/* This app's relay channel files will appear in /debug/mmio-trace */ +#define APP_DIR "mmio-trace" +/* the marker injection file in /proc */ +#define MARKER_FILE "mmio-marker" + +#define MODULE_NAME "mmiotrace" + +struct trap_reason { + unsigned long addr; + unsigned long ip; + enum reason_type type; + int active_traces; +}; + +static struct trap_reason pf_reason[NR_CPUS]; +static struct mm_io_header_rw cpu_trace[NR_CPUS]; + +static struct file_operations mmio_fops = { + .owner = THIS_MODULE, +}; + +static const size_t subbuf_size = 256*1024; +static struct rchan *chan; +static struct dentry *dir; +static int suspended; /* XXX should this be per cpu? */ +static struct proc_dir_entry *proc_marker_file; + +/* module parameters */ +static unsigned int n_subbufs = 32*4; +static unsigned long filter_offset; +static int nommiotrace; +static int ISA_trace; +static int trace_pc; + +module_param(n_subbufs, uint, 0); +module_param(filter_offset, ulong, 0); +module_param(nommiotrace, bool, 0); +module_param(ISA_trace, bool, 0); +module_param(trace_pc, bool, 0); + +MODULE_PARM_DESC(n_subbufs, "Number of 256kB buffers, default 128."); +MODULE_PARM_DESC(filter_offset, "Start address of traced mappings."); +MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing."); +MODULE_PARM_DESC(ISA_trace, "Do not exclude the low ISA range."); +MODULE_PARM_DESC(trace_pc, "Record address of faulting instructions."); + +static void record_timestamp(struct mm_io_header *header) +{ + struct timespec now; + + getnstimeofday(&now); + header->sec = now.tv_sec; + header->nsec = now.tv_nsec; +} + +/* + * Write callback for the /proc entry: + * Read a marker and write it to the mmio trace log + */ +static int write_marker(struct file *file, const char __user *buffer, + unsigned long count, void *data) +{ + char *event = NULL; + struct mm_io_header *headp; + int len = (count > 65535) ? 65535 : count; + + event = kzalloc(sizeof(*headp) + len, GFP_KERNEL); + if (!event) + return -ENOMEM; + + headp = (struct mm_io_header *)event; + headp->type = MMIO_MAGIC | (MMIO_MARKER << MMIO_OPCODE_SHIFT); + headp->data_len = len; + record_timestamp(headp); + + if (copy_from_user(event + sizeof(*headp), buffer, len)) { + kfree(event); + return -EFAULT; + } + + relay_write(chan, event, sizeof(*headp) + len); + kfree(event); + return len; +} + +static void print_pte(unsigned long address) +{ + pgd_t *pgd = pgd_offset_k(address); + pud_t *pud = pud_offset(pgd, address); + pmd_t *pmd = pmd_offset(pud, address); + if (pmd_large(*pmd)) { + printk(KERN_EMERG MODULE_NAME ": 4MB pages are not " + "currently supported: %lx\n", + address); + BUG(); + } + printk(KERN_DEBUG MODULE_NAME ": pte for 0x%lx: 0x%lx 0x%lx\n", + address, + pte_val(*pte_offset_kernel(pmd, address)), + pte_val(*pte_offset_kernel(pmd, address)) & _PAGE_PRESENT); +} + +/* + * For some reason the pre/post pairs have been called in an + * unmatched order. Report and die. + */ +static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) +{ + const unsigned long cpu = smp_processor_id(); + printk(KERN_EMERG MODULE_NAME ": unexpected fault for address: %lx, " + "last fault for address: %lx\n", + addr, pf_reason[cpu].addr); + print_pte(addr); +#ifdef __i386__ + print_symbol(KERN_EMERG "faulting EIP is at %s\n", regs->ip); + print_symbol(KERN_EMERG "last faulting EIP was at %s\n", + pf_reason[cpu].ip); + printk(KERN_EMERG + "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", + regs->ax, regs->bx, regs->cx, regs->dx); + printk(KERN_EMERG + "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", + regs->si, regs->di, regs->bp, regs->sp); +#else + print_symbol(KERN_EMERG "faulting RIP is at %s\n", regs->ip); + print_symbol(KERN_EMERG "last faulting RIP was at %s\n", + pf_reason[cpu].ip); + printk(KERN_EMERG "rax: %016lx rcx: %016lx rdx: %016lx\n", + regs->ax, regs->cx, regs->dx); + printk(KERN_EMERG "rsi: %016lx rdi: %016lx " + "rbp: %016lx rsp: %016lx\n", + regs->si, regs->di, regs->bp, regs->sp); +#endif + BUG(); +} + +static void pre(struct kmmio_probe *p, struct pt_regs *regs, + unsigned long addr) +{ + const unsigned long cpu = smp_processor_id(); + const unsigned long instptr = instruction_pointer(regs); + const enum reason_type type = get_ins_type(instptr); + + /* it doesn't make sense to have more than one active trace per cpu */ + if (pf_reason[cpu].active_traces) + die_kmmio_nesting_error(regs, addr); + else + pf_reason[cpu].active_traces++; + + pf_reason[cpu].type = type; + pf_reason[cpu].addr = addr; + pf_reason[cpu].ip = instptr; + + cpu_trace[cpu].header.type = MMIO_MAGIC; + cpu_trace[cpu].header.pid = 0; + cpu_trace[cpu].header.data_len = sizeof(struct mm_io_rw); + cpu_trace[cpu].rw.address = addr; + + /* + * Only record the program counter when requested. + * It may taint clean-room reverse engineering. + */ + if (trace_pc) + cpu_trace[cpu].rw.pc = instptr; + else + cpu_trace[cpu].rw.pc = 0; + + record_timestamp(&cpu_trace[cpu].header); + + switch (type) { + case REG_READ: + cpu_trace[cpu].header.type |= + (MMIO_READ << MMIO_OPCODE_SHIFT) | + (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT); + break; + case REG_WRITE: + cpu_trace[cpu].header.type |= + (MMIO_WRITE << MMIO_OPCODE_SHIFT) | + (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT); + cpu_trace[cpu].rw.value = get_ins_reg_val(instptr, regs); + break; + case IMM_WRITE: + cpu_trace[cpu].header.type |= + (MMIO_WRITE << MMIO_OPCODE_SHIFT) | + (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT); + cpu_trace[cpu].rw.value = get_ins_imm_val(instptr); + break; + default: + { + unsigned char *ip = (unsigned char *)instptr; + cpu_trace[cpu].header.type |= + (MMIO_UNKNOWN_OP << MMIO_OPCODE_SHIFT); + cpu_trace[cpu].rw.value = (*ip) << 16 | + *(ip + 1) << 8 | + *(ip + 2); + } + } +} + +static void post(struct kmmio_probe *p, unsigned long condition, + struct pt_regs *regs) +{ + const unsigned long cpu = smp_processor_id(); + + /* this should always return the active_trace count to 0 */ + pf_reason[cpu].active_traces--; + if (pf_reason[cpu].active_traces) { + printk(KERN_EMERG MODULE_NAME ": unexpected post handler"); + BUG(); + } + + switch (pf_reason[cpu].type) { + case REG_READ: + cpu_trace[cpu].rw.value = get_ins_reg_val(pf_reason[cpu].ip, + regs); + break; + default: + break; + } + relay_write(chan, &cpu_trace[cpu], sizeof(struct mm_io_header_rw)); +} + +/* + * subbuf_start() relay callback. + * + * Defined so that we know when events are dropped due to the buffer-full + * condition. + */ +static int subbuf_start_handler(struct rchan_buf *buf, void *subbuf, + void *prev_subbuf, size_t prev_padding) +{ + if (relay_buf_full(buf)) { + if (!suspended) { + suspended = 1; + printk(KERN_ERR MODULE_NAME + ": cpu %d buffer full!!!\n", + smp_processor_id()); + } + return 0; + } else if (suspended) { + suspended = 0; + printk(KERN_ERR MODULE_NAME + ": cpu %d buffer no longer full.\n", + smp_processor_id()); + } + + return 1; +} + +/* file_create() callback. Creates relay file in debugfs. */ +static struct dentry *create_buf_file_handler(const char *filename, + struct dentry *parent, + int mode, + struct rchan_buf *buf, + int *is_global) +{ + struct dentry *buf_file; + + mmio_fops.read = relay_file_operations.read; + mmio_fops.open = relay_file_operations.open; + mmio_fops.poll = relay_file_operations.poll; + mmio_fops.mmap = relay_file_operations.mmap; + mmio_fops.release = relay_file_operations.release; + mmio_fops.splice_read = relay_file_operations.splice_read; + + buf_file = debugfs_create_file(filename, mode, parent, buf, + &mmio_fops); + + return buf_file; +} + +/* file_remove() default callback. Removes relay file in debugfs. */ +static int remove_buf_file_handler(struct dentry *dentry) +{ + debugfs_remove(dentry); + return 0; +} + +static struct rchan_callbacks relay_callbacks = { + .subbuf_start = subbuf_start_handler, + .create_buf_file = create_buf_file_handler, + .remove_buf_file = remove_buf_file_handler, +}; + +/* + * create_channel - creates channel /debug/APP_DIR/cpuXXX + * Returns channel on success, NULL otherwise + */ +static struct rchan *create_channel(unsigned size, unsigned n) +{ + return relay_open("cpu", dir, size, n, &relay_callbacks, NULL); +} + +/* destroy_channel - destroys channel /debug/APP_DIR/cpuXXX */ +static void destroy_channel(void) +{ + if (chan) { + relay_close(chan); + chan = NULL; + } +} + +struct remap_trace { + struct list_head list; + struct kmmio_probe probe; +}; +static LIST_HEAD(trace_list); +static DEFINE_SPINLOCK(trace_list_lock); + +static void do_ioremap_trace_core(unsigned long offset, unsigned long size, + void __iomem *addr) +{ + struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL); + struct mm_io_header_map event = { + .header = { + .type = MMIO_MAGIC | + (MMIO_PROBE << MMIO_OPCODE_SHIFT), + .sec = 0, + .nsec = 0, + .pid = 0, + .data_len = sizeof(struct mm_io_map) + }, + .map = { + .phys = offset, + .addr = (unsigned long)addr, + .len = size, + .pc = 0 + } + }; + record_timestamp(&event.header); + + *trace = (struct remap_trace) { + .probe = { + .addr = (unsigned long)addr, + .len = size, + .pre_handler = pre, + .post_handler = post, + } + }; + + relay_write(chan, &event, sizeof(event)); + spin_lock(&trace_list_lock); + list_add_tail(&trace->list, &trace_list); + spin_unlock(&trace_list_lock); + if (!nommiotrace) + register_kmmio_probe(&trace->probe); +} + +static void ioremap_trace_core(unsigned long offset, unsigned long size, + void __iomem *addr) +{ + if ((filter_offset) && (offset != filter_offset)) + return; + + /* Don't trace the low PCI/ISA area, it's always mapped.. */ + if (!ISA_trace && (offset < ISA_END_ADDRESS) && + (offset + size > ISA_START_ADDRESS)) { + printk(KERN_NOTICE MODULE_NAME ": Ignoring map of low " + "PCI/ISA area (0x%lx-0x%lx)\n", + offset, offset + size); + return; + } + do_ioremap_trace_core(offset, size, addr); +} + +void __iomem *ioremap_cache_trace(unsigned long offset, unsigned long size) +{ + void __iomem *p = ioremap_cache(offset, size); + printk(KERN_DEBUG MODULE_NAME ": ioremap_cache(0x%lx, 0x%lx) = %p\n", + offset, size, p); + ioremap_trace_core(offset, size, p); + return p; +} +EXPORT_SYMBOL(ioremap_cache_trace); + +void __iomem *ioremap_nocache_trace(unsigned long offset, unsigned long size) +{ + void __iomem *p = ioremap_nocache(offset, size); + printk(KERN_DEBUG MODULE_NAME ": ioremap_nocache(0x%lx, 0x%lx) = %p\n", + offset, size, p); + ioremap_trace_core(offset, size, p); + return p; +} +EXPORT_SYMBOL(ioremap_nocache_trace); + +void iounmap_trace(volatile void __iomem *addr) +{ + struct mm_io_header_map event = { + .header = { + .type = MMIO_MAGIC | + (MMIO_UNPROBE << MMIO_OPCODE_SHIFT), + .sec = 0, + .nsec = 0, + .pid = 0, + .data_len = sizeof(struct mm_io_map) + }, + .map = { + .phys = 0, + .addr = (unsigned long)addr, + .len = 0, + .pc = 0 + } + }; + struct remap_trace *trace; + struct remap_trace *tmp; + printk(KERN_DEBUG MODULE_NAME ": Unmapping %p.\n", addr); + record_timestamp(&event.header); + + spin_lock(&trace_list_lock); + list_for_each_entry_safe(trace, tmp, &trace_list, list) { + if ((unsigned long)addr == trace->probe.addr) { + if (!nommiotrace) + unregister_kmmio_probe(&trace->probe); + list_del(&trace->list); + kfree(trace); + break; + } + } + spin_unlock(&trace_list_lock); + relay_write(chan, &event, sizeof(event)); + iounmap(addr); +} +EXPORT_SYMBOL(iounmap_trace); + +static void clear_trace_list(void) +{ + struct remap_trace *trace; + struct remap_trace *tmp; + + spin_lock(&trace_list_lock); + list_for_each_entry_safe(trace, tmp, &trace_list, list) { + printk(KERN_WARNING MODULE_NAME ": purging non-iounmapped " + "trace @0x%08lx, size 0x%lx.\n", + trace->probe.addr, trace->probe.len); + if (!nommiotrace) + unregister_kmmio_probe(&trace->probe); + list_del(&trace->list); + kfree(trace); + break; + } + spin_unlock(&trace_list_lock); +} + +static int __init init(void) +{ + if (n_subbufs < 2) + return -EINVAL; + + dir = debugfs_create_dir(APP_DIR, NULL); + if (!dir) { + printk(KERN_ERR MODULE_NAME + ": Couldn't create relay app directory.\n"); + return -ENOMEM; + } + + chan = create_channel(subbuf_size, n_subbufs); + if (!chan) { + debugfs_remove(dir); + printk(KERN_ERR MODULE_NAME + ": relay app channel creation failed\n"); + return -ENOMEM; + } + + init_kmmio(); + + proc_marker_file = create_proc_entry(MARKER_FILE, 0, NULL); + if (proc_marker_file) + proc_marker_file->write_proc = write_marker; + + printk(KERN_DEBUG MODULE_NAME ": loaded.\n"); + if (nommiotrace) + printk(KERN_DEBUG MODULE_NAME ": MMIO tracing disabled.\n"); + if (ISA_trace) + printk(KERN_WARNING MODULE_NAME + ": Warning! low ISA range will be traced.\n"); + return 0; +} + +static void __exit cleanup(void) +{ + printk(KERN_DEBUG MODULE_NAME ": unload...\n"); + clear_trace_list(); + cleanup_kmmio(); + remove_proc_entry(MARKER_FILE, NULL); + destroy_channel(); + if (dir) + debugfs_remove(dir); +} + +module_init(init); +module_exit(cleanup); +MODULE_LICENSE("GPL"); diff --git a/arch/x86/kernel/mmiotrace/pf_in.c b/arch/x86/kernel/mmiotrace/pf_in.c new file mode 100644 index 000000000000..67ea520dde62 --- /dev/null +++ b/arch/x86/kernel/mmiotrace/pf_in.c @@ -0,0 +1,489 @@ +/* + * Fault Injection Test harness (FI) + * Copyright (C) Intel Crop. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, + * USA. + * + */ + +/* $Id: pf_in.c,v 1.1.1.1 2002/11/12 05:56:32 brlock Exp $ + * Copyright by Intel Crop., 2002 + * Louis Zhuang (louis.zhuang@intel.com) + * + * Bjorn Steinbrink (B.Steinbrink@gmx.de), 2007 + */ + +#include +#include /* struct pt_regs */ +#include "pf_in.h" + +#ifdef __i386__ +/* IA32 Manual 3, 2-1 */ +static unsigned char prefix_codes[] = { + 0xF0, 0xF2, 0xF3, 0x2E, 0x36, 0x3E, 0x26, 0x64, + 0x65, 0x2E, 0x3E, 0x66, 0x67 +}; +/* IA32 Manual 3, 3-432*/ +static unsigned int reg_rop[] = { + 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F +}; +static unsigned int reg_wop[] = { 0x88, 0x89 }; +static unsigned int imm_wop[] = { 0xC6, 0xC7 }; +/* IA32 Manual 3, 3-432*/ +static unsigned int rw8[] = { 0x88, 0x8A, 0xC6 }; +static unsigned int rw32[] = { + 0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F +}; +static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F }; +static unsigned int mw16[] = { 0xB70F, 0xBF0F }; +static unsigned int mw32[] = { 0x89, 0x8B, 0xC7 }; +static unsigned int mw64[] = {}; +#else /* not __i386__ */ +static unsigned char prefix_codes[] = { + 0x66, 0x67, 0x2E, 0x3E, 0x26, 0x64, 0x65, 0x36, + 0xF0, 0xF3, 0xF2, + /* REX Prefixes */ + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f +}; +/* AMD64 Manual 3, Appendix A*/ +static unsigned int reg_rop[] = { + 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F +}; +static unsigned int reg_wop[] = { 0x88, 0x89 }; +static unsigned int imm_wop[] = { 0xC6, 0xC7 }; +static unsigned int rw8[] = { 0xC6, 0x88, 0x8A }; +static unsigned int rw32[] = { + 0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F +}; +/* 8 bit only */ +static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F }; +/* 16 bit only */ +static unsigned int mw16[] = { 0xB70F, 0xBF0F }; +/* 16 or 32 bit */ +static unsigned int mw32[] = { 0xC7 }; +/* 16, 32 or 64 bit */ +static unsigned int mw64[] = { 0x89, 0x8B }; +#endif /* not __i386__ */ + +static int skip_prefix(unsigned char *addr, int *shorted, int *enlarged, + int *rexr) +{ + int i; + unsigned char *p = addr; + *shorted = 0; + *enlarged = 0; + *rexr = 0; + +restart: + for (i = 0; i < ARRAY_SIZE(prefix_codes); i++) { + if (*p == prefix_codes[i]) { + if (*p == 0x66) + *shorted = 1; +#ifdef __amd64__ + if ((*p & 0xf8) == 0x48) + *enlarged = 1; + if ((*p & 0xf4) == 0x44) + *rexr = 1; +#endif + p++; + goto restart; + } + } + + return (p - addr); +} + +static int get_opcode(unsigned char *addr, unsigned int *opcode) +{ + int len; + + if (*addr == 0x0F) { + /* 0x0F is extension instruction */ + *opcode = *(unsigned short *)addr; + len = 2; + } else { + *opcode = *addr; + len = 1; + } + + return len; +} + +#define CHECK_OP_TYPE(opcode, array, type) \ + for (i = 0; i < ARRAY_SIZE(array); i++) { \ + if (array[i] == opcode) { \ + rv = type; \ + goto exit; \ + } \ + } + +enum reason_type get_ins_type(unsigned long ins_addr) +{ + unsigned int opcode; + unsigned char *p; + int shorted, enlarged, rexr; + int i; + enum reason_type rv = OTHERS; + + p = (unsigned char *)ins_addr; + p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += get_opcode(p, &opcode); + + CHECK_OP_TYPE(opcode, reg_rop, REG_READ); + CHECK_OP_TYPE(opcode, reg_wop, REG_WRITE); + CHECK_OP_TYPE(opcode, imm_wop, IMM_WRITE); + +exit: + return rv; +} +#undef CHECK_OP_TYPE + +static unsigned int get_ins_reg_width(unsigned long ins_addr) +{ + unsigned int opcode; + unsigned char *p; + int i, shorted, enlarged, rexr; + + p = (unsigned char *)ins_addr; + p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += get_opcode(p, &opcode); + + for (i = 0; i < ARRAY_SIZE(rw8); i++) + if (rw8[i] == opcode) + return 1; + + for (i = 0; i < ARRAY_SIZE(rw32); i++) + if (rw32[i] == opcode) + return (shorted ? 2 : (enlarged ? 8 : 4)); + + printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode); + return 0; +} + +unsigned int get_ins_mem_width(unsigned long ins_addr) +{ + unsigned int opcode; + unsigned char *p; + int i, shorted, enlarged, rexr; + + p = (unsigned char *)ins_addr; + p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += get_opcode(p, &opcode); + + for (i = 0; i < ARRAY_SIZE(mw8); i++) + if (mw8[i] == opcode) + return 1; + + for (i = 0; i < ARRAY_SIZE(mw16); i++) + if (mw16[i] == opcode) + return 2; + + for (i = 0; i < ARRAY_SIZE(mw32); i++) + if (mw32[i] == opcode) + return shorted ? 2 : 4; + + for (i = 0; i < ARRAY_SIZE(mw64); i++) + if (mw64[i] == opcode) + return shorted ? 2 : (enlarged ? 8 : 4); + + printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode); + return 0; +} + +/* + * Define register ident in mod/rm byte. + * Note: these are NOT the same as in ptrace-abi.h. + */ +enum { + arg_AL = 0, + arg_CL = 1, + arg_DL = 2, + arg_BL = 3, + arg_AH = 4, + arg_CH = 5, + arg_DH = 6, + arg_BH = 7, + + arg_AX = 0, + arg_CX = 1, + arg_DX = 2, + arg_BX = 3, + arg_SP = 4, + arg_BP = 5, + arg_SI = 6, + arg_DI = 7, +#ifdef __amd64__ + arg_R8 = 8, + arg_R9 = 9, + arg_R10 = 10, + arg_R11 = 11, + arg_R12 = 12, + arg_R13 = 13, + arg_R14 = 14, + arg_R15 = 15 +#endif +}; + +static unsigned char *get_reg_w8(int no, struct pt_regs *regs) +{ + unsigned char *rv = NULL; + + switch (no) { + case arg_AL: + rv = (unsigned char *)®s->ax; + break; + case arg_BL: + rv = (unsigned char *)®s->bx; + break; + case arg_CL: + rv = (unsigned char *)®s->cx; + break; + case arg_DL: + rv = (unsigned char *)®s->dx; + break; + case arg_AH: + rv = 1 + (unsigned char *)®s->ax; + break; + case arg_BH: + rv = 1 + (unsigned char *)®s->bx; + break; + case arg_CH: + rv = 1 + (unsigned char *)®s->cx; + break; + case arg_DH: + rv = 1 + (unsigned char *)®s->dx; + break; +#ifdef __amd64__ + case arg_R8: + rv = (unsigned char *)®s->r8; + break; + case arg_R9: + rv = (unsigned char *)®s->r9; + break; + case arg_R10: + rv = (unsigned char *)®s->r10; + break; + case arg_R11: + rv = (unsigned char *)®s->r11; + break; + case arg_R12: + rv = (unsigned char *)®s->r12; + break; + case arg_R13: + rv = (unsigned char *)®s->r13; + break; + case arg_R14: + rv = (unsigned char *)®s->r14; + break; + case arg_R15: + rv = (unsigned char *)®s->r15; + break; +#endif + default: + printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no); + break; + } + return rv; +} + +static unsigned long *get_reg_w32(int no, struct pt_regs *regs) +{ + unsigned long *rv = NULL; + + switch (no) { + case arg_AX: + rv = ®s->ax; + break; + case arg_BX: + rv = ®s->bx; + break; + case arg_CX: + rv = ®s->cx; + break; + case arg_DX: + rv = ®s->dx; + break; + case arg_SP: + rv = ®s->sp; + break; + case arg_BP: + rv = ®s->bp; + break; + case arg_SI: + rv = ®s->si; + break; + case arg_DI: + rv = ®s->di; + break; +#ifdef __amd64__ + case arg_R8: + rv = ®s->r8; + break; + case arg_R9: + rv = ®s->r9; + break; + case arg_R10: + rv = ®s->r10; + break; + case arg_R11: + rv = ®s->r11; + break; + case arg_R12: + rv = ®s->r12; + break; + case arg_R13: + rv = ®s->r13; + break; + case arg_R14: + rv = ®s->r14; + break; + case arg_R15: + rv = ®s->r15; + break; +#endif + default: + printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no); + } + + return rv; +} + +unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs) +{ + unsigned int opcode; + unsigned char mod_rm; + int reg; + unsigned char *p; + int i, shorted, enlarged, rexr; + unsigned long rv; + + p = (unsigned char *)ins_addr; + p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += get_opcode(p, &opcode); + for (i = 0; i < ARRAY_SIZE(reg_rop); i++) + if (reg_rop[i] == opcode) { + rv = REG_READ; + goto do_work; + } + + for (i = 0; i < ARRAY_SIZE(reg_wop); i++) + if (reg_wop[i] == opcode) { + rv = REG_WRITE; + goto do_work; + } + + printk(KERN_ERR "mmiotrace: Not a register instruction, opcode " + "0x%02x\n", opcode); + goto err; + +do_work: + mod_rm = *p; + reg = ((mod_rm >> 3) & 0x7) | (rexr << 3); + switch (get_ins_reg_width(ins_addr)) { + case 1: + return *get_reg_w8(reg, regs); + + case 2: + return *(unsigned short *)get_reg_w32(reg, regs); + + case 4: + return *(unsigned int *)get_reg_w32(reg, regs); + +#ifdef __amd64__ + case 8: + return *(unsigned long *)get_reg_w32(reg, regs); +#endif + + default: + printk(KERN_ERR "mmiotrace: Error width# %d\n", reg); + } + +err: + return 0; +} + +unsigned long get_ins_imm_val(unsigned long ins_addr) +{ + unsigned int opcode; + unsigned char mod_rm; + unsigned char mod; + unsigned char *p; + int i, shorted, enlarged, rexr; + unsigned long rv; + + p = (unsigned char *)ins_addr; + p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += get_opcode(p, &opcode); + for (i = 0; i < ARRAY_SIZE(imm_wop); i++) + if (imm_wop[i] == opcode) { + rv = IMM_WRITE; + goto do_work; + } + + printk(KERN_ERR "mmiotrace: Not an immediate instruction, opcode " + "0x%02x\n", opcode); + goto err; + +do_work: + mod_rm = *p; + mod = mod_rm >> 6; + p++; + switch (mod) { + case 0: + /* if r/m is 5 we have a 32 disp (IA32 Manual 3, Table 2-2) */ + /* AMD64: XXX Check for address size prefix? */ + if ((mod_rm & 0x7) == 0x5) + p += 4; + break; + + case 1: + p += 1; + break; + + case 2: + p += 4; + break; + + case 3: + default: + printk(KERN_ERR "mmiotrace: not a memory access instruction " + "at 0x%lx, rm_mod=0x%02x\n", + ins_addr, mod_rm); + } + + switch (get_ins_reg_width(ins_addr)) { + case 1: + return *(unsigned char *)p; + + case 2: + return *(unsigned short *)p; + + case 4: + return *(unsigned int *)p; + +#ifdef __amd64__ + case 8: + return *(unsigned long *)p; +#endif + + default: + printk(KERN_ERR "mmiotrace: Error: width.\n"); + } + +err: + return 0; +} diff --git a/arch/x86/kernel/mmiotrace/pf_in.h b/arch/x86/kernel/mmiotrace/pf_in.h new file mode 100644 index 000000000000..e05341a51a27 --- /dev/null +++ b/arch/x86/kernel/mmiotrace/pf_in.h @@ -0,0 +1,39 @@ +/* + * Fault Injection Test harness (FI) + * Copyright (C) Intel Crop. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, + * USA. + * + */ + +#ifndef __PF_H_ +#define __PF_H_ + +enum reason_type { + NOT_ME, /* page fault is not in regions */ + NOTHING, /* access others point in regions */ + REG_READ, /* read from addr to reg */ + REG_WRITE, /* write from reg to addr */ + IMM_WRITE, /* write from imm to addr */ + OTHERS /* Other instructions can not intercept */ +}; + +enum reason_type get_ins_type(unsigned long ins_addr); +unsigned int get_ins_mem_width(unsigned long ins_addr); +unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs); +unsigned long get_ins_imm_val(unsigned long ins_addr); + +#endif /* __PF_H_ */ diff --git a/arch/x86/kernel/mmiotrace/testmmiotrace.c b/arch/x86/kernel/mmiotrace/testmmiotrace.c new file mode 100644 index 000000000000..40e66b0e6480 --- /dev/null +++ b/arch/x86/kernel/mmiotrace/testmmiotrace.c @@ -0,0 +1,77 @@ +/* + * Written by Pekka Paalanen, 2008 + */ +#include +#include + +extern void __iomem *ioremap_nocache_trace(unsigned long offset, + unsigned long size); +extern void iounmap_trace(volatile void __iomem *addr); + +#define MODULE_NAME "testmmiotrace" + +static unsigned long mmio_address; +module_param(mmio_address, ulong, 0); +MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB."); + +static void do_write_test(void __iomem *p) +{ + unsigned int i; + for (i = 0; i < 256; i++) + iowrite8(i, p + i); + for (i = 1024; i < (5 * 1024); i += 2) + iowrite16(i * 12 + 7, p + i); + for (i = (5 * 1024); i < (16 * 1024); i += 4) + iowrite32(i * 212371 + 13, p + i); +} + +static void do_read_test(void __iomem *p) +{ + unsigned int i; + volatile unsigned int v; + for (i = 0; i < 256; i++) + v = ioread8(p + i); + for (i = 1024; i < (5 * 1024); i += 2) + v = ioread16(p + i); + for (i = (5 * 1024); i < (16 * 1024); i += 4) + v = ioread32(p + i); +} + +static void do_test(void) +{ + void __iomem *p = ioremap_nocache_trace(mmio_address, 0x4000); + if (!p) { + printk(KERN_ERR MODULE_NAME ": could not ioremap IO memory, " + "aborting.\n"); + return; + } + do_write_test(p); + do_read_test(p); + iounmap_trace(p); +} + +static int __init init(void) +{ + if (mmio_address == 0) { + printk(KERN_ERR MODULE_NAME ": you have to use the module " + "argument mmio_address.\n"); + printk(KERN_ERR MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS" + " YOU REALLY KNOW WHAT YOU ARE DOING!\n"); + return -ENXIO; + } + + printk(KERN_WARNING MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx " + "in PCI address space, and writing " + "rubbish in there.\n", mmio_address); + do_test(); + return 0; +} + +static void __exit cleanup(void) +{ + printk(KERN_DEBUG MODULE_NAME ": unloaded.\n"); +} + +module_init(init); +module_exit(cleanup); +MODULE_LICENSE("GPL"); diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h new file mode 100644 index 000000000000..cb247825f3ec --- /dev/null +++ b/include/linux/mmiotrace.h @@ -0,0 +1,62 @@ +#ifndef MMIOTRACE_H +#define MMIOTRACE_H + +#include + +#define MMIO_VERSION 0x04 + +/* mm_io_header.type */ +#define MMIO_OPCODE_MASK 0xff +#define MMIO_OPCODE_SHIFT 0 +#define MMIO_WIDTH_MASK 0xff00 +#define MMIO_WIDTH_SHIFT 8 +#define MMIO_MAGIC (0x6f000000 | (MMIO_VERSION<<16)) +#define MMIO_MAGIC_MASK 0xffff0000 + +enum mm_io_opcode { /* payload type: */ + MMIO_READ = 0x1, /* struct mm_io_rw */ + MMIO_WRITE = 0x2, /* struct mm_io_rw */ + MMIO_PROBE = 0x3, /* struct mm_io_map */ + MMIO_UNPROBE = 0x4, /* struct mm_io_map */ + MMIO_MARKER = 0x5, /* raw char data */ + MMIO_UNKNOWN_OP = 0x6, /* struct mm_io_rw */ +}; + +struct mm_io_header { + __u32 type; + __u32 sec; /* timestamp */ + __u32 nsec; + __u32 pid; /* PID of the process, or 0 for kernel core */ + __u16 data_len; /* length of the following payload */ +}; + +struct mm_io_rw { + __u64 address; /* virtual address of register */ + __u64 value; + __u64 pc; /* optional program counter */ +}; + +struct mm_io_map { + __u64 phys; /* base address in PCI space */ + __u64 addr; /* base virtual address */ + __u64 len; /* mapping size */ + __u64 pc; /* optional program counter */ +}; + + +/* + * These structures are used to allow a single relay_write() + * call to write a full packet. + */ + +struct mm_io_header_rw { + struct mm_io_header header; + struct mm_io_rw rw; +} __attribute__((packed)); + +struct mm_io_header_map { + struct mm_io_header header; + struct mm_io_map map; +} __attribute__((packed)); + +#endif /* MMIOTRACE_H */ -- cgit v1.2.3 From 63ffa3e456c1a9884a3ebac997d91e3fdae18d78 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: x86 mmiotrace: comment about user space ABI Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/mmiotrace.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index cb247825f3ec..6ec288f1fe24 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -3,6 +3,10 @@ #include +/* + * If you change anything here, you must bump MMIO_VERSION. + * This is the relay data format for user space. + */ #define MMIO_VERSION 0x04 /* mm_io_header.type */ @@ -23,7 +27,7 @@ enum mm_io_opcode { /* payload type: */ }; struct mm_io_header { - __u32 type; + __u32 type; /* see MMIO_* macros above */ __u32 sec; /* timestamp */ __u32 nsec; __u32 pid; /* PID of the process, or 0 for kernel core */ -- cgit v1.2.3 From 10c43d2eb50c9a5ad60388b9d3c41c31150049e6 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: x86: explicit call to mmiotrace in do_page_fault() The custom page fault handler list is replaced with a single function pointer. All related functions and variables are renamed for mmiotrace. Signed-off-by: Pekka Paalanen Cc: Christoph Hellwig Cc: Arjan van de Ven Cc: pq@iki.fi Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig.debug | 14 ++++----- arch/x86/kernel/mmiotrace/kmmio.c | 14 ++++----- arch/x86/mm/fault.c | 66 ++++++++++++++++++++------------------- include/asm-x86/kdebug.h | 12 +++---- 4 files changed, 52 insertions(+), 54 deletions(-) (limited to 'include') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 7c6496e2225e..9491c0ae03a3 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -168,20 +168,18 @@ config IOMMU_LEAK Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. -config PAGE_FAULT_HANDLERS - bool "Custom page fault handlers" - depends on DEBUG_KERNEL - help - Allow the use of custom page fault handlers. A kernel module may - register a function that is called on every page fault. Custom - handlers are used by some debugging and reverse engineering tools. +config MMIOTRACE_HOOKS + bool + default n config MMIOTRACE tristate "Memory mapped IO tracing" - depends on DEBUG_KERNEL && PAGE_FAULT_HANDLERS && RELAY && DEBUG_FS + depends on DEBUG_KERNEL && RELAY && DEBUG_FS + select MMIOTRACE_HOOKS default n help This will build a kernel module called mmiotrace. + Making this a built-in is heavily discouraged. Mmiotrace traces Memory Mapped I/O access and is meant for debugging and reverse engineering. The kernel module offers wrapped diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/kernel/mmiotrace/kmmio.c index 28411dadb8b3..e759f7c3878f 100644 --- a/arch/x86/kernel/mmiotrace/kmmio.c +++ b/arch/x86/kernel/mmiotrace/kmmio.c @@ -51,10 +51,6 @@ static LIST_HEAD(kmmio_probes); static struct kmmio_context kmmio_ctx[NR_CPUS]; -static struct pf_handler kmmio_pf_hook = { - .handler = kmmio_page_fault -}; - static struct notifier_block nb_die = { .notifier_call = kmmio_die_notifier }; @@ -77,7 +73,8 @@ void cleanup_kmmio(void) * kmmio_page_table, kmmio_probes */ if (handler_registered) { - unregister_page_fault_handler(&kmmio_pf_hook); + if (mmiotrace_unregister_pf(&kmmio_page_fault)) + BUG(); synchronize_rcu(); } unregister_die_notifier(&nb_die); @@ -343,8 +340,11 @@ int register_kmmio_probe(struct kmmio_probe *p) } if (!handler_registered) { - register_page_fault_handler(&kmmio_pf_hook); - handler_registered++; + if (mmiotrace_register_pf(&kmmio_page_fault)) + printk(KERN_ERR "mmiotrace: Cannot register page " + "fault handler.\n"); + else + handler_registered++; } out: diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 343f5c1aacc8..e9a086a1a9ff 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -49,53 +49,55 @@ #define PF_RSVD (1<<3) #define PF_INSTR (1<<4) -#ifdef CONFIG_PAGE_FAULT_HANDLERS -static HLIST_HEAD(pf_handlers); /* protected by RCU */ -static DEFINE_SPINLOCK(pf_handlers_writer); +#ifdef CONFIG_MMIOTRACE_HOOKS +static pf_handler_func mmiotrace_pf_handler; /* protected by RCU */ +static DEFINE_SPINLOCK(mmiotrace_handler_lock); -void register_page_fault_handler(struct pf_handler *new_pfh) +int mmiotrace_register_pf(pf_handler_func new_pfh) { + int ret = 0; unsigned long flags; - spin_lock_irqsave(&pf_handlers_writer, flags); - hlist_add_head_rcu(&new_pfh->hlist, &pf_handlers); - spin_unlock_irqrestore(&pf_handlers_writer, flags); + spin_lock_irqsave(&mmiotrace_handler_lock, flags); + if (mmiotrace_pf_handler) + ret = -EBUSY; + else + mmiotrace_pf_handler = new_pfh; + spin_unlock_irqrestore(&mmiotrace_handler_lock, flags); + return ret; } -EXPORT_SYMBOL_GPL(register_page_fault_handler); +EXPORT_SYMBOL_GPL(mmiotrace_register_pf); /** - * unregister_page_fault_handler: + * mmiotrace_unregister_pf: * The caller must ensure @old_pfh is not in use anymore before freeing it. - * This function does not guarantee it. The list of handlers is protected by - * RCU, so you can do this by e.g. calling synchronize_rcu(). + * This function does not guarantee it. The handler function pointer is + * protected by RCU, so you can do this by e.g. calling synchronize_rcu(). */ -void unregister_page_fault_handler(struct pf_handler *old_pfh) +int mmiotrace_unregister_pf(pf_handler_func old_pfh) { + int ret = 0; unsigned long flags; - spin_lock_irqsave(&pf_handlers_writer, flags); - hlist_del_rcu(&old_pfh->hlist); - spin_unlock_irqrestore(&pf_handlers_writer, flags); + spin_lock_irqsave(&mmiotrace_handler_lock, flags); + if (mmiotrace_pf_handler != old_pfh) + ret = -EPERM; + else + mmiotrace_pf_handler = NULL; + spin_unlock_irqrestore(&mmiotrace_handler_lock, flags); + return ret; } -EXPORT_SYMBOL_GPL(unregister_page_fault_handler); -#endif +EXPORT_SYMBOL_GPL(mmiotrace_unregister_pf); +#endif /* CONFIG_MMIOTRACE_HOOKS */ /* returns non-zero if do_page_fault() should return */ -static int handle_custom_pf(struct pt_regs *regs, unsigned long error_code, - unsigned long address) +static inline int call_mmiotrace(struct pt_regs *regs, + unsigned long error_code, + unsigned long address) { -#ifdef CONFIG_PAGE_FAULT_HANDLERS +#ifdef CONFIG_MMIOTRACE_HOOKS int ret = 0; - struct pf_handler *cur; - struct hlist_node *ncur; - - if (hlist_empty(&pf_handlers)) - return 0; - rcu_read_lock(); - hlist_for_each_entry_rcu(cur, ncur, &pf_handlers, hlist) { - ret = cur->handler(regs, error_code, address); - if (ret) - break; - } + if (mmiotrace_pf_handler) + ret = mmiotrace_pf_handler(regs, error_code, address); rcu_read_unlock(); return ret; #else @@ -655,7 +657,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) if (notify_page_fault(regs)) return; - if (handle_custom_pf(regs, error_code, address)) + if (call_mmiotrace(regs, error_code, address)) return; /* diff --git a/include/asm-x86/kdebug.h b/include/asm-x86/kdebug.h index a80f2d6cc737..7063281040da 100644 --- a/include/asm-x86/kdebug.h +++ b/include/asm-x86/kdebug.h @@ -35,13 +35,11 @@ extern void show_regs(struct pt_regs *regs); extern unsigned long oops_begin(void); extern void oops_end(unsigned long, struct pt_regs *, int signr); -struct pf_handler { - struct hlist_node hlist; - int (*handler)(struct pt_regs *regs, unsigned long error_code, - unsigned long address); -}; +typedef int (*pf_handler_func)(struct pt_regs *regs, + unsigned long error_code, + unsigned long address); -extern void register_page_fault_handler(struct pf_handler *new_pfh); -extern void unregister_page_fault_handler(struct pf_handler *old_pfh); +extern int mmiotrace_register_pf(pf_handler_func new_pfh); +extern int mmiotrace_unregister_pf(pf_handler_func old_pfh); #endif -- cgit v1.2.3 From 0fd0e3da4557c479b820b9a4a7afa25b4637ddf2 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: x86: mmiotrace full patch, preview 1 kmmio.c handles the list of mmio probes with callbacks, list of traced pages, and attaching into the page fault handler and die notifier. It arms, traps and disarms the given pages, this is the core of mmiotrace. mmio-mod.c is a user interface, hooking into ioremap functions and registering the mmio probes. It also decodes the required information from trapped mmio accesses via the pre and post callbacks in each probe. Currently, hooking into ioremap functions works by redefining the symbols of the target (binary) kernel module, so that it calls the traced versions of the functions. The most notable changes done since the last discussion are: - kmmio.c is a built-in, not part of the module - direct call from fault.c to kmmio.c, removing all dynamic hooks - prepare for unregistering probes at any time - make kmmio re-initializable and accessible to more than one user - rewrite kmmio locking to remove all spinlocks from page fault path Can I abuse call_rcu() like I do in kmmio.c:unregister_kmmio_probe() or is there a better way? The function called via call_rcu() itself calls call_rcu() again, will this work or break? There I need a second grace period for RCU after the first grace period for page faults. Mmiotrace itself (mmio-mod.c) is still a module, I am going to attack that next. At some point I will start looking into how to make mmiotrace a tracer component of ftrace (thanks for the hint, Ingo). Ftrace should make the user space part of mmiotracing as simple as 'cat /debug/trace/mmio > dump.txt'. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/init_task.c | 1 - arch/x86/kernel/mmiotrace/Makefile | 8 +- arch/x86/kernel/mmiotrace/kmmio.c | 349 ++++++++++++++++++++---------- arch/x86/kernel/mmiotrace/kmmio.h | 58 ----- arch/x86/kernel/mmiotrace/mmio-mod.c | 81 ++++--- arch/x86/kernel/mmiotrace/pf_in.c | 2 +- arch/x86/kernel/mmiotrace/testmmiotrace.c | 13 +- arch/x86/mm/fault.c | 59 +---- include/asm-x86/kdebug.h | 7 - include/linux/mmiotrace.h | 38 ++++ 10 files changed, 335 insertions(+), 281 deletions(-) delete mode 100644 arch/x86/kernel/mmiotrace/kmmio.h (limited to 'include') diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c index 027a5b6a12b2..a4f93b4120c1 100644 --- a/arch/x86/kernel/init_task.c +++ b/arch/x86/kernel/init_task.c @@ -15,7 +15,6 @@ static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); EXPORT_UNUSED_SYMBOL(init_mm); /* will be removed in 2.6.26 */ -EXPORT_SYMBOL_GPL(init_mm); /* * Initial thread structure. diff --git a/arch/x86/kernel/mmiotrace/Makefile b/arch/x86/kernel/mmiotrace/Makefile index d6905f7f981b..cf1e747b463e 100644 --- a/arch/x86/kernel/mmiotrace/Makefile +++ b/arch/x86/kernel/mmiotrace/Makefile @@ -1,4 +1,4 @@ -obj-$(CONFIG_MMIOTRACE) += mmiotrace.o -mmiotrace-objs := pf_in.o kmmio.o mmio-mod.o - -obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o +obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o +obj-$(CONFIG_MMIOTRACE) += mmiotrace.o +mmiotrace-objs := pf_in.o mmio-mod.o +obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/kernel/mmiotrace/kmmio.c index 5e239d0b8467..539a9b19588f 100644 --- a/arch/x86/kernel/mmiotrace/kmmio.c +++ b/arch/x86/kernel/mmiotrace/kmmio.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -17,70 +18,119 @@ #include #include #include +#include #include #include #include #include #include -#include "kmmio.h" +#include -#define KMMIO_HASH_BITS 6 -#define KMMIO_TABLE_SIZE (1 << KMMIO_HASH_BITS) #define KMMIO_PAGE_HASH_BITS 4 #define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS) +struct kmmio_fault_page { + struct list_head list; + struct kmmio_fault_page *release_next; + unsigned long page; /* location of the fault page */ + + /* + * Number of times this page has been registered as a part + * of a probe. If zero, page is disarmed and this may be freed. + * Used only by writers (RCU). + */ + int count; +}; + +struct kmmio_delayed_release { + struct rcu_head rcu; + struct kmmio_fault_page *release_list; +}; + struct kmmio_context { struct kmmio_fault_page *fpage; struct kmmio_probe *probe; unsigned long saved_flags; + unsigned long addr; int active; }; -static int kmmio_page_fault(struct pt_regs *regs, unsigned long error_code, - unsigned long address); static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args); +static DECLARE_MUTEX(kmmio_init_mutex); static DEFINE_SPINLOCK(kmmio_lock); /* These are protected by kmmio_lock */ +static int kmmio_initialized; unsigned int kmmio_count; -static unsigned int handler_registered; + +/* Read-protected by RCU, write-protected by kmmio_lock. */ static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE]; static LIST_HEAD(kmmio_probes); +static struct list_head *kmmio_page_list(unsigned long page) +{ + return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)]; +} + /* Accessed per-cpu */ static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx); +/* protected by kmmio_init_mutex */ static struct notifier_block nb_die = { .notifier_call = kmmio_die_notifier }; -int init_kmmio(void) +/** + * Makes sure kmmio is initialized and usable. + * This must be called before any other kmmio function defined here. + * May sleep. + */ +void reference_kmmio(void) { - int i; - for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) - INIT_LIST_HEAD(&kmmio_page_table[i]); - - register_die_notifier(&nb_die); - return 0; + down(&kmmio_init_mutex); + spin_lock_irq(&kmmio_lock); + if (!kmmio_initialized) { + int i; + for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) + INIT_LIST_HEAD(&kmmio_page_table[i]); + if (register_die_notifier(&nb_die)) + BUG(); + } + kmmio_initialized++; + spin_unlock_irq(&kmmio_lock); + up(&kmmio_init_mutex); } +EXPORT_SYMBOL_GPL(reference_kmmio); -void cleanup_kmmio(void) +/** + * Clean up kmmio after use. This must be called for every call to + * reference_kmmio(). All probes registered after the corresponding + * reference_kmmio() must have been unregistered when calling this. + * May sleep. + */ +void unreference_kmmio(void) { - /* - * Assume the following have been already cleaned by calling - * unregister_kmmio_probe() appropriately: - * kmmio_page_table, kmmio_probes - */ - if (handler_registered) { - if (mmiotrace_unregister_pf(&kmmio_page_fault)) - BUG(); - synchronize_rcu(); + bool unreg = false; + + down(&kmmio_init_mutex); + spin_lock_irq(&kmmio_lock); + + if (kmmio_initialized == 1) { + BUG_ON(is_kmmio_active()); + unreg = true; } - unregister_die_notifier(&nb_die); + kmmio_initialized--; + BUG_ON(kmmio_initialized < 0); + spin_unlock_irq(&kmmio_lock); + + if (unreg) + unregister_die_notifier(&nb_die); /* calls sync_rcu() */ + up(&kmmio_init_mutex); } +EXPORT_SYMBOL(unreference_kmmio); /* * this is basically a dynamic stabbing problem: @@ -90,33 +140,33 @@ void cleanup_kmmio(void) * Overlap a Point (might be simple) * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup */ -/* Get the kmmio at this addr (if any). You must be holding kmmio_lock. */ +/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */ static struct kmmio_probe *get_kmmio_probe(unsigned long addr) { struct kmmio_probe *p; - list_for_each_entry(p, &kmmio_probes, list) { + list_for_each_entry_rcu(p, &kmmio_probes, list) { if (addr >= p->addr && addr <= (p->addr + p->len)) return p; } return NULL; } +/* You must be holding RCU read lock. */ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) { - struct list_head *head, *tmp; + struct list_head *head; + struct kmmio_fault_page *p; page &= PAGE_MASK; - head = &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)]; - list_for_each(tmp, head) { - struct kmmio_fault_page *p - = list_entry(tmp, struct kmmio_fault_page, list); + head = kmmio_page_list(page); + list_for_each_entry_rcu(p, head, list) { if (p->page == page) return p; } - return NULL; } +/** Mark the given page as not present. Access to it will trigger a fault. */ static void arm_kmmio_fault_page(unsigned long page, int *page_level) { unsigned long address = page & PAGE_MASK; @@ -124,8 +174,8 @@ static void arm_kmmio_fault_page(unsigned long page, int *page_level) pte_t *pte = lookup_address(address, &level); if (!pte) { - printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n", - __FUNCTION__, page); + pr_err("kmmio: Error in %s: no pte for page 0x%08lx\n", + __func__, page); return; } @@ -143,6 +193,7 @@ static void arm_kmmio_fault_page(unsigned long page, int *page_level) __flush_tlb_one(page); } +/** Mark the given page as present. */ static void disarm_kmmio_fault_page(unsigned long page, int *page_level) { unsigned long address = page & PAGE_MASK; @@ -150,8 +201,8 @@ static void disarm_kmmio_fault_page(unsigned long page, int *page_level) pte_t *pte = lookup_address(address, &level); if (!pte) { - printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n", - __FUNCTION__, page); + pr_err("kmmio: Error in %s: no pte for page 0x%08lx\n", + __func__, page); return; } @@ -169,13 +220,25 @@ static void disarm_kmmio_fault_page(unsigned long page, int *page_level) __flush_tlb_one(page); } +/* + * This is being called from do_page_fault(). + * + * We may be in an interrupt or a critical section. Also prefecthing may + * trigger a page fault. We may be in the middle of process switch. + * We cannot take any locks, because we could be executing especially + * within a kmmio critical section. + * + * Local interrupts are disabled, so preemption cannot happen. + * Do not enable interrupts, do not sleep, and watch out for other CPUs. + */ /* * Interrupts are disabled on entry as trap3 is an interrupt gate * and they remain disabled thorough out this function. */ -static int kmmio_handler(struct pt_regs *regs, unsigned long addr) +int kmmio_handler(struct pt_regs *regs, unsigned long addr) { - struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); + struct kmmio_context *ctx; + struct kmmio_fault_page *faultpage; /* * Preemption is now disabled to prevent process switch during @@ -186,40 +249,40 @@ static int kmmio_handler(struct pt_regs *regs, unsigned long addr) * XXX what if an interrupt occurs between returning from * do_page_fault() and entering the single-step exception handler? * And that interrupt triggers a kmmio trap? + * XXX If we tracing an interrupt service routine or whatever, is + * this enough to keep it on the current cpu? */ preempt_disable(); - /* interrupts disabled and CPU-local data => atomicity guaranteed. */ + rcu_read_lock(); + faultpage = get_kmmio_fault_page(addr); + if (!faultpage) { + /* + * Either this page fault is not caused by kmmio, or + * another CPU just pulled the kmmio probe from under + * our feet. In the latter case all hell breaks loose. + */ + goto no_kmmio; + } + + ctx = &get_cpu_var(kmmio_ctx); if (ctx->active) { /* - * This avoids a deadlock with kmmio_lock. + * Prevent overwriting already in-flight context. * If this page fault really was due to kmmio trap, * all hell breaks loose. */ - printk(KERN_EMERG "mmiotrace: recursive probe hit on CPU %d, " - "for address %lu. Ignoring.\n", + pr_emerg("kmmio: recursive probe hit on CPU %d, " + "for address 0x%08lx. Ignoring.\n", smp_processor_id(), addr); - goto no_kmmio; + goto no_kmmio_ctx; } ctx->active++; - /* - * Acquire the kmmio lock to prevent changes affecting - * get_kmmio_fault_page() and get_kmmio_probe(), since we save their - * returned pointers. - * The lock is released in post_kmmio_handler(). - * XXX: could/should get_kmmio_*() be using RCU instead of spinlock? - */ - spin_lock(&kmmio_lock); - - ctx->fpage = get_kmmio_fault_page(addr); - if (!ctx->fpage) { - /* this page fault is not caused by kmmio */ - goto no_kmmio_locked; - } - + ctx->fpage = faultpage; ctx->probe = get_kmmio_probe(addr); ctx->saved_flags = (regs->flags & (TF_MASK|IF_MASK)); + ctx->addr = addr; if (ctx->probe && ctx->probe->pre_handler) ctx->probe->pre_handler(ctx->probe, regs, addr); @@ -227,46 +290,62 @@ static int kmmio_handler(struct pt_regs *regs, unsigned long addr) regs->flags |= TF_MASK; regs->flags &= ~IF_MASK; - /* We hold lock, now we set present bit in PTE and single step. */ + /* Now we set present bit in PTE and single step. */ disarm_kmmio_fault_page(ctx->fpage->page, NULL); put_cpu_var(kmmio_ctx); + rcu_read_unlock(); return 1; -no_kmmio_locked: - spin_unlock(&kmmio_lock); - ctx->active--; +no_kmmio_ctx: + put_cpu_var(kmmio_ctx); no_kmmio: + rcu_read_unlock(); preempt_enable_no_resched(); - put_cpu_var(kmmio_ctx); - /* page fault not handled by kmmio */ - return 0; + return 0; /* page fault not handled by kmmio */ } /* * Interrupts are disabled on entry as trap1 is an interrupt gate * and they remain disabled thorough out this function. - * And we hold kmmio lock. + * This must always get called as the pair to kmmio_handler(). */ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) { int ret = 0; + struct kmmio_probe *probe; + struct kmmio_fault_page *faultpage; struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); if (!ctx->active) goto out; + rcu_read_lock(); + + faultpage = get_kmmio_fault_page(ctx->addr); + probe = get_kmmio_probe(ctx->addr); + if (faultpage != ctx->fpage || probe != ctx->probe) { + /* + * The trace setup changed after kmmio_handler() and before + * running this respective post handler. User does not want + * the result anymore. + */ + ctx->probe = NULL; + ctx->fpage = NULL; + } + if (ctx->probe && ctx->probe->post_handler) ctx->probe->post_handler(ctx->probe, condition, regs); - arm_kmmio_fault_page(ctx->fpage->page, NULL); + if (ctx->fpage) + arm_kmmio_fault_page(ctx->fpage->page, NULL); regs->flags &= ~TF_MASK; regs->flags |= ctx->saved_flags; /* These were acquired in kmmio_handler(). */ ctx->active--; - spin_unlock(&kmmio_lock); + BUG_ON(ctx->active); preempt_enable_no_resched(); /* @@ -277,11 +356,13 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) if (!(regs->flags & TF_MASK)) ret = 1; + rcu_read_unlock(); out: put_cpu_var(kmmio_ctx); return ret; } +/* You must be holding kmmio_lock. */ static int add_kmmio_fault_page(unsigned long page) { struct kmmio_fault_page *f; @@ -289,6 +370,8 @@ static int add_kmmio_fault_page(unsigned long page) page &= PAGE_MASK; f = get_kmmio_fault_page(page); if (f) { + if (!f->count) + arm_kmmio_fault_page(f->page, NULL); f->count++; return 0; } @@ -299,15 +382,16 @@ static int add_kmmio_fault_page(unsigned long page) f->count = 1; f->page = page; - list_add(&f->list, - &kmmio_page_table[hash_long(f->page, KMMIO_PAGE_HASH_BITS)]); + list_add_rcu(&f->list, kmmio_page_list(f->page)); arm_kmmio_fault_page(f->page, NULL); return 0; } -static void release_kmmio_fault_page(unsigned long page) +/* You must be holding kmmio_lock. */ +static void release_kmmio_fault_page(unsigned long page, + struct kmmio_fault_page **release_list) { struct kmmio_fault_page *f; @@ -317,9 +401,11 @@ static void release_kmmio_fault_page(unsigned long page) return; f->count--; + BUG_ON(f->count < 0); if (!f->count) { disarm_kmmio_fault_page(f->page, NULL); - list_del(&f->list); + f->release_next = *release_list; + *release_list = f; } } @@ -334,68 +420,113 @@ int register_kmmio_probe(struct kmmio_probe *p) ret = -EEXIST; goto out; } - list_add(&p->list, &kmmio_probes); - /*printk("adding fault pages...\n");*/ + list_add_rcu(&p->list, &kmmio_probes); while (size < p->len) { if (add_kmmio_fault_page(p->addr + size)) - printk(KERN_ERR "mmio: Unable to set page fault.\n"); + pr_err("kmmio: Unable to set page fault.\n"); size += PAGE_SIZE; } - - if (!handler_registered) { - if (mmiotrace_register_pf(&kmmio_page_fault)) - printk(KERN_ERR "mmiotrace: Cannot register page " - "fault handler.\n"); - else - handler_registered++; - } - out: spin_unlock_irq(&kmmio_lock); /* * XXX: What should I do here? * Here was a call to global_flush_tlb(), but it does not exist - * anymore. + * anymore. It seems it's not needed after all. */ return ret; } +EXPORT_SYMBOL(register_kmmio_probe); +static void rcu_free_kmmio_fault_pages(struct rcu_head *head) +{ + struct kmmio_delayed_release *dr = container_of( + head, + struct kmmio_delayed_release, + rcu); + struct kmmio_fault_page *p = dr->release_list; + while (p) { + struct kmmio_fault_page *next = p->release_next; + BUG_ON(p->count); + kfree(p); + p = next; + } + kfree(dr); +} + +static void remove_kmmio_fault_pages(struct rcu_head *head) +{ + struct kmmio_delayed_release *dr = container_of( + head, + struct kmmio_delayed_release, + rcu); + struct kmmio_fault_page *p = dr->release_list; + struct kmmio_fault_page **prevp = &dr->release_list; + unsigned long flags; + spin_lock_irqsave(&kmmio_lock, flags); + while (p) { + if (!p->count) + list_del_rcu(&p->list); + else + *prevp = p->release_next; + prevp = &p->release_next; + p = p->release_next; + } + spin_unlock_irqrestore(&kmmio_lock, flags); + /* This is the real RCU destroy call. */ + call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages); +} + +/* + * Remove a kmmio probe. You have to synchronize_rcu() before you can be + * sure that the callbacks will not be called anymore. + * + * Unregistering a kmmio fault page has three steps: + * 1. release_kmmio_fault_page() + * Disarm the page, wait a grace period to let all faults finish. + * 2. remove_kmmio_fault_pages() + * Remove the pages from kmmio_page_table. + * 3. rcu_free_kmmio_fault_pages() + * Actally free the kmmio_fault_page structs as with RCU. + */ void unregister_kmmio_probe(struct kmmio_probe *p) { unsigned long size = 0; + struct kmmio_fault_page *release_list = NULL; + struct kmmio_delayed_release *drelease; spin_lock_irq(&kmmio_lock); while (size < p->len) { - release_kmmio_fault_page(p->addr + size); + release_kmmio_fault_page(p->addr + size, &release_list); size += PAGE_SIZE; } - list_del(&p->list); + list_del_rcu(&p->list); kmmio_count--; spin_unlock_irq(&kmmio_lock); -} -/* - * According to 2.6.20, mainly x86_64 arch: - * This is being called from do_page_fault(), via the page fault notifier - * chain. The chain is called for both user space faults and kernel space - * faults (address >= TASK_SIZE64), except not on faults serviced by - * vmalloc_fault(). - * - * We may be in an interrupt or a critical section. Also prefecthing may - * trigger a page fault. We may be in the middle of process switch. - * The page fault hook functionality has put us inside RCU read lock. - * - * Local interrupts are disabled, so preemption cannot happen. - * Do not enable interrupts, do not sleep, and watch out for other CPUs. - */ -static int kmmio_page_fault(struct pt_regs *regs, unsigned long error_code, - unsigned long address) -{ - if (is_kmmio_active()) - if (kmmio_handler(regs, address) == 1) - return -1; - return 0; + drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC); + if (!drelease) { + pr_crit("kmmio: leaking kmmio_fault_page objects.\n"); + return; + } + drelease->release_list = release_list; + + /* + * This is not really RCU here. We have just disarmed a set of + * pages so that they cannot trigger page faults anymore. However, + * we cannot remove the pages from kmmio_page_table, + * because a probe hit might be in flight on another CPU. The + * pages are collected into a list, and they will be removed from + * kmmio_page_table when it is certain that no probe hit related to + * these pages can be in flight. RCU grace period sounds like a + * good choice. + * + * If we removed the pages too early, kmmio page fault handler might + * not find the respective kmmio_fault_page and determine it's not + * a kmmio fault, when it actually is. This would lead to madness. + */ + call_rcu(&drelease->rcu, remove_kmmio_fault_pages); } +EXPORT_SYMBOL(unregister_kmmio_probe); static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args) diff --git a/arch/x86/kernel/mmiotrace/kmmio.h b/arch/x86/kernel/mmiotrace/kmmio.h deleted file mode 100644 index 85b7f68a3b8a..000000000000 --- a/arch/x86/kernel/mmiotrace/kmmio.h +++ /dev/null @@ -1,58 +0,0 @@ -#ifndef _LINUX_KMMIO_H -#define _LINUX_KMMIO_H - -#include -#include -#include -#include -#include -#include -#include - -struct kmmio_probe; -struct kmmio_fault_page; -struct pt_regs; - -typedef void (*kmmio_pre_handler_t)(struct kmmio_probe *, - struct pt_regs *, unsigned long addr); -typedef void (*kmmio_post_handler_t)(struct kmmio_probe *, - unsigned long condition, struct pt_regs *); - -struct kmmio_probe { - struct list_head list; - - /* start location of the probe point */ - unsigned long addr; - - /* length of the probe region */ - unsigned long len; - - /* Called before addr is executed. */ - kmmio_pre_handler_t pre_handler; - - /* Called after addr is executed, unless... */ - kmmio_post_handler_t post_handler; -}; - -struct kmmio_fault_page { - struct list_head list; - - /* location of the fault page */ - unsigned long page; - - int count; -}; - -/* kmmio is active by some kmmio_probes? */ -static inline int is_kmmio_active(void) -{ - extern unsigned int kmmio_count; - return kmmio_count; -} - -int init_kmmio(void); -void cleanup_kmmio(void); -int register_kmmio_probe(struct kmmio_probe *p); -void unregister_kmmio_probe(struct kmmio_probe *p); - -#endif /* _LINUX_KMMIO_H */ diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/kernel/mmiotrace/mmio-mod.c index f9c609266d83..e1a508588f03 100644 --- a/arch/x86/kernel/mmiotrace/mmio-mod.c +++ b/arch/x86/kernel/mmiotrace/mmio-mod.c @@ -32,7 +32,6 @@ #include #include -#include "kmmio.h" #include "pf_in.h" /* This app's relay channel files will appear in /debug/mmio-trace */ @@ -129,18 +128,17 @@ static void print_pte(unsigned long address) pte_t *pte = lookup_address(address, &level); if (!pte) { - printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n", - __FUNCTION__, address); + pr_err(MODULE_NAME ": Error in %s: no pte for page 0x%08lx\n", + __func__, address); return; } if (level == PG_LEVEL_2M) { - printk(KERN_EMERG MODULE_NAME ": 4MB pages are not " - "currently supported: %lx\n", - address); + pr_emerg(MODULE_NAME ": 4MB pages are not currently " + "supported: %lx\n", address); BUG(); } - printk(KERN_DEBUG MODULE_NAME ": pte for 0x%lx: 0x%lx 0x%lx\n", + pr_info(MODULE_NAME ": pte for 0x%lx: 0x%lx 0x%lx\n", address, pte_val(*pte), pte_val(*pte) & _PAGE_PRESENT); } @@ -152,7 +150,7 @@ static void print_pte(unsigned long address) static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) { const struct trap_reason *my_reason = &get_cpu_var(pf_reason); - printk(KERN_EMERG MODULE_NAME ": unexpected fault for address: %lx, " + pr_emerg(MODULE_NAME ": unexpected fault for address: %lx, " "last fault for address: %lx\n", addr, my_reason->addr); print_pte(addr); @@ -160,20 +158,17 @@ static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) print_symbol(KERN_EMERG "faulting EIP is at %s\n", regs->ip); print_symbol(KERN_EMERG "last faulting EIP was at %s\n", my_reason->ip); - printk(KERN_EMERG - "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", + pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", regs->ax, regs->bx, regs->cx, regs->dx); - printk(KERN_EMERG - "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", + pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", regs->si, regs->di, regs->bp, regs->sp); #else print_symbol(KERN_EMERG "faulting RIP is at %s\n", regs->ip); print_symbol(KERN_EMERG "last faulting RIP was at %s\n", my_reason->ip); - printk(KERN_EMERG "rax: %016lx rcx: %016lx rdx: %016lx\n", + pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n", regs->ax, regs->cx, regs->dx); - printk(KERN_EMERG "rsi: %016lx rdi: %016lx " - "rbp: %016lx rsp: %016lx\n", + pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n", regs->si, regs->di, regs->bp, regs->sp); #endif put_cpu_var(pf_reason); @@ -251,10 +246,15 @@ static void post(struct kmmio_probe *p, unsigned long condition, struct trap_reason *my_reason = &get_cpu_var(pf_reason); struct mm_io_header_rw *my_trace = &get_cpu_var(cpu_trace); + /* + * XXX: This might not get called, if the probe is removed while + * trace hit is on flight. + */ + /* this should always return the active_trace count to 0 */ my_reason->active_traces--; if (my_reason->active_traces) { - printk(KERN_EMERG MODULE_NAME ": unexpected post handler"); + pr_emerg(MODULE_NAME ": unexpected post handler"); BUG(); } @@ -283,16 +283,15 @@ static int subbuf_start_handler(struct rchan_buf *buf, void *subbuf, atomic_t *drop = &per_cpu(dropped, cpu); int count; if (relay_buf_full(buf)) { - if (atomic_inc_return(drop) == 1) { - printk(KERN_ERR MODULE_NAME ": cpu %d buffer full!\n", - cpu); - } + if (atomic_inc_return(drop) == 1) + pr_err(MODULE_NAME ": cpu %d buffer full!\n", cpu); return 0; - } else if ((count = atomic_read(drop))) { - printk(KERN_ERR MODULE_NAME - ": cpu %d buffer no longer full, " - "missed %d events.\n", - cpu, count); + } + count = atomic_read(drop); + if (count) { + pr_err(MODULE_NAME ": cpu %d buffer no longer full, " + "missed %d events.\n", + cpu, count); atomic_sub(count, drop); } @@ -407,8 +406,8 @@ static void ioremap_trace_core(unsigned long offset, unsigned long size, /* Don't trace the low PCI/ISA area, it's always mapped.. */ if (!ISA_trace && (offset < ISA_END_ADDRESS) && (offset + size > ISA_START_ADDRESS)) { - printk(KERN_NOTICE MODULE_NAME ": Ignoring map of low " - "PCI/ISA area (0x%lx-0x%lx)\n", + pr_notice(MODULE_NAME ": Ignoring map of low PCI/ISA area " + "(0x%lx-0x%lx)\n", offset, offset + size); return; } @@ -418,7 +417,7 @@ static void ioremap_trace_core(unsigned long offset, unsigned long size, void __iomem *ioremap_cache_trace(unsigned long offset, unsigned long size) { void __iomem *p = ioremap_cache(offset, size); - printk(KERN_DEBUG MODULE_NAME ": ioremap_cache(0x%lx, 0x%lx) = %p\n", + pr_debug(MODULE_NAME ": ioremap_cache(0x%lx, 0x%lx) = %p\n", offset, size, p); ioremap_trace_core(offset, size, p); return p; @@ -428,7 +427,7 @@ EXPORT_SYMBOL(ioremap_cache_trace); void __iomem *ioremap_nocache_trace(unsigned long offset, unsigned long size) { void __iomem *p = ioremap_nocache(offset, size); - printk(KERN_DEBUG MODULE_NAME ": ioremap_nocache(0x%lx, 0x%lx) = %p\n", + pr_debug(MODULE_NAME ": ioremap_nocache(0x%lx, 0x%lx) = %p\n", offset, size, p); ioremap_trace_core(offset, size, p); return p; @@ -455,7 +454,7 @@ void iounmap_trace(volatile void __iomem *addr) }; struct remap_trace *trace; struct remap_trace *tmp; - printk(KERN_DEBUG MODULE_NAME ": Unmapping %p.\n", addr); + pr_debug(MODULE_NAME ": Unmapping %p.\n", addr); record_timestamp(&event.header); spin_lock(&trace_list_lock); @@ -481,7 +480,7 @@ static void clear_trace_list(void) spin_lock(&trace_list_lock); list_for_each_entry_safe(trace, tmp, &trace_list, list) { - printk(KERN_WARNING MODULE_NAME ": purging non-iounmapped " + pr_warning(MODULE_NAME ": purging non-iounmapped " "trace @0x%08lx, size 0x%lx.\n", trace->probe.addr, trace->probe.len); if (!nommiotrace) @@ -500,39 +499,37 @@ static int __init init(void) dir = debugfs_create_dir(APP_DIR, NULL); if (!dir) { - printk(KERN_ERR MODULE_NAME - ": Couldn't create relay app directory.\n"); + pr_err(MODULE_NAME ": Couldn't create relay app directory.\n"); return -ENOMEM; } chan = create_channel(subbuf_size, n_subbufs); if (!chan) { debugfs_remove(dir); - printk(KERN_ERR MODULE_NAME - ": relay app channel creation failed\n"); + pr_err(MODULE_NAME ": relay app channel creation failed\n"); return -ENOMEM; } - init_kmmio(); + reference_kmmio(); proc_marker_file = create_proc_entry(MARKER_FILE, 0, NULL); if (proc_marker_file) proc_marker_file->write_proc = write_marker; - printk(KERN_DEBUG MODULE_NAME ": loaded.\n"); + pr_debug(MODULE_NAME ": loaded.\n"); if (nommiotrace) - printk(KERN_DEBUG MODULE_NAME ": MMIO tracing disabled.\n"); + pr_info(MODULE_NAME ": MMIO tracing disabled.\n"); if (ISA_trace) - printk(KERN_WARNING MODULE_NAME - ": Warning! low ISA range will be traced.\n"); + pr_warning(MODULE_NAME ": Warning! low ISA range will be " + "traced.\n"); return 0; } static void __exit cleanup(void) { - printk(KERN_DEBUG MODULE_NAME ": unload...\n"); + pr_debug(MODULE_NAME ": unload...\n"); clear_trace_list(); - cleanup_kmmio(); + unreference_kmmio(); remove_proc_entry(MARKER_FILE, NULL); destroy_channel(); if (dir) diff --git a/arch/x86/kernel/mmiotrace/pf_in.c b/arch/x86/kernel/mmiotrace/pf_in.c index 67ea520dde62..efa1911e20ca 100644 --- a/arch/x86/kernel/mmiotrace/pf_in.c +++ b/arch/x86/kernel/mmiotrace/pf_in.c @@ -19,7 +19,7 @@ * */ -/* $Id: pf_in.c,v 1.1.1.1 2002/11/12 05:56:32 brlock Exp $ +/* Id: pf_in.c,v 1.1.1.1 2002/11/12 05:56:32 brlock Exp * Copyright by Intel Crop., 2002 * Louis Zhuang (louis.zhuang@intel.com) * diff --git a/arch/x86/kernel/mmiotrace/testmmiotrace.c b/arch/x86/kernel/mmiotrace/testmmiotrace.c index 40e66b0e6480..5ecff578672b 100644 --- a/arch/x86/kernel/mmiotrace/testmmiotrace.c +++ b/arch/x86/kernel/mmiotrace/testmmiotrace.c @@ -41,8 +41,7 @@ static void do_test(void) { void __iomem *p = ioremap_nocache_trace(mmio_address, 0x4000); if (!p) { - printk(KERN_ERR MODULE_NAME ": could not ioremap IO memory, " - "aborting.\n"); + pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); return; } do_write_test(p); @@ -53,14 +52,14 @@ static void do_test(void) static int __init init(void) { if (mmio_address == 0) { - printk(KERN_ERR MODULE_NAME ": you have to use the module " - "argument mmio_address.\n"); - printk(KERN_ERR MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS" + pr_err(MODULE_NAME ": you have to use the module argument " + "mmio_address.\n"); + pr_err(MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS" " YOU REALLY KNOW WHAT YOU ARE DOING!\n"); return -ENXIO; } - printk(KERN_WARNING MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx " + pr_warning(MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx " "in PCI address space, and writing " "rubbish in there.\n", mmio_address); do_test(); @@ -69,7 +68,7 @@ static int __init init(void) static void __exit cleanup(void) { - printk(KERN_DEBUG MODULE_NAME ": unloaded.\n"); + pr_debug(MODULE_NAME ": unloaded.\n"); } module_init(init); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index e9a086a1a9ff..8c828a68d3b6 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -49,60 +50,14 @@ #define PF_RSVD (1<<3) #define PF_INSTR (1<<4) -#ifdef CONFIG_MMIOTRACE_HOOKS -static pf_handler_func mmiotrace_pf_handler; /* protected by RCU */ -static DEFINE_SPINLOCK(mmiotrace_handler_lock); - -int mmiotrace_register_pf(pf_handler_func new_pfh) -{ - int ret = 0; - unsigned long flags; - spin_lock_irqsave(&mmiotrace_handler_lock, flags); - if (mmiotrace_pf_handler) - ret = -EBUSY; - else - mmiotrace_pf_handler = new_pfh; - spin_unlock_irqrestore(&mmiotrace_handler_lock, flags); - return ret; -} -EXPORT_SYMBOL_GPL(mmiotrace_register_pf); - -/** - * mmiotrace_unregister_pf: - * The caller must ensure @old_pfh is not in use anymore before freeing it. - * This function does not guarantee it. The handler function pointer is - * protected by RCU, so you can do this by e.g. calling synchronize_rcu(). - */ -int mmiotrace_unregister_pf(pf_handler_func old_pfh) -{ - int ret = 0; - unsigned long flags; - spin_lock_irqsave(&mmiotrace_handler_lock, flags); - if (mmiotrace_pf_handler != old_pfh) - ret = -EPERM; - else - mmiotrace_pf_handler = NULL; - spin_unlock_irqrestore(&mmiotrace_handler_lock, flags); - return ret; -} -EXPORT_SYMBOL_GPL(mmiotrace_unregister_pf); -#endif /* CONFIG_MMIOTRACE_HOOKS */ - -/* returns non-zero if do_page_fault() should return */ -static inline int call_mmiotrace(struct pt_regs *regs, - unsigned long error_code, - unsigned long address) +static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) { #ifdef CONFIG_MMIOTRACE_HOOKS - int ret = 0; - rcu_read_lock(); - if (mmiotrace_pf_handler) - ret = mmiotrace_pf_handler(regs, error_code, address); - rcu_read_unlock(); - return ret; -#else - return 0; + if (unlikely(is_kmmio_active())) + if (kmmio_handler(regs, addr) == 1) + return -1; #endif + return 0; } static inline int notify_page_fault(struct pt_regs *regs) @@ -657,7 +612,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) if (notify_page_fault(regs)) return; - if (call_mmiotrace(regs, error_code, address)) + if (unlikely(kmmio_fault(regs, address))) return; /* diff --git a/include/asm-x86/kdebug.h b/include/asm-x86/kdebug.h index 7063281040da..96651bb59ba1 100644 --- a/include/asm-x86/kdebug.h +++ b/include/asm-x86/kdebug.h @@ -35,11 +35,4 @@ extern void show_regs(struct pt_regs *regs); extern unsigned long oops_begin(void); extern void oops_end(unsigned long, struct pt_regs *, int signr); -typedef int (*pf_handler_func)(struct pt_regs *regs, - unsigned long error_code, - unsigned long address); - -extern int mmiotrace_register_pf(pf_handler_func new_pfh); -extern int mmiotrace_unregister_pf(pf_handler_func old_pfh); - #endif diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index 6ec288f1fe24..d87a6cd8b686 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -3,6 +3,44 @@ #include +#ifdef __KERNEL__ + +#include + +struct kmmio_probe; +struct pt_regs; + +typedef void (*kmmio_pre_handler_t)(struct kmmio_probe *, + struct pt_regs *, unsigned long addr); +typedef void (*kmmio_post_handler_t)(struct kmmio_probe *, + unsigned long condition, struct pt_regs *); + +struct kmmio_probe { + struct list_head list; + unsigned long addr; /* start location of the probe point */ + unsigned long len; /* length of the probe region */ + kmmio_pre_handler_t pre_handler; /* Called before addr is executed. */ + kmmio_post_handler_t post_handler; /* Called after addr is executed */ +}; + +/* kmmio is active by some kmmio_probes? */ +static inline int is_kmmio_active(void) +{ + extern unsigned int kmmio_count; + return kmmio_count; +} + +extern void reference_kmmio(void); +extern void unreference_kmmio(void); +extern int register_kmmio_probe(struct kmmio_probe *p); +extern void unregister_kmmio_probe(struct kmmio_probe *p); + +/* Called from page fault handler. */ +extern int kmmio_handler(struct pt_regs *regs, unsigned long addr); + +#endif /* __KERNEL__ */ + + /* * If you change anything here, you must bump MMIO_VERSION. * This is the relay data format for user space. -- cgit v1.2.3 From d61fc44853f46fb002228b18aa5f30db21fcd4ac Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: x86: mmiotrace, preview 2 Kconfig.debug, Makefile and testmmiotrace.c style fixes. Use real mutex instead of mutex. Fix failure path in register probe func. kmmio: RCU read-locked over single stepping. Generate mapping id's. Make mmio-mod.c built-in and rewrite its locking. Add debugfs file to enable/disable mmiotracing. kmmio: use irqsave spinlocks. Lots of cleanups in mmio-mod.c Marker file moved from /proc into debugfs. Call mmiotrace entrypoints directly from ioremap.c. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig.debug | 20 +- arch/x86/kernel/mmiotrace/Makefile | 2 +- arch/x86/kernel/mmiotrace/kmmio.c | 72 +++--- arch/x86/kernel/mmiotrace/mmio-mod.c | 397 ++++++++++++++++++++---------- arch/x86/kernel/mmiotrace/testmmiotrace.c | 15 +- arch/x86/mm/ioremap.c | 9 +- include/linux/mmiotrace.h | 18 +- 7 files changed, 332 insertions(+), 201 deletions(-) (limited to 'include') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 9491c0ae03a3..aa0d6462b1fc 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -170,22 +170,19 @@ config IOMMU_LEAK config MMIOTRACE_HOOKS bool - default n config MMIOTRACE - tristate "Memory mapped IO tracing" + bool "Memory mapped IO tracing" depends on DEBUG_KERNEL && RELAY && DEBUG_FS select MMIOTRACE_HOOKS - default n + default y help - This will build a kernel module called mmiotrace. - Making this a built-in is heavily discouraged. - - Mmiotrace traces Memory Mapped I/O access and is meant for debugging - and reverse engineering. The kernel module offers wrapped - versions of the ioremap family of functions. The driver to be traced - must be modified to call these wrappers. A user space program is - required to collect the MMIO data. + Mmiotrace traces Memory Mapped I/O access and is meant for + debugging and reverse engineering. It is called from the ioremap + implementation and works via page faults. A user space program is + required to collect the MMIO data from debugfs files. + Tracing is disabled by default and can be enabled from a debugfs + file. See http://nouveau.freedesktop.org/wiki/MmioTrace If you are not helping to develop drivers, say N. @@ -193,7 +190,6 @@ config MMIOTRACE config MMIOTRACE_TEST tristate "Test module for mmiotrace" depends on MMIOTRACE && m - default n help This is a dumb module for testing mmiotrace. It is very dangerous as it will write garbage to IO memory starting at a given address. diff --git a/arch/x86/kernel/mmiotrace/Makefile b/arch/x86/kernel/mmiotrace/Makefile index cf1e747b463e..dbcd8d50fb8d 100644 --- a/arch/x86/kernel/mmiotrace/Makefile +++ b/arch/x86/kernel/mmiotrace/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o obj-$(CONFIG_MMIOTRACE) += mmiotrace.o -mmiotrace-objs := pf_in.o mmio-mod.o +mmiotrace-y := pf_in.o mmio-mod.o obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/kernel/mmiotrace/kmmio.c index 539a9b19588f..efb467933087 100644 --- a/arch/x86/kernel/mmiotrace/kmmio.c +++ b/arch/x86/kernel/mmiotrace/kmmio.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -59,7 +60,7 @@ struct kmmio_context { static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args); -static DECLARE_MUTEX(kmmio_init_mutex); +static DEFINE_MUTEX(kmmio_init_mutex); static DEFINE_SPINLOCK(kmmio_lock); /* These are protected by kmmio_lock */ @@ -90,7 +91,7 @@ static struct notifier_block nb_die = { */ void reference_kmmio(void) { - down(&kmmio_init_mutex); + mutex_lock(&kmmio_init_mutex); spin_lock_irq(&kmmio_lock); if (!kmmio_initialized) { int i; @@ -101,7 +102,7 @@ void reference_kmmio(void) } kmmio_initialized++; spin_unlock_irq(&kmmio_lock); - up(&kmmio_init_mutex); + mutex_unlock(&kmmio_init_mutex); } EXPORT_SYMBOL_GPL(reference_kmmio); @@ -115,7 +116,7 @@ void unreference_kmmio(void) { bool unreg = false; - down(&kmmio_init_mutex); + mutex_lock(&kmmio_init_mutex); spin_lock_irq(&kmmio_lock); if (kmmio_initialized == 1) { @@ -128,7 +129,7 @@ void unreference_kmmio(void) if (unreg) unregister_die_notifier(&nb_die); /* calls sync_rcu() */ - up(&kmmio_init_mutex); + mutex_unlock(&kmmio_init_mutex); } EXPORT_SYMBOL(unreference_kmmio); @@ -244,17 +245,13 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) * Preemption is now disabled to prevent process switch during * single stepping. We can only handle one active kmmio trace * per cpu, so ensure that we finish it before something else - * gets to run. - * - * XXX what if an interrupt occurs between returning from - * do_page_fault() and entering the single-step exception handler? - * And that interrupt triggers a kmmio trap? - * XXX If we tracing an interrupt service routine or whatever, is - * this enough to keep it on the current cpu? + * gets to run. We also hold the RCU read lock over single + * stepping to avoid looking up the probe and kmmio_fault_page + * again. */ preempt_disable(); - rcu_read_lock(); + faultpage = get_kmmio_fault_page(addr); if (!faultpage) { /* @@ -287,14 +284,24 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) if (ctx->probe && ctx->probe->pre_handler) ctx->probe->pre_handler(ctx->probe, regs, addr); + /* + * Enable single-stepping and disable interrupts for the faulting + * context. Local interrupts must not get enabled during stepping. + */ regs->flags |= TF_MASK; regs->flags &= ~IF_MASK; /* Now we set present bit in PTE and single step. */ disarm_kmmio_fault_page(ctx->fpage->page, NULL); + /* + * If another cpu accesses the same page while we are stepping, + * the access will not be caught. It will simply succeed and the + * only downside is we lose the event. If this becomes a problem, + * the user should drop to single cpu before tracing. + */ + put_cpu_var(kmmio_ctx); - rcu_read_unlock(); return 1; no_kmmio_ctx: @@ -313,32 +320,15 @@ no_kmmio: static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) { int ret = 0; - struct kmmio_probe *probe; - struct kmmio_fault_page *faultpage; struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); if (!ctx->active) goto out; - rcu_read_lock(); - - faultpage = get_kmmio_fault_page(ctx->addr); - probe = get_kmmio_probe(ctx->addr); - if (faultpage != ctx->fpage || probe != ctx->probe) { - /* - * The trace setup changed after kmmio_handler() and before - * running this respective post handler. User does not want - * the result anymore. - */ - ctx->probe = NULL; - ctx->fpage = NULL; - } - if (ctx->probe && ctx->probe->post_handler) ctx->probe->post_handler(ctx->probe, condition, regs); - if (ctx->fpage) - arm_kmmio_fault_page(ctx->fpage->page, NULL); + arm_kmmio_fault_page(ctx->fpage->page, NULL); regs->flags &= ~TF_MASK; regs->flags |= ctx->saved_flags; @@ -346,6 +336,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) /* These were acquired in kmmio_handler(). */ ctx->active--; BUG_ON(ctx->active); + rcu_read_unlock(); preempt_enable_no_resched(); /* @@ -355,8 +346,6 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) */ if (!(regs->flags & TF_MASK)) ret = 1; - - rcu_read_unlock(); out: put_cpu_var(kmmio_ctx); return ret; @@ -411,15 +400,16 @@ static void release_kmmio_fault_page(unsigned long page, int register_kmmio_probe(struct kmmio_probe *p) { + unsigned long flags; int ret = 0; unsigned long size = 0; - spin_lock_irq(&kmmio_lock); - kmmio_count++; + spin_lock_irqsave(&kmmio_lock, flags); if (get_kmmio_probe(p->addr)) { ret = -EEXIST; goto out; } + kmmio_count++; list_add_rcu(&p->list, &kmmio_probes); while (size < p->len) { if (add_kmmio_fault_page(p->addr + size)) @@ -427,7 +417,7 @@ int register_kmmio_probe(struct kmmio_probe *p) size += PAGE_SIZE; } out: - spin_unlock_irq(&kmmio_lock); + spin_unlock_irqrestore(&kmmio_lock, flags); /* * XXX: What should I do here? * Here was a call to global_flush_tlb(), but it does not exist @@ -478,7 +468,8 @@ static void remove_kmmio_fault_pages(struct rcu_head *head) /* * Remove a kmmio probe. You have to synchronize_rcu() before you can be - * sure that the callbacks will not be called anymore. + * sure that the callbacks will not be called anymore. Only after that + * you may actually release your struct kmmio_probe. * * Unregistering a kmmio fault page has three steps: * 1. release_kmmio_fault_page() @@ -490,18 +481,19 @@ static void remove_kmmio_fault_pages(struct rcu_head *head) */ void unregister_kmmio_probe(struct kmmio_probe *p) { + unsigned long flags; unsigned long size = 0; struct kmmio_fault_page *release_list = NULL; struct kmmio_delayed_release *drelease; - spin_lock_irq(&kmmio_lock); + spin_lock_irqsave(&kmmio_lock, flags); while (size < p->len) { release_kmmio_fault_page(p->addr + size, &release_list); size += PAGE_SIZE; } list_del_rcu(&p->list); kmmio_count--; - spin_unlock_irq(&kmmio_lock); + spin_unlock_irqrestore(&kmmio_lock, flags); drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC); if (!drelease) { diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/kernel/mmiotrace/mmio-mod.c index e1a508588f03..738644061e4e 100644 --- a/arch/x86/kernel/mmiotrace/mmio-mod.c +++ b/arch/x86/kernel/mmiotrace/mmio-mod.c @@ -19,6 +19,8 @@ * * Derived from the read-mod example from relay-examples by Tom Zanussi. */ +#define DEBUG 1 + #include #include #include @@ -34,12 +36,12 @@ #include "pf_in.h" -/* This app's relay channel files will appear in /debug/mmio-trace */ -#define APP_DIR "mmio-trace" -/* the marker injection file in /proc */ -#define MARKER_FILE "mmio-marker" +#define NAME "mmiotrace: " -#define MODULE_NAME "mmiotrace" +/* This app's relay channel files will appear in /debug/mmio-trace */ +static const char APP_DIR[] = "mmio-trace"; +/* the marker injection file in /debug/APP_DIR */ +static const char MARKER_FILE[] = "mmio-marker"; struct trap_reason { unsigned long addr; @@ -48,6 +50,15 @@ struct trap_reason { int active_traces; }; +struct remap_trace { + struct list_head list; + struct kmmio_probe probe; + unsigned long phys; + unsigned long id; +}; + +static const size_t subbuf_size = 256*1024; + /* Accessed per-cpu. */ static DEFINE_PER_CPU(struct trap_reason, pf_reason); static DEFINE_PER_CPU(struct mm_io_header_rw, cpu_trace); @@ -55,33 +66,53 @@ static DEFINE_PER_CPU(struct mm_io_header_rw, cpu_trace); /* Access to this is not per-cpu. */ static DEFINE_PER_CPU(atomic_t, dropped); -static struct file_operations mmio_fops = { - .owner = THIS_MODULE, -}; +static struct dentry *dir; +static struct dentry *enabled_file; +static struct dentry *marker_file; -static const size_t subbuf_size = 256*1024; +static DEFINE_MUTEX(mmiotrace_mutex); +static DEFINE_SPINLOCK(trace_lock); +static atomic_t mmiotrace_enabled; +static LIST_HEAD(trace_list); /* struct remap_trace */ static struct rchan *chan; -static struct dentry *dir; -static struct proc_dir_entry *proc_marker_file; + +/* + * Locking in this file: + * - mmiotrace_mutex enforces enable/disable_mmiotrace() critical sections. + * - mmiotrace_enabled may be modified only when holding mmiotrace_mutex + * and trace_lock. + * - Routines depending on is_enabled() must take trace_lock. + * - trace_list users must hold trace_lock. + * - is_enabled() guarantees that chan is valid. + * - pre/post callbacks assume the effect of is_enabled() being true. + */ /* module parameters */ -static unsigned int n_subbufs = 32*4; -static unsigned long filter_offset; -static int nommiotrace; -static int ISA_trace; -static int trace_pc; +static unsigned int n_subbufs = 32*4; +static unsigned long filter_offset; +static int nommiotrace; +static int ISA_trace; +static int trace_pc; +static int enable_now; module_param(n_subbufs, uint, 0); module_param(filter_offset, ulong, 0); module_param(nommiotrace, bool, 0); module_param(ISA_trace, bool, 0); module_param(trace_pc, bool, 0); +module_param(enable_now, bool, 0); MODULE_PARM_DESC(n_subbufs, "Number of 256kB buffers, default 128."); MODULE_PARM_DESC(filter_offset, "Start address of traced mappings."); MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing."); MODULE_PARM_DESC(ISA_trace, "Do not exclude the low ISA range."); MODULE_PARM_DESC(trace_pc, "Record address of faulting instructions."); +MODULE_PARM_DESC(enable_now, "Start mmiotracing immediately on module load."); + +static bool is_enabled(void) +{ + return atomic_read(&mmiotrace_enabled); +} static void record_timestamp(struct mm_io_header *header) { @@ -93,15 +124,15 @@ static void record_timestamp(struct mm_io_header *header) } /* - * Write callback for the /proc entry: + * Write callback for the debugfs entry: * Read a marker and write it to the mmio trace log */ -static int write_marker(struct file *file, const char __user *buffer, - unsigned long count, void *data) +static ssize_t write_marker(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos) { char *event = NULL; struct mm_io_header *headp; - int len = (count > 65535) ? 65535 : count; + ssize_t len = (count > 65535) ? 65535 : count; event = kzalloc(sizeof(*headp) + len, GFP_KERNEL); if (!event) @@ -117,7 +148,12 @@ static int write_marker(struct file *file, const char __user *buffer, return -EFAULT; } - relay_write(chan, event, sizeof(*headp) + len); + spin_lock_irq(&trace_lock); + if (is_enabled()) + relay_write(chan, event, sizeof(*headp) + len); + else + len = -EINVAL; + spin_unlock_irq(&trace_lock); kfree(event); return len; } @@ -128,19 +164,18 @@ static void print_pte(unsigned long address) pte_t *pte = lookup_address(address, &level); if (!pte) { - pr_err(MODULE_NAME ": Error in %s: no pte for page 0x%08lx\n", + pr_err(NAME "Error in %s: no pte for page 0x%08lx\n", __func__, address); return; } if (level == PG_LEVEL_2M) { - pr_emerg(MODULE_NAME ": 4MB pages are not currently " - "supported: %lx\n", address); + pr_emerg(NAME "4MB pages are not currently supported: " + "0x%08lx\n", address); BUG(); } - pr_info(MODULE_NAME ": pte for 0x%lx: 0x%lx 0x%lx\n", - address, pte_val(*pte), - pte_val(*pte) & _PAGE_PRESENT); + pr_info(NAME "pte for 0x%lx: 0x%lx 0x%lx\n", address, pte_val(*pte), + pte_val(*pte) & _PAGE_PRESENT); } /* @@ -150,22 +185,18 @@ static void print_pte(unsigned long address) static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) { const struct trap_reason *my_reason = &get_cpu_var(pf_reason); - pr_emerg(MODULE_NAME ": unexpected fault for address: %lx, " - "last fault for address: %lx\n", + pr_emerg(NAME "unexpected fault for address: 0x%08lx, " + "last fault for address: 0x%08lx\n", addr, my_reason->addr); print_pte(addr); + print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip); + print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip); #ifdef __i386__ - print_symbol(KERN_EMERG "faulting EIP is at %s\n", regs->ip); - print_symbol(KERN_EMERG "last faulting EIP was at %s\n", - my_reason->ip); pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", regs->ax, regs->bx, regs->cx, regs->dx); pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", regs->si, regs->di, regs->bp, regs->sp); #else - print_symbol(KERN_EMERG "faulting RIP is at %s\n", regs->ip); - print_symbol(KERN_EMERG "last faulting RIP was at %s\n", - my_reason->ip); pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n", regs->ax, regs->cx, regs->dx); pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n", @@ -197,6 +228,10 @@ static void pre(struct kmmio_probe *p, struct pt_regs *regs, my_trace->header.pid = 0; my_trace->header.data_len = sizeof(struct mm_io_rw); my_trace->rw.address = addr; + /* + * struct remap_trace *trace = p->user_data; + * phys = addr - trace->probe.addr + trace->phys; + */ /* * Only record the program counter when requested. @@ -246,15 +281,10 @@ static void post(struct kmmio_probe *p, unsigned long condition, struct trap_reason *my_reason = &get_cpu_var(pf_reason); struct mm_io_header_rw *my_trace = &get_cpu_var(cpu_trace); - /* - * XXX: This might not get called, if the probe is removed while - * trace hit is on flight. - */ - /* this should always return the active_trace count to 0 */ my_reason->active_traces--; if (my_reason->active_traces) { - pr_emerg(MODULE_NAME ": unexpected post handler"); + pr_emerg(NAME "unexpected post handler"); BUG(); } @@ -284,20 +314,23 @@ static int subbuf_start_handler(struct rchan_buf *buf, void *subbuf, int count; if (relay_buf_full(buf)) { if (atomic_inc_return(drop) == 1) - pr_err(MODULE_NAME ": cpu %d buffer full!\n", cpu); + pr_err(NAME "cpu %d buffer full!\n", cpu); return 0; } count = atomic_read(drop); if (count) { - pr_err(MODULE_NAME ": cpu %d buffer no longer full, " - "missed %d events.\n", - cpu, count); + pr_err(NAME "cpu %d buffer no longer full, missed %d events.\n", + cpu, count); atomic_sub(count, drop); } return 1; } +static struct file_operations mmio_fops = { + .owner = THIS_MODULE, +}; + /* file_create() callback. Creates relay file in debugfs. */ static struct dentry *create_buf_file_handler(const char *filename, struct dentry *parent, @@ -333,34 +366,10 @@ static struct rchan_callbacks relay_callbacks = { .remove_buf_file = remove_buf_file_handler, }; -/* - * create_channel - creates channel /debug/APP_DIR/cpuXXX - * Returns channel on success, NULL otherwise - */ -static struct rchan *create_channel(unsigned size, unsigned n) -{ - return relay_open("cpu", dir, size, n, &relay_callbacks, NULL); -} - -/* destroy_channel - destroys channel /debug/APP_DIR/cpuXXX */ -static void destroy_channel(void) -{ - if (chan) { - relay_close(chan); - chan = NULL; - } -} - -struct remap_trace { - struct list_head list; - struct kmmio_probe probe; -}; -static LIST_HEAD(trace_list); -static DEFINE_SPINLOCK(trace_list_lock); - -static void do_ioremap_trace_core(unsigned long offset, unsigned long size, +static void ioremap_trace_core(unsigned long offset, unsigned long size, void __iomem *addr) { + static atomic_t next_id; struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL); struct mm_io_header_map event = { .header = { @@ -380,61 +389,49 @@ static void do_ioremap_trace_core(unsigned long offset, unsigned long size, }; record_timestamp(&event.header); + if (!trace) { + pr_err(NAME "kmalloc failed in ioremap\n"); + return; + } + *trace = (struct remap_trace) { .probe = { .addr = (unsigned long)addr, .len = size, .pre_handler = pre, .post_handler = post, - } + .user_data = trace + }, + .phys = offset, + .id = atomic_inc_return(&next_id) }; + spin_lock_irq(&trace_lock); + if (!is_enabled()) + goto not_enabled; + relay_write(chan, &event, sizeof(event)); - spin_lock(&trace_list_lock); list_add_tail(&trace->list, &trace_list); - spin_unlock(&trace_list_lock); if (!nommiotrace) register_kmmio_probe(&trace->probe); + +not_enabled: + spin_unlock_irq(&trace_lock); } -static void ioremap_trace_core(unsigned long offset, unsigned long size, - void __iomem *addr) +void +mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr) { - if ((filter_offset) && (offset != filter_offset)) + if (!is_enabled()) /* recheck and proper locking in *_core() */ return; - /* Don't trace the low PCI/ISA area, it's always mapped.. */ - if (!ISA_trace && (offset < ISA_END_ADDRESS) && - (offset + size > ISA_START_ADDRESS)) { - pr_notice(MODULE_NAME ": Ignoring map of low PCI/ISA area " - "(0x%lx-0x%lx)\n", - offset, offset + size); + pr_debug(NAME "ioremap_*(0x%lx, 0x%lx) = %p\n", offset, size, addr); + if ((filter_offset) && (offset != filter_offset)) return; - } - do_ioremap_trace_core(offset, size, addr); -} - -void __iomem *ioremap_cache_trace(unsigned long offset, unsigned long size) -{ - void __iomem *p = ioremap_cache(offset, size); - pr_debug(MODULE_NAME ": ioremap_cache(0x%lx, 0x%lx) = %p\n", - offset, size, p); - ioremap_trace_core(offset, size, p); - return p; + ioremap_trace_core(offset, size, addr); } -EXPORT_SYMBOL(ioremap_cache_trace); -void __iomem *ioremap_nocache_trace(unsigned long offset, unsigned long size) -{ - void __iomem *p = ioremap_nocache(offset, size); - pr_debug(MODULE_NAME ": ioremap_nocache(0x%lx, 0x%lx) = %p\n", - offset, size, p); - ioremap_trace_core(offset, size, p); - return p; -} -EXPORT_SYMBOL(ioremap_nocache_trace); - -void iounmap_trace(volatile void __iomem *addr) +static void iounmap_trace_core(volatile void __iomem *addr) { struct mm_io_header_map event = { .header = { @@ -454,84 +451,212 @@ void iounmap_trace(volatile void __iomem *addr) }; struct remap_trace *trace; struct remap_trace *tmp; - pr_debug(MODULE_NAME ": Unmapping %p.\n", addr); + struct remap_trace *found_trace = NULL; + + pr_debug(NAME "Unmapping %p.\n", addr); record_timestamp(&event.header); - spin_lock(&trace_list_lock); + spin_lock_irq(&trace_lock); + if (!is_enabled()) + goto not_enabled; + list_for_each_entry_safe(trace, tmp, &trace_list, list) { if ((unsigned long)addr == trace->probe.addr) { if (!nommiotrace) unregister_kmmio_probe(&trace->probe); list_del(&trace->list); - kfree(trace); + found_trace = trace; break; } } - spin_unlock(&trace_list_lock); relay_write(chan, &event, sizeof(event)); - iounmap(addr); + +not_enabled: + spin_unlock_irq(&trace_lock); + if (found_trace) { + synchronize_rcu(); /* unregister_kmmio_probe() requirement */ + kfree(found_trace); + } +} + +void mmiotrace_iounmap(volatile void __iomem *addr) +{ + might_sleep(); + if (is_enabled()) /* recheck and proper locking in *_core() */ + iounmap_trace_core(addr); } -EXPORT_SYMBOL(iounmap_trace); static void clear_trace_list(void) { struct remap_trace *trace; struct remap_trace *tmp; - spin_lock(&trace_list_lock); - list_for_each_entry_safe(trace, tmp, &trace_list, list) { - pr_warning(MODULE_NAME ": purging non-iounmapped " + /* + * No locking required, because the caller ensures we are in a + * critical section via mutex, and is_enabled() is false, + * i.e. nothing can traverse or modify this list. + * Caller also ensures is_enabled() cannot change. + */ + list_for_each_entry(trace, &trace_list, list) { + pr_notice(NAME "purging non-iounmapped " "trace @0x%08lx, size 0x%lx.\n", trace->probe.addr, trace->probe.len); if (!nommiotrace) unregister_kmmio_probe(&trace->probe); + } + synchronize_rcu(); /* unregister_kmmio_probe() requirement */ + + list_for_each_entry_safe(trace, tmp, &trace_list, list) { list_del(&trace->list); kfree(trace); + } +} + +static ssize_t read_enabled_file_bool(struct file *file, + char __user *user_buf, size_t count, loff_t *ppos) +{ + char buf[3]; + + if (is_enabled()) + buf[0] = '1'; + else + buf[0] = '0'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static void enable_mmiotrace(void); +static void disable_mmiotrace(void); + +static ssize_t write_enabled_file_bool(struct file *file, + const char __user *user_buf, size_t count, loff_t *ppos) +{ + char buf[32]; + int buf_size = min(count, (sizeof(buf)-1)); + + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + + switch (buf[0]) { + case 'y': + case 'Y': + case '1': + enable_mmiotrace(); + break; + case 'n': + case 'N': + case '0': + disable_mmiotrace(); break; } - spin_unlock(&trace_list_lock); + + return count; +} + +/* this ripped from kernel/kprobes.c */ +static struct file_operations fops_enabled = { + .owner = THIS_MODULE, + .read = read_enabled_file_bool, + .write = write_enabled_file_bool +}; + +static struct file_operations fops_marker = { + .owner = THIS_MODULE, + .write = write_marker +}; + +static void enable_mmiotrace(void) +{ + mutex_lock(&mmiotrace_mutex); + if (is_enabled()) + goto out; + + chan = relay_open("cpu", dir, subbuf_size, n_subbufs, + &relay_callbacks, NULL); + if (!chan) { + pr_err(NAME "relay app channel creation failed.\n"); + goto out; + } + + reference_kmmio(); + + marker_file = debugfs_create_file("marker", 0660, dir, NULL, + &fops_marker); + if (!marker_file) + pr_err(NAME "marker file creation failed.\n"); + + if (nommiotrace) + pr_info(NAME "MMIO tracing disabled.\n"); + if (ISA_trace) + pr_warning(NAME "Warning! low ISA range will be traced.\n"); + spin_lock_irq(&trace_lock); + atomic_inc(&mmiotrace_enabled); + spin_unlock_irq(&trace_lock); + pr_info(NAME "enabled.\n"); +out: + mutex_unlock(&mmiotrace_mutex); +} + +static void disable_mmiotrace(void) +{ + mutex_lock(&mmiotrace_mutex); + if (!is_enabled()) + goto out; + + spin_lock_irq(&trace_lock); + atomic_dec(&mmiotrace_enabled); + BUG_ON(is_enabled()); + spin_unlock_irq(&trace_lock); + + clear_trace_list(); /* guarantees: no more kmmio callbacks */ + unreference_kmmio(); + if (marker_file) { + debugfs_remove(marker_file); + marker_file = NULL; + } + if (chan) { + relay_close(chan); + chan = NULL; + } + + pr_info(NAME "disabled.\n"); +out: + mutex_unlock(&mmiotrace_mutex); } static int __init init(void) { + pr_debug(NAME "load...\n"); if (n_subbufs < 2) return -EINVAL; dir = debugfs_create_dir(APP_DIR, NULL); if (!dir) { - pr_err(MODULE_NAME ": Couldn't create relay app directory.\n"); + pr_err(NAME "Couldn't create relay app directory.\n"); return -ENOMEM; } - chan = create_channel(subbuf_size, n_subbufs); - if (!chan) { + enabled_file = debugfs_create_file("enabled", 0600, dir, NULL, + &fops_enabled); + if (!enabled_file) { + pr_err(NAME "Couldn't create enabled file.\n"); debugfs_remove(dir); - pr_err(MODULE_NAME ": relay app channel creation failed\n"); return -ENOMEM; } - reference_kmmio(); - - proc_marker_file = create_proc_entry(MARKER_FILE, 0, NULL); - if (proc_marker_file) - proc_marker_file->write_proc = write_marker; + if (enable_now) + enable_mmiotrace(); - pr_debug(MODULE_NAME ": loaded.\n"); - if (nommiotrace) - pr_info(MODULE_NAME ": MMIO tracing disabled.\n"); - if (ISA_trace) - pr_warning(MODULE_NAME ": Warning! low ISA range will be " - "traced.\n"); return 0; } static void __exit cleanup(void) { - pr_debug(MODULE_NAME ": unload...\n"); - clear_trace_list(); - unreference_kmmio(); - remove_proc_entry(MARKER_FILE, NULL); - destroy_channel(); + pr_debug(NAME "unload...\n"); + if (enabled_file) + debugfs_remove(enabled_file); + disable_mmiotrace(); if (dir) debugfs_remove(dir); } diff --git a/arch/x86/kernel/mmiotrace/testmmiotrace.c b/arch/x86/kernel/mmiotrace/testmmiotrace.c index 5ecff578672b..cfa60b227c8d 100644 --- a/arch/x86/kernel/mmiotrace/testmmiotrace.c +++ b/arch/x86/kernel/mmiotrace/testmmiotrace.c @@ -4,10 +4,6 @@ #include #include -extern void __iomem *ioremap_nocache_trace(unsigned long offset, - unsigned long size); -extern void iounmap_trace(volatile void __iomem *addr); - #define MODULE_NAME "testmmiotrace" static unsigned long mmio_address; @@ -28,25 +24,24 @@ static void do_write_test(void __iomem *p) static void do_read_test(void __iomem *p) { unsigned int i; - volatile unsigned int v; for (i = 0; i < 256; i++) - v = ioread8(p + i); + ioread8(p + i); for (i = 1024; i < (5 * 1024); i += 2) - v = ioread16(p + i); + ioread16(p + i); for (i = (5 * 1024); i < (16 * 1024); i += 4) - v = ioread32(p + i); + ioread32(p + i); } static void do_test(void) { - void __iomem *p = ioremap_nocache_trace(mmio_address, 0x4000); + void __iomem *p = ioremap_nocache(mmio_address, 0x4000); if (!p) { pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); return; } do_write_test(p); do_read_test(p); - iounmap_trace(p); + iounmap(p); } static int __init init(void) diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 71bb3159031a..8927c878544d 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -126,6 +127,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, unsigned long new_prot_val; pgprot_t prot; int retval; + void __iomem *ret_addr; /* Don't allow wraparound or zero size */ last_addr = phys_addr + size - 1; @@ -233,7 +235,10 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, return NULL; } - return (void __iomem *) (vaddr + offset); + ret_addr = (void __iomem *) (vaddr + offset); + mmiotrace_ioremap(phys_addr, size, ret_addr); + + return ret_addr; } /** @@ -325,6 +330,8 @@ void iounmap(volatile void __iomem *addr) addr = (volatile void __iomem *) (PAGE_MASK & (unsigned long __force)addr); + mmiotrace_iounmap(addr); + /* Use the vm area unlocked, assuming the caller ensures there isn't another iounmap for the same address in parallel. Reuse of the virtual address is prevented by diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index d87a6cd8b686..cb5efd0c7f51 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -16,11 +16,12 @@ typedef void (*kmmio_post_handler_t)(struct kmmio_probe *, unsigned long condition, struct pt_regs *); struct kmmio_probe { - struct list_head list; + struct list_head list; /* kmmio internal list */ unsigned long addr; /* start location of the probe point */ unsigned long len; /* length of the probe region */ kmmio_pre_handler_t pre_handler; /* Called before addr is executed. */ kmmio_post_handler_t post_handler; /* Called after addr is executed */ + void *user_data; }; /* kmmio is active by some kmmio_probes? */ @@ -38,6 +39,21 @@ extern void unregister_kmmio_probe(struct kmmio_probe *p); /* Called from page fault handler. */ extern int kmmio_handler(struct pt_regs *regs, unsigned long addr); +/* Called from ioremap.c */ +#ifdef CONFIG_MMIOTRACE +extern void +mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr); +extern void mmiotrace_iounmap(volatile void __iomem *addr); +#else +static inline void +mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr) +{ +} +static inline void mmiotrace_iounmap(volatile void __iomem *addr) +{ +} +#endif /* CONFIG_MMIOTRACE_HOOKS */ + #endif /* __KERNEL__ */ -- cgit v1.2.3 From f984b51e0779a6dd30feedc41404013ca54e5d05 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: ftrace: add mmiotrace plugin On Sat, 22 Mar 2008 13:07:47 +0100 Ingo Molnar wrote: > > > i'd suggest the following: pull x86.git and sched-devel.git into a > > > single tree [the two will combine without rejects]. Then try to add a > > > kernel/tracing/trace_mmiotrace.c ftrace plugin. The trace_sysprof.c > > > plugin might be a good example. > > > > I did this and now I have mmiotrace enabled/disabled via the tracing > > framework (what do we call this, since ftrace is one of the tracers?). > > cool! could you send the patches for that? (even if they are not fully > functional yet) Patch attached in the end. Nice to see how much code disappeared. I tried to mark all the features I had to break with XXX-comments. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig.debug | 3 +- arch/x86/kernel/mmiotrace/mmio-mod.c | 208 +++++------------------------------ include/linux/mmiotrace.h | 6 + kernel/trace/Makefile | 1 + kernel/trace/trace_mmiotrace.c | 84 ++++++++++++++ 5 files changed, 123 insertions(+), 179 deletions(-) create mode 100644 kernel/trace/trace_mmiotrace.c (limited to 'include') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index aa0d6462b1fc..7e4b8494078e 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -173,7 +173,8 @@ config MMIOTRACE_HOOKS config MMIOTRACE bool "Memory mapped IO tracing" - depends on DEBUG_KERNEL && RELAY && DEBUG_FS + depends on DEBUG_KERNEL && RELAY + select TRACING select MMIOTRACE_HOOKS default y help diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/kernel/mmiotrace/mmio-mod.c index 738644061e4e..c7a67d7e482b 100644 --- a/arch/x86/kernel/mmiotrace/mmio-mod.c +++ b/arch/x86/kernel/mmiotrace/mmio-mod.c @@ -22,9 +22,8 @@ #define DEBUG 1 #include -#include #include -#include +#include #include #include #include @@ -63,18 +62,18 @@ static const size_t subbuf_size = 256*1024; static DEFINE_PER_CPU(struct trap_reason, pf_reason); static DEFINE_PER_CPU(struct mm_io_header_rw, cpu_trace); +#if 0 /* XXX: no way gather this info anymore */ /* Access to this is not per-cpu. */ static DEFINE_PER_CPU(atomic_t, dropped); +#endif static struct dentry *dir; -static struct dentry *enabled_file; static struct dentry *marker_file; static DEFINE_MUTEX(mmiotrace_mutex); static DEFINE_SPINLOCK(trace_lock); static atomic_t mmiotrace_enabled; static LIST_HEAD(trace_list); /* struct remap_trace */ -static struct rchan *chan; /* * Locking in this file: @@ -93,36 +92,24 @@ static unsigned long filter_offset; static int nommiotrace; static int ISA_trace; static int trace_pc; -static int enable_now; module_param(n_subbufs, uint, 0); module_param(filter_offset, ulong, 0); module_param(nommiotrace, bool, 0); module_param(ISA_trace, bool, 0); module_param(trace_pc, bool, 0); -module_param(enable_now, bool, 0); MODULE_PARM_DESC(n_subbufs, "Number of 256kB buffers, default 128."); MODULE_PARM_DESC(filter_offset, "Start address of traced mappings."); MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing."); MODULE_PARM_DESC(ISA_trace, "Do not exclude the low ISA range."); MODULE_PARM_DESC(trace_pc, "Record address of faulting instructions."); -MODULE_PARM_DESC(enable_now, "Start mmiotracing immediately on module load."); static bool is_enabled(void) { return atomic_read(&mmiotrace_enabled); } -static void record_timestamp(struct mm_io_header *header) -{ - struct timespec now; - - getnstimeofday(&now); - header->sec = now.tv_sec; - header->nsec = now.tv_nsec; -} - /* * Write callback for the debugfs entry: * Read a marker and write it to the mmio trace log @@ -141,7 +128,6 @@ static ssize_t write_marker(struct file *file, const char __user *buffer, headp = (struct mm_io_header *)event; headp->type = MMIO_MAGIC | (MMIO_MARKER << MMIO_OPCODE_SHIFT); headp->data_len = len; - record_timestamp(headp); if (copy_from_user(event + sizeof(*headp), buffer, len)) { kfree(event); @@ -149,9 +135,11 @@ static ssize_t write_marker(struct file *file, const char __user *buffer, } spin_lock_irq(&trace_lock); +#if 0 /* XXX: convert this to use tracing */ if (is_enabled()) relay_write(chan, event, sizeof(*headp) + len); else +#endif len = -EINVAL; spin_unlock_irq(&trace_lock); kfree(event); @@ -242,7 +230,11 @@ static void pre(struct kmmio_probe *p, struct pt_regs *regs, else my_trace->rw.pc = 0; - record_timestamp(&my_trace->header); + /* + * XXX: the timestamp recorded will be *after* the tracing has been + * done, not at the time we hit the instruction. SMP implications + * on event ordering? + */ switch (type) { case REG_READ: @@ -295,77 +287,19 @@ static void post(struct kmmio_probe *p, unsigned long condition, default: break; } - relay_write(chan, my_trace, sizeof(*my_trace)); + + /* + * XXX: Several required values are ignored: + * - mapping id + * - program counter + * Also the address should be physical, not virtual. + */ + mmio_trace_record(my_trace->header.type, my_trace->rw.address, + my_trace->rw.value); put_cpu_var(cpu_trace); put_cpu_var(pf_reason); } -/* - * subbuf_start() relay callback. - * - * Defined so that we know when events are dropped due to the buffer-full - * condition. - */ -static int subbuf_start_handler(struct rchan_buf *buf, void *subbuf, - void *prev_subbuf, size_t prev_padding) -{ - unsigned int cpu = buf->cpu; - atomic_t *drop = &per_cpu(dropped, cpu); - int count; - if (relay_buf_full(buf)) { - if (atomic_inc_return(drop) == 1) - pr_err(NAME "cpu %d buffer full!\n", cpu); - return 0; - } - count = atomic_read(drop); - if (count) { - pr_err(NAME "cpu %d buffer no longer full, missed %d events.\n", - cpu, count); - atomic_sub(count, drop); - } - - return 1; -} - -static struct file_operations mmio_fops = { - .owner = THIS_MODULE, -}; - -/* file_create() callback. Creates relay file in debugfs. */ -static struct dentry *create_buf_file_handler(const char *filename, - struct dentry *parent, - int mode, - struct rchan_buf *buf, - int *is_global) -{ - struct dentry *buf_file; - - mmio_fops.read = relay_file_operations.read; - mmio_fops.open = relay_file_operations.open; - mmio_fops.poll = relay_file_operations.poll; - mmio_fops.mmap = relay_file_operations.mmap; - mmio_fops.release = relay_file_operations.release; - mmio_fops.splice_read = relay_file_operations.splice_read; - - buf_file = debugfs_create_file(filename, mode, parent, buf, - &mmio_fops); - - return buf_file; -} - -/* file_remove() default callback. Removes relay file in debugfs. */ -static int remove_buf_file_handler(struct dentry *dentry) -{ - debugfs_remove(dentry); - return 0; -} - -static struct rchan_callbacks relay_callbacks = { - .subbuf_start = subbuf_start_handler, - .create_buf_file = create_buf_file_handler, - .remove_buf_file = remove_buf_file_handler, -}; - static void ioremap_trace_core(unsigned long offset, unsigned long size, void __iomem *addr) { @@ -387,7 +321,6 @@ static void ioremap_trace_core(unsigned long offset, unsigned long size, .pc = 0 } }; - record_timestamp(&event.header); if (!trace) { pr_err(NAME "kmalloc failed in ioremap\n"); @@ -410,7 +343,10 @@ static void ioremap_trace_core(unsigned long offset, unsigned long size, if (!is_enabled()) goto not_enabled; - relay_write(chan, &event, sizeof(event)); + /* + * XXX: Insufficient data recorded! + */ + mmio_trace_record(event.header.type, event.map.addr, event.map.len); list_add_tail(&trace->list, &trace_list); if (!nommiotrace) register_kmmio_probe(&trace->probe); @@ -454,7 +390,6 @@ static void iounmap_trace_core(volatile void __iomem *addr) struct remap_trace *found_trace = NULL; pr_debug(NAME "Unmapping %p.\n", addr); - record_timestamp(&event.header); spin_lock_irq(&trace_lock); if (!is_enabled()) @@ -469,7 +404,8 @@ static void iounmap_trace_core(volatile void __iomem *addr) break; } } - relay_write(chan, &event, sizeof(event)); + mmio_trace_record(event.header.type, event.map.addr, + found_trace ? found_trace->id : -1); not_enabled: spin_unlock_irq(&trace_lock); @@ -512,77 +448,23 @@ static void clear_trace_list(void) } } -static ssize_t read_enabled_file_bool(struct file *file, - char __user *user_buf, size_t count, loff_t *ppos) -{ - char buf[3]; - - if (is_enabled()) - buf[0] = '1'; - else - buf[0] = '0'; - buf[1] = '\n'; - buf[2] = '\0'; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static void enable_mmiotrace(void); -static void disable_mmiotrace(void); - -static ssize_t write_enabled_file_bool(struct file *file, - const char __user *user_buf, size_t count, loff_t *ppos) -{ - char buf[32]; - int buf_size = min(count, (sizeof(buf)-1)); - - if (copy_from_user(buf, user_buf, buf_size)) - return -EFAULT; - - switch (buf[0]) { - case 'y': - case 'Y': - case '1': - enable_mmiotrace(); - break; - case 'n': - case 'N': - case '0': - disable_mmiotrace(); - break; - } - - return count; -} - -/* this ripped from kernel/kprobes.c */ -static struct file_operations fops_enabled = { - .owner = THIS_MODULE, - .read = read_enabled_file_bool, - .write = write_enabled_file_bool -}; - static struct file_operations fops_marker = { .owner = THIS_MODULE, .write = write_marker }; -static void enable_mmiotrace(void) +void enable_mmiotrace(void) { mutex_lock(&mmiotrace_mutex); if (is_enabled()) goto out; - chan = relay_open("cpu", dir, subbuf_size, n_subbufs, - &relay_callbacks, NULL); - if (!chan) { - pr_err(NAME "relay app channel creation failed.\n"); - goto out; - } - reference_kmmio(); +#if 0 /* XXX: tracing does not support text entries */ marker_file = debugfs_create_file("marker", 0660, dir, NULL, &fops_marker); +#endif if (!marker_file) pr_err(NAME "marker file creation failed.\n"); @@ -598,7 +480,7 @@ out: mutex_unlock(&mmiotrace_mutex); } -static void disable_mmiotrace(void) +void disable_mmiotrace(void) { mutex_lock(&mmiotrace_mutex); if (!is_enabled()) @@ -615,17 +497,13 @@ static void disable_mmiotrace(void) debugfs_remove(marker_file); marker_file = NULL; } - if (chan) { - relay_close(chan); - chan = NULL; - } pr_info(NAME "disabled.\n"); out: mutex_unlock(&mmiotrace_mutex); } -static int __init init(void) +int __init init_mmiotrace(void) { pr_debug(NAME "load...\n"); if (n_subbufs < 2) @@ -636,31 +514,5 @@ static int __init init(void) pr_err(NAME "Couldn't create relay app directory.\n"); return -ENOMEM; } - - enabled_file = debugfs_create_file("enabled", 0600, dir, NULL, - &fops_enabled); - if (!enabled_file) { - pr_err(NAME "Couldn't create enabled file.\n"); - debugfs_remove(dir); - return -ENOMEM; - } - - if (enable_now) - enable_mmiotrace(); - return 0; } - -static void __exit cleanup(void) -{ - pr_debug(NAME "unload...\n"); - if (enabled_file) - debugfs_remove(enabled_file); - disable_mmiotrace(); - if (dir) - debugfs_remove(dir); -} - -module_init(init); -module_exit(cleanup); -MODULE_LICENSE("GPL"); diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index cb5efd0c7f51..579b3b06c90e 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -54,6 +54,12 @@ static inline void mmiotrace_iounmap(volatile void __iomem *addr) } #endif /* CONFIG_MMIOTRACE_HOOKS */ +/* in kernel/trace/trace_mmiotrace.c */ +extern int __init init_mmiotrace(void); +extern void enable_mmiotrace(void); +extern void disable_mmiotrace(void); +extern void mmio_trace_record(u32 type, unsigned long addr, unsigned long arg); + #endif /* __KERNEL__ */ diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index d9efbbfa2bdf..c44a7dce9086 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -18,5 +18,6 @@ obj-$(CONFIG_FTRACE) += trace_functions.o obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o +obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o libftrace-y := ftrace.o diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c new file mode 100644 index 000000000000..e4dd03cc5aa6 --- /dev/null +++ b/kernel/trace/trace_mmiotrace.c @@ -0,0 +1,84 @@ +/* + * Memory mapped I/O tracing + * + * Copyright (C) 2008 Pekka Paalanen + */ + +#define DEBUG 1 + +#include +#include + +#include "trace.h" + +extern void +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3); + +static struct trace_array *mmio_trace_array; + + +static void mmio_trace_init(struct trace_array *tr) +{ + pr_debug("in %s\n", __func__); + mmio_trace_array = tr; + if (tr->ctrl) + enable_mmiotrace(); +} + +static void mmio_trace_reset(struct trace_array *tr) +{ + pr_debug("in %s\n", __func__); + if (tr->ctrl) + disable_mmiotrace(); +} + +static void mmio_trace_ctrl_update(struct trace_array *tr) +{ + pr_debug("in %s\n", __func__); + if (tr->ctrl) + enable_mmiotrace(); + else + disable_mmiotrace(); +} + +static struct tracer mmio_tracer __read_mostly = +{ + .name = "mmiotrace", + .init = mmio_trace_init, + .reset = mmio_trace_reset, + .ctrl_update = mmio_trace_ctrl_update, +}; + +__init static int init_mmio_trace(void) +{ + int ret = init_mmiotrace(); + if (ret) + return ret; + return register_tracer(&mmio_tracer); +} +device_initcall(init_mmio_trace); + +void mmio_trace_record(u32 type, unsigned long addr, unsigned long arg) +{ + struct trace_array *tr = mmio_trace_array; + struct trace_array_cpu *data = tr->data[smp_processor_id()]; + + if (!current || current->pid == 0) { + /* + * XXX: This is a problem. We need to able to record, no + * matter what. tracing_generic_entry_update() would crash. + */ + static unsigned limit; + if (limit++ < 12) + pr_err("Error in %s: no current.\n", __func__); + return; + } + if (!tr || !data) { + static unsigned limit; + if (limit++ < 12) + pr_err("%s: no tr or data\n", __func__); + return; + } + __trace_special(tr, data, type, addr, arg); +} -- cgit v1.2.3 From bd8ac686c73c7e925fcfe0b02dc4e7b947127864 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: ftrace: mmiotrace, updates here is a patch that makes mmiotrace work almost well within the tracing framework. The patch applies on top of my previous patch. I have my own output formatting in place now. Summary of changes: - fix the NULL dereference that was due to not calling tracing_reset() - add print_line() callback into struct tracer - implement print_line() for mmiotrace, producing up-to-spec text - add my output header, but that is not really called in the right place - rewrote the main structs in mmiotrace - added two new trace entry types: TRACE_MMIO_RW and TRACE_MMIO_MAP - made some functions in trace.c non-static - check current==NULL in tracing_generic_entry_update() - fix(?) comparison in trace_seq_printf() Things seem to work fine except a few issues. Markers (text lines injected into mmiotrace log) are missing, I did not feel hacking them in before we have variable length entries. My output header is printed only for 'trace' file, but not 'trace_pipe'. For some reason, despite my quick fix, iter->trace is NULL in print_trace_line() when called from 'trace_pipe' file, which means I don't get proper output formatting. I only tried by loading nouveau.ko, which just detects the card, and that is traced fine. I didn't try further. Map, two reads and unmap. Works perfectly. I am missing the information about overflows, I'd prefer to have a counter for lost events. I didn't try, but I guess currently there is no way of knowning when it overflows? So, not too far from being fully operational, it seems :-) And looking at the diffstat, there also is some 700-900 lines of user space code that just became obsolete. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig.debug | 2 +- arch/x86/kernel/mmiotrace/mmio-mod.c | 140 ++++++++++---------------------- include/linux/mmiotrace.h | 85 ++++++-------------- kernel/trace/trace.c | 34 ++++++++ kernel/trace/trace.h | 14 ++++ kernel/trace/trace_mmiotrace.c | 151 ++++++++++++++++++++++++++++------- 6 files changed, 238 insertions(+), 188 deletions(-) (limited to 'include') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 7e4b8494078e..1d6de0d67f99 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -173,7 +173,7 @@ config MMIOTRACE_HOOKS config MMIOTRACE bool "Memory mapped IO tracing" - depends on DEBUG_KERNEL && RELAY + depends on DEBUG_KERNEL select TRACING select MMIOTRACE_HOOKS default y diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/kernel/mmiotrace/mmio-mod.c index c7a67d7e482b..62abc281a512 100644 --- a/arch/x86/kernel/mmiotrace/mmio-mod.c +++ b/arch/x86/kernel/mmiotrace/mmio-mod.c @@ -37,11 +37,6 @@ #define NAME "mmiotrace: " -/* This app's relay channel files will appear in /debug/mmio-trace */ -static const char APP_DIR[] = "mmio-trace"; -/* the marker injection file in /debug/APP_DIR */ -static const char MARKER_FILE[] = "mmio-marker"; - struct trap_reason { unsigned long addr; unsigned long ip; @@ -56,18 +51,15 @@ struct remap_trace { unsigned long id; }; -static const size_t subbuf_size = 256*1024; - /* Accessed per-cpu. */ static DEFINE_PER_CPU(struct trap_reason, pf_reason); -static DEFINE_PER_CPU(struct mm_io_header_rw, cpu_trace); +static DEFINE_PER_CPU(struct mmiotrace_rw, cpu_trace); #if 0 /* XXX: no way gather this info anymore */ /* Access to this is not per-cpu. */ static DEFINE_PER_CPU(atomic_t, dropped); #endif -static struct dentry *dir; static struct dentry *marker_file; static DEFINE_MUTEX(mmiotrace_mutex); @@ -82,24 +74,21 @@ static LIST_HEAD(trace_list); /* struct remap_trace */ * and trace_lock. * - Routines depending on is_enabled() must take trace_lock. * - trace_list users must hold trace_lock. - * - is_enabled() guarantees that chan is valid. + * - is_enabled() guarantees that mmio_trace_record is allowed. * - pre/post callbacks assume the effect of is_enabled() being true. */ /* module parameters */ -static unsigned int n_subbufs = 32*4; static unsigned long filter_offset; static int nommiotrace; static int ISA_trace; static int trace_pc; -module_param(n_subbufs, uint, 0); module_param(filter_offset, ulong, 0); module_param(nommiotrace, bool, 0); module_param(ISA_trace, bool, 0); module_param(trace_pc, bool, 0); -MODULE_PARM_DESC(n_subbufs, "Number of 256kB buffers, default 128."); MODULE_PARM_DESC(filter_offset, "Start address of traced mappings."); MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing."); MODULE_PARM_DESC(ISA_trace, "Do not exclude the low ISA range."); @@ -110,6 +99,7 @@ static bool is_enabled(void) return atomic_read(&mmiotrace_enabled); } +#if 0 /* XXX: needs rewrite */ /* * Write callback for the debugfs entry: * Read a marker and write it to the mmio trace log @@ -145,6 +135,7 @@ static ssize_t write_marker(struct file *file, const char __user *buffer, kfree(event); return len; } +#endif static void print_pte(unsigned long address) { @@ -198,9 +189,10 @@ static void pre(struct kmmio_probe *p, struct pt_regs *regs, unsigned long addr) { struct trap_reason *my_reason = &get_cpu_var(pf_reason); - struct mm_io_header_rw *my_trace = &get_cpu_var(cpu_trace); + struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace); const unsigned long instptr = instruction_pointer(regs); const enum reason_type type = get_ins_type(instptr); + struct remap_trace *trace = p->user_data; /* it doesn't make sense to have more than one active trace per cpu */ if (my_reason->active_traces) @@ -212,23 +204,17 @@ static void pre(struct kmmio_probe *p, struct pt_regs *regs, my_reason->addr = addr; my_reason->ip = instptr; - my_trace->header.type = MMIO_MAGIC; - my_trace->header.pid = 0; - my_trace->header.data_len = sizeof(struct mm_io_rw); - my_trace->rw.address = addr; - /* - * struct remap_trace *trace = p->user_data; - * phys = addr - trace->probe.addr + trace->phys; - */ + my_trace->phys = addr - trace->probe.addr + trace->phys; + my_trace->map_id = trace->id; /* * Only record the program counter when requested. * It may taint clean-room reverse engineering. */ if (trace_pc) - my_trace->rw.pc = instptr; + my_trace->pc = instptr; else - my_trace->rw.pc = 0; + my_trace->pc = 0; /* * XXX: the timestamp recorded will be *after* the tracing has been @@ -238,28 +224,25 @@ static void pre(struct kmmio_probe *p, struct pt_regs *regs, switch (type) { case REG_READ: - my_trace->header.type |= - (MMIO_READ << MMIO_OPCODE_SHIFT) | - (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT); + my_trace->opcode = MMIO_READ; + my_trace->width = get_ins_mem_width(instptr); break; case REG_WRITE: - my_trace->header.type |= - (MMIO_WRITE << MMIO_OPCODE_SHIFT) | - (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT); - my_trace->rw.value = get_ins_reg_val(instptr, regs); + my_trace->opcode = MMIO_WRITE; + my_trace->width = get_ins_mem_width(instptr); + my_trace->value = get_ins_reg_val(instptr, regs); break; case IMM_WRITE: - my_trace->header.type |= - (MMIO_WRITE << MMIO_OPCODE_SHIFT) | - (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT); - my_trace->rw.value = get_ins_imm_val(instptr); + my_trace->opcode = MMIO_WRITE; + my_trace->width = get_ins_mem_width(instptr); + my_trace->value = get_ins_imm_val(instptr); break; default: { unsigned char *ip = (unsigned char *)instptr; - my_trace->header.type |= - (MMIO_UNKNOWN_OP << MMIO_OPCODE_SHIFT); - my_trace->rw.value = (*ip) << 16 | *(ip + 1) << 8 | + my_trace->opcode = MMIO_UNKNOWN_OP; + my_trace->width = 0; + my_trace->value = (*ip) << 16 | *(ip + 1) << 8 | *(ip + 2); } } @@ -271,7 +254,7 @@ static void post(struct kmmio_probe *p, unsigned long condition, struct pt_regs *regs) { struct trap_reason *my_reason = &get_cpu_var(pf_reason); - struct mm_io_header_rw *my_trace = &get_cpu_var(cpu_trace); + struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace); /* this should always return the active_trace count to 0 */ my_reason->active_traces--; @@ -282,20 +265,13 @@ static void post(struct kmmio_probe *p, unsigned long condition, switch (my_reason->type) { case REG_READ: - my_trace->rw.value = get_ins_reg_val(my_reason->ip, regs); + my_trace->value = get_ins_reg_val(my_reason->ip, regs); break; default: break; } - /* - * XXX: Several required values are ignored: - * - mapping id - * - program counter - * Also the address should be physical, not virtual. - */ - mmio_trace_record(my_trace->header.type, my_trace->rw.address, - my_trace->rw.value); + mmio_trace_rw(my_trace); put_cpu_var(cpu_trace); put_cpu_var(pf_reason); } @@ -305,21 +281,11 @@ static void ioremap_trace_core(unsigned long offset, unsigned long size, { static atomic_t next_id; struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL); - struct mm_io_header_map event = { - .header = { - .type = MMIO_MAGIC | - (MMIO_PROBE << MMIO_OPCODE_SHIFT), - .sec = 0, - .nsec = 0, - .pid = 0, - .data_len = sizeof(struct mm_io_map) - }, - .map = { - .phys = offset, - .addr = (unsigned long)addr, - .len = size, - .pc = 0 - } + struct mmiotrace_map map = { + .phys = offset, + .virt = (unsigned long)addr, + .len = size, + .opcode = MMIO_PROBE }; if (!trace) { @@ -338,15 +304,13 @@ static void ioremap_trace_core(unsigned long offset, unsigned long size, .phys = offset, .id = atomic_inc_return(&next_id) }; + map.map_id = trace->id; spin_lock_irq(&trace_lock); if (!is_enabled()) goto not_enabled; - /* - * XXX: Insufficient data recorded! - */ - mmio_trace_record(event.header.type, event.map.addr, event.map.len); + mmio_trace_mapping(&map); list_add_tail(&trace->list, &trace_list); if (!nommiotrace) register_kmmio_probe(&trace->probe); @@ -369,21 +333,11 @@ mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr) static void iounmap_trace_core(volatile void __iomem *addr) { - struct mm_io_header_map event = { - .header = { - .type = MMIO_MAGIC | - (MMIO_UNPROBE << MMIO_OPCODE_SHIFT), - .sec = 0, - .nsec = 0, - .pid = 0, - .data_len = sizeof(struct mm_io_map) - }, - .map = { - .phys = 0, - .addr = (unsigned long)addr, - .len = 0, - .pc = 0 - } + struct mmiotrace_map map = { + .phys = 0, + .virt = (unsigned long)addr, + .len = 0, + .opcode = MMIO_UNPROBE }; struct remap_trace *trace; struct remap_trace *tmp; @@ -404,8 +358,8 @@ static void iounmap_trace_core(volatile void __iomem *addr) break; } } - mmio_trace_record(event.header.type, event.map.addr, - found_trace ? found_trace->id : -1); + map.map_id = (found_trace) ? found_trace->id : -1; + mmio_trace_mapping(&map); not_enabled: spin_unlock_irq(&trace_lock); @@ -448,10 +402,12 @@ static void clear_trace_list(void) } } +#if 0 /* XXX: out of order */ static struct file_operations fops_marker = { .owner = THIS_MODULE, .write = write_marker }; +#endif void enable_mmiotrace(void) { @@ -464,9 +420,9 @@ void enable_mmiotrace(void) #if 0 /* XXX: tracing does not support text entries */ marker_file = debugfs_create_file("marker", 0660, dir, NULL, &fops_marker); -#endif if (!marker_file) pr_err(NAME "marker file creation failed.\n"); +#endif if (nommiotrace) pr_info(NAME "MMIO tracing disabled.\n"); @@ -502,17 +458,3 @@ void disable_mmiotrace(void) out: mutex_unlock(&mmiotrace_mutex); } - -int __init init_mmiotrace(void) -{ - pr_debug(NAME "load...\n"); - if (n_subbufs < 2) - return -EINVAL; - - dir = debugfs_create_dir(APP_DIR, NULL); - if (!dir) { - pr_err(NAME "Couldn't create relay app directory.\n"); - return -ENOMEM; - } - return 0; -} diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index 579b3b06c90e..c88a9c197d22 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -54,73 +54,38 @@ static inline void mmiotrace_iounmap(volatile void __iomem *addr) } #endif /* CONFIG_MMIOTRACE_HOOKS */ -/* in kernel/trace/trace_mmiotrace.c */ -extern int __init init_mmiotrace(void); -extern void enable_mmiotrace(void); -extern void disable_mmiotrace(void); -extern void mmio_trace_record(u32 type, unsigned long addr, unsigned long arg); - -#endif /* __KERNEL__ */ - - -/* - * If you change anything here, you must bump MMIO_VERSION. - * This is the relay data format for user space. - */ -#define MMIO_VERSION 0x04 - -/* mm_io_header.type */ -#define MMIO_OPCODE_MASK 0xff -#define MMIO_OPCODE_SHIFT 0 -#define MMIO_WIDTH_MASK 0xff00 -#define MMIO_WIDTH_SHIFT 8 -#define MMIO_MAGIC (0x6f000000 | (MMIO_VERSION<<16)) -#define MMIO_MAGIC_MASK 0xffff0000 - -enum mm_io_opcode { /* payload type: */ - MMIO_READ = 0x1, /* struct mm_io_rw */ - MMIO_WRITE = 0x2, /* struct mm_io_rw */ - MMIO_PROBE = 0x3, /* struct mm_io_map */ - MMIO_UNPROBE = 0x4, /* struct mm_io_map */ +enum mm_io_opcode { + MMIO_READ = 0x1, /* struct mmiotrace_rw */ + MMIO_WRITE = 0x2, /* struct mmiotrace_rw */ + MMIO_PROBE = 0x3, /* struct mmiotrace_map */ + MMIO_UNPROBE = 0x4, /* struct mmiotrace_map */ MMIO_MARKER = 0x5, /* raw char data */ - MMIO_UNKNOWN_OP = 0x6, /* struct mm_io_rw */ + MMIO_UNKNOWN_OP = 0x6, /* struct mmiotrace_rw */ }; -struct mm_io_header { - __u32 type; /* see MMIO_* macros above */ - __u32 sec; /* timestamp */ - __u32 nsec; - __u32 pid; /* PID of the process, or 0 for kernel core */ - __u16 data_len; /* length of the following payload */ +struct mmiotrace_rw { + unsigned long phys; /* PCI address of register */ + unsigned long value; + unsigned long pc; /* optional program counter */ + int map_id; + unsigned char opcode; /* one of MMIO_{READ,WRITE,UNKNOWN_OP} */ + unsigned char width; /* size of register access in bytes */ }; -struct mm_io_rw { - __u64 address; /* virtual address of register */ - __u64 value; - __u64 pc; /* optional program counter */ +struct mmiotrace_map { + unsigned long phys; /* base address in PCI space */ + unsigned long virt; /* base virtual address */ + unsigned long len; /* mapping size */ + int map_id; + unsigned char opcode; /* MMIO_PROBE or MMIO_UNPROBE */ }; -struct mm_io_map { - __u64 phys; /* base address in PCI space */ - __u64 addr; /* base virtual address */ - __u64 len; /* mapping size */ - __u64 pc; /* optional program counter */ -}; - - -/* - * These structures are used to allow a single relay_write() - * call to write a full packet. - */ - -struct mm_io_header_rw { - struct mm_io_header header; - struct mm_io_rw rw; -} __attribute__((packed)); +/* in kernel/trace/trace_mmiotrace.c */ +extern void enable_mmiotrace(void); +extern void disable_mmiotrace(void); +extern void mmio_trace_rw(struct mmiotrace_rw *rw); +extern void mmio_trace_mapping(struct mmiotrace_map *map); -struct mm_io_header_map { - struct mm_io_header header; - struct mm_io_map map; -} __attribute__((packed)); +#endif /* __KERNEL__ */ #endif /* MMIOTRACE_H */ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3271916ff033..d14fe49e9638 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -831,6 +831,40 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, trace_function(tr, data, ip, parent_ip, flags); } +#ifdef CONFIG_MMIOTRACE +void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_array_cpu *data, + struct mmiotrace_rw *rw) +{ + struct trace_entry *entry; + unsigned long irq_flags; + + spin_lock_irqsave(&data->lock, irq_flags); + entry = tracing_get_trace_entry(tr, data); + tracing_generic_entry_update(entry, 0); + entry->type = TRACE_MMIO_RW; + entry->mmiorw = *rw; + spin_unlock_irqrestore(&data->lock, irq_flags); + + trace_wake_up(); +} + +void __trace_mmiotrace_map(struct trace_array *tr, struct trace_array_cpu *data, + struct mmiotrace_map *map) +{ + struct trace_entry *entry; + unsigned long irq_flags; + + spin_lock_irqsave(&data->lock, irq_flags); + entry = tracing_get_trace_entry(tr, data); + tracing_generic_entry_update(entry, 0); + entry->type = TRACE_MMIO_MAP; + entry->mmiomap = *map; + spin_unlock_irqrestore(&data->lock, irq_flags); + + trace_wake_up(); +} +#endif + void __trace_stack(struct trace_array *tr, struct trace_array_cpu *data, unsigned long flags, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c460e85e94ed..0ef9ef74c806 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -5,6 +5,7 @@ #include #include #include +#include enum trace_type { __TRACE_FIRST_TYPE = 0, @@ -14,6 +15,8 @@ enum trace_type { TRACE_WAKE, TRACE_STACK, TRACE_SPECIAL, + TRACE_MMIO_RW, + TRACE_MMIO_MAP, __TRACE_LAST_TYPE }; @@ -75,6 +78,8 @@ struct trace_entry { struct ctx_switch_entry ctx; struct special_entry special; struct stack_entry stack; + struct mmiotrace_rw mmiorw; + struct mmiotrace_map mmiomap; }; }; @@ -255,6 +260,15 @@ extern unsigned long ftrace_update_tot_cnt; extern int DYN_FTRACE_TEST_NAME(void); #endif +#ifdef CONFIG_MMIOTRACE +extern void __trace_mmiotrace_rw(struct trace_array *tr, + struct trace_array_cpu *data, + struct mmiotrace_rw *rw); +extern void __trace_mmiotrace_map(struct trace_array *tr, + struct trace_array_cpu *data, + struct mmiotrace_map *map); +#endif + #ifdef CONFIG_FTRACE_STARTUP_TEST #ifdef CONFIG_FTRACE extern int trace_selftest_startup_function(struct tracer *trace, diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index e4dd03cc5aa6..3a12b1ad0c63 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -11,19 +11,26 @@ #include "trace.h" -extern void -__trace_special(void *__tr, void *__data, - unsigned long arg1, unsigned long arg2, unsigned long arg3); - static struct trace_array *mmio_trace_array; +static void mmio_reset_data(struct trace_array *tr) +{ + int cpu; + + tr->time_start = ftrace_now(tr->cpu); + + for_each_online_cpu(cpu) + tracing_reset(tr->data[cpu]); +} static void mmio_trace_init(struct trace_array *tr) { pr_debug("in %s\n", __func__); mmio_trace_array = tr; - if (tr->ctrl) + if (tr->ctrl) { + mmio_reset_data(tr); enable_mmiotrace(); + } } static void mmio_trace_reset(struct trace_array *tr) @@ -31,15 +38,110 @@ static void mmio_trace_reset(struct trace_array *tr) pr_debug("in %s\n", __func__); if (tr->ctrl) disable_mmiotrace(); + mmio_reset_data(tr); + mmio_trace_array = NULL; } static void mmio_trace_ctrl_update(struct trace_array *tr) { pr_debug("in %s\n", __func__); - if (tr->ctrl) + if (tr->ctrl) { + mmio_reset_data(tr); enable_mmiotrace(); - else + } else { disable_mmiotrace(); + } +} + +/* XXX: This is not called for trace_pipe file! */ +void mmio_print_header(struct trace_iterator *iter) +{ + struct trace_seq *s = &iter->seq; + trace_seq_printf(s, "VERSION broken 20070824\n"); + /* TODO: print /proc/bus/pci/devices contents as PCIDEV lines */ +} + +static int mmio_print_rw(struct trace_iterator *iter) +{ + struct trace_entry *entry = iter->ent; + struct mmiotrace_rw *rw = &entry->mmiorw; + struct trace_seq *s = &iter->seq; + unsigned long long t = ns2usecs(entry->t); + unsigned long usec_rem = do_div(t, 1000000ULL); + unsigned secs = (unsigned long)t; + int ret = 1; + + switch (entry->mmiorw.opcode) { + case MMIO_READ: + ret = trace_seq_printf(s, + "R %d %lu.%06lu %d 0x%lx 0x%lx 0x%lx %d\n", + rw->width, secs, usec_rem, rw->map_id, rw->phys, + rw->value, rw->pc, entry->pid); + break; + case MMIO_WRITE: + ret = trace_seq_printf(s, + "W %d %lu.%06lu %d 0x%lx 0x%lx 0x%lx %d\n", + rw->width, secs, usec_rem, rw->map_id, rw->phys, + rw->value, rw->pc, entry->pid); + break; + case MMIO_UNKNOWN_OP: + ret = trace_seq_printf(s, + "UNKNOWN %lu.%06lu %d 0x%lx %02x,%02x,%02x 0x%lx %d\n", + secs, usec_rem, rw->map_id, rw->phys, + (rw->value >> 16) & 0xff, (rw->value >> 8) & 0xff, + (rw->value >> 0) & 0xff, rw->pc, entry->pid); + break; + default: + ret = trace_seq_printf(s, "rw what?\n"); + break; + } + if (ret) + return 1; + return 0; +} + +static int mmio_print_map(struct trace_iterator *iter) +{ + struct trace_entry *entry = iter->ent; + struct mmiotrace_map *m = &entry->mmiomap; + struct trace_seq *s = &iter->seq; + unsigned long long t = ns2usecs(entry->t); + unsigned long usec_rem = do_div(t, 1000000ULL); + unsigned secs = (unsigned long)t; + int ret = 1; + + switch (entry->mmiorw.opcode) { + case MMIO_PROBE: + ret = trace_seq_printf(s, + "MAP %lu.%06lu %d 0x%lx 0x%lx 0x%lx 0x%lx %d\n", + secs, usec_rem, m->map_id, m->phys, m->virt, m->len, + 0UL, entry->pid); + break; + case MMIO_UNPROBE: + ret = trace_seq_printf(s, + "UNMAP %lu.%06lu %d 0x%lx %d\n", + secs, usec_rem, m->map_id, 0UL, entry->pid); + break; + default: + ret = trace_seq_printf(s, "map what?\n"); + break; + } + if (ret) + return 1; + return 0; +} + +/* return 0 to abort printing without consuming current entry in pipe mode */ +static int mmio_print_line(struct trace_iterator *iter) +{ + switch (iter->ent->type) { + case TRACE_MMIO_RW: + return mmio_print_rw(iter); + case TRACE_MMIO_MAP: + return mmio_print_map(iter); + default: + return 1; /* ignore unknown entries */ + } } static struct tracer mmio_tracer __read_mostly = @@ -47,38 +149,31 @@ static struct tracer mmio_tracer __read_mostly = .name = "mmiotrace", .init = mmio_trace_init, .reset = mmio_trace_reset, + .open = mmio_print_header, .ctrl_update = mmio_trace_ctrl_update, + .print_line = mmio_print_line, }; __init static int init_mmio_trace(void) { - int ret = init_mmiotrace(); - if (ret) - return ret; return register_tracer(&mmio_tracer); } device_initcall(init_mmio_trace); -void mmio_trace_record(u32 type, unsigned long addr, unsigned long arg) +void mmio_trace_rw(struct mmiotrace_rw *rw) { struct trace_array *tr = mmio_trace_array; struct trace_array_cpu *data = tr->data[smp_processor_id()]; + __trace_mmiotrace_rw(tr, data, rw); +} - if (!current || current->pid == 0) { - /* - * XXX: This is a problem. We need to able to record, no - * matter what. tracing_generic_entry_update() would crash. - */ - static unsigned limit; - if (limit++ < 12) - pr_err("Error in %s: no current.\n", __func__); - return; - } - if (!tr || !data) { - static unsigned limit; - if (limit++ < 12) - pr_err("%s: no tr or data\n", __func__); - return; - } - __trace_special(tr, data, type, addr, arg); +void mmio_trace_mapping(struct mmiotrace_map *map) +{ + struct trace_array *tr = mmio_trace_array; + struct trace_array_cpu *data; + + preempt_disable(); + data = tr->data[smp_processor_id()]; + __trace_mmiotrace_map(tr, data, map); + preempt_enable(); } -- cgit v1.2.3 From 138295373ccf7625fcb0218dfea114837983bc39 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:58 +0200 Subject: ftrace: mmiotrace update, #2 another weekend, another patch. This should apply on top of my previous patch from March 23rd. Summary of changes: - Print PCI device list in output header - work around recursive probe hits on SMP - refactor dis/arm_kmmio_fault_page() and add check for page levels - remove un/reference_kmmio(), the die notifier hook is registered permanently into the list - explicitly check for single stepping in die notifier callback I have tested this version on my UP Athlon64 desktop with Nouveau, and SMP Core 2 Duo laptop with the proprietary nvidia driver. Both systems are 64-bit. One previously unknown bug crept into daylight: the ftrace framework's output routines print the first entry last after buffer has wrapped around. The most important regressions compared to non-ftrace mmiotrace at this time are: - failure of trace_pipe file - illegal lines in output file - unaware of losing data due to buffer full Personally I'd like to see these three solved before submitting to mainline. Other issues may come up once we know when we lose events. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/mmiotrace/kmmio.c | 186 ++++++++++++++--------------------- arch/x86/kernel/mmiotrace/mmio-mod.c | 3 - include/linux/mmiotrace.h | 2 - kernel/trace/trace_mmiotrace.c | 47 ++++++++- 4 files changed, 120 insertions(+), 118 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/kernel/mmiotrace/kmmio.c index efb467933087..cd0d95fe4fe6 100644 --- a/arch/x86/kernel/mmiotrace/kmmio.c +++ b/arch/x86/kernel/mmiotrace/kmmio.c @@ -5,15 +5,12 @@ * 2008 Pekka Paalanen */ -#include #include #include #include #include #include -#include #include -#include #include #include #include @@ -22,10 +19,9 @@ #include #include #include -#include #include -#include - +#include +#include #include #define KMMIO_PAGE_HASH_BITS 4 @@ -57,14 +53,9 @@ struct kmmio_context { int active; }; -static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, - void *args); - -static DEFINE_MUTEX(kmmio_init_mutex); static DEFINE_SPINLOCK(kmmio_lock); -/* These are protected by kmmio_lock */ -static int kmmio_initialized; +/* Protected by kmmio_lock */ unsigned int kmmio_count; /* Read-protected by RCU, write-protected by kmmio_lock. */ @@ -79,60 +70,6 @@ static struct list_head *kmmio_page_list(unsigned long page) /* Accessed per-cpu */ static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx); -/* protected by kmmio_init_mutex */ -static struct notifier_block nb_die = { - .notifier_call = kmmio_die_notifier -}; - -/** - * Makes sure kmmio is initialized and usable. - * This must be called before any other kmmio function defined here. - * May sleep. - */ -void reference_kmmio(void) -{ - mutex_lock(&kmmio_init_mutex); - spin_lock_irq(&kmmio_lock); - if (!kmmio_initialized) { - int i; - for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) - INIT_LIST_HEAD(&kmmio_page_table[i]); - if (register_die_notifier(&nb_die)) - BUG(); - } - kmmio_initialized++; - spin_unlock_irq(&kmmio_lock); - mutex_unlock(&kmmio_init_mutex); -} -EXPORT_SYMBOL_GPL(reference_kmmio); - -/** - * Clean up kmmio after use. This must be called for every call to - * reference_kmmio(). All probes registered after the corresponding - * reference_kmmio() must have been unregistered when calling this. - * May sleep. - */ -void unreference_kmmio(void) -{ - bool unreg = false; - - mutex_lock(&kmmio_init_mutex); - spin_lock_irq(&kmmio_lock); - - if (kmmio_initialized == 1) { - BUG_ON(is_kmmio_active()); - unreg = true; - } - kmmio_initialized--; - BUG_ON(kmmio_initialized < 0); - spin_unlock_irq(&kmmio_lock); - - if (unreg) - unregister_die_notifier(&nb_die); /* calls sync_rcu() */ - mutex_unlock(&kmmio_init_mutex); -} -EXPORT_SYMBOL(unreference_kmmio); - /* * this is basically a dynamic stabbing problem: * Could use the existing prio tree code or @@ -167,58 +104,56 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) return NULL; } -/** Mark the given page as not present. Access to it will trigger a fault. */ -static void arm_kmmio_fault_page(unsigned long page, int *page_level) +static void set_page_present(unsigned long addr, bool present, int *pglevel) { - unsigned long address = page & PAGE_MASK; + pteval_t pteval; + pmdval_t pmdval; int level; - pte_t *pte = lookup_address(address, &level); + pmd_t *pmd; + pte_t *pte = lookup_address(addr, &level); if (!pte) { - pr_err("kmmio: Error in %s: no pte for page 0x%08lx\n", - __func__, page); + pr_err("kmmio: no pte for page 0x%08lx\n", addr); return; } - if (level == PG_LEVEL_2M) { - pmd_t *pmd = (pmd_t *)pte; - set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_PRESENT)); - } else { - /* PG_LEVEL_4K */ - set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); + if (pglevel) + *pglevel = level; + + switch (level) { + case PG_LEVEL_2M: + pmd = (pmd_t *)pte; + pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT; + if (present) + pmdval |= _PAGE_PRESENT; + set_pmd(pmd, __pmd(pmdval)); + break; + + case PG_LEVEL_4K: + pteval = pte_val(*pte) & ~_PAGE_PRESENT; + if (present) + pteval |= _PAGE_PRESENT; + set_pte_atomic(pte, __pte(pteval)); + break; + + default: + pr_err("kmmio: unexpected page level 0x%x.\n", level); + return; } - if (page_level) - *page_level = level; + __flush_tlb_one(addr); +} - __flush_tlb_one(page); +/** Mark the given page as not present. Access to it will trigger a fault. */ +static void arm_kmmio_fault_page(unsigned long page, int *page_level) +{ + set_page_present(page & PAGE_MASK, false, page_level); } /** Mark the given page as present. */ static void disarm_kmmio_fault_page(unsigned long page, int *page_level) { - unsigned long address = page & PAGE_MASK; - int level; - pte_t *pte = lookup_address(address, &level); - - if (!pte) { - pr_err("kmmio: Error in %s: no pte for page 0x%08lx\n", - __func__, page); - return; - } - - if (level == PG_LEVEL_2M) { - pmd_t *pmd = (pmd_t *)pte; - set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_PRESENT)); - } else { - /* PG_LEVEL_4K */ - set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); - } - - if (page_level) - *page_level = level; - - __flush_tlb_one(page); + set_page_present(page & PAGE_MASK, true, page_level); } /* @@ -240,6 +175,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) { struct kmmio_context *ctx; struct kmmio_fault_page *faultpage; + int ret = 0; /* default to fault not handled */ /* * Preemption is now disabled to prevent process switch during @@ -257,21 +193,35 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) /* * Either this page fault is not caused by kmmio, or * another CPU just pulled the kmmio probe from under - * our feet. In the latter case all hell breaks loose. + * our feet. The latter case should not be possible. */ goto no_kmmio; } ctx = &get_cpu_var(kmmio_ctx); if (ctx->active) { + disarm_kmmio_fault_page(faultpage->page, NULL); + if (addr == ctx->addr) { + /* + * On SMP we sometimes get recursive probe hits on the + * same address. Context is already saved, fall out. + */ + pr_debug("kmmio: duplicate probe hit on CPU %d, for " + "address 0x%08lx.\n", + smp_processor_id(), addr); + ret = 1; + goto no_kmmio_ctx; + } /* * Prevent overwriting already in-flight context. - * If this page fault really was due to kmmio trap, - * all hell breaks loose. + * This should not happen, let's hope disarming at least + * prevents a panic. */ pr_emerg("kmmio: recursive probe hit on CPU %d, " "for address 0x%08lx. Ignoring.\n", smp_processor_id(), addr); + pr_emerg("kmmio: previous hit was at 0x%08lx.\n", + ctx->addr); goto no_kmmio_ctx; } ctx->active++; @@ -302,14 +252,14 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) */ put_cpu_var(kmmio_ctx); - return 1; + return 1; /* fault handled */ no_kmmio_ctx: put_cpu_var(kmmio_ctx); no_kmmio: rcu_read_unlock(); preempt_enable_no_resched(); - return 0; /* page fault not handled by kmmio */ + return ret; } /* @@ -322,8 +272,11 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) int ret = 0; struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); - if (!ctx->active) + if (!ctx->active) { + pr_debug("kmmio: spurious debug trap on CPU %d.\n", + smp_processor_id()); goto out; + } if (ctx->probe && ctx->probe->post_handler) ctx->probe->post_handler(ctx->probe, condition, regs); @@ -525,9 +478,22 @@ static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, { struct die_args *arg = args; - if (val == DIE_DEBUG) + if (val == DIE_DEBUG && (arg->err & DR_STEP)) if (post_kmmio_handler(arg->err, arg->regs) == 1) return NOTIFY_STOP; return NOTIFY_DONE; } + +static struct notifier_block nb_die = { + .notifier_call = kmmio_die_notifier +}; + +static int __init init_kmmio(void) +{ + int i; + for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) + INIT_LIST_HEAD(&kmmio_page_table[i]); + return register_die_notifier(&nb_die); +} +fs_initcall(init_kmmio); /* should be before device_initcall() */ diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/kernel/mmiotrace/mmio-mod.c index 62abc281a512..8256546d49bf 100644 --- a/arch/x86/kernel/mmiotrace/mmio-mod.c +++ b/arch/x86/kernel/mmiotrace/mmio-mod.c @@ -415,8 +415,6 @@ void enable_mmiotrace(void) if (is_enabled()) goto out; - reference_kmmio(); - #if 0 /* XXX: tracing does not support text entries */ marker_file = debugfs_create_file("marker", 0660, dir, NULL, &fops_marker); @@ -448,7 +446,6 @@ void disable_mmiotrace(void) spin_unlock_irq(&trace_lock); clear_trace_list(); /* guarantees: no more kmmio callbacks */ - unreference_kmmio(); if (marker_file) { debugfs_remove(marker_file); marker_file = NULL; diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index c88a9c197d22..dd6b64b160fc 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -31,8 +31,6 @@ static inline int is_kmmio_active(void) return kmmio_count; } -extern void reference_kmmio(void); -extern void unreference_kmmio(void); extern int register_kmmio_probe(struct kmmio_probe *p); extern void unregister_kmmio_probe(struct kmmio_probe *p); diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 3a12b1ad0c63..361472b5788c 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -8,6 +8,7 @@ #include #include +#include #include "trace.h" @@ -53,12 +54,52 @@ static void mmio_trace_ctrl_update(struct trace_array *tr) } } +static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev) +{ + int ret = 0; + int i; + resource_size_t start, end; + const struct pci_driver *drv = pci_dev_driver(dev); + + /* XXX: incomplete checks for trace_seq_printf() return value */ + ret += trace_seq_printf(s, "PCIDEV %02x%02x %04x%04x %x", + dev->bus->number, dev->devfn, + dev->vendor, dev->device, dev->irq); + /* + * XXX: is pci_resource_to_user() appropriate, since we are + * supposed to interpret the __ioremap() phys_addr argument based on + * these printed values? + */ + for (i = 0; i < 7; i++) { + pci_resource_to_user(dev, i, &dev->resource[i], &start, &end); + ret += trace_seq_printf(s, " %llx", + (unsigned long long)(start | + (dev->resource[i].flags & PCI_REGION_FLAG_MASK))); + } + for (i = 0; i < 7; i++) { + pci_resource_to_user(dev, i, &dev->resource[i], &start, &end); + ret += trace_seq_printf(s, " %llx", + dev->resource[i].start < dev->resource[i].end ? + (unsigned long long)(end - start) + 1 : 0); + } + if (drv) + ret += trace_seq_printf(s, " %s\n", drv->name); + else + ret += trace_seq_printf(s, " \n"); + return ret; +} + /* XXX: This is not called for trace_pipe file! */ -void mmio_print_header(struct trace_iterator *iter) +static void mmio_print_header(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; - trace_seq_printf(s, "VERSION broken 20070824\n"); - /* TODO: print /proc/bus/pci/devices contents as PCIDEV lines */ + struct pci_dev *dev = NULL; + + trace_seq_printf(s, "VERSION 20070824\n"); + + for_each_pci_dev(dev) + mmio_print_pcidev(s, dev); + /* XXX: return value? What if header is very long? */ } static int mmio_print_rw(struct trace_iterator *iter) -- cgit v1.2.3 From 970e6fa03885f32cc43e42cb08c73a5f54cd8bd9 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:03 +0200 Subject: mmiotrace: code style cleanups From c2da03771e29159627c5c7b9509ec70bce9f91ee Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 28 Apr 2008 21:25:22 +0300 Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 4 ++-- arch/x86/mm/mmio-mod.c | 7 +++---- arch/x86/mm/testmmiotrace.c | 2 +- include/linux/mmiotrace.h | 6 +----- 4 files changed, 7 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 3ad27b8504a5..6a92d9111b64 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -17,10 +17,10 @@ #include #include #include -#include +#include #include #include -#include +#include #include #include diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 1f77d8532037..a8d2a0019da4 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include @@ -418,11 +418,10 @@ static void enter_uniprocessor(void) for_each_cpu_mask(cpu, downed_cpus) { err = cpu_down(cpu); - if (!err) { + if (!err) pr_info(NAME "CPU%d is down.\n", cpu); - } else { + else pr_err(NAME "Error taking CPU%d down: %d\n", cpu, err); - } } if (num_online_cpus() > 1) pr_warning(NAME "multiple CPUs still online, " diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index cfa60b227c8d..d877c5b423ef 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c @@ -2,7 +2,7 @@ * Written by Pekka Paalanen, 2008 */ #include -#include +#include #define MODULE_NAME "testmmiotrace" diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index dd6b64b160fc..de8e91258da7 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -1,9 +1,7 @@ #ifndef MMIOTRACE_H #define MMIOTRACE_H -#include - -#ifdef __KERNEL__ +#include #include @@ -84,6 +82,4 @@ extern void disable_mmiotrace(void); extern void mmio_trace_rw(struct mmiotrace_rw *rw); extern void mmio_trace_mapping(struct mmiotrace_map *map); -#endif /* __KERNEL__ */ - #endif /* MMIOTRACE_H */ -- cgit v1.2.3 From dee310d0adf41019aca476052ac3085ff286d9be Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:03 +0200 Subject: x86 mmiotrace: use resource_size_t for phys addresses Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar --- arch/x86/mm/mmio-mod.c | 11 ++++++----- include/linux/mmiotrace.h | 14 +++++++------- kernel/trace/trace_mmiotrace.c | 20 ++++++++++++-------- 3 files changed, 25 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 278998c1998f..3b04a0126121 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -48,7 +48,7 @@ struct trap_reason { struct remap_trace { struct list_head list; struct kmmio_probe probe; - unsigned long phys; + resource_size_t phys; unsigned long id; }; @@ -275,7 +275,7 @@ static void post(struct kmmio_probe *p, unsigned long condition, put_cpu_var(pf_reason); } -static void ioremap_trace_core(unsigned long offset, unsigned long size, +static void ioremap_trace_core(resource_size_t offset, unsigned long size, void __iomem *addr) { static atomic_t next_id; @@ -319,13 +319,14 @@ not_enabled: spin_unlock_irq(&trace_lock); } -void -mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr) +void mmiotrace_ioremap(resource_size_t offset, unsigned long size, + void __iomem *addr) { if (!is_enabled()) /* recheck and proper locking in *_core() */ return; - pr_debug(NAME "ioremap_*(0x%lx, 0x%lx) = %p\n", offset, size, addr); + pr_debug(NAME "ioremap_*(0x%llx, 0x%lx) = %p\n", + (unsigned long long)offset, size, addr); if ((filter_offset) && (offset != filter_offset)) return; ioremap_trace_core(offset, size, addr); diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index de8e91258da7..5cbbc374e945 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -2,7 +2,6 @@ #define MMIOTRACE_H #include - #include struct kmmio_probe; @@ -37,14 +36,15 @@ extern int kmmio_handler(struct pt_regs *regs, unsigned long addr); /* Called from ioremap.c */ #ifdef CONFIG_MMIOTRACE -extern void -mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr); +extern void mmiotrace_ioremap(resource_size_t offset, unsigned long size, + void __iomem *addr); extern void mmiotrace_iounmap(volatile void __iomem *addr); #else -static inline void -mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr) +static inline void mmiotrace_ioremap(resource_size_t offset, + unsigned long size, void __iomem *addr) { } + static inline void mmiotrace_iounmap(volatile void __iomem *addr) { } @@ -60,7 +60,7 @@ enum mm_io_opcode { }; struct mmiotrace_rw { - unsigned long phys; /* PCI address of register */ + resource_size_t phys; /* PCI address of register */ unsigned long value; unsigned long pc; /* optional program counter */ int map_id; @@ -69,7 +69,7 @@ struct mmiotrace_rw { }; struct mmiotrace_map { - unsigned long phys; /* base address in PCI space */ + resource_size_t phys; /* base address in PCI space */ unsigned long virt; /* base virtual address */ unsigned long len; /* mapping size */ int map_id; diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 3c1dacdc2d85..b13dc19dcbb4 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -184,20 +184,23 @@ static int mmio_print_rw(struct trace_iterator *iter) switch (entry->mmiorw.opcode) { case MMIO_READ: ret = trace_seq_printf(s, - "R %d %lu.%06lu %d 0x%lx 0x%lx 0x%lx %d\n", - rw->width, secs, usec_rem, rw->map_id, rw->phys, + "R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", + rw->width, secs, usec_rem, rw->map_id, + (unsigned long long)rw->phys, rw->value, rw->pc, 0); break; case MMIO_WRITE: ret = trace_seq_printf(s, - "W %d %lu.%06lu %d 0x%lx 0x%lx 0x%lx %d\n", - rw->width, secs, usec_rem, rw->map_id, rw->phys, + "W %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", + rw->width, secs, usec_rem, rw->map_id, + (unsigned long long)rw->phys, rw->value, rw->pc, 0); break; case MMIO_UNKNOWN_OP: ret = trace_seq_printf(s, - "UNKNOWN %lu.%06lu %d 0x%lx %02x,%02x,%02x 0x%lx %d\n", - secs, usec_rem, rw->map_id, rw->phys, + "UNKNOWN %lu.%06lu %d 0x%llx %02x,%02x,%02x 0x%lx %d\n", + secs, usec_rem, rw->map_id, + (unsigned long long)rw->phys, (rw->value >> 16) & 0xff, (rw->value >> 8) & 0xff, (rw->value >> 0) & 0xff, rw->pc, 0); break; @@ -223,8 +226,9 @@ static int mmio_print_map(struct trace_iterator *iter) switch (entry->mmiorw.opcode) { case MMIO_PROBE: ret = trace_seq_printf(s, - "MAP %lu.%06lu %d 0x%lx 0x%lx 0x%lx 0x%lx %d\n", - secs, usec_rem, m->map_id, m->phys, m->virt, m->len, + "MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n", + secs, usec_rem, m->map_id, + (unsigned long long)m->phys, m->virt, m->len, 0UL, 0); break; case MMIO_UNPROBE: -- cgit v1.2.3 From a50445d76c22a34ae149704ea5adaef171c8acb7 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:03 +0200 Subject: mmiotrace: rename kmmio_probe::user_data to :private. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar --- arch/x86/mm/mmio-mod.c | 4 ++-- include/linux/mmiotrace.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 3b04a0126121..ed0e0e90b3ef 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -191,7 +191,7 @@ static void pre(struct kmmio_probe *p, struct pt_regs *regs, struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace); const unsigned long instptr = instruction_pointer(regs); const enum reason_type type = get_ins_type(instptr); - struct remap_trace *trace = p->user_data; + struct remap_trace *trace = p->private; /* it doesn't make sense to have more than one active trace per cpu */ if (my_reason->active_traces) @@ -299,7 +299,7 @@ static void ioremap_trace_core(resource_size_t offset, unsigned long size, .len = size, .pre_handler = pre, .post_handler = post, - .user_data = trace + .private = trace }, .phys = offset, .id = atomic_inc_return(&next_id) diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index 5cbbc374e945..61d19e1b7a0b 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -18,7 +18,7 @@ struct kmmio_probe { unsigned long len; /* length of the probe region */ kmmio_pre_handler_t pre_handler; /* Called before addr is executed. */ kmmio_post_handler_t post_handler; /* Called after addr is executed */ - void *user_data; + void *private; }; /* kmmio is active by some kmmio_probes? */ -- cgit v1.2.3 From 40bd21740012132afb62d78ac3e6b82372b2fbc2 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Wed, 21 May 2008 11:44:02 +0200 Subject: x86: automatical unification of i8259.c Make conversion of i8259 very mechanical -- i8259 was generated by --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/i8259.c | 368 +++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/i8259_32.c | 295 ------------------------------------ arch/x86/kernel/i8259_64.c | 309 ------------------------------------- include/asm-x86/i8259.h | 2 + 5 files changed, 371 insertions(+), 605 deletions(-) create mode 100644 arch/x86/kernel/i8259.c (limited to 'include') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5e618c3b4720..f71a76ef2596 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -18,7 +18,7 @@ CFLAGS_tsc_64.o := $(nostackp) obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o obj-y += traps_$(BITS).o irq_$(BITS).o obj-y += time_$(BITS).o ioport.o ldt.o -obj-y += setup_$(BITS).o i8259_$(BITS).o setup.o +obj-y += setup_$(BITS).o i8259.o i8259_$(BITS).o setup.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c new file mode 100644 index 000000000000..2decba6b0101 --- /dev/null +++ b/arch/x86/kernel/i8259.c @@ -0,0 +1,368 @@ +#ifdef CONFIG_X86_64 +#include +#endif /* CONFIG_X86_64 */ +#include +#include +#include +#include +#include +#ifdef CONFIG_X86_64 +#include +#endif /* CONFIG_X86_64 */ +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_X86_64 +#include +#endif /* CONFIG_X86_64 */ +#include +#include +#include +#ifndef CONFIG_X86_64 +#include +#else /* CONFIG_X86_64 */ +#include +#endif /* CONFIG_X86_64 */ +#include +#include +#include +#include +#ifndef CONFIG_X86_64 +#include +#endif /* ! CONFIG_X86_64 */ +#include + +/* + * This is the 'legacy' 8259A Programmable Interrupt Controller, + * present in the majority of PC/AT boxes. + * plus some generic x86 specific things if generic specifics makes + * any sense at all. + */ + +static int i8259A_auto_eoi; +DEFINE_SPINLOCK(i8259A_lock); +static void mask_and_ack_8259A(unsigned int); + +struct irq_chip i8259A_chip = { + .name = "XT-PIC", + .mask = disable_8259A_irq, + .disable = disable_8259A_irq, + .unmask = enable_8259A_irq, + .mask_ack = mask_and_ack_8259A, +}; + +/* + * 8259A PIC functions to handle ISA devices: + */ + +/* + * This contains the irq mask for both 8259A irq controllers, + */ +unsigned int cached_irq_mask = 0xffff; + +/* + * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) + * boards the timer interrupt is not really connected to any IO-APIC pin, + * it's fed to the master 8259A's IR0 line only. + * + * Any '1' bit in this mask means the IRQ is routed through the IO-APIC. + * this 'mixed mode' IRQ handling costs nothing because it's only used + * at IRQ setup time. + */ +unsigned long io_apic_irqs; + +void disable_8259A_irq(unsigned int irq) +{ + unsigned int mask = 1 << irq; + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + cached_irq_mask |= mask; + if (irq & 8) + outb(cached_slave_mask, PIC_SLAVE_IMR); + else + outb(cached_master_mask, PIC_MASTER_IMR); + spin_unlock_irqrestore(&i8259A_lock, flags); +} + +void enable_8259A_irq(unsigned int irq) +{ + unsigned int mask = ~(1 << irq); + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + cached_irq_mask &= mask; + if (irq & 8) + outb(cached_slave_mask, PIC_SLAVE_IMR); + else + outb(cached_master_mask, PIC_MASTER_IMR); + spin_unlock_irqrestore(&i8259A_lock, flags); +} + +int i8259A_irq_pending(unsigned int irq) +{ + unsigned int mask = 1<> 8); + spin_unlock_irqrestore(&i8259A_lock, flags); + + return ret; +} + +void make_8259A_irq(unsigned int irq) +{ + disable_irq_nosync(irq); + io_apic_irqs &= ~(1<> 8); + outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */ + return value; +} + +/* + * Careful! The 8259A is a fragile beast, it pretty + * much _has_ to be done exactly like this (mask it + * first, _then_ send the EOI, and the order of EOI + * to the two 8259s is important! + */ +static void mask_and_ack_8259A(unsigned int irq) +{ + unsigned int irqmask = 1 << irq; + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + /* + * Lightweight spurious IRQ detection. We do not want + * to overdo spurious IRQ handling - it's usually a sign + * of hardware problems, so we only do the checks we can + * do without slowing down good hardware unnecessarily. + * + * Note that IRQ7 and IRQ15 (the two spurious IRQs + * usually resulting from the 8259A-1|2 PICs) occur + * even if the IRQ is masked in the 8259A. Thus we + * can check spurious 8259A IRQs without doing the + * quite slow i8259A_irq_real() call for every IRQ. + * This does not cover 100% of spurious interrupts, + * but should be enough to warn the user that there + * is something bad going on ... + */ + if (cached_irq_mask & irqmask) + goto spurious_8259A_irq; + cached_irq_mask |= irqmask; + +handle_real_irq: + if (irq & 8) { + inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */ + outb(cached_slave_mask, PIC_SLAVE_IMR); +#ifndef CONFIG_X86_64 + outb(0x60+(irq&7),PIC_SLAVE_CMD);/* 'Specific EOI' to slave */ + outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); /* 'Specific EOI' to master-IRQ2 */ +#else /* CONFIG_X86_64 */ + /* 'Specific EOI' to slave */ + outb(0x60+(irq&7),PIC_SLAVE_CMD); + /* 'Specific EOI' to master-IRQ2 */ + outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); +#endif /* CONFIG_X86_64 */ + } else { + inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */ + outb(cached_master_mask, PIC_MASTER_IMR); +#ifndef CONFIG_X86_64 + outb(0x60+irq,PIC_MASTER_CMD); /* 'Specific EOI to master */ +#else /* CONFIG_X86_64 */ + /* 'Specific EOI' to master */ + outb(0x60+irq,PIC_MASTER_CMD); +#endif /* CONFIG_X86_64 */ + } + spin_unlock_irqrestore(&i8259A_lock, flags); + return; + +spurious_8259A_irq: + /* + * this is the slow path - should happen rarely. + */ + if (i8259A_irq_real(irq)) + /* + * oops, the IRQ _is_ in service according to the + * 8259A - not spurious, go handle it. + */ + goto handle_real_irq; + + { + static int spurious_irq_mask; + /* + * At this point we can be sure the IRQ is spurious, + * lets ACK and report it. [once per IRQ] + */ + if (!(spurious_irq_mask & irqmask)) { +#ifndef CONFIG_X86_64 + printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq); +#else /* CONFIG_X86_64 */ + printk(KERN_DEBUG + "spurious 8259A interrupt: IRQ%d.\n", irq); +#endif /* CONFIG_X86_64 */ + spurious_irq_mask |= irqmask; + } + atomic_inc(&irq_err_count); + /* + * Theoretically we do not have to handle this IRQ, + * but in Linux this does not cause problems and is + * simpler for us. + */ + goto handle_real_irq; + } +} + +static char irq_trigger[2]; +/** + * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ + */ +static void restore_ELCR(char *trigger) +{ + outb(trigger[0], 0x4d0); + outb(trigger[1], 0x4d1); +} + +static void save_ELCR(char *trigger) +{ + /* IRQ 0,1,2,8,13 are marked as reserved */ + trigger[0] = inb(0x4d0) & 0xF8; + trigger[1] = inb(0x4d1) & 0xDE; +} + +static int i8259A_resume(struct sys_device *dev) +{ + init_8259A(i8259A_auto_eoi); + restore_ELCR(irq_trigger); + return 0; +} + +static int i8259A_suspend(struct sys_device *dev, pm_message_t state) +{ + save_ELCR(irq_trigger); + return 0; +} + +static int i8259A_shutdown(struct sys_device *dev) +{ + /* Put the i8259A into a quiescent state that + * the kernel initialization code can get it + * out of. + */ + outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ + outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ + return 0; +} + +static struct sysdev_class i8259_sysdev_class = { + .name = "i8259", + .suspend = i8259A_suspend, + .resume = i8259A_resume, + .shutdown = i8259A_shutdown, +}; + +static struct sys_device device_i8259A = { + .id = 0, + .cls = &i8259_sysdev_class, +}; + +static int __init i8259A_init_sysfs(void) +{ + int error = sysdev_class_register(&i8259_sysdev_class); + if (!error) + error = sysdev_register(&device_i8259A); + return error; +} + +device_initcall(i8259A_init_sysfs); + +void init_8259A(int auto_eoi) +{ + unsigned long flags; + + i8259A_auto_eoi = auto_eoi; + + spin_lock_irqsave(&i8259A_lock, flags); + + outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ + outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ + + /* + * outb_pic - this has to work on a wide range of PC hardware. + */ + outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ +#ifndef CONFIG_X86_64 + outb_pic(0x20 + 0, PIC_MASTER_IMR); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */ + outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */ +#else /* CONFIG_X86_64 */ + /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */ + outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR); + /* 8259A-1 (the master) has a slave on IR2 */ + outb_pic(0x04, PIC_MASTER_IMR); +#endif /* CONFIG_X86_64 */ + if (auto_eoi) /* master does Auto EOI */ + outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); + else /* master expects normal EOI */ + outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); + + outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ +#ifndef CONFIG_X86_64 + outb_pic(0x20 + 8, PIC_SLAVE_IMR); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */ + outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */ + outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */ +#else /* CONFIG_X86_64 */ + /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */ + outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR); + /* 8259A-2 is a slave on master's IR2 */ + outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); + /* (slave's support for AEOI in flat mode is to be investigated) */ + outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); + +#endif /* CONFIG_X86_64 */ + if (auto_eoi) + /* + * In AEOI mode we just have to mask the interrupt + * when acking. + */ + i8259A_chip.mask_ack = disable_8259A_irq; + else + i8259A_chip.mask_ack = mask_and_ack_8259A; + + udelay(100); /* wait for 8259A to initialize */ + + outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ + outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ + + spin_unlock_irqrestore(&i8259A_lock, flags); +} diff --git a/arch/x86/kernel/i8259_32.c b/arch/x86/kernel/i8259_32.c index fe631967d625..d66914287ee1 100644 --- a/arch/x86/kernel/i8259_32.c +++ b/arch/x86/kernel/i8259_32.c @@ -21,302 +21,7 @@ #include #include -/* - * This is the 'legacy' 8259A Programmable Interrupt Controller, - * present in the majority of PC/AT boxes. - * plus some generic x86 specific things if generic specifics makes - * any sense at all. - */ - -static int i8259A_auto_eoi; -DEFINE_SPINLOCK(i8259A_lock); -static void mask_and_ack_8259A(unsigned int); - -static struct irq_chip i8259A_chip = { - .name = "XT-PIC", - .mask = disable_8259A_irq, - .disable = disable_8259A_irq, - .unmask = enable_8259A_irq, - .mask_ack = mask_and_ack_8259A, -}; - -/* - * 8259A PIC functions to handle ISA devices: - */ - -/* - * This contains the irq mask for both 8259A irq controllers, - */ -unsigned int cached_irq_mask = 0xffff; - -/* - * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) - * boards the timer interrupt is not really connected to any IO-APIC pin, - * it's fed to the master 8259A's IR0 line only. - * - * Any '1' bit in this mask means the IRQ is routed through the IO-APIC. - * this 'mixed mode' IRQ handling costs nothing because it's only used - * at IRQ setup time. - */ -unsigned long io_apic_irqs; - -void disable_8259A_irq(unsigned int irq) -{ - unsigned int mask = 1 << irq; - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - cached_irq_mask |= mask; - if (irq & 8) - outb(cached_slave_mask, PIC_SLAVE_IMR); - else - outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); -} - -void enable_8259A_irq(unsigned int irq) -{ - unsigned int mask = ~(1 << irq); - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - cached_irq_mask &= mask; - if (irq & 8) - outb(cached_slave_mask, PIC_SLAVE_IMR); - else - outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); -} - -int i8259A_irq_pending(unsigned int irq) -{ - unsigned int mask = 1<> 8); - spin_unlock_irqrestore(&i8259A_lock, flags); - - return ret; -} - -void make_8259A_irq(unsigned int irq) -{ - disable_irq_nosync(irq); - io_apic_irqs &= ~(1<> 8); - outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */ - return value; -} - -/* - * Careful! The 8259A is a fragile beast, it pretty - * much _has_ to be done exactly like this (mask it - * first, _then_ send the EOI, and the order of EOI - * to the two 8259s is important! - */ -static void mask_and_ack_8259A(unsigned int irq) -{ - unsigned int irqmask = 1 << irq; - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - /* - * Lightweight spurious IRQ detection. We do not want - * to overdo spurious IRQ handling - it's usually a sign - * of hardware problems, so we only do the checks we can - * do without slowing down good hardware unnecessarily. - * - * Note that IRQ7 and IRQ15 (the two spurious IRQs - * usually resulting from the 8259A-1|2 PICs) occur - * even if the IRQ is masked in the 8259A. Thus we - * can check spurious 8259A IRQs without doing the - * quite slow i8259A_irq_real() call for every IRQ. - * This does not cover 100% of spurious interrupts, - * but should be enough to warn the user that there - * is something bad going on ... - */ - if (cached_irq_mask & irqmask) - goto spurious_8259A_irq; - cached_irq_mask |= irqmask; -handle_real_irq: - if (irq & 8) { - inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */ - outb(cached_slave_mask, PIC_SLAVE_IMR); - outb(0x60+(irq&7),PIC_SLAVE_CMD);/* 'Specific EOI' to slave */ - outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); /* 'Specific EOI' to master-IRQ2 */ - } else { - inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */ - outb(cached_master_mask, PIC_MASTER_IMR); - outb(0x60+irq,PIC_MASTER_CMD); /* 'Specific EOI to master */ - } - spin_unlock_irqrestore(&i8259A_lock, flags); - return; - -spurious_8259A_irq: - /* - * this is the slow path - should happen rarely. - */ - if (i8259A_irq_real(irq)) - /* - * oops, the IRQ _is_ in service according to the - * 8259A - not spurious, go handle it. - */ - goto handle_real_irq; - - { - static int spurious_irq_mask; - /* - * At this point we can be sure the IRQ is spurious, - * lets ACK and report it. [once per IRQ] - */ - if (!(spurious_irq_mask & irqmask)) { - printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq); - spurious_irq_mask |= irqmask; - } - atomic_inc(&irq_err_count); - /* - * Theoretically we do not have to handle this IRQ, - * but in Linux this does not cause problems and is - * simpler for us. - */ - goto handle_real_irq; - } -} - -static char irq_trigger[2]; -/** - * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ - */ -static void restore_ELCR(char *trigger) -{ - outb(trigger[0], 0x4d0); - outb(trigger[1], 0x4d1); -} - -static void save_ELCR(char *trigger) -{ - /* IRQ 0,1,2,8,13 are marked as reserved */ - trigger[0] = inb(0x4d0) & 0xF8; - trigger[1] = inb(0x4d1) & 0xDE; -} - -static int i8259A_resume(struct sys_device *dev) -{ - init_8259A(i8259A_auto_eoi); - restore_ELCR(irq_trigger); - return 0; -} - -static int i8259A_suspend(struct sys_device *dev, pm_message_t state) -{ - save_ELCR(irq_trigger); - return 0; -} - -static int i8259A_shutdown(struct sys_device *dev) -{ - /* Put the i8259A into a quiescent state that - * the kernel initialization code can get it - * out of. - */ - outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ - return 0; -} - -static struct sysdev_class i8259_sysdev_class = { - .name = "i8259", - .suspend = i8259A_suspend, - .resume = i8259A_resume, - .shutdown = i8259A_shutdown, -}; - -static struct sys_device device_i8259A = { - .id = 0, - .cls = &i8259_sysdev_class, -}; - -static int __init i8259A_init_sysfs(void) -{ - int error = sysdev_class_register(&i8259_sysdev_class); - if (!error) - error = sysdev_register(&device_i8259A); - return error; -} - -device_initcall(i8259A_init_sysfs); - -void init_8259A(int auto_eoi) -{ - unsigned long flags; - - i8259A_auto_eoi = auto_eoi; - - spin_lock_irqsave(&i8259A_lock, flags); - - outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ - - /* - * outb_pic - this has to work on a wide range of PC hardware. - */ - outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ - outb_pic(0x20 + 0, PIC_MASTER_IMR); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */ - outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */ - if (auto_eoi) /* master does Auto EOI */ - outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); - else /* master expects normal EOI */ - outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); - - outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ - outb_pic(0x20 + 8, PIC_SLAVE_IMR); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */ - outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */ - outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */ - if (auto_eoi) - /* - * In AEOI mode we just have to mask the interrupt - * when acking. - */ - i8259A_chip.mask_ack = disable_8259A_irq; - else - i8259A_chip.mask_ack = mask_and_ack_8259A; - - udelay(100); /* wait for 8259A to initialize */ - - outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ - outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ - - spin_unlock_irqrestore(&i8259A_lock, flags); -} /* * Note that on a 486, we don't want to do a SIGFPE on an irq13 diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c index fa57a1568508..a95d2bd27e9d 100644 --- a/arch/x86/kernel/i8259_64.c +++ b/arch/x86/kernel/i8259_64.c @@ -87,315 +87,6 @@ static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = #undef IRQ #undef IRQLIST_16 -/* - * This is the 'legacy' 8259A Programmable Interrupt Controller, - * present in the majority of PC/AT boxes. - * plus some generic x86 specific things if generic specifics makes - * any sense at all. - * this file should become arch/i386/kernel/irq.c when the old irq.c - * moves to arch independent land - */ - -static int i8259A_auto_eoi; -DEFINE_SPINLOCK(i8259A_lock); -static void mask_and_ack_8259A(unsigned int); - -static struct irq_chip i8259A_chip = { - .name = "XT-PIC", - .mask = disable_8259A_irq, - .disable = disable_8259A_irq, - .unmask = enable_8259A_irq, - .mask_ack = mask_and_ack_8259A, -}; - -/* - * 8259A PIC functions to handle ISA devices: - */ - -/* - * This contains the irq mask for both 8259A irq controllers, - */ -unsigned int cached_irq_mask = 0xffff; - -/* - * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) - * boards the timer interrupt is not really connected to any IO-APIC pin, - * it's fed to the master 8259A's IR0 line only. - * - * Any '1' bit in this mask means the IRQ is routed through the IO-APIC. - * this 'mixed mode' IRQ handling costs nothing because it's only used - * at IRQ setup time. - */ -unsigned long io_apic_irqs; - -void disable_8259A_irq(unsigned int irq) -{ - unsigned int mask = 1 << irq; - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - cached_irq_mask |= mask; - if (irq & 8) - outb(cached_slave_mask, PIC_SLAVE_IMR); - else - outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); -} - -void enable_8259A_irq(unsigned int irq) -{ - unsigned int mask = ~(1 << irq); - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - cached_irq_mask &= mask; - if (irq & 8) - outb(cached_slave_mask, PIC_SLAVE_IMR); - else - outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); -} - -int i8259A_irq_pending(unsigned int irq) -{ - unsigned int mask = 1<> 8); - spin_unlock_irqrestore(&i8259A_lock, flags); - - return ret; -} - -void make_8259A_irq(unsigned int irq) -{ - disable_irq_nosync(irq); - io_apic_irqs &= ~(1<> 8); - outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */ - return value; -} - -/* - * Careful! The 8259A is a fragile beast, it pretty - * much _has_ to be done exactly like this (mask it - * first, _then_ send the EOI, and the order of EOI - * to the two 8259s is important! - */ -static void mask_and_ack_8259A(unsigned int irq) -{ - unsigned int irqmask = 1 << irq; - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - /* - * Lightweight spurious IRQ detection. We do not want - * to overdo spurious IRQ handling - it's usually a sign - * of hardware problems, so we only do the checks we can - * do without slowing down good hardware unnecessarily. - * - * Note that IRQ7 and IRQ15 (the two spurious IRQs - * usually resulting from the 8259A-1|2 PICs) occur - * even if the IRQ is masked in the 8259A. Thus we - * can check spurious 8259A IRQs without doing the - * quite slow i8259A_irq_real() call for every IRQ. - * This does not cover 100% of spurious interrupts, - * but should be enough to warn the user that there - * is something bad going on ... - */ - if (cached_irq_mask & irqmask) - goto spurious_8259A_irq; - cached_irq_mask |= irqmask; - -handle_real_irq: - if (irq & 8) { - inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */ - outb(cached_slave_mask, PIC_SLAVE_IMR); - /* 'Specific EOI' to slave */ - outb(0x60+(irq&7),PIC_SLAVE_CMD); - /* 'Specific EOI' to master-IRQ2 */ - outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); - } else { - inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */ - outb(cached_master_mask, PIC_MASTER_IMR); - /* 'Specific EOI' to master */ - outb(0x60+irq,PIC_MASTER_CMD); - } - spin_unlock_irqrestore(&i8259A_lock, flags); - return; - -spurious_8259A_irq: - /* - * this is the slow path - should happen rarely. - */ - if (i8259A_irq_real(irq)) - /* - * oops, the IRQ _is_ in service according to the - * 8259A - not spurious, go handle it. - */ - goto handle_real_irq; - - { - static int spurious_irq_mask; - /* - * At this point we can be sure the IRQ is spurious, - * lets ACK and report it. [once per IRQ] - */ - if (!(spurious_irq_mask & irqmask)) { - printk(KERN_DEBUG - "spurious 8259A interrupt: IRQ%d.\n", irq); - spurious_irq_mask |= irqmask; - } - atomic_inc(&irq_err_count); - /* - * Theoretically we do not have to handle this IRQ, - * but in Linux this does not cause problems and is - * simpler for us. - */ - goto handle_real_irq; - } -} - -static char irq_trigger[2]; -/** - * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ - */ -static void restore_ELCR(char *trigger) -{ - outb(trigger[0], 0x4d0); - outb(trigger[1], 0x4d1); -} - -static void save_ELCR(char *trigger) -{ - /* IRQ 0,1,2,8,13 are marked as reserved */ - trigger[0] = inb(0x4d0) & 0xF8; - trigger[1] = inb(0x4d1) & 0xDE; -} - -static int i8259A_resume(struct sys_device *dev) -{ - init_8259A(i8259A_auto_eoi); - restore_ELCR(irq_trigger); - return 0; -} - -static int i8259A_suspend(struct sys_device *dev, pm_message_t state) -{ - save_ELCR(irq_trigger); - return 0; -} - -static int i8259A_shutdown(struct sys_device *dev) -{ - /* Put the i8259A into a quiescent state that - * the kernel initialization code can get it - * out of. - */ - outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ - return 0; -} - -static struct sysdev_class i8259_sysdev_class = { - .name = "i8259", - .suspend = i8259A_suspend, - .resume = i8259A_resume, - .shutdown = i8259A_shutdown, -}; - -static struct sys_device device_i8259A = { - .id = 0, - .cls = &i8259_sysdev_class, -}; - -static int __init i8259A_init_sysfs(void) -{ - int error = sysdev_class_register(&i8259_sysdev_class); - if (!error) - error = sysdev_register(&device_i8259A); - return error; -} - -device_initcall(i8259A_init_sysfs); - -void init_8259A(int auto_eoi) -{ - unsigned long flags; - - i8259A_auto_eoi = auto_eoi; - - spin_lock_irqsave(&i8259A_lock, flags); - - outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ - - /* - * outb_pic - this has to work on a wide range of PC hardware. - */ - outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ - /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */ - outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR); - /* 8259A-1 (the master) has a slave on IR2 */ - outb_pic(0x04, PIC_MASTER_IMR); - if (auto_eoi) /* master does Auto EOI */ - outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); - else /* master expects normal EOI */ - outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); - - outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ - /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */ - outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR); - /* 8259A-2 is a slave on master's IR2 */ - outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); - /* (slave's support for AEOI in flat mode is to be investigated) */ - outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); - - if (auto_eoi) - /* - * In AEOI mode we just have to mask the interrupt - * when acking. - */ - i8259A_chip.mask_ack = disable_8259A_irq; - else - i8259A_chip.mask_ack = mask_and_ack_8259A; - - udelay(100); /* wait for 8259A to initialize */ - - outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ - outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ - - spin_unlock_irqrestore(&i8259A_lock, flags); -} - diff --git a/include/asm-x86/i8259.h b/include/asm-x86/i8259.h index 45d4df3e51e6..2f98df91f1f2 100644 --- a/include/asm-x86/i8259.h +++ b/include/asm-x86/i8259.h @@ -55,4 +55,6 @@ static inline void outb_pic(unsigned char value, unsigned int port) udelay(2); } +extern struct irq_chip i8259A_chip; + #endif /* __ASM_I8259_H__ */ -- cgit v1.2.3 From 21fd5132b223a10bdf17713dd0bf321cbd6471d2 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Wed, 21 May 2008 11:44:02 +0200 Subject: x86: automatical unification of i8259.c Make conversion of i8259 very mechanical -- i8259 was generated by diff -D, with too different parts left in i8259_32 and i8259_64.c. Only "by hand" changes were removal of #ifdef from middle of the comment (prevented compilation) and removal of one static to allow splitting into files. Of course, it will need some cleanups now, and those will follow. Signed-of-by: Pavel Machek --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/i8259.c | 368 +++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/i8259_32.c | 295 ------------------------------------ arch/x86/kernel/i8259_64.c | 309 ------------------------------------- include/asm-x86/i8259.h | 2 + 5 files changed, 371 insertions(+), 605 deletions(-) create mode 100644 arch/x86/kernel/i8259.c (limited to 'include') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5e618c3b4720..f71a76ef2596 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -18,7 +18,7 @@ CFLAGS_tsc_64.o := $(nostackp) obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o obj-y += traps_$(BITS).o irq_$(BITS).o obj-y += time_$(BITS).o ioport.o ldt.o -obj-y += setup_$(BITS).o i8259_$(BITS).o setup.o +obj-y += setup_$(BITS).o i8259.o i8259_$(BITS).o setup.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c new file mode 100644 index 000000000000..2decba6b0101 --- /dev/null +++ b/arch/x86/kernel/i8259.c @@ -0,0 +1,368 @@ +#ifdef CONFIG_X86_64 +#include +#endif /* CONFIG_X86_64 */ +#include +#include +#include +#include +#include +#ifdef CONFIG_X86_64 +#include +#endif /* CONFIG_X86_64 */ +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_X86_64 +#include +#endif /* CONFIG_X86_64 */ +#include +#include +#include +#ifndef CONFIG_X86_64 +#include +#else /* CONFIG_X86_64 */ +#include +#endif /* CONFIG_X86_64 */ +#include +#include +#include +#include +#ifndef CONFIG_X86_64 +#include +#endif /* ! CONFIG_X86_64 */ +#include + +/* + * This is the 'legacy' 8259A Programmable Interrupt Controller, + * present in the majority of PC/AT boxes. + * plus some generic x86 specific things if generic specifics makes + * any sense at all. + */ + +static int i8259A_auto_eoi; +DEFINE_SPINLOCK(i8259A_lock); +static void mask_and_ack_8259A(unsigned int); + +struct irq_chip i8259A_chip = { + .name = "XT-PIC", + .mask = disable_8259A_irq, + .disable = disable_8259A_irq, + .unmask = enable_8259A_irq, + .mask_ack = mask_and_ack_8259A, +}; + +/* + * 8259A PIC functions to handle ISA devices: + */ + +/* + * This contains the irq mask for both 8259A irq controllers, + */ +unsigned int cached_irq_mask = 0xffff; + +/* + * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) + * boards the timer interrupt is not really connected to any IO-APIC pin, + * it's fed to the master 8259A's IR0 line only. + * + * Any '1' bit in this mask means the IRQ is routed through the IO-APIC. + * this 'mixed mode' IRQ handling costs nothing because it's only used + * at IRQ setup time. + */ +unsigned long io_apic_irqs; + +void disable_8259A_irq(unsigned int irq) +{ + unsigned int mask = 1 << irq; + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + cached_irq_mask |= mask; + if (irq & 8) + outb(cached_slave_mask, PIC_SLAVE_IMR); + else + outb(cached_master_mask, PIC_MASTER_IMR); + spin_unlock_irqrestore(&i8259A_lock, flags); +} + +void enable_8259A_irq(unsigned int irq) +{ + unsigned int mask = ~(1 << irq); + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + cached_irq_mask &= mask; + if (irq & 8) + outb(cached_slave_mask, PIC_SLAVE_IMR); + else + outb(cached_master_mask, PIC_MASTER_IMR); + spin_unlock_irqrestore(&i8259A_lock, flags); +} + +int i8259A_irq_pending(unsigned int irq) +{ + unsigned int mask = 1<> 8); + spin_unlock_irqrestore(&i8259A_lock, flags); + + return ret; +} + +void make_8259A_irq(unsigned int irq) +{ + disable_irq_nosync(irq); + io_apic_irqs &= ~(1<> 8); + outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */ + return value; +} + +/* + * Careful! The 8259A is a fragile beast, it pretty + * much _has_ to be done exactly like this (mask it + * first, _then_ send the EOI, and the order of EOI + * to the two 8259s is important! + */ +static void mask_and_ack_8259A(unsigned int irq) +{ + unsigned int irqmask = 1 << irq; + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + /* + * Lightweight spurious IRQ detection. We do not want + * to overdo spurious IRQ handling - it's usually a sign + * of hardware problems, so we only do the checks we can + * do without slowing down good hardware unnecessarily. + * + * Note that IRQ7 and IRQ15 (the two spurious IRQs + * usually resulting from the 8259A-1|2 PICs) occur + * even if the IRQ is masked in the 8259A. Thus we + * can check spurious 8259A IRQs without doing the + * quite slow i8259A_irq_real() call for every IRQ. + * This does not cover 100% of spurious interrupts, + * but should be enough to warn the user that there + * is something bad going on ... + */ + if (cached_irq_mask & irqmask) + goto spurious_8259A_irq; + cached_irq_mask |= irqmask; + +handle_real_irq: + if (irq & 8) { + inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */ + outb(cached_slave_mask, PIC_SLAVE_IMR); +#ifndef CONFIG_X86_64 + outb(0x60+(irq&7),PIC_SLAVE_CMD);/* 'Specific EOI' to slave */ + outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); /* 'Specific EOI' to master-IRQ2 */ +#else /* CONFIG_X86_64 */ + /* 'Specific EOI' to slave */ + outb(0x60+(irq&7),PIC_SLAVE_CMD); + /* 'Specific EOI' to master-IRQ2 */ + outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); +#endif /* CONFIG_X86_64 */ + } else { + inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */ + outb(cached_master_mask, PIC_MASTER_IMR); +#ifndef CONFIG_X86_64 + outb(0x60+irq,PIC_MASTER_CMD); /* 'Specific EOI to master */ +#else /* CONFIG_X86_64 */ + /* 'Specific EOI' to master */ + outb(0x60+irq,PIC_MASTER_CMD); +#endif /* CONFIG_X86_64 */ + } + spin_unlock_irqrestore(&i8259A_lock, flags); + return; + +spurious_8259A_irq: + /* + * this is the slow path - should happen rarely. + */ + if (i8259A_irq_real(irq)) + /* + * oops, the IRQ _is_ in service according to the + * 8259A - not spurious, go handle it. + */ + goto handle_real_irq; + + { + static int spurious_irq_mask; + /* + * At this point we can be sure the IRQ is spurious, + * lets ACK and report it. [once per IRQ] + */ + if (!(spurious_irq_mask & irqmask)) { +#ifndef CONFIG_X86_64 + printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq); +#else /* CONFIG_X86_64 */ + printk(KERN_DEBUG + "spurious 8259A interrupt: IRQ%d.\n", irq); +#endif /* CONFIG_X86_64 */ + spurious_irq_mask |= irqmask; + } + atomic_inc(&irq_err_count); + /* + * Theoretically we do not have to handle this IRQ, + * but in Linux this does not cause problems and is + * simpler for us. + */ + goto handle_real_irq; + } +} + +static char irq_trigger[2]; +/** + * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ + */ +static void restore_ELCR(char *trigger) +{ + outb(trigger[0], 0x4d0); + outb(trigger[1], 0x4d1); +} + +static void save_ELCR(char *trigger) +{ + /* IRQ 0,1,2,8,13 are marked as reserved */ + trigger[0] = inb(0x4d0) & 0xF8; + trigger[1] = inb(0x4d1) & 0xDE; +} + +static int i8259A_resume(struct sys_device *dev) +{ + init_8259A(i8259A_auto_eoi); + restore_ELCR(irq_trigger); + return 0; +} + +static int i8259A_suspend(struct sys_device *dev, pm_message_t state) +{ + save_ELCR(irq_trigger); + return 0; +} + +static int i8259A_shutdown(struct sys_device *dev) +{ + /* Put the i8259A into a quiescent state that + * the kernel initialization code can get it + * out of. + */ + outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ + outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ + return 0; +} + +static struct sysdev_class i8259_sysdev_class = { + .name = "i8259", + .suspend = i8259A_suspend, + .resume = i8259A_resume, + .shutdown = i8259A_shutdown, +}; + +static struct sys_device device_i8259A = { + .id = 0, + .cls = &i8259_sysdev_class, +}; + +static int __init i8259A_init_sysfs(void) +{ + int error = sysdev_class_register(&i8259_sysdev_class); + if (!error) + error = sysdev_register(&device_i8259A); + return error; +} + +device_initcall(i8259A_init_sysfs); + +void init_8259A(int auto_eoi) +{ + unsigned long flags; + + i8259A_auto_eoi = auto_eoi; + + spin_lock_irqsave(&i8259A_lock, flags); + + outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ + outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ + + /* + * outb_pic - this has to work on a wide range of PC hardware. + */ + outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ +#ifndef CONFIG_X86_64 + outb_pic(0x20 + 0, PIC_MASTER_IMR); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */ + outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */ +#else /* CONFIG_X86_64 */ + /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */ + outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR); + /* 8259A-1 (the master) has a slave on IR2 */ + outb_pic(0x04, PIC_MASTER_IMR); +#endif /* CONFIG_X86_64 */ + if (auto_eoi) /* master does Auto EOI */ + outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); + else /* master expects normal EOI */ + outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); + + outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ +#ifndef CONFIG_X86_64 + outb_pic(0x20 + 8, PIC_SLAVE_IMR); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */ + outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */ + outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */ +#else /* CONFIG_X86_64 */ + /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */ + outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR); + /* 8259A-2 is a slave on master's IR2 */ + outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); + /* (slave's support for AEOI in flat mode is to be investigated) */ + outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); + +#endif /* CONFIG_X86_64 */ + if (auto_eoi) + /* + * In AEOI mode we just have to mask the interrupt + * when acking. + */ + i8259A_chip.mask_ack = disable_8259A_irq; + else + i8259A_chip.mask_ack = mask_and_ack_8259A; + + udelay(100); /* wait for 8259A to initialize */ + + outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ + outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ + + spin_unlock_irqrestore(&i8259A_lock, flags); +} diff --git a/arch/x86/kernel/i8259_32.c b/arch/x86/kernel/i8259_32.c index fe631967d625..d66914287ee1 100644 --- a/arch/x86/kernel/i8259_32.c +++ b/arch/x86/kernel/i8259_32.c @@ -21,302 +21,7 @@ #include #include -/* - * This is the 'legacy' 8259A Programmable Interrupt Controller, - * present in the majority of PC/AT boxes. - * plus some generic x86 specific things if generic specifics makes - * any sense at all. - */ - -static int i8259A_auto_eoi; -DEFINE_SPINLOCK(i8259A_lock); -static void mask_and_ack_8259A(unsigned int); - -static struct irq_chip i8259A_chip = { - .name = "XT-PIC", - .mask = disable_8259A_irq, - .disable = disable_8259A_irq, - .unmask = enable_8259A_irq, - .mask_ack = mask_and_ack_8259A, -}; - -/* - * 8259A PIC functions to handle ISA devices: - */ - -/* - * This contains the irq mask for both 8259A irq controllers, - */ -unsigned int cached_irq_mask = 0xffff; - -/* - * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) - * boards the timer interrupt is not really connected to any IO-APIC pin, - * it's fed to the master 8259A's IR0 line only. - * - * Any '1' bit in this mask means the IRQ is routed through the IO-APIC. - * this 'mixed mode' IRQ handling costs nothing because it's only used - * at IRQ setup time. - */ -unsigned long io_apic_irqs; - -void disable_8259A_irq(unsigned int irq) -{ - unsigned int mask = 1 << irq; - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - cached_irq_mask |= mask; - if (irq & 8) - outb(cached_slave_mask, PIC_SLAVE_IMR); - else - outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); -} - -void enable_8259A_irq(unsigned int irq) -{ - unsigned int mask = ~(1 << irq); - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - cached_irq_mask &= mask; - if (irq & 8) - outb(cached_slave_mask, PIC_SLAVE_IMR); - else - outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); -} - -int i8259A_irq_pending(unsigned int irq) -{ - unsigned int mask = 1<> 8); - spin_unlock_irqrestore(&i8259A_lock, flags); - - return ret; -} - -void make_8259A_irq(unsigned int irq) -{ - disable_irq_nosync(irq); - io_apic_irqs &= ~(1<> 8); - outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */ - return value; -} - -/* - * Careful! The 8259A is a fragile beast, it pretty - * much _has_ to be done exactly like this (mask it - * first, _then_ send the EOI, and the order of EOI - * to the two 8259s is important! - */ -static void mask_and_ack_8259A(unsigned int irq) -{ - unsigned int irqmask = 1 << irq; - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - /* - * Lightweight spurious IRQ detection. We do not want - * to overdo spurious IRQ handling - it's usually a sign - * of hardware problems, so we only do the checks we can - * do without slowing down good hardware unnecessarily. - * - * Note that IRQ7 and IRQ15 (the two spurious IRQs - * usually resulting from the 8259A-1|2 PICs) occur - * even if the IRQ is masked in the 8259A. Thus we - * can check spurious 8259A IRQs without doing the - * quite slow i8259A_irq_real() call for every IRQ. - * This does not cover 100% of spurious interrupts, - * but should be enough to warn the user that there - * is something bad going on ... - */ - if (cached_irq_mask & irqmask) - goto spurious_8259A_irq; - cached_irq_mask |= irqmask; -handle_real_irq: - if (irq & 8) { - inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */ - outb(cached_slave_mask, PIC_SLAVE_IMR); - outb(0x60+(irq&7),PIC_SLAVE_CMD);/* 'Specific EOI' to slave */ - outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); /* 'Specific EOI' to master-IRQ2 */ - } else { - inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */ - outb(cached_master_mask, PIC_MASTER_IMR); - outb(0x60+irq,PIC_MASTER_CMD); /* 'Specific EOI to master */ - } - spin_unlock_irqrestore(&i8259A_lock, flags); - return; - -spurious_8259A_irq: - /* - * this is the slow path - should happen rarely. - */ - if (i8259A_irq_real(irq)) - /* - * oops, the IRQ _is_ in service according to the - * 8259A - not spurious, go handle it. - */ - goto handle_real_irq; - - { - static int spurious_irq_mask; - /* - * At this point we can be sure the IRQ is spurious, - * lets ACK and report it. [once per IRQ] - */ - if (!(spurious_irq_mask & irqmask)) { - printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq); - spurious_irq_mask |= irqmask; - } - atomic_inc(&irq_err_count); - /* - * Theoretically we do not have to handle this IRQ, - * but in Linux this does not cause problems and is - * simpler for us. - */ - goto handle_real_irq; - } -} - -static char irq_trigger[2]; -/** - * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ - */ -static void restore_ELCR(char *trigger) -{ - outb(trigger[0], 0x4d0); - outb(trigger[1], 0x4d1); -} - -static void save_ELCR(char *trigger) -{ - /* IRQ 0,1,2,8,13 are marked as reserved */ - trigger[0] = inb(0x4d0) & 0xF8; - trigger[1] = inb(0x4d1) & 0xDE; -} - -static int i8259A_resume(struct sys_device *dev) -{ - init_8259A(i8259A_auto_eoi); - restore_ELCR(irq_trigger); - return 0; -} - -static int i8259A_suspend(struct sys_device *dev, pm_message_t state) -{ - save_ELCR(irq_trigger); - return 0; -} - -static int i8259A_shutdown(struct sys_device *dev) -{ - /* Put the i8259A into a quiescent state that - * the kernel initialization code can get it - * out of. - */ - outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ - return 0; -} - -static struct sysdev_class i8259_sysdev_class = { - .name = "i8259", - .suspend = i8259A_suspend, - .resume = i8259A_resume, - .shutdown = i8259A_shutdown, -}; - -static struct sys_device device_i8259A = { - .id = 0, - .cls = &i8259_sysdev_class, -}; - -static int __init i8259A_init_sysfs(void) -{ - int error = sysdev_class_register(&i8259_sysdev_class); - if (!error) - error = sysdev_register(&device_i8259A); - return error; -} - -device_initcall(i8259A_init_sysfs); - -void init_8259A(int auto_eoi) -{ - unsigned long flags; - - i8259A_auto_eoi = auto_eoi; - - spin_lock_irqsave(&i8259A_lock, flags); - - outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ - - /* - * outb_pic - this has to work on a wide range of PC hardware. - */ - outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ - outb_pic(0x20 + 0, PIC_MASTER_IMR); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */ - outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */ - if (auto_eoi) /* master does Auto EOI */ - outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); - else /* master expects normal EOI */ - outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); - - outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ - outb_pic(0x20 + 8, PIC_SLAVE_IMR); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */ - outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */ - outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */ - if (auto_eoi) - /* - * In AEOI mode we just have to mask the interrupt - * when acking. - */ - i8259A_chip.mask_ack = disable_8259A_irq; - else - i8259A_chip.mask_ack = mask_and_ack_8259A; - - udelay(100); /* wait for 8259A to initialize */ - - outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ - outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ - - spin_unlock_irqrestore(&i8259A_lock, flags); -} /* * Note that on a 486, we don't want to do a SIGFPE on an irq13 diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c index 1870e0e86559..b44095efcf83 100644 --- a/arch/x86/kernel/i8259_64.c +++ b/arch/x86/kernel/i8259_64.c @@ -101,315 +101,6 @@ static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = #undef IRQ #undef IRQLIST_16 -/* - * This is the 'legacy' 8259A Programmable Interrupt Controller, - * present in the majority of PC/AT boxes. - * plus some generic x86 specific things if generic specifics makes - * any sense at all. - * this file should become arch/i386/kernel/irq.c when the old irq.c - * moves to arch independent land - */ - -static int i8259A_auto_eoi; -DEFINE_SPINLOCK(i8259A_lock); -static void mask_and_ack_8259A(unsigned int); - -static struct irq_chip i8259A_chip = { - .name = "XT-PIC", - .mask = disable_8259A_irq, - .disable = disable_8259A_irq, - .unmask = enable_8259A_irq, - .mask_ack = mask_and_ack_8259A, -}; - -/* - * 8259A PIC functions to handle ISA devices: - */ - -/* - * This contains the irq mask for both 8259A irq controllers, - */ -unsigned int cached_irq_mask = 0xffff; - -/* - * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) - * boards the timer interrupt is not really connected to any IO-APIC pin, - * it's fed to the master 8259A's IR0 line only. - * - * Any '1' bit in this mask means the IRQ is routed through the IO-APIC. - * this 'mixed mode' IRQ handling costs nothing because it's only used - * at IRQ setup time. - */ -unsigned long io_apic_irqs; - -void disable_8259A_irq(unsigned int irq) -{ - unsigned int mask = 1 << irq; - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - cached_irq_mask |= mask; - if (irq & 8) - outb(cached_slave_mask, PIC_SLAVE_IMR); - else - outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); -} - -void enable_8259A_irq(unsigned int irq) -{ - unsigned int mask = ~(1 << irq); - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - cached_irq_mask &= mask; - if (irq & 8) - outb(cached_slave_mask, PIC_SLAVE_IMR); - else - outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); -} - -int i8259A_irq_pending(unsigned int irq) -{ - unsigned int mask = 1<> 8); - spin_unlock_irqrestore(&i8259A_lock, flags); - - return ret; -} - -void make_8259A_irq(unsigned int irq) -{ - disable_irq_nosync(irq); - io_apic_irqs &= ~(1<> 8); - outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */ - return value; -} - -/* - * Careful! The 8259A is a fragile beast, it pretty - * much _has_ to be done exactly like this (mask it - * first, _then_ send the EOI, and the order of EOI - * to the two 8259s is important! - */ -static void mask_and_ack_8259A(unsigned int irq) -{ - unsigned int irqmask = 1 << irq; - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - /* - * Lightweight spurious IRQ detection. We do not want - * to overdo spurious IRQ handling - it's usually a sign - * of hardware problems, so we only do the checks we can - * do without slowing down good hardware unnecessarily. - * - * Note that IRQ7 and IRQ15 (the two spurious IRQs - * usually resulting from the 8259A-1|2 PICs) occur - * even if the IRQ is masked in the 8259A. Thus we - * can check spurious 8259A IRQs without doing the - * quite slow i8259A_irq_real() call for every IRQ. - * This does not cover 100% of spurious interrupts, - * but should be enough to warn the user that there - * is something bad going on ... - */ - if (cached_irq_mask & irqmask) - goto spurious_8259A_irq; - cached_irq_mask |= irqmask; - -handle_real_irq: - if (irq & 8) { - inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */ - outb(cached_slave_mask, PIC_SLAVE_IMR); - /* 'Specific EOI' to slave */ - outb(0x60+(irq&7),PIC_SLAVE_CMD); - /* 'Specific EOI' to master-IRQ2 */ - outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); - } else { - inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */ - outb(cached_master_mask, PIC_MASTER_IMR); - /* 'Specific EOI' to master */ - outb(0x60+irq,PIC_MASTER_CMD); - } - spin_unlock_irqrestore(&i8259A_lock, flags); - return; - -spurious_8259A_irq: - /* - * this is the slow path - should happen rarely. - */ - if (i8259A_irq_real(irq)) - /* - * oops, the IRQ _is_ in service according to the - * 8259A - not spurious, go handle it. - */ - goto handle_real_irq; - - { - static int spurious_irq_mask; - /* - * At this point we can be sure the IRQ is spurious, - * lets ACK and report it. [once per IRQ] - */ - if (!(spurious_irq_mask & irqmask)) { - printk(KERN_DEBUG - "spurious 8259A interrupt: IRQ%d.\n", irq); - spurious_irq_mask |= irqmask; - } - atomic_inc(&irq_err_count); - /* - * Theoretically we do not have to handle this IRQ, - * but in Linux this does not cause problems and is - * simpler for us. - */ - goto handle_real_irq; - } -} - -static char irq_trigger[2]; -/** - * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ - */ -static void restore_ELCR(char *trigger) -{ - outb(trigger[0], 0x4d0); - outb(trigger[1], 0x4d1); -} - -static void save_ELCR(char *trigger) -{ - /* IRQ 0,1,2,8,13 are marked as reserved */ - trigger[0] = inb(0x4d0) & 0xF8; - trigger[1] = inb(0x4d1) & 0xDE; -} - -static int i8259A_resume(struct sys_device *dev) -{ - init_8259A(i8259A_auto_eoi); - restore_ELCR(irq_trigger); - return 0; -} - -static int i8259A_suspend(struct sys_device *dev, pm_message_t state) -{ - save_ELCR(irq_trigger); - return 0; -} - -static int i8259A_shutdown(struct sys_device *dev) -{ - /* Put the i8259A into a quiescent state that - * the kernel initialization code can get it - * out of. - */ - outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ - return 0; -} - -static struct sysdev_class i8259_sysdev_class = { - .name = "i8259", - .suspend = i8259A_suspend, - .resume = i8259A_resume, - .shutdown = i8259A_shutdown, -}; - -static struct sys_device device_i8259A = { - .id = 0, - .cls = &i8259_sysdev_class, -}; - -static int __init i8259A_init_sysfs(void) -{ - int error = sysdev_class_register(&i8259_sysdev_class); - if (!error) - error = sysdev_register(&device_i8259A); - return error; -} - -device_initcall(i8259A_init_sysfs); - -void init_8259A(int auto_eoi) -{ - unsigned long flags; - - i8259A_auto_eoi = auto_eoi; - - spin_lock_irqsave(&i8259A_lock, flags); - - outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ - - /* - * outb_pic - this has to work on a wide range of PC hardware. - */ - outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ - /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */ - outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR); - /* 8259A-1 (the master) has a slave on IR2 */ - outb_pic(0x04, PIC_MASTER_IMR); - if (auto_eoi) /* master does Auto EOI */ - outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); - else /* master expects normal EOI */ - outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); - - outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ - /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */ - outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR); - /* 8259A-2 is a slave on master's IR2 */ - outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); - /* (slave's support for AEOI in flat mode is to be investigated) */ - outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); - - if (auto_eoi) - /* - * In AEOI mode we just have to mask the interrupt - * when acking. - */ - i8259A_chip.mask_ack = disable_8259A_irq; - else - i8259A_chip.mask_ack = mask_and_ack_8259A; - - udelay(100); /* wait for 8259A to initialize */ - - outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ - outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ - - spin_unlock_irqrestore(&i8259A_lock, flags); -} - diff --git a/include/asm-x86/i8259.h b/include/asm-x86/i8259.h index 45d4df3e51e6..2f98df91f1f2 100644 --- a/include/asm-x86/i8259.h +++ b/include/asm-x86/i8259.h @@ -55,4 +55,6 @@ static inline void outb_pic(unsigned char value, unsigned int port) udelay(2); } +extern struct irq_chip i8259A_chip; + #endif /* __ASM_I8259_H__ */ -- cgit v1.2.3 From 3711ccb07b7f0a13f4f1aa16a8fdca9c930f21ca Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 24 May 2008 17:24:34 +0200 Subject: x86: fixup the fallout of the bitops changes Signed-off-by: Thomas Gleixner --- include/asm-x86/thread_info.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index 25d710532561..895339d2bc0b 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -245,7 +245,7 @@ static inline void set_restore_sigmask(void) { struct thread_info *ti = current_thread_info(); ti->status |= TS_RESTORE_SIGMASK; - set_bit(TIF_SIGPENDING, &ti->flags); + set_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags); } #endif /* !__ASSEMBLY__ */ -- cgit v1.2.3 From 42fdfa238a23643226910acf922ea930b3286032 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 24 May 2008 23:14:51 +0200 Subject: namespacecheck: more kernel/printk.c fixes [ Stephen Rothwell : build fix ] Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/kernel.h | 6 ------ kernel/printk.c | 13 ------------- 2 files changed, 19 deletions(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 792bf0aa779b..f2a668c195bf 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -184,9 +184,6 @@ asmlinkage int vprintk(const char *fmt, va_list args) __attribute__ ((format (printf, 1, 0))); asmlinkage int printk(const char * fmt, ...) __attribute__ ((format (printf, 1, 2))) __cold; -extern int log_buf_get_len(void); -extern int log_buf_read(int idx); -extern int log_buf_copy(char *dest, int idx, int len); extern int printk_ratelimit_jiffies; extern int printk_ratelimit_burst; @@ -202,9 +199,6 @@ static inline int vprintk(const char *s, va_list args) { return 0; } static inline int printk(const char *s, ...) __attribute__ ((format (printf, 1, 2))); static inline int __cold printk(const char *s, ...) { return 0; } -static inline int log_buf_get_len(void) { return 0; } -static inline int log_buf_read(int idx) { return 0; } -static inline int log_buf_copy(char *dest, int idx, int len) { return 0; } static inline int printk_ratelimit(void) { return 0; } static inline int __printk_ratelimit(int ratelimit_jiffies, \ int ratelimit_burst) { return 0; } diff --git a/kernel/printk.c b/kernel/printk.c index b620e3d96136..55d16e57499a 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -267,19 +267,6 @@ int log_buf_copy(char *dest, int idx, int len) return ret; } -/* - * Extract a single character from the log buffer. - */ -static int log_buf_read(int idx) -{ - char ret; - - if (log_buf_copy(&ret, idx, 1) == 1) - return ret; - else - return -1; -} - /* * Commands to do_syslog: * -- cgit v1.2.3 From 6360b1fbb4a939efd34fc770c2ebd927c55506e0 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 12 May 2008 15:44:41 +0200 Subject: move BUG_TABLE into RODATA Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/avr32/kernel/vmlinux.lds.S | 2 -- arch/parisc/kernel/vmlinux.lds.S | 1 - arch/powerpc/kernel/vmlinux.lds.S | 2 -- arch/s390/kernel/vmlinux.lds.S | 1 - arch/sh/kernel/vmlinux_32.lds.S | 1 - arch/sh/kernel/vmlinux_64.lds.S | 1 - arch/x86/kernel/vmlinux_32.lds.S | 8 +++----- arch/x86/kernel/vmlinux_64.lds.S | 10 ++++------ include/asm-generic/vmlinux.lds.h | 6 ++++++ 9 files changed, 13 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/arch/avr32/kernel/vmlinux.lds.S b/arch/avr32/kernel/vmlinux.lds.S index 481cfd40c053..bc932c9b4272 100644 --- a/arch/avr32/kernel/vmlinux.lds.S +++ b/arch/avr32/kernel/vmlinux.lds.S @@ -93,8 +93,6 @@ SECTIONS __stop___ex_table = .; } - BUG_TABLE - RODATA . = ALIGN(THREAD_SIZE); diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 50b4a3a25d0a..ff7d4ff4675a 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -66,7 +66,6 @@ SECTIONS _etext = .; RODATA - BUG_TABLE /* writeable */ /* Make sure this is page aligned so diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 0c3000bf8d75..53d57d17a894 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -64,8 +64,6 @@ SECTIONS NOTES - BUG_TABLE - /* * Init sections discarded at runtime */ diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index b4607155e8d0..76c1e60c92f3 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -40,7 +40,6 @@ SECTIONS _etext = .; /* End of text section */ NOTES :text :note - BUG_TABLE :text RODATA diff --git a/arch/sh/kernel/vmlinux_32.lds.S b/arch/sh/kernel/vmlinux_32.lds.S index c7113786ecd4..7b4b82bd1156 100644 --- a/arch/sh/kernel/vmlinux_32.lds.S +++ b/arch/sh/kernel/vmlinux_32.lds.S @@ -44,7 +44,6 @@ SECTIONS _etext = .; /* End of text section */ - BUG_TABLE NOTES RO_DATA(PAGE_SIZE) diff --git a/arch/sh/kernel/vmlinux_64.lds.S b/arch/sh/kernel/vmlinux_64.lds.S index d1e177009a41..33fa46451406 100644 --- a/arch/sh/kernel/vmlinux_64.lds.S +++ b/arch/sh/kernel/vmlinux_64.lds.S @@ -65,7 +65,6 @@ SECTIONS _etext = .; /* End of text section */ - BUG_TABLE NOTES RO_DATA(PAGE_SIZE) diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index ce5ed083a1e9..aa0855471c79 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -49,16 +49,14 @@ SECTIONS _etext = .; /* End of text section */ } :text = 0x9090 + NOTES :text :note + . = ALIGN(16); /* Exception table */ __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { __start___ex_table = .; *(__ex_table) __stop___ex_table = .; - } - - NOTES :text :note - - BUG_TABLE :text + } :text = 0x9090 . = ALIGN(4); .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) { diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index fad3674b06a5..d123747af1e4 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -19,7 +19,7 @@ PHDRS { data PT_LOAD FLAGS(7); /* RWE */ user PT_LOAD FLAGS(7); /* RWE */ data.init PT_LOAD FLAGS(7); /* RWE */ - note PT_NOTE FLAGS(4); /* R__ */ + note PT_NOTE FLAGS(0); /* ___ */ } SECTIONS { @@ -40,16 +40,14 @@ SECTIONS _etext = .; /* End of text section */ } :text = 0x9090 + NOTES :text :note + . = ALIGN(16); /* Exception table */ __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { __start___ex_table = .; *(__ex_table) __stop___ex_table = .; - } - - NOTES :text :note - - BUG_TABLE :text + } :text = 0x9090 RODATA diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index f054778e916c..dd2cc8122ad8 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -67,6 +67,8 @@ *(.rodata1) \ } \ \ + BUG_TABLE \ + \ /* PCI quirks */ \ .pci_fixup : AT(ADDR(.pci_fixup) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start_pci_fixups_early) = .; \ @@ -310,6 +312,7 @@ .stab.indexstr 0 : { *(.stab.indexstr) } \ .comment 0 : { *(.comment) } +#ifdef CONFIG_GENERIC_BUG #define BUG_TABLE \ . = ALIGN(8); \ __bug_table : AT(ADDR(__bug_table) - LOAD_OFFSET) { \ @@ -317,6 +320,9 @@ *(__bug_table) \ __stop___bug_table = .; \ } +#else +#define BUG_TABLE +#endif #define NOTES \ .notes : AT(ADDR(.notes) - LOAD_OFFSET) { \ -- cgit v1.2.3 From 63687a528c39a67c1a213cdffa09feb0e6af9dbe Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 12 May 2008 15:44:41 +0200 Subject: x86: move tracedata to RODATA .. allowing it to be write-protected just as other read-only data under CONFIG_DEBUG_RODATA. Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/vmlinux_32.lds.S | 7 ------- arch/x86/kernel/vmlinux_64.lds.S | 7 ------- drivers/base/power/trace.c | 2 +- include/asm-generic/vmlinux.lds.h | 14 ++++++++++++++ include/asm-x86/resume-trace.h | 2 +- include/linux/resume-trace.h | 2 +- 6 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index ce5ed083a1e9..2674f5796275 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -60,13 +60,6 @@ SECTIONS BUG_TABLE :text - . = ALIGN(4); - .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) { - __tracedata_start = .; - *(.tracedata) - __tracedata_end = .; - } - RODATA /* writeable */ diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index fad3674b06a5..687041bfbae5 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -53,13 +53,6 @@ SECTIONS RODATA - . = ALIGN(4); - .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) { - __tracedata_start = .; - *(.tracedata) - __tracedata_end = .; - } - . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */ /* Data */ .data : AT(ADDR(.data) - LOAD_OFFSET) { diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c index 2b4b392dcbc1..87a7f1d02578 100644 --- a/drivers/base/power/trace.c +++ b/drivers/base/power/trace.c @@ -153,7 +153,7 @@ EXPORT_SYMBOL(set_trace_device); * it's not any guarantee, but it's a high _likelihood_ that * the match is valid). */ -void generate_resume_trace(void *tracedata, unsigned int user) +void generate_resume_trace(const void *tracedata, unsigned int user) { unsigned short lineno = *(unsigned short *)tracedata; const char *file = *(const char **)(tracedata + 2); diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index f054778e916c..f1992dc5c424 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -93,6 +93,8 @@ VMLINUX_SYMBOL(__end_rio_route_ops) = .; \ } \ \ + TRACEDATA \ + \ /* Kernel symbol table: Normal symbols */ \ __ksymtab : AT(ADDR(__ksymtab) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___ksymtab) = .; \ @@ -318,6 +320,18 @@ __stop___bug_table = .; \ } +#ifdef CONFIG_PM_TRACE +#define TRACEDATA \ + . = ALIGN(4); \ + .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) { \ + __tracedata_start = .; \ + *(.tracedata) \ + __tracedata_end = .; \ + } +#else +#define TRACEDATA +#endif + #define NOTES \ .notes : AT(ADDR(.notes) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start_notes) = .; \ diff --git a/include/asm-x86/resume-trace.h b/include/asm-x86/resume-trace.h index 2557514d7ef6..8d9f0b41ee86 100644 --- a/include/asm-x86/resume-trace.h +++ b/include/asm-x86/resume-trace.h @@ -6,7 +6,7 @@ #define TRACE_RESUME(user) \ do { \ if (pm_trace_enabled) { \ - void *tracedata; \ + const void *tracedata; \ asm volatile(_ASM_MOV_UL " $1f,%0\n" \ ".section .tracedata,\"a\"\n" \ "1:\t.word %c1\n\t" \ diff --git a/include/linux/resume-trace.h b/include/linux/resume-trace.h index f3f4f28c6960..c9ba2fdf807d 100644 --- a/include/linux/resume-trace.h +++ b/include/linux/resume-trace.h @@ -8,7 +8,7 @@ extern int pm_trace_enabled; struct device; extern void set_trace_device(struct device *); -extern void generate_resume_trace(void *tracedata, unsigned int user); +extern void generate_resume_trace(const void *tracedata, unsigned int user); #define TRACE_DEVICE(dev) do { \ if (pm_trace_enabled) \ -- cgit v1.2.3 From a2eddfa95919a730e0e5ed17e9c303fe5ba249cd Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 12 May 2008 15:44:41 +0200 Subject: x86: make /proc/stat account for all interrupts LAPIC interrupts, which don't go through the generic interrupt handling code, aren't accounted for in /proc/stat. Hence this patch adds a mechanism architectures can use to accordingly adjust the statistics. Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/irq_32.c | 38 ++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/irq_64.c | 28 ++++++++++++++++++++++++++++ fs/proc/proc_misc.c | 9 +++++++++ include/asm-x86/hardirq.h | 6 ++++++ 4 files changed, 81 insertions(+) (limited to 'include') diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 147352df28b9..468acd04aa2e 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -313,16 +313,20 @@ skip: per_cpu(irq_stat,j).irq_tlb_count); seq_printf(p, " TLB shootdowns\n"); #endif +#ifdef CONFIG_X86_MCE seq_printf(p, "TRM: "); for_each_online_cpu(j) seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_thermal_count); seq_printf(p, " Thermal event interrupts\n"); +#endif +#ifdef CONFIG_X86_LOCAL_APIC seq_printf(p, "SPU: "); for_each_online_cpu(j) seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_spurious_count); seq_printf(p, " Spurious interrupts\n"); +#endif seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); #if defined(CONFIG_X86_IO_APIC) seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); @@ -331,6 +335,40 @@ skip: return 0; } +/* + * /proc/stat helpers + */ +u64 arch_irq_stat_cpu(unsigned int cpu) +{ + u64 sum = nmi_count(cpu); + +#ifdef CONFIG_X86_LOCAL_APIC + sum += per_cpu(irq_stat, cpu).apic_timer_irqs; +#endif +#ifdef CONFIG_SMP + sum += per_cpu(irq_stat, cpu).irq_resched_count; + sum += per_cpu(irq_stat, cpu).irq_call_count; + sum += per_cpu(irq_stat, cpu).irq_tlb_count; +#endif +#ifdef CONFIG_X86_MCE + sum += per_cpu(irq_stat, cpu).irq_thermal_count; +#endif +#ifdef CONFIG_X86_LOCAL_APIC + sum += per_cpu(irq_stat, cpu).irq_spurious_count; +#endif + return sum; +} + +u64 arch_irq_stat(void) +{ + u64 sum = atomic_read(&irq_err_count); + +#ifdef CONFIG_X86_IO_APIC + sum += atomic_read(&irq_mis_count); +#endif + return sum; +} + #ifdef CONFIG_HOTPLUG_CPU #include diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 3aac15466a91..1f78b238d8d2 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -135,6 +135,7 @@ skip: seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count); seq_printf(p, " TLB shootdowns\n"); #endif +#ifdef CONFIG_X86_MCE seq_printf(p, "TRM: "); for_each_online_cpu(j) seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count); @@ -143,6 +144,7 @@ skip: for_each_online_cpu(j) seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count); seq_printf(p, " Threshold APIC interrupts\n"); +#endif seq_printf(p, "SPU: "); for_each_online_cpu(j) seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count); @@ -152,6 +154,32 @@ skip: return 0; } +/* + * /proc/stat helpers + */ +u64 arch_irq_stat_cpu(unsigned int cpu) +{ + u64 sum = cpu_pda(cpu)->__nmi_count; + + sum += cpu_pda(cpu)->apic_timer_irqs; +#ifdef CONFIG_SMP + sum += cpu_pda(cpu)->irq_resched_count; + sum += cpu_pda(cpu)->irq_call_count; + sum += cpu_pda(cpu)->irq_tlb_count; +#endif +#ifdef CONFIG_X86_MCE + sum += cpu_pda(cpu)->irq_thermal_count; + sum += cpu_pda(cpu)->irq_threshold_count; +#endif + sum += cpu_pda(cpu)->irq_spurious_count; + return sum; +} + +u64 arch_irq_stat(void) +{ + return atomic_read(&irq_err_count); +} + /* * do_IRQ handles all normal device IRQ's (the special * SMP cross-CPU interrupts have their own specific diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 74a323d2b850..903e617bec58 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -472,6 +472,13 @@ static const struct file_operations proc_vmalloc_operations = { }; #endif +#ifndef arch_irq_stat_cpu +#define arch_irq_stat_cpu(cpu) 0 +#endif +#ifndef arch_irq_stat +#define arch_irq_stat() 0 +#endif + static int show_stat(struct seq_file *p, void *v) { int i; @@ -509,7 +516,9 @@ static int show_stat(struct seq_file *p, void *v) sum += temp; per_irq_sum[j] += temp; } + sum += arch_irq_stat_cpu(i); } + sum += arch_irq_stat(); seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", (unsigned long long)cputime64_to_clock_t(user), diff --git a/include/asm-x86/hardirq.h b/include/asm-x86/hardirq.h index 314434d664e7..000787df66e6 100644 --- a/include/asm-x86/hardirq.h +++ b/include/asm-x86/hardirq.h @@ -3,3 +3,9 @@ #else # include "hardirq_64.h" #endif + +extern u64 arch_irq_stat_cpu(unsigned int cpu); +#define arch_irq_stat_cpu arch_irq_stat_cpu + +extern u64 arch_irq_stat(void); +#define arch_irq_stat arch_irq_stat -- cgit v1.2.3 From 962cf36c5bf6d2840b8d66ee9a606fae2f540bbd Mon Sep 17 00:00:00 2001 From: "Carlos R. Mafra" Date: Thu, 15 May 2008 11:15:37 -0300 Subject: Remove argument from open_softirq which is always NULL As git-grep shows, open_softirq() is always called with the last argument being NULL block/blk-core.c: open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL); kernel/hrtimer.c: open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq, NULL); kernel/rcuclassic.c: open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL); kernel/rcupreempt.c: open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL); kernel/sched.c: open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL); kernel/softirq.c: open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL); kernel/softirq.c: open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL); kernel/timer.c: open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL); net/core/dev.c: open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); net/core/dev.c: open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL); This observation has already been made by Matthew Wilcox in June 2002 (http://www.cs.helsinki.fi/linux/linux-kernel/2002-25/0687.html) "I notice that none of the current softirq routines use the data element passed to them." and the situation hasn't changed since them. So it appears we can safely remove that extra argument to save 128 (54) bytes of kernel data (text). Signed-off-by: Carlos R. Mafra Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- block/blk-core.c | 2 +- include/linux/interrupt.h | 3 +-- kernel/hrtimer.c | 2 +- kernel/rcuclassic.c | 2 +- kernel/rcupreempt.c | 2 +- kernel/sched.c | 2 +- kernel/softirq.c | 7 +++---- kernel/timer.c | 2 +- net/core/dev.c | 4 ++-- 9 files changed, 12 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 6a9cc0d22a61..75fdc65136e8 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2048,7 +2048,7 @@ int __init blk_dev_init(void) for_each_possible_cpu(i) INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); - open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL); + open_softirq(BLOCK_SOFTIRQ, blk_done_softirq); register_hotcpu_notifier(&blk_cpu_notifier); return 0; diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index f1fc7470d26c..a86186dd0474 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -285,12 +285,11 @@ enum struct softirq_action { void (*action)(struct softirq_action *); - void *data; }; asmlinkage void do_softirq(void); asmlinkage void __do_softirq(void); -extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data); +extern void open_softirq(int nr, void (*action)(struct softirq_action *)); extern void softirq_init(void); #define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) extern void raise_softirq_irqoff(unsigned int nr); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 421be5fe5cc7..861b4088092a 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1669,7 +1669,7 @@ void __init hrtimers_init(void) (void *)(long)smp_processor_id()); register_cpu_notifier(&hrtimers_nb); #ifdef CONFIG_HIGH_RES_TIMERS - open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq, NULL); + open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq); #endif } diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c index f4ffbd0f306f..f6e01f3ae9c6 100644 --- a/kernel/rcuclassic.c +++ b/kernel/rcuclassic.c @@ -529,7 +529,7 @@ static void __cpuinit rcu_online_cpu(int cpu) rcu_init_percpu_data(cpu, &rcu_ctrlblk, rdp); rcu_init_percpu_data(cpu, &rcu_bh_ctrlblk, bh_rdp); - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL); + open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); } static int __cpuinit rcu_cpu_notify(struct notifier_block *self, diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index e1cdf196a515..9dd827db359f 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c @@ -1125,7 +1125,7 @@ void __init __rcu_init(void) for_each_online_cpu(cpu) rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long) cpu); - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL); + open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); } /* diff --git a/kernel/sched.c b/kernel/sched.c index cfa222a91539..56ea3a203a5a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -8154,7 +8154,7 @@ void __init sched_init(void) #endif #ifdef CONFIG_SMP - open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL); + open_softirq(SCHED_SOFTIRQ, run_rebalance_domains); #endif #ifdef CONFIG_RT_MUTEXES diff --git a/kernel/softirq.c b/kernel/softirq.c index 36e061740047..059256874e9b 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -347,9 +347,8 @@ void raise_softirq(unsigned int nr) local_irq_restore(flags); } -void open_softirq(int nr, void (*action)(struct softirq_action*), void *data) +void open_softirq(int nr, void (*action)(struct softirq_action *)) { - softirq_vec[nr].data = data; softirq_vec[nr].action = action; } @@ -503,8 +502,8 @@ void __init softirq_init(void) &per_cpu(tasklet_hi_vec, cpu).head; } - open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL); - open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL); + open_softirq(TASKLET_SOFTIRQ, tasklet_action); + open_softirq(HI_SOFTIRQ, tasklet_hi_action); } static int ksoftirqd(void * __bind_cpu) diff --git a/kernel/timer.c b/kernel/timer.c index ceacc6626572..b4da888497fa 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1502,7 +1502,7 @@ void __init init_timers(void) BUG_ON(err == NOTIFY_BAD); register_cpu_notifier(&timers_nb); - open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL); + open_softirq(TIMER_SOFTIRQ, run_timer_softirq); } /** diff --git a/net/core/dev.c b/net/core/dev.c index 582963077877..cf0e16731dc7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4563,8 +4563,8 @@ static int __init net_dev_init(void) dev_boot_phase = 0; - open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); - open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL); + open_softirq(NET_TX_SOFTIRQ, net_tx_action); + open_softirq(NET_RX_SOFTIRQ, net_rx_action); hotcpu_notifier(dev_cpu_callback, 0); dst_init(); -- cgit v1.2.3 From 5136dea5734cfddbc6d7ccb7ead85a3ac7ce3de2 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 14 May 2008 16:10:41 -0700 Subject: x86: bitops take an unsigned long * All (or most) other architectures do this. So should x86. Fix. Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/bitops.h | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h index ee4b3ead6a43..7d2494bdc660 100644 --- a/include/asm-x86/bitops.h +++ b/include/asm-x86/bitops.h @@ -43,7 +43,7 @@ * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ -static inline void set_bit(int nr, volatile void *addr) +static inline void set_bit(int nr, volatile unsigned long *addr) { asm volatile(LOCK_PREFIX "bts %1,%0" : ADDR : "Ir" (nr) : "memory"); } @@ -57,7 +57,7 @@ static inline void set_bit(int nr, volatile void *addr) * If it's called on the same region of memory simultaneously, the effect * may be that only one operation succeeds. */ -static inline void __set_bit(int nr, volatile void *addr) +static inline void __set_bit(int nr, volatile unsigned long *addr) { asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory"); } @@ -72,7 +72,7 @@ static inline void __set_bit(int nr, volatile void *addr) * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() * in order to ensure changes are visible on other processors. */ -static inline void clear_bit(int nr, volatile void *addr) +static inline void clear_bit(int nr, volatile unsigned long *addr) { asm volatile(LOCK_PREFIX "btr %1,%0" : ADDR : "Ir" (nr)); } @@ -85,13 +85,13 @@ static inline void clear_bit(int nr, volatile void *addr) * clear_bit() is atomic and implies release semantics before the memory * operation. It can be used for an unlock. */ -static inline void clear_bit_unlock(unsigned nr, volatile void *addr) +static inline void clear_bit_unlock(unsigned nr, volatile unsigned long *addr) { barrier(); clear_bit(nr, addr); } -static inline void __clear_bit(int nr, volatile void *addr) +static inline void __clear_bit(int nr, volatile unsigned long *addr) { asm volatile("btr %1,%0" : ADDR : "Ir" (nr)); } @@ -108,7 +108,7 @@ static inline void __clear_bit(int nr, volatile void *addr) * No memory barrier is required here, because x86 cannot reorder stores past * older loads. Same principle as spin_unlock. */ -static inline void __clear_bit_unlock(unsigned nr, volatile void *addr) +static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr) { barrier(); __clear_bit(nr, addr); @@ -126,7 +126,7 @@ static inline void __clear_bit_unlock(unsigned nr, volatile void *addr) * If it's called on the same region of memory simultaneously, the effect * may be that only one operation succeeds. */ -static inline void __change_bit(int nr, volatile void *addr) +static inline void __change_bit(int nr, volatile unsigned long *addr) { asm volatile("btc %1,%0" : ADDR : "Ir" (nr)); } @@ -140,7 +140,7 @@ static inline void __change_bit(int nr, volatile void *addr) * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ -static inline void change_bit(int nr, volatile void *addr) +static inline void change_bit(int nr, volatile unsigned long *addr) { asm volatile(LOCK_PREFIX "btc %1,%0" : ADDR : "Ir" (nr)); } @@ -153,7 +153,7 @@ static inline void change_bit(int nr, volatile void *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static inline int test_and_set_bit(int nr, volatile void *addr) +static inline int test_and_set_bit(int nr, volatile unsigned long *addr) { int oldbit; @@ -170,7 +170,7 @@ static inline int test_and_set_bit(int nr, volatile void *addr) * * This is the same as test_and_set_bit on x86. */ -static inline int test_and_set_bit_lock(int nr, volatile void *addr) +static inline int test_and_set_bit_lock(int nr, volatile unsigned long *addr) { return test_and_set_bit(nr, addr); } @@ -184,7 +184,7 @@ static inline int test_and_set_bit_lock(int nr, volatile void *addr) * If two examples of this operation race, one can appear to succeed * but actually fail. You must protect multiple accesses with a lock. */ -static inline int __test_and_set_bit(int nr, volatile void *addr) +static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) { int oldbit; @@ -203,7 +203,7 @@ static inline int __test_and_set_bit(int nr, volatile void *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static inline int test_and_clear_bit(int nr, volatile void *addr) +static inline int test_and_clear_bit(int nr, volatile unsigned long *addr) { int oldbit; @@ -223,7 +223,7 @@ static inline int test_and_clear_bit(int nr, volatile void *addr) * If two examples of this operation race, one can appear to succeed * but actually fail. You must protect multiple accesses with a lock. */ -static inline int __test_and_clear_bit(int nr, volatile void *addr) +static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) { int oldbit; @@ -235,7 +235,7 @@ static inline int __test_and_clear_bit(int nr, volatile void *addr) } /* WARNING: non atomic and it can be reordered! */ -static inline int __test_and_change_bit(int nr, volatile void *addr) +static inline int __test_and_change_bit(int nr, volatile unsigned long *addr) { int oldbit; @@ -255,7 +255,7 @@ static inline int __test_and_change_bit(int nr, volatile void *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static inline int test_and_change_bit(int nr, volatile void *addr) +static inline int test_and_change_bit(int nr, volatile unsigned long *addr) { int oldbit; @@ -266,13 +266,13 @@ static inline int test_and_change_bit(int nr, volatile void *addr) return oldbit; } -static inline int constant_test_bit(int nr, const volatile void *addr) +static inline int constant_test_bit(int nr, const volatile unsigned long *addr) { return ((1UL << (nr % BITS_PER_LONG)) & (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0; } -static inline int variable_test_bit(int nr, volatile const void *addr) +static inline int variable_test_bit(int nr, volatile const unsigned long *addr) { int oldbit; -- cgit v1.2.3 From 635ee418381566f03819408e1303ef21fcf2d41c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 12 May 2008 15:43:35 +0200 Subject: x86: create prototype for (un)map_devmem Global functions need a prototype. Add it. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/asm-x86/page.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h index dc936dddf161..ed165097520f 100644 --- a/include/asm-x86/page.h +++ b/include/asm-x86/page.h @@ -51,8 +51,15 @@ #ifndef __ASSEMBLY__ +typedef struct { pgdval_t pgd; } pgd_t; +typedef struct { pgprotval_t pgprot; } pgprot_t; + extern int page_is_ram(unsigned long pagenr); extern int devmem_is_allowed(unsigned long pagenr); +extern void map_devmem(unsigned long pfn, unsigned long size, + pgprot_t vma_prot); +extern void unmap_devmem(unsigned long pfn, unsigned long size, + pgprot_t vma_prot); extern unsigned long max_pfn_mapped; @@ -74,9 +81,6 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr, alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE -typedef struct { pgdval_t pgd; } pgd_t; -typedef struct { pgprotval_t pgprot; } pgprot_t; - static inline pgd_t native_make_pgd(pgdval_t val) { return (pgd_t) { val }; -- cgit v1.2.3 From d1097635deed7196c2a859f285c29267779ca45a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 2 May 2008 23:42:01 +0200 Subject: x86: move mmconfig declarations to header arch/x86/kernel/mmconf-fam10h_64.c is missing the prototypes, which are decalred in arch/x86/kernel/setup_64.c. Move the prototypes and the inline stubs to the appropriate header file. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/mmconf-fam10h_64.c | 1 + arch/x86/kernel/setup_64.c | 13 +------------ include/asm-x86/mmconfig.h | 12 ++++++++++++ 3 files changed, 14 insertions(+), 12 deletions(-) create mode 100644 include/asm-x86/mmconfig.h (limited to 'include') diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index edc5fbfe85c0..fdfdc550b366 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "../pci/pci.h" diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 6dff1286ad8a..341230db74e1 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -71,6 +71,7 @@ #include #include #include +#include #include #ifdef CONFIG_PARAVIRT @@ -293,18 +294,6 @@ static void __init parse_setup_data(void) } } -#ifdef CONFIG_PCI_MMCONFIG -extern void __cpuinit fam10h_check_enable_mmcfg(void); -extern void __init check_enable_amd_mmconf_dmi(void); -#else -void __cpuinit fam10h_check_enable_mmcfg(void) -{ -} -void __init check_enable_amd_mmconf_dmi(void) -{ -} -#endif - /* * setup_arch - architecture-specific boot-time initializations * diff --git a/include/asm-x86/mmconfig.h b/include/asm-x86/mmconfig.h new file mode 100644 index 000000000000..95beda07c6fa --- /dev/null +++ b/include/asm-x86/mmconfig.h @@ -0,0 +1,12 @@ +#ifndef _ASM_MMCONFIG_H +#define _ASM_MMCONFIG_H + +#ifdef CONFIG_PCI_MMCONFIG +extern void __cpuinit fam10h_check_enable_mmcfg(void); +extern void __init check_enable_amd_mmconf_dmi(void); +#else +static inline void fam10h_check_enable_mmcfg(void) { } +static inline void check_enable_amd_mmconf_dmi(void) { } +#endif + +#endif -- cgit v1.2.3 From 4c8ab98249fa3cead320ec0ee4cde9960b951989 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 12 May 2008 15:43:38 +0200 Subject: i386: move FIX_ACPI_* into non-permanent range .. as they are used at early boot time only. Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/fixmap_32.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-x86/fixmap_32.h b/include/asm-x86/fixmap_32.h index 4b96148e90c1..f0df7ee96816 100644 --- a/include/asm-x86/fixmap_32.h +++ b/include/asm-x86/fixmap_32.h @@ -79,10 +79,6 @@ enum fixed_addresses { FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, #endif -#ifdef CONFIG_ACPI - FIX_ACPI_BEGIN, - FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, -#endif #ifdef CONFIG_PCI_MMCONFIG FIX_PCIE_MCFG, #endif @@ -103,6 +99,10 @@ enum fixed_addresses { (__end_of_permanent_fixed_addresses & 511), FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1, FIX_WP_TEST, +#ifdef CONFIG_ACPI + FIX_ACPI_BEGIN, + FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, +#endif #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT FIX_OHCI1394_BASE, #endif -- cgit v1.2.3 From ebdd561a19d7917ac49fff9c355aa4833c504bf1 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 12 May 2008 15:43:38 +0200 Subject: x86: constify data in reboot.c Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/reboot.c | 18 +++++++++--------- arch/x86/kernel/reboot_fixups_32.c | 4 ++-- include/asm-x86/reboot.h | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f6be7d5f82f8..f8a62160e151 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -27,7 +27,7 @@ void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); -static long no_idt[3]; +static const struct desc_ptr no_idt = {}; static int reboot_mode; enum reboot_type reboot_type = BOOT_KBD; int reboot_force; @@ -201,15 +201,15 @@ core_initcall(reboot_init); controller to pulse the CPU reset line, which is more thorough, but doesn't work with at least one type of 486 motherboard. It is easy to stop this code working; hence the copious comments. */ -static unsigned long long +static const unsigned long long real_mode_gdt_entries [3] = { 0x0000000000000000ULL, /* Null descriptor */ - 0x00009a000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ - 0x000092000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ + 0x00009b000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ + 0x000093000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ }; -static struct desc_ptr +static const struct desc_ptr real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, (long)real_mode_gdt_entries }, real_mode_idt = { 0x3ff, 0 }; @@ -231,7 +231,7 @@ real_mode_idt = { 0x3ff, 0 }; More could be done here to set up the registers as if a CPU reset had occurred; hopefully real BIOSs don't assume much. */ -static unsigned char real_mode_switch [] = +static const unsigned char real_mode_switch [] = { 0x66, 0x0f, 0x20, 0xc0, /* movl %cr0,%eax */ 0x66, 0x83, 0xe0, 0x11, /* andl $0x00000011,%eax */ @@ -245,7 +245,7 @@ static unsigned char real_mode_switch [] = 0x24, 0x10, /* f: andb $0x10,al */ 0x66, 0x0f, 0x22, 0xc0 /* movl %eax,%cr0 */ }; -static unsigned char jump_to_bios [] = +static const unsigned char jump_to_bios [] = { 0xea, 0x00, 0x00, 0xff, 0xff /* ljmp $0xffff,$0x0000 */ }; @@ -255,7 +255,7 @@ static unsigned char jump_to_bios [] = * specified by the code and length parameters. * We assume that length will aways be less that 100! */ -void machine_real_restart(unsigned char *code, int length) +void machine_real_restart(const unsigned char *code, int length) { local_irq_disable(); @@ -368,7 +368,7 @@ static void native_machine_emergency_restart(void) } case BOOT_TRIPLE: - load_idt((const struct desc_ptr *)&no_idt); + load_idt(&no_idt); __asm__ __volatile__("int3"); reboot_type = BOOT_KBD; diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c index dec0b5ec25c2..61a837743fe5 100644 --- a/arch/x86/kernel/reboot_fixups_32.c +++ b/arch/x86/kernel/reboot_fixups_32.c @@ -49,7 +49,7 @@ struct device_fixup { void (*reboot_fixup)(struct pci_dev *); }; -static struct device_fixup fixups_table[] = { +static const struct device_fixup fixups_table[] = { { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset }, { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset }, { PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset }, @@ -64,7 +64,7 @@ static struct device_fixup fixups_table[] = { */ void mach_reboot_fixups(void) { - struct device_fixup *cur; + const struct device_fixup *cur; struct pci_dev *dev; int i; diff --git a/include/asm-x86/reboot.h b/include/asm-x86/reboot.h index e63741f19392..206f355786dc 100644 --- a/include/asm-x86/reboot.h +++ b/include/asm-x86/reboot.h @@ -14,8 +14,8 @@ struct machine_ops { extern struct machine_ops machine_ops; -void machine_real_restart(unsigned char *code, int length); void native_machine_crash_shutdown(struct pt_regs *regs); void native_machine_shutdown(void); +void machine_real_restart(const unsigned char *code, int length); #endif /* _ASM_REBOOT_H */ -- cgit v1.2.3 From 9831bfb201a8e00415d0f9e5b5a50d902a90b2db Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 12 May 2008 15:43:38 +0200 Subject: x86 - hide X86_VM_MASK from userland programs v3 X86_VM_MASK is kernel specific flags so hide it from userland programs. It should be defined *before* ptrace.h inclusion because of circular link between these files Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/vm86.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/asm-x86/vm86.h b/include/asm-x86/vm86.h index 074b357146df..cbf4a0effa75 100644 --- a/include/asm-x86/vm86.h +++ b/include/asm-x86/vm86.h @@ -14,12 +14,6 @@ #include -#ifdef CONFIG_VM86 -#define X86_VM_MASK X86_EFLAGS_VM -#else -#define X86_VM_MASK 0 /* No VM86 support */ -#endif - #define BIOSSEG 0x0f000 #define CPU_086 0 @@ -133,6 +127,13 @@ struct vm86plus_struct { }; #ifdef __KERNEL__ + +#ifdef CONFIG_VM86 +#define X86_VM_MASK X86_EFLAGS_VM +#else +#define X86_VM_MASK 0 /* No VM86 support */ +#endif + /* * This is the (kernel) stack-layout when we have done a "SAVE_ALL" from vm86 * mode - the main change is that the old segment descriptors aren't -- cgit v1.2.3 From 2237ce2057b82561f7ea27ed30153571f404112d Mon Sep 17 00:00:00 2001 From: Fernando Luis Vazquez Cao Date: Fri, 16 May 2008 11:01:26 +0900 Subject: x86: cleanup, remove duplicate declaration of unknown_nmi_panic Signed-off-by: Fernando Luis Vazquez Cao Signed-off-by: Ingo Molnar --- include/asm-x86/nmi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h index 1e363021e72f..8455bf36d8df 100644 --- a/include/asm-x86/nmi.h +++ b/include/asm-x86/nmi.h @@ -46,7 +46,6 @@ extern void nmi_watchdog_default(void); extern int check_nmi_watchdog(void); extern int nmi_watchdog_enabled; -extern int unknown_nmi_panic; extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); extern int avail_to_resrv_perfctr_nmi(unsigned int); extern int reserve_perfctr_nmi(unsigned int); -- cgit v1.2.3 From 23eb271b9186531e1eb704dda9ab37abcfd0ca4a Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 14 May 2008 16:10:39 -0700 Subject: x86: setup_force_cpu_cap(): don't do clear_bit(non-unsigned-long) Another hack to make proper prototyping of x86 bitops viable. Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- include/asm-x86/cpufeature.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h index 0d609c837a41..78b47e7404eb 100644 --- a/include/asm-x86/cpufeature.h +++ b/include/asm-x86/cpufeature.h @@ -142,11 +142,11 @@ extern const char * const x86_power_flags[32]; #define clear_cpu_cap(c, bit) clear_bit(bit, (unsigned long *)((c)->x86_capability)) #define setup_clear_cpu_cap(bit) do { \ clear_cpu_cap(&boot_cpu_data, bit); \ - set_bit(bit, cleared_cpu_caps); \ + set_bit(bit, (unsigned long *)cleared_cpu_caps); \ } while (0) #define setup_force_cpu_cap(bit) do { \ set_cpu_cap(&boot_cpu_data, bit); \ - clear_bit(bit, cleared_cpu_caps); \ + clear_bit(bit, (unsigned long *)cleared_cpu_caps); \ } while (0) #define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) -- cgit v1.2.3 From f0766440dda7ace8a43b030f75e2dcb82449fb85 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 9 May 2008 19:09:48 -0700 Subject: x86: unify current.h Simply stitch these together. There are just two definitions that are shared but the file is resonably small and putting these things together shows that further unifications requires a unification of the per cpu / pda handling between both arches. Signed-off-by: Christoph Lameter Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/current.h | 42 ++++++++++++++++++++++++++++++++++++++---- include/asm-x86/current_32.h | 17 ----------------- include/asm-x86/current_64.h | 27 --------------------------- 3 files changed, 38 insertions(+), 48 deletions(-) delete mode 100644 include/asm-x86/current_32.h delete mode 100644 include/asm-x86/current_64.h (limited to 'include') diff --git a/include/asm-x86/current.h b/include/asm-x86/current.h index d2526d3f7346..7515c19d4988 100644 --- a/include/asm-x86/current.h +++ b/include/asm-x86/current.h @@ -1,5 +1,39 @@ +#ifndef _X86_CURRENT_H +#define _X86_CURRENT_H + #ifdef CONFIG_X86_32 -# include "current_32.h" -#else -# include "current_64.h" -#endif +#include +#include + +struct task_struct; + +DECLARE_PER_CPU(struct task_struct *, current_task); +static __always_inline struct task_struct *get_current(void) +{ + return x86_read_percpu(current_task); +} + +#else /* X86_32 */ + +#ifndef __ASSEMBLY__ +#include + +struct task_struct; + +static __always_inline struct task_struct *get_current(void) +{ + return read_pda(pcurrent); +} + +#else /* __ASSEMBLY__ */ + +#include +#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg + +#endif /* __ASSEMBLY__ */ + +#endif /* X86_32 */ + +#define current get_current() + +#endif /* X86_CURRENT_H */ diff --git a/include/asm-x86/current_32.h b/include/asm-x86/current_32.h deleted file mode 100644 index 5af9bdb97a16..000000000000 --- a/include/asm-x86/current_32.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef _I386_CURRENT_H -#define _I386_CURRENT_H - -#include -#include - -struct task_struct; - -DECLARE_PER_CPU(struct task_struct *, current_task); -static __always_inline struct task_struct *get_current(void) -{ - return x86_read_percpu(current_task); -} - -#define current get_current() - -#endif /* !(_I386_CURRENT_H) */ diff --git a/include/asm-x86/current_64.h b/include/asm-x86/current_64.h deleted file mode 100644 index 2d368ede2fc1..000000000000 --- a/include/asm-x86/current_64.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef _X86_64_CURRENT_H -#define _X86_64_CURRENT_H - -#if !defined(__ASSEMBLY__) -struct task_struct; - -#include - -static inline struct task_struct *get_current(void) -{ - struct task_struct *t = read_pda(pcurrent); - return t; -} - -#define current get_current() - -#else - -#ifndef ASM_OFFSET_H -#include -#endif - -#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg - -#endif - -#endif /* !(_X86_64_CURRENT_H) */ -- cgit v1.2.3 From 78d64fc21d2aa425c65de49765cddecc84d595a4 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 12 May 2008 15:44:39 +0200 Subject: x86: include/asm-x86/string_32.h - style only Looked at this file because of __memcpy warnings. Thought it could use a style/checkpatch cleanup. No change in vmlinux. [tglx: fixed the remaining issues ] Signed-off-by: Joe Perches Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/string_32.h | 316 +++++++++++++++++++++++++------------------- 1 file changed, 181 insertions(+), 135 deletions(-) (limited to 'include') diff --git a/include/asm-x86/string_32.h b/include/asm-x86/string_32.h index b49369ad9a61..8d0c593c4413 100644 --- a/include/asm-x86/string_32.h +++ b/include/asm-x86/string_32.h @@ -29,81 +29,116 @@ extern char *strchr(const char *s, int c); #define __HAVE_ARCH_STRLEN extern size_t strlen(const char *s); -static __always_inline void * __memcpy(void * to, const void * from, size_t n) +static __always_inline void *__memcpy(void *to, const void *from, size_t n) { -int d0, d1, d2; -__asm__ __volatile__( - "rep ; movsl\n\t" - "movl %4,%%ecx\n\t" - "andl $3,%%ecx\n\t" - "jz 1f\n\t" - "rep ; movsb\n\t" - "1:" - : "=&c" (d0), "=&D" (d1), "=&S" (d2) - : "0" (n/4), "g" (n), "1" ((long) to), "2" ((long) from) - : "memory"); -return (to); + int d0, d1, d2; + asm volatile("rep ; movsl\n\t" + "movl %4,%%ecx\n\t" + "andl $3,%%ecx\n\t" + "jz 1f\n\t" + "rep ; movsb\n\t" + "1:" + : "=&c" (d0), "=&D" (d1), "=&S" (d2) + : "0" (n / 4), "g" (n), "1" ((long)to), "2" ((long)from) + : "memory"); + return to; } /* * This looks ugly, but the compiler can optimize it totally, * as the count is constant. */ -static __always_inline void * __constant_memcpy(void * to, const void * from, size_t n) +static __always_inline void *__constant_memcpy(void *to, const void *from, + size_t n) { long esi, edi; - if (!n) return to; -#if 1 /* want to do small copies with non-string ops? */ + if (!n) + return to; + switch (n) { - case 1: *(char*)to = *(char*)from; return to; - case 2: *(short*)to = *(short*)from; return to; - case 4: *(int*)to = *(int*)from; return to; -#if 1 /* including those doable with two moves? */ - case 3: *(short*)to = *(short*)from; - *((char*)to+2) = *((char*)from+2); return to; - case 5: *(int*)to = *(int*)from; - *((char*)to+4) = *((char*)from+4); return to; - case 6: *(int*)to = *(int*)from; - *((short*)to+2) = *((short*)from+2); return to; - case 8: *(int*)to = *(int*)from; - *((int*)to+1) = *((int*)from+1); return to; -#endif + case 1: + *(char *)to = *(char *)from; + return to; + case 2: + *(short *)to = *(short *)from; + return to; + case 4: + *(int *)to = *(int *)from; + return to; + + case 3: + *(short *)to = *(short *)from; + *((char *)to + 2) = *((char *)from + 2); + return to; + case 5: + *(int *)to = *(int *)from; + *((char *)to + 4) = *((char *)from + 4); + return to; + case 6: + *(int *)to = *(int *)from; + *((short *)to + 2) = *((short *)from + 2); + return to; + case 8: + *(int *)to = *(int *)from; + *((int *)to + 1) = *((int *)from + 1); + return to; } -#endif - esi = (long) from; - edi = (long) to; - if (n >= 5*4) { + + esi = (long)from; + edi = (long)to; + if (n >= 5 * 4) { /* large block: use rep prefix */ int ecx; - __asm__ __volatile__( - "rep ; movsl" - : "=&c" (ecx), "=&D" (edi), "=&S" (esi) - : "0" (n/4), "1" (edi),"2" (esi) - : "memory" + asm volatile("rep ; movsl" + : "=&c" (ecx), "=&D" (edi), "=&S" (esi) + : "0" (n / 4), "1" (edi), "2" (esi) + : "memory" ); } else { /* small block: don't clobber ecx + smaller code */ - if (n >= 4*4) __asm__ __volatile__("movsl" - :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); - if (n >= 3*4) __asm__ __volatile__("movsl" - :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); - if (n >= 2*4) __asm__ __volatile__("movsl" - :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); - if (n >= 1*4) __asm__ __volatile__("movsl" - :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); + if (n >= 4 * 4) + asm volatile("movsl" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + if (n >= 3 * 4) + asm volatile("movsl" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + if (n >= 2 * 4) + asm volatile("movsl" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + if (n >= 1 * 4) + asm volatile("movsl" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); } switch (n % 4) { /* tail */ - case 0: return to; - case 1: __asm__ __volatile__("movsb" - :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); - return to; - case 2: __asm__ __volatile__("movsw" - :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); - return to; - default: __asm__ __volatile__("movsw\n\tmovsb" - :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); - return to; + case 0: + return to; + case 1: + asm volatile("movsb" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + return to; + case 2: + asm volatile("movsw" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + return to; + default: + asm volatile("movsw\n\tmovsb" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + return to; } } @@ -117,87 +152,86 @@ static __always_inline void * __constant_memcpy(void * to, const void * from, si * This CPU favours 3DNow strongly (eg AMD Athlon) */ -static inline void * __constant_memcpy3d(void * to, const void * from, size_t len) +static inline void *__constant_memcpy3d(void *to, const void *from, size_t len) { if (len < 512) return __constant_memcpy(to, from, len); return _mmx_memcpy(to, from, len); } -static __inline__ void *__memcpy3d(void *to, const void *from, size_t len) +static inline void *__memcpy3d(void *to, const void *from, size_t len) { if (len < 512) return __memcpy(to, from, len); return _mmx_memcpy(to, from, len); } -#define memcpy(t, f, n) \ -(__builtin_constant_p(n) ? \ - __constant_memcpy3d((t),(f),(n)) : \ - __memcpy3d((t),(f),(n))) +#define memcpy(t, f, n) \ + (__builtin_constant_p((n)) \ + ? __constant_memcpy3d((t), (f), (n)) \ + : __memcpy3d((t), (f), (n))) #else /* * No 3D Now! */ - -#define memcpy(t, f, n) \ -(__builtin_constant_p(n) ? \ - __constant_memcpy((t),(f),(n)) : \ - __memcpy((t),(f),(n))) + +#define memcpy(t, f, n) \ + (__builtin_constant_p((n)) \ + ? __constant_memcpy((t), (f), (n)) \ + : __memcpy((t), (f), (n))) #endif #define __HAVE_ARCH_MEMMOVE -void *memmove(void * dest,const void * src, size_t n); +void *memmove(void *dest, const void *src, size_t n); #define memcmp __builtin_memcmp #define __HAVE_ARCH_MEMCHR -extern void *memchr(const void * cs,int c,size_t count); +extern void *memchr(const void *cs, int c, size_t count); -static inline void * __memset_generic(void * s, char c,size_t count) +static inline void *__memset_generic(void *s, char c, size_t count) { -int d0, d1; -__asm__ __volatile__( - "rep\n\t" - "stosb" - : "=&c" (d0), "=&D" (d1) - :"a" (c),"1" (s),"0" (count) - :"memory"); -return s; + int d0, d1; + asm volatile("rep\n\t" + "stosb" + : "=&c" (d0), "=&D" (d1) + : "a" (c), "1" (s), "0" (count) + : "memory"); + return s; } /* we might want to write optimized versions of these later */ -#define __constant_count_memset(s,c,count) __memset_generic((s),(c),(count)) +#define __constant_count_memset(s, c, count) __memset_generic((s), (c), (count)) /* - * memset(x,0,y) is a reasonably common thing to do, so we want to fill + * memset(x, 0, y) is a reasonably common thing to do, so we want to fill * things 32 bits at a time even when we don't know the size of the * area at compile-time.. */ -static __always_inline void * __constant_c_memset(void * s, unsigned long c, size_t count) +static __always_inline +void *__constant_c_memset(void *s, unsigned long c, size_t count) { -int d0, d1; -__asm__ __volatile__( - "rep ; stosl\n\t" - "testb $2,%b3\n\t" - "je 1f\n\t" - "stosw\n" - "1:\ttestb $1,%b3\n\t" - "je 2f\n\t" - "stosb\n" - "2:" - :"=&c" (d0), "=&D" (d1) - :"a" (c), "q" (count), "0" (count/4), "1" ((long) s) - :"memory"); -return (s); + int d0, d1; + asm volatile("rep ; stosl\n\t" + "testb $2,%b3\n\t" + "je 1f\n\t" + "stosw\n" + "1:\ttestb $1,%b3\n\t" + "je 2f\n\t" + "stosb\n" + "2:" + : "=&c" (d0), "=&D" (d1) + : "a" (c), "q" (count), "0" (count/4), "1" ((long)s) + : "memory"); + return s; } /* Added by Gertjan van Wingerde to make minix and sysv module work */ #define __HAVE_ARCH_STRNLEN -extern size_t strnlen(const char * s, size_t count); +extern size_t strnlen(const char *s, size_t count); /* end of additional stuff */ #define __HAVE_ARCH_STRSTR @@ -207,66 +241,78 @@ extern char *strstr(const char *cs, const char *ct); * This looks horribly ugly, but the compiler can optimize it totally, * as we by now know that both pattern and count is constant.. */ -static __always_inline void * __constant_c_and_count_memset(void * s, unsigned long pattern, size_t count) +static __always_inline +void *__constant_c_and_count_memset(void *s, unsigned long pattern, + size_t count) { switch (count) { + case 0: + return s; + case 1: + *(unsigned char *)s = pattern & 0xff; + return s; + case 2: + *(unsigned short *)s = pattern & 0xffff; + return s; + case 3: + *(unsigned short *)s = pattern & 0xffff; + *((unsigned char *)s + 2) = pattern & 0xff; + return s; + case 4: + *(unsigned long *)s = pattern; + return s; + } + +#define COMMON(x) \ + asm volatile("rep ; stosl" \ + x \ + : "=&c" (d0), "=&D" (d1) \ + : "a" (pattern), "0" (count/4), "1" ((long)s) \ + : "memory") + + { + int d0, d1; + switch (count % 4) { case 0: + COMMON(""); return s; case 1: - *(unsigned char *)s = pattern & 0xff; + COMMON("\n\tstosb"); return s; case 2: - *(unsigned short *)s = pattern & 0xffff; + COMMON("\n\tstosw"); return s; - case 3: - *(unsigned short *)s = pattern & 0xffff; - *(2+(unsigned char *)s) = pattern & 0xff; + default: + COMMON("\n\tstosw\n\tstosb"); return s; - case 4: - *(unsigned long *)s = pattern; - return s; - } -#define COMMON(x) \ -__asm__ __volatile__( \ - "rep ; stosl" \ - x \ - : "=&c" (d0), "=&D" (d1) \ - : "a" (pattern),"0" (count/4),"1" ((long) s) \ - : "memory") -{ - int d0, d1; - switch (count % 4) { - case 0: COMMON(""); return s; - case 1: COMMON("\n\tstosb"); return s; - case 2: COMMON("\n\tstosw"); return s; - default: COMMON("\n\tstosw\n\tstosb"); return s; + } } -} - + #undef COMMON } -#define __constant_c_x_memset(s, c, count) \ -(__builtin_constant_p(count) ? \ - __constant_c_and_count_memset((s),(c),(count)) : \ - __constant_c_memset((s),(c),(count))) +#define __constant_c_x_memset(s, c, count) \ + (__builtin_constant_p(count) \ + ? __constant_c_and_count_memset((s), (c), (count)) \ + : __constant_c_memset((s), (c), (count))) -#define __memset(s, c, count) \ -(__builtin_constant_p(count) ? \ - __constant_count_memset((s),(c),(count)) : \ - __memset_generic((s),(c),(count))) +#define __memset(s, c, count) \ + (__builtin_constant_p(count) \ + ? __constant_count_memset((s), (c), (count)) \ + : __memset_generic((s), (c), (count))) #define __HAVE_ARCH_MEMSET -#define memset(s, c, count) \ -(__builtin_constant_p(c) ? \ - __constant_c_x_memset((s),(0x01010101UL*(unsigned char)(c)),(count)) : \ - __memset((s),(c),(count))) +#define memset(s, c, count) \ + (__builtin_constant_p(c) \ + ? __constant_c_x_memset((s), (0x01010101UL * (unsigned char)(c)), \ + (count)) \ + : __memset((s), (c), (count))) /* * find the first occurrence of byte 'c', or 1 past the area if none */ #define __HAVE_ARCH_MEMSCAN -extern void *memscan(void * addr, int c, size_t size); +extern void *memscan(void *addr, int c, size_t size); #endif /* __KERNEL__ */ -- cgit v1.2.3 From 0dbfafa5fcd4dd189e2adc7b6ed9e0405e846d79 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Wed, 23 Apr 2008 15:09:05 +0200 Subject: x86: move i386 memory setup code to e820_32.c The x86_64 code has centralized the memory setup code in e820_64.c. This patch copies that approach to i386: - early_param("mem", ...) parsing is moved from setup_32.c to e820_32.c. - setup_memory_map() and finish_e820_parsing() are factored out from setup_arch(), and declarations are added to e820_32.h. - print_memory_map() is made static and removed from e820_32.h. - user_defined_memmap is marked as __initdata. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/e820_32.c | 60 ++++++++++++++++++++++++++++++++++++++++++++-- arch/x86/kernel/setup_32.c | 50 ++------------------------------------ include/asm-x86/e820_32.h | 4 +++- 3 files changed, 63 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c index ed733e7cf4e6..31ea2bb8c91a 100644 --- a/arch/x86/kernel/e820_32.c +++ b/arch/x86/kernel/e820_32.c @@ -30,7 +30,6 @@ unsigned long pci_mem_start = 0x10000000; #ifdef CONFIG_PCI EXPORT_SYMBOL(pci_mem_start); #endif -extern int user_defined_memmap; static struct resource system_rom_resource = { .name = "System ROM", @@ -584,7 +583,7 @@ void __init e820_register_memory(void) pci_mem_start, gapstart, gapsize); } -void __init print_memory_map(char *who) +static void __init print_memory_map(char *who) { int i; @@ -692,6 +691,54 @@ e820_all_mapped(unsigned long s, unsigned long e, unsigned type) return 0; } +/* Overridden in paravirt.c if CONFIG_PARAVIRT */ +char * __init __attribute__((weak)) memory_setup(void) +{ + return machine_specific_memory_setup(); +} + +void __init setup_memory_map(void) +{ + printk(KERN_INFO "BIOS-provided physical RAM map:\n"); + print_memory_map(memory_setup()); +} + +static int __initdata user_defined_memmap; + +/* + * "mem=nopentium" disables the 4MB page tables. + * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM + * to , overriding the bios size. + * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from + * to +, overriding the bios size. + * + * HPA tells me bootloaders need to parse mem=, so no new + * option should be mem= [also see Documentation/i386/boot.txt] + */ +static int __init parse_mem(char *arg) +{ + if (!arg) + return -EINVAL; + + if (strcmp(arg, "nopentium") == 0) { + setup_clear_cpu_cap(X86_FEATURE_PSE); + } else { + /* If the user specifies memory size, we + * limit the BIOS-provided memory map to + * that size. exactmap can be used to specify + * the exact map. mem=number can be used to + * trim the existing memory map. + */ + unsigned long long mem_size; + + mem_size = memparse(arg, &arg); + limit_regions(mem_size); + user_defined_memmap = 1; + } + return 0; +} +early_param("mem", parse_mem); + static int __init parse_memmap(char *arg) { if (!arg) @@ -762,6 +809,15 @@ void __init update_memory_range(u64 start, u64 size, unsigned old_type, new_type); } } + +void __init finish_e820_parsing(void) +{ + if (user_defined_memmap) { + printk(KERN_INFO "user-defined physical RAM map:\n"); + print_memory_map("user"); + } +} + void __init update_e820(void) { u8 nr_map; diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 2c5f8b213e86..b54c79c91efd 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -237,42 +237,6 @@ static inline void copy_edd(void) } #endif -int __initdata user_defined_memmap; - -/* - * "mem=nopentium" disables the 4MB page tables. - * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM - * to , overriding the bios size. - * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from - * to +, overriding the bios size. - * - * HPA tells me bootloaders need to parse mem=, so no new - * option should be mem= [also see Documentation/i386/boot.txt] - */ -static int __init parse_mem(char *arg) -{ - if (!arg) - return -EINVAL; - - if (strcmp(arg, "nopentium") == 0) { - setup_clear_cpu_cap(X86_FEATURE_PSE); - } else { - /* If the user specifies memory size, we - * limit the BIOS-provided memory map to - * that size. exactmap can be used to specify - * the exact map. mem=number can be used to - * trim the existing memory map. - */ - unsigned long long mem_size; - - mem_size = memparse(arg, &arg); - limit_regions(mem_size); - user_defined_memmap = 1; - } - return 0; -} -early_param("mem", parse_mem); - #ifdef CONFIG_PROC_VMCORE /* elfcorehdr= specifies the location of elf core header * stored by the crashed kernel. @@ -725,12 +689,6 @@ static void set_mca_bus(int x) static void set_mca_bus(int x) { } #endif -/* Overridden in paravirt.c if CONFIG_PARAVIRT */ -char * __init __attribute__((weak)) memory_setup(void) -{ - return machine_specific_memory_setup(); -} - #ifdef CONFIG_NUMA /* * In the golden day, when everything among i386 and x86_64 will be @@ -786,8 +744,7 @@ void __init setup_arch(char **cmdline_p) #endif ARCH_SETUP - printk(KERN_INFO "BIOS-provided physical RAM map:\n"); - print_memory_map(memory_setup()); + setup_memory_map(); copy_edd(); @@ -807,10 +764,7 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); - if (user_defined_memmap) { - printk(KERN_INFO "user-defined physical RAM map:\n"); - print_memory_map("user"); - } + finish_e820_parsing(); strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; diff --git a/include/asm-x86/e820_32.h b/include/asm-x86/e820_32.h index a9f7c6ec32bf..e1f10c60901f 100644 --- a/include/asm-x86/e820_32.h +++ b/include/asm-x86/e820_32.h @@ -18,6 +18,9 @@ #ifndef __ASSEMBLY__ +extern void setup_memory_map(void); +extern void finish_e820_parsing(void); + extern struct e820map e820; extern void update_e820(void); @@ -32,7 +35,6 @@ extern void update_memory_range(u64 start, u64 size, unsigned old_type, unsigned new_type); extern void e820_register_memory(void); extern void limit_regions(unsigned long long size); -extern void print_memory_map(char *who); extern void init_iomem_resources(struct resource *code_resource, struct resource *data_resource, struct resource *bss_resource); -- cgit v1.2.3 From 42651f15824d003e8357693ab72c4dbb3e280836 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 29 Apr 2008 01:59:49 -0700 Subject: x86: fix trimming e820 with MTRR holes. converting MTRR layout from continous to discrete, some time could run out of MTRRs. So add gran_sizek to prevent that by dumpping small RAM piece less than gran_sizek. previous trimming only can handle highest_pfn from mtrr to end_pfn from e820. when have more than 4g RAM installed, there will be holes below 4g. so need to check ram below 4g is coverred well. need to be applied after [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v7 Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mtrr/main.c | 101 +++++++++++++++++++++++++++++++++------- arch/x86/kernel/e820_32.c | 7 ++- arch/x86/kernel/e820_64.c | 6 ++- include/asm-x86/e820_32.h | 2 +- include/asm-x86/e820_64.h | 2 +- 5 files changed, 97 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 8a6f68b45e3e..9ab5c16b0d52 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1095,6 +1095,17 @@ int __init amd_special_default_mtrr(void) return 0; } +static u64 __init real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn) +{ + u64 trim_start, trim_size; + trim_start = start_pfn; + trim_start <<= PAGE_SHIFT; + trim_size = limit_pfn; + trim_size <<= PAGE_SHIFT; + trim_size -= trim_start; + return update_memory_range(trim_start, trim_size, E820_RAM, + E820_RESERVED); +} /** * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs * @end_pfn: ending page frame number @@ -1110,8 +1121,13 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) { unsigned long i, base, size, highest_pfn = 0, def, dummy; mtrr_type type; - u64 trim_start, trim_size; + struct res_range range[RANGE_NUM]; + int nr_range; + u64 total_real_trim_size; + int changed; + /* extra one for all 0 */ + int num[MTRR_NUM_TYPES + 1]; /* * Make sure we only trim uncachable memory on machines that * support the Intel MTRR architecture: @@ -1123,9 +1139,6 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) if (def != MTRR_TYPE_UNCACHABLE) return 0; - if (amd_special_default_mtrr()) - return 0; - /* Find highest cached pfn */ for (i = 0; i < num_var_ranges; i++) { mtrr_if->get(i, &base, &size, &type); @@ -1145,26 +1158,80 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) return 0; } - if (highest_pfn < end_pfn) { + /* check entries number */ + memset(num, 0, sizeof(num)); + for (i = 0; i < num_var_ranges; i++) { + mtrr_if->get(i, &base, &size, &type); + if (type >= MTRR_NUM_TYPES) + continue; + if (!size) + type = MTRR_NUM_TYPES; + num[type]++; + } + + /* no entry for WB? */ + if (!num[MTRR_TYPE_WRBACK]) + return 0; + + /* check if we only had WB and UC */ + if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != + num_var_ranges - num[MTRR_NUM_TYPES]) + return 0; + + memset(range, 0, sizeof(range)); + nr_range = 0; + if (mtrr_tom2) { + range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT)); + range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1; + if (highest_pfn < range[nr_range].end + 1) + highest_pfn = range[nr_range].end + 1; + nr_range++; + } + nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); + + changed = 0; + total_real_trim_size = 0; + + /* check the top at first */ + i = nr_range - 1; + if (range[i].end + 1 < end_pfn) { + total_real_trim_size += real_trim_memory(range[i].end + 1, end_pfn); + } + + if (total_real_trim_size) { printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" - " all of memory, losing %luMB of RAM.\n", - (end_pfn - highest_pfn) >> (20 - PAGE_SHIFT)); + " all of memory, losing %lluMB of RAM.\n", + total_real_trim_size >> 20); WARN_ON(1); - printk(KERN_INFO "update e820 for mtrr\n"); - trim_start = highest_pfn; - trim_start <<= PAGE_SHIFT; - trim_size = end_pfn; - trim_size <<= PAGE_SHIFT; - trim_size -= trim_start; - update_memory_range(trim_start, trim_size, E820_RAM, - E820_RESERVED); + printk(KERN_INFO "update e820 for mtrr -- end_pfn\n"); update_e820(); - return 1; + changed = 1; } - return 0; + total_real_trim_size = 0; + if (range[0].start) + total_real_trim_size += real_trim_memory(0, range[0].start); + + for (i = 0; i < nr_range - 1; i--) { + if (range[i].end + 1 < range[i+1].start) + total_real_trim_size += real_trim_memory(range[i].end + 1, range[i+1].start); + } + + if (total_real_trim_size) { + printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" + " all of memory, losing %lluMB of RAM.\n", + total_real_trim_size >> 20); + + WARN_ON(1); + + printk(KERN_INFO "update e820 for mtrr -- holes\n"); + update_e820(); + changed = 1; + } + + return changed; } /** diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c index 31ea2bb8c91a..857f706273a8 100644 --- a/arch/x86/kernel/e820_32.c +++ b/arch/x86/kernel/e820_32.c @@ -783,10 +783,11 @@ static int __init parse_memmap(char *arg) return 0; } early_param("memmap", parse_memmap); -void __init update_memory_range(u64 start, u64 size, unsigned old_type, +u64 __init update_memory_range(u64 start, u64 size, unsigned old_type, unsigned new_type) { int i; + u64 real_updated_size = 0; BUG_ON(old_type == new_type); @@ -798,6 +799,7 @@ void __init update_memory_range(u64 start, u64 size, unsigned old_type, /* totally covered? */ if (ei->addr >= start && ei->size <= size) { ei->type = new_type; + real_updated_size += ei->size; continue; } /* partially covered */ @@ -807,7 +809,10 @@ void __init update_memory_range(u64 start, u64 size, unsigned old_type, continue; add_memory_region(final_start, final_end - final_start, new_type); + real_updated_size += final_end - final_start; } + + return real_updated_size; } void __init finish_e820_parsing(void) diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 124480c0008d..848b2cd2d1dd 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -829,10 +829,11 @@ void __init finish_e820_parsing(void) } } -void __init update_memory_range(u64 start, u64 size, unsigned old_type, +u64 __init update_memory_range(u64 start, u64 size, unsigned old_type, unsigned new_type) { int i; + u64 real_updated_size = 0; BUG_ON(old_type == new_type); @@ -844,6 +845,7 @@ void __init update_memory_range(u64 start, u64 size, unsigned old_type, /* totally covered? */ if (ei->addr >= start && ei->size <= size) { ei->type = new_type; + real_updated_size += ei->size; continue; } /* partially covered */ @@ -853,7 +855,9 @@ void __init update_memory_range(u64 start, u64 size, unsigned old_type, continue; add_memory_region(final_start, final_end - final_start, new_type); + real_updated_size += final_end - final_start; } + return real_updated_size; } void __init update_e820(void) diff --git a/include/asm-x86/e820_32.h b/include/asm-x86/e820_32.h index e1f10c60901f..af0711b220df 100644 --- a/include/asm-x86/e820_32.h +++ b/include/asm-x86/e820_32.h @@ -31,7 +31,7 @@ extern void propagate_e820_map(void); extern void register_bootmem_low_pages(unsigned long max_low_pfn); extern void add_memory_region(unsigned long long start, unsigned long long size, int type); -extern void update_memory_range(u64 start, u64 size, unsigned old_type, +extern u64 update_memory_range(u64 start, u64 size, unsigned old_type, unsigned new_type); extern void e820_register_memory(void); extern void limit_regions(unsigned long long size); diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h index 71c4d685d30d..6ae3e2803286 100644 --- a/include/asm-x86/e820_64.h +++ b/include/asm-x86/e820_64.h @@ -21,7 +21,7 @@ extern unsigned long find_e820_area_size(unsigned long start, unsigned long align); extern void add_memory_region(unsigned long start, unsigned long size, int type); -extern void update_memory_range(u64 start, u64 size, unsigned old_type, +extern u64 update_memory_range(u64 start, u64 size, unsigned old_type, unsigned new_type); extern void setup_memory_region(void); extern void contig_e820_setup(void); -- cgit v1.2.3 From b79cd8f1268bab57ff85b19d131f7f23deab2dee Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 11 May 2008 00:30:15 -0700 Subject: x86: make e820.c to have common functions remove the duplicated copy of these functions. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/e820.c | 475 +++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/e820_32.c | 411 +-------------------------------------- arch/x86/kernel/e820_64.c | 444 ------------------------------------------ arch/x86/kernel/setup_32.c | 2 +- include/asm-x86/e820.h | 14 ++ include/asm-x86/e820_32.h | 12 -- include/asm-x86/e820_64.h | 12 -- 8 files changed, 496 insertions(+), 876 deletions(-) create mode 100644 arch/x86/kernel/e820.c (limited to 'include') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5e618c3b4720..5369a4e36f17 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -22,7 +22,7 @@ obj-y += setup_$(BITS).o i8259_$(BITS).o setup.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o -obj-y += bootflag.o e820_$(BITS).o +obj-y += bootflag.o e820_$(BITS).o e820.o obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o obj-y += alternative.o i8253.o pci-nommu.o obj-$(CONFIG_X86_64) += bugs_64.o diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c new file mode 100644 index 000000000000..2cb686f60d0d --- /dev/null +++ b/arch/x86/kernel/e820.c @@ -0,0 +1,475 @@ +/* + * Handle the memory map. + * The functions here do the job until bootmem takes over. + * + * Getting sanitize_e820_map() in sync with i386 version by applying change: + * - Provisions for empty E820 memory regions (reported by certain BIOSes). + * Alex Achenbach , December 2002. + * Venkatesh Pallipadi + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +struct e820map e820; + +/* For PCI or other memory-mapped resources */ +unsigned long pci_mem_start = 0xaeedbabe; +#ifdef CONFIG_PCI +EXPORT_SYMBOL(pci_mem_start); +#endif + +/* + * This function checks if any part of the range is mapped + * with type. + */ +int +e820_any_mapped(u64 start, u64 end, unsigned type) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + + if (type && ei->type != type) + continue; + if (ei->addr >= end || ei->addr + ei->size <= start) + continue; + return 1; + } + return 0; +} +EXPORT_SYMBOL_GPL(e820_any_mapped); + +/* + * This function checks if the entire range is mapped with type. + * + * Note: this function only works correct if the e820 table is sorted and + * not-overlapping, which is the case + */ +int __init e820_all_mapped(u64 start, u64 end, unsigned type) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + + if (type && ei->type != type) + continue; + /* is the region (part) in overlap with the current region ?*/ + if (ei->addr >= end || ei->addr + ei->size <= start) + continue; + + /* if the region is at the beginning of we move + * start to the end of the region since it's ok until there + */ + if (ei->addr <= start) + start = ei->addr + ei->size; + /* + * if start is now at or beyond end, we're done, full + * coverage + */ + if (start >= end) + return 1; + } + return 0; +} + +/* + * Add a memory region to the kernel e820 map. + */ +void __init add_memory_region(u64 start, u64 size, int type) +{ + int x = e820.nr_map; + + if (x == E820MAX) { + printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); + return; + } + + e820.map[x].addr = start; + e820.map[x].size = size; + e820.map[x].type = type; + e820.nr_map++; +} + +void __init e820_print_map(char *who) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + printk(KERN_INFO " %s: %016Lx - %016Lx ", who, + (unsigned long long) e820.map[i].addr, + (unsigned long long) + (e820.map[i].addr + e820.map[i].size)); + switch (e820.map[i].type) { + case E820_RAM: + printk(KERN_CONT "(usable)\n"); + break; + case E820_RESERVED: + printk(KERN_CONT "(reserved)\n"); + break; + case E820_ACPI: + printk(KERN_CONT "(ACPI data)\n"); + break; + case E820_NVS: + printk(KERN_CONT "(ACPI NVS)\n"); + break; + default: + printk(KERN_CONT "type %u\n", e820.map[i].type); + break; + } + } +} + +/* + * Sanitize the BIOS e820 map. + * + * Some e820 responses include overlapping entries. The following + * replaces the original e820 map with a new one, removing overlaps. + * + */ +int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map) +{ + struct change_member { + struct e820entry *pbios; /* pointer to original bios entry */ + unsigned long long addr; /* address for this change point */ + }; + static struct change_member change_point_list[2*E820MAX] __initdata; + static struct change_member *change_point[2*E820MAX] __initdata; + static struct e820entry *overlap_list[E820MAX] __initdata; + static struct e820entry new_bios[E820MAX] __initdata; + struct change_member *change_tmp; + unsigned long current_type, last_type; + unsigned long long last_addr; + int chgidx, still_changing; + int overlap_entries; + int new_bios_entry; + int old_nr, new_nr, chg_nr; + int i; + + /* + Visually we're performing the following + (1,2,3,4 = memory types)... + + Sample memory map (w/overlaps): + ____22__________________ + ______________________4_ + ____1111________________ + _44_____________________ + 11111111________________ + ____________________33__ + ___________44___________ + __________33333_________ + ______________22________ + ___________________2222_ + _________111111111______ + _____________________11_ + _________________4______ + + Sanitized equivalent (no overlap): + 1_______________________ + _44_____________________ + ___1____________________ + ____22__________________ + ______11________________ + _________1______________ + __________3_____________ + ___________44___________ + _____________33_________ + _______________2________ + ________________1_______ + _________________4______ + ___________________2____ + ____________________33__ + ______________________4_ + */ + + /* if there's only one memory region, don't bother */ + if (*pnr_map < 2) + return -1; + + old_nr = *pnr_map; + + /* bail out if we find any unreasonable addresses in bios map */ + for (i = 0; i < old_nr; i++) + if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) + return -1; + + /* create pointers for initial change-point information (for sorting) */ + for (i = 0; i < 2 * old_nr; i++) + change_point[i] = &change_point_list[i]; + + /* record all known change-points (starting and ending addresses), + omitting those that are for empty memory regions */ + chgidx = 0; + for (i = 0; i < old_nr; i++) { + if (biosmap[i].size != 0) { + change_point[chgidx]->addr = biosmap[i].addr; + change_point[chgidx++]->pbios = &biosmap[i]; + change_point[chgidx]->addr = biosmap[i].addr + + biosmap[i].size; + change_point[chgidx++]->pbios = &biosmap[i]; + } + } + chg_nr = chgidx; + + /* sort change-point list by memory addresses (low -> high) */ + still_changing = 1; + while (still_changing) { + still_changing = 0; + for (i = 1; i < chg_nr; i++) { + unsigned long long curaddr, lastaddr; + unsigned long long curpbaddr, lastpbaddr; + + curaddr = change_point[i]->addr; + lastaddr = change_point[i - 1]->addr; + curpbaddr = change_point[i]->pbios->addr; + lastpbaddr = change_point[i - 1]->pbios->addr; + + /* + * swap entries, when: + * + * curaddr > lastaddr or + * curaddr == lastaddr and curaddr == curpbaddr and + * lastaddr != lastpbaddr + */ + if (curaddr < lastaddr || + (curaddr == lastaddr && curaddr == curpbaddr && + lastaddr != lastpbaddr)) { + change_tmp = change_point[i]; + change_point[i] = change_point[i-1]; + change_point[i-1] = change_tmp; + still_changing = 1; + } + } + } + + /* create a new bios memory map, removing overlaps */ + overlap_entries = 0; /* number of entries in the overlap table */ + new_bios_entry = 0; /* index for creating new bios map entries */ + last_type = 0; /* start with undefined memory type */ + last_addr = 0; /* start with 0 as last starting address */ + + /* loop through change-points, determining affect on the new bios map */ + for (chgidx = 0; chgidx < chg_nr; chgidx++) { + /* keep track of all overlapping bios entries */ + if (change_point[chgidx]->addr == + change_point[chgidx]->pbios->addr) { + /* + * add map entry to overlap list (> 1 entry + * implies an overlap) + */ + overlap_list[overlap_entries++] = + change_point[chgidx]->pbios; + } else { + /* + * remove entry from list (order independent, + * so swap with last) + */ + for (i = 0; i < overlap_entries; i++) { + if (overlap_list[i] == + change_point[chgidx]->pbios) + overlap_list[i] = + overlap_list[overlap_entries-1]; + } + overlap_entries--; + } + /* + * if there are overlapping entries, decide which + * "type" to use (larger value takes precedence -- + * 1=usable, 2,3,4,4+=unusable) + */ + current_type = 0; + for (i = 0; i < overlap_entries; i++) + if (overlap_list[i]->type > current_type) + current_type = overlap_list[i]->type; + /* + * continue building up new bios map based on this + * information + */ + if (current_type != last_type) { + if (last_type != 0) { + new_bios[new_bios_entry].size = + change_point[chgidx]->addr - last_addr; + /* + * move forward only if the new size + * was non-zero + */ + if (new_bios[new_bios_entry].size != 0) + /* + * no more space left for new + * bios entries ? + */ + if (++new_bios_entry >= E820MAX) + break; + } + if (current_type != 0) { + new_bios[new_bios_entry].addr = + change_point[chgidx]->addr; + new_bios[new_bios_entry].type = current_type; + last_addr = change_point[chgidx]->addr; + } + last_type = current_type; + } + } + /* retain count for new bios entries */ + new_nr = new_bios_entry; + + /* copy new bios mapping into original location */ + memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry)); + *pnr_map = new_nr; + + return 0; +} + +/* + * Copy the BIOS e820 map into a safe place. + * + * Sanity-check it while we're at it.. + * + * If we're lucky and live on a modern system, the setup code + * will have given us a memory map that we can use to properly + * set up memory. If we aren't, we'll fake a memory map. + */ +int __init copy_e820_map(struct e820entry *biosmap, int nr_map) +{ + /* Only one memory region (or negative)? Ignore it */ + if (nr_map < 2) + return -1; + + do { + u64 start = biosmap->addr; + u64 size = biosmap->size; + u64 end = start + size; + u32 type = biosmap->type; + + /* Overflow in 64 bits? Ignore the memory map. */ + if (start > end) + return -1; + + add_memory_region(start, size, type); + } while (biosmap++, --nr_map); + return 0; +} + +u64 __init update_memory_range(u64 start, u64 size, unsigned old_type, + unsigned new_type) +{ + int i; + u64 real_updated_size = 0; + + BUG_ON(old_type == new_type); + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + u64 final_start, final_end; + if (ei->type != old_type) + continue; + /* totally covered? */ + if (ei->addr >= start && + (ei->addr + ei->size) <= (start + size)) { + ei->type = new_type; + real_updated_size += ei->size; + continue; + } + /* partially covered */ + final_start = max(start, ei->addr); + final_end = min(start + size, ei->addr + ei->size); + if (final_start >= final_end) + continue; + add_memory_region(final_start, final_end - final_start, + new_type); + real_updated_size += final_end - final_start; + } + return real_updated_size; +} + +void __init update_e820(void) +{ + u8 nr_map; + + nr_map = e820.nr_map; + if (sanitize_e820_map(e820.map, &nr_map)) + return; + e820.nr_map = nr_map; + printk(KERN_INFO "modified physical RAM map:\n"); + e820_print_map("modified"); +} + +/* + * Search for the biggest gap in the low 32 bits of the e820 + * memory space. We pass this space to PCI to assign MMIO resources + * for hotplug or unconfigured devices in. + * Hopefully the BIOS let enough space left. + */ +__init void e820_setup_gap(void) +{ + unsigned long gapstart, gapsize, round; + unsigned long long last; + int i; + int found = 0; + + last = 0x100000000ull; + gapstart = 0x10000000; + gapsize = 0x400000; + i = e820.nr_map; + while (--i >= 0) { + unsigned long long start = e820.map[i].addr; + unsigned long long end = start + e820.map[i].size; + + /* + * Since "last" is at most 4GB, we know we'll + * fit in 32 bits if this condition is true + */ + if (last > end) { + unsigned long gap = last - end; + + if (gap > gapsize) { + gapsize = gap; + gapstart = end; + found = 1; + } + } + if (start < last) + last = start; + } + +#ifdef CONFIG_X86_64 + if (!found) { + gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024; + printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit " + "address range\n" + KERN_ERR "PCI: Unassigned devices with 32bit resource " + "registers may break!\n"); + } +#endif + + /* + * See how much we want to round up: start off with + * rounding to the next 1MB area. + */ + round = 0x100000; + while ((gapsize >> 4) > round) + round += round; + /* Fun with two's complement */ + pci_mem_start = (gapstart + round) & -round; + + printk(KERN_INFO + "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", + pci_mem_start, gapstart, gapsize); +} + diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c index 751d517d802c..dfe25751038b 100644 --- a/arch/x86/kernel/e820_32.c +++ b/arch/x86/kernel/e820_32.c @@ -16,21 +16,6 @@ #include #include -struct e820map e820; -struct change_member { - struct e820entry *pbios; /* pointer to original bios entry */ - unsigned long long addr; /* address for this change point */ -}; -static struct change_member change_point_list[2*E820MAX] __initdata; -static struct change_member *change_point[2*E820MAX] __initdata; -static struct e820entry *overlap_list[E820MAX] __initdata; -static struct e820entry new_bios[E820MAX] __initdata; -/* For PCI or other memory-mapped resources */ -unsigned long pci_mem_start = 0x10000000; -#ifdef CONFIG_PCI -EXPORT_SYMBOL(pci_mem_start); -#endif - static struct resource system_rom_resource = { .name = "System ROM", .start = 0xf0000, @@ -254,223 +239,6 @@ void __init e820_mark_nosave_regions(void) } #endif -void __init add_memory_region(unsigned long long start, - unsigned long long size, int type) -{ - int x; - - x = e820.nr_map; - - if (x == E820MAX) { - printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); - return; - } - - e820.map[x].addr = start; - e820.map[x].size = size; - e820.map[x].type = type; - e820.nr_map++; -} /* add_memory_region */ - -/* - * Sanitize the BIOS e820 map. - * - * Some e820 responses include overlapping entries. The following - * replaces the original e820 map with a new one, removing overlaps. - * - */ -int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) -{ - struct change_member *change_tmp; - unsigned long current_type, last_type; - unsigned long long last_addr; - int chgidx, still_changing; - int overlap_entries; - int new_bios_entry; - int old_nr, new_nr, chg_nr; - int i; - - /* - Visually we're performing the following (1,2,3,4 = memory types)... - - Sample memory map (w/overlaps): - ____22__________________ - ______________________4_ - ____1111________________ - _44_____________________ - 11111111________________ - ____________________33__ - ___________44___________ - __________33333_________ - ______________22________ - ___________________2222_ - _________111111111______ - _____________________11_ - _________________4______ - - Sanitized equivalent (no overlap): - 1_______________________ - _44_____________________ - ___1____________________ - ____22__________________ - ______11________________ - _________1______________ - __________3_____________ - ___________44___________ - _____________33_________ - _______________2________ - ________________1_______ - _________________4______ - ___________________2____ - ____________________33__ - ______________________4_ - */ - /* if there's only one memory region, don't bother */ - if (*pnr_map < 2) { - return -1; - } - - old_nr = *pnr_map; - - /* bail out if we find any unreasonable addresses in bios map */ - for (i=0; iaddr = biosmap[i].addr; - change_point[chgidx++]->pbios = &biosmap[i]; - change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size; - change_point[chgidx++]->pbios = &biosmap[i]; - } - } - chg_nr = chgidx; /* true number of change-points */ - - /* sort change-point list by memory addresses (low -> high) */ - still_changing = 1; - while (still_changing) { - still_changing = 0; - for (i=1; i < chg_nr; i++) { - /* if > , swap */ - /* or, if current= & last=, swap */ - if ((change_point[i]->addr < change_point[i-1]->addr) || - ((change_point[i]->addr == change_point[i-1]->addr) && - (change_point[i]->addr == change_point[i]->pbios->addr) && - (change_point[i-1]->addr != change_point[i-1]->pbios->addr)) - ) - { - change_tmp = change_point[i]; - change_point[i] = change_point[i-1]; - change_point[i-1] = change_tmp; - still_changing=1; - } - } - } - - /* create a new bios memory map, removing overlaps */ - overlap_entries=0; /* number of entries in the overlap table */ - new_bios_entry=0; /* index for creating new bios map entries */ - last_type = 0; /* start with undefined memory type */ - last_addr = 0; /* start with 0 as last starting address */ - /* loop through change-points, determining affect on the new bios map */ - for (chgidx=0; chgidx < chg_nr; chgidx++) - { - /* keep track of all overlapping bios entries */ - if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) - { - /* add map entry to overlap list (> 1 entry implies an overlap) */ - overlap_list[overlap_entries++]=change_point[chgidx]->pbios; - } - else - { - /* remove entry from list (order independent, so swap with last) */ - for (i=0; ipbios) - overlap_list[i] = overlap_list[overlap_entries-1]; - } - overlap_entries--; - } - /* if there are overlapping entries, decide which "type" to use */ - /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */ - current_type = 0; - for (i=0; itype > current_type) - current_type = overlap_list[i]->type; - /* continue building up new bios map based on this information */ - if (current_type != last_type) { - if (last_type != 0) { - new_bios[new_bios_entry].size = - change_point[chgidx]->addr - last_addr; - /* move forward only if the new size was non-zero */ - if (new_bios[new_bios_entry].size != 0) - if (++new_bios_entry >= E820MAX) - break; /* no more space left for new bios entries */ - } - if (current_type != 0) { - new_bios[new_bios_entry].addr = change_point[chgidx]->addr; - new_bios[new_bios_entry].type = current_type; - last_addr=change_point[chgidx]->addr; - } - last_type = current_type; - } - } - new_nr = new_bios_entry; /* retain count for new bios entries */ - - /* copy new bios mapping into original location */ - memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); - *pnr_map = new_nr; - - return 0; -} - -/* - * Copy the BIOS e820 map into a safe place. - * - * Sanity-check it while we're at it.. - * - * If we're lucky and live on a modern system, the setup code - * will have given us a memory map that we can use to properly - * set up memory. If we aren't, we'll fake a memory map. - * - * We check to see that the memory map contains at least 2 elements - * before we'll use it, because the detection code in setup.S may - * not be perfect and most every PC known to man has two memory - * regions: one from 0 to 640k, and one from 1mb up. (The IBM - * thinkpad 560x, for example, does not cooperate with the memory - * detection code.) - */ -int __init copy_e820_map(struct e820entry *biosmap, int nr_map) -{ - /* Only one memory region (or negative)? Ignore it */ - if (nr_map < 2) - return -1; - - do { - u64 start = biosmap->addr; - u64 size = biosmap->size; - u64 end = start + size; - u32 type = biosmap->type; - - /* Overflow in 64 bits? Ignore the memory map. */ - if (start > end) - return -1; - - add_memory_region(start, size, type); - } while (biosmap++, --nr_map); - - return 0; -} - /* * Find the highest page frame number we have available */ @@ -535,86 +303,12 @@ void __init register_bootmem_low_pages(unsigned long max_low_pfn) } } -void __init e820_register_memory(void) -{ - unsigned long gapstart, gapsize, round; - unsigned long long last; - int i; - - /* - * Search for the biggest gap in the low 32 bits of the e820 - * memory space. - */ - last = 0x100000000ull; - gapstart = 0x10000000; - gapsize = 0x400000; - i = e820.nr_map; - while (--i >= 0) { - unsigned long long start = e820.map[i].addr; - unsigned long long end = start + e820.map[i].size; - - /* - * Since "last" is at most 4GB, we know we'll - * fit in 32 bits if this condition is true - */ - if (last > end) { - unsigned long gap = last - end; - - if (gap > gapsize) { - gapsize = gap; - gapstart = end; - } - } - if (start < last) - last = start; - } - - /* - * See how much we want to round up: start off with - * rounding to the next 1MB area. - */ - round = 0x100000; - while ((gapsize >> 4) > round) - round += round; - /* Fun with two's complement */ - pci_mem_start = (gapstart + round) & -round; - - printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", - pci_mem_start, gapstart, gapsize); -} - -static void __init print_memory_map(char *who) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - printk(" %s: %016Lx - %016Lx ", who, - e820.map[i].addr, - e820.map[i].addr + e820.map[i].size); - switch (e820.map[i].type) { - case E820_RAM: printk("(usable)\n"); - break; - case E820_RESERVED: - printk("(reserved)\n"); - break; - case E820_ACPI: - printk("(ACPI data)\n"); - break; - case E820_NVS: - printk("(ACPI NVS)\n"); - break; - default: printk("type %u\n", e820.map[i].type); - break; - } - } -} - void __init limit_regions(unsigned long long size) { unsigned long long current_addr; int i; - print_memory_map("limit_regions start"); + e820_print_map("limit_regions start"); for (i = 0; i < e820.nr_map; i++) { current_addr = e820.map[i].addr + e820.map[i].size; if (current_addr < size) @@ -633,62 +327,10 @@ void __init limit_regions(unsigned long long size) e820.nr_map = i + 1; e820.map[i].size -= current_addr - size; } - print_memory_map("limit_regions endfor"); + e820_print_map("limit_regions endfor"); return; } - print_memory_map("limit_regions endfunc"); -} - -/* - * This function checks if any part of the range is mapped - * with type. - */ -int -e820_any_mapped(u64 start, u64 end, unsigned type) -{ - int i; - for (i = 0; i < e820.nr_map; i++) { - const struct e820entry *ei = &e820.map[i]; - if (type && ei->type != type) - continue; - if (ei->addr >= end || ei->addr + ei->size <= start) - continue; - return 1; - } - return 0; -} -EXPORT_SYMBOL_GPL(e820_any_mapped); - - /* - * This function checks if the entire range is mapped with type. - * - * Note: this function only works correct if the e820 table is sorted and - * not-overlapping, which is the case - */ -int __init -e820_all_mapped(unsigned long s, unsigned long e, unsigned type) -{ - u64 start = s; - u64 end = e; - int i; - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - if (type && ei->type != type) - continue; - /* is the region (part) in overlap with the current region ?*/ - if (ei->addr >= end || ei->addr + ei->size <= start) - continue; - /* if the region is at the beginning of we move - * start to the end of the region since it's ok until there - */ - if (ei->addr <= start) - start = ei->addr + ei->size; - /* if start is now at or beyond end, we're done, full - * coverage */ - if (start >= end) - return 1; /* we're done */ - } - return 0; + e820_print_map("limit_regions endfunc"); } /* Overridden in paravirt.c if CONFIG_PARAVIRT */ @@ -700,7 +342,7 @@ char * __init __attribute__((weak)) memory_setup(void) void __init setup_memory_map(void) { printk(KERN_INFO "BIOS-provided physical RAM map:\n"); - print_memory_map(memory_setup()); + e820_print_map(memory_setup()); } static int __initdata user_defined_memmap; @@ -783,55 +425,12 @@ static int __init parse_memmap(char *arg) return 0; } early_param("memmap", parse_memmap); -u64 __init update_memory_range(u64 start, u64 size, unsigned old_type, - unsigned new_type) -{ - int i; - u64 real_updated_size = 0; - - BUG_ON(old_type == new_type); - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - u64 final_start, final_end; - if (ei->type != old_type) - continue; - /* totally covered? */ - if (ei->addr >= start && - (ei->addr + ei->size) <= (start + size)) { - ei->type = new_type; - real_updated_size += ei->size; - continue; - } - /* partially covered */ - final_start = max(start, ei->addr); - final_end = min(start + size, ei->addr + ei->size); - if (final_start >= final_end) - continue; - add_memory_region(final_start, final_end - final_start, - new_type); - real_updated_size += final_end - final_start; - } - - return real_updated_size; -} void __init finish_e820_parsing(void) { if (user_defined_memmap) { printk(KERN_INFO "user-defined physical RAM map:\n"); - print_memory_map("user"); + e820_print_map("user"); } } -void __init update_e820(void) -{ - u8 nr_map; - - nr_map = e820.nr_map; - if (sanitize_e820_map(e820.map, &nr_map)) - return; - e820.nr_map = nr_map; - printk(KERN_INFO "modified physical RAM map:\n"); - print_memory_map("modified"); -} diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index c45b4dea4055..28a14eb65d3d 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -29,8 +29,6 @@ #include #include -struct e820map e820; - /* * PFN of last memory page. */ @@ -176,62 +174,6 @@ again: } return changed; } -/* - * This function checks if any part of the range is mapped - * with type. - */ -int -e820_any_mapped(unsigned long start, unsigned long end, unsigned type) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - - if (type && ei->type != type) - continue; - if (ei->addr >= end || ei->addr + ei->size <= start) - continue; - return 1; - } - return 0; -} -EXPORT_SYMBOL_GPL(e820_any_mapped); - -/* - * This function checks if the entire range is mapped with type. - * - * Note: this function only works correct if the e820 table is sorted and - * not-overlapping, which is the case - */ -int __init e820_all_mapped(unsigned long start, unsigned long end, - unsigned type) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - - if (type && ei->type != type) - continue; - /* is the region (part) in overlap with the current region ?*/ - if (ei->addr >= end || ei->addr + ei->size <= start) - continue; - - /* if the region is at the beginning of we move - * start to the end of the region since it's ok until there - */ - if (ei->addr <= start) - start = ei->addr + ei->size; - /* - * if start is now at or beyond end, we're done, full - * coverage - */ - if (start >= end) - return 1; - } - return 0; -} /* * Find a free area with specified alignment in a specific range. @@ -434,24 +376,6 @@ e820_register_active_regions(int nid, unsigned long start_pfn, add_active_range(nid, ei_startpfn, ei_endpfn); } -/* - * Add a memory region to the kernel e820 map. - */ -void __init add_memory_region(unsigned long start, unsigned long size, int type) -{ - int x = e820.nr_map; - - if (x == E820MAX) { - printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); - return; - } - - e820.map[x].addr = start; - e820.map[x].size = size; - e820.map[x].type = type; - e820.nr_map++; -} - /* * Find the hole size (in bytes) in the memory range. * @start: starting address of the memory range to scan @@ -473,266 +397,6 @@ unsigned long __init e820_hole_size(unsigned long start, unsigned long end) return end - start - (ram << PAGE_SHIFT); } -static void __init e820_print_map(char *who) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - printk(KERN_INFO " %s: %016Lx - %016Lx ", who, - (unsigned long long) e820.map[i].addr, - (unsigned long long) - (e820.map[i].addr + e820.map[i].size)); - switch (e820.map[i].type) { - case E820_RAM: - printk(KERN_CONT "(usable)\n"); - break; - case E820_RESERVED: - printk(KERN_CONT "(reserved)\n"); - break; - case E820_ACPI: - printk(KERN_CONT "(ACPI data)\n"); - break; - case E820_NVS: - printk(KERN_CONT "(ACPI NVS)\n"); - break; - default: - printk(KERN_CONT "type %u\n", e820.map[i].type); - break; - } - } -} - -/* - * Sanitize the BIOS e820 map. - * - * Some e820 responses include overlapping entries. The following - * replaces the original e820 map with a new one, removing overlaps. - * - */ -static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map) -{ - struct change_member { - struct e820entry *pbios; /* pointer to original bios entry */ - unsigned long long addr; /* address for this change point */ - }; - static struct change_member change_point_list[2*E820MAX] __initdata; - static struct change_member *change_point[2*E820MAX] __initdata; - static struct e820entry *overlap_list[E820MAX] __initdata; - static struct e820entry new_bios[E820MAX] __initdata; - struct change_member *change_tmp; - unsigned long current_type, last_type; - unsigned long long last_addr; - int chgidx, still_changing; - int overlap_entries; - int new_bios_entry; - int old_nr, new_nr, chg_nr; - int i; - - /* - Visually we're performing the following - (1,2,3,4 = memory types)... - - Sample memory map (w/overlaps): - ____22__________________ - ______________________4_ - ____1111________________ - _44_____________________ - 11111111________________ - ____________________33__ - ___________44___________ - __________33333_________ - ______________22________ - ___________________2222_ - _________111111111______ - _____________________11_ - _________________4______ - - Sanitized equivalent (no overlap): - 1_______________________ - _44_____________________ - ___1____________________ - ____22__________________ - ______11________________ - _________1______________ - __________3_____________ - ___________44___________ - _____________33_________ - _______________2________ - ________________1_______ - _________________4______ - ___________________2____ - ____________________33__ - ______________________4_ - */ - - /* if there's only one memory region, don't bother */ - if (*pnr_map < 2) - return -1; - - old_nr = *pnr_map; - - /* bail out if we find any unreasonable addresses in bios map */ - for (i = 0; i < old_nr; i++) - if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) - return -1; - - /* create pointers for initial change-point information (for sorting) */ - for (i = 0; i < 2 * old_nr; i++) - change_point[i] = &change_point_list[i]; - - /* record all known change-points (starting and ending addresses), - omitting those that are for empty memory regions */ - chgidx = 0; - for (i = 0; i < old_nr; i++) { - if (biosmap[i].size != 0) { - change_point[chgidx]->addr = biosmap[i].addr; - change_point[chgidx++]->pbios = &biosmap[i]; - change_point[chgidx]->addr = biosmap[i].addr + - biosmap[i].size; - change_point[chgidx++]->pbios = &biosmap[i]; - } - } - chg_nr = chgidx; - - /* sort change-point list by memory addresses (low -> high) */ - still_changing = 1; - while (still_changing) { - still_changing = 0; - for (i = 1; i < chg_nr; i++) { - unsigned long long curaddr, lastaddr; - unsigned long long curpbaddr, lastpbaddr; - - curaddr = change_point[i]->addr; - lastaddr = change_point[i - 1]->addr; - curpbaddr = change_point[i]->pbios->addr; - lastpbaddr = change_point[i - 1]->pbios->addr; - - /* - * swap entries, when: - * - * curaddr > lastaddr or - * curaddr == lastaddr and curaddr == curpbaddr and - * lastaddr != lastpbaddr - */ - if (curaddr < lastaddr || - (curaddr == lastaddr && curaddr == curpbaddr && - lastaddr != lastpbaddr)) { - change_tmp = change_point[i]; - change_point[i] = change_point[i-1]; - change_point[i-1] = change_tmp; - still_changing = 1; - } - } - } - - /* create a new bios memory map, removing overlaps */ - overlap_entries = 0; /* number of entries in the overlap table */ - new_bios_entry = 0; /* index for creating new bios map entries */ - last_type = 0; /* start with undefined memory type */ - last_addr = 0; /* start with 0 as last starting address */ - - /* loop through change-points, determining affect on the new bios map */ - for (chgidx = 0; chgidx < chg_nr; chgidx++) { - /* keep track of all overlapping bios entries */ - if (change_point[chgidx]->addr == - change_point[chgidx]->pbios->addr) { - /* - * add map entry to overlap list (> 1 entry - * implies an overlap) - */ - overlap_list[overlap_entries++] = - change_point[chgidx]->pbios; - } else { - /* - * remove entry from list (order independent, - * so swap with last) - */ - for (i = 0; i < overlap_entries; i++) { - if (overlap_list[i] == - change_point[chgidx]->pbios) - overlap_list[i] = - overlap_list[overlap_entries-1]; - } - overlap_entries--; - } - /* - * if there are overlapping entries, decide which - * "type" to use (larger value takes precedence -- - * 1=usable, 2,3,4,4+=unusable) - */ - current_type = 0; - for (i = 0; i < overlap_entries; i++) - if (overlap_list[i]->type > current_type) - current_type = overlap_list[i]->type; - /* - * continue building up new bios map based on this - * information - */ - if (current_type != last_type) { - if (last_type != 0) { - new_bios[new_bios_entry].size = - change_point[chgidx]->addr - last_addr; - /* - * move forward only if the new size - * was non-zero - */ - if (new_bios[new_bios_entry].size != 0) - /* - * no more space left for new - * bios entries ? - */ - if (++new_bios_entry >= E820MAX) - break; - } - if (current_type != 0) { - new_bios[new_bios_entry].addr = - change_point[chgidx]->addr; - new_bios[new_bios_entry].type = current_type; - last_addr = change_point[chgidx]->addr; - } - last_type = current_type; - } - } - /* retain count for new bios entries */ - new_nr = new_bios_entry; - - /* copy new bios mapping into original location */ - memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry)); - *pnr_map = new_nr; - - return 0; -} - -/* - * Copy the BIOS e820 map into a safe place. - * - * Sanity-check it while we're at it.. - * - * If we're lucky and live on a modern system, the setup code - * will have given us a memory map that we can use to properly - * set up memory. If we aren't, we'll fake a memory map. - */ -static int __init copy_e820_map(struct e820entry *biosmap, int nr_map) -{ - /* Only one memory region (or negative)? Ignore it */ - if (nr_map < 2) - return -1; - - do { - u64 start = biosmap->addr; - u64 size = biosmap->size; - u64 end = start + size; - u32 type = biosmap->type; - - /* Overflow in 64 bits? Ignore the memory map. */ - if (start > end) - return -1; - - add_memory_region(start, size, type); - } while (biosmap++, --nr_map); - return 0; -} - static void early_panic(char *msg) { early_printk(msg); @@ -829,114 +493,6 @@ void __init finish_e820_parsing(void) } } -u64 __init update_memory_range(u64 start, u64 size, unsigned old_type, - unsigned new_type) -{ - int i; - u64 real_updated_size = 0; - - BUG_ON(old_type == new_type); - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - u64 final_start, final_end; - if (ei->type != old_type) - continue; - /* totally covered? */ - if (ei->addr >= start && - (ei->addr + ei->size) <= (start + size)) { - ei->type = new_type; - real_updated_size += ei->size; - continue; - } - /* partially covered */ - final_start = max(start, ei->addr); - final_end = min(start + size, ei->addr + ei->size); - if (final_start >= final_end) - continue; - add_memory_region(final_start, final_end - final_start, - new_type); - real_updated_size += final_end - final_start; - } - return real_updated_size; -} - -void __init update_e820(void) -{ - u8 nr_map; - - nr_map = e820.nr_map; - if (sanitize_e820_map(e820.map, &nr_map)) - return; - e820.nr_map = nr_map; - printk(KERN_INFO "modified physical RAM map:\n"); - e820_print_map("modified"); -} - -unsigned long pci_mem_start = 0xaeedbabe; -EXPORT_SYMBOL(pci_mem_start); - -/* - * Search for the biggest gap in the low 32 bits of the e820 - * memory space. We pass this space to PCI to assign MMIO resources - * for hotplug or unconfigured devices in. - * Hopefully the BIOS let enough space left. - */ -__init void e820_setup_gap(void) -{ - unsigned long gapstart, gapsize, round; - unsigned long last; - int i; - int found = 0; - - last = 0x100000000ull; - gapstart = 0x10000000; - gapsize = 0x400000; - i = e820.nr_map; - while (--i >= 0) { - unsigned long long start = e820.map[i].addr; - unsigned long long end = start + e820.map[i].size; - - /* - * Since "last" is at most 4GB, we know we'll - * fit in 32 bits if this condition is true - */ - if (last > end) { - unsigned long gap = last - end; - - if (gap > gapsize) { - gapsize = gap; - gapstart = end; - found = 1; - } - } - if (start < last) - last = start; - } - - if (!found) { - gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024; - printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit " - "address range\n" - KERN_ERR "PCI: Unassigned devices with 32bit resource " - "registers may break!\n"); - } - - /* - * See how much we want to round up: start off with - * rounding to the next 1MB area. - */ - round = 0x100000; - while ((gapsize >> 4) > round) - round += round; - /* Fun with two's complement */ - pci_mem_start = (gapstart + round) & -round; - - printk(KERN_INFO - "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", - pci_mem_start, gapstart, gapsize); -} - int __init arch_get_ram_range(int slot, u64 *addr, u64 *size) { int i; diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index b54c79c91efd..5faeab69edd9 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -875,7 +875,7 @@ void __init setup_arch(char **cmdline_p) get_smp_config(); #endif - e820_register_memory(); + e820_setup_gap(); e820_mark_nosave_regions(); #ifdef CONFIG_VT diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h index 7004251fc66b..b5b519feba1d 100644 --- a/include/asm-x86/e820.h +++ b/include/asm-x86/e820.h @@ -20,6 +20,20 @@ struct e820map { __u32 nr_map; struct e820entry map[E820MAX]; }; + +extern struct e820map e820; + +extern int e820_any_mapped(u64 start, u64 end, unsigned type); +extern int e820_all_mapped(u64 start, u64 end, unsigned type); +extern void add_memory_region(u64 start, u64 size, int type); +extern void e820_print_map(char *who); +extern int sanitize_e820_map(struct e820entry *biosmap, char *pnr_map); +extern int copy_e820_map(struct e820entry *biosmap, int nr_map); +extern u64 update_memory_range(u64 start, u64 size, unsigned old_type, + unsigned new_type); +extern void update_e820(void); +extern void e820_setup_gap(void); + #endif /* __ASSEMBLY__ */ #define ISA_START_ADDRESS 0xa0000 diff --git a/include/asm-x86/e820_32.h b/include/asm-x86/e820_32.h index af0711b220df..9576b438fbd9 100644 --- a/include/asm-x86/e820_32.h +++ b/include/asm-x86/e820_32.h @@ -21,19 +21,8 @@ extern void setup_memory_map(void); extern void finish_e820_parsing(void); -extern struct e820map e820; -extern void update_e820(void); - -extern int e820_all_mapped(unsigned long start, unsigned long end, - unsigned type); -extern int e820_any_mapped(u64 start, u64 end, unsigned type); extern void propagate_e820_map(void); extern void register_bootmem_low_pages(unsigned long max_low_pfn); -extern void add_memory_region(unsigned long long start, - unsigned long long size, int type); -extern u64 update_memory_range(u64 start, u64 size, unsigned old_type, - unsigned new_type); -extern void e820_register_memory(void); extern void limit_regions(unsigned long long size); extern void init_iomem_resources(struct resource *code_resource, struct resource *data_resource, @@ -47,6 +36,5 @@ static inline void e820_mark_nosave_regions(void) } #endif - #endif/*!__ASSEMBLY__*/ #endif/*__E820_HEADER*/ diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h index 6ae3e2803286..9fac77e01441 100644 --- a/include/asm-x86/e820_64.h +++ b/include/asm-x86/e820_64.h @@ -19,34 +19,22 @@ extern unsigned long find_e820_area(unsigned long start, unsigned long end, extern unsigned long find_e820_area_size(unsigned long start, unsigned long *sizep, unsigned long align); -extern void add_memory_region(unsigned long start, unsigned long size, - int type); -extern u64 update_memory_range(u64 start, u64 size, unsigned old_type, - unsigned new_type); extern void setup_memory_region(void); extern void contig_e820_setup(void); extern unsigned long e820_end_of_ram(void); extern void e820_reserve_resources(void); extern void e820_mark_nosave_regions(void); -extern int e820_any_mapped(unsigned long start, unsigned long end, - unsigned type); -extern int e820_all_mapped(unsigned long start, unsigned long end, - unsigned type); extern int e820_any_non_reserved(unsigned long start, unsigned long end); extern int is_memory_any_valid(unsigned long start, unsigned long end); extern int e820_all_non_reserved(unsigned long start, unsigned long end); extern int is_memory_all_valid(unsigned long start, unsigned long end); extern unsigned long e820_hole_size(unsigned long start, unsigned long end); -extern void e820_setup_gap(void); extern void e820_register_active_regions(int nid, unsigned long start_pfn, unsigned long end_pfn); extern void finish_e820_parsing(void); -extern struct e820map e820; -extern void update_e820(void); - extern void reserve_early(unsigned long start, unsigned long end, char *name); extern void free_early(unsigned long start, unsigned long end); extern void early_res_to_bootmem(unsigned long start, unsigned long end); -- cgit v1.2.3 From e3f8ba81fdce852abe36a33eed7b243c4a0acfac Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Wed, 14 May 2008 08:15:04 -0700 Subject: x86 boot: include missing smp.h header The patch: x86: convert cpu_to_apicid to be a per cpu variable introduced a dependency of ipi.h on smp.h in x86 builds with an allnoconfig. Including smp.h in ipi.h fixes the build error: In file included from arch/x86/kernel/traps_64.c:48: include/asm/ipi.h: In function 'send_IPI_mask_sequence': include/asm/ipi.h:114: error: 'per_cpu__x86_cpu_to_apicid' undeclared (first use in this function) Signed-off-by: Paul Jackson Cc: Mike Travis Signed-off-by: Ingo Molnar --- include/asm-x86/ipi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-x86/ipi.h b/include/asm-x86/ipi.h index ecc80f341f37..196d63c28aa4 100644 --- a/include/asm-x86/ipi.h +++ b/include/asm-x86/ipi.h @@ -20,6 +20,7 @@ #include #include +#include /* * the following functions deal with sending IPIs between CPUs. -- cgit v1.2.3 From e9197bf0114661195bee35e7795cfc42164d9b2c Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Wed, 14 May 2008 08:15:10 -0700 Subject: x86 boot: remove some unused extern function declarations Remove three extern declarations for routines that don't exist. Fix a typo in a comment. Signed-off-by: Paul Jackson Signed-off-by: Ingo Molnar --- arch/x86/mm/numa_64.c | 2 +- include/linux/efi.h | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index c5066d519e5d..afb07ffb931d 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -233,7 +233,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, else bootmap_start = round_up(start, PAGE_SIZE); /* - * SMP_CAHCE_BYTES could be enough, but init_bootmem_node like + * SMP_CACHE_BYTES could be enough, but init_bootmem_node like * to use that to align to PAGE_SIZE */ bootmap = early_node_mem(nodeid, bootmap_start, end, diff --git a/include/linux/efi.h b/include/linux/efi.h index a5f359a7ad0e..807373d467f7 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -287,7 +287,6 @@ efi_guid_unparse(efi_guid_t *guid, char *out) extern void efi_init (void); extern void *efi_get_pal_addr (void); extern void efi_map_pal_code (void); -extern void efi_map_memmap(void); extern void efi_memmap_walk (efi_freemem_callback_t callback, void *arg); extern void efi_gettimeofday (struct timespec *ts); extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if possible */ @@ -295,14 +294,11 @@ extern u64 efi_get_iobase (void); extern u32 efi_mem_type (unsigned long phys_addr); extern u64 efi_mem_attributes (unsigned long phys_addr); extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size); -extern int efi_mem_attribute_range (unsigned long phys_addr, unsigned long size, - u64 attr); extern int __init efi_uart_console_only (void); extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource, struct resource *bss_resource); extern unsigned long efi_get_time(void); extern int efi_set_rtc_mmss(unsigned long nowtime); -extern int is_available_memory(efi_memory_desc_t * md); extern struct efi_memory_map memmap; /** -- cgit v1.2.3 From c801ed3860fe2f84281d4cae4c3e6ca87e81e890 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Wed, 14 May 2008 08:15:23 -0700 Subject: x86 boot: simplify pageblock_bits enum declaration The use of #defines with '##' pre-processor concatenation is a useful way to form several symbol names with a common pattern. But when there is just a single name obtained from that #define, it's just obfuscation. Better to just write the plain symbol name, as is. The following patch is a result of my wasting ten minutes looking through the kernel to figure out what 'PB_migrate_end' meant, and forgetting what I came to do, by the time I figured out that the #define PB_range macro defined it. Signed-off-by: Paul Jackson Signed-off-by: Ingo Molnar --- include/linux/pageblock-flags.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index e875905f7b12..e8c06122be36 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -25,13 +25,11 @@ #include -/* Macro to aid the definition of ranges of bits */ -#define PB_range(name, required_bits) \ - name, name ## _end = (name + required_bits) - 1 - /* Bit indices that affect a whole block of pages */ enum pageblock_bits { - PB_range(PB_migrate, 3), /* 3 bits required for migrate types */ + PB_migrate, + PB_migrate_end = PB_migrate + 3 - 1, + /* 3 bits required for migrate types */ NR_PAGEBLOCK_BITS }; -- cgit v1.2.3 From c3965bd15118742d72b4bc1a290d37b3f081eb98 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Wed, 14 May 2008 08:15:34 -0700 Subject: x86 boot: proper use of ARRAY_SIZE instead of repeated E820MAX constant This patch is motivated by a subsequent patch which will allow for more memory map entries on EFI supported systems than can be passed via the x86 legacy BIOS E820 interface. The legacy interface is limited to E820MAX == 128 memory entries, and that "E820MAX" manifest constant was used as the size for several arrays and loops over those arrays. The primary change in this patch is to change code loop sizes over those arrays from using the constant E820MAX, to using the ARRAY_SIZE() macro evaluated for the array being looped. That way, a subsequent patch can change the size of some of these arrays, without breaking this code. This patch also adds a parameter to the sanitize_e820_map() routine, which had an implicit size for the array passed it of E820MAX entries. This new parameter explicitly passes the size of said array. Once again, this will allow a subsequent patch to change that array size for some calls to sanitize_e820_map() without breaking the code. As part of enhancing the sanitize_e820_map() interface this way, I further combined the unnecessarily distinct x86_32 and x86_64 declarations for this routine into a single, commonly used, declaration. This patch in itself should make no difference to the resulting kernel binary. [ mingo@elte.hu: merged to -tip ] Signed-off-by: Paul Jackson Signed-off-by: Ingo Molnar --- arch/x86/boot/memory.c | 3 ++- arch/x86/kernel/e820.c | 9 +++++---- arch/x86/kernel/e820_64.c | 6 ++++-- arch/x86/mach-default/setup.c | 4 +++- arch/x86/mach-voyager/setup.c | 4 +++- include/asm-x86/e820.h | 3 ++- include/asm-x86/setup.h | 1 - 7 files changed, 19 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c index acad32eb4290..53165c97336b 100644 --- a/arch/x86/boot/memory.c +++ b/arch/x86/boot/memory.c @@ -13,6 +13,7 @@ */ #include "boot.h" +#include #define SMAP 0x534d4150 /* ASCII "SMAP" */ @@ -53,7 +54,7 @@ static int detect_memory_e820(void) count++; desc++; - } while (next && count < E820MAX); + } while (next && count < ARRAY_SIZE(boot_params.e820_map)); return boot_params.e820_entries = count; } diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 2cb686f60d0d..2396b9da8027 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -95,7 +95,7 @@ void __init add_memory_region(u64 start, u64 size, int type) { int x = e820.nr_map; - if (x == E820MAX) { + if (x == ARRAY_SIZE(e820.map)) { printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); return; } @@ -142,7 +142,8 @@ void __init e820_print_map(char *who) * replaces the original e820 map with a new one, removing overlaps. * */ -int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map) +int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, + char *pnr_map) { struct change_member { struct e820entry *pbios; /* pointer to original bios entry */ @@ -314,7 +315,7 @@ int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map) * no more space left for new * bios entries ? */ - if (++new_bios_entry >= E820MAX) + if (++new_bios_entry >= max_nr_map) break; } if (current_type != 0) { @@ -403,7 +404,7 @@ void __init update_e820(void) u8 nr_map; nr_map = e820.nr_map; - if (sanitize_e820_map(e820.map, &nr_map)) + if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map)) return; e820.nr_map = nr_map; printk(KERN_INFO "modified physical RAM map:\n"); diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index c10b4aece5e4..58dc6eee4d4f 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -413,7 +413,9 @@ char *__init machine_specific_memory_setup(void) * Otherwise fake a memory map; one section from 0k->640k, * the next section from 1mb->appropriate_mem_k */ - sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries); + sanitize_e820_map(boot_params.e820_map, + ARRAY_SIZE(boot_params.e820_map), + &boot_params.e820_entries); if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0) early_panic("Cannot find a valid memory map"); printk(KERN_INFO "BIOS-provided physical RAM map:\n"); @@ -484,7 +486,7 @@ void __init finish_e820_parsing(void) if (userdef) { char nr = e820.nr_map; - if (sanitize_e820_map(e820.map, &nr) < 0) + if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0) early_panic("Invalid user supplied memory map"); e820.nr_map = nr; diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c index 0c28a071824c..38a856c4fc07 100644 --- a/arch/x86/mach-default/setup.c +++ b/arch/x86/mach-default/setup.c @@ -163,7 +163,9 @@ char * __init machine_specific_memory_setup(void) * Otherwise fake a memory map; one section from 0k->640k, * the next section from 1mb->appropriate_mem_k */ - sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries); + sanitize_e820_map(boot_params.e820_map, + ARRAY_SIZE(boot_params.e820_map), + &boot_params.e820_entries); if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0) { unsigned long mem_size; diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c index 5ae5466b9eb9..662b5c0a77d6 100644 --- a/arch/x86/mach-voyager/setup.c +++ b/arch/x86/mach-voyager/setup.c @@ -111,7 +111,9 @@ char *__init machine_specific_memory_setup(void) * Otherwise fake a memory map; one section from 0k->640k, * the next section from 1mb->appropriate_mem_k */ - sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries); + sanitize_e820_map(boot_params.e820_map, + ARRAY_SIZE(boot_params.e820_map), + &boot_params.e820_entries); if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0) { unsigned long mem_size; diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h index b5b519feba1d..65c891d2a4cb 100644 --- a/include/asm-x86/e820.h +++ b/include/asm-x86/e820.h @@ -27,7 +27,8 @@ extern int e820_any_mapped(u64 start, u64 end, unsigned type); extern int e820_all_mapped(u64 start, u64 end, unsigned type); extern void add_memory_region(u64 start, u64 size, int type); extern void e820_print_map(char *who); -extern int sanitize_e820_map(struct e820entry *biosmap, char *pnr_map); +extern int +sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, char *pnr_map); extern int copy_e820_map(struct e820entry *biosmap, int nr_map); extern u64 update_memory_range(u64 start, u64 size, unsigned old_type, unsigned new_type); diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h index fa6763af8d26..ffa0f540fc7f 100644 --- a/include/asm-x86/setup.h +++ b/include/asm-x86/setup.h @@ -56,7 +56,6 @@ char * __init machine_specific_memory_setup(void); char *memory_setup(void); int __init copy_e820_map(struct e820entry *biosmap, int nr_map); -int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map); void __init add_memory_region(unsigned long long start, unsigned long long size, int type); -- cgit v1.2.3 From 028b785888c523baccdf27af0cdbf1deb92edec0 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Wed, 14 May 2008 08:15:40 -0700 Subject: x86 boot: extend some internal memory map arrays to handle larger EFI input Extend internal boot time memory tables to allow for up to three entries per node, which may be larger than the 128 E820MAX entries handled by the legacy BIOS E820 interface. The EFI interface, if present, is capable of passing memory map entries for these larger node counts. This patch requires an earlier patch that rewrote code depending on these array sizes from using E820MAX explicitly to size loops, to instead using ARRAY_SIZE() of the applicable array. Another patch following this one will provide the code to pick up additional memory entries passed via the EFI interface from the BIOS and insert them in the following, now enlarged, arrays. Signed-off-by: Paul Jackson Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 8 ++++---- include/asm-x86/e820.h | 37 ++++++++++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 2396b9da8027..3f7777b255aa 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -149,10 +149,10 @@ int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, struct e820entry *pbios; /* pointer to original bios entry */ unsigned long long addr; /* address for this change point */ }; - static struct change_member change_point_list[2*E820MAX] __initdata; - static struct change_member *change_point[2*E820MAX] __initdata; - static struct e820entry *overlap_list[E820MAX] __initdata; - static struct e820entry new_bios[E820MAX] __initdata; +static struct change_member change_point_list[2*E820_X_MAX] __initdata; +static struct change_member *change_point[2*E820_X_MAX] __initdata; +static struct e820entry *overlap_list[E820_X_MAX] __initdata; +static struct e820entry new_bios[E820_X_MAX] __initdata; struct change_member *change_tmp; unsigned long current_type, last_type; unsigned long long last_addr; diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h index 65c891d2a4cb..ab18457a95c0 100644 --- a/include/asm-x86/e820.h +++ b/include/asm-x86/e820.h @@ -2,6 +2,41 @@ #define __ASM_E820_H #define E820MAP 0x2d0 /* our map */ #define E820MAX 128 /* number of entries in E820MAP */ + +/* + * Legacy E820 BIOS limits us to 128 (E820MAX) nodes due to the + * constrained space in the zeropage. If we have more nodes than + * that, and if we've booted off EFI firmware, then the EFI tables + * passed us from the EFI firmware can list more nodes. Size our + * internal memory map tables to have room for these additional + * nodes, based on up to three entries per node for which the + * kernel was built: MAX_NUMNODES == (1 << CONFIG_NODES_SHIFT), + * plus E820MAX, allowing space for the possible duplicate E820 + * entries that might need room in the same arrays, prior to the + * call to sanitize_e820_map() to remove duplicates. The allowance + * of three memory map entries per node is "enough" entries for + * the initial hardware platform motivating this mechanism to make + * use of additional EFI map entries. Future platforms may want + * to allow more than three entries per node or otherwise refine + * this size. + */ + +/* + * Odd: 'make headers_check' complains about numa.h if I try + * to collapse the next two #ifdef lines to a single line: + * #if defined(__KERNEL__) && defined(CONFIG_EFI) + */ +#ifdef __KERNEL__ +#ifdef CONFIG_EFI +#include +#define E820_X_MAX (E820MAX + 3 * MAX_NUMNODES) +#else /* ! CONFIG_EFI */ +#define E820_X_MAX E820MAX +#endif +#else /* ! __KERNEL__ */ +#define E820_X_MAX E820MAX +#endif + #define E820NR 0x1e8 /* # entries in E820MAP */ #define E820_RAM 1 @@ -18,7 +53,7 @@ struct e820entry { struct e820map { __u32 nr_map; - struct e820entry map[E820MAX]; + struct e820entry map[E820_X_MAX]; }; extern struct e820map e820; -- cgit v1.2.3 From 6e9bcc796b120d17b08dde7ab958b82ddb899889 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Wed, 14 May 2008 08:15:46 -0700 Subject: x86 boot: change sanitize_e820_map parameter from byte to int to allow bigger memory maps The map size counter passed into, and back out of, sanitize_e820_map(), was an eight bit type (char or u8), as derived from its origins in legacy BIOS E820 structures. This patch changes that type to an 'int', to allow this sanitize routine to also be used on larger maps (larger than the 256 count that fits in a char). The legacy BIOS E820 interface of course does not change; that remains at 8 bits for this count, holding up to E820MAX == 128 entries. But the kernel internals can handle more when those additional memory map entries are passed from the BIOS via EFI interfaces. Signed-off-by: Paul Jackson Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 5 +++-- arch/x86/kernel/e820_64.c | 7 +++++-- arch/x86/mach-default/setup.c | 5 ++++- arch/x86/mach-voyager/setup.c | 5 ++++- include/asm-x86/e820.h | 2 +- 5 files changed, 17 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 3f7777b255aa..91abf5b2fb94 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -143,7 +143,7 @@ void __init e820_print_map(char *who) * */ int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, - char *pnr_map) + int *pnr_map) { struct change_member { struct e820entry *pbios; /* pointer to original bios entry */ @@ -204,6 +204,7 @@ static struct e820entry new_bios[E820_X_MAX] __initdata; return -1; old_nr = *pnr_map; + BUG_ON(old_nr > max_nr_map); /* bail out if we find any unreasonable addresses in bios map */ for (i = 0; i < old_nr; i++) @@ -401,7 +402,7 @@ u64 __init update_memory_range(u64 start, u64 size, unsigned old_type, void __init update_e820(void) { - u8 nr_map; + int nr_map; nr_map = e820.nr_map; if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map)) diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 58dc6eee4d4f..354fbb221709 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -407,15 +407,18 @@ static void early_panic(char *msg) char *__init machine_specific_memory_setup(void) { char *who = "BIOS-e820"; + int new_nr; /* * Try to copy the BIOS-supplied E820-map. * * Otherwise fake a memory map; one section from 0k->640k, * the next section from 1mb->appropriate_mem_k */ + new_nr = boot_params.e820_entries; sanitize_e820_map(boot_params.e820_map, ARRAY_SIZE(boot_params.e820_map), - &boot_params.e820_entries); + &new_nr); + boot_params.e820_entries = new_nr; if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0) early_panic("Cannot find a valid memory map"); printk(KERN_INFO "BIOS-provided physical RAM map:\n"); @@ -484,7 +487,7 @@ early_param("memmap", parse_memmap_opt); void __init finish_e820_parsing(void) { if (userdef) { - char nr = e820.nr_map; + int nr = e820.nr_map; if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0) early_panic("Invalid user supplied memory map"); diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c index 38a856c4fc07..56b4c39cb7fa 100644 --- a/arch/x86/mach-default/setup.c +++ b/arch/x86/mach-default/setup.c @@ -153,6 +153,7 @@ late_initcall(print_ipi_mode); char * __init machine_specific_memory_setup(void) { char *who; + int new_nr; who = "BIOS-e820"; @@ -163,9 +164,11 @@ char * __init machine_specific_memory_setup(void) * Otherwise fake a memory map; one section from 0k->640k, * the next section from 1mb->appropriate_mem_k */ + new_nr = boot_params.e820_entries; sanitize_e820_map(boot_params.e820_map, ARRAY_SIZE(boot_params.e820_map), - &boot_params.e820_entries); + &new_nr); + boot_params.e820_entries = new_nr; if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0) { unsigned long mem_size; diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c index 662b5c0a77d6..f4aca9fa9546 100644 --- a/arch/x86/mach-voyager/setup.c +++ b/arch/x86/mach-voyager/setup.c @@ -62,6 +62,7 @@ void __init time_init_hook(void) char *__init machine_specific_memory_setup(void) { char *who; + int new_nr; who = "NOT VOYAGER"; @@ -111,9 +112,11 @@ char *__init machine_specific_memory_setup(void) * Otherwise fake a memory map; one section from 0k->640k, * the next section from 1mb->appropriate_mem_k */ + new_nr = boot_params.e820_entries; sanitize_e820_map(boot_params.e820_map, ARRAY_SIZE(boot_params.e820_map), - &boot_params.e820_entries); + &new_nr); + boot_params.e820_entries = new_nr; if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0) { unsigned long mem_size; diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h index ab18457a95c0..1eb13b881437 100644 --- a/include/asm-x86/e820.h +++ b/include/asm-x86/e820.h @@ -63,7 +63,7 @@ extern int e820_all_mapped(u64 start, u64 end, unsigned type); extern void add_memory_region(u64 start, u64 size, int type); extern void e820_print_map(char *who); extern int -sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, char *pnr_map); +sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, int *pnr_map); extern int copy_e820_map(struct e820entry *biosmap, int nr_map); extern u64 update_memory_range(u64 start, u64 size, unsigned old_type, unsigned new_type); -- cgit v1.2.3 From a4c81cf684350797939416c99effb9d3ae46bca6 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 18 May 2008 01:18:57 -0700 Subject: x86: extend e820 ealy_res support 32bit move early_res related from e820_64.c to e820.c make edba detection to be done in head32.c remove smp_alloc_memory, because we have fixed trampoline address now. Signed-off-by: Yinghai Lu arch/x86/kernel/e820.c | 214 ++++++++++++++++++++++++++++++++++++ arch/x86/kernel/e820_64.c | 196 -------------------------------- arch/x86/kernel/head32.c | 76 ++++++++++++ arch/x86/kernel/setup_32.c | 109 +++--------------- arch/x86/kernel/smpboot.c | 17 -- arch/x86/kernel/trampoline.c | 2 arch/x86/mach-voyager/voyager_smp.c | 9 - include/asm-x86/e820.h | 6 + include/asm-x86/e820_64.h | 9 - include/asm-x86/smp.h | 1 arch/x86/kernel/e820.c | 214 ++++++++++++++++++++++++++++++++++++ arch/x86/kernel/e820_64.c | 196 -------------------------------- arch/x86/kernel/head32.c | 76 ++++++++++++ arch/x86/kernel/setup_32.c | 109 +++--------------- arch/x86/kernel/smpboot.c | 17 -- arch/x86/kernel/trampoline.c | 2 arch/x86/mach-voyager/voyager_smp.c | 9 - include/asm-x86/e820.h | 6 + include/asm-x86/e820_64.h | 9 - include/asm-x86/smp.h | 1 arch/x86/kernel/e820.c | 214 ++++++++++++++++++++++++++++++++++++ arch/x86/kernel/e820_64.c | 196 -------------------------------- arch/x86/kernel/head32.c | 76 ++++++++++++ arch/x86/kernel/setup_32.c | 109 +++--------------- arch/x86/kernel/smpboot.c | 17 -- arch/x86/kernel/trampoline.c | 2 arch/x86/mach-voyager/voyager_smp.c | 9 - include/asm-x86/e820.h | 6 + include/asm-x86/e820_64.h | 9 - include/asm-x86/smp.h | 1 10 files changed, 320 insertions(+), 319 deletions(-) Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 214 ++++++++++++++++++++++++++++++++++++ arch/x86/kernel/e820_64.c | 196 --------------------------------- arch/x86/kernel/head32.c | 76 +++++++++++++ arch/x86/kernel/setup_32.c | 109 ++++-------------- arch/x86/kernel/smpboot.c | 17 --- arch/x86/kernel/trampoline.c | 2 +- arch/x86/mach-voyager/voyager_smp.c | 9 -- include/asm-x86/e820.h | 6 + include/asm-x86/e820_64.h | 9 -- include/asm-x86/smp.h | 1 - 10 files changed, 320 insertions(+), 319 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 41c480ae47df..35da8cdbe5e6 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -22,7 +22,9 @@ #include #include #include +#include #include +#include struct e820map e820; @@ -493,3 +495,215 @@ __init void e820_setup_gap(void) pci_mem_start, gapstart, gapsize); } + +/* + * Early reserved memory areas. + */ +#define MAX_EARLY_RES 20 + +struct early_res { + u64 start, end; + char name[16]; +}; +static struct early_res early_res[MAX_EARLY_RES] __initdata = { + { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ +#if defined(CONFIG_X86_64) && defined(CONFIG_X86_TRAMPOLINE) + { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" }, +#endif +#if defined(CONFIG_X86_32) && defined(CONFIG_SMP) + /* + * But first pinch a few for the stack/trampoline stuff + * FIXME: Don't need the extra page at 4K, but need to fix + * trampoline before removing it. (see the GDT stuff) + */ + { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE" }, + /* + * Has to be in very low memory so we can execute + * real-mode AP code. + */ + { TRAMPOLINE_BASE, TRAMPOLINE_BASE + PAGE_SIZE, "TRAMPOLINE" }, +#endif + {} +}; + +void __init reserve_early(u64 start, u64 end, char *name) +{ + int i; + struct early_res *r; + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { + r = &early_res[i]; + if (end > r->start && start < r->end) + panic("Overlapping early reservations %llx-%llx %s to %llx-%llx %s\n", + start, end - 1, name?name:"", r->start, + r->end - 1, r->name); + } + if (i >= MAX_EARLY_RES) + panic("Too many early reservations"); + r = &early_res[i]; + r->start = start; + r->end = end; + if (name) + strncpy(r->name, name, sizeof(r->name) - 1); +} + +void __init free_early(u64 start, u64 end) +{ + struct early_res *r; + int i, j; + + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { + r = &early_res[i]; + if (start == r->start && end == r->end) + break; + } + if (i >= MAX_EARLY_RES || !early_res[i].end) + panic("free_early on not reserved area: %llx-%llx!", + start, end); + + for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++) + ; + + memmove(&early_res[i], &early_res[i + 1], + (j - 1 - i) * sizeof(struct early_res)); + + early_res[j - 1].end = 0; +} + +void __init early_res_to_bootmem(u64 start, u64 end) +{ + int i; + u64 final_start, final_end; + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { + struct early_res *r = &early_res[i]; + final_start = max(start, r->start); + final_end = min(end, r->end); + if (final_start >= final_end) + continue; + printk(KERN_INFO " early res: %d [%llx-%llx] %s\n", i, + final_start, final_end - 1, r->name); +#ifdef CONFIG_X86_64 + reserve_bootmem_generic(final_start, final_end - final_start); +#else + reserve_bootmem(final_start, final_end - final_start, + BOOTMEM_DEFAULT); +#endif + } +} + +/* Check for already reserved areas */ +static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) +{ + int i; + u64 addr = *addrp, last; + int changed = 0; +again: + last = addr + size; + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { + struct early_res *r = &early_res[i]; + if (last >= r->start && addr < r->end) { + *addrp = addr = round_up(r->end, align); + changed = 1; + goto again; + } + } + return changed; +} + +/* Check for already reserved areas */ +static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align) +{ + int i; + u64 addr = *addrp, last; + u64 size = *sizep; + int changed = 0; +again: + last = addr + size; + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { + struct early_res *r = &early_res[i]; + if (last > r->start && addr < r->start) { + size = r->start - addr; + changed = 1; + goto again; + } + if (last > r->end && addr < r->end) { + addr = round_up(r->end, align); + size = last - addr; + changed = 1; + goto again; + } + if (last <= r->end && addr >= r->start) { + (*sizep)++; + return 0; + } + } + if (changed) { + *addrp = addr; + *sizep = size; + } + return changed; +} + +/* + * Find a free area with specified alignment in a specific range. + */ +u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + u64 addr, last; + u64 ei_last; + + if (ei->type != E820_RAM) + continue; + addr = round_up(ei->addr, align); + ei_last = ei->addr + ei->size; + if (addr < start) + addr = round_up(start, align); + if (addr >= ei_last) + continue; + while (bad_addr(&addr, size, align) && addr+size <= ei_last) + ; + last = addr + size; + if (last > ei_last) + continue; + if (last > end) + continue; + return addr; + } + return -1ULL; +} + +/* + * Find next free range after *start + */ +u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + u64 addr, last; + u64 ei_last; + + if (ei->type != E820_RAM) + continue; + addr = round_up(ei->addr, align); + ei_last = ei->addr + ei->size; + if (addr < start) + addr = round_up(start, align); + if (addr >= ei_last) + continue; + *sizep = ei_last - addr; + while (bad_addr_size(&addr, sizep, align) && + addr + *sizep <= ei_last) + ; + last = addr + *sizep; + if (last > ei_last) + continue; + return addr; + } + return -1UL; + +} diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 354fbb221709..07941b554519 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -46,202 +46,6 @@ unsigned long max_pfn_mapped; */ static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT; -/* - * Early reserved memory areas. - */ -#define MAX_EARLY_RES 20 - -struct early_res { - unsigned long start, end; - char name[16]; -}; -static struct early_res early_res[MAX_EARLY_RES] __initdata = { - { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ -#ifdef CONFIG_X86_TRAMPOLINE - { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" }, -#endif - {} -}; - -void __init reserve_early(unsigned long start, unsigned long end, char *name) -{ - int i; - struct early_res *r; - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { - r = &early_res[i]; - if (end > r->start && start < r->end) - panic("Overlapping early reservations %lx-%lx %s to %lx-%lx %s\n", - start, end - 1, name?name:"", r->start, r->end - 1, r->name); - } - if (i >= MAX_EARLY_RES) - panic("Too many early reservations"); - r = &early_res[i]; - r->start = start; - r->end = end; - if (name) - strncpy(r->name, name, sizeof(r->name) - 1); -} - -void __init free_early(unsigned long start, unsigned long end) -{ - struct early_res *r; - int i, j; - - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { - r = &early_res[i]; - if (start == r->start && end == r->end) - break; - } - if (i >= MAX_EARLY_RES || !early_res[i].end) - panic("free_early on not reserved area: %lx-%lx!", start, end); - - for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++) - ; - - memmove(&early_res[i], &early_res[i + 1], - (j - 1 - i) * sizeof(struct early_res)); - - early_res[j - 1].end = 0; -} - -void __init early_res_to_bootmem(unsigned long start, unsigned long end) -{ - int i; - unsigned long final_start, final_end; - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { - struct early_res *r = &early_res[i]; - final_start = max(start, r->start); - final_end = min(end, r->end); - if (final_start >= final_end) - continue; - printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i, - final_start, final_end - 1, r->name); - reserve_bootmem_generic(final_start, final_end - final_start); - } -} - -/* Check for already reserved areas */ -static inline int __init -bad_addr(unsigned long *addrp, unsigned long size, unsigned long align) -{ - int i; - unsigned long addr = *addrp, last; - int changed = 0; -again: - last = addr + size; - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { - struct early_res *r = &early_res[i]; - if (last >= r->start && addr < r->end) { - *addrp = addr = round_up(r->end, align); - changed = 1; - goto again; - } - } - return changed; -} - -/* Check for already reserved areas */ -static inline int __init -bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align) -{ - int i; - unsigned long addr = *addrp, last; - unsigned long size = *sizep; - int changed = 0; -again: - last = addr + size; - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { - struct early_res *r = &early_res[i]; - if (last > r->start && addr < r->start) { - size = r->start - addr; - changed = 1; - goto again; - } - if (last > r->end && addr < r->end) { - addr = round_up(r->end, align); - size = last - addr; - changed = 1; - goto again; - } - if (last <= r->end && addr >= r->start) { - (*sizep)++; - return 0; - } - } - if (changed) { - *addrp = addr; - *sizep = size; - } - return changed; -} - -/* - * Find a free area with specified alignment in a specific range. - */ -unsigned long __init find_e820_area(unsigned long start, unsigned long end, - unsigned long size, unsigned long align) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - unsigned long addr, last; - unsigned long ei_last; - - if (ei->type != E820_RAM) - continue; - addr = round_up(ei->addr, align); - ei_last = ei->addr + ei->size; - if (addr < start) - addr = round_up(start, align); - if (addr >= ei_last) - continue; - while (bad_addr(&addr, size, align) && addr+size <= ei_last) - ; - last = addr + size; - if (last > ei_last) - continue; - if (last > end) - continue; - return addr; - } - return -1UL; -} - -/* - * Find next free range after *start - */ -unsigned long __init find_e820_area_size(unsigned long start, - unsigned long *sizep, - unsigned long align) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - unsigned long addr, last; - unsigned long ei_last; - - if (ei->type != E820_RAM) - continue; - addr = round_up(ei->addr, align); - ei_last = ei->addr + ei->size; - if (addr < start) - addr = round_up(start, align); - if (addr >= ei_last) - continue; - *sizep = ei_last - addr; - while (bad_addr_size(&addr, sizep, align) && - addr + *sizep <= ei_last) - ; - last = addr + *sizep; - if (last > ei_last) - continue; - return addr; - } - return -1UL; - -} /* * Find the highest page frame number we have available */ diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 3db059058927..c216d3c2a991 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -8,7 +8,83 @@ #include #include +#include +#include +#include +#include + +#define BIOS_LOWMEM_KILOBYTES 0x413 + +/* + * The BIOS places the EBDA/XBDA at the top of conventional + * memory, and usually decreases the reported amount of + * conventional memory (int 0x12) too. This also contains a + * workaround for Dell systems that neglect to reserve EBDA. + * The same workaround also avoids a problem with the AMD768MPX + * chipset: reserve a page before VGA to prevent PCI prefetch + * into it (errata #56). Usually the page is reserved anyways, + * unless you have no PS/2 mouse plugged in. + */ +static void __init reserve_ebda_region(void) +{ + unsigned int lowmem, ebda_addr; + + /* To determine the position of the EBDA and the */ + /* end of conventional memory, we need to look at */ + /* the BIOS data area. In a paravirtual environment */ + /* that area is absent. We'll just have to assume */ + /* that the paravirt case can handle memory setup */ + /* correctly, without our help. */ + if (paravirt_enabled()) + return; + + /* end of low (conventional) memory */ + lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); + lowmem <<= 10; + + /* start of EBDA area */ + ebda_addr = get_bios_ebda(); + + /* Fixup: bios puts an EBDA in the top 64K segment */ + /* of conventional memory, but does not adjust lowmem. */ + if ((lowmem - ebda_addr) <= 0x10000) + lowmem = ebda_addr; + + /* Fixup: bios does not report an EBDA at all. */ + /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */ + if ((ebda_addr == 0) && (lowmem >= 0x9f000)) + lowmem = 0x9f000; + + /* Paranoia: should never happen, but... */ + if ((lowmem == 0) || (lowmem >= 0x100000)) + lowmem = 0x9f000; + + /* reserve all memory between lowmem and the 1MB mark */ + reserve_early(lowmem, 0x100000, "BIOS reserved"); +} + void __init i386_start_kernel(void) { + reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); + +#ifdef CONFIG_BLK_DEV_INITRD + /* Reserve INITRD */ + if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { + u64 ramdisk_image = boot_params.hdr.ramdisk_image; + u64 ramdisk_size = boot_params.hdr.ramdisk_size; + u64 ramdisk_end = ramdisk_image + ramdisk_size; + reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); + } +#endif + reserve_early(__pa_symbol(&_end), init_pg_tables_end, "INIT_PG_TABLE"); + + reserve_ebda_region(); + + /* + * At this point everything still needed from the boot loader + * or BIOS or kernel text should be early reserved or marked not + * RAM in e820. All other memory is free game. + */ + start_kernel(); } diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 5faeab69edd9..fed482c62450 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -359,56 +359,6 @@ unsigned long __init find_max_low_pfn(void) return max_low_pfn; } -#define BIOS_LOWMEM_KILOBYTES 0x413 - -/* - * The BIOS places the EBDA/XBDA at the top of conventional - * memory, and usually decreases the reported amount of - * conventional memory (int 0x12) too. This also contains a - * workaround for Dell systems that neglect to reserve EBDA. - * The same workaround also avoids a problem with the AMD768MPX - * chipset: reserve a page before VGA to prevent PCI prefetch - * into it (errata #56). Usually the page is reserved anyways, - * unless you have no PS/2 mouse plugged in. - */ -static void __init reserve_ebda_region(void) -{ - unsigned int lowmem, ebda_addr; - - /* To determine the position of the EBDA and the */ - /* end of conventional memory, we need to look at */ - /* the BIOS data area. In a paravirtual environment */ - /* that area is absent. We'll just have to assume */ - /* that the paravirt case can handle memory setup */ - /* correctly, without our help. */ - if (paravirt_enabled()) - return; - - /* end of low (conventional) memory */ - lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); - lowmem <<= 10; - - /* start of EBDA area */ - ebda_addr = get_bios_ebda(); - - /* Fixup: bios puts an EBDA in the top 64K segment */ - /* of conventional memory, but does not adjust lowmem. */ - if ((lowmem - ebda_addr) <= 0x10000) - lowmem = ebda_addr; - - /* Fixup: bios does not report an EBDA at all. */ - /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */ - if ((ebda_addr == 0) && (lowmem >= 0x9f000)) - lowmem = 0x9f000; - - /* Paranoia: should never happen, but... */ - if ((lowmem == 0) || (lowmem >= 0x100000)) - lowmem = 0x9f000; - - /* reserve all memory between lowmem and the 1MB mark */ - reserve_bootmem(lowmem, 0x100000 - lowmem, BOOTMEM_DEFAULT); -} - #ifndef CONFIG_NEED_MULTIPLE_NODES static void __init setup_bootmem_allocator(void); static unsigned long __init setup_memory(void) @@ -522,25 +472,32 @@ static void __init reserve_initrd(void) unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT; unsigned long ramdisk_here; - initrd_start = 0; - if (!boot_params.hdr.type_of_loader || !ramdisk_image || !ramdisk_size) return; /* No initrd provided by bootloader */ + initrd_start = 0; + if (ramdisk_end < ramdisk_image) { + free_bootmem(ramdisk_image, ramdisk_size); printk(KERN_ERR "initrd wraps around end of memory, " "disabling initrd\n"); return; } if (ramdisk_size >= end_of_lowmem/2) { + free_bootmem(ramdisk_image, ramdisk_size); printk(KERN_ERR "initrd too large to handle, " "disabling initrd\n"); return; } + if (ramdisk_end <= end_of_lowmem) { /* All in lowmem, easy case */ - reserve_bootmem(ramdisk_image, ramdisk_size, BOOTMEM_DEFAULT); + /* + * don't need to reserve again, already reserved early + * in i386_start_kernel, and early_res_to_bootmem + * convert that to reserved in bootmem + */ initrd_start = ramdisk_image + PAGE_OFFSET; initrd_end = initrd_start+ramdisk_size; return; @@ -582,6 +539,8 @@ static void __init relocate_initrd(void) p = (char *)__va(ramdisk_image); memcpy(q, p, clen); q += clen; + /* need to free these low pages...*/ + free_bootmem(ramdisk_image, clen); ramdisk_image += clen; ramdisk_size -= clen; } @@ -600,47 +559,28 @@ static void __init relocate_initrd(void) ramdisk_image += clen; ramdisk_size -= clen; } + /* high pages is not converted by early_res_to_bootmem */ } #endif /* CONFIG_BLK_DEV_INITRD */ void __init setup_bootmem_allocator(void) { - unsigned long bootmap_size; + unsigned long bootmap_size, bootmap; /* * Initialize the boot-time allocator (with low memory only): */ - bootmap_size = init_bootmem(min_low_pfn, max_low_pfn); - + bootmap_size = bootmem_bootmap_pages(max_low_pfn)<> PAGE_SHIFT, max_low_pfn); register_bootmem_low_pages(max_low_pfn); + early_res_to_bootmem(0, max_low_pfn<llc_shared_map; } -#ifdef CONFIG_X86_32 -/* - * We are called very early to get the low memory for the - * SMP bootup trampoline page. - */ -void __init smp_alloc_memory(void) -{ - trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE); - /* - * Has to be in very low memory so we can execute - * real-mode AP code. - */ - if (__pa(trampoline_base) >= 0x9F000) - BUG(); -} -#endif - static void impress_friends(void) { int cpu; diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index abbf199adebb..1106fac6024d 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c @@ -2,7 +2,7 @@ #include -/* ready for x86_64, no harm for x86, since it will overwrite after alloc */ +/* ready for x86_64 and x86 */ unsigned char *trampoline_base = __va(TRAMPOLINE_BASE); /* diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 8acbf0cdf1a5..7bbebbfe8c4e 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -1137,15 +1137,6 @@ void flush_tlb_all(void) on_each_cpu(do_flush_tlb_all, 0, 1, 1); } -/* used to set up the trampoline for other CPUs when the memory manager - * is sorted out */ -void __init smp_alloc_memory(void) -{ - trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE); - if (__pa(trampoline_base) >= 0x93000) - BUG(); -} - /* send a reschedule CPI to one CPU by physical CPU number*/ static void voyager_smp_send_reschedule(int cpu) { diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h index 1eb13b881437..5a58e2bb1d78 100644 --- a/include/asm-x86/e820.h +++ b/include/asm-x86/e820.h @@ -70,6 +70,12 @@ extern u64 update_memory_range(u64 start, u64 size, unsigned old_type, extern void update_e820(void); extern void e820_setup_gap(void); +extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align); +extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align); +extern void reserve_early(u64 start, u64 end, char *name); +extern void free_early(u64 start, u64 end); +extern void early_res_to_bootmem(u64 start, u64 end); + #endif /* __ASSEMBLY__ */ #define ISA_START_ADDRESS 0xa0000 diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h index 9fac77e01441..37f176a02bc6 100644 --- a/include/asm-x86/e820_64.h +++ b/include/asm-x86/e820_64.h @@ -14,11 +14,6 @@ #include #ifndef __ASSEMBLY__ -extern unsigned long find_e820_area(unsigned long start, unsigned long end, - unsigned long size, unsigned long align); -extern unsigned long find_e820_area_size(unsigned long start, - unsigned long *sizep, - unsigned long align); extern void setup_memory_region(void); extern void contig_e820_setup(void); extern unsigned long e820_end_of_ram(void); @@ -35,10 +30,6 @@ extern void e820_register_active_regions(int nid, unsigned long start_pfn, extern void finish_e820_parsing(void); -extern void reserve_early(unsigned long start, unsigned long end, char *name); -extern void free_early(unsigned long start, unsigned long end); -extern void early_res_to_bootmem(unsigned long start, unsigned long end); - #endif/*!__ASSEMBLY__*/ #endif/*__E820_HEADER*/ diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h index 1ebaa5cd3112..514e52b95cef 100644 --- a/include/asm-x86/smp.h +++ b/include/asm-x86/smp.h @@ -201,7 +201,6 @@ extern void cpu_exit_clear(void); extern void cpu_uninit(void); #endif -extern void smp_alloc_memory(void); extern void lock_ipi_call_lock(void); extern void unlock_ipi_call_lock(void); #endif /* __ASSEMBLY__ */ -- cgit v1.2.3 From 32c5061265caf201d6a2c0d02181e2b68769c22c Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Wed, 14 May 2008 19:02:51 +0400 Subject: x86: move es7000_plat out of mpparse.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 11 ++++++----- arch/x86/mach-es7000/es7000plat.c | 2 ++ include/asm-x86/system.h | 1 - 3 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 5a18b2b9852e..ff1342325efc 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -793,15 +793,14 @@ void __init find_smp_config(void) ACPI-based MP Configuration -------------------------------------------------------------------------- */ -/* - * Keep this outside and initialized to 0, for !CONFIG_ACPI builds: - */ -int es7000_plat; - #ifdef CONFIG_ACPI #ifdef CONFIG_X86_IO_APIC +#if defined(CONFIG_X86_ES7000) || defined(CONFIG_X86_GENERICARCH) +extern int es7000_plat; +#endif + #define MP_ISA_BUS 0 static struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS]; @@ -928,11 +927,13 @@ void __init mp_config_acpi_legacy_irqs(void) set_bit(MP_ISA_BUS, mp_bus_not_pci); Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); +#if defined(CONFIG_X86_ES7000) || defined(CONFIG_X86_GENERICARCH) /* * Older generations of ES7000 have no legacy identity mappings */ if (es7000_plat == 1) return; +#endif /* * Locate the IOAPIC that manages the ISA IRQs (0-15). diff --git a/arch/x86/mach-es7000/es7000plat.c b/arch/x86/mach-es7000/es7000plat.c index f5d6f7d8b86e..a41c77a47227 100644 --- a/arch/x86/mach-es7000/es7000plat.c +++ b/arch/x86/mach-es7000/es7000plat.c @@ -52,6 +52,8 @@ static struct mip_reg *host_reg; static int mip_port; static unsigned long mip_addr, host_addr; +int es7000_plat; + /* * GSI override for ES7000 platforms. */ diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h index a2f04cd79b29..9f7f63ba0042 100644 --- a/include/asm-x86/system.h +++ b/include/asm-x86/system.h @@ -303,7 +303,6 @@ static inline void clflush(volatile void *__p) void disable_hlt(void); void enable_hlt(void); -extern int es7000_plat; void cpu_idle_wait(void); extern unsigned long arch_align_stack(unsigned long sp); -- cgit v1.2.3 From 5f8951487ddbacbc949e9ffae574f94791f9b4dd Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Wed, 14 May 2008 19:03:04 +0400 Subject: x86: make mp_ioapic_routing definition local Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 7 ++++++- include/asm-x86/io_apic.h | 7 ------- 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index b2ad09c4c6ae..f75c2030f4b5 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -852,7 +852,12 @@ static int __init acpi_parse_madt_lapic_entries(void) extern int es7000_plat; #endif -static struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS]; +static struct { + int apic_id; + int gsi_base; + int gsi_end; + DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); +} mp_ioapic_routing[MAX_IO_APICS]; static int mp_find_ioapic(int gsi) { diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h index d593e14f0341..2ef96f0596e2 100644 --- a/include/asm-x86/io_apic.h +++ b/include/asm-x86/io_apic.h @@ -112,13 +112,6 @@ extern int nr_ioapic_registers[MAX_IO_APICS]; #define MP_MAX_IOAPIC_PIN 127 -struct mp_ioapic_routing { - int apic_id; - int gsi_base; - int gsi_end; - DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); -}; - /* I/O APIC entries */ extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; -- cgit v1.2.3 From ec2cd0a22e2715f776a934e01c4f8ea098324fe1 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Wed, 14 May 2008 19:03:10 +0400 Subject: x86: make struct config_ioapic not MPspec specific Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 28 ++++++++++++------------ arch/x86/kernel/apic_32.c | 2 +- arch/x86/kernel/io_apic_32.c | 52 ++++++++++++++++++++++---------------------- arch/x86/kernel/io_apic_64.c | 26 +++++++++++----------- arch/x86/kernel/mpparse.c | 8 +++++-- include/asm-x86/io_apic.h | 10 ++++++++- 6 files changed, 69 insertions(+), 57 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index f75c2030f4b5..a26dd91faf0f 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -887,8 +887,8 @@ static u8 __init uniq_ioapic_id(u8 id) DECLARE_BITMAP(used, 256); bitmap_zero(used, 256); for (i = 0; i < nr_ioapics; i++) { - struct mpc_config_ioapic *ia = &mp_ioapics[i]; - __set_bit(ia->mpc_apicid, used); + struct mp_config_ioapic *ia = &mp_ioapics[i]; + __set_bit(ia->mp_apicid, used); } if (!test_bit(id, used)) return id; @@ -920,29 +920,29 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) idx = nr_ioapics; - mp_ioapics[idx].mpc_type = MP_IOAPIC; - mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE; - mp_ioapics[idx].mpc_apicaddr = address; + mp_ioapics[idx].mp_type = MP_IOAPIC; + mp_ioapics[idx].mp_flags = MPC_APIC_USABLE; + mp_ioapics[idx].mp_apicaddr = address; set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id); + mp_ioapics[idx].mp_apicid = uniq_ioapic_id(id); #ifdef CONFIG_X86_32 - mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); + mp_ioapics[idx].mp_apicver = io_apic_get_version(idx); #else - mp_ioapics[idx].mpc_apicver = 0; + mp_ioapics[idx].mp_apicver = 0; #endif /* * Build basic GSI lookup table to facilitate gsi->io_apic lookups * and to prevent reprogramming of IOAPIC pins (PCI GSIs). */ - mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; + mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mp_apicid; mp_ioapic_routing[idx].gsi_base = gsi_base; mp_ioapic_routing[idx].gsi_end = gsi_base + io_apic_get_redir_entries(idx); - printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " - "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, - mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, + printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, " + "GSI %d-%d\n", idx, mp_ioapics[idx].mp_apicid, + mp_ioapics[idx].mp_apicver, mp_ioapics[idx].mp_apicaddr, mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); nr_ioapics++; @@ -975,7 +975,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) mp_irqs[mp_irq_entries].mpc_srcbus = MP_ISA_BUS; mp_irqs[mp_irq_entries].mpc_srcbusirq = bus_irq; /* IRQ */ mp_irqs[mp_irq_entries].mpc_dstapic = - mp_ioapics[ioapic].mpc_apicid; /* APIC ID */ + mp_ioapics[ioapic].mp_apicid; /* APIC ID */ mp_irqs[mp_irq_entries].mpc_dstirq = pin; /* INTIN# */ if (++mp_irq_entries == MAX_IRQ_SOURCES) @@ -1016,7 +1016,7 @@ void __init mp_config_acpi_legacy_irqs(void) mp_irqs[mp_irq_entries].mpc_irqflag = 0; /* Conforming */ mp_irqs[mp_irq_entries].mpc_srcbus = MP_ISA_BUS; #ifdef CONFIG_X86_IO_APIC - mp_irqs[mp_irq_entries].mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; + mp_irqs[mp_irq_entries].mpc_dstapic = mp_ioapics[ioapic].mp_apicid; #endif /* * Use the default configuration for the IRQs 0-15. Unless diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 4b99b1bdeb6c..fa1be1d62916 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1202,7 +1202,7 @@ void __init init_apic_mappings(void) for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { - ioapic_phys = mp_ioapics[i].mpc_apicaddr; + ioapic_phys = mp_ioapics[i].mp_apicaddr; if (!ioapic_phys) { printk(KERN_ERR "WARNING: bogus zero IO-APIC " diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index a40d54fc1fdd..5af1b717236c 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -72,7 +72,7 @@ int sis_apic_bug = -1; int nr_ioapic_registers[MAX_IO_APICS]; /* I/O APIC entries */ -struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; +struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; /* MP IRQ source entries */ @@ -110,7 +110,7 @@ struct io_apic { static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) { return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) - + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK); + + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); } static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) @@ -802,7 +802,7 @@ static int find_irq_entry(int apic, int pin, int type) for (i = 0; i < mp_irq_entries; i++) if (mp_irqs[i].mpc_irqtype == type && - (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid || + (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mp_apicid || mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && mp_irqs[i].mpc_dstirq == pin) return i; @@ -844,7 +844,7 @@ static int __init find_isa_irq_apic(int irq, int type) if (i < mp_irq_entries) { int apic; for(apic = 0; apic < nr_ioapics; apic++) { - if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mpc_dstapic) return apic; } } @@ -872,7 +872,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) int lbus = mp_irqs[i].mpc_srcbus; for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic || + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mpc_dstapic || mp_irqs[i].mpc_dstapic == MP_APIC_ALL) break; @@ -1250,12 +1250,12 @@ static void __init setup_IO_APIC_irqs(void) if (first_notcon) { apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", - mp_ioapics[apic].mpc_apicid, + mp_ioapics[apic].mp_apicid, pin); first_notcon = 0; } else apic_printk(APIC_VERBOSE, ", %d-%d", - mp_ioapics[apic].mpc_apicid, pin); + mp_ioapics[apic].mp_apicid, pin); continue; } @@ -1357,7 +1357,7 @@ void __init print_IO_APIC(void) printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); for (i = 0; i < nr_ioapics; i++) printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", - mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); + mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); /* * We are a bit conservative about what we expect. We have to @@ -1376,7 +1376,7 @@ void __init print_IO_APIC(void) reg_03.raw = io_apic_read(apic, 3); spin_unlock_irqrestore(&ioapic_lock, flags); - printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); + printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); @@ -1749,14 +1749,14 @@ static void __init setup_ioapic_ids_from_mpc(void) reg_00.raw = io_apic_read(apic, 0); spin_unlock_irqrestore(&ioapic_lock, flags); - old_id = mp_ioapics[apic].mpc_apicid; + old_id = mp_ioapics[apic].mp_apicid; - if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) { + if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", - apic, mp_ioapics[apic].mpc_apicid); + apic, mp_ioapics[apic].mp_apicid); printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", reg_00.bits.ID); - mp_ioapics[apic].mpc_apicid = reg_00.bits.ID; + mp_ioapics[apic].mp_apicid = reg_00.bits.ID; } /* @@ -1765,9 +1765,9 @@ static void __init setup_ioapic_ids_from_mpc(void) * 'stuck on smp_invalidate_needed IPI wait' messages. */ if (check_apicid_used(phys_id_present_map, - mp_ioapics[apic].mpc_apicid)) { + mp_ioapics[apic].mp_apicid)) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", - apic, mp_ioapics[apic].mpc_apicid); + apic, mp_ioapics[apic].mp_apicid); for (i = 0; i < get_physical_broadcast(); i++) if (!physid_isset(i, phys_id_present_map)) break; @@ -1776,13 +1776,13 @@ static void __init setup_ioapic_ids_from_mpc(void) printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", i); physid_set(i, phys_id_present_map); - mp_ioapics[apic].mpc_apicid = i; + mp_ioapics[apic].mp_apicid = i; } else { physid_mask_t tmp; - tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid); + tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); apic_printk(APIC_VERBOSE, "Setting %d in the " "phys_id_present_map\n", - mp_ioapics[apic].mpc_apicid); + mp_ioapics[apic].mp_apicid); physids_or(phys_id_present_map, phys_id_present_map, tmp); } @@ -1791,11 +1791,11 @@ static void __init setup_ioapic_ids_from_mpc(void) * We need to adjust the IRQ routing table * if the ID changed. */ - if (old_id != mp_ioapics[apic].mpc_apicid) + if (old_id != mp_ioapics[apic].mp_apicid) for (i = 0; i < mp_irq_entries; i++) if (mp_irqs[i].mpc_dstapic == old_id) mp_irqs[i].mpc_dstapic - = mp_ioapics[apic].mpc_apicid; + = mp_ioapics[apic].mp_apicid; /* * Read the right value from the MPC table and @@ -1803,9 +1803,9 @@ static void __init setup_ioapic_ids_from_mpc(void) */ apic_printk(APIC_VERBOSE, KERN_INFO "...changing IO-APIC physical APIC ID to %d ...", - mp_ioapics[apic].mpc_apicid); + mp_ioapics[apic].mp_apicid); - reg_00.bits.ID = mp_ioapics[apic].mpc_apicid; + reg_00.bits.ID = mp_ioapics[apic].mp_apicid; spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0, reg_00.raw); spin_unlock_irqrestore(&ioapic_lock, flags); @@ -1816,7 +1816,7 @@ static void __init setup_ioapic_ids_from_mpc(void) spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(apic, 0); spin_unlock_irqrestore(&ioapic_lock, flags); - if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid) + if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) printk("could not set ID!\n"); else apic_printk(APIC_VERBOSE, " ok.\n"); @@ -2355,8 +2355,8 @@ static int ioapic_resume(struct sys_device *dev) spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(dev->id, 0); - if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) { - reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; + if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { + reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; io_apic_write(dev->id, 0, reg_00.raw); } spin_unlock_irqrestore(&ioapic_lock, flags); @@ -2789,7 +2789,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry " "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic, - mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, + mp_ioapics[ioapic].mp_apicid, pin, entry.vector, irq, edge_level, active_high_low); ioapic_register_intr(irq, entry.vector, edge_level); diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index ef1a8dfcc529..4555ad8c2070 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -104,7 +104,7 @@ DEFINE_SPINLOCK(vector_lock); int nr_ioapic_registers[MAX_IO_APICS]; /* I/O APIC entries */ -struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; +struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; /* MP IRQ source entries */ @@ -140,7 +140,7 @@ struct io_apic { static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) { return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) - + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK); + + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); } static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) @@ -454,7 +454,7 @@ static int find_irq_entry(int apic, int pin, int type) for (i = 0; i < mp_irq_entries; i++) if (mp_irqs[i].mpc_irqtype == type && - (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid || + (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mp_apicid || mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && mp_irqs[i].mpc_dstirq == pin) return i; @@ -496,7 +496,7 @@ static int __init find_isa_irq_apic(int irq, int type) if (i < mp_irq_entries) { int apic; for(apic = 0; apic < nr_ioapics; apic++) { - if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mpc_dstapic) return apic; } } @@ -524,7 +524,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) int lbus = mp_irqs[i].mpc_srcbus; for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic || + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mpc_dstapic || mp_irqs[i].mpc_dstapic == MP_APIC_ALL) break; @@ -846,7 +846,7 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " "IRQ %d Mode:%i Active:%i)\n", - apic, mp_ioapics[apic].mpc_apicid, pin, cfg->vector, + apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, irq, trigger, polarity); /* @@ -887,10 +887,10 @@ static void __init setup_IO_APIC_irqs(void) idx = find_irq_entry(apic,pin,mp_INT); if (idx == -1) { if (first_notcon) { - apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin); + apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin); first_notcon = 0; } else - apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mpc_apicid, pin); + apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin); continue; } if (!first_notcon) { @@ -965,7 +965,7 @@ void __apicdebuginit print_IO_APIC(void) printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); for (i = 0; i < nr_ioapics; i++) printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", - mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); + mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); /* * We are a bit conservative about what we expect. We have to @@ -983,7 +983,7 @@ void __apicdebuginit print_IO_APIC(void) spin_unlock_irqrestore(&ioapic_lock, flags); printk("\n"); - printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); + printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); @@ -1841,8 +1841,8 @@ static int ioapic_resume(struct sys_device *dev) spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(dev->id, 0); - if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) { - reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; + if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { + reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; io_apic_write(dev->id, 0, reg_00.raw); } spin_unlock_irqrestore(&ioapic_lock, flags); @@ -2336,7 +2336,7 @@ void __init ioapic_init_mappings(void) ioapic_res = ioapic_setup_resources(); for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { - ioapic_phys = mp_ioapics[i].mpc_apicaddr; + ioapic_phys = mp_ioapics[i].mp_apicaddr; } else { ioapic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index d05b70c329c4..9f1e5bf7f0fb 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -176,7 +176,11 @@ static void __init MP_ioapic_info(struct mpc_config_ioapic *m) if (bad_ioapic(m->mpc_apicaddr)) return; - mp_ioapics[nr_ioapics] = *m; + mp_ioapics[nr_ioapics].mp_apicaddr = m->mpc_apicaddr; + mp_ioapics[nr_ioapics].mp_apicid = m->mpc_apicid; + mp_ioapics[nr_ioapics].mp_type = m->mpc_type; + mp_ioapics[nr_ioapics].mp_apicver = m->mpc_apicver; + mp_ioapics[nr_ioapics].mp_flags = m->mpc_flags; nr_ioapics++; } @@ -426,7 +430,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) intsrc.mpc_type = MP_INTSRC; intsrc.mpc_irqflag = 0; /* conforming */ intsrc.mpc_srcbus = 0; - intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid; + intsrc.mpc_dstapic = mp_ioapics[0].mp_apicid; intsrc.mpc_irqtype = mp_INT; diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h index 2ef96f0596e2..ade76c0d03ae 100644 --- a/include/asm-x86/io_apic.h +++ b/include/asm-x86/io_apic.h @@ -112,8 +112,16 @@ extern int nr_ioapic_registers[MAX_IO_APICS]; #define MP_MAX_IOAPIC_PIN 127 +struct mp_config_ioapic { + unsigned long mp_apicaddr; + unsigned int mp_apicid; + unsigned char mp_type; + unsigned char mp_apicver; + unsigned char mp_flags; +}; + /* I/O APIC entries */ -extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; +extern struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; /* # of MP IRQ source entries */ extern int mp_irq_entries; -- cgit v1.2.3 From 2fddb6e28e903a3ab1704cc5aac01be5a59dc05b Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Wed, 14 May 2008 19:03:17 +0400 Subject: x86: make config_irqsrc not MPspec specific Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 43 +++++++++++++++-------------- arch/x86/kernel/io_apic_32.c | 64 ++++++++++++++++++++++---------------------- arch/x86/kernel/io_apic_64.c | 58 +++++++++++++++++++-------------------- arch/x86/kernel/mpparse.c | 8 +++++- include/asm-x86/io_apic.h | 12 ++++++++- 5 files changed, 100 insertions(+), 85 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index a26dd91faf0f..276ec058f683 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -969,14 +969,14 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) if ((bus_irq == 0) && (trigger == 3)) trigger = 1; - mp_irqs[mp_irq_entries].mpc_type = MP_INTSRC; - mp_irqs[mp_irq_entries].mpc_irqtype = mp_INT; - mp_irqs[mp_irq_entries].mpc_irqflag = (trigger << 2) | polarity; - mp_irqs[mp_irq_entries].mpc_srcbus = MP_ISA_BUS; - mp_irqs[mp_irq_entries].mpc_srcbusirq = bus_irq; /* IRQ */ - mp_irqs[mp_irq_entries].mpc_dstapic = + mp_irqs[mp_irq_entries].mp_type = MP_INTSRC; + mp_irqs[mp_irq_entries].mp_irqtype = mp_INT; + mp_irqs[mp_irq_entries].mp_irqflag = (trigger << 2) | polarity; + mp_irqs[mp_irq_entries].mp_srcbus = MP_ISA_BUS; + mp_irqs[mp_irq_entries].mp_srcbusirq = bus_irq; /* IRQ */ + mp_irqs[mp_irq_entries].mp_dstapic = mp_ioapics[ioapic].mp_apicid; /* APIC ID */ - mp_irqs[mp_irq_entries].mpc_dstirq = pin; /* INTIN# */ + mp_irqs[mp_irq_entries].mp_dstirq = pin; /* INTIN# */ if (++mp_irq_entries == MAX_IRQ_SOURCES) panic("Max # of irq sources exceeded!!\n"); @@ -1012,12 +1012,11 @@ void __init mp_config_acpi_legacy_irqs(void) if (ioapic < 0) return; - mp_irqs[mp_irq_entries].mpc_type = MP_INTSRC; - mp_irqs[mp_irq_entries].mpc_irqflag = 0; /* Conforming */ - mp_irqs[mp_irq_entries].mpc_srcbus = MP_ISA_BUS; -#ifdef CONFIG_X86_IO_APIC - mp_irqs[mp_irq_entries].mpc_dstapic = mp_ioapics[ioapic].mp_apicid; -#endif + mp_irqs[mp_irq_entries].mp_type = MP_INTSRC; + mp_irqs[mp_irq_entries].mp_irqflag = 0; /* Conforming */ + mp_irqs[mp_irq_entries].mp_srcbus = MP_ISA_BUS; + mp_irqs[mp_irq_entries].mp_dstapic = mp_ioapics[ioapic].mp_apicid; + /* * Use the default configuration for the IRQs 0-15. Unless * overridden by (MADT) interrupt source override entries. @@ -1026,17 +1025,17 @@ void __init mp_config_acpi_legacy_irqs(void) int idx; for (idx = 0; idx < mp_irq_entries; idx++) { - struct mpc_config_intsrc *irq = mp_irqs + idx; + struct mp_config_intsrc *irq = mp_irqs + idx; /* Do we already have a mapping for this ISA IRQ? */ - if (irq->mpc_srcbus == MP_ISA_BUS - && irq->mpc_srcbusirq == i) + if (irq->mp_srcbus == MP_ISA_BUS + && irq->mp_srcbusirq == i) break; /* Do we already have a mapping for this IOAPIC pin */ - if ((irq->mpc_dstapic == - mp_irqs[mp_irq_entries].mpc_dstapic) && - (irq->mpc_dstirq == i)) + if ((irq->mp_dstapic == + mp_irqs[mp_irq_entries].mp_dstapic) && + (irq->mp_dstirq == i)) break; } @@ -1045,9 +1044,9 @@ void __init mp_config_acpi_legacy_irqs(void) continue; /* IRQ already used */ } - mp_irqs[mp_irq_entries].mpc_irqtype = mp_INT; - mp_irqs[mp_irq_entries].mpc_srcbusirq = i; /* Identity mapped */ - mp_irqs[mp_irq_entries].mpc_dstirq = i; + mp_irqs[mp_irq_entries].mp_irqtype = mp_INT; + mp_irqs[mp_irq_entries].mp_srcbusirq = i; /* Identity mapped */ + mp_irqs[mp_irq_entries].mp_dstirq = i; if (++mp_irq_entries == MAX_IRQ_SOURCES) panic("Max # of irq sources exceeded!!\n"); diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 5af1b717236c..ea68c3e5ba1d 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -76,7 +76,7 @@ struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; /* MP IRQ source entries */ -struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; +struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* # of MP IRQ source entries */ int mp_irq_entries; @@ -801,10 +801,10 @@ static int find_irq_entry(int apic, int pin, int type) int i; for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mpc_irqtype == type && - (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mp_apicid || - mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && - mp_irqs[i].mpc_dstirq == pin) + if (mp_irqs[i].mp_irqtype == type && + (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || + mp_irqs[i].mp_dstapic == MP_APIC_ALL) && + mp_irqs[i].mp_dstirq == pin) return i; return -1; @@ -818,13 +818,13 @@ static int __init find_isa_irq_pin(int irq, int type) int i; for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; + int lbus = mp_irqs[i].mp_srcbus; if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mpc_irqtype == type) && - (mp_irqs[i].mpc_srcbusirq == irq)) + (mp_irqs[i].mp_irqtype == type) && + (mp_irqs[i].mp_srcbusirq == irq)) - return mp_irqs[i].mpc_dstirq; + return mp_irqs[i].mp_dstirq; } return -1; } @@ -834,17 +834,17 @@ static int __init find_isa_irq_apic(int irq, int type) int i; for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; + int lbus = mp_irqs[i].mp_srcbus; if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mpc_irqtype == type) && - (mp_irqs[i].mpc_srcbusirq == irq)) + (mp_irqs[i].mp_irqtype == type) && + (mp_irqs[i].mp_srcbusirq == irq)) break; } if (i < mp_irq_entries) { int apic; for(apic = 0; apic < nr_ioapics; apic++) { - if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mpc_dstapic) + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) return apic; } } @@ -869,23 +869,23 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) return -1; } for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; + int lbus = mp_irqs[i].mp_srcbus; for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mpc_dstapic || - mp_irqs[i].mpc_dstapic == MP_APIC_ALL) + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || + mp_irqs[i].mp_dstapic == MP_APIC_ALL) break; if (!test_bit(lbus, mp_bus_not_pci) && - !mp_irqs[i].mpc_irqtype && + !mp_irqs[i].mp_irqtype && (bus == lbus) && - (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq); + (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { + int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); if (!(apic || IO_APIC_IRQ(irq))) continue; - if (pin == (mp_irqs[i].mpc_srcbusirq & 3)) + if (pin == (mp_irqs[i].mp_srcbusirq & 3)) return irq; /* * Use the first all-but-pin matching entry as a @@ -952,7 +952,7 @@ static int EISA_ELCR(unsigned int irq) * EISA conforming in the MP table, that means its trigger type must * be read in from the ELCR */ -#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq)) +#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) #define default_EISA_polarity(idx) default_ISA_polarity(idx) /* PCI interrupts are always polarity one level triggered, @@ -969,13 +969,13 @@ static int EISA_ELCR(unsigned int irq) static int MPBIOS_polarity(int idx) { - int bus = mp_irqs[idx].mpc_srcbus; + int bus = mp_irqs[idx].mp_srcbus; int polarity; /* * Determine IRQ line polarity (high active or low active): */ - switch (mp_irqs[idx].mpc_irqflag & 3) + switch (mp_irqs[idx].mp_irqflag & 3) { case 0: /* conforms, ie. bus-type dependent polarity */ { @@ -1012,13 +1012,13 @@ static int MPBIOS_polarity(int idx) static int MPBIOS_trigger(int idx) { - int bus = mp_irqs[idx].mpc_srcbus; + int bus = mp_irqs[idx].mp_srcbus; int trigger; /* * Determine IRQ trigger mode (edge or level sensitive): */ - switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) + switch ((mp_irqs[idx].mp_irqflag>>2) & 3) { case 0: /* conforms, ie. bus-type dependent */ { @@ -1097,16 +1097,16 @@ static inline int irq_trigger(int idx) static int pin_2_irq(int idx, int apic, int pin) { int irq, i; - int bus = mp_irqs[idx].mpc_srcbus; + int bus = mp_irqs[idx].mp_srcbus; /* * Debugging check, we are in big trouble if this message pops up! */ - if (mp_irqs[idx].mpc_dstirq != pin) + if (mp_irqs[idx].mp_dstirq != pin) printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); if (test_bit(bus, mp_bus_not_pci)) - irq = mp_irqs[idx].mpc_srcbusirq; + irq = mp_irqs[idx].mp_srcbusirq; else { /* * PCI IRQs are mapped in order @@ -1793,8 +1793,8 @@ static void __init setup_ioapic_ids_from_mpc(void) */ if (old_id != mp_ioapics[apic].mp_apicid) for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mpc_dstapic == old_id) - mp_irqs[i].mpc_dstapic + if (mp_irqs[i].mp_dstapic == old_id) + mp_irqs[i].mp_dstapic = mp_ioapics[apic].mp_apicid; /* @@ -2810,8 +2810,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) return -1; for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mpc_irqtype == mp_INT && - mp_irqs[i].mpc_srcbusirq == bus_irq) + if (mp_irqs[i].mp_irqtype == mp_INT && + mp_irqs[i].mp_srcbusirq == bus_irq) break; if (i >= mp_irq_entries) return -1; diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 4555ad8c2070..e7f1476ed537 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -108,7 +108,7 @@ struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; /* MP IRQ source entries */ -struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; +struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* # of MP IRQ source entries */ int mp_irq_entries; @@ -453,10 +453,10 @@ static int find_irq_entry(int apic, int pin, int type) int i; for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mpc_irqtype == type && - (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mp_apicid || - mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && - mp_irqs[i].mpc_dstirq == pin) + if (mp_irqs[i].mp_irqtype == type && + (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || + mp_irqs[i].mp_dstapic == MP_APIC_ALL) && + mp_irqs[i].mp_dstirq == pin) return i; return -1; @@ -470,13 +470,13 @@ static int __init find_isa_irq_pin(int irq, int type) int i; for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; + int lbus = mp_irqs[i].mp_srcbus; if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mpc_irqtype == type) && - (mp_irqs[i].mpc_srcbusirq == irq)) + (mp_irqs[i].mp_irqtype == type) && + (mp_irqs[i].mp_srcbusirq == irq)) - return mp_irqs[i].mpc_dstirq; + return mp_irqs[i].mp_dstirq; } return -1; } @@ -486,17 +486,17 @@ static int __init find_isa_irq_apic(int irq, int type) int i; for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; + int lbus = mp_irqs[i].mp_srcbus; if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mpc_irqtype == type) && - (mp_irqs[i].mpc_srcbusirq == irq)) + (mp_irqs[i].mp_irqtype == type) && + (mp_irqs[i].mp_srcbusirq == irq)) break; } if (i < mp_irq_entries) { int apic; for(apic = 0; apic < nr_ioapics; apic++) { - if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mpc_dstapic) + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) return apic; } } @@ -521,23 +521,23 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) return -1; } for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; + int lbus = mp_irqs[i].mp_srcbus; for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mpc_dstapic || - mp_irqs[i].mpc_dstapic == MP_APIC_ALL) + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || + mp_irqs[i].mp_dstapic == MP_APIC_ALL) break; if (!test_bit(lbus, mp_bus_not_pci) && - !mp_irqs[i].mpc_irqtype && + !mp_irqs[i].mp_irqtype && (bus == lbus) && - (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq); + (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { + int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); if (!(apic || IO_APIC_IRQ(irq))) continue; - if (pin == (mp_irqs[i].mpc_srcbusirq & 3)) + if (pin == (mp_irqs[i].mp_srcbusirq & 3)) return irq; /* * Use the first all-but-pin matching entry as a @@ -565,13 +565,13 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) static int MPBIOS_polarity(int idx) { - int bus = mp_irqs[idx].mpc_srcbus; + int bus = mp_irqs[idx].mp_srcbus; int polarity; /* * Determine IRQ line polarity (high active or low active): */ - switch (mp_irqs[idx].mpc_irqflag & 3) + switch (mp_irqs[idx].mp_irqflag & 3) { case 0: /* conforms, ie. bus-type dependent polarity */ if (test_bit(bus, mp_bus_not_pci)) @@ -607,13 +607,13 @@ static int MPBIOS_polarity(int idx) static int MPBIOS_trigger(int idx) { - int bus = mp_irqs[idx].mpc_srcbus; + int bus = mp_irqs[idx].mp_srcbus; int trigger; /* * Determine IRQ trigger mode (edge or level sensitive): */ - switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) + switch ((mp_irqs[idx].mp_irqflag>>2) & 3) { case 0: /* conforms, ie. bus-type dependent */ if (test_bit(bus, mp_bus_not_pci)) @@ -660,16 +660,16 @@ static inline int irq_trigger(int idx) static int pin_2_irq(int idx, int apic, int pin) { int irq, i; - int bus = mp_irqs[idx].mpc_srcbus; + int bus = mp_irqs[idx].mp_srcbus; /* * Debugging check, we are in big trouble if this message pops up! */ - if (mp_irqs[idx].mpc_dstirq != pin) + if (mp_irqs[idx].mp_dstirq != pin) printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); if (test_bit(bus, mp_bus_not_pci)) { - irq = mp_irqs[idx].mpc_srcbusirq; + irq = mp_irqs[idx].mp_srcbusirq; } else { /* * PCI IRQs are mapped in order @@ -2242,8 +2242,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) return -1; for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mpc_irqtype == mp_INT && - mp_irqs[i].mpc_srcbusirq == bus_irq) + if (mp_irqs[i].mp_irqtype == mp_INT && + mp_irqs[i].mp_srcbusirq == bus_irq) break; if (i >= mp_irq_entries) return -1; diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 9f1e5bf7f0fb..59f051db236d 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -186,12 +186,18 @@ static void __init MP_ioapic_info(struct mpc_config_ioapic *m) static void __init MP_intsrc_info(struct mpc_config_intsrc *m) { - mp_irqs[mp_irq_entries] = *m; printk(KERN_INFO "Int: type %d, pol %d, trig %d, bus %02x," " IRQ %02x, APIC ID %x, APIC INT %02x\n", m->mpc_irqtype, m->mpc_irqflag & 3, (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); + mp_irqs[mp_irq_entries].mp_dstapic = m->mpc_dstapic; + mp_irqs[mp_irq_entries].mp_type = m->mpc_type; + mp_irqs[mp_irq_entries].mp_irqtype = m->mpc_irqtype; + mp_irqs[mp_irq_entries].mp_irqflag = m->mpc_irqflag; + mp_irqs[mp_irq_entries].mp_srcbus = m->mpc_srcbus; + mp_irqs[mp_irq_entries].mp_srcbusirq = m->mpc_srcbusirq; + mp_irqs[mp_irq_entries].mp_dstirq = m->mpc_dstirq; if (++mp_irq_entries == MAX_IRQ_SOURCES) panic("Max # of irq sources exceeded!!\n"); } diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h index ade76c0d03ae..86d8c3bdcca4 100644 --- a/include/asm-x86/io_apic.h +++ b/include/asm-x86/io_apic.h @@ -120,6 +120,16 @@ struct mp_config_ioapic { unsigned char mp_flags; }; +struct mp_config_intsrc { + unsigned int mp_dstapic; + unsigned char mp_type; + unsigned char mp_irqtype; + unsigned short mp_irqflag; + unsigned char mp_srcbus; + unsigned char mp_srcbusirq; + unsigned char mp_dstirq; +}; + /* I/O APIC entries */ extern struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; @@ -127,7 +137,7 @@ extern struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; extern int mp_irq_entries; /* MP IRQ source entries */ -extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; +extern struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* non-0 if default (table-less) MP configuration */ extern int mpc_default_type; -- cgit v1.2.3 From bf62f3981c7076714e3b9f5fa6989a806cad02bf Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 20 May 2008 20:10:58 -0700 Subject: x86: move e820_mark_nosave_regions to e820.c and make e820_mark_nosave_regions to take limit_pfn to use max_low_pfn for 32bit and end_pfn for 64bit Signed-off-by: Yinghai Lu Cc: Andrew Morton Signed-off-by: Thomas Gleixner --- arch/x86/kernel/e820.c | 32 ++++++++++++++++++++++++++++++++ arch/x86/kernel/e820_32.c | 32 -------------------------------- arch/x86/kernel/e820_64.c | 32 -------------------------------- arch/x86/kernel/setup_32.c | 2 +- arch/x86/kernel/setup_64.c | 2 +- include/asm-x86/e820.h | 9 +++++++++ include/asm-x86/e820_32.h | 8 -------- include/asm-x86/e820_64.h | 1 - 8 files changed, 43 insertions(+), 75 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 35da8cdbe5e6..0cd9132c9450 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -495,6 +496,37 @@ __init void e820_setup_gap(void) pci_mem_start, gapstart, gapsize); } +#if defined(CONFIG_X86_64) || \ + (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION)) +/** + * Find the ranges of physical addresses that do not correspond to + * e820 RAM areas and mark the corresponding pages as nosave for + * hibernation (32 bit) or software suspend and suspend to RAM (64 bit). + * + * This function requires the e820 map to be sorted and without any + * overlapping entries and assumes the first e820 area to be RAM. + */ +void __init e820_mark_nosave_regions(unsigned long limit_pfn) +{ + int i; + unsigned long pfn; + + pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size); + for (i = 1; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + + if (pfn < PFN_UP(ei->addr)) + register_nosave_region(pfn, PFN_UP(ei->addr)); + + pfn = PFN_DOWN(ei->addr + ei->size); + if (ei->type != E820_RAM) + register_nosave_region(PFN_UP(ei->addr), pfn); + + if (pfn >= limit_pfn) + break; + } +} +#endif /* * Early reserved memory areas. diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c index dfe25751038b..db760d4706af 100644 --- a/arch/x86/kernel/e820_32.c +++ b/arch/x86/kernel/e820_32.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include @@ -208,37 +207,6 @@ void __init init_iomem_resources(struct resource *code_resource, } } -#if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION) -/** - * e820_mark_nosave_regions - Find the ranges of physical addresses that do not - * correspond to e820 RAM areas and mark the corresponding pages as nosave for - * hibernation. - * - * This function requires the e820 map to be sorted and without any - * overlapping entries and assumes the first e820 area to be RAM. - */ -void __init e820_mark_nosave_regions(void) -{ - int i; - unsigned long pfn; - - pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size); - for (i = 1; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - - if (pfn < PFN_UP(ei->addr)) - register_nosave_region(pfn, PFN_UP(ei->addr)); - - pfn = PFN_DOWN(ei->addr + ei->size); - if (ei->type != E820_RAM) - register_nosave_region(PFN_UP(ei->addr), pfn); - - if (pfn >= max_low_pfn) - break; - } -} -#endif - /* * Find the highest page frame number we have available */ diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 3b2e0b1bb49b..5e063e72b24a 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include @@ -93,37 +92,6 @@ void __init e820_reserve_resources(void) } } -/* - * Find the ranges of physical addresses that do not correspond to - * e820 RAM areas and mark the corresponding pages as nosave for software - * suspend and suspend to RAM. - * - * This function requires the e820 map to be sorted and without any - * overlapping entries and assumes the first e820 area to be RAM. - */ -void __init e820_mark_nosave_regions(void) -{ - int i; - unsigned long paddr; - - paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE); - for (i = 1; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - - if (paddr < ei->addr) - register_nosave_region(PFN_DOWN(paddr), - PFN_UP(ei->addr)); - - paddr = round_down(ei->addr + ei->size, PAGE_SIZE); - if (ei->type != E820_RAM) - register_nosave_region(PFN_UP(ei->addr), - PFN_DOWN(paddr)); - - if (paddr >= (end_pfn << PAGE_SHIFT)) - break; - } -} - /* * Finds an active region in the address range from start_pfn to last_pfn and * returns its range in ei_startpfn and ei_endpfn for the e820 entry. diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 3c451d143eba..e1173aecf69a 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -820,7 +820,7 @@ void __init setup_arch(char **cmdline_p) #endif e820_setup_gap(); - e820_mark_nosave_regions(); + e820_mark_nosave_regions(max_low_pfn); #ifdef CONFIG_VT #if defined(CONFIG_VGA_CONSOLE) diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 6dff1286ad8a..975a5da46e49 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -516,7 +516,7 @@ void __init setup_arch(char **cmdline_p) * We trust e820 completely. No explicit ROM probing in memory. */ e820_reserve_resources(); - e820_mark_nosave_regions(); + e820_mark_nosave_regions(end_pfn); /* request I/O space for devices used on all i[345]86 PCs */ for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h index 5a58e2bb1d78..4266a2c5f2e8 100644 --- a/include/asm-x86/e820.h +++ b/include/asm-x86/e820.h @@ -70,6 +70,15 @@ extern u64 update_memory_range(u64 start, u64 size, unsigned old_type, extern void update_e820(void); extern void e820_setup_gap(void); +#if defined(CONFIG_X86_64) || \ + (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION)) +extern void e820_mark_nosave_regions(unsigned long limit_pfn); +#else +static inline void e820_mark_nosave_regions(unsigned long limit_pfn) +{ +} +#endif + extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align); extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align); extern void reserve_early(u64 start, u64 end, char *name); diff --git a/include/asm-x86/e820_32.h b/include/asm-x86/e820_32.h index 9576b438fbd9..7ace82570a36 100644 --- a/include/asm-x86/e820_32.h +++ b/include/asm-x86/e820_32.h @@ -28,13 +28,5 @@ extern void init_iomem_resources(struct resource *code_resource, struct resource *data_resource, struct resource *bss_resource); -#if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION) -extern void e820_mark_nosave_regions(void); -#else -static inline void e820_mark_nosave_regions(void) -{ -} -#endif - #endif/*!__ASSEMBLY__*/ #endif/*__E820_HEADER*/ diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h index 37f176a02bc6..917963ccab69 100644 --- a/include/asm-x86/e820_64.h +++ b/include/asm-x86/e820_64.h @@ -18,7 +18,6 @@ extern void setup_memory_region(void); extern void contig_e820_setup(void); extern unsigned long e820_end_of_ram(void); extern void e820_reserve_resources(void); -extern void e820_mark_nosave_regions(void); extern int e820_any_non_reserved(unsigned long start, unsigned long end); extern int is_memory_any_valid(unsigned long start, unsigned long end); extern int e820_all_non_reserved(unsigned long start, unsigned long end); -- cgit v1.2.3 From ce6444d39fdea29dcf145d2d95fe9cdc850aa53c Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Mon, 19 May 2008 19:47:09 +0400 Subject: x86: mp_bus_id_to_pci_bus is not needed --- arch/x86/kernel/io_apic_32.c | 2 +- arch/x86/kernel/io_apic_64.c | 2 +- arch/x86/kernel/mpparse.c | 10 ---------- include/asm-x86/mpspec.h | 2 -- 4 files changed, 2 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index ea68c3e5ba1d..17b2e8f862d3 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -864,7 +864,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, " "slot:%d, pin:%d.\n", bus, slot, pin); - if (mp_bus_id_to_pci_bus[bus] == -1) { + if (test_bit(bus, mp_bus_not_pci)) { printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus); return -1; } diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index e7f1476ed537..9637675ea555 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -516,7 +516,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", bus, slot, pin); - if (mp_bus_id_to_pci_bus[bus] == -1) { + if (test_bit(bus, mp_bus_not_pci)) { apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus); return -1; } diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index d67cd7600a21..83dfd6696c2a 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -41,13 +41,6 @@ int mp_bus_id_to_type[MAX_MP_BUSSES]; #endif DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); -int mp_bus_id_to_pci_bus[MAX_MP_BUSSES] = {[0 ... MAX_MP_BUSSES - 1] = -1 }; - -static int mp_current_pci_id; - -/* - * Intel MP BIOS table parsing routines: - */ /* * Checksum an MP configuration block. @@ -101,7 +94,6 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m) static void __init MP_bus_info(struct mpc_config_bus *m) { char str[7]; - memcpy(str, m->mpc_bustype, 6); str[6] = 0; @@ -130,8 +122,6 @@ static void __init MP_bus_info(struct mpc_config_bus *m) mpc_oem_pci_bus(m, translation_table[mpc_record]); #endif clear_bit(m->mpc_busid, mp_bus_not_pci); - mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; - mp_current_pci_id++; #if defined(CONFIG_EISA) || defined (CONFIG_MCA) mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h index 57a991b9c053..b785ddd8d761 100644 --- a/include/asm-x86/mpspec.h +++ b/include/asm-x86/mpspec.h @@ -32,8 +32,6 @@ extern int mp_bus_id_to_type[MAX_MP_BUSSES]; extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); -extern int mp_bus_id_to_pci_bus[MAX_MP_BUSSES]; - extern unsigned int boot_cpu_physical_apicid; extern int smp_found_config; extern int mpc_default_type; -- cgit v1.2.3 From 2e4db9d2c5db9c9dd3e1d1ec3263bc4a990ff8df Mon Sep 17 00:00:00 2001 From: Fernando Luis Vazquez Cao Date: Mon, 19 May 2008 14:23:44 -0700 Subject: x86: remove duplicate declaration of unknown_nmi_panic Signed-off-by: Fernando Luis Vazquez Cao Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/asm-x86/nmi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h index 1e363021e72f..8455bf36d8df 100644 --- a/include/asm-x86/nmi.h +++ b/include/asm-x86/nmi.h @@ -46,7 +46,6 @@ extern void nmi_watchdog_default(void); extern int check_nmi_watchdog(void); extern int nmi_watchdog_enabled; -extern int unknown_nmi_panic; extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); extern int avail_to_resrv_perfctr_nmi(unsigned int); extern int reserve_perfctr_nmi(unsigned int); -- cgit v1.2.3 From ddca03c98a7f7ad5ab09967ff52d4ed60358c896 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 24 May 2008 19:36:31 +0400 Subject: x86: nmi - unify die_nmi() interface By slightly changing 32bit mode die_nmi() we may unify the interface and make it common for both (32/64bit) modes Signed-off-by: Cyrill Gorcunov Cc: hpa@zytor.com Cc: mingo@redhat.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/nmi_32.c | 7 +++---- arch/x86/kernel/traps_32.c | 8 +++++--- include/asm-x86/nmi.h | 2 +- 3 files changed, 9 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c index 69bdae555c18..bd04a28f7a5c 100644 --- a/arch/x86/kernel/nmi_32.c +++ b/arch/x86/kernel/nmi_32.c @@ -320,8 +320,6 @@ void touch_nmi_watchdog(void) } EXPORT_SYMBOL(touch_nmi_watchdog); -extern void die_nmi(struct pt_regs *, const char *msg); - notrace __kprobes int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) { @@ -375,7 +373,8 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) /* * die_nmi will return ONLY if NOTIFY_STOP happens.. */ - die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP"); + die_nmi("BUG: NMI Watchdog detected LOCKUP", + regs, 0); } else { __get_cpu_var(last_irq_sum) = sum; local_set(&__get_cpu_var(alert_counter), 0); @@ -406,7 +405,7 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) char buf[64]; sprintf(buf, "NMI received for unknown reason %02x\n", reason); - die_nmi(regs, buf); + die_nmi(buf, regs, 0); return 0; } diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index bde6f63e15d5..f31e6651fa6f 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -755,9 +755,9 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) static DEFINE_SPINLOCK(nmi_print_lock); -void notrace __kprobes die_nmi(struct pt_regs *regs, const char *msg) +void notrace __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) { - if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP) + if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) return; spin_lock(&nmi_print_lock); @@ -766,10 +766,12 @@ void notrace __kprobes die_nmi(struct pt_regs *regs, const char *msg) * to get a message out: */ bust_spinlocks(1); - printk(KERN_EMERG "%s", msg); + printk(KERN_EMERG "%s", str); printk(" on CPU%d, ip %08lx, registers:\n", smp_processor_id(), regs->ip); show_registers(regs); + if (do_panic) + panic("Non maskable interrupt"); console_silent(); spin_unlock(&nmi_print_lock); bust_spinlocks(0); diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h index 8455bf36d8df..7cd5b6a88138 100644 --- a/include/asm-x86/nmi.h +++ b/include/asm-x86/nmi.h @@ -38,12 +38,12 @@ static inline void unset_nmi_pm_callback(struct pm_dev *dev) #ifdef CONFIG_X86_64 extern void default_do_nmi(struct pt_regs *); -extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); extern void nmi_watchdog_default(void); #else #define nmi_watchdog_default() do {} while (0) #endif +extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); extern int check_nmi_watchdog(void); extern int nmi_watchdog_enabled; extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); -- cgit v1.2.3 From 4b82b277707a39b97271439c475f186f63ec4692 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 24 May 2008 19:36:35 +0400 Subject: x86: nmi_32.c - add nmi_watchdog_default helper Signed-off-by: Cyrill Gorcunov Cc: hpa@zytor.com Cc: mingo@redhat.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/nmi_32.c | 19 +++++++++++++------ include/asm-x86/nmi.h | 4 +--- 2 files changed, 14 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c index 4437fe1edabc..7714e8478213 100644 --- a/arch/x86/kernel/nmi_32.c +++ b/arch/x86/kernel/nmi_32.c @@ -51,6 +51,17 @@ static DEFINE_PER_CPU(short, wd_enabled); static int endflag __initdata = 0; +/* Run after command line and cpu_init init, but before all other checks */ +void nmi_watchdog_default(void) +{ + if (nmi_watchdog != NMI_DEFAULT) + return; + if (lapic_watchdog_ok()) + nmi_watchdog = NMI_LOCAL_APIC; + else + nmi_watchdog = NMI_IO_APIC; +} + #ifdef CONFIG_SMP /* The performance counters used by NMI_LOCAL_APIC don't trigger when * the CPU is idle. To make sure the NMI watchdog really ticks on all @@ -437,12 +448,8 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, return -EIO; } - if (nmi_watchdog == NMI_DEFAULT) { - if (lapic_watchdog_ok()) - nmi_watchdog = NMI_LOCAL_APIC; - else - nmi_watchdog = NMI_IO_APIC; - } + /* if nmi_watchdog is not set yet, then set it */ + nmi_watchdog_default(); if (nmi_watchdog == NMI_LOCAL_APIC) { if (nmi_watchdog_enabled) diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h index 7cd5b6a88138..1e8f34d7ab68 100644 --- a/include/asm-x86/nmi.h +++ b/include/asm-x86/nmi.h @@ -38,11 +38,9 @@ static inline void unset_nmi_pm_callback(struct pm_dev *dev) #ifdef CONFIG_X86_64 extern void default_do_nmi(struct pt_regs *); -extern void nmi_watchdog_default(void); -#else -#define nmi_watchdog_default() do {} while (0) #endif +extern void nmi_watchdog_default(void); extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); extern int check_nmi_watchdog(void); extern int nmi_watchdog_enabled; -- cgit v1.2.3 From 4b6011bc6ec71660859139ac8d28b7d0badd681c Mon Sep 17 00:00:00 2001 From: Sven Wegener Date: Sun, 25 May 2008 21:16:36 +0200 Subject: x86: Remove obsolete LOCK macro from include/asm-x86/atomic_64.h Commit d167a518 "[PATCH] x86_64: x86_64 version of the smp alternative patch." has left the LOCK macro in include/asm-x86_64/atomic.h, which is now include/asm-x86/atomic_64.h. Its scope should be local to the file, other architectures don't provide it, I couldn't find an in-tree user of it and allyesconfig, allmodconfig and allnoconfig build fine without it, so this patch removes it. Signed-off-by: Sven Wegener Signed-off-by: Thomas Gleixner --- include/asm-x86/atomic_64.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/asm-x86/atomic_64.h b/include/asm-x86/atomic_64.h index 3e0cd7d38335..fe589c153db8 100644 --- a/include/asm-x86/atomic_64.h +++ b/include/asm-x86/atomic_64.h @@ -11,12 +11,6 @@ * resource counting etc.. */ -#ifdef CONFIG_SMP -#define LOCK "lock ; " -#else -#define LOCK "" -#endif - /* * Make sure gcc doesn't try to be clever and move things around * on us. We need to use _exactly_ the address the user gave us, -- cgit v1.2.3 From 1a20d3ecf5c2a6435df2b756435b1eb1c657d45b Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 26 May 2008 13:36:53 -0700 Subject: x86: string_32.h: workaround for broken gcc 4.0 gcc 4.0 fails to allocate %eax for the pattern operand in the rep store instructions used by memset; force it to do so by declaring a register variable. Signed-off-by: H. Peter Anvin --- include/asm-x86/string_32.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86/string_32.h b/include/asm-x86/string_32.h index 8d0c593c4413..193578cd1fd9 100644 --- a/include/asm-x86/string_32.h +++ b/include/asm-x86/string_32.h @@ -267,11 +267,18 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern, asm volatile("rep ; stosl" \ x \ : "=&c" (d0), "=&D" (d1) \ - : "a" (pattern), "0" (count/4), "1" ((long)s) \ + : "a" (eax), "0" (count/4), "1" ((long)s) \ : "memory") { int d0, d1; +#if __GNUC__ == 4 && __GNUC_MINOR__ == 0 + /* Workaround for broken gcc 4.0 */ + register unsigned long eax asm("%eax") = pattern; +#else + unsigned long eax = pattern; +#endif + switch (count % 4) { case 0: COMMON(""); -- cgit v1.2.3 From 41c52c0db9607e59f90da7da5309489fa06e887f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 22 May 2008 11:46:33 -0400 Subject: ftrace: set_ftrace_notrace feature While debugging latencies in the RT kernel, I found that it would be nice to be able to filter away functions from the trace than just to filter on functions. I added a new interface to the debugfs tracing directory called set_ftrace_notrace When dynamic frace is enabled, this lets you filter away functions that will not be recorded in the trace. It is similar to adding 'notrace' to those functions but by doing it without recompiling the kernel. Here's how set_ftrace_filter and set_ftrace_notrace interact. Remember, if set_ftrace_filter is set, it removes all functions from the trace execpt for those listed in the set_ftrace_filter. set_ftrace_notrace will prevent those functions from being traced. If you were to set one function in both set_ftrace_filter and set_ftrace_notrace and that function was the same, then you would end up with an empty trace. the set of functions to trace is: set_ftrace_filter == empty then all functions not in set_ftrace_notrace else set of the set_ftrace_filter and not in set of set_ftrace_notrace. Signed-off-by: Steven Rostedt Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 1 + kernel/trace/ftrace.c | 170 +++++++++++++++++++++++++++++++++++++------------ 2 files changed, 131 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 922e23d0196f..ffbbd54a720e 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -48,6 +48,7 @@ enum { FTRACE_FL_FAILED = (1 << 1), FTRACE_FL_FILTER = (1 << 2), FTRACE_FL_ENABLED = (1 << 3), + FTRACE_FL_NOTRACE = (1 << 4), }; struct dyn_ftrace { diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 89bd9a6f52ec..2552454609cf 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -170,7 +170,7 @@ static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu); static DEFINE_SPINLOCK(ftrace_shutdown_lock); static DEFINE_MUTEX(ftraced_lock); -static DEFINE_MUTEX(ftrace_filter_lock); +static DEFINE_MUTEX(ftrace_regex_lock); struct ftrace_page { struct ftrace_page *next; @@ -337,13 +337,12 @@ static void __ftrace_replace_code(struct dyn_ftrace *rec, unsigned char *old, unsigned char *new, int enable) { - unsigned long ip; + unsigned long ip, fl; int failed; ip = rec->ip; if (ftrace_filtered && enable) { - unsigned long fl; /* * If filtering is on: * @@ -356,13 +355,16 @@ __ftrace_replace_code(struct dyn_ftrace *rec, * If this record is not set to be filtered * and it is not enabled do nothing. * + * If this record is set not to trace then + * do nothing. + * * If this record is not set to be filtered and * it is enabled, disable it. */ fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_ENABLED); if ((fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) || - (fl == 0)) + (fl == 0) || (rec->flags & FTRACE_FL_NOTRACE)) return; /* @@ -380,9 +382,17 @@ __ftrace_replace_code(struct dyn_ftrace *rec, } } else { - if (enable) + if (enable) { + /* + * If this record is set not to trace and is + * not enabled, do nothing. + */ + fl = rec->flags & (FTRACE_FL_NOTRACE | FTRACE_FL_ENABLED); + if (fl == FTRACE_FL_NOTRACE) + return; + new = ftrace_call_replace(ip, FTRACE_ADDR); - else + } else old = ftrace_call_replace(ip, FTRACE_ADDR); if (enable) { @@ -721,6 +731,7 @@ static int __init ftrace_dyn_table_alloc(void) enum { FTRACE_ITER_FILTER = (1 << 0), FTRACE_ITER_CONT = (1 << 1), + FTRACE_ITER_NOTRACE = (1 << 2), }; #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ @@ -754,7 +765,9 @@ t_next(struct seq_file *m, void *v, loff_t *pos) rec = &iter->pg->records[iter->idx++]; if ((rec->flags & FTRACE_FL_FAILED) || ((iter->flags & FTRACE_ITER_FILTER) && - !(rec->flags & FTRACE_FL_FILTER))) { + !(rec->flags & FTRACE_FL_FILTER)) || + ((iter->flags & FTRACE_ITER_NOTRACE) && + !(rec->flags & FTRACE_FL_NOTRACE))) { rec = NULL; goto retry; } @@ -847,22 +860,24 @@ int ftrace_avail_release(struct inode *inode, struct file *file) return 0; } -static void ftrace_filter_reset(void) +static void ftrace_filter_reset(int enable) { struct ftrace_page *pg; struct dyn_ftrace *rec; + unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; unsigned i; /* keep kstop machine from running */ preempt_disable(); - ftrace_filtered = 0; + if (enable) + ftrace_filtered = 0; pg = ftrace_pages_start; while (pg) { for (i = 0; i < pg->index; i++) { rec = &pg->records[i]; if (rec->flags & FTRACE_FL_FAILED) continue; - rec->flags &= ~FTRACE_FL_FILTER; + rec->flags &= ~type; } pg = pg->next; } @@ -870,7 +885,7 @@ static void ftrace_filter_reset(void) } static int -ftrace_filter_open(struct inode *inode, struct file *file) +ftrace_regex_open(struct inode *inode, struct file *file, int enable) { struct ftrace_iterator *iter; int ret = 0; @@ -882,15 +897,16 @@ ftrace_filter_open(struct inode *inode, struct file *file) if (!iter) return -ENOMEM; - mutex_lock(&ftrace_filter_lock); + mutex_lock(&ftrace_regex_lock); if ((file->f_mode & FMODE_WRITE) && !(file->f_flags & O_APPEND)) - ftrace_filter_reset(); + ftrace_filter_reset(enable); if (file->f_mode & FMODE_READ) { iter->pg = ftrace_pages_start; iter->pos = -1; - iter->flags = FTRACE_ITER_FILTER; + iter->flags = enable ? FTRACE_ITER_FILTER : + FTRACE_ITER_NOTRACE; ret = seq_open(file, &show_ftrace_seq_ops); if (!ret) { @@ -900,13 +916,25 @@ ftrace_filter_open(struct inode *inode, struct file *file) kfree(iter); } else file->private_data = iter; - mutex_unlock(&ftrace_filter_lock); + mutex_unlock(&ftrace_regex_lock); return ret; } +static int +ftrace_filter_open(struct inode *inode, struct file *file) +{ + return ftrace_regex_open(inode, file, 1); +} + +static int +ftrace_notrace_open(struct inode *inode, struct file *file) +{ + return ftrace_regex_open(inode, file, 0); +} + static ssize_t -ftrace_filter_read(struct file *file, char __user *ubuf, +ftrace_regex_read(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos) { if (file->f_mode & FMODE_READ) @@ -916,7 +944,7 @@ ftrace_filter_read(struct file *file, char __user *ubuf, } static loff_t -ftrace_filter_lseek(struct file *file, loff_t offset, int origin) +ftrace_regex_lseek(struct file *file, loff_t offset, int origin) { loff_t ret; @@ -936,13 +964,14 @@ enum { }; static void -ftrace_match(unsigned char *buff, int len) +ftrace_match(unsigned char *buff, int len, int enable) { char str[KSYM_SYMBOL_LEN]; char *search = NULL; struct ftrace_page *pg; struct dyn_ftrace *rec; int type = MATCH_FULL; + unsigned long flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; unsigned i, match = 0, search_len = 0; for (i = 0; i < len; i++) { @@ -966,7 +995,8 @@ ftrace_match(unsigned char *buff, int len) /* keep kstop machine from running */ preempt_disable(); - ftrace_filtered = 1; + if (enable) + ftrace_filtered = 1; pg = ftrace_pages_start; while (pg) { for (i = 0; i < pg->index; i++) { @@ -997,7 +1027,7 @@ ftrace_match(unsigned char *buff, int len) break; } if (matched) - rec->flags |= FTRACE_FL_FILTER; + rec->flags |= flag; } pg = pg->next; } @@ -1005,8 +1035,8 @@ ftrace_match(unsigned char *buff, int len) } static ssize_t -ftrace_filter_write(struct file *file, const char __user *ubuf, - size_t cnt, loff_t *ppos) +ftrace_regex_write(struct file *file, const char __user *ubuf, + size_t cnt, loff_t *ppos, int enable) { struct ftrace_iterator *iter; char ch; @@ -1016,7 +1046,7 @@ ftrace_filter_write(struct file *file, const char __user *ubuf, if (!cnt || cnt < 0) return 0; - mutex_lock(&ftrace_filter_lock); + mutex_lock(&ftrace_regex_lock); if (file->f_mode & FMODE_READ) { struct seq_file *m = file->private_data; @@ -1045,7 +1075,6 @@ ftrace_filter_write(struct file *file, const char __user *ubuf, cnt--; } - if (isspace(ch)) { file->f_pos += read; ret = read; @@ -1072,7 +1101,7 @@ ftrace_filter_write(struct file *file, const char __user *ubuf, if (isspace(ch)) { iter->filtered++; iter->buffer[iter->buffer_idx] = 0; - ftrace_match(iter->buffer, iter->buffer_idx); + ftrace_match(iter->buffer, iter->buffer_idx, enable); iter->buffer_idx = 0; } else iter->flags |= FTRACE_ITER_CONT; @@ -1082,11 +1111,39 @@ ftrace_filter_write(struct file *file, const char __user *ubuf, ret = read; out: - mutex_unlock(&ftrace_filter_lock); + mutex_unlock(&ftrace_regex_lock); return ret; } +static ssize_t +ftrace_filter_write(struct file *file, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return ftrace_regex_write(file, ubuf, cnt, ppos, 1); +} + +static ssize_t +ftrace_notrace_write(struct file *file, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return ftrace_regex_write(file, ubuf, cnt, ppos, 0); +} + +static void +ftrace_set_regex(unsigned char *buf, int len, int reset, int enable) +{ + if (unlikely(ftrace_disabled)) + return; + + mutex_lock(&ftrace_regex_lock); + if (reset) + ftrace_filter_reset(enable); + if (buf) + ftrace_match(buf, len, enable); + mutex_unlock(&ftrace_regex_lock); +} + /** * ftrace_set_filter - set a function to filter on in ftrace * @buf - the string that holds the function filter text. @@ -1098,24 +1155,31 @@ ftrace_filter_write(struct file *file, const char __user *ubuf, */ void ftrace_set_filter(unsigned char *buf, int len, int reset) { - if (unlikely(ftrace_disabled)) - return; + ftrace_set_regex(buf, len, reset, 1); +} - mutex_lock(&ftrace_filter_lock); - if (reset) - ftrace_filter_reset(); - if (buf) - ftrace_match(buf, len); - mutex_unlock(&ftrace_filter_lock); +/** + * ftrace_set_notrace - set a function to not trace in ftrace + * @buf - the string that holds the function notrace text. + * @len - the length of the string. + * @reset - non zero to reset all filters before applying this filter. + * + * Notrace Filters denote which functions should not be enabled when tracing + * is enabled. If @buf is NULL and reset is set, all functions will be enabled + * for tracing. + */ +void ftrace_set_notrace(unsigned char *buf, int len, int reset) +{ + ftrace_set_regex(buf, len, reset, 0); } static int -ftrace_filter_release(struct inode *inode, struct file *file) +ftrace_regex_release(struct inode *inode, struct file *file, int enable) { struct seq_file *m = (struct seq_file *)file->private_data; struct ftrace_iterator *iter; - mutex_lock(&ftrace_filter_lock); + mutex_lock(&ftrace_regex_lock); if (file->f_mode & FMODE_READ) { iter = m->private; @@ -1126,7 +1190,7 @@ ftrace_filter_release(struct inode *inode, struct file *file) if (iter->buffer_idx) { iter->filtered++; iter->buffer[iter->buffer_idx] = 0; - ftrace_match(iter->buffer, iter->buffer_idx); + ftrace_match(iter->buffer, iter->buffer_idx, enable); } mutex_lock(&ftrace_sysctl_lock); @@ -1137,10 +1201,22 @@ ftrace_filter_release(struct inode *inode, struct file *file) mutex_unlock(&ftrace_sysctl_lock); kfree(iter); - mutex_unlock(&ftrace_filter_lock); + mutex_unlock(&ftrace_regex_lock); return 0; } +static int +ftrace_filter_release(struct inode *inode, struct file *file) +{ + return ftrace_regex_release(inode, file, 1); +} + +static int +ftrace_notrace_release(struct inode *inode, struct file *file) +{ + return ftrace_regex_release(inode, file, 0); +} + static struct file_operations ftrace_avail_fops = { .open = ftrace_avail_open, .read = seq_read, @@ -1150,12 +1226,20 @@ static struct file_operations ftrace_avail_fops = { static struct file_operations ftrace_filter_fops = { .open = ftrace_filter_open, - .read = ftrace_filter_read, + .read = ftrace_regex_read, .write = ftrace_filter_write, - .llseek = ftrace_filter_lseek, + .llseek = ftrace_regex_lseek, .release = ftrace_filter_release, }; +static struct file_operations ftrace_notrace_fops = { + .open = ftrace_notrace_open, + .read = ftrace_regex_read, + .write = ftrace_notrace_write, + .llseek = ftrace_regex_lseek, + .release = ftrace_notrace_release, +}; + /** * ftrace_force_update - force an update to all recording ftrace functions * @@ -1239,6 +1323,12 @@ static __init int ftrace_init_debugfs(void) if (!entry) pr_warning("Could not create debugfs " "'set_ftrace_filter' entry\n"); + + entry = debugfs_create_file("set_ftrace_notrace", 0644, d_tracer, + NULL, &ftrace_notrace_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'set_ftrace_notrace' entry\n"); return 0; } -- cgit v1.2.3 From ccbfac2923c9febaeaf07a50054027a92b502718 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 22 May 2008 14:31:07 -0400 Subject: ftrace: powerpc clean ups This patch cleans up the ftrace code in PowerPC based on the comments from Michael Ellerman. Signed-off-by: Steven Rostedt Cc: Michael Ellerman Cc: proski@gnu.org Cc: a.p.zijlstra@chello.nl Cc: Pekka Paalanen Cc: Steven Rostedt Cc: linuxppc-dev@ozlabs.org Cc: Soeren Sandmann Pedersen Cc: paulus@samba.org Signed-off-by: Thomas Gleixner --- arch/powerpc/kernel/entry_32.S | 11 ++--------- arch/powerpc/kernel/ftrace.c | 8 +++++++- arch/powerpc/kernel/ppc_ksyms.c | 5 +++++ arch/powerpc/kernel/setup_32.c | 5 ----- arch/powerpc/kernel/setup_64.c | 5 ----- include/asm-powerpc/ftrace.h | 6 ++++++ 6 files changed, 20 insertions(+), 20 deletions(-) create mode 100644 include/asm-powerpc/ftrace.h (limited to 'include') diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 0e6221889ca9..3b1dd29d9f91 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -1129,18 +1129,11 @@ _GLOBAL(_mcount) stw r5, 8(r1) LOAD_REG_ADDR(r5, ftrace_trace_function) -#if 0 - mtctr r3 - mr r1, r5 - bctrl -#endif lwz r5,0(r5) -#if 1 + mtctr r5 bctrl -#else - bl ftrace_stub -#endif + nop lwz r6, 8(r1) diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 5a4993fefa45..69ed41223468 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -51,10 +51,16 @@ notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) { static unsigned int op; + /* + * It would be nice to just use create_function_call, but that will + * update the code itself. Here we need to just return the + * instruction that is going to be modified, without modifying the + * code. + */ addr = GET_ADDR(addr); /* Set to "bl addr" */ - op = 0x48000001 | (ftrace_calc_offset(ip, addr) & 0x03fffffe); + op = 0x48000001 | (ftrace_calc_offset(ip, addr) & 0x03fffffc); /* * No locking needed, this must be called via kstop_machine diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index cf6b5a7d8b3f..4300db52662a 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -43,6 +43,7 @@ #include #include #include +#include #ifdef CONFIG_PPC32 extern void transfer_to_handler(void); @@ -68,6 +69,10 @@ EXPORT_SYMBOL(single_step_exception); EXPORT_SYMBOL(sys_sigreturn); #endif +#ifdef CONFIG_FTRACE +EXPORT_SYMBOL(_mcount); +#endif + EXPORT_SYMBOL(strcpy); EXPORT_SYMBOL(strncpy); EXPORT_SYMBOL(strcat); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 22f8e2bacd32..19e8fcb9cea8 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -47,11 +47,6 @@ #include #endif -#ifdef CONFIG_FTRACE -extern void _mcount(void); -EXPORT_SYMBOL(_mcount); -#endif - extern void bootx_init(unsigned long r4, unsigned long phys); int boot_cpuid; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 277bf18cbbcc..098fd96a394a 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -85,11 +85,6 @@ struct ppc64_caches ppc64_caches = { }; EXPORT_SYMBOL_GPL(ppc64_caches); -#ifdef CONFIG_FTRACE -extern void _mcount(void); -EXPORT_SYMBOL(_mcount); -#endif - /* * These are used in binfmt_elf.c to put aux entries on the stack * for each elf executable being started. diff --git a/include/asm-powerpc/ftrace.h b/include/asm-powerpc/ftrace.h new file mode 100644 index 000000000000..b1bfa704b6e5 --- /dev/null +++ b/include/asm-powerpc/ftrace.h @@ -0,0 +1,6 @@ +#ifndef _ASM_POWERPC_FTRACE +#define _ASM_POWERPC_FTRACE + +extern void _mcount(void); + +#endif -- cgit v1.2.3 From 4226ab93d8ae3fd895abe45879fe34d489a98718 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:30:58 +0100 Subject: x86: use pteval_t for _PAGE_FOO Rather than making _PAGE_* constants signed, and then relying on sign-extension to make sure that masks derived from them are wide enough, just explicitly type them pteval_t. This guarantees that they and any derived values are the right size for the current pte format. The reliance on sign extension is fragile, and invokes some very subtle corners of the C type system. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- include/asm-x86/pgtable.h | 49 +++++++++++++++++++++-------------------------- 1 file changed, 22 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h index 97c271b2910b..fab3ecb54b68 100644 --- a/include/asm-x86/pgtable.h +++ b/include/asm-x86/pgtable.h @@ -20,30 +20,25 @@ #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ -/* - * Note: we use _AC(1, L) instead of _AC(1, UL) so that we get a - * sign-extended value on 32-bit with all 1's in the upper word, - * which preserves the upper pte values on 64-bit ptes: - */ -#define _PAGE_PRESENT (_AC(1, L)<<_PAGE_BIT_PRESENT) -#define _PAGE_RW (_AC(1, L)<<_PAGE_BIT_RW) -#define _PAGE_USER (_AC(1, L)<<_PAGE_BIT_USER) -#define _PAGE_PWT (_AC(1, L)<<_PAGE_BIT_PWT) -#define _PAGE_PCD (_AC(1, L)<<_PAGE_BIT_PCD) -#define _PAGE_ACCESSED (_AC(1, L)<<_PAGE_BIT_ACCESSED) -#define _PAGE_DIRTY (_AC(1, L)<<_PAGE_BIT_DIRTY) -#define _PAGE_PSE (_AC(1, L)<<_PAGE_BIT_PSE) /* 2MB page */ -#define _PAGE_GLOBAL (_AC(1, L)<<_PAGE_BIT_GLOBAL) /* Global TLB entry */ -#define _PAGE_UNUSED1 (_AC(1, L)<<_PAGE_BIT_UNUSED1) -#define _PAGE_UNUSED2 (_AC(1, L)<<_PAGE_BIT_UNUSED2) -#define _PAGE_UNUSED3 (_AC(1, L)<<_PAGE_BIT_UNUSED3) -#define _PAGE_PAT (_AC(1, L)<<_PAGE_BIT_PAT) -#define _PAGE_PAT_LARGE (_AC(1, L)<<_PAGE_BIT_PAT_LARGE) +#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT) +#define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW) +#define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER) +#define _PAGE_PWT (_AT(pteval_t, 1) << _PAGE_BIT_PWT) +#define _PAGE_PCD (_AT(pteval_t, 1) << _PAGE_BIT_PCD) +#define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED) +#define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY) +#define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) +#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) +#define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1) +#define _PAGE_UNUSED2 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED2) +#define _PAGE_UNUSED3 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3) +#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) +#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) -#define _PAGE_NX (_AC(1, ULL) << _PAGE_BIT_NX) +#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) #else -#define _PAGE_NX 0 +#define _PAGE_NX (_AT(pteval_t, 0)) #endif /* If _PAGE_PRESENT is clear, we use these: */ @@ -210,22 +205,22 @@ static inline int pmd_large(pmd_t pte) static inline pte_t pte_mkclean(pte_t pte) { - return __pte(pte_val(pte) & ~(pteval_t)_PAGE_DIRTY); + return __pte(pte_val(pte) & ~_PAGE_DIRTY); } static inline pte_t pte_mkold(pte_t pte) { - return __pte(pte_val(pte) & ~(pteval_t)_PAGE_ACCESSED); + return __pte(pte_val(pte) & ~_PAGE_ACCESSED); } static inline pte_t pte_wrprotect(pte_t pte) { - return __pte(pte_val(pte) & ~(pteval_t)_PAGE_RW); + return __pte(pte_val(pte) & ~_PAGE_RW); } static inline pte_t pte_mkexec(pte_t pte) { - return __pte(pte_val(pte) & ~(pteval_t)_PAGE_NX); + return __pte(pte_val(pte) & ~_PAGE_NX); } static inline pte_t pte_mkdirty(pte_t pte) @@ -250,7 +245,7 @@ static inline pte_t pte_mkhuge(pte_t pte) static inline pte_t pte_clrhuge(pte_t pte) { - return __pte(pte_val(pte) & ~(pteval_t)_PAGE_PSE); + return __pte(pte_val(pte) & ~_PAGE_PSE); } static inline pte_t pte_mkglobal(pte_t pte) @@ -260,7 +255,7 @@ static inline pte_t pte_mkglobal(pte_t pte) static inline pte_t pte_clrglobal(pte_t pte) { - return __pte(pte_val(pte) & ~(pteval_t)_PAGE_GLOBAL); + return __pte(pte_val(pte) & ~_PAGE_GLOBAL); } static inline pte_t pte_mkspecial(pte_t pte) -- cgit v1.2.3 From 4e09e21ccb0dfe7ee8d5641192e0072e83bd916b Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:03 +0100 Subject: x86: use symbolic constant in stts() Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- include/asm-x86/system.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h index a2f04cd79b29..7e4c133795a1 100644 --- a/include/asm-x86/system.h +++ b/include/asm-x86/system.h @@ -289,7 +289,7 @@ static inline void native_wbinvd(void) #endif/* CONFIG_PARAVIRT */ -#define stts() write_cr0(8 | read_cr0()) +#define stts() write_cr0(read_cr0() | X86_CR0_TS) #endif /* __KERNEL__ */ -- cgit v1.2.3 From 0acf10d8fbd52926217d3933d196b33fe2468f18 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:30:59 +0100 Subject: xen: add raw console write functions for debug Add a couple of functions which can write directly to the Xen console for debugging. This output ends up on the host's dom0 console (assuming it allows the domain to write there). Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- drivers/char/hvc_xen.c | 26 ++++++++++++++++++++++++++ include/xen/hvc-console.h | 3 +++ 2 files changed, 29 insertions(+) (limited to 'include') diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c index dd68f8541c2d..e97d9d168325 100644 --- a/drivers/char/hvc_xen.c +++ b/drivers/char/hvc_xen.c @@ -157,3 +157,29 @@ struct console xenboot_console = { .write = xenboot_write_console, .flags = CON_PRINTBUFFER | CON_BOOT, }; + +void xen_raw_console_write(const char *str) +{ + int len = strlen(str); + + while(len > 0) { + int rc = HYPERVISOR_console_io(CONSOLEIO_write, len, (char *)str); + if (rc <= 0) + break; + + str += rc; + len -= rc; + } +} + +void xen_raw_printk(const char *fmt, ...) +{ + static char buf[512]; + va_list ap; + + va_start(ap, fmt); + vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + xen_raw_console_write(buf); +} diff --git a/include/xen/hvc-console.h b/include/xen/hvc-console.h index 21c0ecfd786d..efc3237ab990 100644 --- a/include/xen/hvc-console.h +++ b/include/xen/hvc-console.h @@ -3,4 +3,7 @@ extern struct console xenboot_console; +void xen_raw_console_write(const char *str); +void xen_raw_printk(const char *fmt, ...); + #endif /* XEN_HVC_CONSOLE_H */ -- cgit v1.2.3 From 7b1333aa4cb546ddeb9c05098a53d9a777623a05 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:01 +0100 Subject: xen: use hypercall rather than clts Xen will trap and emulate clts, but its better to use a hypercall. Also, xenner doesn't handle clts. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- arch/x86/xen/enlighten.c | 28 ++++++++++++++++++++++++++-- include/asm-x86/xen/hypercall.h | 7 +++++++ 2 files changed, 33 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index a05b7721bc81..446f4cd649e6 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -607,6 +607,30 @@ static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, xen_mc_issue(PARAVIRT_LAZY_MMU); } +static void xen_clts(void) +{ + struct multicall_space mcs; + + mcs = xen_mc_entry(0); + + MULTI_fpu_taskswitch(mcs.mc, 0); + + xen_mc_issue(PARAVIRT_LAZY_CPU); +} + +static void xen_write_cr0(unsigned long cr0) +{ + struct multicall_space mcs; + + /* Only pay attention to cr0.TS; everything else is + ignored. */ + mcs = xen_mc_entry(0); + + MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0); + + xen_mc_issue(PARAVIRT_LAZY_CPU); +} + static void xen_write_cr2(unsigned long cr2) { x86_read_percpu(xen_vcpu)->arch.cr2 = cr2; @@ -978,10 +1002,10 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { .set_debugreg = xen_set_debugreg, .get_debugreg = xen_get_debugreg, - .clts = native_clts, + .clts = xen_clts, .read_cr0 = native_read_cr0, - .write_cr0 = native_write_cr0, + .write_cr0 = xen_write_cr0, .read_cr4 = native_read_cr4, .read_cr4_safe = native_read_cr4_safe, diff --git a/include/asm-x86/xen/hypercall.h b/include/asm-x86/xen/hypercall.h index c2ccd997ed35..897ff79cca3b 100644 --- a/include/asm-x86/xen/hypercall.h +++ b/include/asm-x86/xen/hypercall.h @@ -314,6 +314,13 @@ HYPERVISOR_nmi_op(unsigned long op, unsigned long arg) return _hypercall2(int, nmi_op, op, arg); } +static inline void +MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) +{ + mcl->op = __HYPERVISOR_fpu_taskswitch; + mcl->args[0] = set; +} + static inline void MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va, pte_t new_val, unsigned long flags) -- cgit v1.2.3 From 349c709f42453707f74bece0d9d35ee5b3842893 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:02 +0100 Subject: xen: use new sched_op Use the new sched_op hypercall, mainly because xenner doesn't support the old one. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- arch/x86/xen/enlighten.c | 6 ++++-- include/asm-x86/xen/hypercall.h | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 446f4cd649e6..35ddaf50180a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -254,7 +254,7 @@ static void xen_irq_enable(void) static void xen_safe_halt(void) { /* Blocking includes an implicit local_irq_enable(). */ - if (HYPERVISOR_sched_op(SCHEDOP_block, 0) != 0) + if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0) BUG(); } @@ -1138,11 +1138,13 @@ static const struct smp_ops xen_smp_ops __initdata = { static void xen_reboot(int reason) { + struct sched_shutdown r = { .reason = reason }; + #ifdef CONFIG_SMP smp_send_stop(); #endif - if (HYPERVISOR_sched_op(SCHEDOP_shutdown, reason)) + if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r)) BUG(); } diff --git a/include/asm-x86/xen/hypercall.h b/include/asm-x86/xen/hypercall.h index 897ff79cca3b..2a4f9b41d684 100644 --- a/include/asm-x86/xen/hypercall.h +++ b/include/asm-x86/xen/hypercall.h @@ -176,9 +176,9 @@ HYPERVISOR_fpu_taskswitch(int set) } static inline int -HYPERVISOR_sched_op(int cmd, unsigned long arg) +HYPERVISOR_sched_op(int cmd, void *arg) { - return _hypercall2(int, sched_op, cmd, arg); + return _hypercall2(int, sched_op_new, cmd, arg); } static inline long -- cgit v1.2.3 From a15af1c9ea2750a9ff01e51615c45950bad8221b Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:06 +0100 Subject: x86/paravirt: add pte_flags to just get pte flags Add pte_flags() to extract the flags from a pte. This is a special case of pte_val() which is only guaranteed to return the pte's flags correctly; the page number may be corrupted or missing. The intent is to allow paravirt implementations to return pte flags without having to do any translation of the page number (most notably, Xen). Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- arch/x86/kernel/paravirt.c | 1 + arch/x86/xen/enlighten.c | 1 + drivers/lguest/lg.h | 1 - include/asm-x86/page.h | 1 + include/asm-x86/paravirt.h | 15 +++++++++++++++ include/asm-x86/pgtable.h | 16 ++++++++-------- 6 files changed, 26 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 74f0c5ea2a03..c98d54688180 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -403,6 +403,7 @@ struct pv_mmu_ops pv_mmu_ops = { #endif /* PAGETABLE_LEVELS >= 3 */ .pte_val = native_pte_val, + .pte_flags = native_pte_val, .pgd_val = native_pgd_val, .make_pte = native_make_pte, diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 4a372b71239d..1b4b5fa498b3 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1101,6 +1101,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .set_pmd = xen_set_pmd, .pte_val = xen_pte_val, + .pte_flags = native_pte_val, .pgd_val = xen_pgd_val, .make_pte = xen_make_pte, diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 005bd045d2eb..5faefeaf6790 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -136,7 +136,6 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user); * first step in the migration to the kernel types. pte_pfn is already defined * in the kernel. */ #define pgd_flags(x) (pgd_val(x) & ~PAGE_MASK) -#define pte_flags(x) (pte_val(x) & ~PAGE_MASK) #define pgd_pfn(x) (pgd_val(x) >> PAGE_SHIFT) /* interrupts_and_traps.c: */ diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h index dc936dddf161..a1e2b9470f25 100644 --- a/include/asm-x86/page.h +++ b/include/asm-x86/page.h @@ -160,6 +160,7 @@ static inline pteval_t native_pte_val(pte_t pte) #endif #define pte_val(x) native_pte_val(x) +#define pte_flags(x) native_pte_val(x) #define __pte(x) native_make_pte(x) #endif /* CONFIG_PARAVIRT */ diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h index 0f13b945e240..5ea37a48eecb 100644 --- a/include/asm-x86/paravirt.h +++ b/include/asm-x86/paravirt.h @@ -239,6 +239,7 @@ struct pv_mmu_ops { unsigned long addr, pte_t *ptep); pteval_t (*pte_val)(pte_t); + pteval_t (*pte_flags)(pte_t); pte_t (*make_pte)(pteval_t pte); pgdval_t (*pgd_val)(pgd_t); @@ -996,6 +997,20 @@ static inline pteval_t pte_val(pte_t pte) return ret; } +static inline pteval_t pte_flags(pte_t pte) +{ + pteval_t ret; + + if (sizeof(pteval_t) > sizeof(long)) + ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_flags, + pte.pte, (u64)pte.pte >> 32); + else + ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_flags, + pte.pte); + + return ret; +} + static inline pgd_t __pgd(pgdval_t val) { pgdval_t ret; diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h index 97c271b2910b..47a852cb8c92 100644 --- a/include/asm-x86/pgtable.h +++ b/include/asm-x86/pgtable.h @@ -164,37 +164,37 @@ extern struct list_head pgd_list; */ static inline int pte_dirty(pte_t pte) { - return pte_val(pte) & _PAGE_DIRTY; + return pte_flags(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { - return pte_val(pte) & _PAGE_ACCESSED; + return pte_flags(pte) & _PAGE_ACCESSED; } static inline int pte_write(pte_t pte) { - return pte_val(pte) & _PAGE_RW; + return pte_flags(pte) & _PAGE_RW; } static inline int pte_file(pte_t pte) { - return pte_val(pte) & _PAGE_FILE; + return pte_flags(pte) & _PAGE_FILE; } static inline int pte_huge(pte_t pte) { - return pte_val(pte) & _PAGE_PSE; + return pte_flags(pte) & _PAGE_PSE; } static inline int pte_global(pte_t pte) { - return pte_val(pte) & _PAGE_GLOBAL; + return pte_flags(pte) & _PAGE_GLOBAL; } static inline int pte_exec(pte_t pte) { - return !(pte_val(pte) & _PAGE_NX); + return !(pte_flags(pte) & _PAGE_NX); } static inline int pte_special(pte_t pte) @@ -305,7 +305,7 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) return __pgprot(preservebits | addbits); } -#define pte_pgprot(x) __pgprot(pte_val(x) & ~PTE_MASK) +#define pte_pgprot(x) __pgprot(pte_flags(x) & ~PTE_MASK) #define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) -- cgit v1.2.3 From 9e124fe16ff24746d6de5a2ad685266d7bce0e08 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Mon, 26 May 2008 23:31:07 +0100 Subject: xen: Enable console tty by default in domU if it's not a dummy Without console= arguments on the kernel command line, the first console to register becomes enabled and the preferred console (the one behind /dev/console). This is normally tty (assuming CONFIG_VT_CONSOLE is enabled, which it commonly is). This is okay as long tty is a useful console. But unless we have the PV framebuffer, and it is enabled for this domain, tty0 in domU is merely a dummy. In that case, we want the preferred console to be the Xen console hvc0, and we want it without having to fiddle with the kernel command line. Commit b8c2d3dfbc117dff26058fbac316b8acfc2cb5f7 did that for us. Since we now have the PV framebuffer, we want to enable and prefer tty again, but only when PVFB is enabled. But even then we still want to enable the Xen console as well. Problem: when tty registers, we can't yet know whether the PVFB is enabled. By the time we can know (xenstore is up), the console setup game is over. Solution: enable console tty by default, but keep hvc as the preferred console. Change the preferred console to tty when PVFB probes successfully, unless we've been given console kernel parameters. Signed-off-by: Markus Armbruster Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- arch/x86/xen/enlighten.c | 4 +++- drivers/video/xen-fbfront.c | 25 +++++++++++++++++++++++++ include/linux/console.h | 2 ++ kernel/printk.c | 3 +++ 4 files changed, 33 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 1b4b5fa498b3..6cfb708408e9 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1256,8 +1256,10 @@ asmlinkage void __init xen_start_kernel(void) ? __pa(xen_start_info->mod_start) : 0; boot_params.hdr.ramdisk_size = xen_start_info->mod_len; - if (!is_initial_xendomain()) + if (!is_initial_xendomain()) { + add_preferred_console("tty", 0, NULL); add_preferred_console("hvc", 0, NULL); + } /* Start the world */ start_kernel(); diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c index 619a6f8d65a2..4e10876e62fc 100644 --- a/drivers/video/xen-fbfront.c +++ b/drivers/video/xen-fbfront.c @@ -18,6 +18,7 @@ * frame buffer. */ +#include #include #include #include @@ -48,6 +49,7 @@ struct xenfb_info { static u32 xenfb_mem_len = XENFB_WIDTH * XENFB_HEIGHT * XENFB_DEPTH / 8; +static void xenfb_make_preferred_console(void); static int xenfb_remove(struct xenbus_device *); static void xenfb_init_shared_page(struct xenfb_info *); static int xenfb_connect_backend(struct xenbus_device *, struct xenfb_info *); @@ -348,6 +350,7 @@ static int __devinit xenfb_probe(struct xenbus_device *dev, if (ret < 0) goto error; + xenfb_make_preferred_console(); return 0; error_nomem: @@ -358,6 +361,28 @@ static int __devinit xenfb_probe(struct xenbus_device *dev, return ret; } +static __devinit void +xenfb_make_preferred_console(void) +{ + struct console *c; + + if (console_set_on_cmdline) + return; + + acquire_console_sem(); + for (c = console_drivers; c; c = c->next) { + if (!strcmp(c->name, "tty") && c->index == 0) + break; + } + release_console_sem(); + if (c) { + unregister_console(c); + c->flags |= CON_CONSDEV; + c->flags &= ~CON_PRINTBUFFER; /* don't print again */ + register_console(c); + } +} + static int xenfb_resume(struct xenbus_device *dev) { struct xenfb_info *info = dev->dev.driver_data; diff --git a/include/linux/console.h b/include/linux/console.h index a4f27fbdf549..248e6e3b9b73 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -108,6 +108,8 @@ struct console { struct console *next; }; +extern int console_set_on_cmdline; + extern int add_preferred_console(char *name, int idx, char *options); extern int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options); extern void register_console(struct console *); diff --git a/kernel/printk.c b/kernel/printk.c index 8fb01c32aa3b..028ed75d4864 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -121,6 +121,8 @@ struct console_cmdline static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES]; static int selected_console = -1; static int preferred_console = -1; +int console_set_on_cmdline; +EXPORT_SYMBOL(console_set_on_cmdline); /* Flag: console code may call schedule() */ static int console_may_schedule; @@ -890,6 +892,7 @@ static int __init console_setup(char *str) *s = 0; __add_preferred_console(buf, idx, options, brl_options); + console_set_on_cmdline = 1; return 1; } __setup("console=", console_setup); -- cgit v1.2.3 From 6ba0e7b36c7cc1745b3cbeda244d14edae3ad058 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Mon, 26 May 2008 23:31:08 +0100 Subject: xen pvfb: Pointer z-axis (mouse wheel) support Add z-axis motion to pointer events. Backward compatible, because there's space for the z-axis in union xenkbd_in_event, and old backends zero it. Derived from http://xenbits.xensource.com/linux-2.6.18-xen.hg?rev/57dfe0098000 http://xenbits.xensource.com/linux-2.6.18-xen.hg?rev/1edfea26a2a9 http://xenbits.xensource.com/linux-2.6.18-xen.hg?rev/c3ff0b26f664 Signed-off-by: Pat Campbell Signed-off-by: Markus Armbruster Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- drivers/input/xen-kbdfront.c | 8 +++++++- include/xen/interface/io/kbdif.h | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/input/xen-kbdfront.c b/drivers/input/xen-kbdfront.c index 0f47f4697cdf..9da4452786fa 100644 --- a/drivers/input/xen-kbdfront.c +++ b/drivers/input/xen-kbdfront.c @@ -66,6 +66,9 @@ static irqreturn_t input_handler(int rq, void *dev_id) case XENKBD_TYPE_MOTION: input_report_rel(dev, REL_X, event->motion.rel_x); input_report_rel(dev, REL_Y, event->motion.rel_y); + if (event->motion.rel_z) + input_report_rel(dev, REL_WHEEL, + -event->motion.rel_z); break; case XENKBD_TYPE_KEY: dev = NULL; @@ -84,6 +87,9 @@ static irqreturn_t input_handler(int rq, void *dev_id) case XENKBD_TYPE_POS: input_report_abs(dev, ABS_X, event->pos.abs_x); input_report_abs(dev, ABS_Y, event->pos.abs_y); + if (event->pos.rel_z) + input_report_rel(dev, REL_WHEEL, + -event->pos.rel_z); break; } if (dev) @@ -152,7 +158,7 @@ static int __devinit xenkbd_probe(struct xenbus_device *dev, ptr->evbit[0] = BIT(EV_KEY) | BIT(EV_REL) | BIT(EV_ABS); for (i = BTN_LEFT; i <= BTN_TASK; i++) set_bit(i, ptr->keybit); - ptr->relbit[0] = BIT(REL_X) | BIT(REL_Y); + ptr->relbit[0] = BIT(REL_X) | BIT(REL_Y) | BIT(REL_WHEEL); input_set_abs_params(ptr, ABS_X, 0, XENFB_WIDTH, 0, 0); input_set_abs_params(ptr, ABS_Y, 0, XENFB_HEIGHT, 0, 0); diff --git a/include/xen/interface/io/kbdif.h b/include/xen/interface/io/kbdif.h index fb97f4284ffd..8066c7849fbe 100644 --- a/include/xen/interface/io/kbdif.h +++ b/include/xen/interface/io/kbdif.h @@ -49,6 +49,7 @@ struct xenkbd_motion { uint8_t type; /* XENKBD_TYPE_MOTION */ int32_t rel_x; /* relative X motion */ int32_t rel_y; /* relative Y motion */ + int32_t rel_z; /* relative Z motion (wheel) */ }; struct xenkbd_key { @@ -61,6 +62,7 @@ struct xenkbd_position { uint8_t type; /* XENKBD_TYPE_POS */ int32_t abs_x; /* absolute X position (in FB pixels) */ int32_t abs_y; /* absolute Y position (in FB pixels) */ + int32_t rel_z; /* relative Z motion (wheel) */ }; #define XENKBD_IN_EVENT_SIZE 40 -- cgit v1.2.3 From e4dcff1f6e7582f76c2c9990b1d9111bbc8e26ef Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Mon, 26 May 2008 23:31:11 +0100 Subject: xen pvfb: Dynamic mode support (screen resizing) The pvfb backend indicates dynamic mode support by creating node feature_resize with a non-zero value in its xenstore directory. xen-fbfront sends a resize notification event on mode change. Fully backwards compatible both ways. Framebuffer size and initial resolution can be controlled through kernel parameter xen_fbfront.video. The backend enforces a separate size limit, which it advertises in node videoram in its xenstore directory. xen-kbdfront gets the maximum screen resolution from nodes width and height in the backend's xenstore directory instead of hardcoding it. Additional goodie: support for larger framebuffers (512M on a 64-bit system with 4K pages). Changing the number of bits per pixels dynamically is not supported, yet. Ported from http://xenbits.xensource.com/linux-2.6.18-xen.hg?rev/92f7b3144f41 http://xenbits.xensource.com/linux-2.6.18-xen.hg?rev/bfc040135633 Signed-off-by: Pat Campbell Signed-off-by: Markus Armbruster Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- drivers/input/xen-kbdfront.c | 10 +++ drivers/video/xen-fbfront.c | 183 +++++++++++++++++++++++++++++++++------- include/xen/interface/io/fbif.h | 29 +++++-- 3 files changed, 188 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/drivers/input/xen-kbdfront.c b/drivers/input/xen-kbdfront.c index eaf69cf5b444..9ce3b3baf3a2 100644 --- a/drivers/input/xen-kbdfront.c +++ b/drivers/input/xen-kbdfront.c @@ -300,6 +300,16 @@ InitWait: */ if (dev->state != XenbusStateConnected) goto InitWait; /* no InitWait seen yet, fudge it */ + + /* Set input abs params to match backend screen res */ + if (xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "width", "%d", &val) > 0) + input_set_abs_params(info->ptr, ABS_X, 0, val, 0, 0); + + if (xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "height", "%d", &val) > 0) + input_set_abs_params(info->ptr, ABS_Y, 0, val, 0, 0); + break; case XenbusStateClosing: diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c index 291eef695594..47ed39b52f9c 100644 --- a/drivers/video/xen-fbfront.c +++ b/drivers/video/xen-fbfront.c @@ -43,23 +43,47 @@ struct xenfb_info { struct xenfb_page *page; unsigned long *mfns; int update_wanted; /* XENFB_TYPE_UPDATE wanted */ + int feature_resize; /* XENFB_TYPE_RESIZE ok */ + struct xenfb_resize resize; /* protected by resize_lock */ + int resize_dpy; /* ditto */ + spinlock_t resize_lock; struct xenbus_device *xbdev; }; -static u32 xenfb_mem_len = XENFB_WIDTH * XENFB_HEIGHT * XENFB_DEPTH / 8; +#define XENFB_DEFAULT_FB_LEN (XENFB_WIDTH * XENFB_HEIGHT * XENFB_DEPTH / 8) + +enum { KPARAM_MEM, KPARAM_WIDTH, KPARAM_HEIGHT, KPARAM_CNT }; +static int video[KPARAM_CNT] = { 2, XENFB_WIDTH, XENFB_HEIGHT }; +module_param_array(video, int, NULL, 0); +MODULE_PARM_DESC(video, + "Video memory size in MB, width, height in pixels (default 2,800,600)"); static void xenfb_make_preferred_console(void); static int xenfb_remove(struct xenbus_device *); -static void xenfb_init_shared_page(struct xenfb_info *); +static void xenfb_init_shared_page(struct xenfb_info *, struct fb_info *); static int xenfb_connect_backend(struct xenbus_device *, struct xenfb_info *); static void xenfb_disconnect_backend(struct xenfb_info *); +static void xenfb_send_event(struct xenfb_info *info, + union xenfb_out_event *event) +{ + u32 prod; + + prod = info->page->out_prod; + /* caller ensures !xenfb_queue_full() */ + mb(); /* ensure ring space available */ + XENFB_OUT_RING_REF(info->page, prod) = *event; + wmb(); /* ensure ring contents visible */ + info->page->out_prod = prod + 1; + + notify_remote_via_irq(info->irq); +} + static void xenfb_do_update(struct xenfb_info *info, int x, int y, int w, int h) { union xenfb_out_event event; - u32 prod; memset(&event, 0, sizeof(event)); event.type = XENFB_TYPE_UPDATE; @@ -68,14 +92,19 @@ static void xenfb_do_update(struct xenfb_info *info, event.update.width = w; event.update.height = h; - prod = info->page->out_prod; /* caller ensures !xenfb_queue_full() */ - mb(); /* ensure ring space available */ - XENFB_OUT_RING_REF(info->page, prod) = event; - wmb(); /* ensure ring contents visible */ - info->page->out_prod = prod + 1; + xenfb_send_event(info, &event); +} - notify_remote_via_irq(info->irq); +static void xenfb_do_resize(struct xenfb_info *info) +{ + union xenfb_out_event event; + + memset(&event, 0, sizeof(event)); + event.resize = info->resize; + + /* caller ensures !xenfb_queue_full() */ + xenfb_send_event(info, &event); } static int xenfb_queue_full(struct xenfb_info *info) @@ -87,12 +116,28 @@ static int xenfb_queue_full(struct xenfb_info *info) return prod - cons == XENFB_OUT_RING_LEN; } +static void xenfb_handle_resize_dpy(struct xenfb_info *info) +{ + unsigned long flags; + + spin_lock_irqsave(&info->resize_lock, flags); + if (info->resize_dpy) { + if (!xenfb_queue_full(info)) { + info->resize_dpy = 0; + xenfb_do_resize(info); + } + } + spin_unlock_irqrestore(&info->resize_lock, flags); +} + static void xenfb_refresh(struct xenfb_info *info, int x1, int y1, int w, int h) { unsigned long flags; - int y2 = y1 + h - 1; int x2 = x1 + w - 1; + int y2 = y1 + h - 1; + + xenfb_handle_resize_dpy(info); if (!info->update_wanted) return; @@ -225,6 +270,57 @@ static ssize_t xenfb_write(struct fb_info *p, const char __user *buf, return res; } +static int +xenfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) +{ + struct xenfb_info *xenfb_info; + int required_mem_len; + + xenfb_info = info->par; + + if (!xenfb_info->feature_resize) { + if (var->xres == video[KPARAM_WIDTH] && + var->yres == video[KPARAM_HEIGHT] && + var->bits_per_pixel == xenfb_info->page->depth) { + return 0; + } + return -EINVAL; + } + + /* Can't resize past initial width and height */ + if (var->xres > video[KPARAM_WIDTH] || var->yres > video[KPARAM_HEIGHT]) + return -EINVAL; + + required_mem_len = var->xres * var->yres * xenfb_info->page->depth / 8; + if (var->bits_per_pixel == xenfb_info->page->depth && + var->xres <= info->fix.line_length / (XENFB_DEPTH / 8) && + required_mem_len <= info->fix.smem_len) { + var->xres_virtual = var->xres; + var->yres_virtual = var->yres; + return 0; + } + return -EINVAL; +} + +static int xenfb_set_par(struct fb_info *info) +{ + struct xenfb_info *xenfb_info; + unsigned long flags; + + xenfb_info = info->par; + + spin_lock_irqsave(&xenfb_info->resize_lock, flags); + xenfb_info->resize.type = XENFB_TYPE_RESIZE; + xenfb_info->resize.width = info->var.xres; + xenfb_info->resize.height = info->var.yres; + xenfb_info->resize.stride = info->fix.line_length; + xenfb_info->resize.depth = info->var.bits_per_pixel; + xenfb_info->resize.offset = 0; + xenfb_info->resize_dpy = 1; + spin_unlock_irqrestore(&xenfb_info->resize_lock, flags); + return 0; +} + static struct fb_ops xenfb_fb_ops = { .owner = THIS_MODULE, .fb_read = fb_sys_read, @@ -233,6 +329,8 @@ static struct fb_ops xenfb_fb_ops = { .fb_fillrect = xenfb_fillrect, .fb_copyarea = xenfb_copyarea, .fb_imageblit = xenfb_imageblit, + .fb_check_var = xenfb_check_var, + .fb_set_par = xenfb_set_par, }; static irqreturn_t xenfb_event_handler(int rq, void *dev_id) @@ -261,6 +359,8 @@ static int __devinit xenfb_probe(struct xenbus_device *dev, { struct xenfb_info *info; struct fb_info *fb_info; + int fb_size; + int val; int ret; info = kzalloc(sizeof(*info), GFP_KERNEL); @@ -268,18 +368,35 @@ static int __devinit xenfb_probe(struct xenbus_device *dev, xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure"); return -ENOMEM; } + + /* Limit kernel param videoram amount to what is in xenstore */ + if (xenbus_scanf(XBT_NIL, dev->otherend, "videoram", "%d", &val) == 1) { + if (val < video[KPARAM_MEM]) + video[KPARAM_MEM] = val; + } + + /* If requested res does not fit in available memory, use default */ + fb_size = video[KPARAM_MEM] * 1024 * 1024; + if (video[KPARAM_WIDTH] * video[KPARAM_HEIGHT] * XENFB_DEPTH / 8 + > fb_size) { + video[KPARAM_WIDTH] = XENFB_WIDTH; + video[KPARAM_HEIGHT] = XENFB_HEIGHT; + fb_size = XENFB_DEFAULT_FB_LEN; + } + dev->dev.driver_data = info; info->xbdev = dev; info->irq = -1; info->x1 = info->y1 = INT_MAX; spin_lock_init(&info->dirty_lock); + spin_lock_init(&info->resize_lock); - info->fb = vmalloc(xenfb_mem_len); + info->fb = vmalloc(fb_size); if (info->fb == NULL) goto error_nomem; - memset(info->fb, 0, xenfb_mem_len); + memset(info->fb, 0, fb_size); - info->nr_pages = (xenfb_mem_len + PAGE_SIZE - 1) >> PAGE_SHIFT; + info->nr_pages = (fb_size + PAGE_SIZE - 1) >> PAGE_SHIFT; info->mfns = vmalloc(sizeof(unsigned long) * info->nr_pages); if (!info->mfns) @@ -290,8 +407,6 @@ static int __devinit xenfb_probe(struct xenbus_device *dev, if (!info->page) goto error_nomem; - xenfb_init_shared_page(info); - /* abusing framebuffer_alloc() to allocate pseudo_palette */ fb_info = framebuffer_alloc(sizeof(u32) * 256, NULL); if (fb_info == NULL) @@ -304,9 +419,9 @@ static int __devinit xenfb_probe(struct xenbus_device *dev, fb_info->screen_base = info->fb; fb_info->fbops = &xenfb_fb_ops; - fb_info->var.xres_virtual = fb_info->var.xres = info->page->width; - fb_info->var.yres_virtual = fb_info->var.yres = info->page->height; - fb_info->var.bits_per_pixel = info->page->depth; + fb_info->var.xres_virtual = fb_info->var.xres = video[KPARAM_WIDTH]; + fb_info->var.yres_virtual = fb_info->var.yres = video[KPARAM_HEIGHT]; + fb_info->var.bits_per_pixel = XENFB_DEPTH; fb_info->var.red = (struct fb_bitfield){16, 8, 0}; fb_info->var.green = (struct fb_bitfield){8, 8, 0}; @@ -318,9 +433,9 @@ static int __devinit xenfb_probe(struct xenbus_device *dev, fb_info->var.vmode = FB_VMODE_NONINTERLACED; fb_info->fix.visual = FB_VISUAL_TRUECOLOR; - fb_info->fix.line_length = info->page->line_length; + fb_info->fix.line_length = fb_info->var.xres * XENFB_DEPTH / 8; fb_info->fix.smem_start = 0; - fb_info->fix.smem_len = xenfb_mem_len; + fb_info->fix.smem_len = fb_size; strcpy(fb_info->fix.id, "xen"); fb_info->fix.type = FB_TYPE_PACKED_PIXELS; fb_info->fix.accel = FB_ACCEL_NONE; @@ -337,6 +452,8 @@ static int __devinit xenfb_probe(struct xenbus_device *dev, fb_info->fbdefio = &xenfb_defio; fb_deferred_io_init(fb_info); + xenfb_init_shared_page(info, fb_info); + ret = register_framebuffer(fb_info); if (ret) { fb_deferred_io_cleanup(fb_info); @@ -389,7 +506,7 @@ static int xenfb_resume(struct xenbus_device *dev) struct xenfb_info *info = dev->dev.driver_data; xenfb_disconnect_backend(info); - xenfb_init_shared_page(info); + xenfb_init_shared_page(info, info->fb_info); return xenfb_connect_backend(dev, info); } @@ -417,20 +534,23 @@ static unsigned long vmalloc_to_mfn(void *address) return pfn_to_mfn(vmalloc_to_pfn(address)); } -static void xenfb_init_shared_page(struct xenfb_info *info) +static void xenfb_init_shared_page(struct xenfb_info *info, + struct fb_info *fb_info) { int i; + int epd = PAGE_SIZE / sizeof(info->mfns[0]); for (i = 0; i < info->nr_pages; i++) info->mfns[i] = vmalloc_to_mfn(info->fb + i * PAGE_SIZE); - info->page->pd[0] = vmalloc_to_mfn(info->mfns); - info->page->pd[1] = 0; - info->page->width = XENFB_WIDTH; - info->page->height = XENFB_HEIGHT; - info->page->depth = XENFB_DEPTH; - info->page->line_length = (info->page->depth / 8) * info->page->width; - info->page->mem_length = xenfb_mem_len; + for (i = 0; i * epd < info->nr_pages; i++) + info->page->pd[i] = vmalloc_to_mfn(&info->mfns[i * epd]); + + info->page->width = fb_info->var.xres; + info->page->height = fb_info->var.yres; + info->page->depth = fb_info->var.bits_per_pixel; + info->page->line_length = fb_info->fix.line_length; + info->page->mem_length = fb_info->fix.smem_len; info->page->in_cons = info->page->in_prod = 0; info->page->out_cons = info->page->out_prod = 0; } @@ -530,6 +650,11 @@ InitWait: val = 0; if (val) info->update_wanted = 1; + + if (xenbus_scanf(XBT_NIL, dev->otherend, + "feature-resize", "%d", &val) < 0) + val = 0; + info->feature_resize = val; break; case XenbusStateClosing: diff --git a/include/xen/interface/io/fbif.h b/include/xen/interface/io/fbif.h index 5a934dd7796d..974a51ed9165 100644 --- a/include/xen/interface/io/fbif.h +++ b/include/xen/interface/io/fbif.h @@ -49,11 +49,27 @@ struct xenfb_update { int32_t height; /* rect height */ }; +/* + * Framebuffer resize notification event + * Capable backend sets feature-resize in xenstore. + */ +#define XENFB_TYPE_RESIZE 3 + +struct xenfb_resize { + uint8_t type; /* XENFB_TYPE_RESIZE */ + int32_t width; /* width in pixels */ + int32_t height; /* height in pixels */ + int32_t stride; /* stride in bytes */ + int32_t depth; /* depth in bits */ + int32_t offset; /* start offset within framebuffer */ +}; + #define XENFB_OUT_EVENT_SIZE 40 union xenfb_out_event { uint8_t type; struct xenfb_update update; + struct xenfb_resize resize; char pad[XENFB_OUT_EVENT_SIZE]; }; @@ -105,15 +121,18 @@ struct xenfb_page { * Each directory page holds PAGE_SIZE / sizeof(*pd) * framebuffer pages, and can thus map up to PAGE_SIZE * * PAGE_SIZE / sizeof(*pd) bytes. With PAGE_SIZE == 4096 and - * sizeof(unsigned long) == 4, that's 4 Megs. Two directory - * pages should be enough for a while. + * sizeof(unsigned long) == 4/8, that's 4 Megs 32 bit and 2 + * Megs 64 bit. 256 directories give enough room for a 512 + * Meg framebuffer with a max resolution of 12,800x10,240. + * Should be enough for a while with room leftover for + * expansion. */ - unsigned long pd[2]; + unsigned long pd[256]; }; /* - * Wart: xenkbd needs to know resolution. Put it here until a better - * solution is found, but don't leak it to the backend. + * Wart: xenkbd needs to know default resolution. Put it here until a + * better solution is found, but don't leak it to the backend. */ #ifdef __KERNEL__ #define XENFB_WIDTH 800 -- cgit v1.2.3 From bfdab126cfa6fe3c2ddb8b6007a38202b510b6c1 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 26 May 2008 23:31:15 +0100 Subject: xen: add missing definitions in include/xen/interface/memory.h which ia64/xen needs Add xen handles realted definitions for xen memory which ia64/xen needs. Pointer argumsnts for ia64/xen hypercall are passed in pseudo physical address (guest physical address) so that it is required to convert guest kernel virtual address into pseudo physical address. The xen guest handle represents such arguments. Signed-off-by: Isaku Yamahata Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- include/xen/interface/memory.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h index da768469aa92..af36ead16817 100644 --- a/include/xen/interface/memory.h +++ b/include/xen/interface/memory.h @@ -29,7 +29,7 @@ struct xen_memory_reservation { * OUT: GMFN bases of extents that were allocated * (NB. This command also updates the mach_to_phys translation table) */ - ulong extent_start; + GUEST_HANDLE(ulong) extent_start; /* Number of extents, and size/alignment of each (2^extent_order pages). */ unsigned long nr_extents; @@ -50,6 +50,7 @@ struct xen_memory_reservation { domid_t domid; }; +DEFINE_GUEST_HANDLE_STRUCT(xen_memory_reservation); /* * Returns the maximum machine frame number of mapped RAM in this system. @@ -85,7 +86,7 @@ struct xen_machphys_mfn_list { * any large discontiguities in the machine address space, 2MB gaps in * the machphys table will be represented by an MFN base of zero. */ - ulong extent_start; + GUEST_HANDLE(ulong) extent_start; /* * Number of extents written to the above array. This will be smaller @@ -93,6 +94,7 @@ struct xen_machphys_mfn_list { */ unsigned int nr_extents; }; +DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list); /* * Sets the GPFN at which a particular page appears in the specified guest's @@ -115,6 +117,7 @@ struct xen_add_to_physmap { /* GPFN where the source mapping page should appear. */ unsigned long gpfn; }; +DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap); /* * Translates a list of domain-specific GPFNs into MFNs. Returns a -ve error @@ -129,13 +132,14 @@ struct xen_translate_gpfn_list { unsigned long nr_gpfns; /* List of GPFNs to translate. */ - ulong gpfn_list; + GUEST_HANDLE(ulong) gpfn_list; /* * Output list to contain MFN translations. May be the same as the input * list (in which case each input GPFN is overwritten with the output MFN). */ - ulong mfn_list; + GUEST_HANDLE(ulong) mfn_list; }; +DEFINE_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list); #endif /* __XEN_PUBLIC_MEMORY_H__ */ -- cgit v1.2.3 From d451bb7aa852627bdf7be7937dc3d9d9f261b235 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:18 +0100 Subject: xen: make phys_to_machine structure dynamic We now support the use of memory hotplug, so the physical to machine page mapping structure must be dynamic. This is implemented as a two-level radix tree structure, which allows us to efficiently incrementally allocate memory for the p2m table as new pages are added. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- arch/x86/xen/enlighten.c | 2 +- arch/x86/xen/mmu.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/xen/setup.c | 2 -- arch/x86/xen/xen-ops.h | 2 ++ include/asm-x86/xen/page.h | 20 ++++------- 5 files changed, 94 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 5c0635a8bffd..73d3c84a3495 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1221,7 +1221,7 @@ asmlinkage void __init xen_start_kernel(void) /* Get mfn list */ if (!xen_feature(XENFEAT_auto_translated_physmap)) - phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list; + xen_build_dynamic_phys_to_machine(); pgd = (pgd_t *)xen_start_info->pt_base; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 07c2653ec335..c3b27dec6f03 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -56,6 +56,91 @@ #include "multicalls.h" #include "mmu.h" +/* + * This should probably be a config option. On 32-bit, it costs 1 + * page/gig of memory; on 64-bit its 2 pages/gig. If we want it to be + * completely unbounded we can add another level to the p2m structure. + */ +#define MAX_GUEST_PAGES (16ull * 1024*1024*1024 / PAGE_SIZE) +#define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) + +static unsigned long *p2m_top[MAX_GUEST_PAGES / P2M_ENTRIES_PER_PAGE]; + +static inline unsigned p2m_top_index(unsigned long pfn) +{ + BUG_ON(pfn >= MAX_GUEST_PAGES); + return pfn / P2M_ENTRIES_PER_PAGE; +} + +static inline unsigned p2m_index(unsigned long pfn) +{ + return pfn % P2M_ENTRIES_PER_PAGE; +} + +void __init xen_build_dynamic_phys_to_machine(void) +{ + unsigned pfn; + unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list; + + BUG_ON(xen_start_info->nr_pages >= MAX_GUEST_PAGES); + + for(pfn = 0; + pfn < xen_start_info->nr_pages; + pfn += P2M_ENTRIES_PER_PAGE) { + unsigned topidx = p2m_top_index(pfn); + + p2m_top[topidx] = &mfn_list[pfn]; + } +} + +unsigned long get_phys_to_machine(unsigned long pfn) +{ + unsigned topidx, idx; + + topidx = p2m_top_index(pfn); + if (p2m_top[topidx] == NULL) + return INVALID_P2M_ENTRY; + + idx = p2m_index(pfn); + return p2m_top[topidx][idx]; +} + +static void alloc_p2m(unsigned long **pp) +{ + unsigned long *p; + unsigned i; + + p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); + BUG_ON(p == NULL); + + for(i = 0; i < P2M_ENTRIES_PER_PAGE; i++) + p[i] = INVALID_P2M_ENTRY; + + if (cmpxchg(pp, NULL, p) != NULL) + free_page((unsigned long)p); +} + +void set_phys_to_machine(unsigned long pfn, unsigned long mfn) +{ + unsigned topidx, idx; + + if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { + BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); + return; + } + + topidx = p2m_top_index(pfn); + if (p2m_top[topidx] == NULL) { + /* no need to allocate a page to store an invalid entry */ + if (mfn == INVALID_P2M_ENTRY) + return; + alloc_p2m(&p2m_top[topidx]); + } + + idx = p2m_index(pfn); + p2m_top[topidx][idx] = mfn; +} + xmaddr_t arbitrary_virt_to_machine(unsigned long address) { unsigned int level; diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 82517e4a752a..37f8f0b8f743 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -27,8 +27,6 @@ extern const char xen_hypervisor_callback[]; extern const char xen_failsafe_callback[]; -unsigned long *phys_to_machine_mapping; -EXPORT_SYMBOL(phys_to_machine_mapping); /** * machine_specific_memory_setup - Hook for machine specific memory setup. diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index f1063ae08037..7bdc8c5c9244 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -22,6 +22,8 @@ void __init xen_arch_setup(void); void __init xen_init_IRQ(void); void xen_enable_sysenter(void); +void __init xen_build_dynamic_phys_to_machine(void); + void xen_setup_timer(int cpu); void xen_setup_cpu_clockevents(void); unsigned long xen_cpu_khz(void); diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h index e11f24038b1d..293344f8102e 100644 --- a/include/asm-x86/xen/page.h +++ b/include/asm-x86/xen/page.h @@ -26,15 +26,15 @@ typedef struct xpaddr { #define FOREIGN_FRAME_BIT (1UL<<31) #define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT) -extern unsigned long *phys_to_machine_mapping; +extern unsigned long get_phys_to_machine(unsigned long pfn); +extern void set_phys_to_machine(unsigned long pfn, unsigned long mfn); static inline unsigned long pfn_to_mfn(unsigned long pfn) { if (xen_feature(XENFEAT_auto_translated_physmap)) return pfn; - return phys_to_machine_mapping[(unsigned int)(pfn)] & - ~FOREIGN_FRAME_BIT; + return get_phys_to_machine(pfn) & ~FOREIGN_FRAME_BIT; } static inline int phys_to_machine_mapping_valid(unsigned long pfn) @@ -42,7 +42,7 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn) if (xen_feature(XENFEAT_auto_translated_physmap)) return 1; - return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY); + return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY; } static inline unsigned long mfn_to_pfn(unsigned long mfn) @@ -106,20 +106,12 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn) unsigned long pfn = mfn_to_pfn(mfn); if ((pfn < max_mapnr) && !xen_feature(XENFEAT_auto_translated_physmap) - && (phys_to_machine_mapping[pfn] != mfn)) + && (get_phys_to_machine(pfn) != mfn)) return max_mapnr; /* force !pfn_valid() */ + /* XXX fixme; not true with sparsemem */ return pfn; } -static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn) -{ - if (xen_feature(XENFEAT_auto_translated_physmap)) { - BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); - return; - } - phys_to_machine_mapping[pfn] = mfn; -} - /* VIRT <-> MACHINE conversion */ #define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v)))) #define virt_to_mfn(v) (pfn_to_mfn(PFN_DOWN(__pa(v)))) -- cgit v1.2.3 From 8006ec3e911f93d702e1d4a4e387e244ab434924 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:19 +0100 Subject: xen: add configurable max domain size Add a config option to set the max size of a Xen domain. This is used to scale the size of the physical-to-machine array; it ends up using around 1 page/GByte, so there's no reason to be very restrictive. For a 32-bit guest, the default value of 8GB is probably sufficient; there's not much point in giving a 32-bit machine much more memory than that. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- arch/x86/xen/Kconfig | 10 ++++++++++ arch/x86/xen/mmu.c | 25 ++++++++++++------------- arch/x86/xen/setup.c | 3 +++ include/asm-x86/xen/page.h | 5 +++++ 4 files changed, 30 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 525b108411bd..d0f1c7c5dc3d 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -11,3 +11,13 @@ config XEN This is the Linux Xen port. Enabling this will allow the kernel to boot in a paravirtualized environment under the Xen hypervisor. + +config XEN_MAX_DOMAIN_MEMORY + int "Maximum allowed size of a domain in gigabytes" + default 8 + depends on XEN + help + The pseudo-physical to machine address array is sized + according to the maximum possible memory size of a Xen + domain. This array uses 1 page per gigabyte, so there's no + need to be too stingy here. \ No newline at end of file diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index c3b27dec6f03..644232aa7bfb 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -56,19 +56,13 @@ #include "multicalls.h" #include "mmu.h" -/* - * This should probably be a config option. On 32-bit, it costs 1 - * page/gig of memory; on 64-bit its 2 pages/gig. If we want it to be - * completely unbounded we can add another level to the p2m structure. - */ -#define MAX_GUEST_PAGES (16ull * 1024*1024*1024 / PAGE_SIZE) #define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) -static unsigned long *p2m_top[MAX_GUEST_PAGES / P2M_ENTRIES_PER_PAGE]; +static unsigned long *p2m_top[MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE]; static inline unsigned p2m_top_index(unsigned long pfn) { - BUG_ON(pfn >= MAX_GUEST_PAGES); + BUG_ON(pfn >= MAX_DOMAIN_PAGES); return pfn / P2M_ENTRIES_PER_PAGE; } @@ -81,12 +75,9 @@ void __init xen_build_dynamic_phys_to_machine(void) { unsigned pfn; unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list; + unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); - BUG_ON(xen_start_info->nr_pages >= MAX_GUEST_PAGES); - - for(pfn = 0; - pfn < xen_start_info->nr_pages; - pfn += P2M_ENTRIES_PER_PAGE) { + for(pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) { unsigned topidx = p2m_top_index(pfn); p2m_top[topidx] = &mfn_list[pfn]; @@ -97,6 +88,9 @@ unsigned long get_phys_to_machine(unsigned long pfn) { unsigned topidx, idx; + if (unlikely(pfn >= MAX_DOMAIN_PAGES)) + return INVALID_P2M_ENTRY; + topidx = p2m_top_index(pfn); if (p2m_top[topidx] == NULL) return INVALID_P2M_ENTRY; @@ -129,6 +123,11 @@ void set_phys_to_machine(unsigned long pfn, unsigned long mfn) return; } + if (unlikely(pfn >= MAX_DOMAIN_PAGES)) { + BUG_ON(mfn != INVALID_P2M_ENTRY); + return; + } + topidx = p2m_top_index(pfn); if (p2m_top[topidx] == NULL) { /* no need to allocate a page to store an invalid entry */ diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 37f8f0b8f743..488447878a9d 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -36,6 +37,8 @@ char * __init xen_memory_setup(void) { unsigned long max_pfn = xen_start_info->nr_pages; + max_pfn = min(MAX_DOMAIN_PAGES, max_pfn); + e820.nr_map = 0; add_memory_region(0, LOWMEMSIZE(), E820_RAM); add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM); diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h index 293344f8102e..377c04591c15 100644 --- a/include/asm-x86/xen/page.h +++ b/include/asm-x86/xen/page.h @@ -26,6 +26,11 @@ typedef struct xpaddr { #define FOREIGN_FRAME_BIT (1UL<<31) #define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT) +/* Maximum amount of memory we can handle in a domain in pages */ +#define MAX_DOMAIN_PAGES \ + ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE)) + + extern unsigned long get_phys_to_machine(unsigned long pfn); extern void set_phys_to_machine(unsigned long pfn, unsigned long mfn); -- cgit v1.2.3 From eb1e305f4ef201e549ffd475b7dcbcd4ec36d7dc Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:23 +0100 Subject: xen: add rebind_evtchn_irq Add rebind_evtchn_irq(), which will rebind an device driver's existing irq to a new event channel on restore. Since the new event channel will be masked and bound to vcpu0, we update the state accordingly and unmask the irq once everything is set up. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- drivers/xen/events.c | 27 +++++++++++++++++++++++++++ include/xen/events.h | 1 + 2 files changed, 28 insertions(+) (limited to 'include') diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 4f0f22b020ea..f64e9798129c 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -557,6 +557,33 @@ out: put_cpu(); } +/* Rebind a new event channel to an existing irq. */ +void rebind_evtchn_irq(int evtchn, int irq) +{ + /* Make sure the irq is masked, since the new event channel + will also be masked. */ + disable_irq(irq); + + spin_lock(&irq_mapping_update_lock); + + /* After resume the irq<->evtchn mappings are all cleared out */ + BUG_ON(evtchn_to_irq[evtchn] != -1); + /* Expect irq to have been bound before, + so the bindcount should be non-0 */ + BUG_ON(irq_bindcount[irq] == 0); + + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn); + + spin_unlock(&irq_mapping_update_lock); + + /* new event channels are always bound to cpu 0 */ + irq_set_affinity(irq, cpumask_of_cpu(0)); + + /* Unmask the event channel. */ + enable_irq(irq); +} + /* Rebind an evtchn so that it gets delivered to a specific cpu */ static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu) { diff --git a/include/xen/events.h b/include/xen/events.h index acd8e062c85f..a82ec0c45c38 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -32,6 +32,7 @@ void unbind_from_irqhandler(unsigned int irq, void *dev_id); void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector); int resend_irq_on_evtchn(unsigned int irq); +void rebind_evtchn_irq(int evtchn, int irq); static inline void notify_remote_via_evtchn(int port) { -- cgit v1.2.3 From 6b9b732d0e396a3f1a95977162a8624aafce38a1 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:25 +0100 Subject: xen-console: add save/restore Add code to: 1. Deal with the console page being canonicalized. During save, the console's mfn in the start_info structure is canonicalized to a pfn. In order to deal with that, we always use a copy of the pfn and indirect off that all the time. However, we fall back to using the mfn if the pfn hasn't been initialized yet. 2. Restore the console event channel, and rebind it to the existing irq. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- drivers/char/hvc_xen.c | 25 +++++++++++++++++++++---- include/xen/hvc-console.h | 2 ++ 2 files changed, 23 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c index d5fe0a8952fa..db2ae4216279 100644 --- a/drivers/char/hvc_xen.c +++ b/drivers/char/hvc_xen.c @@ -39,9 +39,14 @@ static int xencons_irq; /* ------------------------------------------------------------------ */ +static unsigned long console_pfn = ~0ul; + static inline struct xencons_interface *xencons_interface(void) { - return mfn_to_virt(xen_start_info->console.domU.mfn); + if (console_pfn == ~0ul) + return mfn_to_virt(xen_start_info->console.domU.mfn); + else + return __va(console_pfn << PAGE_SHIFT); } static inline void notify_daemon(void) @@ -101,20 +106,32 @@ static int __init xen_init(void) { struct hvc_struct *hp; - if (!is_running_on_xen()) - return 0; + if (!is_running_on_xen() || + is_initial_xendomain() || + !xen_start_info->console.domU.evtchn) + return -ENODEV; xencons_irq = bind_evtchn_to_irq(xen_start_info->console.domU.evtchn); if (xencons_irq < 0) - xencons_irq = 0 /* NO_IRQ */; + xencons_irq = 0; /* NO_IRQ */ + hp = hvc_alloc(HVC_COOKIE, xencons_irq, &hvc_ops, 256); if (IS_ERR(hp)) return PTR_ERR(hp); hvc = hp; + + console_pfn = mfn_to_pfn(xen_start_info->console.domU.mfn); + return 0; } +void xen_console_resume(void) +{ + if (xencons_irq) + rebind_evtchn_irq(xen_start_info->console.domU.evtchn, xencons_irq); +} + static void __exit xen_fini(void) { if (hvc) diff --git a/include/xen/hvc-console.h b/include/xen/hvc-console.h index efc3237ab990..fd5483a059bb 100644 --- a/include/xen/hvc-console.h +++ b/include/xen/hvc-console.h @@ -3,6 +3,8 @@ extern struct console xenboot_console; +void xen_console_resume(void); + void xen_raw_console_write(const char *str); void xen_raw_printk(const char *fmt, ...); -- cgit v1.2.3 From 0e91398f2a5d4eb6b07df8115917d0d1cf3e9b58 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:27 +0100 Subject: xen: implement save/restore This patch implements Xen save/restore and migration. Saving is triggered via xenbus, which is polled in drivers/xen/manage.c. When a suspend request comes in, the kernel prepares itself for saving by: 1 - Freeze all processes. This is primarily to prevent any partially-completed pagetable updates from confusing the suspend process. If CONFIG_PREEMPT isn't defined, then this isn't necessary. 2 - Suspend xenbus and other devices 3 - Stop_machine, to make sure all the other vcpus are quiescent. The Xen tools require the domain to run its save off vcpu0. 4 - Within the stop_machine state, it pins any unpinned pgds (under construction or destruction), performs canonicalizes various other pieces of state (mostly converting mfns to pfns), and finally 5 - Suspend the domain Restore reverses the steps used to save the domain, ending when all the frozen processes are thawed. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- arch/x86/xen/Makefile | 2 +- arch/x86/xen/enlighten.c | 6 +-- arch/x86/xen/mmu.c | 46 +++++++++++++++++ arch/x86/xen/smp.c | 2 +- arch/x86/xen/suspend.c | 42 +++++++++++++++ arch/x86/xen/time.c | 8 +++ arch/x86/xen/xen-ops.h | 4 ++ drivers/xen/events.c | 83 +++++++++++++++++++++++++++++ drivers/xen/grant-table.c | 4 +- drivers/xen/manage.c | 126 ++++++++++++++++++++++++++++++++++++++++----- include/linux/page-flags.h | 1 + include/xen/events.h | 3 ++ include/xen/grant_table.h | 3 ++ include/xen/xen-ops.h | 9 ++++ 14 files changed, 318 insertions(+), 21 deletions(-) create mode 100644 arch/x86/xen/suspend.c (limited to 'include') diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 40b119b6b103..2ba2d1649131 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -1,4 +1,4 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o \ - time.o xen-asm.o grant-table.o + time.o xen-asm.o grant-table.o suspend.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index ce67dc8f36af..b94f63ac228b 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -857,7 +857,7 @@ static __init void xen_pagetable_setup_start(pgd_t *base) PFN_DOWN(__pa(xen_start_info->pt_base))); } -static __init void setup_shared_info(void) +void xen_setup_shared_info(void) { if (!xen_feature(XENFEAT_auto_translated_physmap)) { unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP); @@ -894,7 +894,7 @@ static __init void xen_pagetable_setup_done(pgd_t *base) pv_mmu_ops.release_pmd = xen_release_pmd; pv_mmu_ops.set_pte = xen_set_pte; - setup_shared_info(); + xen_setup_shared_info(); /* Actually pin the pagetable down, but we can't set PG_pinned yet because the page structures don't exist yet. */ @@ -902,7 +902,7 @@ static __init void xen_pagetable_setup_done(pgd_t *base) } /* This is called once we have the cpu_possible_map */ -void __init xen_setup_vcpu_info_placement(void) +void xen_setup_vcpu_info_placement(void) { int cpu; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 4740cda36563..e95955968ba3 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -560,6 +560,29 @@ void xen_pgd_pin(pgd_t *pgd) xen_mc_issue(0); } +/* + * On save, we need to pin all pagetables to make sure they get their + * mfns turned into pfns. Search the list for any unpinned pgds and pin + * them (unpinned pgds are not currently in use, probably because the + * process is under construction or destruction). + */ +void xen_mm_pin_all(void) +{ + unsigned long flags; + struct page *page; + + spin_lock_irqsave(&pgd_lock, flags); + + list_for_each_entry(page, &pgd_list, lru) { + if (!PagePinned(page)) { + xen_pgd_pin((pgd_t *)page_address(page)); + SetPageSavePinned(page); + } + } + + spin_unlock_irqrestore(&pgd_lock, flags); +} + /* The init_mm pagetable is really pinned as soon as its created, but that's before we have page structures to store the bits. So do all the book-keeping now. */ @@ -617,6 +640,29 @@ static void xen_pgd_unpin(pgd_t *pgd) xen_mc_issue(0); } +/* + * On resume, undo any pinning done at save, so that the rest of the + * kernel doesn't see any unexpected pinned pagetables. + */ +void xen_mm_unpin_all(void) +{ + unsigned long flags; + struct page *page; + + spin_lock_irqsave(&pgd_lock, flags); + + list_for_each_entry(page, &pgd_list, lru) { + if (PageSavePinned(page)) { + BUG_ON(!PagePinned(page)); + printk("unpinning pinned %p\n", page_address(page)); + xen_pgd_unpin((pgd_t *)page_address(page)); + ClearPageSavePinned(page); + } + } + + spin_unlock_irqrestore(&pgd_lock, flags); +} + void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) { spin_lock(&next->page_table_lock); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 74ab8968c525..d2e3c20127d7 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -35,7 +35,7 @@ #include "xen-ops.h" #include "mmu.h" -static cpumask_t xen_cpu_initialized_map; +cpumask_t xen_cpu_initialized_map; static DEFINE_PER_CPU(int, resched_irq) = -1; static DEFINE_PER_CPU(int, callfunc_irq) = -1; static DEFINE_PER_CPU(int, debug_irq) = -1; diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c new file mode 100644 index 000000000000..7620a16fe535 --- /dev/null +++ b/arch/x86/xen/suspend.c @@ -0,0 +1,42 @@ +#include + +#include +#include +#include + +#include +#include + +#include "xen-ops.h" +#include "mmu.h" + +void xen_pre_suspend(void) +{ + xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); + xen_start_info->console.domU.mfn = + mfn_to_pfn(xen_start_info->console.domU.mfn); + + BUG_ON(!irqs_disabled()); + + HYPERVISOR_shared_info = &xen_dummy_shared_info; + if (HYPERVISOR_update_va_mapping(fix_to_virt(FIX_PARAVIRT_BOOTMAP), + __pte_ma(0), 0)) + BUG(); +} + +void xen_post_suspend(int suspend_cancelled) +{ + if (suspend_cancelled) { + xen_start_info->store_mfn = + pfn_to_mfn(xen_start_info->store_mfn); + xen_start_info->console.domU.mfn = + pfn_to_mfn(xen_start_info->console.domU.mfn); + } else { +#ifdef CONFIG_SMP + xen_cpu_initialized_map = cpu_online_map; +#endif + } + + xen_setup_shared_info(); +} + diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index c39e1a5aa241..0bef256e5f2d 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -572,6 +572,14 @@ void xen_setup_cpu_clockevents(void) clockevents_register_device(&__get_cpu_var(xen_clock_events)); } +void xen_time_suspend(void) +{ +} + +void xen_time_resume(void) +{ +} + __init void xen_time_init(void) { int cpu = smp_processor_id(); diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index a1bc89a8f169..a0503acad664 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -9,6 +9,7 @@ extern const char xen_hypervisor_callback[]; extern const char xen_failsafe_callback[]; +struct trap_info; void xen_copy_trap_info(struct trap_info *traps); DECLARE_PER_CPU(unsigned long, xen_cr3); @@ -19,6 +20,7 @@ extern struct shared_info xen_dummy_shared_info; extern struct shared_info *HYPERVISOR_shared_info; void xen_setup_mfn_list_list(void); +void xen_setup_shared_info(void); char * __init xen_memory_setup(void); void __init xen_arch_setup(void); @@ -59,6 +61,8 @@ int xen_smp_call_function_single(int cpu, void (*func) (void *info), void *info, int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info, int wait); +extern cpumask_t xen_cpu_initialized_map; + /* Declare an asm function, along with symbols needed to make it inlineable */ diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 70375a690761..73d78dc9b875 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -674,6 +674,89 @@ static int retrigger_dynirq(unsigned int irq) return ret; } +static void restore_cpu_virqs(unsigned int cpu) +{ + struct evtchn_bind_virq bind_virq; + int virq, irq, evtchn; + + for (virq = 0; virq < NR_VIRQS; virq++) { + if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) + continue; + + BUG_ON(irq_info[irq].type != IRQT_VIRQ); + BUG_ON(irq_info[irq].index != virq); + + /* Get a new binding from Xen. */ + bind_virq.virq = virq; + bind_virq.vcpu = cpu; + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, + &bind_virq) != 0) + BUG(); + evtchn = bind_virq.port; + + /* Record the new mapping. */ + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); + bind_evtchn_to_cpu(evtchn, cpu); + + /* Ready for use. */ + unmask_evtchn(evtchn); + } +} + +static void restore_cpu_ipis(unsigned int cpu) +{ + struct evtchn_bind_ipi bind_ipi; + int ipi, irq, evtchn; + + for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) { + if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) + continue; + + BUG_ON(irq_info[irq].type != IRQT_IPI); + BUG_ON(irq_info[irq].index != ipi); + + /* Get a new binding from Xen. */ + bind_ipi.vcpu = cpu; + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, + &bind_ipi) != 0) + BUG(); + evtchn = bind_ipi.port; + + /* Record the new mapping. */ + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); + bind_evtchn_to_cpu(evtchn, cpu); + + /* Ready for use. */ + unmask_evtchn(evtchn); + + } +} + +void xen_irq_resume(void) +{ + unsigned int cpu, irq, evtchn; + + init_evtchn_cpu_bindings(); + + /* New event-channel space is not 'live' yet. */ + for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) + mask_evtchn(evtchn); + + /* No IRQ <-> event-channel mappings. */ + for (irq = 0; irq < NR_IRQS; irq++) + irq_info[irq].evtchn = 0; /* zap event-channel binding */ + + for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) + evtchn_to_irq[evtchn] = -1; + + for_each_possible_cpu(cpu) { + restore_cpu_virqs(cpu); + restore_cpu_ipis(cpu); + } +} + static struct irq_chip xen_dynamic_chip __read_mostly = { .name = "xen-dyn", .mask = disable_dynirq, diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 52b6b41b909d..e9e11168616a 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -471,14 +471,14 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx) return 0; } -static int gnttab_resume(void) +int gnttab_resume(void) { if (max_nr_grant_frames() < nr_grant_frames) return -ENOSYS; return gnttab_map(0, nr_grant_frames - 1); } -static int gnttab_suspend(void) +int gnttab_suspend(void) { arch_gnttab_unmap_shared(shared, nr_grant_frames); return 0; diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index aa7af9e6abc0..ba85fa2cff33 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -5,21 +5,113 @@ #include #include #include +#include +#include #include - -#define SHUTDOWN_INVALID -1 -#define SHUTDOWN_POWEROFF 0 -#define SHUTDOWN_SUSPEND 2 -/* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only - * report a crash, not be instructed to crash! - * HALT is the same as POWEROFF, as far as we're concerned. The tools use - * the distinction when we return the reason code to them. - */ -#define SHUTDOWN_HALT 4 +#include +#include +#include +#include + +#include +#include + +enum shutdown_state { + SHUTDOWN_INVALID = -1, + SHUTDOWN_POWEROFF = 0, + SHUTDOWN_SUSPEND = 2, + /* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only + report a crash, not be instructed to crash! + HALT is the same as POWEROFF, as far as we're concerned. The tools use + the distinction when we return the reason code to them. */ + SHUTDOWN_HALT = 4, +}; /* Ignore multiple shutdown requests. */ -static int shutting_down = SHUTDOWN_INVALID; +static enum shutdown_state shutting_down = SHUTDOWN_INVALID; + +static int xen_suspend(void *data) +{ + int *cancelled = data; + + BUG_ON(!irqs_disabled()); + + load_cr3(swapper_pg_dir); + + xen_mm_pin_all(); + gnttab_suspend(); + xen_time_suspend(); + xen_pre_suspend(); + + /* + * This hypercall returns 1 if suspend was cancelled + * or the domain was merely checkpointed, and 0 if it + * is resuming in a new domain. + */ + *cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info)); + + xen_post_suspend(*cancelled); + xen_time_resume(); + gnttab_resume(); + xen_mm_unpin_all(); + + if (!*cancelled) { + xen_irq_resume(); + xen_console_resume(); + } + + return 0; +} + +static void do_suspend(void) +{ + int err; + int cancelled = 1; + + shutting_down = SHUTDOWN_SUSPEND; + +#ifdef CONFIG_PREEMPT + /* If the kernel is preemptible, we need to freeze all the processes + to prevent them from being in the middle of a pagetable update + during suspend. */ + err = freeze_processes(); + if (err) { + printk(KERN_ERR "xen suspend: freeze failed %d\n", err); + return; + } +#endif + + err = device_suspend(PMSG_SUSPEND); + if (err) { + printk(KERN_ERR "xen suspend: device_suspend %d\n", err); + goto out; + } + + printk("suspending xenbus...\n"); + /* XXX use normal device tree? */ + xenbus_suspend(); + + err = stop_machine_run(xen_suspend, &cancelled, 0); + if (err) { + printk(KERN_ERR "failed to start xen_suspend: %d\n", err); + goto out; + } + + if (!cancelled) + xenbus_resume(); + else + xenbus_suspend_cancel(); + + device_resume(); + + +out: +#ifdef CONFIG_PREEMPT + thaw_processes(); +#endif + shutting_down = SHUTDOWN_INVALID; +} static void shutdown_handler(struct xenbus_watch *watch, const char **vec, unsigned int len) @@ -52,11 +144,17 @@ static void shutdown_handler(struct xenbus_watch *watch, } if (strcmp(str, "poweroff") == 0 || - strcmp(str, "halt") == 0) + strcmp(str, "halt") == 0) { + shutting_down = SHUTDOWN_POWEROFF; orderly_poweroff(false); - else if (strcmp(str, "reboot") == 0) + } else if (strcmp(str, "reboot") == 0) { + shutting_down = SHUTDOWN_POWEROFF; /* ? */ ctrl_alt_del(); - else { +#ifdef CONFIG_PM_SLEEP + } else if (strcmp(str, "suspend") == 0) { + do_suspend(); +#endif + } else { printk(KERN_INFO "Ignoring shutdown request: %s\n", str); shutting_down = SHUTDOWN_INVALID; } diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 590cff32415d..02955a1c3d76 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -157,6 +157,7 @@ PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active) __PAGEFLAG(Slab, slab) PAGEFLAG(Checked, owner_priv_1) /* Used by some filesystems */ PAGEFLAG(Pinned, owner_priv_1) TESTSCFLAG(Pinned, owner_priv_1) /* Xen */ +PAGEFLAG(SavePinned, dirty); /* Xen */ PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved) PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private) __SETPAGEFLAG(Private, private) diff --git a/include/xen/events.h b/include/xen/events.h index a82ec0c45c38..67c4436554a9 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -41,4 +41,7 @@ static inline void notify_remote_via_evtchn(int port) } extern void notify_remote_via_irq(int irq); + +extern void xen_irq_resume(void); + #endif /* _XEN_EVENTS_H */ diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 466204846121..a40f1cd91be1 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -51,6 +51,9 @@ struct gnttab_free_callback { u16 count; }; +int gnttab_suspend(void); +int gnttab_resume(void); + int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly); diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 10ddfe0142d0..5d7a6db54a8c 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -5,4 +5,13 @@ DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); +void xen_pre_suspend(void); +void xen_post_suspend(int suspend_cancelled); + +void xen_mm_pin_all(void); +void xen_mm_unpin_all(void); + +void xen_time_suspend(void); +void xen_time_resume(void); + #endif /* INCLUDE_XEN_OPS_H */ -- cgit v1.2.3 From 359cdd3f866b6219a6729e313faf2221397f3278 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:28 +0100 Subject: xen: maintain clock offset over save/restore Hook into the device model to make sure that timekeeping's resume handler is called. This deals with our clocksource's non-monotonicity over the save/restore. Explicitly call clock_has_changed() to make sure that all the timers get retriggered properly. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- arch/x86/xen/time.c | 8 -------- drivers/xen/manage.c | 15 ++++++++++++--- include/xen/xen-ops.h | 3 --- 3 files changed, 12 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 0bef256e5f2d..c39e1a5aa241 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -572,14 +572,6 @@ void xen_setup_cpu_clockevents(void) clockevents_register_device(&__get_cpu_var(xen_clock_events)); } -void xen_time_suspend(void) -{ -} - -void xen_time_resume(void) -{ -} - __init void xen_time_init(void) { int cpu = smp_processor_id(); diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index ba85fa2cff33..675c3dd23962 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -34,14 +34,21 @@ static enum shutdown_state shutting_down = SHUTDOWN_INVALID; static int xen_suspend(void *data) { int *cancelled = data; + int err; BUG_ON(!irqs_disabled()); load_cr3(swapper_pg_dir); + err = device_power_down(PMSG_SUSPEND); + if (err) { + printk(KERN_ERR "xen_suspend: device_power_down failed: %d\n", + err); + return err; + } + xen_mm_pin_all(); gnttab_suspend(); - xen_time_suspend(); xen_pre_suspend(); /* @@ -52,10 +59,11 @@ static int xen_suspend(void *data) *cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info)); xen_post_suspend(*cancelled); - xen_time_resume(); gnttab_resume(); xen_mm_unpin_all(); + device_power_up(); + if (!*cancelled) { xen_irq_resume(); xen_console_resume(); @@ -105,7 +113,8 @@ static void do_suspend(void) device_resume(); - + /* Make sure timer events get retriggered on all CPUs */ + clock_was_set(); out: #ifdef CONFIG_PREEMPT thaw_processes(); diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 5d7a6db54a8c..a706d6a78960 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -11,7 +11,4 @@ void xen_post_suspend(int suspend_cancelled); void xen_mm_pin_all(void); void xen_mm_unpin_all(void); -void xen_time_suspend(void); -void xen_time_resume(void); - #endif /* INCLUDE_XEN_OPS_H */ -- cgit v1.2.3 From a72e72469a166c825196c3f20dabd352877fec2b Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 20 May 2008 01:06:55 +0300 Subject: [ALSA] remove CVS keywords This patch removes CVS keywords that weren't updated for a long time from comments. Signed-off-by: Adrian Bunk Signed-off-by: Takashi Iwai --- include/sound/uda1341.h | 2 -- sound/arm/sa11xx-uda1341.c | 2 -- sound/i2c/l3/uda1341.c | 2 -- sound/pci/au88x0/au88x0_game.c | 2 -- 4 files changed, 8 deletions(-) (limited to 'include') diff --git a/include/sound/uda1341.h b/include/sound/uda1341.h index 2e564bfb37fe..110d5dc3a2be 100644 --- a/include/sound/uda1341.h +++ b/include/sound/uda1341.h @@ -15,8 +15,6 @@ * features support */ -/* $Id: uda1341.h,v 1.8 2005/11/17 14:17:21 tiwai Exp $ */ - #define UDA1341_ALSA_NAME "snd-uda1341" /* diff --git a/sound/arm/sa11xx-uda1341.c b/sound/arm/sa11xx-uda1341.c index 0eff33ca0f79..faeddf3ecedb 100644 --- a/sound/arm/sa11xx-uda1341.c +++ b/sound/arm/sa11xx-uda1341.c @@ -21,8 +21,6 @@ * merged HAL layer (patches from Brian) */ -/* $Id: sa11xx-uda1341.c,v 1.27 2005/12/07 09:13:42 cladisch Exp $ */ - /*************************************************************************************************** * * To understand what Alsa Drivers should be doing look at "Writing an Alsa Driver" by Takashi Iwai diff --git a/sound/i2c/l3/uda1341.c b/sound/i2c/l3/uda1341.c index bfa5d2c3608b..1f4942ea1414 100644 --- a/sound/i2c/l3/uda1341.c +++ b/sound/i2c/l3/uda1341.c @@ -17,8 +17,6 @@ * 2002-05-12 Tomas Kasparek another code cleanup */ -/* $Id: uda1341.c,v 1.18 2005/11/17 14:17:21 tiwai Exp $ */ - #include #include #include diff --git a/sound/pci/au88x0/au88x0_game.c b/sound/pci/au88x0/au88x0_game.c index bc212f41a38a..e291aa59742e 100644 --- a/sound/pci/au88x0/au88x0_game.c +++ b/sound/pci/au88x0/au88x0_game.c @@ -1,6 +1,4 @@ /* - * $Id: au88x0_game.c,v 1.9 2003/09/22 03:51:28 mjander Exp $ - * * Manuel Jander. * * Based on the work of: -- cgit v1.2.3 From 62cf872a8eec1f11aacbec0ac3fe3698bfa9b403 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 20 May 2008 12:15:15 +0200 Subject: [ALSA] Replace CONFIG_SND_DEBUG_DETECT with CONFIG_SND_DEBUG_VERBOSE Replace CONFIG_SND_DEBUG_DETECT with CONFIG_SND_DEBUG_VERBOSE to represent its meaning more better. This config isn't provided only for the detection but for more verbose debug prints in general. Signed-off-by: Takashi Iwai --- Documentation/sound/alsa/ALSA-Configuration.txt | 2 +- .../sound/alsa/DocBook/writing-an-alsa-driver.tmpl | 4 ++-- include/sound/core.h | 8 ++++---- sound/core/Kconfig | 10 ++++++---- sound/pci/hda/hda_codec.c | 2 +- sound/pci/hda/hda_hwdep.c | 2 +- sound/pci/pcxhr/pcxhr.c | 4 ++-- sound/pci/pcxhr/pcxhr_core.c | 18 +++++++++--------- 8 files changed, 26 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index 0bbee38acd26..e59569462cb9 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt @@ -1091,7 +1091,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. This occurs when the access to non-existing or non-working codec slot (likely a modem one) causes a stall of the communication via HD-audio bus. You can see which codec slots are probed by enabling - CONFIG_SND_DEBUG_DETECT, or simply from the file name of the codec + CONFIG_SND_DEBUG_VERBOSE, or simply from the file name of the codec proc files. Then limit the slots to probe by probe_mask option. For example, probe_mask=1 means to probe only the first slot, and probe_mask=4 means only the third slot. diff --git a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl b/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl index b03df4d4795c..e13c4e67029f 100644 --- a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl +++ b/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl @@ -6127,8 +6127,8 @@ struct _snd_pcm_runtime { snd_printdd() is compiled in only when - CONFIG_SND_DEBUG_DETECT is set. Please note - that DEBUG_DETECT is not set as default + CONFIG_SND_DEBUG_VERBOSE is set. Please note + that CONFIG_SND_DEBUG_VERBOSE is not set as default even if you configure the alsa-driver with option. You need to give explicitly option instead. diff --git a/include/sound/core.h b/include/sound/core.h index 695ee53488a3..558b96284bd2 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -412,13 +412,13 @@ void snd_verbose_printd(const char *file, int line, const char *format, ...) #endif /* CONFIG_SND_DEBUG */ -#ifdef CONFIG_SND_DEBUG_DETECT +#ifdef CONFIG_SND_DEBUG_VERBOSE /** * snd_printdd - debug printk * @format: format string * * Works like snd_printk() for debugging purposes. - * Ignored when CONFIG_SND_DEBUG_DETECT is not set. + * Ignored when CONFIG_SND_DEBUG_VERBOSE is not set. */ #define snd_printdd(format, args...) snd_printk(format, ##args) #else @@ -442,7 +442,7 @@ struct snd_pci_quirk { unsigned short subvendor; /* PCI subvendor ID */ unsigned short subdevice; /* PCI subdevice ID */ int value; /* value */ -#ifdef CONFIG_SND_DEBUG_DETECT +#ifdef CONFIG_SND_DEBUG_VERBOSE const char *name; /* name of the device (optional) */ #endif }; @@ -450,7 +450,7 @@ struct snd_pci_quirk { #define _SND_PCI_QUIRK_ID(vend,dev) \ .subvendor = (vend), .subdevice = (dev) #define SND_PCI_QUIRK_ID(vend,dev) {_SND_PCI_QUIRK_ID(vend, dev)} -#ifdef CONFIG_SND_DEBUG_DETECT +#ifdef CONFIG_SND_DEBUG_VERBOSE #define SND_PCI_QUIRK(vend,dev,xname,val) \ {_SND_PCI_QUIRK_ID(vend, dev), .value = (val), .name = (xname)} #else diff --git a/sound/core/Kconfig b/sound/core/Kconfig index db2111375784..335d45ecde6a 100644 --- a/sound/core/Kconfig +++ b/sound/core/Kconfig @@ -153,12 +153,14 @@ config SND_DEBUG help Say Y here to enable ALSA debug code. -config SND_DEBUG_DETECT - bool "Debug detection" +config SND_DEBUG_VERBOSE + bool "More verbose debug" depends on SND_DEBUG help - Say Y here to enable extra-verbose log messages printed when - detecting devices. + Say Y here to enable extra-verbose debugging messages. + + Let me repeat: it enables EXTRA-VERBOSE DEBUGGING messages. + So, say Y only if you are ready to be annoyed. config SND_PCM_XRUN_DEBUG bool "Enable PCM ring buffer overrun/underrun debugging" diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index a6be6e3e8716..d2e1093f8e97 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -2335,7 +2335,7 @@ int snd_hda_check_board_config(struct hda_codec *codec, if (!tbl) return -1; if (tbl->value >= 0 && tbl->value < num_configs) { -#ifdef CONFIG_SND_DEBUG_DETECT +#ifdef CONFIG_SND_DEBUG_VERBOSE char tmp[10]; const char *model = NULL; if (models) diff --git a/sound/pci/hda/hda_hwdep.c b/sound/pci/hda/hda_hwdep.c index 2177d9af5334..6e18a422d993 100644 --- a/sound/pci/hda/hda_hwdep.c +++ b/sound/pci/hda/hda_hwdep.c @@ -88,7 +88,7 @@ static int hda_hwdep_ioctl_compat(struct snd_hwdep *hw, struct file *file, static int hda_hwdep_open(struct snd_hwdep *hw, struct file *file) { -#ifndef CONFIG_SND_DEBUG_DETECT +#ifndef CONFIG_SND_DEBUG_VERBOSE if (!capable(CAP_SYS_RAWIO)) return -EACCES; #endif diff --git a/sound/pci/pcxhr/pcxhr.c b/sound/pci/pcxhr/pcxhr.c index 7fdcdc8c6b64..2c7e25336795 100644 --- a/sound/pci/pcxhr/pcxhr.c +++ b/sound/pci/pcxhr/pcxhr.c @@ -516,7 +516,7 @@ static void pcxhr_trigger_tasklet(unsigned long arg) int capture_mask = 0; int playback_mask = 0; -#ifdef CONFIG_SND_DEBUG_DETECT +#ifdef CONFIG_SND_DEBUG_VERBOSE struct timeval my_tv1, my_tv2; do_gettimeofday(&my_tv1); #endif @@ -623,7 +623,7 @@ static void pcxhr_trigger_tasklet(unsigned long arg) mutex_unlock(&mgr->setup_mutex); -#ifdef CONFIG_SND_DEBUG_DETECT +#ifdef CONFIG_SND_DEBUG_VERBOSE do_gettimeofday(&my_tv2); snd_printdd("***TRIGGER TASKLET*** TIME = %ld (err = %x)\n", (long)(my_tv2.tv_usec - my_tv1.tv_usec), err); diff --git a/sound/pci/pcxhr/pcxhr_core.c b/sound/pci/pcxhr/pcxhr_core.c index 78aa81feaa4a..abe5c59b72df 100644 --- a/sound/pci/pcxhr/pcxhr_core.c +++ b/sound/pci/pcxhr/pcxhr_core.c @@ -473,7 +473,7 @@ static struct pcxhr_cmd_info pcxhr_dsp_cmds[] = { [CMD_AUDIO_LEVEL_ADJUST] = { 0xc22000, 0, RMH_SSIZE_FIXED }, }; -#ifdef CONFIG_SND_DEBUG_DETECT +#ifdef CONFIG_SND_DEBUG_VERBOSE static char* cmd_names[] = { [CMD_VERSION] = "CMD_VERSION", [CMD_SUPPORTED] = "CMD_SUPPORTED", @@ -549,7 +549,7 @@ static int pcxhr_read_rmh_status(struct pcxhr_mgr *mgr, struct pcxhr_rmh *rmh) } } } -#ifdef CONFIG_SND_DEBUG_DETECT +#ifdef CONFIG_SND_DEBUG_VERBOSE if (rmh->cmd_idx < CMD_LAST_INDEX) snd_printdd(" stat[%d]=%x\n", i, data); #endif @@ -597,7 +597,7 @@ static int pcxhr_send_msg_nolock(struct pcxhr_mgr *mgr, struct pcxhr_rmh *rmh) data |= 0x008000; /* MASK_MORE_THAN_1_WORD_COMMAND */ else data &= 0xff7fff; /* MASK_1_WORD_COMMAND */ -#ifdef CONFIG_SND_DEBUG_DETECT +#ifdef CONFIG_SND_DEBUG_VERBOSE if (rmh->cmd_idx < CMD_LAST_INDEX) snd_printdd("MSG cmd[0]=%x (%s)\n", data, cmd_names[rmh->cmd_idx]); #endif @@ -624,7 +624,7 @@ static int pcxhr_send_msg_nolock(struct pcxhr_mgr *mgr, struct pcxhr_rmh *rmh) for (i=1; i < rmh->cmd_len; i++) { /* send other words */ data = rmh->cmd[i]; -#ifdef CONFIG_SND_DEBUG_DETECT +#ifdef CONFIG_SND_DEBUG_VERBOSE if (rmh->cmd_idx < CMD_LAST_INDEX) snd_printdd(" cmd[%d]=%x\n", i, data); #endif @@ -847,7 +847,7 @@ int pcxhr_set_pipe_state(struct pcxhr_mgr *mgr, int playback_mask, int capture_m int state, i, err; int audio_mask; -#ifdef CONFIG_SND_DEBUG_DETECT +#ifdef CONFIG_SND_DEBUG_VERBOSE struct timeval my_tv1, my_tv2; do_gettimeofday(&my_tv1); #endif @@ -894,7 +894,7 @@ int pcxhr_set_pipe_state(struct pcxhr_mgr *mgr, int playback_mask, int capture_m if (err) return err; } -#ifdef CONFIG_SND_DEBUG_DETECT +#ifdef CONFIG_SND_DEBUG_VERBOSE do_gettimeofday(&my_tv2); snd_printdd("***SET PIPE STATE*** TIME = %ld (err = %x)\n", (long)(my_tv2.tv_usec - my_tv1.tv_usec), err); @@ -951,7 +951,7 @@ static int pcxhr_handle_async_err(struct pcxhr_mgr *mgr, u32 err, enum pcxhr_async_err_src err_src, int pipe, int is_capture) { -#ifdef CONFIG_SND_DEBUG_DETECT +#ifdef CONFIG_SND_DEBUG_VERBOSE static char* err_src_name[] = { [PCXHR_ERR_PIPE] = "Pipe", [PCXHR_ERR_STREAM] = "Stream", @@ -1169,7 +1169,7 @@ irqreturn_t pcxhr_interrupt(int irq, void *dev_id) mgr->dsp_time_last, dsp_time_new); mgr->dsp_time_err++; } -#ifdef CONFIG_SND_DEBUG_DETECT +#ifdef CONFIG_SND_DEBUG_VERBOSE if (dsp_time_diff == 0) snd_printdd("ERROR DSP TIME NO DIFF time(%d)\n", dsp_time_new); else if (dsp_time_diff >= (2*PCXHR_GRANULARITY)) @@ -1208,7 +1208,7 @@ irqreturn_t pcxhr_interrupt(int irq, void *dev_id) mgr->src_it_dsp = reg; tasklet_hi_schedule(&mgr->msg_taskq); } -#ifdef CONFIG_SND_DEBUG_DETECT +#ifdef CONFIG_SND_DEBUG_VERBOSE if (reg & PCXHR_FATAL_DSP_ERR) snd_printdd("FATAL DSP ERROR : %x\n", reg); #endif -- cgit v1.2.3 From b1829d2705daa7cb72eb1e08bdc8b7e9fad34266 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 May 2008 01:22:08 +0200 Subject: ftrace: fix merge Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ffbbd54a720e..b482fe88bc04 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -122,7 +122,7 @@ static inline void tracer_disable(void) # define trace_preempt_off(a0, a1) do { } while (0) #endif -#ifdef CONFIG_CONTEXT_SWITCH_TRACER +#ifdef CONFIG_TRACING extern void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); #else -- cgit v1.2.3 From 19ec673ced067316b9732bc6d1c4ff4052e5f795 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 28 May 2008 23:00:47 +0400 Subject: x86: nmi - fix incorrect NMI watchdog used by default The commit commit 4b82b277707a39b97271439c475f186f63ec4692 Author: Cyrill Gorcunov Date: Sat May 24 19:36:35 2008 +0400 set nmi_watchdog to NMI_IO_APIC as by default. This causes hangs on some machines with buggy watchdogs. Fix it - i.e. restore old behaviour. Thanks to Sitsofe Wheeler and Adrian Bunk for catching the problem and Maciej W. Rozycki for explanation what is going on there. Signed-off-by: Cyrill Gorcunov CC: Maciej W. Rozycki Signed-off-by: Ingo Molnar --- arch/x86/kernel/nmi.c | 16 +++++++++------- include/asm-x86/nmi.h | 4 +++- 2 files changed, 12 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 69a839fc1eb0..3671a9f3564b 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -84,20 +84,15 @@ static inline unsigned int get_timer_irqs(int cpu) #endif } +#ifdef CONFIG_X86_64 /* Run after command line and cpu_init init, but before all other checks */ void nmi_watchdog_default(void) { if (nmi_watchdog != NMI_DEFAULT) return; -#ifdef CONFIG_X86_64 nmi_watchdog = NMI_NONE; -#else - if (lapic_watchdog_ok()) - nmi_watchdog = NMI_LOCAL_APIC; - else - nmi_watchdog = NMI_IO_APIC; -#endif } +#endif #ifdef CONFIG_SMP /* @@ -488,8 +483,15 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, return -EIO; } +#ifdef CONFIG_X86_64 /* if nmi_watchdog is not set yet, then set it */ nmi_watchdog_default(); +#else + if (lapic_watchdog_ok()) + nmi_watchdog = NMI_LOCAL_APIC; + else + nmi_watchdog = NMI_IO_APIC; +#endif if (nmi_watchdog == NMI_LOCAL_APIC) { if (nmi_watchdog_enabled) diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h index 1e8f34d7ab68..6f4d44fc051f 100644 --- a/include/asm-x86/nmi.h +++ b/include/asm-x86/nmi.h @@ -38,9 +38,11 @@ static inline void unset_nmi_pm_callback(struct pm_dev *dev) #ifdef CONFIG_X86_64 extern void default_do_nmi(struct pt_regs *); +extern void nmi_watchdog_default(void); +#else +#define nmi_watchdog_default(void) do {} while (0) #endif -extern void nmi_watchdog_default(void); extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); extern int check_nmi_watchdog(void); extern int nmi_watchdog_enabled; -- cgit v1.2.3 From e13ac2e9b18bde51cf32c69c2209df25791ab3e5 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 28 May 2008 17:58:05 +0100 Subject: [ALSA] ASoC: Add SOC_DOUBLE_S8_TLV control type The SOC_DOUBLE_S8_TLV control type was originally implemented in the UDA1380 driver by Philipp Zabel and was moved into the core by me. Signed-off-by: Philipp Zabel Signed-off-by: Mark Brown Signed-off-by: Takashi Iwai --- include/sound/soc.h | 15 +++++++++++ sound/soc/soc-core.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index bca9538d9e50..9fa2093e74eb 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -73,6 +73,15 @@ .get = snd_soc_get_volsw_2r, .put = snd_soc_put_volsw_2r, \ .private_value = (reg_left) | ((shift) << 8) | \ ((max) << 12) | ((invert) << 20) | ((reg_right) << 24) } +#define SOC_DOUBLE_S8_TLV(xname, reg, min, max, tlv_array) \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = (xname), \ + .access = SNDRV_CTL_ELEM_ACCESS_TLV_READ | \ + SNDRV_CTL_ELEM_ACCESS_READWRITE, \ + .tlv.p = (tlv_array), \ + .info = snd_soc_info_volsw_s8, .get = snd_soc_get_volsw_s8, \ + .put = snd_soc_put_volsw_s8, \ + .private_value = (reg) | (((signed char)max) << 16) | \ + (((signed char)min) << 24) } #define SOC_ENUM_DOUBLE(xreg, xshift_l, xshift_r, xmask, xtexts) \ { .reg = xreg, .shift_l = xshift_l, .shift_r = xshift_r, \ .mask = xmask, .texts = xtexts } @@ -267,6 +276,12 @@ int snd_soc_get_volsw_2r(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol); int snd_soc_put_volsw_2r(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol); +int snd_soc_info_volsw_s8(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo); +int snd_soc_get_volsw_s8(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol); +int snd_soc_put_volsw_s8(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol); /* SoC PCM stream information */ struct snd_soc_pcm_stream { diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index a3f091e0843a..f594ab888e17 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -1589,6 +1589,78 @@ int snd_soc_put_volsw_2r(struct snd_kcontrol *kcontrol, } EXPORT_SYMBOL_GPL(snd_soc_put_volsw_2r); +/** + * snd_soc_info_volsw_s8 - signed mixer info callback + * @kcontrol: mixer control + * @uinfo: control element information + * + * Callback to provide information about a signed mixer control. + * + * Returns 0 for success. + */ +int snd_soc_info_volsw_s8(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo) +{ + int max = (signed char)((kcontrol->private_value >> 16) & 0xff); + int min = (signed char)((kcontrol->private_value >> 24) & 0xff); + + uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; + uinfo->count = 2; + uinfo->value.integer.min = 0; + uinfo->value.integer.max = max-min; + return 0; +} +EXPORT_SYMBOL_GPL(snd_soc_info_volsw_s8); + +/** + * snd_soc_get_volsw_s8 - signed mixer get callback + * @kcontrol: mixer control + * @uinfo: control element information + * + * Callback to get the value of a signed mixer control. + * + * Returns 0 for success. + */ +int snd_soc_get_volsw_s8(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + int reg = kcontrol->private_value & 0xff; + int min = (signed char)((kcontrol->private_value >> 24) & 0xff); + int val = snd_soc_read(codec, reg); + + ucontrol->value.integer.value[0] = + ((signed char)(val & 0xff))-min; + ucontrol->value.integer.value[1] = + ((signed char)((val >> 8) & 0xff))-min; + return 0; +} +EXPORT_SYMBOL_GPL(snd_soc_get_volsw_s8); + +/** + * snd_soc_put_volsw_sgn - signed mixer put callback + * @kcontrol: mixer control + * @uinfo: control element information + * + * Callback to set the value of a signed mixer control. + * + * Returns 0 for success. + */ +int snd_soc_put_volsw_s8(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + int reg = kcontrol->private_value & 0xff; + int min = (signed char)((kcontrol->private_value >> 24) & 0xff); + unsigned short val; + + val = (ucontrol->value.integer.value[0]+min) & 0xff; + val |= ((ucontrol->value.integer.value[1]+min) & 0xff) << 8; + + return snd_soc_update_bits(codec, reg, 0xffff, val); +} +EXPORT_SYMBOL_GPL(snd_soc_put_volsw_s8); + static int __devinit snd_soc_init(void) { printk(KERN_INFO "ASoC version %s\n", SND_SOC_VERSION); -- cgit v1.2.3 From 0261ac5f2f43a1906cfacfb19d62ed643d162cbe Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 29 May 2008 09:31:50 +0200 Subject: xen: fix "xen: implement save/restore" -tip testing found the following build breakage: drivers/built-in.o: In function `xen_suspend': manage.c:(.text+0x4390f): undefined reference to `xen_console_resume' with this config: http://redhat.com/~mingo/misc/config-Thu_May_29_09_23_16_CEST_2008.bad i have bisected it down to: | commit 0e91398f2a5d4eb6b07df8115917d0d1cf3e9b58 | Author: Jeremy Fitzhardinge | Date: Mon May 26 23:31:27 2008 +0100 | | xen: implement save/restore the problem is that drivers/xen/manage.c is built unconditionally if CONFIG_XEN is enabled and makes use of xen_suspend(), but drivers/char/hvc_xen.c, where the xen_suspend() method is implemented, is only build if CONFIG_HVC_XEN=y as well. i have solved this by providing a NOP implementation for xen_suspend() in the !CONFIG_HVC_XEN case. Signed-off-by: Ingo Molnar --- include/xen/hvc-console.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/xen/hvc-console.h b/include/xen/hvc-console.h index fd5483a059bb..98b79bc404dd 100644 --- a/include/xen/hvc-console.h +++ b/include/xen/hvc-console.h @@ -3,7 +3,11 @@ extern struct console xenboot_console; +#ifdef CONFIG_HVC_XEN void xen_console_resume(void); +#else +static inline void xen_console_resume(void) { } +#endif void xen_raw_console_write(const char *str); void xen_raw_printk(const char *fmt, ...); -- cgit v1.2.3 From b786af117b360843349cf66165c4efa0217ca2a7 Mon Sep 17 00:00:00 2001 From: Stephen Neuendorffer Date: Wed, 7 May 2008 04:29:17 +1000 Subject: [POWERPC] Refactor DCR code Previously, DCR support was configured at compile time to either use MMIO or native dcr instructions. Although this works for most platforms, it fails on FPGA platforms: 1) Systems may include more than one DCR bus. 2) Systems may be native DCR capable and still use memory mapped DCR interface. This patch provides runtime support based on the device trees for the case where CONFIG_PPC_DCR_MMIO and CONFIG_PPC_DCR_NATIVE are both selected. Previously, this was a poorly defined configuration, which happened to provide NATIVE support. The runtime selection is made based on the dcr-controller having a 'dcr-access-method' attribute in the device tree. If only one of the above options is selected, then the code uses #defines to select only the used code in order to avoid introducing overhead in existing usage. Signed-off-by: Stephen Neuendorffer Signed-off-by: Josh Boyer --- arch/powerpc/sysdev/dcr.c | 154 ++++++++++++++++++++++++++++++-------- include/asm-powerpc/dcr-generic.h | 49 ++++++++++++ include/asm-powerpc/dcr-mmio.h | 20 +++-- include/asm-powerpc/dcr-native.h | 16 ++-- include/asm-powerpc/dcr.h | 39 +++++++++- 5 files changed, 231 insertions(+), 47 deletions(-) create mode 100644 include/asm-powerpc/dcr-generic.h (limited to 'include') diff --git a/arch/powerpc/sysdev/dcr.c b/arch/powerpc/sysdev/dcr.c index 437e48d3ae33..5f39a79b0660 100644 --- a/arch/powerpc/sysdev/dcr.c +++ b/arch/powerpc/sysdev/dcr.c @@ -23,6 +23,105 @@ #include #include +static struct device_node *find_dcr_parent(struct device_node *node) +{ + struct device_node *par, *tmp; + const u32 *p; + + for (par = of_node_get(node); par;) { + if (of_get_property(par, "dcr-controller", NULL)) + break; + p = of_get_property(par, "dcr-parent", NULL); + tmp = par; + if (p == NULL) + par = of_get_parent(par); + else + par = of_find_node_by_phandle(*p); + of_node_put(tmp); + } + return par; +} + +#if defined(CONFIG_PPC_DCR_NATIVE) && defined(CONFIG_PPC_DCR_MMIO) + +bool dcr_map_ok_generic(dcr_host_t host) +{ + if (host.type == DCR_HOST_NATIVE) + return dcr_map_ok_native(host.host.native); + else if (host.type == DCR_HOST_MMIO) + return dcr_map_ok_mmio(host.host.mmio); + else + return 0; +} +EXPORT_SYMBOL_GPL(dcr_map_ok_generic); + +dcr_host_t dcr_map_generic(struct device_node *dev, + unsigned int dcr_n, + unsigned int dcr_c) +{ + dcr_host_t host; + struct device_node *dp; + const char *prop; + + host.type = DCR_HOST_INVALID; + + dp = find_dcr_parent(dev); + if (dp == NULL) + return host; + + prop = of_get_property(dp, "dcr-access-method", NULL); + + pr_debug("dcr_map_generic(dcr-access-method = %s)\n", prop); + + if (!strcmp(prop, "native")) { + host.type = DCR_HOST_NATIVE; + host.host.native = dcr_map_native(dev, dcr_n, dcr_c); + } else if (!strcmp(prop, "mmio")) { + host.type = DCR_HOST_MMIO; + host.host.mmio = dcr_map_mmio(dev, dcr_n, dcr_c); + } + + of_node_put(dp); + return host; +} +EXPORT_SYMBOL_GPL(dcr_map_generic); + +void dcr_unmap_generic(dcr_host_t host, unsigned int dcr_c) +{ + if (host.type == DCR_HOST_NATIVE) + dcr_unmap_native(host.host.native, dcr_c); + else if (host.type == DCR_HOST_MMIO) + dcr_unmap_mmio(host.host.mmio, dcr_c); + else /* host.type == DCR_HOST_INVALID */ + WARN_ON(true); +} +EXPORT_SYMBOL_GPL(dcr_unmap_generic); + +u32 dcr_read_generic(dcr_host_t host, unsigned int dcr_n) +{ + if (host.type == DCR_HOST_NATIVE) + return dcr_read_native(host.host.native, dcr_n); + else if (host.type == DCR_HOST_MMIO) + return dcr_read_mmio(host.host.mmio, dcr_n); + else /* host.type == DCR_HOST_INVALID */ + WARN_ON(true); + return 0; +} +EXPORT_SYMBOL_GPL(dcr_read_generic); + +void dcr_write_generic(dcr_host_t host, unsigned int dcr_n, u32 value) +{ + if (host.type == DCR_HOST_NATIVE) + dcr_write_native(host.host.native, dcr_n, value); + else if (host.type == DCR_HOST_MMIO) + dcr_write_mmio(host.host.mmio, dcr_n, value); + else /* host.type == DCR_HOST_INVALID */ + WARN_ON(true); +} +EXPORT_SYMBOL_GPL(dcr_write_generic); + +#endif /* defined(CONFIG_PPC_DCR_NATIVE) && defined(CONFIG_PPC_DCR_MMIO) */ + unsigned int dcr_resource_start(struct device_node *np, unsigned int index) { unsigned int ds; @@ -47,26 +146,7 @@ unsigned int dcr_resource_len(struct device_node *np, unsigned int index) } EXPORT_SYMBOL_GPL(dcr_resource_len); -#ifndef CONFIG_PPC_DCR_NATIVE - -static struct device_node * find_dcr_parent(struct device_node * node) -{ - struct device_node *par, *tmp; - const u32 *p; - - for (par = of_node_get(node); par;) { - if (of_get_property(par, "dcr-controller", NULL)) - break; - p = of_get_property(par, "dcr-parent", NULL); - tmp = par; - if (p == NULL) - par = of_get_parent(par); - else - par = of_find_node_by_phandle(*p); - of_node_put(tmp); - } - return par; -} +#ifdef CONFIG_PPC_DCR_MMIO u64 of_translate_dcr_address(struct device_node *dev, unsigned int dcr_n, @@ -75,7 +155,7 @@ u64 of_translate_dcr_address(struct device_node *dev, struct device_node *dp; const u32 *p; unsigned int stride; - u64 ret; + u64 ret = OF_BAD_ADDR; dp = find_dcr_parent(dev); if (dp == NULL) @@ -90,7 +170,7 @@ u64 of_translate_dcr_address(struct device_node *dev, if (p == NULL) p = of_get_property(dp, "dcr-mmio-space", NULL); if (p == NULL) - return OF_BAD_ADDR; + goto done; /* Maybe could do some better range checking here */ ret = of_translate_address(dp, p); @@ -98,21 +178,25 @@ u64 of_translate_dcr_address(struct device_node *dev, ret += (u64)(stride) * (u64)dcr_n; if (out_stride) *out_stride = stride; + + done: + of_node_put(dp); return ret; } -dcr_host_t dcr_map(struct device_node *dev, unsigned int dcr_n, - unsigned int dcr_c) +dcr_host_mmio_t dcr_map_mmio(struct device_node *dev, + unsigned int dcr_n, + unsigned int dcr_c) { - dcr_host_t ret = { .token = NULL, .stride = 0, .base = dcr_n }; + dcr_host_mmio_t ret = { .token = NULL, .stride = 0, .base = dcr_n }; u64 addr; pr_debug("dcr_map(%s, 0x%x, 0x%x)\n", dev->full_name, dcr_n, dcr_c); addr = of_translate_dcr_address(dev, dcr_n, &ret.stride); - pr_debug("translates to addr: 0x%lx, stride: 0x%x\n", - addr, ret.stride); + pr_debug("translates to addr: 0x%llx, stride: 0x%x\n", + (unsigned long long) addr, ret.stride); if (addr == OF_BAD_ADDR) return ret; pr_debug("mapping 0x%x bytes\n", dcr_c * ret.stride); @@ -124,11 +208,11 @@ dcr_host_t dcr_map(struct device_node *dev, unsigned int dcr_n, ret.token -= dcr_n * ret.stride; return ret; } -EXPORT_SYMBOL_GPL(dcr_map); +EXPORT_SYMBOL_GPL(dcr_map_mmio); -void dcr_unmap(dcr_host_t host, unsigned int dcr_c) +void dcr_unmap_mmio(dcr_host_mmio_t host, unsigned int dcr_c) { - dcr_host_t h = host; + dcr_host_mmio_t h = host; if (h.token == NULL) return; @@ -136,7 +220,11 @@ void dcr_unmap(dcr_host_t host, unsigned int dcr_c) iounmap(h.token); h.token = NULL; } -EXPORT_SYMBOL_GPL(dcr_unmap); -#else /* defined(CONFIG_PPC_DCR_NATIVE) */ +EXPORT_SYMBOL_GPL(dcr_unmap_mmio); + +#endif /* defined(CONFIG_PPC_DCR_MMIO) */ + +#ifdef CONFIG_PPC_DCR_NATIVE DEFINE_SPINLOCK(dcr_ind_lock); -#endif /* !defined(CONFIG_PPC_DCR_NATIVE) */ +#endif /* defined(CONFIG_PPC_DCR_NATIVE) */ + diff --git a/include/asm-powerpc/dcr-generic.h b/include/asm-powerpc/dcr-generic.h new file mode 100644 index 000000000000..35b71599ec46 --- /dev/null +++ b/include/asm-powerpc/dcr-generic.h @@ -0,0 +1,49 @@ +/* + * (c) Copyright 2006 Benjamin Herrenschmidt, IBM Corp. + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ASM_POWERPC_DCR_GENERIC_H +#define _ASM_POWERPC_DCR_GENERIC_H +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ + +enum host_type_t {DCR_HOST_MMIO, DCR_HOST_NATIVE, DCR_HOST_INVALID}; + +typedef struct { + enum host_type_t type; + union { + dcr_host_mmio_t mmio; + dcr_host_native_t native; + } host; +} dcr_host_t; + +extern bool dcr_map_ok_generic(dcr_host_t host); + +extern dcr_host_t dcr_map_generic(struct device_node *dev, unsigned int dcr_n, + unsigned int dcr_c); +extern void dcr_unmap_generic(dcr_host_t host, unsigned int dcr_c); + +extern u32 dcr_read_generic(dcr_host_t host, unsigned int dcr_n); + +extern void dcr_write_generic(dcr_host_t host, unsigned int dcr_n, u32 value); + +#endif /* __ASSEMBLY__ */ +#endif /* __KERNEL__ */ +#endif /* _ASM_POWERPC_DCR_GENERIC_H */ + + diff --git a/include/asm-powerpc/dcr-mmio.h b/include/asm-powerpc/dcr-mmio.h index 08532ff1899c..acd491dbd45a 100644 --- a/include/asm-powerpc/dcr-mmio.h +++ b/include/asm-powerpc/dcr-mmio.h @@ -27,20 +27,26 @@ typedef struct { void __iomem *token; unsigned int stride; unsigned int base; -} dcr_host_t; +} dcr_host_mmio_t; -#define DCR_MAP_OK(host) ((host).token != NULL) +static inline bool dcr_map_ok_mmio(dcr_host_mmio_t host) +{ + return host.token != NULL; +} -extern dcr_host_t dcr_map(struct device_node *dev, unsigned int dcr_n, - unsigned int dcr_c); -extern void dcr_unmap(dcr_host_t host, unsigned int dcr_c); +extern dcr_host_mmio_t dcr_map_mmio(struct device_node *dev, + unsigned int dcr_n, + unsigned int dcr_c); +extern void dcr_unmap_mmio(dcr_host_mmio_t host, unsigned int dcr_c); -static inline u32 dcr_read(dcr_host_t host, unsigned int dcr_n) +static inline u32 dcr_read_mmio(dcr_host_mmio_t host, unsigned int dcr_n) { return in_be32(host.token + ((host.base + dcr_n) * host.stride)); } -static inline void dcr_write(dcr_host_t host, unsigned int dcr_n, u32 value) +static inline void dcr_write_mmio(dcr_host_mmio_t host, + unsigned int dcr_n, + u32 value) { out_be32(host.token + ((host.base + dcr_n) * host.stride), value); } diff --git a/include/asm-powerpc/dcr-native.h b/include/asm-powerpc/dcr-native.h index f8398ce80372..72d2b72c7390 100644 --- a/include/asm-powerpc/dcr-native.h +++ b/include/asm-powerpc/dcr-native.h @@ -26,14 +26,18 @@ typedef struct { unsigned int base; -} dcr_host_t; +} dcr_host_native_t; -#define DCR_MAP_OK(host) (1) +static inline bool dcr_map_ok_native(dcr_host_native_t host) +{ + return 1; +} -#define dcr_map(dev, dcr_n, dcr_c) ((dcr_host_t){ .base = (dcr_n) }) -#define dcr_unmap(host, dcr_c) do {} while (0) -#define dcr_read(host, dcr_n) mfdcr(dcr_n + host.base) -#define dcr_write(host, dcr_n, value) mtdcr(dcr_n + host.base, value) +#define dcr_map_native(dev, dcr_n, dcr_c) \ + ((dcr_host_native_t){ .base = (dcr_n) }) +#define dcr_unmap_native(host, dcr_c) do {} while (0) +#define dcr_read_native(host, dcr_n) mfdcr(dcr_n + host.base) +#define dcr_write_native(host, dcr_n, value) mtdcr(dcr_n + host.base, value) /* Device Control Registers */ void __mtdcr(int reg, unsigned int val); diff --git a/include/asm-powerpc/dcr.h b/include/asm-powerpc/dcr.h index 9338d50538f1..53b283050ab3 100644 --- a/include/asm-powerpc/dcr.h +++ b/include/asm-powerpc/dcr.h @@ -20,14 +20,50 @@ #ifndef _ASM_POWERPC_DCR_H #define _ASM_POWERPC_DCR_H #ifdef __KERNEL__ +#ifndef __ASSEMBLY__ #ifdef CONFIG_PPC_DCR #ifdef CONFIG_PPC_DCR_NATIVE #include -#else +#endif + +#ifdef CONFIG_PPC_DCR_MMIO #include #endif + +/* Indirection layer for providing both NATIVE and MMIO support. */ + +#if defined(CONFIG_PPC_DCR_NATIVE) && defined(CONFIG_PPC_DCR_MMIO) + +#include + +#define DCR_MAP_OK(host) dcr_map_ok_generic(host) +#define dcr_map(dev, dcr_n, dcr_c) dcr_map_generic(dev, dcr_n, dcr_c) +#define dcr_unmap(host, dcr_c) dcr_unmap_generic(host, dcr_c) +#define dcr_read(host, dcr_n) dcr_read_generic(host, dcr_n) +#define dcr_write(host, dcr_n, value) dcr_write_generic(host, dcr_n, value) + +#else + +#ifdef CONFIG_PPC_DCR_NATIVE +typedef dcr_host_native_t dcr_host_t; +#define DCR_MAP_OK(host) dcr_map_ok_native(host) +#define dcr_map(dev, dcr_n, dcr_c) dcr_map_native(dev, dcr_n, dcr_c) +#define dcr_unmap(host, dcr_c) dcr_unmap_native(host, dcr_c) +#define dcr_read(host, dcr_n) dcr_read_native(host, dcr_n) +#define dcr_write(host, dcr_n, value) dcr_write_native(host, dcr_n, value) +#else +typedef dcr_host_mmio_t dcr_host_t; +#define DCR_MAP_OK(host) dcr_map_ok_mmio(host) +#define dcr_map(dev, dcr_n, dcr_c) dcr_map_mmio(dev, dcr_n, dcr_c) +#define dcr_unmap(host, dcr_c) dcr_unmap_mmio(host, dcr_c) +#define dcr_read(host, dcr_n) dcr_read_mmio(host, dcr_n) +#define dcr_write(host, dcr_n, value) dcr_write_mmio(host, dcr_n, value) +#endif + +#endif /* defined(CONFIG_PPC_DCR_NATIVE) && defined(CONFIG_PPC_DCR_MMIO) */ + /* * On CONFIG_PPC_MERGE, we have additional helpers to read the DCR * base from the device-tree @@ -41,5 +77,6 @@ extern unsigned int dcr_resource_len(struct device_node *np, #endif /* CONFIG_PPC_MERGE */ #endif /* CONFIG_PPC_DCR */ +#endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_DCR_H */ -- cgit v1.2.3 From 8ff7f2a46bd8831e1f1f2a694ec13921720180b7 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Thu, 21 Feb 2008 16:30:10 +0100 Subject: There is no need to have BOOT_PARAMS_SIZE known outside of atags.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit atags.c was the only user of KEXEC_BOOT_PARAMS_SIZE and kexec.h was only included to get that definition. Signed-off-by: Uwe Kleine-König Acked-by: Uli Luckas --- arch/arm/kernel/atags.c | 7 +++---- include/asm-arm/kexec.h | 2 -- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/atags.c b/arch/arm/kernel/atags.c index 64c420805e6f..56fef9a05504 100644 --- a/arch/arm/kernel/atags.c +++ b/arch/arm/kernel/atags.c @@ -1,5 +1,4 @@ #include -#include #include #include #include @@ -42,14 +41,14 @@ create_proc_entries(void) return 0; } - -static char __initdata atags_copy_buf[KEXEC_BOOT_PARAMS_SIZE]; +#define BOOT_PARAMS_SIZE 1536 +static char __initdata atags_copy_buf[BOOT_PARAMS_SIZE]; static char __initdata *atags_copy; void __init save_atags(const struct tag *tags) { atags_copy = atags_copy_buf; - memcpy(atags_copy, tags, KEXEC_BOOT_PARAMS_SIZE); + memcpy(atags_copy, tags, sizeof(atags_copy_buf)); } diff --git a/include/asm-arm/kexec.h b/include/asm-arm/kexec.h index 47fe34d692da..c8986bb99ed5 100644 --- a/include/asm-arm/kexec.h +++ b/include/asm-arm/kexec.h @@ -14,8 +14,6 @@ #define KEXEC_ARCH KEXEC_ARCH_ARM -#define KEXEC_BOOT_PARAMS_SIZE 1536 - #define KEXEC_ARM_ATAGS_OFFSET 0x1000 #define KEXEC_ARM_ZIMAGE_OFFSET 0x8000 -- cgit v1.2.3 From a4831fbe096a6f4f691fd71b041ce27add54088e Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Wed, 21 May 2008 10:17:07 +0200 Subject: ns9xxx: fix assembler version of __REG2 to be consistent with the C version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's not very critical because __REG2 isn't used in assembler code currently. Additionally some white space noise is fixed. Signed-off-by: Uwe Kleine-König --- include/asm-arm/arch-ns9xxx/hardware.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-arm/arch-ns9xxx/hardware.h b/include/asm-arm/arch-ns9xxx/hardware.h index 0b7b34603f1c..0dca11ce21fc 100644 --- a/include/asm-arm/arch-ns9xxx/hardware.h +++ b/include/asm-arm/arch-ns9xxx/hardware.h @@ -66,13 +66,13 @@ __REGGET(var, reg ## _ ## field) / __REGSHIFT(reg ## _ ## field) # define REGGETIM_IDX(var, reg, field, idx) \ - __REGGET(var, reg ## _ ## field((idx))) / \ + __REGGET(var, reg ## _ ## field((idx))) / \ __REGSHIFT(reg ## _ ## field((idx))) #else # define __REG(x) io_p2v(x) -# define __REG2(x, y) io_p2v((x) + (y)) +# define __REG2(x, y) io_p2v((x) + 4 * (y)) #endif -- cgit v1.2.3 From ba3a5974239293d921235e6fa82b09b670e674ef Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 30 May 2008 14:55:47 +0200 Subject: - fix typo in include/asm-x86/nmi.h --- include/asm-x86/nmi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h index 6f4d44fc051f..972a4f6f799a 100644 --- a/include/asm-x86/nmi.h +++ b/include/asm-x86/nmi.h @@ -40,7 +40,7 @@ static inline void unset_nmi_pm_callback(struct pm_dev *dev) extern void default_do_nmi(struct pt_regs *); extern void nmi_watchdog_default(void); #else -#define nmi_watchdog_default(void) do {} while (0) +#define nmi_watchdog_default() do { } while (0) #endif extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); -- cgit v1.2.3 From d364319b989967b74e57fef5c8017fd56a16c392 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 2 May 2008 23:42:01 +0200 Subject: x86: move mmconfig declarations to header arch/x86/kernel/mmconf-fam10h_64.c is missing the prototypes, which are decalred in arch/x86/kernel/setup_64.c. Move the prototypes and the inline stubs to the appropriate header file. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/mmconf-fam10h_64.c | 1 + arch/x86/kernel/setup_64.c | 13 +------------ include/asm-x86/mmconfig.h | 12 ++++++++++++ 3 files changed, 14 insertions(+), 12 deletions(-) create mode 100644 include/asm-x86/mmconfig.h (limited to 'include') diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index edc5fbfe85c0..fdfdc550b366 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "../pci/pci.h" diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 6dff1286ad8a..341230db74e1 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -71,6 +71,7 @@ #include #include #include +#include #include #ifdef CONFIG_PARAVIRT @@ -293,18 +294,6 @@ static void __init parse_setup_data(void) } } -#ifdef CONFIG_PCI_MMCONFIG -extern void __cpuinit fam10h_check_enable_mmcfg(void); -extern void __init check_enable_amd_mmconf_dmi(void); -#else -void __cpuinit fam10h_check_enable_mmcfg(void) -{ -} -void __init check_enable_amd_mmconf_dmi(void) -{ -} -#endif - /* * setup_arch - architecture-specific boot-time initializations * diff --git a/include/asm-x86/mmconfig.h b/include/asm-x86/mmconfig.h new file mode 100644 index 000000000000..95beda07c6fa --- /dev/null +++ b/include/asm-x86/mmconfig.h @@ -0,0 +1,12 @@ +#ifndef _ASM_MMCONFIG_H +#define _ASM_MMCONFIG_H + +#ifdef CONFIG_PCI_MMCONFIG +extern void __cpuinit fam10h_check_enable_mmcfg(void); +extern void __init check_enable_amd_mmconf_dmi(void); +#else +static inline void fam10h_check_enable_mmcfg(void) { } +static inline void check_enable_amd_mmconf_dmi(void) { } +#endif + +#endif -- cgit v1.2.3 From 3b6b9293d0f8e1b11630102013ca2a1dcef17d44 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Thu, 29 May 2008 18:31:15 -0400 Subject: x86: Honor 'quiet' command line option in real mode boot decompressor. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch lets the early real mode code look for the 'quiet' option on the kernel command line and pass a loadflag to the decompressor. When this flag is set, we suppress the "Decompressing Linux... Parsing ELF... done." messages. Signed-off-by: Kristian Høgsberg Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/misc.c | 13 ++++++++++--- arch/x86/boot/main.c | 4 ++++ include/asm-x86/bootparam.h | 1 + 3 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 74ed3c075ee6..d10e7274e1fc 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -189,6 +189,7 @@ static void gzip_release(void **); * This is set up by the setup-routine at boot-time */ static struct boot_params *real_mode; /* Pointer to real-mode data */ +static int quiet; extern unsigned char input_data[]; extern int input_len; @@ -391,7 +392,8 @@ static void parse_elf(void *output) return; } - putstr("Parsing ELF... "); + if (!quiet) + putstr("Parsing ELF... "); phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum); if (!phdrs) @@ -426,6 +428,9 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, { real_mode = rmode; + if (real_mode->hdr.loadflags & QUIET_FLAG) + quiet = 1; + if (real_mode->screen_info.orig_video_mode == 7) { vidmem = (char *) 0xb0000; vidport = 0x3b4; @@ -461,9 +466,11 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, #endif makecrc(); - putstr("\nDecompressing Linux... "); + if (!quiet) + putstr("\nDecompressing Linux... "); gunzip(); parse_elf(output); - putstr("done.\nBooting the kernel.\n"); + if (!quiet) + putstr("done.\nBooting the kernel.\n"); return; } diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c index 77569a4a3be1..2296164b54d2 100644 --- a/arch/x86/boot/main.c +++ b/arch/x86/boot/main.c @@ -165,6 +165,10 @@ void main(void) /* Set the video mode */ set_video(); + /* Parse command line for 'quiet' and pass it to decompressor. */ + if (cmdline_find_option_bool("quiet")) + boot_params.hdr.loadflags |= QUIET_FLAG; + /* Do the last things and invoke protected mode */ go_to_protected_mode(); } diff --git a/include/asm-x86/bootparam.h b/include/asm-x86/bootparam.h index f62f4733606b..3e36ba8f288b 100644 --- a/include/asm-x86/bootparam.h +++ b/include/asm-x86/bootparam.h @@ -40,6 +40,7 @@ struct setup_header { __u8 type_of_loader; __u8 loadflags; #define LOADED_HIGH (1<<0) +#define QUIET_FLAG (1<<5) #define KEEP_SEGMENTS (1<<6) #define CAN_USE_HEAP (1<<7) __u16 setup_move_size; -- cgit v1.2.3 From f0d43100f13be0fa5bf52741d7084bb27f00e621 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 29 May 2008 12:56:36 -0700 Subject: x86: extend e820 early_res support 32bit -fix #3 introduce init_pg_table_start, so xen PV could specify the value. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/head32.c | 3 ++- arch/x86/kernel/head_32.S | 2 ++ arch/x86/kernel/setup_32.c | 7 +++++++ arch/x86/lguest/boot.c | 5 +++-- arch/x86/xen/enlighten.c | 3 ++- include/asm-x86/setup.h | 4 +++- 6 files changed, 19 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index c216d3c2a991..0b6cb9fba71e 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -76,7 +76,8 @@ void __init i386_start_kernel(void) reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); } #endif - reserve_early(__pa_symbol(&_end), init_pg_tables_end, "INIT_PG_TABLE"); + reserve_early(init_pg_tables_start, init_pg_tables_end, + "INIT_PG_TABLE"); reserve_ebda_region(); diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index b2cc73768a9d..bef4618feadb 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -194,6 +194,7 @@ default_entry: xorl %ebx,%ebx /* %ebx is kept at zero */ movl $pa(pg0), %edi + movl %edi, pa(init_pg_tables_start) movl $pa(swapper_pg_pmd), %edx movl $PTE_ATTR, %eax 10: @@ -228,6 +229,7 @@ default_entry: page_pde_offset = (__PAGE_OFFSET >> 20); movl $pa(pg0), %edi + movl %edi, pa(init_pg_tables_start) movl $pa(swapper_pg_dir), %edx movl $PTE_ATTR, %eax 10: diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 08b47a2f7af0..de9c5ee77d07 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -71,6 +71,7 @@ /* This value is set up by the early boot code to point to the value immediately after the boot time page tables. It contains a *physical* address, and must not be in the .bss segment! */ +unsigned long init_pg_tables_start __initdata = ~0UL; unsigned long init_pg_tables_end __initdata = ~0UL; /* @@ -485,6 +486,10 @@ static void __init reserve_initrd(void) return; } + printk(KERN_INFO "old RAMDISK: %08llx - %08llx\n", ramdisk_image, + ramdisk_end); + + if (ramdisk_end <= end_of_lowmem) { /* All in lowmem, easy case */ /* @@ -511,6 +516,8 @@ static void __init reserve_initrd(void) "NEW RAMDISK"); initrd_start = ramdisk_here + PAGE_OFFSET; initrd_end = initrd_start + ramdisk_size; + printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n", + ramdisk_here, ramdisk_here + ramdisk_size); do_relocate_initrd = true; } diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index af65b2da3ba0..bf7c34a3aaeb 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1011,6 +1011,7 @@ __init void lguest_init(void) * clobbered. The Launcher places our initial pagetables somewhere at * the top of our physical memory, so we don't need extra space: set * init_pg_tables_end to the end of the kernel. */ + init_pg_tables_start = __pa(pg0); init_pg_tables_end = __pa(pg0); /* Load the %fs segment register (the per-cpu segment register) with @@ -1064,9 +1065,9 @@ __init void lguest_init(void) pm_power_off = lguest_power_off; machine_ops.restart = lguest_restart; - /* Now we're set up, call start_kernel() in init/main.c and we proceed + /* Now we're set up, call i386_start_kernel() in head32.c and we proceed * to boot as normal. It never returns. */ - start_kernel(); + i386_start_kernel(); } /* * This marks the end of stage II of our journey, The Guest. diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index c8a56e457d61..ccc7b84ddabf 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1211,6 +1211,7 @@ asmlinkage void __init xen_start_kernel(void) pgd = (pgd_t *)xen_start_info->pt_base; + init_pg_tables_start = __pa(pgd); init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; init_mm.pgd = pgd; /* use the Xen pagetables to start */ @@ -1246,5 +1247,5 @@ asmlinkage void __init xen_start_kernel(void) add_preferred_console("hvc", 0, NULL); /* Start the world */ - start_kernel(); + i386_start_kernel(); } diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h index ffa0f540fc7f..64f5f0c11d66 100644 --- a/include/asm-x86/setup.h +++ b/include/asm-x86/setup.h @@ -59,9 +59,11 @@ int __init copy_e820_map(struct e820entry *biosmap, int nr_map); void __init add_memory_region(unsigned long long start, unsigned long long size, int type); -extern unsigned long init_pg_tables_end; +void __init i386_start_kernel(void); +extern unsigned long init_pg_tables_start; +extern unsigned long init_pg_tables_end; #endif /* __i386__ */ #endif /* _SETUP */ -- cgit v1.2.3 From a5481280b29b6a3db912ec100498bd31eaa6d2db Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 29 May 2008 12:58:37 -0700 Subject: x86: extend e820 early_res support 32bit -fix #5 reserve early numa kva, so it will not clash with new RAMDISK Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_32.c | 1 - arch/x86/mm/discontig_32.c | 12 +++++------- include/asm-x86/mmzone_32.h | 4 ---- 3 files changed, 5 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index de9c5ee77d07..6f40cb560ea9 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -614,7 +614,6 @@ void __init setup_bootmem_allocator(void) */ find_smp_config(); #endif - numa_kva_reserve(); reserve_crashkernel(); reserve_ibft_region(); diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index 47749727907e..55fdbab6b014 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c @@ -357,6 +357,11 @@ unsigned long __init setup_memory(void) printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n", kva_start_pfn, max_low_pfn); printk("max_pfn = %ld\n", max_pfn); + + /* avoid clash with initrd */ + reserve_early(kva_start_pfn< system_max_low_pfn) @@ -392,13 +397,6 @@ unsigned long __init setup_memory(void) return max_low_pfn; } -void __init numa_kva_reserve(void) -{ - if (kva_pages) - reserve_bootmem(PFN_PHYS(kva_start_pfn), PFN_PHYS(kva_pages), - BOOTMEM_DEFAULT); -} - void __init zone_sizes_init(void) { int nid; diff --git a/include/asm-x86/mmzone_32.h b/include/asm-x86/mmzone_32.h index cb2cad0b65a7..faef751181b7 100644 --- a/include/asm-x86/mmzone_32.h +++ b/include/asm-x86/mmzone_32.h @@ -38,16 +38,12 @@ static inline void get_memcfg_numa(void) } extern int early_pfn_to_nid(unsigned long pfn); -extern void numa_kva_reserve(void); #else /* !CONFIG_NUMA */ #define get_memcfg_numa get_memcfg_numa_flat #define get_zholes_size(n) (0) -static inline void numa_kva_reserve(void) -{ -} #endif /* CONFIG_NUMA */ #ifdef CONFIG_DISCONTIGMEM -- cgit v1.2.3 From d2cd74b158d7214a556226e3312f9fb1de64d7ae Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 2 Jun 2008 11:45:53 +0200 Subject: [ALSA] emu10k1 - Fix inverted Analog/Digital mixer switch on Audigy2 On Audigy2 Platinum, the Analog/Digital mixer switch is inverted. https://bugzilla.novell.com/show_bug.cgi?id=396204 The patch adds a simple workaround. There might be another device requiring a similar fix, too (or fix for audigy2 generically), but right now I fix only the known broken one. Signed-off-by: Takashi Iwai --- include/sound/emu10k1.h | 1 + sound/pci/emu10k1/emu10k1_main.c | 1 + sound/pci/emu10k1/emumixer.c | 13 ++++++++++--- 3 files changed, 12 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/sound/emu10k1.h b/include/sound/emu10k1.h index 7b7b9b13b4dd..10ee28eac018 100644 --- a/include/sound/emu10k1.h +++ b/include/sound/emu10k1.h @@ -1670,6 +1670,7 @@ struct snd_emu_chip_details { unsigned char spi_dac; /* SPI interface for DAC */ unsigned char i2c_adc; /* I2C interface for ADC */ unsigned char adc_1361t; /* Use Philips 1361T ADC */ + unsigned char invert_shared_spdif; /* analog/digital switch inverted */ const char *driver; const char *name; const char *id; /* for backward compatibility - can be NULL if not needed */ diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c index 548c9cc81af5..2f283ea6ad9a 100644 --- a/sound/pci/emu10k1/emu10k1_main.c +++ b/sound/pci/emu10k1/emu10k1_main.c @@ -1528,6 +1528,7 @@ static struct snd_emu_chip_details emu_chip_details[] = { .ca0151_chip = 1, .spk71 = 1, .spdif_bug = 1, + .invert_shared_spdif = 1, /* digital/analog switch swapped */ .adc_1361t = 1, /* 24 bit capture instead of 16bit. Fixes ALSA bug#324 */ .ac97_chip = 1} , {.vendor = 0x1102, .device = 0x0004, .revision = 0x04, diff --git a/sound/pci/emu10k1/emumixer.c b/sound/pci/emu10k1/emumixer.c index fd221209abcb..f34bbfb705f5 100644 --- a/sound/pci/emu10k1/emumixer.c +++ b/sound/pci/emu10k1/emumixer.c @@ -1578,6 +1578,10 @@ static int snd_emu10k1_shared_spdif_get(struct snd_kcontrol *kcontrol, ucontrol->value.integer.value[0] = inl(emu->port + A_IOCFG) & A_IOCFG_GPOUT0 ? 1 : 0; else ucontrol->value.integer.value[0] = inl(emu->port + HCFG) & HCFG_GPOUT0 ? 1 : 0; + if (emu->card_capabilities->invert_shared_spdif) + ucontrol->value.integer.value[0] = + !ucontrol->value.integer.value[0]; + return 0; } @@ -1586,15 +1590,18 @@ static int snd_emu10k1_shared_spdif_put(struct snd_kcontrol *kcontrol, { unsigned long flags; struct snd_emu10k1 *emu = snd_kcontrol_chip(kcontrol); - unsigned int reg, val; + unsigned int reg, val, sw; int change = 0; + sw = ucontrol->value.integer.value[0]; + if (emu->card_capabilities->invert_shared_spdif) + sw = !sw; spin_lock_irqsave(&emu->reg_lock, flags); if ( emu->card_capabilities->i2c_adc) { /* Do nothing for Audigy 2 ZS Notebook */ } else if (emu->audigy) { reg = inl(emu->port + A_IOCFG); - val = ucontrol->value.integer.value[0] ? A_IOCFG_GPOUT0 : 0; + val = sw ? A_IOCFG_GPOUT0 : 0; change = (reg & A_IOCFG_GPOUT0) != val; if (change) { reg &= ~A_IOCFG_GPOUT0; @@ -1603,7 +1610,7 @@ static int snd_emu10k1_shared_spdif_put(struct snd_kcontrol *kcontrol, } } reg = inl(emu->port + HCFG); - val = ucontrol->value.integer.value[0] ? HCFG_GPOUT0 : 0; + val = sw ? HCFG_GPOUT0 : 0; change |= (reg & HCFG_GPOUT0) != val; if (change) { reg &= ~HCFG_GPOUT0; -- cgit v1.2.3 From 831d991821daedd4839073dbca55514432ef1768 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 3 Sep 2007 10:17:39 +0200 Subject: x86: add PCI extended config space access for AMD Barcelona This patch implements PCI extended configuration space access for AMD's Barcelona CPUs. It extends the method using CF8/CFC IO addresses. An x86 capability bit has been introduced that is set for CPUs supporting PCI extended config space accesses. Signed-off-by: Robert Richter Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 4 ++++ arch/x86/kernel/cpu/amd_64.c | 3 +++ arch/x86/kernel/setup.c | 13 +++++++++++++ arch/x86/kernel/setup.h | 26 ++++++++++++++++++++++++++ arch/x86/kernel/setup_64.c | 2 ++ arch/x86/pci/direct.c | 21 +++++++++++++++------ include/asm-x86/cpufeature.h | 2 ++ 7 files changed, 65 insertions(+), 6 deletions(-) create mode 100644 arch/x86/kernel/setup.h (limited to 'include') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 245866828294..99221f9834e4 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -6,6 +6,7 @@ #include #include +#include "../setup.h" #include "cpu.h" /* @@ -308,6 +309,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) if (cpu_has_xmm2) set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + + if (c->x86 == 0x10) + amd_enable_pci_ext_cfg(c); } static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index c815c2c0484b..180097e99219 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -217,6 +217,9 @@ void __cpuinit init_amd(struct cpuinfo_x86 *c) if (c->x86 == 0x10) fam10h_check_enable_mmcfg(); + if (c->x86 == 0x10) + amd_enable_pci_ext_cfg(c); + if (amd_apic_timer_broken()) disable_apic_timer = 1; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 6f80b852a196..d8f171234013 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -136,4 +136,17 @@ void __init setup_per_cpu_areas(void) setup_cpumask_of_cpu(); } +#define ENABLE_CF8_EXT_CFG (1ULL << 46) + +void __cpuinit amd_enable_pci_ext_cfg(struct cpuinfo_x86 *c) +{ + u64 reg; + rdmsrl(MSR_AMD64_NB_CFG, reg); + if (!(reg & ENABLE_CF8_EXT_CFG)) { + reg |= ENABLE_CF8_EXT_CFG; + wrmsrl(MSR_AMD64_NB_CFG, reg); + } + set_cpu_cap(c, X86_FEATURE_PCI_EXT_CFG); +} + #endif diff --git a/arch/x86/kernel/setup.h b/arch/x86/kernel/setup.h new file mode 100644 index 000000000000..66cc2c7f961d --- /dev/null +++ b/arch/x86/kernel/setup.h @@ -0,0 +1,26 @@ +/* + * Internal declarations for shared x86 setup code. + * + * Copyright (c) 2008 Advanced Micro Devices, Inc. + * Contributed by Robert Richter + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307 USA + */ + +#ifndef _ARCH_X86_KERNEL_SETUP_H + +extern void __cpuinit amd_enable_pci_ext_cfg(struct cpuinfo_x86 *c); + +#endif /* _ARCH_X86_KERNEL_SETUP_H */ diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 25afdd80a3cf..215bd67e5b6c 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -73,6 +73,8 @@ #include #include +#include "setup.h" + #include #ifdef CONFIG_PARAVIRT #include diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c index 21d1e0e0d535..27d61b63def6 100644 --- a/arch/x86/pci/direct.c +++ b/arch/x86/pci/direct.c @@ -8,18 +8,21 @@ #include "pci.h" /* - * Functions for accessing PCI configuration space with type 1 accesses + * Functions for accessing PCI base (first 256 bytes) and extended + * (4096 bytes per PCI function) configuration space with type 1 + * accesses. */ #define PCI_CONF1_ADDRESS(bus, devfn, reg) \ - (0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3)) + (0x80000000 | ((reg & 0xF00) << 16) | (bus << 16) \ + | (devfn << 8) | (reg & 0xFC)) static int pci_conf1_read(unsigned int seg, unsigned int bus, unsigned int devfn, int reg, int len, u32 *value) { unsigned long flags; - if ((bus > 255) || (devfn > 255) || (reg > 255)) { + if ((bus > 255) || (devfn > 255) || (reg > 4095)) { *value = -1; return -EINVAL; } @@ -50,7 +53,7 @@ static int pci_conf1_write(unsigned int seg, unsigned int bus, { unsigned long flags; - if ((bus > 255) || (devfn > 255) || (reg > 255)) + if ((bus > 255) || (devfn > 255) || (reg > 4095)) return -EINVAL; spin_lock_irqsave(&pci_config_lock, flags); @@ -260,10 +263,16 @@ void __init pci_direct_init(int type) return; printk(KERN_INFO "PCI: Using configuration type %d for base access\n", type); - if (type == 1) + if (type == 1) { raw_pci_ops = &pci_direct_conf1; - else + if (!raw_pci_ext_ops && cpu_has_pci_ext_cfg) { + printk(KERN_INFO "PCI: Using configuration type 1 " + "for extended access\n"); + raw_pci_ext_ops = &pci_direct_conf1; + } + } else { raw_pci_ops = &pci_direct_conf2; + } } int __init pci_direct_probe(void) diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h index 0d609c837a41..40fcbba00f15 100644 --- a/include/asm-x86/cpufeature.h +++ b/include/asm-x86/cpufeature.h @@ -79,6 +79,7 @@ #define X86_FEATURE_REP_GOOD (3*32+16) /* rep microcode works well on this CPU */ #define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* Mfence synchronizes RDTSC */ #define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* Lfence synchronizes RDTSC */ +#define X86_FEATURE_PCI_EXT_CFG (3*32+19) /* PCI extended cfg access */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ @@ -187,6 +188,7 @@ extern const char * const x86_power_flags[32]; #define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES) #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) #define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT) +#define cpu_has_pci_ext_cfg boot_cpu_has(X86_FEATURE_PCI_EXT_CFG) #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) # define cpu_has_invlpg 1 -- cgit v1.2.3 From 6fc92866a4a6778a9732a14b61f70cb9014b2a1a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 2 Jun 2008 10:54:16 +0200 Subject: fix build bug in "x86: add PCI extended config space access for AMD Barcelona" Signed-off-by: Ingo Molnar --- include/asm-x86/mmconfig.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-x86/mmconfig.h b/include/asm-x86/mmconfig.h index 95beda07c6fa..46d6bb135df4 100644 --- a/include/asm-x86/mmconfig.h +++ b/include/asm-x86/mmconfig.h @@ -9,4 +9,6 @@ static inline void fam10h_check_enable_mmcfg(void) { } static inline void check_enable_amd_mmconf_dmi(void) { } #endif +extern void __cpuinit amd_enable_pci_ext_cfg(struct cpuinfo_x86 *c); + #endif -- cgit v1.2.3 From c46e62f73569d7ef42255bd6f31e35925b7f1492 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Wed, 28 May 2008 12:42:57 +0200 Subject: i8259: fix final ugliness Introduce IRQx_VECTOR on 32-bit, so that #ifdef noise is kept down. There should be no object code change. [ mingo@elte.hu: merged to x86/irq not x86/i8259 due to x86/irq having restructured the vector code into asm-x86/irq_vectors.h, which this patch touches. ] Signed-off-by: Pavel Machek Signed-off-by: Ingo Molnar --- arch/x86/kernel/i8259.c | 22 ++++++++-------------- include/asm-x86/irq_vectors.h | 9 ++++++--- 2 files changed, 14 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 7a0fda8f01b5..dc92b49d9204 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -297,34 +297,28 @@ void init_8259A(int auto_eoi) * outb_pic - this has to work on a wide range of PC hardware. */ outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ -#ifndef CONFIG_X86_64 - outb_pic(0x20 + 0, PIC_MASTER_IMR); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */ - outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */ -#else /* CONFIG_X86_64 */ - /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */ + + /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 on x86-64, + to 0x20-0x27 on i386 */ outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR); + /* 8259A-1 (the master) has a slave on IR2 */ - outb_pic(0x04, PIC_MASTER_IMR); -#endif /* CONFIG_X86_64 */ + outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); + if (auto_eoi) /* master does Auto EOI */ outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); else /* master expects normal EOI */ outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ -#ifndef CONFIG_X86_64 - outb_pic(0x20 + 8, PIC_SLAVE_IMR); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */ - outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */ - outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */ -#else /* CONFIG_X86_64 */ - /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */ + + /* ICW2: 8259A-2 IR0-7 mapped to IRQ8_VECTOR */ outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */ outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */ outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); -#endif /* CONFIG_X86_64 */ if (auto_eoi) /* * In AEOI mode we just have to mask the interrupt diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h index 3cb6d8c77b39..b58581e2e24e 100644 --- a/include/asm-x86/irq_vectors.h +++ b/include/asm-x86/irq_vectors.h @@ -18,17 +18,20 @@ #endif /* - * Vectors 0x20-0x2f are used for ISA interrupts on 32 bit. - * * Reserve the lowest usable priority level 0x20 - 0x2f for triggering * cleanup after irq migration on 64 bit. */ #define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR /* - * Vectors 0x30-0x3f are used for ISA interrupts on 64 bit + * Vectors 0x20-0x2f are used for ISA interrupts on 32 bit. + * Vectors 0x30-0x3f are used for ISA interrupts on 64 bit. */ +#ifdef CONFIG_X86_32 +#define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR) +#else #define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR + 0x10) +#endif #define IRQ1_VECTOR (IRQ0_VECTOR + 1) #define IRQ2_VECTOR (IRQ0_VECTOR + 2) #define IRQ3_VECTOR (IRQ0_VECTOR + 3) -- cgit v1.2.3 From 1a5726528a70bb239bdd149aef7f2155cd2b1699 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 2 Jun 2008 12:21:36 +0200 Subject: fix build bug in "x86: add PCI extended config space access for AMD Barcelona" --- arch/x86/kernel/cpu/amd.c | 1 + include/asm-x86/mmconfig.h | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 99221f9834e4..656b40aed647 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include "../setup.h" diff --git a/include/asm-x86/mmconfig.h b/include/asm-x86/mmconfig.h index 46d6bb135df4..691798fbee10 100644 --- a/include/asm-x86/mmconfig.h +++ b/include/asm-x86/mmconfig.h @@ -9,6 +9,10 @@ static inline void fam10h_check_enable_mmcfg(void) { } static inline void check_enable_amd_mmconf_dmi(void) { } #endif +#if defined(CONFIG_SMP) && defined(CONFIG_X86_64) extern void __cpuinit amd_enable_pci_ext_cfg(struct cpuinfo_x86 *c); +#else +static inline void amd_enable_pci_ext_cfg(struct cpuinfo_x86 *c) { } +#endif #endif -- cgit v1.2.3 From c1f64a58003fd2efaa725a857e269a15f765791a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 27 May 2008 09:47:13 -0700 Subject: x86: MMIO and gcc re-ordering issue On Tue, 27 May 2008, Linus Torvalds wrote: > > Expecting people to fix up all drivers is simply not going to happen. And > serializing things shouldn't be *that* expensive. People who cannot take > the expense can continue to use the magic __raw_writel() etc stuff. Of course, for non-x86, you kind of have to expect drivers to be well-behaved, so non-x86 can probably avoid this simply because there are less relevant drivers involved. Here's a UNTESTED patch for x86 that may or may not compile and work, and which serializes (on a compiler level) the IO accesses against regular memory accesses. __read[bwlq]()/__write[bwlq]() are not serialized with a :"memory" barrier, although since they still use "asm volatile" I suspect that i practice they are probably serial too. Did not look very closely at any generated code (only did a trivial test to see that the code looks *roughly* correct). Signed-off-by: Ingo Molnar --- include/asm-x86/io.h | 56 ++++++++++++++++++++++++++++++++++++++ include/asm-x86/io_32.h | 49 ---------------------------------- include/asm-x86/io_64.h | 71 ------------------------------------------------- 3 files changed, 56 insertions(+), 120 deletions(-) (limited to 'include') diff --git a/include/asm-x86/io.h b/include/asm-x86/io.h index d5b11f60dbd0..8e9eca93f9b9 100644 --- a/include/asm-x86/io.h +++ b/include/asm-x86/io.h @@ -3,6 +3,62 @@ #define ARCH_HAS_IOREMAP_WC +#include + +#define build_mmio_read(name, size, type, reg, barrier) \ +static inline type name(const volatile void __iomem *addr) \ +{ type ret; asm volatile("mov" size " %1,%0":"=" reg (ret) \ +:"m" (*(volatile type __force *)addr) barrier); return ret; } + +#define build_mmio_write(name, size, type, reg, barrier) \ +static inline void name(type val, volatile void __iomem *addr) \ +{ asm volatile("mov" size " %0,%1": :reg (val), \ +"m" (*(volatile type __force *)addr) barrier); } + +build_mmio_read(readb, "b", unsigned char, "q", :"memory") +build_mmio_read(readw, "w", unsigned short, "r", :"memory") +build_mmio_read(readl, "l", unsigned int, "r", :"memory") + +build_mmio_read(__readb, "b", unsigned char, "q", ) +build_mmio_read(__readw, "w", unsigned short, "r", ) +build_mmio_read(__readl, "l", unsigned int, "r", ) + +build_mmio_write(writeb, "b", unsigned char, "q", :"memory") +build_mmio_write(writew, "w", unsigned short, "r", :"memory") +build_mmio_write(writel, "l", unsigned int, "r", :"memory") + +build_mmio_write(__writeb, "b", unsigned char, "q", ) +build_mmio_write(__writew, "w", unsigned short, "r", ) +build_mmio_write(__writel, "l", unsigned int, "r", ) + +#define readb_relaxed(a) __readb(a) +#define readw_relaxed(a) __readw(a) +#define readl_relaxed(a) __readl(a) +#define __raw_readb __readb +#define __raw_readw __readw +#define __raw_readl __readl + +#define __raw_writeb __writeb +#define __raw_writew __writew +#define __raw_writel __writel + +#define mmiowb() barrier() + +#ifdef CONFIG_X86_64 +build_mmio_read(readq, "q", unsigned long, "r", :"memory") +build_mmio_read(__readq, "q", unsigned long, "r", ) +build_mmio_write(writeq, "q", unsigned long, "r", :"memory") +build_mmio_write(__writeq, "q", unsigned long, "r", ) + +#define readq_relaxed(a) __readq(a) +#define __raw_readq __readq +#define __raw_writeq writeq + +/* Let people know we have them */ +#define readq readq +#define writeq writeq +#endif + #ifdef CONFIG_X86_32 # include "io_32.h" #else diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h index 049e81e797a0..d71be8df9797 100644 --- a/include/asm-x86/io_32.h +++ b/include/asm-x86/io_32.h @@ -149,55 +149,6 @@ extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); #define virt_to_bus virt_to_phys #define bus_to_virt phys_to_virt -/* - * readX/writeX() are used to access memory mapped devices. On some - * architectures the memory mapped IO stuff needs to be accessed - * differently. On the x86 architecture, we just read/write the - * memory location directly. - */ - -static inline unsigned char readb(const volatile void __iomem *addr) -{ - return *(volatile unsigned char __force *)addr; -} - -static inline unsigned short readw(const volatile void __iomem *addr) -{ - return *(volatile unsigned short __force *)addr; -} - -static inline unsigned int readl(const volatile void __iomem *addr) -{ - return *(volatile unsigned int __force *) addr; -} - -#define readb_relaxed(addr) readb(addr) -#define readw_relaxed(addr) readw(addr) -#define readl_relaxed(addr) readl(addr) -#define __raw_readb readb -#define __raw_readw readw -#define __raw_readl readl - -static inline void writeb(unsigned char b, volatile void __iomem *addr) -{ - *(volatile unsigned char __force *)addr = b; -} - -static inline void writew(unsigned short b, volatile void __iomem *addr) -{ - *(volatile unsigned short __force *)addr = b; -} - -static inline void writel(unsigned int b, volatile void __iomem *addr) -{ - *(volatile unsigned int __force *)addr = b; -} -#define __raw_writeb writeb -#define __raw_writew writew -#define __raw_writel writel - -#define mmiowb() - static inline void memset_io(volatile void __iomem *addr, unsigned char val, int count) { diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h index 0930bedf9e4d..ddd8058a5026 100644 --- a/include/asm-x86/io_64.h +++ b/include/asm-x86/io_64.h @@ -204,77 +204,6 @@ extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); #define virt_to_bus virt_to_phys #define bus_to_virt phys_to_virt -/* - * readX/writeX() are used to access memory mapped devices. On some - * architectures the memory mapped IO stuff needs to be accessed - * differently. On the x86 architecture, we just read/write the - * memory location directly. - */ - -static inline __u8 __readb(const volatile void __iomem *addr) -{ - return *(__force volatile __u8 *)addr; -} - -static inline __u16 __readw(const volatile void __iomem *addr) -{ - return *(__force volatile __u16 *)addr; -} - -static __always_inline __u32 __readl(const volatile void __iomem *addr) -{ - return *(__force volatile __u32 *)addr; -} - -static inline __u64 __readq(const volatile void __iomem *addr) -{ - return *(__force volatile __u64 *)addr; -} - -#define readb(x) __readb(x) -#define readw(x) __readw(x) -#define readl(x) __readl(x) -#define readq(x) __readq(x) -#define readb_relaxed(a) readb(a) -#define readw_relaxed(a) readw(a) -#define readl_relaxed(a) readl(a) -#define readq_relaxed(a) readq(a) -#define __raw_readb readb -#define __raw_readw readw -#define __raw_readl readl -#define __raw_readq readq - -#define mmiowb() - -static inline void __writel(__u32 b, volatile void __iomem *addr) -{ - *(__force volatile __u32 *)addr = b; -} - -static inline void __writeq(__u64 b, volatile void __iomem *addr) -{ - *(__force volatile __u64 *)addr = b; -} - -static inline void __writeb(__u8 b, volatile void __iomem *addr) -{ - *(__force volatile __u8 *)addr = b; -} - -static inline void __writew(__u16 b, volatile void __iomem *addr) -{ - *(__force volatile __u16 *)addr = b; -} - -#define writeq(val, addr) __writeq((val), (addr)) -#define writel(val, addr) __writel((val), (addr)) -#define writew(val, addr) __writew((val), (addr)) -#define writeb(val, addr) __writeb((val), (addr)) -#define __raw_writeb writeb -#define __raw_writew writew -#define __raw_writel writel -#define __raw_writeq writeq - void __memcpy_fromio(void *, unsigned long, unsigned); void __memcpy_toio(unsigned long, const void *, unsigned); -- cgit v1.2.3 From 83bea8e1fa0c47b30664b6f92397c016c22f77fb Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Tue, 27 May 2008 21:03:46 +0200 Subject: x86: fix incomplete include guard in include/asm-x86/seccomp_32.h Signed-off-by: Vegard Nossum Signed-off-by: Ingo Molnar --- include/asm-x86/seccomp_32.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-x86/seccomp_32.h b/include/asm-x86/seccomp_32.h index 18da19e89bff..36e71c5f306f 100644 --- a/include/asm-x86/seccomp_32.h +++ b/include/asm-x86/seccomp_32.h @@ -1,4 +1,5 @@ #ifndef _ASM_SECCOMP_H +#define _ASM_SECCOMP_H #include -- cgit v1.2.3 From 6330a30a76c1e62d4b4ec238368957f8febf9113 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Wed, 28 May 2008 09:46:19 +0200 Subject: x86: break mutual header inclusion This breaks up the mutual inclusion between headers ptrace.h and vm86.h by moving some small part of vm86.h which is needed by ptrace.h into processor-flags.h. We also try to move #include lines to the top. This has been compile tested on x86_32 and x86_64 defconfig, and run through 'make headers_check'. Cc: Adrian Bunk Signed-off-by: Vegard Nossum Signed-off-by: Ingo Molnar --- include/asm-x86/processor-flags.h | 6 ++++++ include/asm-x86/ptrace.h | 8 +++++--- include/asm-x86/vm86.h | 8 +------- 3 files changed, 12 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/asm-x86/processor-flags.h b/include/asm-x86/processor-flags.h index 199cab107d85..092b39b3a7e6 100644 --- a/include/asm-x86/processor-flags.h +++ b/include/asm-x86/processor-flags.h @@ -88,4 +88,10 @@ #define CX86_ARR_BASE 0xc4 #define CX86_RCR_BASE 0xdc +#ifdef CONFIG_VM86 +#define X86_VM_MASK X86_EFLAGS_VM +#else +#define X86_VM_MASK 0 /* No VM86 support */ +#endif + #endif /* __ASM_I386_PROCESSOR_FLAGS_H */ diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h index 9f922b0b95d6..8a71db803da6 100644 --- a/include/asm-x86/ptrace.h +++ b/include/asm-x86/ptrace.h @@ -3,7 +3,12 @@ #include /* For __user */ #include +#include +#ifdef __KERNEL__ +#include /* the DS BTS struct is used for ptrace too */ +#include +#endif #ifndef __ASSEMBLY__ @@ -55,9 +60,6 @@ struct pt_regs { unsigned long ss; }; -#include -#include - #endif /* __KERNEL__ */ #else /* __i386__ */ diff --git a/include/asm-x86/vm86.h b/include/asm-x86/vm86.h index cbf4a0effa75..5ce351325e01 100644 --- a/include/asm-x86/vm86.h +++ b/include/asm-x86/vm86.h @@ -115,7 +115,6 @@ struct vm86plus_info_struct { unsigned long is_vm86pus:1; /* for vm86 internal use */ unsigned char vm86dbg_intxxtab[32]; /* for debugger */ }; - struct vm86plus_struct { struct vm86_regs regs; unsigned long flags; @@ -128,11 +127,7 @@ struct vm86plus_struct { #ifdef __KERNEL__ -#ifdef CONFIG_VM86 -#define X86_VM_MASK X86_EFLAGS_VM -#else -#define X86_VM_MASK 0 /* No VM86 support */ -#endif +#include /* * This is the (kernel) stack-layout when we have done a "SAVE_ALL" from vm86 @@ -142,7 +137,6 @@ struct vm86plus_struct { * at the end of the structure. Look at ptrace.h to see the "normal" * setup. For user space layout see 'struct vm86_regs' above. */ -#include struct kernel_vm86_regs { /* -- cgit v1.2.3 From ad90c0e3ce8d20d6873b57e36181ef6d7a0097fe Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 27 May 2008 20:48:37 -0400 Subject: ftrace: user update and disable dynamic ftrace daemon In dynamic ftrace, the mcount function starts off pointing to a stub function that just returns. On start up, the call to the stub is modified to point to a "record_ip" function. The job of the record_ip function is to add the function to a pre-allocated hash list. If the function is already there, it simply is ignored, otherwise it is added to the list. Later, a ftraced daemon wakes up and calls kstop_machine if any functions have been recorded, and changes the calls to the recorded functions to a simple nop. If no functions were recorded, the daemon goes back to sleep. The daemon wakes up once a second to see if it needs to update any newly recorded functions into nops. Usually it does not, but if a lot of code has been executed for the first time in the kernel, the ftraced daemon will call kstop_machine to update those into nops. The problem currently is that there's no way to stop the daemon from doing this, and it can cause unneeded latencies (800us which for some is bothersome). This patch adds a new file /debugfs/tracing/ftraced_enabled. If the daemon is active, reading this will return "enabled\n" and "disabled\n" when the daemon is not running. To disable the daemon, the user can echo "0" or "disable" into this file, and "1" or "enable" to re-enable the daemon. Since the daemon is used to convert the functions into nops to increase the performance of the system, I also added that anytime something is written into the ftraced_enabled file, kstop_machine will run if there are new functions that have been detected that need to be converted. This way the user can disable the daemon but still be able to control the conversion of the mcount calls to nops by simply, "echo 0 > /debugfs/tracing/ftraced_enabled" when they need to do more conversions. To see the number of converted functions: "cat /debugfs/tracing/dyn_ftrace_total_info" Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 6 ++ kernel/trace/ftrace.c | 157 ++++++++++++++++++++++++++++++++++--------------- 2 files changed, 116 insertions(+), 47 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b482fe88bc04..623819433ed5 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -72,9 +72,15 @@ extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); + +void ftrace_disable_daemon(void); +void ftrace_enable_daemon(void); + #else # define ftrace_force_update() ({ 0; }) # define ftrace_set_filter(buf, len, reset) do { } while (0) +# define ftrace_disable_daemon() do { } while (0) +# define ftrace_enable_daemon() do { } while (0) #endif /* totally disable ftrace - can not re-enable after this */ diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1843edc098a6..f762f5a2d331 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -151,8 +151,6 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) #ifdef CONFIG_DYNAMIC_FTRACE static struct task_struct *ftraced_task; -static DECLARE_WAIT_QUEUE_HEAD(ftraced_waiters); -static unsigned long ftraced_iteration_counter; enum { FTRACE_ENABLE_CALLS = (1 << 0), @@ -189,6 +187,7 @@ static struct ftrace_page *ftrace_pages; static int ftraced_trigger; static int ftraced_suspend; +static int ftraced_stop; static int ftrace_record_suspend; @@ -474,14 +473,21 @@ ftrace_code_disable(struct dyn_ftrace *rec) return 1; } +static int __ftrace_update_code(void *ignore); + static int __ftrace_modify_code(void *data) { unsigned long addr; int *command = data; - if (*command & FTRACE_ENABLE_CALLS) + if (*command & FTRACE_ENABLE_CALLS) { + /* + * Update any recorded ips now that we have the + * machine stopped + */ + __ftrace_update_code(NULL); ftrace_replace_code(1); - else if (*command & FTRACE_DISABLE_CALLS) + } else if (*command & FTRACE_DISABLE_CALLS) ftrace_replace_code(0); if (*command & FTRACE_UPDATE_TRACE_FUNC) @@ -503,6 +509,25 @@ static void ftrace_run_update_code(int command) stop_machine_run(__ftrace_modify_code, &command, NR_CPUS); } +void ftrace_disable_daemon(void) +{ + /* Stop the daemon from calling kstop_machine */ + mutex_lock(&ftraced_lock); + ftraced_stop = 1; + mutex_unlock(&ftraced_lock); + + ftrace_force_update(); +} + +void ftrace_enable_daemon(void) +{ + mutex_lock(&ftraced_lock); + ftraced_stop = 0; + mutex_unlock(&ftraced_lock); + + ftrace_force_update(); +} + static ftrace_func_t saved_ftrace_func; static void ftrace_startup(void) @@ -603,6 +628,7 @@ static int __ftrace_update_code(void *ignore) int i; /* Don't be recording funcs now */ + ftrace_record_suspend++; save_ftrace_enabled = ftrace_enabled; ftrace_enabled = 0; @@ -628,18 +654,23 @@ static int __ftrace_update_code(void *ignore) stop = ftrace_now(raw_smp_processor_id()); ftrace_update_time = stop - start; ftrace_update_tot_cnt += ftrace_update_cnt; + ftraced_trigger = 0; ftrace_enabled = save_ftrace_enabled; + ftrace_record_suspend--; return 0; } -static void ftrace_update_code(void) +static int ftrace_update_code(void) { - if (unlikely(ftrace_disabled)) - return; + if (unlikely(ftrace_disabled) || + !ftrace_enabled || !ftraced_trigger) + return 0; stop_machine_run(__ftrace_update_code, NULL, NR_CPUS); + + return 1; } static int ftraced(void *ignore) @@ -658,14 +689,13 @@ static int ftraced(void *ignore) mutex_lock(&ftrace_sysctl_lock); mutex_lock(&ftraced_lock); - if (ftrace_enabled && ftraced_trigger && !ftraced_suspend) { - ftrace_record_suspend++; - ftrace_update_code(); + if (!ftraced_suspend && !ftraced_stop && + ftrace_update_code()) { usecs = nsecs_to_usecs(ftrace_update_time); if (ftrace_update_tot_cnt > 100000) { ftrace_update_tot_cnt = 0; pr_info("hm, dftrace overflow: %lu change%s" - " (%lu total) in %lu usec%s\n", + " (%lu total) in %lu usec%s\n", ftrace_update_cnt, ftrace_update_cnt != 1 ? "s" : "", ftrace_update_tot_cnt, @@ -673,15 +703,10 @@ static int ftraced(void *ignore) ftrace_disabled = 1; WARN_ON_ONCE(1); } - ftraced_trigger = 0; - ftrace_record_suspend--; } - ftraced_iteration_counter++; mutex_unlock(&ftraced_lock); mutex_unlock(&ftrace_sysctl_lock); - wake_up_interruptible(&ftraced_waiters); - ftrace_shutdown_replenish(); } __set_current_state(TASK_RUNNING); @@ -1219,6 +1244,55 @@ ftrace_notrace_release(struct inode *inode, struct file *file) return ftrace_regex_release(inode, file, 0); } +static ssize_t +ftraced_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + /* don't worry about races */ + char *buf = ftraced_stop ? "disabled\n" : "enabled\n"; + int r = strlen(buf); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + +static ssize_t +ftraced_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[64]; + long val; + int ret; + + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + if (strncmp(buf, "enable", 6) == 0) + val = 1; + else if (strncmp(buf, "disable", 7) == 0) + val = 0; + else { + buf[cnt] = 0; + + ret = strict_strtoul(buf, 10, &val); + if (ret < 0) + return ret; + + val = !!val; + } + + if (val) + ftrace_enable_daemon(); + else + ftrace_disable_daemon(); + + filp->f_pos += cnt; + + return cnt; +} + static struct file_operations ftrace_avail_fops = { .open = ftrace_avail_open, .read = seq_read, @@ -1242,51 +1316,34 @@ static struct file_operations ftrace_notrace_fops = { .release = ftrace_notrace_release, }; +static struct file_operations ftraced_fops = { + .open = tracing_open_generic, + .read = ftraced_read, + .write = ftraced_write, +}; + /** * ftrace_force_update - force an update to all recording ftrace functions - * - * The ftrace dynamic update daemon only wakes up once a second. - * There may be cases where an update needs to be done immediately - * for tests or internal kernel tracing to begin. This function - * wakes the daemon to do an update and will not return until the - * update is complete. */ int ftrace_force_update(void) { - unsigned long last_counter; - DECLARE_WAITQUEUE(wait, current); int ret = 0; if (unlikely(ftrace_disabled)) return -ENODEV; + mutex_lock(&ftrace_sysctl_lock); mutex_lock(&ftraced_lock); - last_counter = ftraced_iteration_counter; - - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&ftraced_waiters, &wait); - if (unlikely(!ftraced_task)) { - ret = -ENODEV; - goto out; - } - - do { - mutex_unlock(&ftraced_lock); - wake_up_process(ftraced_task); - schedule(); - mutex_lock(&ftraced_lock); - if (signal_pending(current)) { - ret = -EINTR; - break; - } - set_current_state(TASK_INTERRUPTIBLE); - } while (last_counter == ftraced_iteration_counter); + /* + * If ftraced_trigger is not set, then there is nothing + * to update. + */ + if (ftraced_trigger && !ftrace_update_code()) + ret = -EBUSY; - out: mutex_unlock(&ftraced_lock); - remove_wait_queue(&ftraced_waiters, &wait); - set_current_state(TASK_RUNNING); + mutex_unlock(&ftrace_sysctl_lock); return ret; } @@ -1331,6 +1388,12 @@ static __init int ftrace_init_debugfs(void) if (!entry) pr_warning("Could not create debugfs " "'set_ftrace_notrace' entry\n"); + + entry = debugfs_create_file("ftraced_enabled", 0644, d_tracer, + NULL, &ftraced_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'ftraced_enabled' entry\n"); return 0; } -- cgit v1.2.3 From fb093eab6d8963a085f112773284f2bcb3d7907b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 27 May 2008 16:29:20 -0700 Subject: x86: remove duplicated e820 func in setup.h we already have them in e820.h Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/asm-x86/setup.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h index 64f5f0c11d66..9e163fc3e984 100644 --- a/include/asm-x86/setup.h +++ b/include/asm-x86/setup.h @@ -50,15 +50,9 @@ extern struct boot_params boot_params; */ #define LOWMEMSIZE() (0x9f000) -struct e820entry; - char * __init machine_specific_memory_setup(void); char *memory_setup(void); -int __init copy_e820_map(struct e820entry *biosmap, int nr_map); -void __init add_memory_region(unsigned long long start, - unsigned long long size, int type); - void __init i386_start_kernel(void); -- cgit v1.2.3 From 9f5314fb4d556d3132c784d0df47352b2830ca53 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Wed, 28 May 2008 09:51:18 -0500 Subject: x86, uv: update macros used by UV platform Update the UV address macros to better describe the fields of UV physical addresses. Improve comments in the header files. Add additional MMR definitions. Signed-off-by: Jack Steiner Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_uv_x.c | 141 +++++++---- include/asm-x86/uv/uv_hub.h | 188 ++++++++++----- include/asm-x86/uv/uv_mmrs.h | 509 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 731 insertions(+), 107 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index ebf13908a743..45e84acca8a9 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -5,7 +5,7 @@ * * SGI UV APIC functions (note: not an Intel compatible APIC) * - * Copyright (C) 2007 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. */ #include @@ -55,37 +55,37 @@ static cpumask_t uv_vector_allocation_domain(int cpu) int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) { unsigned long val; - int nasid; + int pnode; - nasid = uv_apicid_to_nasid(phys_apicid); + pnode = uv_apicid_to_pnode(phys_apicid); val = (1UL << UVH_IPI_INT_SEND_SHFT) | (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | APIC_DM_INIT; - uv_write_global_mmr64(nasid, UVH_IPI_INT, val); + uv_write_global_mmr64(pnode, UVH_IPI_INT, val); mdelay(10); val = (1UL << UVH_IPI_INT_SEND_SHFT) | (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | APIC_DM_STARTUP; - uv_write_global_mmr64(nasid, UVH_IPI_INT, val); + uv_write_global_mmr64(pnode, UVH_IPI_INT, val); return 0; } static void uv_send_IPI_one(int cpu, int vector) { unsigned long val, apicid, lapicid; - int nasid; + int pnode; apicid = per_cpu(x86_cpu_to_apicid, cpu); /* ZZZ - cache node-local ? */ lapicid = apicid & 0x3f; /* ZZZ macro needed */ - nasid = uv_apicid_to_nasid(apicid); + pnode = uv_apicid_to_pnode(apicid); val = (1UL << UVH_IPI_INT_SEND_SHFT) | (lapicid << UVH_IPI_INT_APIC_ID_SHFT) | (vector << UVH_IPI_INT_VECTOR_SHFT); - uv_write_global_mmr64(nasid, UVH_IPI_INT, val); + uv_write_global_mmr64(pnode, UVH_IPI_INT, val); } static void uv_send_IPI_mask(cpumask_t mask, int vector) @@ -159,39 +159,81 @@ struct genapic apic_x2apic_uv_x = { .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */ }; -static __cpuinit void set_x2apic_extra_bits(int nasid) +static __cpuinit void set_x2apic_extra_bits(int pnode) { - __get_cpu_var(x2apic_extra_bits) = ((nasid >> 1) << 6); + __get_cpu_var(x2apic_extra_bits) = (pnode << 6); } /* * Called on boot cpu. */ +static __init int boot_pnode_to_blade(int pnode) +{ + int blade; + + for (blade = 0; blade < uv_num_possible_blades(); blade++) + if (pnode == uv_blade_info[blade].pnode) + return blade; + BUG(); +} + +struct redir_addr { + unsigned long redirect; + unsigned long alias; +}; + +#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT + +static __initdata struct redir_addr redir_addrs[] = { + {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_SI_ALIAS0_OVERLAY_CONFIG}, + {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_SI_ALIAS1_OVERLAY_CONFIG}, + {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_SI_ALIAS2_OVERLAY_CONFIG}, +}; + +static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) +{ + union uvh_si_alias0_overlay_config_u alias; + union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect; + int i; + + for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) { + alias.v = uv_read_local_mmr(redir_addrs[i].alias); + if (alias.s.base == 0) { + *size = (1UL << alias.s.m_alias); + redirect.v = uv_read_local_mmr(redir_addrs[i].redirect); + *base = (unsigned long)redirect.s.dest_base << DEST_SHIFT; + return; + } + } + BUG(); +} + static __init void uv_system_init(void) { union uvh_si_addr_map_config_u m_n_config; - int bytes, nid, cpu, lcpu, nasid, last_nasid, blade; - unsigned long mmr_base; + union uvh_node_id_u node_id; + unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; + int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val; + unsigned long mmr_base, present; m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); + m_val = m_n_config.s.m_skt; + n_val = m_n_config.s.n_skt; mmr_base = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & ~UV_MMR_ENABLE; printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base); - last_nasid = -1; - for_each_possible_cpu(cpu) { - nid = cpu_to_node(cpu); - nasid = uv_apicid_to_nasid(per_cpu(x86_cpu_to_apicid, cpu)); - if (nasid != last_nasid) - uv_possible_blades++; - last_nasid = nasid; - } + for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) + uv_possible_blades += + hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8)); printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades()); bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); uv_blade_info = alloc_bootmem_pages(bytes); + get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size); + bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes(); uv_node_to_blade = alloc_bootmem_pages(bytes); memset(uv_node_to_blade, 255, bytes); @@ -200,43 +242,56 @@ static __init void uv_system_init(void) uv_cpu_to_blade = alloc_bootmem_pages(bytes); memset(uv_cpu_to_blade, 255, bytes); - last_nasid = -1; - blade = -1; - lcpu = -1; - for_each_possible_cpu(cpu) { - nid = cpu_to_node(cpu); - nasid = uv_apicid_to_nasid(per_cpu(x86_cpu_to_apicid, cpu)); - if (nasid != last_nasid) { - blade++; - lcpu = -1; - uv_blade_info[blade].nr_posible_cpus = 0; + blade = 0; + for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) { + present = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8); + for (j = 0; j < 64; j++) { + if (!test_bit(j, &present)) + continue; + uv_blade_info[blade].pnode = (i * 64 + j); + uv_blade_info[blade].nr_possible_cpus = 0; uv_blade_info[blade].nr_online_cpus = 0; + blade++; } - last_nasid = nasid; - lcpu++; + } - uv_cpu_hub_info(cpu)->m_val = m_n_config.s.m_skt; - uv_cpu_hub_info(cpu)->n_val = m_n_config.s.n_skt; + node_id.v = uv_read_local_mmr(UVH_NODE_ID); + gnode_upper = (((unsigned long)node_id.s.node_id) & + ~((1 << n_val) - 1)) << m_val; + + for_each_present_cpu(cpu) { + nid = cpu_to_node(cpu); + pnode = uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, cpu)); + blade = boot_pnode_to_blade(pnode); + lcpu = uv_blade_info[blade].nr_possible_cpus; + uv_blade_info[blade].nr_possible_cpus++; + + uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; + uv_cpu_hub_info(cpu)->lowmem_remap_top = + lowmem_redir_base + lowmem_redir_size; + uv_cpu_hub_info(cpu)->m_val = m_val; + uv_cpu_hub_info(cpu)->n_val = m_val; uv_cpu_hub_info(cpu)->numa_blade_id = blade; uv_cpu_hub_info(cpu)->blade_processor_id = lcpu; - uv_cpu_hub_info(cpu)->local_nasid = nasid; - uv_cpu_hub_info(cpu)->gnode_upper = - nasid & ~((1 << uv_hub_info->n_val) - 1); + uv_cpu_hub_info(cpu)->pnode = pnode; + uv_cpu_hub_info(cpu)->pnode_mask = (1 << n_val) - 1; + uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; + uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */ - uv_blade_info[blade].nasid = nasid; - uv_blade_info[blade].nr_posible_cpus++; uv_node_to_blade[nid] = blade; uv_cpu_to_blade[cpu] = blade; - printk(KERN_DEBUG "UV cpu %d, apicid 0x%x, nasid %d, nid %d\n", - cpu, per_cpu(x86_cpu_to_apicid, cpu), nasid, nid); - printk(KERN_DEBUG "UV lcpu %d, blade %d\n", lcpu, blade); + printk(KERN_DEBUG "UV cpu %d, apicid 0x%x, pnode %d, nid %d, " + "lcpu %d, blade %d\n", + cpu, per_cpu(x86_cpu_to_apicid, cpu), pnode, nid, + lcpu, blade); } } /* * Called on each cpu to initialize the per_cpu UV data area. + * ZZZ hotplug not supported yet */ void __cpuinit uv_cpu_init(void) { @@ -246,5 +301,5 @@ void __cpuinit uv_cpu_init(void) uv_blade_info[uv_numa_blade_id()].nr_online_cpus++; if (get_uv_system_type() == UV_NON_UNIQUE_APIC) - set_x2apic_extra_bits(uv_hub_info->local_nasid); + set_x2apic_extra_bits(uv_hub_info->pnode); } diff --git a/include/asm-x86/uv/uv_hub.h b/include/asm-x86/uv/uv_hub.h index 26b9240d1e23..65004881de5f 100644 --- a/include/asm-x86/uv/uv_hub.h +++ b/include/asm-x86/uv/uv_hub.h @@ -5,7 +5,7 @@ * * SGI UV architectural definitions * - * Copyright (C) 2007 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. */ #ifndef __ASM_X86_UV_HUB_H__ @@ -20,26 +20,49 @@ /* * Addressing Terminology * - * NASID - network ID of a router, Mbrick or Cbrick. Nasid values of - * routers always have low bit of 1, C/MBricks have low bit - * equal to 0. Most addressing macros that target UV hub chips - * right shift the NASID by 1 to exclude the always-zero bit. + * M - The low M bits of a physical address represent the offset + * into the blade local memory. RAM memory on a blade is physically + * contiguous (although various IO spaces may punch holes in + * it).. * - * SNASID - NASID right shifted by 1 bit. + * N - Number of bits in the node portion of a socket physical + * address. + * + * NASID - network ID of a router, Mbrick or Cbrick. Nasid values of + * routers always have low bit of 1, C/MBricks have low bit + * equal to 0. Most addressing macros that target UV hub chips + * right shift the NASID by 1 to exclude the always-zero bit. + * NASIDs contain up to 15 bits. + * + * GNODE - NASID right shifted by 1 bit. Most mmrs contain gnodes instead + * of nasids. + * + * PNODE - the low N bits of the GNODE. The PNODE is the most useful variant + * of the nasid for socket usage. + * + * + * NumaLink Global Physical Address Format: + * +--------------------------------+---------------------+ + * |00..000| GNODE | NodeOffset | + * +--------------------------------+---------------------+ + * |<-------53 - M bits --->|<--------M bits -----> + * + * M - number of node offset bits (35 .. 40) * * * Memory/UV-HUB Processor Socket Address Format: - * +--------+---------------+---------------------+ - * |00..0000| SNASID | NodeOffset | - * +--------+---------------+---------------------+ - * <--- N bits --->|<--------M bits -----> + * +----------------+---------------+---------------------+ + * |00..000000000000| PNODE | NodeOffset | + * +----------------+---------------+---------------------+ + * <--- N bits --->|<--------M bits -----> * - * M number of node offset bits (35 .. 40) - * N number of SNASID bits (0 .. 10) + * M - number of node offset bits (35 .. 40) + * N - number of PNODE bits (0 .. 10) * * Note: M + N cannot currently exceed 44 (x86_64) or 46 (IA64). * The actual values are configuration dependent and are set at - * boot time + * boot time. M & N values are set by the hardware/BIOS at boot. + * * * APICID format * NOTE!!!!!! This is the current format of the APICID. However, code @@ -48,14 +71,14 @@ * * 1111110000000000 * 5432109876543210 - * nnnnnnnnnnlc0cch + * pppppppppplc0cch * sssssssssss * - * n = snasid bits + * p = pnode bits * l = socket number on board * c = core * h = hyperthread - * s = bits that are in the socket CSR + * s = bits that are in the SOCKET_ID CSR * * Note: Processor only supports 12 bits in the APICID register. The ACPI * tables hold all 16 bits. Software needs to be aware of this. @@ -74,7 +97,7 @@ * This value is also the value of the maximum number of non-router NASIDs * in the numalink fabric. * - * NOTE: a brick may be 1 or 2 OS nodes. Don't get these confused. + * NOTE: a brick may contain 1 or 2 OS nodes. Don't get these confused. */ #define UV_MAX_NUMALINK_BLADES 16384 @@ -96,8 +119,12 @@ */ struct uv_hub_info_s { unsigned long global_mmr_base; - unsigned short local_nasid; - unsigned short gnode_upper; + unsigned long gpa_mask; + unsigned long gnode_upper; + unsigned long lowmem_remap_top; + unsigned long lowmem_remap_base; + unsigned short pnode; + unsigned short pnode_mask; unsigned short coherency_domain_number; unsigned short numa_blade_id; unsigned char blade_processor_id; @@ -112,83 +139,124 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); * Local & Global MMR space macros. * Note: macros are intended to be used ONLY by inline functions * in this file - not by other kernel code. + * n - NASID (full 15-bit global nasid) + * g - GNODE (full 15-bit global nasid, right shifted 1) + * p - PNODE (local part of nsids, right shifted 1) */ -#define UV_SNASID(n) ((n) >> 1) -#define UV_NASID(n) ((n) << 1) +#define UV_NASID_TO_PNODE(n) (((n) >> 1) & uv_hub_info->pnode_mask) +#define UV_PNODE_TO_NASID(p) (((p) << 1) | uv_hub_info->gnode_upper) #define UV_LOCAL_MMR_BASE 0xf4000000UL #define UV_GLOBAL_MMR32_BASE 0xf8000000UL #define UV_GLOBAL_MMR64_BASE (uv_hub_info->global_mmr_base) -#define UV_GLOBAL_MMR32_SNASID_MASK 0x3ff -#define UV_GLOBAL_MMR32_SNASID_SHIFT 15 -#define UV_GLOBAL_MMR64_SNASID_SHIFT 26 +#define UV_GLOBAL_MMR32_PNODE_SHIFT 15 +#define UV_GLOBAL_MMR64_PNODE_SHIFT 26 -#define UV_GLOBAL_MMR32_NASID_BITS(n) \ - (((UV_SNASID(n) & UV_GLOBAL_MMR32_SNASID_MASK)) << \ - (UV_GLOBAL_MMR32_SNASID_SHIFT)) +#define UV_GLOBAL_MMR32_PNODE_BITS(p) ((p) << (UV_GLOBAL_MMR32_PNODE_SHIFT)) -#define UV_GLOBAL_MMR64_NASID_BITS(n) \ - ((unsigned long)UV_SNASID(n) << UV_GLOBAL_MMR64_SNASID_SHIFT) +#define UV_GLOBAL_MMR64_PNODE_BITS(p) \ + ((unsigned long)(p) << UV_GLOBAL_MMR64_PNODE_SHIFT) + +#define UV_APIC_PNODE_SHIFT 6 + +/* + * Macros for converting between kernel virtual addresses, socket local physical + * addresses, and UV global physical addresses. + * Note: use the standard __pa() & __va() macros for converting + * between socket virtual and socket physical addresses. + */ + +/* socket phys RAM --> UV global physical address */ +static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr) +{ + if (paddr < uv_hub_info->lowmem_remap_top) + paddr += uv_hub_info->lowmem_remap_base; + return paddr | uv_hub_info->gnode_upper; +} + + +/* socket virtual --> UV global physical address */ +static inline unsigned long uv_gpa(void *v) +{ + return __pa(v) | uv_hub_info->gnode_upper; +} + +/* socket virtual --> UV global physical address */ +static inline void *uv_vgpa(void *v) +{ + return (void *)uv_gpa(v); +} + +/* UV global physical address --> socket virtual */ +static inline void *uv_va(unsigned long gpa) +{ + return __va(gpa & uv_hub_info->gpa_mask); +} + +/* pnode, offset --> socket virtual */ +static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset) +{ + return __va(((unsigned long)pnode << uv_hub_info->m_val) | offset); +} -#define UV_APIC_NASID_SHIFT 6 /* - * Extract a NASID from an APICID (full apicid, not processor subset) + * Extract a PNODE from an APICID (full apicid, not processor subset) */ -static inline int uv_apicid_to_nasid(int apicid) +static inline int uv_apicid_to_pnode(int apicid) { - return (UV_NASID(apicid >> UV_APIC_NASID_SHIFT)); + return (apicid >> UV_APIC_PNODE_SHIFT); } /* * Access global MMRs using the low memory MMR32 space. This region supports * faster MMR access but not all MMRs are accessible in this space. */ -static inline unsigned long *uv_global_mmr32_address(int nasid, +static inline unsigned long *uv_global_mmr32_address(int pnode, unsigned long offset) { return __va(UV_GLOBAL_MMR32_BASE | - UV_GLOBAL_MMR32_NASID_BITS(nasid) | offset); + UV_GLOBAL_MMR32_PNODE_BITS(pnode) | offset); } -static inline void uv_write_global_mmr32(int nasid, unsigned long offset, +static inline void uv_write_global_mmr32(int pnode, unsigned long offset, unsigned long val) { - *uv_global_mmr32_address(nasid, offset) = val; + *uv_global_mmr32_address(pnode, offset) = val; } -static inline unsigned long uv_read_global_mmr32(int nasid, +static inline unsigned long uv_read_global_mmr32(int pnode, unsigned long offset) { - return *uv_global_mmr32_address(nasid, offset); + return *uv_global_mmr32_address(pnode, offset); } /* * Access Global MMR space using the MMR space located at the top of physical * memory. */ -static inline unsigned long *uv_global_mmr64_address(int nasid, +static inline unsigned long *uv_global_mmr64_address(int pnode, unsigned long offset) { return __va(UV_GLOBAL_MMR64_BASE | - UV_GLOBAL_MMR64_NASID_BITS(nasid) | offset); + UV_GLOBAL_MMR64_PNODE_BITS(pnode) | offset); } -static inline void uv_write_global_mmr64(int nasid, unsigned long offset, +static inline void uv_write_global_mmr64(int pnode, unsigned long offset, unsigned long val) { - *uv_global_mmr64_address(nasid, offset) = val; + *uv_global_mmr64_address(pnode, offset) = val; } -static inline unsigned long uv_read_global_mmr64(int nasid, +static inline unsigned long uv_read_global_mmr64(int pnode, unsigned long offset) { - return *uv_global_mmr64_address(nasid, offset); + return *uv_global_mmr64_address(pnode, offset); } /* - * Access node local MMRs. Faster than using global space but only local MMRs + * Access hub local MMRs. Faster than using global space but only local MMRs * are accessible. */ static inline unsigned long *uv_local_mmr_address(unsigned long offset) @@ -207,15 +275,15 @@ static inline void uv_write_local_mmr(unsigned long offset, unsigned long val) } /* - * Structures and definitions for converting between cpu, node, and blade + * Structures and definitions for converting between cpu, node, pnode, and blade * numbers. */ struct uv_blade_info { - unsigned short nr_posible_cpus; + unsigned short nr_possible_cpus; unsigned short nr_online_cpus; - unsigned short nasid; + unsigned short pnode; }; -struct uv_blade_info *uv_blade_info; +extern struct uv_blade_info *uv_blade_info; extern short *uv_node_to_blade; extern short *uv_cpu_to_blade; extern short uv_possible_blades; @@ -244,16 +312,16 @@ static inline int uv_node_to_blade_id(int nid) return uv_node_to_blade[nid]; } -/* Convert a blade id to the NASID of the blade */ -static inline int uv_blade_to_nasid(int bid) +/* Convert a blade id to the PNODE of the blade */ +static inline int uv_blade_to_pnode(int bid) { - return uv_blade_info[bid].nasid; + return uv_blade_info[bid].pnode; } /* Determine the number of possible cpus on a blade */ static inline int uv_blade_nr_possible_cpus(int bid) { - return uv_blade_info[bid].nr_posible_cpus; + return uv_blade_info[bid].nr_possible_cpus; } /* Determine the number of online cpus on a blade */ @@ -262,16 +330,16 @@ static inline int uv_blade_nr_online_cpus(int bid) return uv_blade_info[bid].nr_online_cpus; } -/* Convert a cpu id to the NASID of the blade containing the cpu */ -static inline int uv_cpu_to_nasid(int cpu) +/* Convert a cpu id to the PNODE of the blade containing the cpu */ +static inline int uv_cpu_to_pnode(int cpu) { - return uv_blade_info[uv_cpu_to_blade_id(cpu)].nasid; + return uv_blade_info[uv_cpu_to_blade_id(cpu)].pnode; } -/* Convert a node number to the NASID of the blade */ -static inline int uv_node_to_nasid(int nid) +/* Convert a linux node number to the PNODE of the blade */ +static inline int uv_node_to_pnode(int nid) { - return uv_blade_info[uv_node_to_blade_id(nid)].nasid; + return uv_blade_info[uv_node_to_blade_id(nid)].pnode; } /* Maximum possible number of blades */ diff --git a/include/asm-x86/uv/uv_mmrs.h b/include/asm-x86/uv/uv_mmrs.h index 3b69fe6b6376..ac9846076521 100644 --- a/include/asm-x86/uv/uv_mmrs.h +++ b/include/asm-x86/uv/uv_mmrs.h @@ -11,11 +11,46 @@ #ifndef __ASM_X86_UV_MMRS__ #define __ASM_X86_UV_MMRS__ -/* - * AUTO GENERATED - Do not edit - */ +#define UV_MMR_ENABLE (1UL << 63) - #define UV_MMR_ENABLE (1UL << 63) +/* ========================================================================= */ +/* UVH_BAU_DATA_CONFIG */ +/* ========================================================================= */ +#define UVH_BAU_DATA_CONFIG 0x61680UL +#define UVH_BAU_DATA_CONFIG_32 0x0450 + +#define UVH_BAU_DATA_CONFIG_VECTOR_SHFT 0 +#define UVH_BAU_DATA_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_BAU_DATA_CONFIG_DM_SHFT 8 +#define UVH_BAU_DATA_CONFIG_DM_MASK 0x0000000000000700UL +#define UVH_BAU_DATA_CONFIG_DESTMODE_SHFT 11 +#define UVH_BAU_DATA_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVH_BAU_DATA_CONFIG_STATUS_SHFT 12 +#define UVH_BAU_DATA_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_BAU_DATA_CONFIG_P_SHFT 13 +#define UVH_BAU_DATA_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_BAU_DATA_CONFIG_T_SHFT 15 +#define UVH_BAU_DATA_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_BAU_DATA_CONFIG_M_SHFT 16 +#define UVH_BAU_DATA_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_BAU_DATA_CONFIG_APIC_ID_SHFT 32 +#define UVH_BAU_DATA_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +union uvh_bau_data_config_u { + unsigned long v; + struct uvh_bau_data_config_s { + unsigned long vector_ : 8; /* RW */ + unsigned long dm : 3; /* RW */ + unsigned long destmode : 1; /* RW */ + unsigned long status : 1; /* RO */ + unsigned long p : 1; /* RO */ + unsigned long rsvd_14 : 1; /* */ + unsigned long t : 1; /* RO */ + unsigned long m : 1; /* RW */ + unsigned long rsvd_17_31: 15; /* */ + unsigned long apic_id : 32; /* RW */ + } s; +}; /* ========================================================================= */ /* UVH_IPI_INT */ @@ -109,6 +144,7 @@ union uvh_lb_bau_intd_payload_queue_tail_u { /* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE */ /* ========================================================================= */ #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0x0aa0 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL @@ -169,6 +205,7 @@ union uvh_lb_bau_intd_software_acknowledge_u { /* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS */ /* ========================================================================= */ #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x0000000000320088UL +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0x0aa8 /* ========================================================================= */ /* UVH_LB_BAU_SB_ACTIVATION_CONTROL */ @@ -247,6 +284,331 @@ union uvh_lb_bau_sb_descriptor_base_u { } s; }; +/* ========================================================================= */ +/* UVH_LB_MCAST_AOERR0_RPT_ENABLE */ +/* ========================================================================= */ +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE 0x50b20UL + +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_OBESE_MSG_SHFT 0 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_OBESE_MSG_MASK 0x0000000000000001UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_DATA_SB_ERR_SHFT 1 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_DATA_SB_ERR_MASK 0x0000000000000002UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_NACK_BUFF_PARITY_SHFT 2 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_NACK_BUFF_PARITY_MASK 0x0000000000000004UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_TIMEOUT_SHFT 3 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_TIMEOUT_MASK 0x0000000000000008UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_INACTIVE_REPLY_SHFT 4 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_INACTIVE_REPLY_MASK 0x0000000000000010UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_UPGRADE_ERROR_SHFT 5 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_UPGRADE_ERROR_MASK 0x0000000000000020UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REG_COUNT_UNDERFLOW_SHFT 6 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REG_COUNT_UNDERFLOW_MASK 0x0000000000000040UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REP_OBESE_MSG_SHFT 7 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REP_OBESE_MSG_MASK 0x0000000000000080UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_RUNT_MSG_SHFT 8 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_RUNT_MSG_MASK 0x0000000000000100UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_OBESE_MSG_SHFT 9 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_OBESE_MSG_MASK 0x0000000000000200UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_DATA_SB_ERR_SHFT 10 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_DATA_SB_ERR_MASK 0x0000000000000400UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_RUNT_MSG_SHFT 11 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_RUNT_MSG_MASK 0x0000000000000800UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_OBESE_MSG_SHFT 12 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_OBESE_MSG_MASK 0x0000000000001000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_DATA_SB_ERR_SHFT 13 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_DATA_SB_ERR_MASK 0x0000000000002000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_COMMAND_ERR_SHFT 14 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_COMMAND_ERR_MASK 0x0000000000004000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_PEND_TIMEOUT_SHFT 15 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_PEND_TIMEOUT_MASK 0x0000000000008000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_RUNT_MSG_SHFT 16 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_RUNT_MSG_MASK 0x0000000000010000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_OBESE_MSG_SHFT 17 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_OBESE_MSG_MASK 0x0000000000020000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_DATA_SB_ERR_SHFT 18 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_DATA_SB_ERR_MASK 0x0000000000040000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_RUNT_MSG_SHFT 19 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_RUNT_MSG_MASK 0x0000000000080000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_OBESE_MSG_SHFT 20 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_OBESE_MSG_MASK 0x0000000000100000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_DATA_SB_ERR_SHFT 21 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_DATA_SB_ERR_MASK 0x0000000000200000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_TIMEOUT_SHFT 22 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_TIMEOUT_MASK 0x0000000000400000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_SPURIOUS_EVENT_SHFT 23 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_SPURIOUS_EVENT_MASK 0x0000000000800000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IOH_DESTINATION_TABLE_PARITY_SHFT 24 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IOH_DESTINATION_TABLE_PARITY_MASK 0x0000000001000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_HAD_ERROR_REPLY_SHFT 25 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_HAD_ERROR_REPLY_MASK 0x0000000002000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_TIMEOUT_SHFT 26 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_TIMEOUT_MASK 0x0000000004000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_LOCK_MANAGER_HAD_ERROR_REPLY_SHFT 27 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_LOCK_MANAGER_HAD_ERROR_REPLY_MASK 0x0000000008000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_HAD_ERROR_REPLY_SHFT 28 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_HAD_ERROR_REPLY_MASK 0x0000000010000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_TIMEOUT_SHFT 29 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_TIMEOUT_MASK 0x0000000020000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SB_ACTIVATION_OVERRUN_SHFT 30 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SB_ACTIVATION_OVERRUN_MASK 0x0000000040000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_HAD_ERROR_REPLY_SHFT 31 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_HAD_ERROR_REPLY_MASK 0x0000000080000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_TIMEOUT_SHFT 32 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_TIMEOUT_MASK 0x0000000100000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_0_PARITY_SHFT 33 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_0_PARITY_MASK 0x0000000200000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_1_PARITY_SHFT 34 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_1_PARITY_MASK 0x0000000400000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SOCKET_DESTINATION_TABLE_PARITY_SHFT 35 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SOCKET_DESTINATION_TABLE_PARITY_MASK 0x0000000800000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_BAU_REPLY_PAYLOAD_CORRUPTION_SHFT 36 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_BAU_REPLY_PAYLOAD_CORRUPTION_MASK 0x0000001000000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IO_PORT_DESTINATION_TABLE_PARITY_SHFT 37 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IO_PORT_DESTINATION_TABLE_PARITY_MASK 0x0000002000000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INTD_SOFT_ACK_TIMEOUT_SHFT 38 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INTD_SOFT_ACK_TIMEOUT_MASK 0x0000004000000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_OBESE_MSG_SHFT 39 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_OBESE_MSG_MASK 0x0000008000000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_COMMAND_ERR_SHFT 40 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_COMMAND_ERR_MASK 0x0000010000000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_TIMEOUT_SHFT 41 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_TIMEOUT_MASK 0x0000020000000000UL + +union uvh_lb_mcast_aoerr0_rpt_enable_u { + unsigned long v; + struct uvh_lb_mcast_aoerr0_rpt_enable_s { + unsigned long mcast_obese_msg : 1; /* RW */ + unsigned long mcast_data_sb_err : 1; /* RW */ + unsigned long mcast_nack_buff_parity : 1; /* RW */ + unsigned long mcast_timeout : 1; /* RW */ + unsigned long mcast_inactive_reply : 1; /* RW */ + unsigned long mcast_upgrade_error : 1; /* RW */ + unsigned long mcast_reg_count_underflow : 1; /* RW */ + unsigned long mcast_rep_obese_msg : 1; /* RW */ + unsigned long ucache_req_runt_msg : 1; /* RW */ + unsigned long ucache_req_obese_msg : 1; /* RW */ + unsigned long ucache_req_data_sb_err : 1; /* RW */ + unsigned long ucache_rep_runt_msg : 1; /* RW */ + unsigned long ucache_rep_obese_msg : 1; /* RW */ + unsigned long ucache_rep_data_sb_err : 1; /* RW */ + unsigned long ucache_rep_command_err : 1; /* RW */ + unsigned long ucache_pend_timeout : 1; /* RW */ + unsigned long macc_req_runt_msg : 1; /* RW */ + unsigned long macc_req_obese_msg : 1; /* RW */ + unsigned long macc_req_data_sb_err : 1; /* RW */ + unsigned long macc_rep_runt_msg : 1; /* RW */ + unsigned long macc_rep_obese_msg : 1; /* RW */ + unsigned long macc_rep_data_sb_err : 1; /* RW */ + unsigned long macc_timeout : 1; /* RW */ + unsigned long macc_spurious_event : 1; /* RW */ + unsigned long ioh_destination_table_parity : 1; /* RW */ + unsigned long get_had_error_reply : 1; /* RW */ + unsigned long get_timeout : 1; /* RW */ + unsigned long lock_manager_had_error_reply : 1; /* RW */ + unsigned long put_had_error_reply : 1; /* RW */ + unsigned long put_timeout : 1; /* RW */ + unsigned long sb_activation_overrun : 1; /* RW */ + unsigned long completed_gb_activation_had_error_reply : 1; /* RW */ + unsigned long completed_gb_activation_timeout : 1; /* RW */ + unsigned long descriptor_buffer_0_parity : 1; /* RW */ + unsigned long descriptor_buffer_1_parity : 1; /* RW */ + unsigned long socket_destination_table_parity : 1; /* RW */ + unsigned long bau_reply_payload_corruption : 1; /* RW */ + unsigned long io_port_destination_table_parity : 1; /* RW */ + unsigned long intd_soft_ack_timeout : 1; /* RW */ + unsigned long int_rep_obese_msg : 1; /* RW */ + unsigned long int_rep_command_err : 1; /* RW */ + unsigned long int_timeout : 1; /* RW */ + unsigned long rsvd_42_63 : 22; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_LOCAL_INT0_CONFIG */ +/* ========================================================================= */ +#define UVH_LOCAL_INT0_CONFIG 0x61000UL + +#define UVH_LOCAL_INT0_CONFIG_VECTOR_SHFT 0 +#define UVH_LOCAL_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_LOCAL_INT0_CONFIG_DM_SHFT 8 +#define UVH_LOCAL_INT0_CONFIG_DM_MASK 0x0000000000000700UL +#define UVH_LOCAL_INT0_CONFIG_DESTMODE_SHFT 11 +#define UVH_LOCAL_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVH_LOCAL_INT0_CONFIG_STATUS_SHFT 12 +#define UVH_LOCAL_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_LOCAL_INT0_CONFIG_P_SHFT 13 +#define UVH_LOCAL_INT0_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_LOCAL_INT0_CONFIG_T_SHFT 15 +#define UVH_LOCAL_INT0_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_LOCAL_INT0_CONFIG_M_SHFT 16 +#define UVH_LOCAL_INT0_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_LOCAL_INT0_CONFIG_APIC_ID_SHFT 32 +#define UVH_LOCAL_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +union uvh_local_int0_config_u { + unsigned long v; + struct uvh_local_int0_config_s { + unsigned long vector_ : 8; /* RW */ + unsigned long dm : 3; /* RW */ + unsigned long destmode : 1; /* RW */ + unsigned long status : 1; /* RO */ + unsigned long p : 1; /* RO */ + unsigned long rsvd_14 : 1; /* */ + unsigned long t : 1; /* RO */ + unsigned long m : 1; /* RW */ + unsigned long rsvd_17_31: 15; /* */ + unsigned long apic_id : 32; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_LOCAL_INT0_ENABLE */ +/* ========================================================================= */ +#define UVH_LOCAL_INT0_ENABLE 0x65000UL + +#define UVH_LOCAL_INT0_ENABLE_LB_HCERR_SHFT 0 +#define UVH_LOCAL_INT0_ENABLE_LB_HCERR_MASK 0x0000000000000001UL +#define UVH_LOCAL_INT0_ENABLE_GR0_HCERR_SHFT 1 +#define UVH_LOCAL_INT0_ENABLE_GR0_HCERR_MASK 0x0000000000000002UL +#define UVH_LOCAL_INT0_ENABLE_GR1_HCERR_SHFT 2 +#define UVH_LOCAL_INT0_ENABLE_GR1_HCERR_MASK 0x0000000000000004UL +#define UVH_LOCAL_INT0_ENABLE_LH_HCERR_SHFT 3 +#define UVH_LOCAL_INT0_ENABLE_LH_HCERR_MASK 0x0000000000000008UL +#define UVH_LOCAL_INT0_ENABLE_RH_HCERR_SHFT 4 +#define UVH_LOCAL_INT0_ENABLE_RH_HCERR_MASK 0x0000000000000010UL +#define UVH_LOCAL_INT0_ENABLE_XN_HCERR_SHFT 5 +#define UVH_LOCAL_INT0_ENABLE_XN_HCERR_MASK 0x0000000000000020UL +#define UVH_LOCAL_INT0_ENABLE_SI_HCERR_SHFT 6 +#define UVH_LOCAL_INT0_ENABLE_SI_HCERR_MASK 0x0000000000000040UL +#define UVH_LOCAL_INT0_ENABLE_LB_AOERR0_SHFT 7 +#define UVH_LOCAL_INT0_ENABLE_LB_AOERR0_MASK 0x0000000000000080UL +#define UVH_LOCAL_INT0_ENABLE_GR0_AOERR0_SHFT 8 +#define UVH_LOCAL_INT0_ENABLE_GR0_AOERR0_MASK 0x0000000000000100UL +#define UVH_LOCAL_INT0_ENABLE_GR1_AOERR0_SHFT 9 +#define UVH_LOCAL_INT0_ENABLE_GR1_AOERR0_MASK 0x0000000000000200UL +#define UVH_LOCAL_INT0_ENABLE_LH_AOERR0_SHFT 10 +#define UVH_LOCAL_INT0_ENABLE_LH_AOERR0_MASK 0x0000000000000400UL +#define UVH_LOCAL_INT0_ENABLE_RH_AOERR0_SHFT 11 +#define UVH_LOCAL_INT0_ENABLE_RH_AOERR0_MASK 0x0000000000000800UL +#define UVH_LOCAL_INT0_ENABLE_XN_AOERR0_SHFT 12 +#define UVH_LOCAL_INT0_ENABLE_XN_AOERR0_MASK 0x0000000000001000UL +#define UVH_LOCAL_INT0_ENABLE_SI_AOERR0_SHFT 13 +#define UVH_LOCAL_INT0_ENABLE_SI_AOERR0_MASK 0x0000000000002000UL +#define UVH_LOCAL_INT0_ENABLE_LB_AOERR1_SHFT 14 +#define UVH_LOCAL_INT0_ENABLE_LB_AOERR1_MASK 0x0000000000004000UL +#define UVH_LOCAL_INT0_ENABLE_GR0_AOERR1_SHFT 15 +#define UVH_LOCAL_INT0_ENABLE_GR0_AOERR1_MASK 0x0000000000008000UL +#define UVH_LOCAL_INT0_ENABLE_GR1_AOERR1_SHFT 16 +#define UVH_LOCAL_INT0_ENABLE_GR1_AOERR1_MASK 0x0000000000010000UL +#define UVH_LOCAL_INT0_ENABLE_LH_AOERR1_SHFT 17 +#define UVH_LOCAL_INT0_ENABLE_LH_AOERR1_MASK 0x0000000000020000UL +#define UVH_LOCAL_INT0_ENABLE_RH_AOERR1_SHFT 18 +#define UVH_LOCAL_INT0_ENABLE_RH_AOERR1_MASK 0x0000000000040000UL +#define UVH_LOCAL_INT0_ENABLE_XN_AOERR1_SHFT 19 +#define UVH_LOCAL_INT0_ENABLE_XN_AOERR1_MASK 0x0000000000080000UL +#define UVH_LOCAL_INT0_ENABLE_SI_AOERR1_SHFT 20 +#define UVH_LOCAL_INT0_ENABLE_SI_AOERR1_MASK 0x0000000000100000UL +#define UVH_LOCAL_INT0_ENABLE_RH_VPI_INT_SHFT 21 +#define UVH_LOCAL_INT0_ENABLE_RH_VPI_INT_MASK 0x0000000000200000UL +#define UVH_LOCAL_INT0_ENABLE_SYSTEM_SHUTDOWN_INT_SHFT 22 +#define UVH_LOCAL_INT0_ENABLE_SYSTEM_SHUTDOWN_INT_MASK 0x0000000000400000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_0_SHFT 23 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_0_MASK 0x0000000000800000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_1_SHFT 24 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_1_MASK 0x0000000001000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_2_SHFT 25 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_2_MASK 0x0000000002000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_3_SHFT 26 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_3_MASK 0x0000000004000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_4_SHFT 27 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_4_MASK 0x0000000008000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_5_SHFT 28 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_5_MASK 0x0000000010000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_6_SHFT 29 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_6_MASK 0x0000000020000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_7_SHFT 30 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_7_MASK 0x0000000040000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_8_SHFT 31 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_8_MASK 0x0000000080000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_9_SHFT 32 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_9_MASK 0x0000000100000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_10_SHFT 33 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_10_MASK 0x0000000200000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_11_SHFT 34 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_11_MASK 0x0000000400000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_12_SHFT 35 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_12_MASK 0x0000000800000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_13_SHFT 36 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_13_MASK 0x0000001000000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_14_SHFT 37 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_14_MASK 0x0000002000000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_15_SHFT 38 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_15_MASK 0x0000004000000000UL +#define UVH_LOCAL_INT0_ENABLE_L1_NMI_INT_SHFT 39 +#define UVH_LOCAL_INT0_ENABLE_L1_NMI_INT_MASK 0x0000008000000000UL +#define UVH_LOCAL_INT0_ENABLE_STOP_CLOCK_SHFT 40 +#define UVH_LOCAL_INT0_ENABLE_STOP_CLOCK_MASK 0x0000010000000000UL +#define UVH_LOCAL_INT0_ENABLE_ASIC_TO_L1_SHFT 41 +#define UVH_LOCAL_INT0_ENABLE_ASIC_TO_L1_MASK 0x0000020000000000UL +#define UVH_LOCAL_INT0_ENABLE_L1_TO_ASIC_SHFT 42 +#define UVH_LOCAL_INT0_ENABLE_L1_TO_ASIC_MASK 0x0000040000000000UL +#define UVH_LOCAL_INT0_ENABLE_LTC_INT_SHFT 43 +#define UVH_LOCAL_INT0_ENABLE_LTC_INT_MASK 0x0000080000000000UL +#define UVH_LOCAL_INT0_ENABLE_LA_SEQ_TRIGGER_SHFT 44 +#define UVH_LOCAL_INT0_ENABLE_LA_SEQ_TRIGGER_MASK 0x0000100000000000UL + +union uvh_local_int0_enable_u { + unsigned long v; + struct uvh_local_int0_enable_s { + unsigned long lb_hcerr : 1; /* RW */ + unsigned long gr0_hcerr : 1; /* RW */ + unsigned long gr1_hcerr : 1; /* RW */ + unsigned long lh_hcerr : 1; /* RW */ + unsigned long rh_hcerr : 1; /* RW */ + unsigned long xn_hcerr : 1; /* RW */ + unsigned long si_hcerr : 1; /* RW */ + unsigned long lb_aoerr0 : 1; /* RW */ + unsigned long gr0_aoerr0 : 1; /* RW */ + unsigned long gr1_aoerr0 : 1; /* RW */ + unsigned long lh_aoerr0 : 1; /* RW */ + unsigned long rh_aoerr0 : 1; /* RW */ + unsigned long xn_aoerr0 : 1; /* RW */ + unsigned long si_aoerr0 : 1; /* RW */ + unsigned long lb_aoerr1 : 1; /* RW */ + unsigned long gr0_aoerr1 : 1; /* RW */ + unsigned long gr1_aoerr1 : 1; /* RW */ + unsigned long lh_aoerr1 : 1; /* RW */ + unsigned long rh_aoerr1 : 1; /* RW */ + unsigned long xn_aoerr1 : 1; /* RW */ + unsigned long si_aoerr1 : 1; /* RW */ + unsigned long rh_vpi_int : 1; /* RW */ + unsigned long system_shutdown_int : 1; /* RW */ + unsigned long lb_irq_int_0 : 1; /* RW */ + unsigned long lb_irq_int_1 : 1; /* RW */ + unsigned long lb_irq_int_2 : 1; /* RW */ + unsigned long lb_irq_int_3 : 1; /* RW */ + unsigned long lb_irq_int_4 : 1; /* RW */ + unsigned long lb_irq_int_5 : 1; /* RW */ + unsigned long lb_irq_int_6 : 1; /* RW */ + unsigned long lb_irq_int_7 : 1; /* RW */ + unsigned long lb_irq_int_8 : 1; /* RW */ + unsigned long lb_irq_int_9 : 1; /* RW */ + unsigned long lb_irq_int_10 : 1; /* RW */ + unsigned long lb_irq_int_11 : 1; /* RW */ + unsigned long lb_irq_int_12 : 1; /* RW */ + unsigned long lb_irq_int_13 : 1; /* RW */ + unsigned long lb_irq_int_14 : 1; /* RW */ + unsigned long lb_irq_int_15 : 1; /* RW */ + unsigned long l1_nmi_int : 1; /* RW */ + unsigned long stop_clock : 1; /* RW */ + unsigned long asic_to_l1 : 1; /* RW */ + unsigned long l1_to_asic : 1; /* RW */ + unsigned long ltc_int : 1; /* RW */ + unsigned long la_seq_trigger : 1; /* RW */ + unsigned long rsvd_45_63 : 19; /* */ + } s; +}; + /* ========================================================================= */ /* UVH_NODE_ID */ /* ========================================================================= */ @@ -283,6 +645,73 @@ union uvh_node_id_u { } s; }; +/* ========================================================================= */ +/* UVH_NODE_PRESENT_TABLE */ +/* ========================================================================= */ +#define UVH_NODE_PRESENT_TABLE 0x1400UL +#define UVH_NODE_PRESENT_TABLE_DEPTH 16 + +#define UVH_NODE_PRESENT_TABLE_NODES_SHFT 0 +#define UVH_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL + +union uvh_node_present_table_u { + unsigned long v; + struct uvh_node_present_table_s { + unsigned long nodes : 64; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR */ +/* ========================================================================= */ +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL + +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +union uvh_rh_gam_alias210_redirect_config_0_mmr_u { + unsigned long v; + struct uvh_rh_gam_alias210_redirect_config_0_mmr_s { + unsigned long rsvd_0_23 : 24; /* */ + unsigned long dest_base : 22; /* RW */ + unsigned long rsvd_46_63: 18; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR */ +/* ========================================================================= */ +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL + +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +union uvh_rh_gam_alias210_redirect_config_1_mmr_u { + unsigned long v; + struct uvh_rh_gam_alias210_redirect_config_1_mmr_s { + unsigned long rsvd_0_23 : 24; /* */ + unsigned long dest_base : 22; /* RW */ + unsigned long rsvd_46_63: 18; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR */ +/* ========================================================================= */ +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL + +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +union uvh_rh_gam_alias210_redirect_config_2_mmr_u { + unsigned long v; + struct uvh_rh_gam_alias210_redirect_config_2_mmr_s { + unsigned long rsvd_0_23 : 24; /* */ + unsigned long dest_base : 22; /* RW */ + unsigned long rsvd_46_63: 18; /* */ + } s; +}; + /* ========================================================================= */ /* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */ /* ========================================================================= */ @@ -369,5 +798,77 @@ union uvh_si_addr_map_config_u { } s; }; +/* ========================================================================= */ +/* UVH_SI_ALIAS0_OVERLAY_CONFIG */ +/* ========================================================================= */ +#define UVH_SI_ALIAS0_OVERLAY_CONFIG 0xc80008UL + +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_BASE_SHFT 24 +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_M_ALIAS_SHFT 48 +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + +union uvh_si_alias0_overlay_config_u { + unsigned long v; + struct uvh_si_alias0_overlay_config_s { + unsigned long rsvd_0_23: 24; /* */ + unsigned long base : 8; /* RW */ + unsigned long rsvd_32_47: 16; /* */ + unsigned long m_alias : 5; /* RW */ + unsigned long rsvd_53_62: 10; /* */ + unsigned long enable : 1; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_SI_ALIAS1_OVERLAY_CONFIG */ +/* ========================================================================= */ +#define UVH_SI_ALIAS1_OVERLAY_CONFIG 0xc80010UL + +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_BASE_SHFT 24 +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_M_ALIAS_SHFT 48 +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + +union uvh_si_alias1_overlay_config_u { + unsigned long v; + struct uvh_si_alias1_overlay_config_s { + unsigned long rsvd_0_23: 24; /* */ + unsigned long base : 8; /* RW */ + unsigned long rsvd_32_47: 16; /* */ + unsigned long m_alias : 5; /* RW */ + unsigned long rsvd_53_62: 10; /* */ + unsigned long enable : 1; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_SI_ALIAS2_OVERLAY_CONFIG */ +/* ========================================================================= */ +#define UVH_SI_ALIAS2_OVERLAY_CONFIG 0xc80018UL + +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_BASE_SHFT 24 +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_M_ALIAS_SHFT 48 +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + +union uvh_si_alias2_overlay_config_u { + unsigned long v; + struct uvh_si_alias2_overlay_config_s { + unsigned long rsvd_0_23: 24; /* */ + unsigned long base : 8; /* RW */ + unsigned long rsvd_32_47: 16; /* */ + unsigned long m_alias : 5; /* RW */ + unsigned long rsvd_53_62: 10; /* */ + unsigned long enable : 1; /* RW */ + } s; +}; + #endif /* __ASM_X86_UV_MMRS__ */ -- cgit v1.2.3 From 7e0edc1bc343231029084761ebf59e522902eb49 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sat, 31 May 2008 01:33:04 +0100 Subject: xen: add new Xen elfnote types and use them appropriately Define recently added XEN_ELFNOTEs, and use them appropriately. Most significantly, this enables domain checkpointing (xm save -c). Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/xen/xen-head.S | 5 +++++ include/xen/interface/elfnote.h | 20 ++++++++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'include') diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 2ab5f42f34d8..ef6c9e005f90 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -7,6 +7,7 @@ #include #include #include +#include __INIT ENTRY(startup_xen) @@ -32,5 +33,9 @@ ENTRY(hypercall_page) ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") + ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, + .quad _PAGE_PRESENT; .quad _PAGE_PRESENT) + ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1) + ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long __HYPERVISOR_VIRT_START) #endif /*CONFIG_XEN */ diff --git a/include/xen/interface/elfnote.h b/include/xen/interface/elfnote.h index a64d3df5bd95..7a8262c375cc 100644 --- a/include/xen/interface/elfnote.h +++ b/include/xen/interface/elfnote.h @@ -120,6 +120,26 @@ */ #define XEN_ELFNOTE_BSD_SYMTAB 11 +/* + * The lowest address the hypervisor hole can begin at (numeric). + * + * This must not be set higher than HYPERVISOR_VIRT_START. Its presence + * also indicates to the hypervisor that the kernel can deal with the + * hole starting at a higher address. + */ +#define XEN_ELFNOTE_HV_START_LOW 12 + +/* + * List of maddr_t-sized mask/value pairs describing how to recognize + * (non-present) L1 page table entries carrying valid MFNs (numeric). + */ +#define XEN_ELFNOTE_L1_MFN_VALID 13 + +/* + * Whether or not the guest supports cooperative suspend cancellation. + */ +#define XEN_ELFNOTE_SUSPEND_CANCEL 14 + #endif /* __XEN_PUBLIC_ELFNOTE_H__ */ /* -- cgit v1.2.3 From e0773410247f1e5fc6f7c52a4c5f3c6c9873d527 Mon Sep 17 00:00:00 2001 From: Abhishek Sagar Date: Sat, 31 May 2008 14:24:02 +0530 Subject: ftrace: export kretprobe_trampoline for function tracer Follow suit from kprobe implementations on other archs and make kretprobe_trampoline non-static. Ftrace implmentation (more specifically, kernel/trace/trace.c) requires access to it (see-> http://kerneltrap.org/mailarchive/linux-kernel/2008/5/27/1955234). Signed-off-by: Abhishek Sagar Signed-off-by: Ingo Molnar --- arch/arm/kernel/kprobes.c | 2 +- include/asm-arm/kprobes.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c index 5593dd207216..5ee39e10c8d1 100644 --- a/arch/arm/kernel/kprobes.c +++ b/arch/arm/kernel/kprobes.c @@ -274,7 +274,7 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, * for kretprobe handlers which should normally be interested in r0 only * anyway. */ -static void __attribute__((naked)) __kprobes kretprobe_trampoline(void) +void __naked __kprobes kretprobe_trampoline(void) { __asm__ __volatile__ ( "stmdb sp!, {r0 - r11} \n\t" diff --git a/include/asm-arm/kprobes.h b/include/asm-arm/kprobes.h index c042194d3ab5..b1a37876942d 100644 --- a/include/asm-arm/kprobes.h +++ b/include/asm-arm/kprobes.h @@ -59,6 +59,7 @@ struct kprobe_ctlblk { }; void arch_remove_kprobe(struct kprobe *); +void kretprobe_trampoline(void); int kprobe_trap_handler(struct pt_regs *regs, unsigned int instr); int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr); -- cgit v1.2.3 From 1a7e612fa5ea0311232bd5418a40ec7280557789 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 31 May 2008 16:18:11 +0100 Subject: [ARM] 5074/1: fix warning: missing terminating ' character Signed-off-by: Dmitry Baryshkov Acked-by: Richard Purdie Signed-off-by: Russell King --- include/asm-arm/arch-pxa/pxa27x-udc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-arm/arch-pxa/pxa27x-udc.h b/include/asm-arm/arch-pxa/pxa27x-udc.h index 9cf0b1f88112..2d4e9cd3a5e3 100644 --- a/include/asm-arm/arch-pxa/pxa27x-udc.h +++ b/include/asm-arm/arch-pxa/pxa27x-udc.h @@ -2,7 +2,7 @@ #define _ASM_ARCH_PXA27X_UDC_H #ifdef _ASM_ARCH_PXA25X_UDC_H -#error You can't include both PXA25x and PXA27x UDC support +#error You cannot include both PXA25x and PXA27x UDC support #endif #define UDCCR __REG(0x40600000) /* UDC Control Register */ -- cgit v1.2.3 From ca0a789ab9c83d8fdf28f5c2700b316cd5dec2f0 Mon Sep 17 00:00:00 2001 From: Andrew Victor Date: Sat, 24 May 2008 17:47:04 +0100 Subject: [ARM] 5057/1: [AT91] Calao Systems - board files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for three AT91-based boards available from Calao Systems: USB_A9260, USB_A9263 and QIL_A9260. Signed-off-by: Grégory Hermant Signed-off-by: Andrew Victor Signed-off-by: Russell King --- arch/arm/mach-at91/Kconfig | 21 +++ arch/arm/mach-at91/Makefile | 3 + arch/arm/mach-at91/board-qil-a9260.c | 255 +++++++++++++++++++++++++++++++++++ arch/arm/mach-at91/board-usb-a9260.c | 215 +++++++++++++++++++++++++++++ arch/arm/mach-at91/board-usb-a9263.c | 230 +++++++++++++++++++++++++++++++ include/asm-arm/arch-at91/timex.h | 17 ++- 6 files changed, 740 insertions(+), 1 deletion(-) create mode 100644 arch/arm/mach-at91/board-qil-a9260.c create mode 100644 arch/arm/mach-at91/board-usb-a9260.c create mode 100644 arch/arm/mach-at91/board-usb-a9263.c (limited to 'include') diff --git a/arch/arm/mach-at91/Kconfig b/arch/arm/mach-at91/Kconfig index 3d549a9afe1d..cae5c67dc312 100644 --- a/arch/arm/mach-at91/Kconfig +++ b/arch/arm/mach-at91/Kconfig @@ -170,6 +170,20 @@ config MACH_SAM9_L9260 Select this if you are using Olimex's SAM9-L9260 board based on the Atmel AT91SAM9260. +config MACH_USB_A9260 + bool "CALAO USB-A9260" + depends on ARCH_AT91SAM9260 + help + Select this if you are using a Calao Systems USB-A9260. + + +config MACH_QIL_A9260 + bool "CALAO QIL-A9260 board" + depends on ARCH_AT91SAM9260 + help + Select this if you are using a Calao Systems QIL-A9260 Board. + + endif # ---------------------------------------------------------- @@ -200,6 +214,13 @@ config MACH_AT91SAM9263EK Select this if you are using Atmel's AT91SAM9263-EK Evaluation Kit. +config MACH_USB_A9263 + bool "CALAO USB-A9263" + depends on ARCH_AT91SAM9263 + help + Select this if you are using a Calao Systems USB-A9263. + + endif # ---------------------------------------------------------- diff --git a/arch/arm/mach-at91/Makefile b/arch/arm/mach-at91/Makefile index 995be14b2c42..932d17109e8b 100644 --- a/arch/arm/mach-at91/Makefile +++ b/arch/arm/mach-at91/Makefile @@ -36,12 +36,15 @@ obj-$(CONFIG_MACH_YL9200) += board-yl-9200.o obj-$(CONFIG_MACH_AT91SAM9260EK) += board-sam9260ek.o obj-$(CONFIG_MACH_CAM60) += board-cam60.o obj-$(CONFIG_MACH_SAM9_L9260) += board-sam9-l9260.o +obj-$(CONFIG_MACH_USB_A9260) += board-usb-a9260.o +obj-$(CONFIG_MACH_QIL_A9260) += board-qil-a9260.o # AT91SAM9261 board-specific support obj-$(CONFIG_MACH_AT91SAM9261EK) += board-sam9261ek.o # AT91SAM9263 board-specific support obj-$(CONFIG_MACH_AT91SAM9263EK) += board-sam9263ek.o +obj-$(CONFIG_MACH_USB_A9263) += board-usb-a9263.o # AT91SAM9RL board-specific support obj-$(CONFIG_MACH_AT91SAM9RLEK) += board-sam9rlek.o diff --git a/arch/arm/mach-at91/board-qil-a9260.c b/arch/arm/mach-at91/board-qil-a9260.c new file mode 100644 index 000000000000..99b4ec3818d6 --- /dev/null +++ b/arch/arm/mach-at91/board-qil-a9260.c @@ -0,0 +1,255 @@ +/* + * linux/arch/arm/mach-at91/board-qil-a9260.c + * + * Copyright (C) 2005 SAN People + * Copyright (C) 2006 Atmel + * Copyright (C) 2007 Calao-systems + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include "generic.h" + + +static void __init ek_map_io(void) +{ + /* Initialize processor: 12.000 MHz crystal */ + at91sam9260_initialize(12000000); + + /* DGBU on ttyS0. (Rx & Tx only) */ + at91_register_uart(0, 0, 0); + + /* USART0 on ttyS1. (Rx, Tx, CTS, RTS, DTR, DSR, DCD, RI) */ + at91_register_uart(AT91SAM9260_ID_US0, 1, ATMEL_UART_CTS | ATMEL_UART_RTS + | ATMEL_UART_DTR | ATMEL_UART_DSR | ATMEL_UART_DCD + | ATMEL_UART_RI); + + /* USART1 on ttyS2. (Rx, Tx, CTS, RTS) */ + at91_register_uart(AT91SAM9260_ID_US1, 2, ATMEL_UART_CTS | ATMEL_UART_RTS); + + /* USART2 on ttyS3. (Rx, Tx, CTS, RTS) */ + at91_register_uart(AT91SAM9260_ID_US2, 3, ATMEL_UART_CTS | ATMEL_UART_RTS); + + /* set serial console to ttyS1 (ie, USART0) */ + at91_set_serial_console(1); + +} + +static void __init ek_init_irq(void) +{ + at91sam9260_init_interrupts(NULL); +} + + +/* + * USB Host port + */ +static struct at91_usbh_data __initdata ek_usbh_data = { + .ports = 2, +}; + +/* + * USB Device port + */ +static struct at91_udc_data __initdata ek_udc_data = { + .vbus_pin = AT91_PIN_PC5, + .pullup_pin = 0, /* pull-up driven by UDC */ +}; + +/* + * SPI devices. + */ +static struct spi_board_info ek_spi_devices[] = { +#if defined(CONFIG_RTC_DRV_M41T94) + { /* M41T94 RTC */ + .modalias = "m41t94", + .chip_select = 0, + .max_speed_hz = 1 * 1000 * 1000, + .bus_num = 0, + } +#endif +}; + +/* + * MACB Ethernet device + */ +static struct at91_eth_data __initdata ek_macb_data = { + .phy_irq_pin = AT91_PIN_PA31, + .is_rmii = 1, +}; + +/* + * NAND flash + */ +static struct mtd_partition __initdata ek_nand_partition[] = { + { + .name = "Uboot & Kernel", + .offset = 0x00000000, + .size = 16 * 1024 * 1024, + }, + { + .name = "Root FS", + .offset = 0x01000000, + .size = 120 * 1024 * 1024, + }, + { + .name = "FS", + .offset = 0x08800000, + .size = 120 * 1024 * 1024, + }, +}; + +static struct mtd_partition * __init nand_partitions(int size, int *num_partitions) +{ + *num_partitions = ARRAY_SIZE(ek_nand_partition); + return ek_nand_partition; +} + +static struct at91_nand_data __initdata ek_nand_data = { + .ale = 21, + .cle = 22, +// .det_pin = ... not connected + .rdy_pin = AT91_PIN_PC13, + .enable_pin = AT91_PIN_PC14, + .partition_info = nand_partitions, +#if defined(CONFIG_MTD_NAND_AT91_BUSWIDTH_16) + .bus_width_16 = 1, +#else + .bus_width_16 = 0, +#endif +}; + +/* + * MCI (SD/MMC) + */ +static struct at91_mmc_data __initdata ek_mmc_data = { + .slot_b = 0, + .wire4 = 1, +// .det_pin = ... not connected +// .wp_pin = ... not connected +// .vcc_pin = ... not connected +}; + +/* + * GPIO Buttons + */ +#if defined(CONFIG_KEYBOARD_GPIO) || defined(CONFIG_KEYBOARD_GPIO_MODULE) +static struct gpio_keys_button ek_buttons[] = { + { /* USER PUSH BUTTON */ + .code = KEY_ENTER, + .gpio = AT91_PIN_PB10, + .active_low = 1, + .desc = "user_pb", + .wakeup = 1, + } +}; + +static struct gpio_keys_platform_data ek_button_data = { + .buttons = ek_buttons, + .nbuttons = ARRAY_SIZE(ek_buttons), +}; + +static struct platform_device ek_button_device = { + .name = "gpio-keys", + .id = -1, + .num_resources = 0, + .dev = { + .platform_data = &ek_button_data, + } +}; + +static void __init ek_add_device_buttons(void) +{ + at91_set_GPIO_periph(AT91_PIN_PB10, 1); /* user push button, pull up enabled */ + at91_set_deglitch(AT91_PIN_PB10, 1); + + platform_device_register(&ek_button_device); +} +#else +static void __init ek_add_device_buttons(void) {} +#endif + +/* + * LEDs + */ +static struct gpio_led ek_leds[] = { + { /* user_led (green) */ + .name = "user_led", + .gpio = AT91_PIN_PB21, + .active_low = 0, + .default_trigger = "heartbeat", + } +}; + +static void __init ek_board_init(void) +{ + /* Serial */ + at91_add_device_serial(); + /* USB Host */ + at91_add_device_usbh(&ek_usbh_data); + /* USB Device */ + at91_add_device_udc(&ek_udc_data); + /* SPI */ + at91_add_device_spi(ek_spi_devices, ARRAY_SIZE(ek_spi_devices)); + /* NAND */ + at91_add_device_nand(&ek_nand_data); + /* I2C */ + at91_add_device_i2c(NULL, 0); + /* Ethernet */ + at91_add_device_eth(&ek_macb_data); + /* MMC */ + at91_add_device_mmc(0, &ek_mmc_data); + /* Push Buttons */ + ek_add_device_buttons(); + /* LEDs */ + at91_gpio_leds(ek_leds, ARRAY_SIZE(ek_leds)); + /* shutdown controller, wakeup button (5 msec low) */ + at91_sys_write(AT91_SHDW_MR, AT91_SHDW_CPTWK0_(10) | AT91_SHDW_WKMODE0_LOW + | AT91_SHDW_RTTWKEN); +} + +MACHINE_START(QIL_A9260, "CALAO QIL_A9260") + /* Maintainer: calao-systems */ + .phys_io = AT91_BASE_SYS, + .io_pg_offst = (AT91_VA_BASE_SYS >> 18) & 0xfffc, + .boot_params = AT91_SDRAM_BASE + 0x100, + .timer = &at91sam926x_timer, + .map_io = ek_map_io, + .init_irq = ek_init_irq, + .init_machine = ek_board_init, +MACHINE_END diff --git a/arch/arm/mach-at91/board-usb-a9260.c b/arch/arm/mach-at91/board-usb-a9260.c new file mode 100644 index 000000000000..837aedf8ffeb --- /dev/null +++ b/arch/arm/mach-at91/board-usb-a9260.c @@ -0,0 +1,215 @@ +/* + * linux/arch/arm/mach-at91/board-usb-a9260.c + * + * Copyright (C) 2005 SAN People + * Copyright (C) 2006 Atmel + * Copyright (C) 2007 Calao-systems + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include "generic.h" + + +static void __init ek_map_io(void) +{ + /* Initialize processor: 12.000 MHz crystal */ + at91sam9260_initialize(12000000); + + /* DGBU on ttyS0. (Rx & Tx only) */ + at91_register_uart(0, 0, 0); + + /* set serial console to ttyS0 (ie, DBGU) */ + at91_set_serial_console(0); +} + +static void __init ek_init_irq(void) +{ + at91sam9260_init_interrupts(NULL); +} + + +/* + * USB Host port + */ +static struct at91_usbh_data __initdata ek_usbh_data = { + .ports = 2, +}; + +/* + * USB Device port + */ +static struct at91_udc_data __initdata ek_udc_data = { + .vbus_pin = AT91_PIN_PC5, + .pullup_pin = 0, /* pull-up driven by UDC */ +}; + +/* + * MACB Ethernet device + */ +static struct at91_eth_data __initdata ek_macb_data = { + .phy_irq_pin = AT91_PIN_PA31, + .is_rmii = 1, +}; + +/* + * NAND flash + */ +static struct mtd_partition __initdata ek_nand_partition[] = { + { + .name = "Uboot & Kernel", + .offset = 0x00000000, + .size = 16 * 1024 * 1024, + }, + { + .name = "Root FS", + .offset = 0x01000000, + .size = 120 * 1024 * 1024, + }, + { + .name = "FS", + .offset = 0x08800000, + .size = 120 * 1024 * 1024, + } +}; + +static struct mtd_partition * __init nand_partitions(int size, int *num_partitions) +{ + *num_partitions = ARRAY_SIZE(ek_nand_partition); + return ek_nand_partition; +} + +static struct at91_nand_data __initdata ek_nand_data = { + .ale = 21, + .cle = 22, +// .det_pin = ... not connected + .rdy_pin = AT91_PIN_PC13, + .enable_pin = AT91_PIN_PC14, + .partition_info = nand_partitions, +#if defined(CONFIG_MTD_NAND_AT91_BUSWIDTH_16) + .bus_width_16 = 1, +#else + .bus_width_16 = 0, +#endif +}; + +/* + * GPIO Buttons + */ + +#if defined(CONFIG_KEYBOARD_GPIO) || defined(CONFIG_KEYBOARD_GPIO_MODULE) +static struct gpio_keys_button ek_buttons[] = { + { /* USER PUSH BUTTON */ + .code = KEY_ENTER, + .gpio = AT91_PIN_PB10, + .active_low = 1, + .desc = "user_pb", + .wakeup = 1, + } +}; + +static struct gpio_keys_platform_data ek_button_data = { + .buttons = ek_buttons, + .nbuttons = ARRAY_SIZE(ek_buttons), +}; + +static struct platform_device ek_button_device = { + .name = "gpio-keys", + .id = -1, + .num_resources = 0, + .dev = { + .platform_data = &ek_button_data, + } +}; + +static void __init ek_add_device_buttons(void) +{ + at91_set_GPIO_periph(AT91_PIN_PB10, 1); /* user push button, pull up enabled */ + at91_set_deglitch(AT91_PIN_PB10, 1); + + platform_device_register(&ek_button_device); +} +#else +static void __init ek_add_device_buttons(void) {} +#endif + +/* + * LEDs + */ +static struct gpio_led ek_leds[] = { + { /* user_led (green) */ + .name = "user_led", + .gpio = AT91_PIN_PB21, + .active_low = 0, + .default_trigger = "heartbeat", + } +}; + +static void __init ek_board_init(void) +{ + /* Serial */ + at91_add_device_serial(); + /* USB Host */ + at91_add_device_usbh(&ek_usbh_data); + /* USB Device */ + at91_add_device_udc(&ek_udc_data); + /* NAND */ + at91_add_device_nand(&ek_nand_data); + /* I2C */ + at91_add_device_i2c(NULL, 0); + /* Ethernet */ + at91_add_device_eth(&ek_macb_data); + /* Push Buttons */ + ek_add_device_buttons(); + /* LEDs */ + at91_gpio_leds(ek_leds, ARRAY_SIZE(ek_leds)); + /* shutdown controller, wakeup button (5 msec low) */ + at91_sys_write(AT91_SHDW_MR, AT91_SHDW_CPTWK0_(10) | AT91_SHDW_WKMODE0_LOW + | AT91_SHDW_RTTWKEN); +} + +MACHINE_START(USB_A9260, "CALAO USB_A9260") + /* Maintainer: calao-systems */ + .phys_io = AT91_BASE_SYS, + .io_pg_offst = (AT91_VA_BASE_SYS >> 18) & 0xfffc, + .boot_params = AT91_SDRAM_BASE + 0x100, + .timer = &at91sam926x_timer, + .map_io = ek_map_io, + .init_irq = ek_init_irq, + .init_machine = ek_board_init, +MACHINE_END diff --git a/arch/arm/mach-at91/board-usb-a9263.c b/arch/arm/mach-at91/board-usb-a9263.c new file mode 100644 index 000000000000..95800d32bd49 --- /dev/null +++ b/arch/arm/mach-at91/board-usb-a9263.c @@ -0,0 +1,230 @@ +/* + * linux/arch/arm/mach-at91/board-usb-a9263.c + * + * Copyright (C) 2005 SAN People + * Copyright (C) 2007 Atmel Corporation. + * Copyright (C) 2007 Calao-systems + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include "generic.h" + + +static void __init ek_map_io(void) +{ + /* Initialize processor: 12.00 MHz crystal */ + at91sam9263_initialize(12000000); + + /* DGBU on ttyS0. (Rx & Tx only) */ + at91_register_uart(0, 0, 0); + + /* set serial console to ttyS0 (ie, DBGU) */ + at91_set_serial_console(0); +} + +static void __init ek_init_irq(void) +{ + at91sam9263_init_interrupts(NULL); +} + + +/* + * USB Host port + */ +static struct at91_usbh_data __initdata ek_usbh_data = { + .ports = 2, +}; + +/* + * USB Device port + */ +static struct at91_udc_data __initdata ek_udc_data = { + .vbus_pin = AT91_PIN_PB11, + .pullup_pin = 0, /* pull-up driven by UDC */ +}; + +/* + * SPI devices. + */ +static struct spi_board_info ek_spi_devices[] = { +#if !defined(CONFIG_MMC_AT91) + { /* DataFlash chip */ + .modalias = "mtd_dataflash", + .chip_select = 0, + .max_speed_hz = 15 * 1000 * 1000, + .bus_num = 0, + } +#endif +}; + +/* + * MACB Ethernet device + */ +static struct at91_eth_data __initdata ek_macb_data = { + .phy_irq_pin = AT91_PIN_PE31, + .is_rmii = 1, +}; + +/* + * NAND flash + */ +static struct mtd_partition __initdata ek_nand_partition[] = { + { + .name = "Linux Kernel", + .offset = 0x00000000, + .size = 16 * 1024 * 1024, + }, + { + .name = "Root FS", + .offset = 0x01000000, + .size = 120 * 1024 * 1024, + }, + { + .name = "FS", + .offset = 0x08800000, + .size = 120 * 1024 * 1024, + } +}; + +static struct mtd_partition * __init nand_partitions(int size, int *num_partitions) +{ + *num_partitions = ARRAY_SIZE(ek_nand_partition); + return ek_nand_partition; +} + +static struct at91_nand_data __initdata ek_nand_data = { + .ale = 21, + .cle = 22, +// .det_pin = ... not connected + .rdy_pin = AT91_PIN_PA22, + .enable_pin = AT91_PIN_PD15, + .partition_info = nand_partitions, +#if defined(CONFIG_MTD_NAND_AT91_BUSWIDTH_16) + .bus_width_16 = 1, +#else + .bus_width_16 = 0, +#endif +}; + +/* + * GPIO Buttons + */ +#if defined(CONFIG_KEYBOARD_GPIO) || defined(CONFIG_KEYBOARD_GPIO_MODULE) +static struct gpio_keys_button ek_buttons[] = { + { /* USER PUSH BUTTON */ + .code = KEY_ENTER, + .gpio = AT91_PIN_PB10, + .active_low = 1, + .desc = "user_pb", + .wakeup = 1, + } +}; + +static struct gpio_keys_platform_data ek_button_data = { + .buttons = ek_buttons, + .nbuttons = ARRAY_SIZE(ek_buttons), +}; + +static struct platform_device ek_button_device = { + .name = "gpio-keys", + .id = -1, + .num_resources = 0, + .dev = { + .platform_data = &ek_button_data, + } +}; + +static void __init ek_add_device_buttons(void) +{ + at91_set_GPIO_periph(AT91_PIN_PB10, 1); /* user push button, pull up enabled */ + at91_set_deglitch(AT91_PIN_PB10, 1); + + platform_device_register(&ek_button_device); +} +#else +static void __init ek_add_device_buttons(void) {} +#endif + +/* + * LEDs + */ +static struct gpio_led ek_leds[] = { + { /* user_led (green) */ + .name = "user_led", + .gpio = AT91_PIN_PB21, + .active_low = 1, + .default_trigger = "heartbeat", + } +}; + + +static void __init ek_board_init(void) +{ + /* Serial */ + at91_add_device_serial(); + /* USB Host */ + at91_add_device_usbh(&ek_usbh_data); + /* USB Device */ + at91_add_device_udc(&ek_udc_data); + /* SPI */ + at91_add_device_spi(ek_spi_devices, ARRAY_SIZE(ek_spi_devices)); + /* Ethernet */ + at91_add_device_eth(&ek_macb_data); + /* NAND */ + at91_add_device_nand(&ek_nand_data); + /* I2C */ + at91_add_device_i2c(NULL, 0); + /* Push Buttons */ + ek_add_device_buttons(); + /* LEDs */ + at91_gpio_leds(ek_leds, ARRAY_SIZE(ek_leds)); + /* shutdown controller, wakeup button (5 msec low) */ + at91_sys_write(AT91_SHDW_MR, AT91_SHDW_CPTWK0_(10) | AT91_SHDW_WKMODE0_LOW + | AT91_SHDW_RTTWKEN); +} + +MACHINE_START(USB_A9263, "CALAO USB_A9263") + /* Maintainer: calao-systems */ + .phys_io = AT91_BASE_SYS, + .io_pg_offst = (AT91_VA_BASE_SYS >> 18) & 0xfffc, + .boot_params = AT91_SDRAM_BASE + 0x100, + .timer = &at91sam926x_timer, + .map_io = ek_map_io, + .init_irq = ek_init_irq, + .init_machine = ek_board_init, +MACHINE_END diff --git a/include/asm-arm/arch-at91/timex.h b/include/asm-arm/arch-at91/timex.h index f1933b0fa43f..6e084eb839d9 100644 --- a/include/asm-arm/arch-at91/timex.h +++ b/include/asm-arm/arch-at91/timex.h @@ -27,14 +27,29 @@ #define CLOCK_TICK_RATE (AT91_SLOW_CLOCK) -#elif defined(CONFIG_ARCH_AT91SAM9260) || defined(CONFIG_ARCH_AT91SAM9261) +#elif defined(CONFIG_ARCH_AT91SAM9260) + +#if defined(CONFIG_MACH_USB_A9260) || defined(CONFIG_MACH_QIL_A9260) +#define AT91SAM9_MASTER_CLOCK 90000000 +#else +#define AT91SAM9_MASTER_CLOCK 99300000 +#endif + +#define CLOCK_TICK_RATE (AT91SAM9_MASTER_CLOCK/16) + +#elif defined(CONFIG_ARCH_AT91SAM9261) #define AT91SAM9_MASTER_CLOCK 99300000 #define CLOCK_TICK_RATE (AT91SAM9_MASTER_CLOCK/16) #elif defined(CONFIG_ARCH_AT91SAM9263) +#if defined(CONFIG_MACH_USB_A9263) +#define AT91SAM9_MASTER_CLOCK 90000000 +#else #define AT91SAM9_MASTER_CLOCK 99959500 +#endif + #define CLOCK_TICK_RATE (AT91SAM9_MASTER_CLOCK/16) #elif defined(CONFIG_ARCH_AT91SAM9RL) -- cgit v1.2.3 From 53d7168026a440c4cba25468a3d926ddd7ab030a Mon Sep 17 00:00:00 2001 From: Stelian Pop Date: Sat, 5 Apr 2008 21:14:03 +0100 Subject: [ARM] 4933/1: AT91CAP9 UDPHS driver: generic AT91 parts. This is patch 1 of 2 adding support for the USB High Speed Device Port on the AT91CAP9 system on chip. The AT91CAP9 uses the same UDPHS IP as the AVR32 and the AT91SAM9RL. This patch makes the generic AT91 adaptations, mainly dealing with the addition of the UDPHS UTMI clock. Signed-off-by: Stelian Pop Acked-by: Haavard Skinnemoen Acked-by: Andrew Victor Signed-off-by: Russell King --- arch/arm/mach-at91/clock.c | 42 +++++++++++++++++++++++++++++++++++- include/asm-arm/arch-at91/at91_pmc.h | 7 +++++- include/asm-arm/arch-at91/board.h | 4 ++++ 3 files changed, 51 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/arm/mach-at91/clock.c b/arch/arm/mach-at91/clock.c index a33dfe450726..b87772cd3d32 100644 --- a/arch/arm/mach-at91/clock.c +++ b/arch/arm/mach-at91/clock.c @@ -112,12 +112,34 @@ static void pmc_sys_mode(struct clk *clk, int is_on) at91_sys_write(AT91_PMC_SCDR, clk->pmc_mask); } +static void pmc_uckr_mode(struct clk *clk, int is_on) +{ + unsigned int uckr = at91_sys_read(AT91_CKGR_UCKR); + + if (is_on) { + is_on = AT91_PMC_LOCKU; + at91_sys_write(AT91_CKGR_UCKR, uckr | clk->pmc_mask); + } else + at91_sys_write(AT91_CKGR_UCKR, uckr & ~(clk->pmc_mask)); + + do { + cpu_relax(); + } while ((at91_sys_read(AT91_PMC_SR) & AT91_PMC_LOCKU) != is_on); +} + /* USB function clocks (PLLB must be 48 MHz) */ static struct clk udpck = { .name = "udpck", .parent = &pllb, .mode = pmc_sys_mode, }; +static struct clk utmi_clk = { + .name = "utmi_clk", + .parent = &main_clk, + .pmc_mask = AT91_PMC_UPLLEN, /* in CKGR_UCKR */ + .mode = pmc_uckr_mode, + .type = CLK_TYPE_PLL, +}; static struct clk uhpck = { .name = "uhpck", .parent = &pllb, @@ -361,7 +383,7 @@ static void __init init_programmable_clock(struct clk *clk) static int at91_clk_show(struct seq_file *s, void *unused) { - u32 scsr, pcsr, sr; + u32 scsr, pcsr, uckr = 0, sr; struct clk *clk; seq_printf(s, "SCSR = %8x\n", scsr = at91_sys_read(AT91_PMC_SCSR)); @@ -370,6 +392,8 @@ static int at91_clk_show(struct seq_file *s, void *unused) seq_printf(s, "MCFR = %8x\n", at91_sys_read(AT91_CKGR_MCFR)); seq_printf(s, "PLLA = %8x\n", at91_sys_read(AT91_CKGR_PLLAR)); seq_printf(s, "PLLB = %8x\n", at91_sys_read(AT91_CKGR_PLLBR)); + if (cpu_is_at91cap9()) + seq_printf(s, "UCKR = %8x\n", uckr = at91_sys_read(AT91_CKGR_UCKR)); seq_printf(s, "MCKR = %8x\n", at91_sys_read(AT91_PMC_MCKR)); seq_printf(s, "SR = %8x\n", sr = at91_sys_read(AT91_PMC_SR)); @@ -382,6 +406,8 @@ static int at91_clk_show(struct seq_file *s, void *unused) state = (scsr & clk->pmc_mask) ? "on" : "off"; else if (clk->mode == pmc_periph_mode) state = (pcsr & clk->pmc_mask) ? "on" : "off"; + else if (clk->mode == pmc_uckr_mode) + state = (uckr & clk->pmc_mask) ? "on" : "off"; else if (clk->pmc_mask) state = (sr & clk->pmc_mask) ? "on" : "off"; else if (clk == &clk32k || clk == &main_clk) @@ -581,6 +607,17 @@ int __init at91_clock_init(unsigned long main_clock) udpck.rate_hz = at91_usb_rate(&pllb, pllb.rate_hz, at91_pllb_usb_init); uhpck.rate_hz = at91_usb_rate(&pllb, pllb.rate_hz, at91_pllb_usb_init); + /* + * USB HS clock init + */ + if (cpu_is_at91cap9()) { + /* + * multiplier is hard-wired to 40 + * (obtain the USB High Speed 480 MHz when input is 12 MHz) + */ + utmi_clk.rate_hz = 40 * utmi_clk.parent->rate_hz; + } + /* * MCK and CPU derive from one of those primary clocks. * For now, assume this parentage won't change. @@ -598,6 +635,9 @@ int __init at91_clock_init(unsigned long main_clock) for (i = 0; i < ARRAY_SIZE(standard_pmc_clocks); i++) list_add_tail(&standard_pmc_clocks[i]->node, &clocks); + if (cpu_is_at91cap9()) + list_add_tail(&utmi_clk.node, &clocks); + /* MCK and CPU clock are "always on" */ clk_enable(&mck); diff --git a/include/asm-arm/arch-at91/at91_pmc.h b/include/asm-arm/arch-at91/at91_pmc.h index c2b13c280155..2001e81f2267 100644 --- a/include/asm-arm/arch-at91/at91_pmc.h +++ b/include/asm-arm/arch-at91/at91_pmc.h @@ -39,10 +39,14 @@ #define AT91_PMC_PCSR (AT91_PMC + 0x18) /* Peripheral Clock Status Register */ #define AT91_CKGR_UCKR (AT91_PMC + 0x1C) /* UTMI Clock Register [SAM9RL, CAP9] */ +#define AT91_PMC_UPLLEN (1 << 16) /* UTMI PLL Enable */ +#define AT91_PMC_UPLLCOUNT (0xf << 20) /* UTMI PLL Start-up Time */ +#define AT91_PMC_BIASEN (1 << 24) /* UTMI BIAS Enable */ +#define AT91_PMC_BIASCOUNT (0xf << 28) /* UTMI PLL Start-up Time */ #define AT91_CKGR_MOR (AT91_PMC + 0x20) /* Main Oscillator Register [not on SAM9RL] */ #define AT91_PMC_MOSCEN (1 << 0) /* Main Oscillator Enable */ -#define AT91_PMC_OSCBYPASS (1 << 1) /* Oscillator Bypass [AT91SAM926x only] */ +#define AT91_PMC_OSCBYPASS (1 << 1) /* Oscillator Bypass [SAM9x, CAP9] */ #define AT91_PMC_OSCOUNT (0xff << 8) /* Main Oscillator Start-up Time */ #define AT91_CKGR_MCFR (AT91_PMC + 0x24) /* Main Clock Frequency Register */ @@ -97,6 +101,7 @@ #define AT91_PMC_LOCKA (1 << 1) /* PLLA Lock */ #define AT91_PMC_LOCKB (1 << 2) /* PLLB Lock */ #define AT91_PMC_MCKRDY (1 << 3) /* Master Clock */ +#define AT91_PMC_LOCKU (1 << 6) /* UPLL Lock [AT91CAP9 only] */ #define AT91_PMC_PCK0RDY (1 << 8) /* Programmable Clock 0 */ #define AT91_PMC_PCK1RDY (1 << 9) /* Programmable Clock 1 */ #define AT91_PMC_PCK2RDY (1 << 10) /* Programmable Clock 2 */ diff --git a/include/asm-arm/arch-at91/board.h b/include/asm-arm/arch-at91/board.h index dc189f01c5b3..1f247028686d 100644 --- a/include/asm-arm/arch-at91/board.h +++ b/include/asm-arm/arch-at91/board.h @@ -36,6 +36,7 @@ #include #include #include +#include /* USB Device */ struct at91_udc_data { @@ -45,6 +46,9 @@ struct at91_udc_data { }; extern void __init at91_add_device_udc(struct at91_udc_data *data); + /* USB High Speed Device */ +extern void __init at91_add_device_usba(struct usba_platform_data *data); + /* Compact Flash */ struct at91_cf_data { u8 irq_pin; /* I/O IRQ */ -- cgit v1.2.3 From 7c8cf66529ebf95f1a5f34d1b69504d442b42630 Mon Sep 17 00:00:00 2001 From: Stelian Pop Date: Sat, 5 Apr 2008 21:15:25 +0100 Subject: [ARM] 4934/1: AT91CAP9 UDPHS driver: board and cpu integration. This is patch 2 of 2 adding support for the USB High Speed Device Port on the AT91CAP9 system on chip. The AT91CAP9 uses the same UDPHS IP as the AVR32 and the AT91SAM9RL. This patch declares the UDPHS ressources in the at91cap9 (cpu and adk board) files, wires up the atmel_usba_udc driver to them, and activates the driver in the defconfig. Signed-off-by: Stelian Pop Acked-by: Haavard Skinnemoen Acked-by: Andrew Victor Signed-off-by: Russell King --- arch/arm/configs/at91cap9adk_defconfig | 27 +++++++- arch/arm/mach-at91/at91cap9_devices.c | 99 +++++++++++++++++++++++++++++ arch/arm/mach-at91/board-cap9adk.c | 9 +++ include/asm-arm/arch-at91/at91cap9.h | 2 +- include/asm-arm/arch-at91/at91cap9_matrix.h | 5 ++ 5 files changed, 140 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/arm/configs/at91cap9adk_defconfig b/arch/arm/configs/at91cap9adk_defconfig index e32e73648129..39ca662c0793 100644 --- a/arch/arm/configs/at91cap9adk_defconfig +++ b/arch/arm/configs/at91cap9adk_defconfig @@ -907,7 +907,32 @@ CONFIG_USB_MON=y # # USB Gadget Support # -# CONFIG_USB_GADGET is not set +CONFIG_USB_GADGET=y +# CONFIG_USB_GADGET_DEBUG is not set +# CONFIG_USB_GADGET_DEBUG_FILES is not set +CONFIG_USB_GADGET_SELECTED=y +# CONFIG_USB_GADGET_AMD5536UDC is not set +CONFIG_USB_GADGET_ATMEL_USBA=y +CONFIG_USB_ATMEL_USBA=y +# CONFIG_USB_GADGET_FSL_USB2 is not set +# CONFIG_USB_GADGET_NET2280 is not set +# CONFIG_USB_GADGET_PXA2XX is not set +# CONFIG_USB_GADGET_M66592 is not set +# CONFIG_USB_GADGET_GOKU is not set +# CONFIG_USB_GADGET_LH7A40X is not set +# CONFIG_USB_GADGET_OMAP is not set +# CONFIG_USB_GADGET_S3C2410 is not set +# CONFIG_USB_GADGET_AT91 is not set +# CONFIG_USB_GADGET_DUMMY_HCD is not set +CONFIG_USB_GADGET_DUALSPEED=y +# CONFIG_USB_ZERO is not set +CONFIG_USB_ETH=m +CONFIG_USB_ETH_RNDIS=y +# CONFIG_USB_GADGETFS is not set +CONFIG_USB_FILE_STORAGE=m +# CONFIG_USB_FILE_STORAGE_TEST is not set +# CONFIG_USB_G_SERIAL is not set +# CONFIG_USB_MIDI_GADGET is not set CONFIG_MMC=y # CONFIG_MMC_DEBUG is not set # CONFIG_MMC_UNSAFE_RESUME is not set diff --git a/arch/arm/mach-at91/at91cap9_devices.c b/arch/arm/mach-at91/at91cap9_devices.c index be526746e01e..747b9dedab88 100644 --- a/arch/arm/mach-at91/at91cap9_devices.c +++ b/arch/arm/mach-at91/at91cap9_devices.c @@ -83,6 +83,105 @@ void __init at91_add_device_usbh(struct at91_usbh_data *data) {} #endif +/* -------------------------------------------------------------------- + * USB HS Device (Gadget) + * -------------------------------------------------------------------- */ + +#if defined(CONFIG_USB_GADGET_ATMEL_USBA) || defined(CONFIG_USB_GADGET_ATMEL_USBA_MODULE) + +static struct resource usba_udc_resources[] = { + [0] = { + .start = AT91CAP9_UDPHS_FIFO, + .end = AT91CAP9_UDPHS_FIFO + SZ_512K - 1, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = AT91CAP9_BASE_UDPHS, + .end = AT91CAP9_BASE_UDPHS + SZ_1K - 1, + .flags = IORESOURCE_MEM, + }, + [2] = { + .start = AT91CAP9_ID_UDPHS, + .end = AT91CAP9_ID_UDPHS, + .flags = IORESOURCE_IRQ, + }, +}; + +#define EP(nam, idx, maxpkt, maxbk, dma, isoc) \ + [idx] = { \ + .name = nam, \ + .index = idx, \ + .fifo_size = maxpkt, \ + .nr_banks = maxbk, \ + .can_dma = dma, \ + .can_isoc = isoc, \ + } + +static struct usba_ep_data usba_udc_ep[] = { + EP("ep0", 0, 64, 1, 0, 0), + EP("ep1", 1, 1024, 3, 1, 1), + EP("ep2", 2, 1024, 3, 1, 1), + EP("ep3", 3, 1024, 2, 1, 1), + EP("ep4", 4, 1024, 2, 1, 1), + EP("ep5", 5, 1024, 2, 1, 0), + EP("ep6", 6, 1024, 2, 1, 0), + EP("ep7", 7, 1024, 2, 0, 0), +}; + +#undef EP + +/* + * pdata doesn't have room for any endpoints, so we need to + * append room for the ones we need right after it. + */ +static struct { + struct usba_platform_data pdata; + struct usba_ep_data ep[8]; +} usba_udc_data; + +static struct platform_device at91_usba_udc_device = { + .name = "atmel_usba_udc", + .id = -1, + .dev = { + .platform_data = &usba_udc_data.pdata, + }, + .resource = usba_udc_resources, + .num_resources = ARRAY_SIZE(usba_udc_resources), +}; + +void __init at91_add_device_usba(struct usba_platform_data *data) +{ + at91_sys_write(AT91_MATRIX_UDPHS, AT91_MATRIX_SELECT_UDPHS | + AT91_MATRIX_UDPHS_BYPASS_LOCK); + + /* + * Invalid pins are 0 on AT91, but the usba driver is shared + * with AVR32, which use negative values instead. Once/if + * gpio_is_valid() is ported to AT91, revisit this code. + */ + usba_udc_data.pdata.vbus_pin = -EINVAL; + usba_udc_data.pdata.num_ep = ARRAY_SIZE(usba_udc_ep); + memcpy(usba_udc_data.ep, usba_udc_ep, sizeof(usba_udc_ep));; + + if (data && data->vbus_pin > 0) { + at91_set_gpio_input(data->vbus_pin, 0); + at91_set_deglitch(data->vbus_pin, 1); + usba_udc_data.pdata.vbus_pin = data->vbus_pin; + } + + /* Pullup pin is handled internally by USB device peripheral */ + + /* Clocks */ + at91_clock_associate("utmi_clk", &at91_usba_udc_device.dev, "hclk"); + at91_clock_associate("udphs_clk", &at91_usba_udc_device.dev, "pclk"); + + platform_device_register(&at91_usba_udc_device); +} +#else +void __init at91_add_device_usba(struct usba_platform_data *data) {} +#endif + + /* -------------------------------------------------------------------- * Ethernet * -------------------------------------------------------------------- */ diff --git a/arch/arm/mach-at91/board-cap9adk.c b/arch/arm/mach-at91/board-cap9adk.c index e5512d1ff217..8a2a958639db 100644 --- a/arch/arm/mach-at91/board-cap9adk.c +++ b/arch/arm/mach-at91/board-cap9adk.c @@ -78,6 +78,12 @@ static struct at91_usbh_data __initdata cap9adk_usbh_data = { .ports = 2, }; +/* + * USB HS Device port + */ +static struct usba_platform_data __initdata cap9adk_usba_udc_data = { + .vbus_pin = AT91_PIN_PB31, +}; /* * ADS7846 Touchscreen @@ -326,6 +332,9 @@ static void __init cap9adk_board_init(void) /* USB Host */ set_irq_type(AT91CAP9_ID_UHP, IRQT_HIGH); at91_add_device_usbh(&cap9adk_usbh_data); + /* USB HS */ + set_irq_type(AT91CAP9_ID_UDPHS, IRQT_HIGH); + at91_add_device_usba(&cap9adk_usba_udc_data); /* SPI */ at91_add_device_spi(cap9adk_spi_devices, ARRAY_SIZE(cap9adk_spi_devices)); /* Touchscreen */ diff --git a/include/asm-arm/arch-at91/at91cap9.h b/include/asm-arm/arch-at91/at91cap9.h index bac83adb5050..6f14d9053ac7 100644 --- a/include/asm-arm/arch-at91/at91cap9.h +++ b/include/asm-arm/arch-at91/at91cap9.h @@ -118,7 +118,7 @@ #define AT91CAP9_ROM_SIZE (32 * SZ_1K) /* Internal ROM size (32Kb) */ #define AT91CAP9_LCDC_BASE 0x00500000 /* LCD Controller */ -#define AT91CAP9_UDPHS_BASE 0x00600000 /* USB High Speed Device Port */ +#define AT91CAP9_UDPHS_FIFO 0x00600000 /* USB High Speed Device Port */ #define AT91CAP9_UHP_BASE 0x00700000 /* USB Host controller */ #define CONFIG_DRAM_BASE AT91_CHIPSELECT_6 diff --git a/include/asm-arm/arch-at91/at91cap9_matrix.h b/include/asm-arm/arch-at91/at91cap9_matrix.h index a641686b6c3d..ddbd4873c842 100644 --- a/include/asm-arm/arch-at91/at91cap9_matrix.h +++ b/include/asm-arm/arch-at91/at91cap9_matrix.h @@ -106,6 +106,11 @@ #define AT91_MPBS0_SFR (AT91_MATRIX + 0x114) /* MPBlock Slave 0 Special Function Register */ #define AT91_MPBS1_SFR (AT91_MATRIX + 0x11C) /* MPBlock Slave 1 Special Function Register */ +#define AT91_MATRIX_UDPHS (AT91_MATRIX + 0x118) /* USBHS Special Function Register [AT91CAP9 only] */ +#define AT91_MATRIX_SELECT_UDPHS (0 << 31) /* select High Speed UDP */ +#define AT91_MATRIX_SELECT_UDP (1 << 31) /* select standard UDP */ +#define AT91_MATRIX_UDPHS_BYPASS_LOCK (1 << 30) /* bypass lock bit */ + #define AT91_MATRIX_EBICSA (AT91_MATRIX + 0x120) /* EBI Chip Select Assignment Register */ #define AT91_MATRIX_EBI_CS1A (1 << 1) /* Chip Select 1 Assignment */ #define AT91_MATRIX_EBI_CS1A_SMC (0 << 1) -- cgit v1.2.3 From ba45ca435060614e595a107ac323a36b52619d7d Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Tue, 8 Apr 2008 13:59:18 +0100 Subject: [ARM] 4940/1: AT91: UDPHS driver: SAM9RL board and cpu integration. Adds support for the USB High Speed Device Port on the AT91SAM9RL system on chip. The AT91SAM9RL uses the same UDPHS IP as the AVR32 and the AT91CAP9 (atmel_usba_udc driver). Signed-off-by: Nicolas Ferre Acked-by: Andrew Victor Signed-off-by: Russell King --- arch/arm/mach-at91/at91sam9rl_devices.c | 95 +++++++++++++++++++++++++++++++++ arch/arm/mach-at91/board-sam9rlek.c | 10 ++++ arch/arm/mach-at91/clock.c | 9 ++-- drivers/usb/gadget/Kconfig | 4 +- include/asm-arm/arch-at91/at91sam9rl.h | 2 +- 5 files changed, 113 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/arch/arm/mach-at91/at91sam9rl_devices.c b/arch/arm/mach-at91/at91sam9rl_devices.c index 450db304936f..8507dd02fe90 100644 --- a/arch/arm/mach-at91/at91sam9rl_devices.c +++ b/arch/arm/mach-at91/at91sam9rl_devices.c @@ -25,6 +25,101 @@ #include "generic.h" +/* -------------------------------------------------------------------- + * USB HS Device (Gadget) + * -------------------------------------------------------------------- */ + +#if defined(CONFIG_USB_GADGET_ATMEL_USBA) || defined(CONFIG_USB_GADGET_ATMEL_USBA_MODULE) + +static struct resource usba_udc_resources[] = { + [0] = { + .start = AT91SAM9RL_UDPHS_FIFO, + .end = AT91SAM9RL_UDPHS_FIFO + SZ_512K - 1, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = AT91SAM9RL_BASE_UDPHS, + .end = AT91SAM9RL_BASE_UDPHS + SZ_1K - 1, + .flags = IORESOURCE_MEM, + }, + [2] = { + .start = AT91SAM9RL_ID_UDPHS, + .end = AT91SAM9RL_ID_UDPHS, + .flags = IORESOURCE_IRQ, + }, +}; + +#define EP(nam, idx, maxpkt, maxbk, dma, isoc) \ + [idx] = { \ + .name = nam, \ + .index = idx, \ + .fifo_size = maxpkt, \ + .nr_banks = maxbk, \ + .can_dma = dma, \ + .can_isoc = isoc, \ + } + +static struct usba_ep_data usba_udc_ep[] __initdata = { + EP("ep0", 0, 64, 1, 0, 0), + EP("ep1", 1, 1024, 2, 1, 1), + EP("ep2", 2, 1024, 2, 1, 1), + EP("ep3", 3, 1024, 3, 1, 0), + EP("ep4", 4, 1024, 3, 1, 0), + EP("ep5", 5, 1024, 3, 1, 1), + EP("ep6", 6, 1024, 3, 1, 1), +}; + +#undef EP + +/* + * pdata doesn't have room for any endpoints, so we need to + * append room for the ones we need right after it. + */ +static struct { + struct usba_platform_data pdata; + struct usba_ep_data ep[7]; +} usba_udc_data; + +static struct platform_device at91_usba_udc_device = { + .name = "atmel_usba_udc", + .id = -1, + .dev = { + .platform_data = &usba_udc_data.pdata, + }, + .resource = usba_udc_resources, + .num_resources = ARRAY_SIZE(usba_udc_resources), +}; + +void __init at91_add_device_usba(struct usba_platform_data *data) +{ + /* + * Invalid pins are 0 on AT91, but the usba driver is shared + * with AVR32, which use negative values instead. Once/if + * gpio_is_valid() is ported to AT91, revisit this code. + */ + usba_udc_data.pdata.vbus_pin = -EINVAL; + usba_udc_data.pdata.num_ep = ARRAY_SIZE(usba_udc_ep); + memcpy(usba_udc_data.ep, usba_udc_ep, sizeof(usba_udc_ep));; + + if (data && data->vbus_pin > 0) { + at91_set_gpio_input(data->vbus_pin, 0); + at91_set_deglitch(data->vbus_pin, 1); + usba_udc_data.pdata.vbus_pin = data->vbus_pin; + } + + /* Pullup pin is handled internally by USB device peripheral */ + + /* Clocks */ + at91_clock_associate("utmi_clk", &at91_usba_udc_device.dev, "hclk"); + at91_clock_associate("udphs_clk", &at91_usba_udc_device.dev, "pclk"); + + platform_device_register(&at91_usba_udc_device); +} +#else +void __init at91_add_device_usba(struct usba_platform_data *data) {} +#endif + + /* -------------------------------------------------------------------- * MMC / SD * -------------------------------------------------------------------- */ diff --git a/arch/arm/mach-at91/board-sam9rlek.c b/arch/arm/mach-at91/board-sam9rlek.c index ffc0597aee8d..b6a70fc735c3 100644 --- a/arch/arm/mach-at91/board-sam9rlek.c +++ b/arch/arm/mach-at91/board-sam9rlek.c @@ -55,6 +55,14 @@ static void __init ek_init_irq(void) } +/* + * USB HS Device port + */ +static struct usba_platform_data __initdata ek_usba_udc_data = { + .vbus_pin = AT91_PIN_PA8, +}; + + /* * MCI (SD/MMC) */ @@ -175,6 +183,8 @@ static void __init ek_board_init(void) { /* Serial */ at91_add_device_serial(); + /* USB HS */ + at91_add_device_usba(&ek_usba_udc_data); /* I2C */ at91_add_device_i2c(NULL, 0); /* NAND */ diff --git a/arch/arm/mach-at91/clock.c b/arch/arm/mach-at91/clock.c index b87772cd3d32..e8ce8f0f3eda 100644 --- a/arch/arm/mach-at91/clock.c +++ b/arch/arm/mach-at91/clock.c @@ -391,8 +391,9 @@ static int at91_clk_show(struct seq_file *s, void *unused) seq_printf(s, "MOR = %8x\n", at91_sys_read(AT91_CKGR_MOR)); seq_printf(s, "MCFR = %8x\n", at91_sys_read(AT91_CKGR_MCFR)); seq_printf(s, "PLLA = %8x\n", at91_sys_read(AT91_CKGR_PLLAR)); - seq_printf(s, "PLLB = %8x\n", at91_sys_read(AT91_CKGR_PLLBR)); - if (cpu_is_at91cap9()) + if (!cpu_is_at91sam9rl()) + seq_printf(s, "PLLB = %8x\n", at91_sys_read(AT91_CKGR_PLLBR)); + if (cpu_is_at91cap9() || cpu_is_at91sam9rl()) seq_printf(s, "UCKR = %8x\n", uckr = at91_sys_read(AT91_CKGR_UCKR)); seq_printf(s, "MCKR = %8x\n", at91_sys_read(AT91_PMC_MCKR)); seq_printf(s, "SR = %8x\n", sr = at91_sys_read(AT91_PMC_SR)); @@ -610,7 +611,7 @@ int __init at91_clock_init(unsigned long main_clock) /* * USB HS clock init */ - if (cpu_is_at91cap9()) { + if (cpu_is_at91cap9() || cpu_is_at91sam9rl()) { /* * multiplier is hard-wired to 40 * (obtain the USB High Speed 480 MHz when input is 12 MHz) @@ -635,7 +636,7 @@ int __init at91_clock_init(unsigned long main_clock) for (i = 0; i < ARRAY_SIZE(standard_pmc_clocks); i++) list_add_tail(&standard_pmc_clocks[i]->node, &clocks); - if (cpu_is_at91cap9()) + if (cpu_is_at91cap9() || cpu_is_at91sam9rl()) list_add_tail(&utmi_clk.node, &clocks); /* MCK and CPU clock are "always on" */ diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 6e784d2db423..3565d4352826 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -118,10 +118,10 @@ config USB_AMD5536UDC config USB_GADGET_ATMEL_USBA boolean "Atmel USBA" select USB_GADGET_DUALSPEED - depends on AVR32 || ARCH_AT91CAP9 + depends on AVR32 || ARCH_AT91CAP9 || ARCH_AT91SAM9RL help USBA is the integrated high-speed USB Device controller on - the AT32AP700x and AT91CAP9 processors from Atmel. + the AT32AP700x, some AT91SAM9 and AT91CAP9 processors from Atmel. config USB_ATMEL_USBA tristate diff --git a/include/asm-arm/arch-at91/at91sam9rl.h b/include/asm-arm/arch-at91/at91sam9rl.h index 16d2832f6c0a..622e56f81d42 100644 --- a/include/asm-arm/arch-at91/at91sam9rl.h +++ b/include/asm-arm/arch-at91/at91sam9rl.h @@ -110,6 +110,6 @@ #define AT91SAM9RL_ROM_SIZE (2 * SZ_16K) /* Internal ROM size (32Kb) */ #define AT91SAM9RL_LCDC_BASE 0x00500000 /* LCD Controller */ -#define AT91SAM9RL_UDPHS_BASE 0x00600000 /* USB Device HS controller */ +#define AT91SAM9RL_UDPHS_FIFO 0x00600000 /* USB Device HS controller */ #endif -- cgit v1.2.3 From 0b0a9df6038752674e54e333cd247c877d29aab8 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 18 May 2008 14:59:36 +0100 Subject: [ARM] pxa: separate out power manager and clock registers The power manager and core clock registers aren't present in PXA3 CPUs. Move them out of pxa-regs.h into pxa2xx-regs.h, and include pxa2xx-regs.h where necessary. Signed-off-by: Russell King --- arch/arm/common/sharpsl_pm.c | 1 + arch/arm/mach-pxa/clock.c | 2 +- arch/arm/mach-pxa/corgi.c | 1 + arch/arm/mach-pxa/corgi_pm.c | 1 + arch/arm/mach-pxa/generic.c | 1 + arch/arm/mach-pxa/magician.c | 1 + arch/arm/mach-pxa/mfp-pxa2xx.c | 1 + arch/arm/mach-pxa/poodle.c | 1 + arch/arm/mach-pxa/pxa25x.c | 1 + arch/arm/mach-pxa/standby.S | 1 + arch/arm/mach-pxa/trizeps4.c | 2 + include/asm-arm/arch-pxa/pxa-regs.h | 158 +------------------------------- include/asm-arm/arch-pxa/pxa2xx-regs.h | 162 +++++++++++++++++++++++++++++++++ include/asm-arm/arch-pxa/system.h | 1 + 14 files changed, 177 insertions(+), 157 deletions(-) (limited to 'include') diff --git a/arch/arm/common/sharpsl_pm.c b/arch/arm/common/sharpsl_pm.c index 5736c987c80d..8822b684d474 100644 --- a/arch/arm/common/sharpsl_pm.c +++ b/arch/arm/common/sharpsl_pm.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm/mach-pxa/clock.c b/arch/arm/mach-pxa/clock.c index e97dc59813c8..180c8bb53041 100644 --- a/arch/arm/mach-pxa/clock.c +++ b/arch/arm/mach-pxa/clock.c @@ -12,7 +12,7 @@ #include #include -#include +#include #include #include diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c index b757dd756655..b37671b71886 100644 --- a/arch/arm/mach-pxa/corgi.c +++ b/arch/arm/mach-pxa/corgi.c @@ -36,6 +36,7 @@ #include #include +#include #include #include #include diff --git a/arch/arm/mach-pxa/corgi_pm.c b/arch/arm/mach-pxa/corgi_pm.c index ae91c4b1135c..53c4499ebdbc 100644 --- a/arch/arm/mach-pxa/corgi_pm.c +++ b/arch/arm/mach-pxa/corgi_pm.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "sharpsl.h" diff --git a/arch/arm/mach-pxa/generic.c b/arch/arm/mach-pxa/generic.c index 44617938f3f1..c2f102339f57 100644 --- a/arch/arm/mach-pxa/generic.c +++ b/arch/arm/mach-pxa/generic.c @@ -26,6 +26,7 @@ #include #include +#include /* for __pxa_set_cken */ #include "generic.h" diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c index badba064dc04..b94cae483149 100644 --- a/arch/arm/mach-pxa/magician.c +++ b/arch/arm/mach-pxa/magician.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm/mach-pxa/mfp-pxa2xx.c b/arch/arm/mach-pxa/mfp-pxa2xx.c index 22097a1707cc..d1cdb4ecb0b8 100644 --- a/arch/arm/mach-pxa/mfp-pxa2xx.c +++ b/arch/arm/mach-pxa/mfp-pxa2xx.c @@ -20,6 +20,7 @@ #include #include +#include #include #include "generic.h" diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c index 0b30f25cff3c..f81c10cafd48 100644 --- a/arch/arm/mach-pxa/poodle.c +++ b/arch/arm/mach-pxa/poodle.c @@ -32,6 +32,7 @@ #include #include +#include #include #include #include diff --git a/arch/arm/mach-pxa/pxa25x.c b/arch/arm/mach-pxa/pxa25x.c index e5b417d14bb0..3b848dc3ca11 100644 --- a/arch/arm/mach-pxa/pxa25x.c +++ b/arch/arm/mach-pxa/pxa25x.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm/mach-pxa/standby.S b/arch/arm/mach-pxa/standby.S index a37ef1c4b9e9..40bb70eff3fe 100644 --- a/arch/arm/mach-pxa/standby.S +++ b/arch/arm/mach-pxa/standby.S @@ -14,6 +14,7 @@ #include #include +#include .text diff --git a/arch/arm/mach-pxa/trizeps4.c b/arch/arm/mach-pxa/trizeps4.c index 931885d86b91..bc7c465ef32b 100644 --- a/arch/arm/mach-pxa/trizeps4.c +++ b/arch/arm/mach-pxa/trizeps4.c @@ -41,6 +41,7 @@ #include #include +#include #include #include #include @@ -487,6 +488,7 @@ static void __init trizeps4_map_io(void) ConXS_BCR = trizeps_conxs_bcr; #endif +#warning FIXME - accessing PM registers directly is deprecated PWER = 0x00000002; PFER = 0x00000000; PRER = 0x00000002; diff --git a/include/asm-arm/arch-pxa/pxa-regs.h b/include/asm-arm/arch-pxa/pxa-regs.h index 68d742877308..dce9308626b7 100644 --- a/include/asm-arm/arch-pxa/pxa-regs.h +++ b/include/asm-arm/arch-pxa/pxa-regs.h @@ -825,120 +825,9 @@ #endif /* - * Power Manager + * Power Manager - see pxa2xx-regs.h */ -#define PMCR __REG(0x40F00000) /* Power Manager Control Register */ -#define PSSR __REG(0x40F00004) /* Power Manager Sleep Status Register */ -#define PSPR __REG(0x40F00008) /* Power Manager Scratch Pad Register */ -#define PWER __REG(0x40F0000C) /* Power Manager Wake-up Enable Register */ -#define PRER __REG(0x40F00010) /* Power Manager GPIO Rising-Edge Detect Enable Register */ -#define PFER __REG(0x40F00014) /* Power Manager GPIO Falling-Edge Detect Enable Register */ -#define PEDR __REG(0x40F00018) /* Power Manager GPIO Edge Detect Status Register */ -#define PCFR __REG(0x40F0001C) /* Power Manager General Configuration Register */ -#define PGSR0 __REG(0x40F00020) /* Power Manager GPIO Sleep State Register for GP[31-0] */ -#define PGSR1 __REG(0x40F00024) /* Power Manager GPIO Sleep State Register for GP[63-32] */ -#define PGSR2 __REG(0x40F00028) /* Power Manager GPIO Sleep State Register for GP[84-64] */ -#define PGSR3 __REG(0x40F0002C) /* Power Manager GPIO Sleep State Register for GP[118-96] */ -#define RCSR __REG(0x40F00030) /* Reset Controller Status Register */ - -#define PSLR __REG(0x40F00034) /* Power Manager Sleep Config Register */ -#define PSTR __REG(0x40F00038) /*Power Manager Standby Config Register */ -#define PSNR __REG(0x40F0003C) /*Power Manager Sense Config Register */ -#define PVCR __REG(0x40F00040) /*Power Manager VoltageControl Register */ -#define PKWR __REG(0x40F00050) /* Power Manager KB Wake-up Enable Reg */ -#define PKSR __REG(0x40F00054) /* Power Manager KB Level-Detect Register */ -#define PCMD(x) __REG2(0x40F00080, (x)<<2) -#define PCMD0 __REG(0x40F00080 + 0 * 4) -#define PCMD1 __REG(0x40F00080 + 1 * 4) -#define PCMD2 __REG(0x40F00080 + 2 * 4) -#define PCMD3 __REG(0x40F00080 + 3 * 4) -#define PCMD4 __REG(0x40F00080 + 4 * 4) -#define PCMD5 __REG(0x40F00080 + 5 * 4) -#define PCMD6 __REG(0x40F00080 + 6 * 4) -#define PCMD7 __REG(0x40F00080 + 7 * 4) -#define PCMD8 __REG(0x40F00080 + 8 * 4) -#define PCMD9 __REG(0x40F00080 + 9 * 4) -#define PCMD10 __REG(0x40F00080 + 10 * 4) -#define PCMD11 __REG(0x40F00080 + 11 * 4) -#define PCMD12 __REG(0x40F00080 + 12 * 4) -#define PCMD13 __REG(0x40F00080 + 13 * 4) -#define PCMD14 __REG(0x40F00080 + 14 * 4) -#define PCMD15 __REG(0x40F00080 + 15 * 4) -#define PCMD16 __REG(0x40F00080 + 16 * 4) -#define PCMD17 __REG(0x40F00080 + 17 * 4) -#define PCMD18 __REG(0x40F00080 + 18 * 4) -#define PCMD19 __REG(0x40F00080 + 19 * 4) -#define PCMD20 __REG(0x40F00080 + 20 * 4) -#define PCMD21 __REG(0x40F00080 + 21 * 4) -#define PCMD22 __REG(0x40F00080 + 22 * 4) -#define PCMD23 __REG(0x40F00080 + 23 * 4) -#define PCMD24 __REG(0x40F00080 + 24 * 4) -#define PCMD25 __REG(0x40F00080 + 25 * 4) -#define PCMD26 __REG(0x40F00080 + 26 * 4) -#define PCMD27 __REG(0x40F00080 + 27 * 4) -#define PCMD28 __REG(0x40F00080 + 28 * 4) -#define PCMD29 __REG(0x40F00080 + 29 * 4) -#define PCMD30 __REG(0x40F00080 + 30 * 4) -#define PCMD31 __REG(0x40F00080 + 31 * 4) - -#define PCMD_MBC (1<<12) -#define PCMD_DCE (1<<11) -#define PCMD_LC (1<<10) -/* FIXME: PCMD_SQC need be checked. */ -#define PCMD_SQC (3<<8) /* currently only bit 8 is changeable, - bit 9 should be 0 all day. */ -#define PVCR_VCSA (0x1<<14) -#define PVCR_CommandDelay (0xf80) -#define PCFR_PI2C_EN (0x1 << 6) - -#define PSSR_OTGPH (1 << 6) /* OTG Peripheral control Hold */ -#define PSSR_RDH (1 << 5) /* Read Disable Hold */ -#define PSSR_PH (1 << 4) /* Peripheral Control Hold */ -#define PSSR_STS (1 << 3) /* Standby Mode Status */ -#define PSSR_VFS (1 << 2) /* VDD Fault Status */ -#define PSSR_BFS (1 << 1) /* Battery Fault Status */ -#define PSSR_SSS (1 << 0) /* Software Sleep Status */ - -#define PSLR_SL_ROD (1 << 20) /* Sleep-Mode/Depp-Sleep Mode nRESET_OUT Disable */ - -#define PCFR_RO (1 << 15) /* RDH Override */ -#define PCFR_PO (1 << 14) /* PH Override */ -#define PCFR_GPROD (1 << 12) /* GPIO nRESET_OUT Disable */ -#define PCFR_L1_EN (1 << 11) /* Sleep Mode L1 converter Enable */ -#define PCFR_FVC (1 << 10) /* Frequency/Voltage Change */ -#define PCFR_DC_EN (1 << 7) /* Sleep/deep-sleep DC-DC Converter Enable */ -#define PCFR_PI2CEN (1 << 6) /* Enable PI2C controller */ -#define PCFR_GPR_EN (1 << 4) /* nRESET_GPIO Pin Enable */ -#define PCFR_DS (1 << 3) /* Deep Sleep Mode */ -#define PCFR_FS (1 << 2) /* Float Static Chip Selects */ -#define PCFR_FP (1 << 1) /* Float PCMCIA controls */ -#define PCFR_OPDE (1 << 0) /* 3.6864 MHz oscillator power-down enable */ - -#define RCSR_GPR (1 << 3) /* GPIO Reset */ -#define RCSR_SMR (1 << 2) /* Sleep Mode */ -#define RCSR_WDR (1 << 1) /* Watchdog Reset */ -#define RCSR_HWR (1 << 0) /* Hardware Reset */ - -#define PWER_GPIO(Nb) (1 << Nb) /* GPIO [0..15] wake-up enable */ -#define PWER_GPIO0 PWER_GPIO (0) /* GPIO [0] wake-up enable */ -#define PWER_GPIO1 PWER_GPIO (1) /* GPIO [1] wake-up enable */ -#define PWER_GPIO2 PWER_GPIO (2) /* GPIO [2] wake-up enable */ -#define PWER_GPIO3 PWER_GPIO (3) /* GPIO [3] wake-up enable */ -#define PWER_GPIO4 PWER_GPIO (4) /* GPIO [4] wake-up enable */ -#define PWER_GPIO5 PWER_GPIO (5) /* GPIO [5] wake-up enable */ -#define PWER_GPIO6 PWER_GPIO (6) /* GPIO [6] wake-up enable */ -#define PWER_GPIO7 PWER_GPIO (7) /* GPIO [7] wake-up enable */ -#define PWER_GPIO8 PWER_GPIO (8) /* GPIO [8] wake-up enable */ -#define PWER_GPIO9 PWER_GPIO (9) /* GPIO [9] wake-up enable */ -#define PWER_GPIO10 PWER_GPIO (10) /* GPIO [10] wake-up enable */ -#define PWER_GPIO11 PWER_GPIO (11) /* GPIO [11] wake-up enable */ -#define PWER_GPIO12 PWER_GPIO (12) /* GPIO [12] wake-up enable */ -#define PWER_GPIO13 PWER_GPIO (13) /* GPIO [13] wake-up enable */ -#define PWER_GPIO14 PWER_GPIO (14) /* GPIO [14] wake-up enable */ -#define PWER_GPIO15 PWER_GPIO (15) /* GPIO [15] wake-up enable */ -#define PWER_RTC 0x80000000 /* RTC alarm wake-up enable */ - /* * SSP Serial Port Registers - see include/asm-arm/arch-pxa/regs-ssp.h */ @@ -948,52 +837,9 @@ */ /* - * Core Clock + * Core Clock - see include/asm-arm/arch-pxa/pxa2xx-regs.h */ -#define CCCR __REG(0x41300000) /* Core Clock Configuration Register */ -#define CKEN __REG(0x41300004) /* Clock Enable Register */ -#define OSCC __REG(0x41300008) /* Oscillator Configuration Register */ -#define CCSR __REG(0x4130000C) /* Core Clock Status Register */ - -#define CCCR_N_MASK 0x0380 /* Run Mode Frequency to Turbo Mode Frequency Multiplier */ -#define CCCR_M_MASK 0x0060 /* Memory Frequency to Run Mode Frequency Multiplier */ -#define CCCR_L_MASK 0x001f /* Crystal Frequency to Memory Frequency Multiplier */ - -#define CKEN_AC97CONF (31) /* AC97 Controller Configuration */ -#define CKEN_CAMERA (24) /* Camera Interface Clock Enable */ -#define CKEN_SSP1 (23) /* SSP1 Unit Clock Enable */ -#define CKEN_MEMC (22) /* Memory Controller Clock Enable */ -#define CKEN_MEMSTK (21) /* Memory Stick Host Controller */ -#define CKEN_IM (20) /* Internal Memory Clock Enable */ -#define CKEN_KEYPAD (19) /* Keypad Interface Clock Enable */ -#define CKEN_USIM (18) /* USIM Unit Clock Enable */ -#define CKEN_MSL (17) /* MSL Unit Clock Enable */ -#define CKEN_LCD (16) /* LCD Unit Clock Enable */ -#define CKEN_PWRI2C (15) /* PWR I2C Unit Clock Enable */ -#define CKEN_I2C (14) /* I2C Unit Clock Enable */ -#define CKEN_FICP (13) /* FICP Unit Clock Enable */ -#define CKEN_MMC (12) /* MMC Unit Clock Enable */ -#define CKEN_USB (11) /* USB Unit Clock Enable */ -#define CKEN_ASSP (10) /* ASSP (SSP3) Clock Enable */ -#define CKEN_USBHOST (10) /* USB Host Unit Clock Enable */ -#define CKEN_OSTIMER (9) /* OS Timer Unit Clock Enable */ -#define CKEN_NSSP (9) /* NSSP (SSP2) Clock Enable */ -#define CKEN_I2S (8) /* I2S Unit Clock Enable */ -#define CKEN_BTUART (7) /* BTUART Unit Clock Enable */ -#define CKEN_FFUART (6) /* FFUART Unit Clock Enable */ -#define CKEN_STUART (5) /* STUART Unit Clock Enable */ -#define CKEN_HWUART (4) /* HWUART Unit Clock Enable */ -#define CKEN_SSP3 (4) /* SSP3 Unit Clock Enable */ -#define CKEN_SSP (3) /* SSP Unit Clock Enable */ -#define CKEN_SSP2 (3) /* SSP2 Unit Clock Enable */ -#define CKEN_AC97 (2) /* AC97 Unit Clock Enable */ -#define CKEN_PWM1 (1) /* PWM1 Clock Enable */ -#define CKEN_PWM0 (0) /* PWM0 Clock Enable */ - -#define OSCC_OON (1 << 1) /* 32.768kHz OON (write-once only bit) */ -#define OSCC_OOK (1 << 0) /* 32.768kHz OOK (read-only bit) */ - #ifdef CONFIG_PXA27x /* Camera Interface */ diff --git a/include/asm-arm/arch-pxa/pxa2xx-regs.h b/include/asm-arm/arch-pxa/pxa2xx-regs.h index 9553b54fa5bc..73e0a329cf7f 100644 --- a/include/asm-arm/arch-pxa/pxa2xx-regs.h +++ b/include/asm-arm/arch-pxa/pxa2xx-regs.h @@ -81,4 +81,166 @@ #endif + +/* + * Power Manager + */ + +#define PMCR __REG(0x40F00000) /* Power Manager Control Register */ +#define PSSR __REG(0x40F00004) /* Power Manager Sleep Status Register */ +#define PSPR __REG(0x40F00008) /* Power Manager Scratch Pad Register */ +#define PWER __REG(0x40F0000C) /* Power Manager Wake-up Enable Register */ +#define PRER __REG(0x40F00010) /* Power Manager GPIO Rising-Edge Detect Enable Register */ +#define PFER __REG(0x40F00014) /* Power Manager GPIO Falling-Edge Detect Enable Register */ +#define PEDR __REG(0x40F00018) /* Power Manager GPIO Edge Detect Status Register */ +#define PCFR __REG(0x40F0001C) /* Power Manager General Configuration Register */ +#define PGSR0 __REG(0x40F00020) /* Power Manager GPIO Sleep State Register for GP[31-0] */ +#define PGSR1 __REG(0x40F00024) /* Power Manager GPIO Sleep State Register for GP[63-32] */ +#define PGSR2 __REG(0x40F00028) /* Power Manager GPIO Sleep State Register for GP[84-64] */ +#define PGSR3 __REG(0x40F0002C) /* Power Manager GPIO Sleep State Register for GP[118-96] */ +#define RCSR __REG(0x40F00030) /* Reset Controller Status Register */ + +#define PSLR __REG(0x40F00034) /* Power Manager Sleep Config Register */ +#define PSTR __REG(0x40F00038) /* Power Manager Standby Config Register */ +#define PSNR __REG(0x40F0003C) /* Power Manager Sense Config Register */ +#define PVCR __REG(0x40F00040) /* Power Manager VoltageControl Register */ +#define PKWR __REG(0x40F00050) /* Power Manager KB Wake-up Enable Reg */ +#define PKSR __REG(0x40F00054) /* Power Manager KB Level-Detect Register */ +#define PCMD(x) __REG2(0x40F00080, (x)<<2) +#define PCMD0 __REG(0x40F00080 + 0 * 4) +#define PCMD1 __REG(0x40F00080 + 1 * 4) +#define PCMD2 __REG(0x40F00080 + 2 * 4) +#define PCMD3 __REG(0x40F00080 + 3 * 4) +#define PCMD4 __REG(0x40F00080 + 4 * 4) +#define PCMD5 __REG(0x40F00080 + 5 * 4) +#define PCMD6 __REG(0x40F00080 + 6 * 4) +#define PCMD7 __REG(0x40F00080 + 7 * 4) +#define PCMD8 __REG(0x40F00080 + 8 * 4) +#define PCMD9 __REG(0x40F00080 + 9 * 4) +#define PCMD10 __REG(0x40F00080 + 10 * 4) +#define PCMD11 __REG(0x40F00080 + 11 * 4) +#define PCMD12 __REG(0x40F00080 + 12 * 4) +#define PCMD13 __REG(0x40F00080 + 13 * 4) +#define PCMD14 __REG(0x40F00080 + 14 * 4) +#define PCMD15 __REG(0x40F00080 + 15 * 4) +#define PCMD16 __REG(0x40F00080 + 16 * 4) +#define PCMD17 __REG(0x40F00080 + 17 * 4) +#define PCMD18 __REG(0x40F00080 + 18 * 4) +#define PCMD19 __REG(0x40F00080 + 19 * 4) +#define PCMD20 __REG(0x40F00080 + 20 * 4) +#define PCMD21 __REG(0x40F00080 + 21 * 4) +#define PCMD22 __REG(0x40F00080 + 22 * 4) +#define PCMD23 __REG(0x40F00080 + 23 * 4) +#define PCMD24 __REG(0x40F00080 + 24 * 4) +#define PCMD25 __REG(0x40F00080 + 25 * 4) +#define PCMD26 __REG(0x40F00080 + 26 * 4) +#define PCMD27 __REG(0x40F00080 + 27 * 4) +#define PCMD28 __REG(0x40F00080 + 28 * 4) +#define PCMD29 __REG(0x40F00080 + 29 * 4) +#define PCMD30 __REG(0x40F00080 + 30 * 4) +#define PCMD31 __REG(0x40F00080 + 31 * 4) + +#define PCMD_MBC (1<<12) +#define PCMD_DCE (1<<11) +#define PCMD_LC (1<<10) +/* FIXME: PCMD_SQC need be checked. */ +#define PCMD_SQC (3<<8) /* currently only bit 8 is changeable, + bit 9 should be 0 all day. */ +#define PVCR_VCSA (0x1<<14) +#define PVCR_CommandDelay (0xf80) +#define PCFR_PI2C_EN (0x1 << 6) + +#define PSSR_OTGPH (1 << 6) /* OTG Peripheral control Hold */ +#define PSSR_RDH (1 << 5) /* Read Disable Hold */ +#define PSSR_PH (1 << 4) /* Peripheral Control Hold */ +#define PSSR_STS (1 << 3) /* Standby Mode Status */ +#define PSSR_VFS (1 << 2) /* VDD Fault Status */ +#define PSSR_BFS (1 << 1) /* Battery Fault Status */ +#define PSSR_SSS (1 << 0) /* Software Sleep Status */ + +#define PSLR_SL_ROD (1 << 20) /* Sleep-Mode/Depp-Sleep Mode nRESET_OUT Disable */ + +#define PCFR_RO (1 << 15) /* RDH Override */ +#define PCFR_PO (1 << 14) /* PH Override */ +#define PCFR_GPROD (1 << 12) /* GPIO nRESET_OUT Disable */ +#define PCFR_L1_EN (1 << 11) /* Sleep Mode L1 converter Enable */ +#define PCFR_FVC (1 << 10) /* Frequency/Voltage Change */ +#define PCFR_DC_EN (1 << 7) /* Sleep/deep-sleep DC-DC Converter Enable */ +#define PCFR_PI2CEN (1 << 6) /* Enable PI2C controller */ +#define PCFR_GPR_EN (1 << 4) /* nRESET_GPIO Pin Enable */ +#define PCFR_DS (1 << 3) /* Deep Sleep Mode */ +#define PCFR_FS (1 << 2) /* Float Static Chip Selects */ +#define PCFR_FP (1 << 1) /* Float PCMCIA controls */ +#define PCFR_OPDE (1 << 0) /* 3.6864 MHz oscillator power-down enable */ + +#define RCSR_GPR (1 << 3) /* GPIO Reset */ +#define RCSR_SMR (1 << 2) /* Sleep Mode */ +#define RCSR_WDR (1 << 1) /* Watchdog Reset */ +#define RCSR_HWR (1 << 0) /* Hardware Reset */ + +#define PWER_GPIO(Nb) (1 << Nb) /* GPIO [0..15] wake-up enable */ +#define PWER_GPIO0 PWER_GPIO (0) /* GPIO [0] wake-up enable */ +#define PWER_GPIO1 PWER_GPIO (1) /* GPIO [1] wake-up enable */ +#define PWER_GPIO2 PWER_GPIO (2) /* GPIO [2] wake-up enable */ +#define PWER_GPIO3 PWER_GPIO (3) /* GPIO [3] wake-up enable */ +#define PWER_GPIO4 PWER_GPIO (4) /* GPIO [4] wake-up enable */ +#define PWER_GPIO5 PWER_GPIO (5) /* GPIO [5] wake-up enable */ +#define PWER_GPIO6 PWER_GPIO (6) /* GPIO [6] wake-up enable */ +#define PWER_GPIO7 PWER_GPIO (7) /* GPIO [7] wake-up enable */ +#define PWER_GPIO8 PWER_GPIO (8) /* GPIO [8] wake-up enable */ +#define PWER_GPIO9 PWER_GPIO (9) /* GPIO [9] wake-up enable */ +#define PWER_GPIO10 PWER_GPIO (10) /* GPIO [10] wake-up enable */ +#define PWER_GPIO11 PWER_GPIO (11) /* GPIO [11] wake-up enable */ +#define PWER_GPIO12 PWER_GPIO (12) /* GPIO [12] wake-up enable */ +#define PWER_GPIO13 PWER_GPIO (13) /* GPIO [13] wake-up enable */ +#define PWER_GPIO14 PWER_GPIO (14) /* GPIO [14] wake-up enable */ +#define PWER_GPIO15 PWER_GPIO (15) /* GPIO [15] wake-up enable */ +#define PWER_RTC 0x80000000 /* RTC alarm wake-up enable */ + +/* + * PXA2xx specific Core clock definitions + */ +#define CCCR __REG(0x41300000) /* Core Clock Configuration Register */ +#define CCSR __REG(0x4130000C) /* Core Clock Status Register */ +#define CKEN __REG(0x41300004) /* Clock Enable Register */ +#define OSCC __REG(0x41300008) /* Oscillator Configuration Register */ + +#define CCCR_N_MASK 0x0380 /* Run Mode Frequency to Turbo Mode Frequency Multiplier */ +#define CCCR_M_MASK 0x0060 /* Memory Frequency to Run Mode Frequency Multiplier */ +#define CCCR_L_MASK 0x001f /* Crystal Frequency to Memory Frequency Multiplier */ + +#define CKEN_AC97CONF (31) /* AC97 Controller Configuration */ +#define CKEN_CAMERA (24) /* Camera Interface Clock Enable */ +#define CKEN_SSP1 (23) /* SSP1 Unit Clock Enable */ +#define CKEN_MEMC (22) /* Memory Controller Clock Enable */ +#define CKEN_MEMSTK (21) /* Memory Stick Host Controller */ +#define CKEN_IM (20) /* Internal Memory Clock Enable */ +#define CKEN_KEYPAD (19) /* Keypad Interface Clock Enable */ +#define CKEN_USIM (18) /* USIM Unit Clock Enable */ +#define CKEN_MSL (17) /* MSL Unit Clock Enable */ +#define CKEN_LCD (16) /* LCD Unit Clock Enable */ +#define CKEN_PWRI2C (15) /* PWR I2C Unit Clock Enable */ +#define CKEN_I2C (14) /* I2C Unit Clock Enable */ +#define CKEN_FICP (13) /* FICP Unit Clock Enable */ +#define CKEN_MMC (12) /* MMC Unit Clock Enable */ +#define CKEN_USB (11) /* USB Unit Clock Enable */ +#define CKEN_ASSP (10) /* ASSP (SSP3) Clock Enable */ +#define CKEN_USBHOST (10) /* USB Host Unit Clock Enable */ +#define CKEN_OSTIMER (9) /* OS Timer Unit Clock Enable */ +#define CKEN_NSSP (9) /* NSSP (SSP2) Clock Enable */ +#define CKEN_I2S (8) /* I2S Unit Clock Enable */ +#define CKEN_BTUART (7) /* BTUART Unit Clock Enable */ +#define CKEN_FFUART (6) /* FFUART Unit Clock Enable */ +#define CKEN_STUART (5) /* STUART Unit Clock Enable */ +#define CKEN_HWUART (4) /* HWUART Unit Clock Enable */ +#define CKEN_SSP3 (4) /* SSP3 Unit Clock Enable */ +#define CKEN_SSP (3) /* SSP Unit Clock Enable */ +#define CKEN_SSP2 (3) /* SSP2 Unit Clock Enable */ +#define CKEN_AC97 (2) /* AC97 Unit Clock Enable */ +#define CKEN_PWM1 (1) /* PWM1 Clock Enable */ +#define CKEN_PWM0 (0) /* PWM0 Clock Enable */ + +#define OSCC_OON (1 << 1) /* 32.768kHz OON (write-once only bit) */ +#define OSCC_OOK (1 << 0) /* 32.768kHz OOK (read-only bit) */ + #endif diff --git a/include/asm-arm/arch-pxa/system.h b/include/asm-arm/arch-pxa/system.h index 9aa6c2e939e8..ba7e132de1b3 100644 --- a/include/asm-arm/arch-pxa/system.h +++ b/include/asm-arm/arch-pxa/system.h @@ -12,6 +12,7 @@ #include #include "hardware.h" +#include "pxa2xx-regs.h" #include "pxa-regs.h" static inline void arch_idle(void) -- cgit v1.2.3 From bcf0b0880710409420a4e3b15dbf4b9a63542c0b Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Wed, 30 Apr 2008 03:49:55 -0500 Subject: [POWERPC] Move to runtime allocated exception stacks For the additonal exception levels (critical, debug, machine check) on 40x/book-e we were using "static" allocations of the stack in the associated head.S. Move to a runtime allocation to make the code a bit easier to read as we mimic how we handle IRQ stacks. Its also a bit easier to setup the stack with a "dummy" thread_info in C code. Signed-off-by: Kumar Gala Acked-by: Paul Mackerras --- arch/powerpc/kernel/head_40x.S | 18 ++++-------------- arch/powerpc/kernel/head_44x.S | 9 --------- arch/powerpc/kernel/head_booke.h | 29 +++++++++++------------------ arch/powerpc/kernel/head_fsl_booke.S | 9 --------- arch/powerpc/kernel/irq.c | 33 +++++++++++++++++++++++++++++++++ arch/powerpc/kernel/setup_32.c | 24 ++++++++++++++++++++++++ include/asm-powerpc/irq.h | 13 +++++++++++++ 7 files changed, 85 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 8552e67e3a8b..f2cf60d38f78 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -148,14 +148,14 @@ _ENTRY(crit_r11) mfcr r10; /* save CR in r10 for now */\ mfspr r11,SPRN_SRR3; /* check whether user or kernel */\ andi. r11,r11,MSR_PR; \ - lis r11,critical_stack_top@h; \ - ori r11,r11,critical_stack_top@l; \ + lis r11,critirq_ctx@ha; \ + tophys(r11,r11); \ + lwz r11,critirq_ctx@l(r11); \ beq 1f; \ /* COMING FROM USER MODE */ \ mfspr r11,SPRN_SPRG3; /* if from user, start at top of */\ lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\ - addi r11,r11,THREAD_SIZE; \ -1: subi r11,r11,INT_FRAME_SIZE; /* Allocate an exception frame */\ +1: addi r11,r11,THREAD_SIZE-INT_FRAME_SIZE; /* Alloc an excpt frm */\ tophys(r11,r11); \ stw r10,_CCR(r11); /* save various registers */\ stw r12,GPR12(r11); \ @@ -996,16 +996,6 @@ empty_zero_page: swapper_pg_dir: .space PGD_TABLE_SIZE - -/* Stack for handling critical exceptions from kernel mode */ - .section .bss - .align 12 -exception_stack_bottom: - .space 4096 -critical_stack_top: - .globl exception_stack_top -exception_stack_top: - /* Room for two PTE pointers, usually the kernel and current user pointers * to their respective root page table. */ diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index c2b9dc4fce5d..47ea8affad23 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -737,15 +737,6 @@ empty_zero_page: swapper_pg_dir: .space PGD_TABLE_SIZE -/* Reserved 4k for the critical exception stack & 4k for the machine - * check stack per CPU for kernel mode exceptions */ - .section .bss - .align 12 -exception_stack_bottom: - .space BOOKE_EXCEPTION_STACK_SIZE - .globl exception_stack_top -exception_stack_top: - /* * Room for two PTE pointers, usually the kernel and current user pointers * to their respective root page table. diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 721faef87095..9eacf4ca442a 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -43,9 +43,7 @@ SAVE_2GPRS(7, r11) /* To handle the additional exception priority levels on 40x and Book-E - * processors we allocate a 4k stack per additional priority level. The various - * head_xxx.S files allocate space (exception_stack_top) for each priority's - * stack times the number of CPUs + * processors we allocate a stack per additional priority level. * * On 40x critical is the only additional level * On 44x/e500 we have critical and machine check @@ -61,36 +59,31 @@ * going to critical or their own debug level we aren't currently * providing configurations that micro-optimize space usage. */ -#ifdef CONFIG_44x -#define NUM_EXCEPTION_LVLS 2 -#else -#define NUM_EXCEPTION_LVLS 3 -#endif -#define BOOKE_EXCEPTION_STACK_SIZE (4096 * NUM_EXCEPTION_LVLS) /* CRIT_SPRG only used in critical exception handling */ #define CRIT_SPRG SPRN_SPRG2 /* MCHECK_SPRG only used in machine check exception handling */ #define MCHECK_SPRG SPRN_SPRG6W -#define MCHECK_STACK_TOP (exception_stack_top - 4096) -#define CRIT_STACK_TOP (exception_stack_top) +#define MCHECK_STACK_BASE mcheckirq_ctx +#define CRIT_STACK_BASE critirq_ctx /* only on e200 for now */ -#define DEBUG_STACK_TOP (exception_stack_top - 8192) +#define DEBUG_STACK_BASE dbgirq_ctx #define DEBUG_SPRG SPRN_SPRG6W #ifdef CONFIG_SMP #define BOOKE_LOAD_EXC_LEVEL_STACK(level) \ mfspr r8,SPRN_PIR; \ - mulli r8,r8,BOOKE_EXCEPTION_STACK_SIZE; \ - neg r8,r8; \ - addis r8,r8,level##_STACK_TOP@ha; \ - addi r8,r8,level##_STACK_TOP@l + slwi r8,r8,2; \ + addis r8,r8,level##_STACK_BASE@ha; \ + lwz r8,level##_STACK_BASE@l(r8); \ + addi r8,r8,THREAD_SIZE; #else #define BOOKE_LOAD_EXC_LEVEL_STACK(level) \ - lis r8,level##_STACK_TOP@h; \ - ori r8,r8,level##_STACK_TOP@l + lis r8,level##_STACK_BASE@ha; \ + lwz r8,level##_STACK_BASE@l(r8); \ + addi r8,r8,THREAD_SIZE; #endif /* diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index e581524d85bc..503f86030b6e 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -1080,15 +1080,6 @@ empty_zero_page: swapper_pg_dir: .space PGD_TABLE_SIZE -/* Reserved 4k for the critical exception stack & 4k for the machine - * check stack per CPU for kernel mode exceptions */ - .section .bss - .align 12 -exception_stack_bottom: - .space BOOKE_EXCEPTION_STACK_SIZE * NR_CPUS - .globl exception_stack_top -exception_stack_top: - /* * Room for two PTE pointers, usually the kernel and current user pointers * to their respective root page table. diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 2f73f705d564..b5199752ac60 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -356,9 +356,42 @@ void __init init_IRQ(void) { if (ppc_md.init_IRQ) ppc_md.init_IRQ(); + + exc_lvl_ctx_init(); + irq_ctx_init(); } +#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +struct thread_info *critirq_ctx[NR_CPUS] __read_mostly; +struct thread_info *dbgirq_ctx[NR_CPUS] __read_mostly; +struct thread_info *mcheckirq_ctx[NR_CPUS] __read_mostly; + +void exc_lvl_ctx_init(void) +{ + struct thread_info *tp; + int i; + + for_each_possible_cpu(i) { + memset((void *)critirq_ctx[i], 0, THREAD_SIZE); + tp = critirq_ctx[i]; + tp->cpu = i; + tp->preempt_count = 0; + +#ifdef CONFIG_BOOKE + memset((void *)dbgirq_ctx[i], 0, THREAD_SIZE); + tp = dbgirq_ctx[i]; + tp->cpu = i; + tp->preempt_count = 0; + + memset((void *)mcheckirq_ctx[i], 0, THREAD_SIZE); + tp = mcheckirq_ctx[i]; + tp->cpu = i; + tp->preempt_count = HARDIRQ_OFFSET; +#endif + } +} +#endif #ifdef CONFIG_IRQSTACKS struct thread_info *softirq_ctx[NR_CPUS] __read_mostly; diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 5112a4aa801d..bef0be3fd98b 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -248,6 +248,28 @@ static void __init irqstack_early_init(void) #define irqstack_early_init() #endif +#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +static void __init exc_lvl_early_init(void) +{ + unsigned int i; + + /* interrupt stacks must be in lowmem, we get that for free on ppc32 + * as the lmb is limited to lowmem by LMB_REAL_LIMIT */ + for_each_possible_cpu(i) { + critirq_ctx[i] = (struct thread_info *) + __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); +#ifdef CONFIG_BOOKE + dbgirq_ctx[i] = (struct thread_info *) + __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); + mcheckirq_ctx[i] = (struct thread_info *) + __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); +#endif + } +} +#else +#define exc_lvl_early_init() +#endif + /* Warning, IO base is not yet inited */ void __init setup_arch(char **cmdline_p) { @@ -305,6 +327,8 @@ void __init setup_arch(char **cmdline_p) init_mm.end_data = (unsigned long) _edata; init_mm.brk = klimit; + exc_lvl_early_init(); + irqstack_early_init(); /* set up the bootmem stuff with available memory */ diff --git a/include/asm-powerpc/irq.h b/include/asm-powerpc/irq.h index 5089deb8fec3..1ef8e304e0ea 100644 --- a/include/asm-powerpc/irq.h +++ b/include/asm-powerpc/irq.h @@ -619,6 +619,19 @@ struct pt_regs; #define __ARCH_HAS_DO_SOFTIRQ +#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +/* + * Per-cpu stacks for handling critical, debug and machine check + * level interrupts. + */ +extern struct thread_info *critirq_ctx[NR_CPUS]; +extern struct thread_info *dbgirq_ctx[NR_CPUS]; +extern struct thread_info *mcheckirq_ctx[NR_CPUS]; +extern void exc_lvl_ctx_init(void); +#else +#define exc_lvl_ctx_init() +#endif + #ifdef CONFIG_IRQSTACKS /* * Per-cpu stacks for handling hard and soft interrupts. -- cgit v1.2.3 From 5a85bb02ba473aacd29f4039a74744f729443b31 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Tue, 3 Jun 2008 09:06:20 +0200 Subject: ALSA: Release v1.0.17rc1 Signed-off-by: Jaroslav Kysela --- include/sound/version.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/version.h b/include/sound/version.h index ed6fb2eb1eac..2474b8ee99f9 100644 --- a/include/sound/version.h +++ b/include/sound/version.h @@ -1,3 +1,3 @@ -/* include/version.h. Generated by alsa/ksync script. */ -#define CONFIG_SND_VERSION "1.0.16" +/* include/version.h */ +#define CONFIG_SND_VERSION "1.0.17rc1" #define CONFIG_SND_DATE "" -- cgit v1.2.3 From 4c1cbafb88490757a38119c41229251369bcecbc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 3 Jun 2008 09:28:52 +0200 Subject: x86 mpparse: build fix fix this build bug: drivers/acpi/pci_irq.c: In function 'acpi_pci_irq_enable': drivers/acpi/pci_irq.c:574: error: implicit declaration of function 'mp_config_acpi_gsi' Signed-off-by: Ingo Molnar --- include/asm-x86/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-x86/acpi.h b/include/asm-x86/acpi.h index 14411c9de46f..73ce5b32443f 100644 --- a/include/asm-x86/acpi.h +++ b/include/asm-x86/acpi.h @@ -28,6 +28,7 @@ #include #include #include +#include #define COMPILER_DEPENDENT_INT64 long long #define COMPILER_DEPENDENT_UINT64 unsigned long long -- cgit v1.2.3 From ba924c81dd5a7a7fb5ded025af7fdd3b61f8ca67 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 31 May 2008 22:51:51 -0700 Subject: x86, numa, 32-bit: increase max_elements to 1024 so every element will represent 64M instead of 256M. AMD opteron could have HW memory hole remapping, so could have [0, 8g + 64M) on node0. Reduce element size to 64M to keep that on node 0 Later we need to use find_e820_area() to allocate memory_node_map like on 64-bit. But need to move memory_present out of populate_mem_map... Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/discontig_32.c | 14 +++++++------- include/asm-x86/mmzone_32.h | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index 55fdbab6b014..922af20d1d29 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c @@ -59,14 +59,14 @@ unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly; /* * 4) physnode_map - the mapping between a pfn and owning node * physnode_map keeps track of the physical memory layout of a generic - * numa node on a 256Mb break (each element of the array will - * represent 256Mb of memory and will be marked by the node id. so, + * numa node on a 64Mb break (each element of the array will + * represent 64Mb of memory and will be marked by the node id. so, * if the first gig is on node 0, and the second gig is on node 1 * physnode_map will contain: * - * physnode_map[0-3] = 0; - * physnode_map[4-7] = 1; - * physnode_map[8- ] = -1; + * physnode_map[0-15] = 0; + * physnode_map[16-31] = 1; + * physnode_map[32- ] = -1; */ s8 physnode_map[MAX_ELEMENTS] __read_mostly = { [0 ... (MAX_ELEMENTS - 1)] = -1}; EXPORT_SYMBOL(physnode_map); @@ -81,9 +81,9 @@ void memory_present(int nid, unsigned long start, unsigned long end) printk(KERN_DEBUG " "); for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) { physnode_map[pfn / PAGES_PER_ELEMENT] = nid; - printk("%ld ", pfn); + printk(KERN_CONT "%ld ", pfn); } - printk("\n"); + printk(KERN_CONT "\n"); } unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, diff --git a/include/asm-x86/mmzone_32.h b/include/asm-x86/mmzone_32.h index faef751181b7..ab0012888c44 100644 --- a/include/asm-x86/mmzone_32.h +++ b/include/asm-x86/mmzone_32.h @@ -51,14 +51,14 @@ extern int early_pfn_to_nid(unsigned long pfn); /* * generic node memory support, the following assumptions apply: * - * 1) memory comes in 256Mb contigious chunks which are either present or not + * 1) memory comes in 64Mb contigious chunks which are either present or not * 2) we will not have more than 64Gb in total * * for now assume that 64Gb is max amount of RAM for whole system * 64Gb / 4096bytes/page = 16777216 pages */ #define MAX_NR_PAGES 16777216 -#define MAX_ELEMENTS 256 +#define MAX_ELEMENTS 1024 #define PAGES_PER_ELEMENT (MAX_NR_PAGES/MAX_ELEMENTS) extern s8 physnode_map[]; -- cgit v1.2.3 From 0596152388e234efebce464355186ad9e16c8cb6 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 31 May 2008 22:52:47 -0700 Subject: x86, 32-bit: change propagate_e820_map() back to find_max_pfn() we don't need to call memory_present that early. numa and sparse will call memory_present later and might even fail, it will call memory_present for the full range. also for sparse it will call alloc_bootmem ... before we set up bootmem. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820_32.c | 5 ++--- arch/x86/kernel/setup_32.c | 4 ++-- arch/x86/mm/discontig_32.c | 2 +- include/asm-x86/e820_32.h | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c index db760d4706af..0c025d04fc2e 100644 --- a/arch/x86/kernel/e820_32.c +++ b/arch/x86/kernel/e820_32.c @@ -210,7 +210,7 @@ void __init init_iomem_resources(struct resource *code_resource, /* * Find the highest page frame number we have available */ -void __init propagate_e820_map(void) +void __init find_max_pfn(void) { int i; @@ -227,7 +227,6 @@ void __init propagate_e820_map(void) continue; if (end > max_pfn) max_pfn = end; - memory_present(0, start, end); } } @@ -361,7 +360,7 @@ static int __init parse_memmap(char *arg) * size before original memory map is * reset. */ - propagate_e820_map(); + find_max_pfn(); saved_max_pfn = max_pfn; #endif e820.nr_map = 0; diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 6f40cb560ea9..29010420458d 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -730,10 +730,10 @@ void __init setup_arch(char **cmdline_p) efi_init(); /* update e820 for memory not covered by WB MTRRs */ - propagate_e820_map(); + find_max_pfn(); mtrr_bp_init(); if (mtrr_trim_uncached_memory(max_pfn)) - propagate_e820_map(); + find_max_pfn(); max_low_pfn = setup_memory(); diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index 98b099eeab3a..ebbbba338150 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c @@ -120,7 +120,7 @@ int __init get_memcfg_numa_flat(void) printk("NUMA - single node, flat memory mode\n"); /* Run the memory configuration and find the top of memory. */ - propagate_e820_map(); + find_max_pfn(); node_start_pfn[0] = 0; node_end_pfn[0] = max_pfn; memory_present(0, 0, max_pfn); diff --git a/include/asm-x86/e820_32.h b/include/asm-x86/e820_32.h index 7ace82570a36..00fbc60b9d30 100644 --- a/include/asm-x86/e820_32.h +++ b/include/asm-x86/e820_32.h @@ -21,7 +21,7 @@ extern void setup_memory_map(void); extern void finish_e820_parsing(void); -extern void propagate_e820_map(void); +extern void find_max_pfn(void); extern void register_bootmem_low_pages(unsigned long max_low_pfn); extern void limit_regions(unsigned long long size); extern void init_iomem_resources(struct resource *code_resource, -- cgit v1.2.3 From 2944e16b25e7fb8b5ee0dd9dc7197a0f9e523cfd Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 1 Jun 2008 13:17:38 -0700 Subject: x86: update mptable make mptable to be consistent with acpi routing, so we could: 1. kexec kernel with acpi=off 2. work around BIOSes where acpi routing is working, but mptable is not right, so can use kernel/kexec to start other OSes that don't have good acpi support. command line: update_mptable Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 22 +++ arch/x86/kernel/e820.c | 25 +++ arch/x86/kernel/mpparse.c | 401 ++++++++++++++++++++++++++++++++++++++++++-- arch/x86/kernel/setup_64.c | 4 + drivers/acpi/pci_irq.c | 5 + include/asm-x86/e820.h | 1 + include/asm-x86/mpspec.h | 4 + 7 files changed, 445 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 2ddfabae382b..f226bdc19f69 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1154,6 +1154,28 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) return gsi; } +int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, + u32 gsi, int triggering, int polarity) +{ + struct mpc_config_intsrc intsrc; + int ioapic; + + /* print the entry should happen on mptable identically */ + intsrc.mpc_type = MP_INTSRC; + intsrc.mpc_irqtype = mp_INT; + intsrc.mpc_irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | + (polarity == ACPI_ACTIVE_HIGH ? 1 : 3); + intsrc.mpc_srcbus = number; + intsrc.mpc_srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); + ioapic = mp_find_ioapic(gsi); + intsrc.mpc_dstapic = mp_ioapic_routing[ioapic].apic_id; + intsrc.mpc_dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base; + + MP_intsrc_info(&intsrc); + + return 0; +} + /* * Parse IOAPIC related entries in MADT * returns 0 on success, < 0 on error diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 0cd9132c9450..cd2b99e27d43 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -739,3 +739,28 @@ u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align) return -1UL; } + +/* + * pre allocated 4k and reserved it in e820 + */ +u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) +{ + u64 size = 0; + u64 addr; + u64 start; + + start = startt; + while (size < sizet) + start = find_e820_area_size(start, &size, align); + + if (size < sizet) + return 0; + + addr = round_down(start + size - sizet, align); + update_memory_range(addr, sizet, E820_RAM, E820_RESERVED); + printk(KERN_INFO "update e820 for early_reserve_e820\n"); + update_e820(); + + return addr; +} + diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 9f3792d55044..8898aa49079d 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #ifdef CONFIG_X86_32 @@ -161,20 +163,81 @@ static void __init MP_ioapic_info(struct mpc_config_ioapic *m) nr_ioapics++; } -static void __init MP_intsrc_info(struct mpc_config_intsrc *m) +static void print_MP_intsrc_info(struct mpc_config_intsrc *m) { - printk(KERN_INFO "Int: type %d, pol %d, trig %d, bus %02x," + printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x," " IRQ %02x, APIC ID %x, APIC INT %02x\n", m->mpc_irqtype, m->mpc_irqflag & 3, (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); - mp_irqs[mp_irq_entries].mp_dstapic = m->mpc_dstapic; - mp_irqs[mp_irq_entries].mp_type = m->mpc_type; - mp_irqs[mp_irq_entries].mp_irqtype = m->mpc_irqtype; - mp_irqs[mp_irq_entries].mp_irqflag = m->mpc_irqflag; - mp_irqs[mp_irq_entries].mp_srcbus = m->mpc_srcbus; - mp_irqs[mp_irq_entries].mp_srcbusirq = m->mpc_srcbusirq; - mp_irqs[mp_irq_entries].mp_dstirq = m->mpc_dstirq; +} + +static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) +{ + printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x," + " IRQ %02x, APIC ID %x, APIC INT %02x\n", + mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, + (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, + mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq); +} + +static void assign_to_mp_irq(struct mpc_config_intsrc *m, + struct mp_config_intsrc *mp_irq) +{ + mp_irq->mp_dstapic = m->mpc_dstapic; + mp_irq->mp_type = m->mpc_type; + mp_irq->mp_irqtype = m->mpc_irqtype; + mp_irq->mp_irqflag = m->mpc_irqflag; + mp_irq->mp_srcbus = m->mpc_srcbus; + mp_irq->mp_srcbusirq = m->mpc_srcbusirq; + mp_irq->mp_dstirq = m->mpc_dstirq; +} + +static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq, + struct mpc_config_intsrc *m) +{ + m->mpc_dstapic = mp_irq->mp_dstapic; + m->mpc_type = mp_irq->mp_type; + m->mpc_irqtype = mp_irq->mp_irqtype; + m->mpc_irqflag = mp_irq->mp_irqflag; + m->mpc_srcbus = mp_irq->mp_srcbus; + m->mpc_srcbusirq = mp_irq->mp_srcbusirq; + m->mpc_dstirq = mp_irq->mp_dstirq; +} + +static int mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq, + struct mpc_config_intsrc *m) +{ + if (mp_irq->mp_dstapic != m->mpc_dstapic) + return 1; + if (mp_irq->mp_type != m->mpc_type) + return 2; + if (mp_irq->mp_irqtype != m->mpc_irqtype) + return 3; + if (mp_irq->mp_irqflag != m->mpc_irqflag) + return 4; + if (mp_irq->mp_srcbus != m->mpc_srcbus) + return 5; + if (mp_irq->mp_srcbusirq != m->mpc_srcbusirq) + return 6; + if (mp_irq->mp_dstirq != m->mpc_dstirq) + return 7; + + return 0; +} + +void MP_intsrc_info(struct mpc_config_intsrc *m) +{ + int i; + + print_MP_intsrc_info(m); + + for (i = 0; i < mp_irq_entries; i++) { + if (!mp_irq_mpc_intsrc_cmp(&mp_irqs[i], m)) + return; + } + + assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]); if (++mp_irq_entries == MAX_IRQ_SOURCES) panic("Max # of irq sources exceeded!!\n"); } @@ -268,12 +331,9 @@ static inline void mps_oem_check(struct mp_config_table *mpc, char *oem, * Read/parse the MPC */ -static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) +static int __init smp_check_mpc(struct mp_config_table *mpc, char *oem, + char *str) { - char str[16]; - char oem[10]; - int count = sizeof(*mpc); - unsigned char *mpt = ((unsigned char *)mpc) + count; if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) { printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n", @@ -301,13 +361,28 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) memcpy(str, mpc->mpc_productid, 12); str[12] = 0; -#ifdef CONFIG_X86_32 - mps_oem_check(mpc, oem, str); -#endif printk(KERN_INFO "MPTABLE: Product ID: %s\n", str); printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic); + return 1; +} + +static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) +{ + char str[16]; + char oem[10]; + + int count = sizeof(*mpc); + unsigned char *mpt = ((unsigned char *)mpc) + count; + + if (!smp_check_mpc(mpc, oem, str)) + return 0; + +#ifdef CONFIG_X86_32 + mps_oem_check(mpc, oem, str); +#endif + /* save the local APIC address, it might be non-default */ if (!acpi_lapic) mp_lapic_addr = mpc->mpc_lapic; @@ -785,3 +860,295 @@ void __init find_smp_config(void) { __find_smp_config(1); } + +#ifdef CONFIG_X86_IO_APIC +static u8 __initdata irq_used[MAX_IRQ_SOURCES]; + +static int __init get_MP_intsrc_index(struct mpc_config_intsrc *m) +{ + int i; + + if (m->mpc_irqtype != mp_INT) + return 0; + + if (m->mpc_irqflag != 0x0f) + return 0; + + /* not legacy */ + + for (i = 0; i < mp_irq_entries; i++) { + if (mp_irqs[i].mp_irqtype != mp_INT) + continue; + + if (mp_irqs[i].mp_irqflag != 0x0f) + continue; + + if (mp_irqs[i].mp_srcbus != m->mpc_srcbus) + continue; + if (mp_irqs[i].mp_srcbusirq != m->mpc_srcbusirq) + continue; + if (irq_used[i]) { + /* already claimed */ + return -2; + } + irq_used[i] = 1; + return i; + } + + /* not found */ + return -1; +} + +#define SPARE_SLOT_NUM 20 + +static struct mpc_config_intsrc __initdata *m_spare[SPARE_SLOT_NUM]; +#endif + +static int __init replace_intsrc_all(struct mp_config_table *mpc, + unsigned long mpc_new_phys, + unsigned long mpc_new_length) +{ +#ifdef CONFIG_X86_IO_APIC + int i; + int nr_m_spare = 0; +#endif + + int count = sizeof(*mpc); + unsigned char *mpt = ((unsigned char *)mpc) + count; + + printk(KERN_INFO "mpc_length %x\n", mpc->mpc_length); + while (count < mpc->mpc_length) { + switch (*mpt) { + case MP_PROCESSOR: + { + struct mpc_config_processor *m = + (struct mpc_config_processor *)mpt; + mpt += sizeof(*m); + count += sizeof(*m); + break; + } + case MP_BUS: + { + struct mpc_config_bus *m = + (struct mpc_config_bus *)mpt; + mpt += sizeof(*m); + count += sizeof(*m); + break; + } + case MP_IOAPIC: + { + mpt += sizeof(struct mpc_config_ioapic); + count += sizeof(struct mpc_config_ioapic); + break; + } + case MP_INTSRC: + { +#ifdef CONFIG_X86_IO_APIC + struct mpc_config_intsrc *m = + (struct mpc_config_intsrc *)mpt; + + printk(KERN_INFO "OLD "); + print_MP_intsrc_info(m); + i = get_MP_intsrc_index(m); + if (i > 0) { + assign_to_mpc_intsrc(&mp_irqs[i], m); + printk(KERN_INFO "NEW "); + print_mp_irq_info(&mp_irqs[i]); + } else if (!i) { + /* legacy, do nothing */ + } else if (nr_m_spare < SPARE_SLOT_NUM) { + /* + * not found (-1), or duplicated (-2) + * are invalid entries, + * we need to use the slot later + */ + m_spare[nr_m_spare] = m; + nr_m_spare++; + } +#endif + mpt += sizeof(struct mpc_config_intsrc); + count += sizeof(struct mpc_config_intsrc); + break; + } + case MP_LINTSRC: + { + struct mpc_config_lintsrc *m = + (struct mpc_config_lintsrc *)mpt; + mpt += sizeof(*m); + count += sizeof(*m); + break; + } + default: + /* wrong mptable */ + printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"); + printk(KERN_ERR "type %x\n", *mpt); + print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16, + 1, mpc, mpc->mpc_length, 1); + goto out; + } + } + +#ifdef CONFIG_X86_IO_APIC + for (i = 0; i < mp_irq_entries; i++) { + if (irq_used[i]) + continue; + + if (mp_irqs[i].mp_irqtype != mp_INT) + continue; + + if (mp_irqs[i].mp_irqflag != 0x0f) + continue; + + if (nr_m_spare > 0) { + printk(KERN_INFO "*NEW* found "); + nr_m_spare--; + assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]); + m_spare[nr_m_spare] = NULL; + } else { + struct mpc_config_intsrc *m = + (struct mpc_config_intsrc *)mpt; + count += sizeof(struct mpc_config_intsrc); + if (!mpc_new_phys) { + printk(KERN_INFO "No spare slots, try to append...take your risk, new mpc_length %x\n", count); + } else { + if (count <= mpc_new_length) + printk(KERN_INFO "No spare slots, try to append..., new mpc_length %x\n", count); + else { + printk(KERN_ERR "mpc_new_length %lx is too small\n", mpc_new_length); + goto out; + } + } + assign_to_mpc_intsrc(&mp_irqs[i], m); + mpc->mpc_length = count; + mpt += sizeof(struct mpc_config_intsrc); + } + print_mp_irq_info(&mp_irqs[i]); + } +#endif +out: + /* update checksum */ + mpc->mpc_checksum = 0; + mpc->mpc_checksum -= mpf_checksum((unsigned char *)mpc, + mpc->mpc_length); + + return 0; +} + +int __initdata enable_update_mptable; + +static int __init update_mptable_setup(char *str) +{ + enable_update_mptable = 1; + return 0; +} +early_param("update_mptable", update_mptable_setup); + +static unsigned long __initdata mpc_new_phys; +static unsigned long mpc_new_length __initdata = 4096; + +/* alloc_mptable or alloc_mptable=4k */ +static int __initdata alloc_mptable; +static int __init parse_alloc_mptable_opt(char *p) +{ + enable_update_mptable = 1; + alloc_mptable = 1; + if (!p) + return 0; + mpc_new_length = memparse(p, &p); + return 0; +} +early_param("alloc_mptable", parse_alloc_mptable_opt); + +void __init early_reserve_e820_mpc_new(void) +{ + if (enable_update_mptable && alloc_mptable) { + u64 startt = 0; +#ifdef CONFIG_X86_TRAMPOLINE + startt = TRAMPOLINE_BASE; +#endif + mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4); + } +} + +static int __init update_mp_table(void) +{ + char str[16]; + char oem[10]; + struct intel_mp_floating *mpf; + struct mp_config_table *mpc; + struct mp_config_table *mpc_new; + + if (!enable_update_mptable) + return 0; + + mpf = mpf_found; + if (!mpf) + return 0; + + /* + * Now see if we need to go further. + */ + if (mpf->mpf_feature1 != 0) + return 0; + + if (!mpf->mpf_physptr) + return 0; + + mpc = phys_to_virt(mpf->mpf_physptr); + + if (!smp_check_mpc(mpc, oem, str)) + return 0; + + printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf)); + printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr); + + if (mpc_new_phys && mpc->mpc_length > mpc_new_length) { + mpc_new_phys = 0; + printk(KERN_INFO "mpc_new_length is %ld, please use alloc_mptable=8k\n", + mpc_new_length); + } + + if (!mpc_new_phys) { + unsigned char old, new; + /* check if we can change the postion */ + mpc->mpc_checksum = 0; + old = mpf_checksum((unsigned char *)mpc, mpc->mpc_length); + mpc->mpc_checksum = 0xff; + new = mpf_checksum((unsigned char *)mpc, mpc->mpc_length); + if (old == new) { + printk(KERN_INFO "mpc is readonly, please try alloc_mptable instead\n"); + return 0; + } + printk(KERN_INFO "use in-positon replacing\n"); + } else { + mpf->mpf_physptr = mpc_new_phys; + mpc_new = phys_to_virt(mpc_new_phys); + memcpy(mpc_new, mpc, mpc->mpc_length); + mpc = mpc_new; + /* check if we can modify that */ + if (mpc_new_phys - mpf->mpf_physptr) { + struct intel_mp_floating *mpf_new; + /* steal 16 bytes from [0, 1k) */ + printk(KERN_INFO "mpf new: %x\n", 0x400 - 16); + mpf_new = phys_to_virt(0x400 - 16); + memcpy(mpf_new, mpf, 16); + mpf = mpf_new; + mpf->mpf_physptr = mpc_new_phys; + } + mpf->mpf_checksum = 0; + mpf->mpf_checksum -= mpf_checksum((unsigned char *)mpf, 16); + printk(KERN_INFO "mpf_physptr new: %x\n", mpf->mpf_physptr); + } + + /* + * only replace the one with mp_INT and + * MP_IRQ_TRIGGER_LEVEL|MP_IRQ_POLARITY_LOW, + * already in mp_irqs , stored by ... and mp_config_acpi_gsi, + * may need pci=routeirq for all coverage + */ + replace_intsrc_all(mpc, mpc_new_phys, mpc_new_length); + + return 0; +} + +late_initcall(update_mp_table); diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 89e6cca5d693..978a0d637f3f 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -56,6 +56,7 @@ #include #include