summaryrefslogtreecommitdiff
path: root/arch/x86_64
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@woody.linux-foundation.org>2007-05-06 01:55:20 +0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-05-06 01:55:20 +0400
commitea62ccd00fd0b6720b033adfc9984f31130ce195 (patch)
tree9837b797b2466fffcb0af96c388b06eae9c3df18 /arch/x86_64
parent886a0768affe9a32f18c45f8e1393bca9ece5392 (diff)
parent35060b6a9a4e1c89bc6fbea61090e302dbc61847 (diff)
downloadlinux-ea62ccd00fd0b6720b033adfc9984f31130ce195.tar.xz
Merge branch 'for-linus' of git://one.firstfloor.org/home/andi/git/linux-2.6
* 'for-linus' of git://one.firstfloor.org/home/andi/git/linux-2.6: (231 commits) [PATCH] i386: Don't delete cpu_devs data to identify different x86 types in late_initcall [PATCH] i386: type may be unused [PATCH] i386: Some additional chipset register values validation. [PATCH] i386: Add missing !X86_PAE dependincy to the 2G/2G split. [PATCH] x86-64: Don't exclude asm-offsets.c in Documentation/dontdiff [PATCH] i386: avoid redundant preempt_disable in __unlazy_fpu [PATCH] i386: white space fixes in i387.h [PATCH] i386: Drop noisy e820 debugging printks [PATCH] x86-64: Fix allnoconfig error in genapic_flat.c [PATCH] x86-64: Shut up warnings for vfat compat ioctls on other file systems [PATCH] x86-64: Share identical video.S between i386 and x86-64 [PATCH] x86-64: Remove CONFIG_REORDER [PATCH] x86-64: Print type and size correctly for unknown compat ioctls [PATCH] i386: Remove copy_*_user BUG_ONs for (size < 0) [PATCH] i386: Little cleanups in smpboot.c [PATCH] x86-64: Don't enable NUMA for a single node in K8 NUMA scanning [PATCH] x86: Use RDTSCP for synchronous get_cycles if possible [PATCH] i386: Add X86_FEATURE_RDTSCP [PATCH] i386: Implement X86_FEATURE_SYNC_RDTSC on i386 [PATCH] i386: Implement alternative_io for i386 ... Fix up trivial conflict in include/linux/highmem.h manually. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'arch/x86_64')
-rw-r--r--arch/x86_64/Kconfig61
-rw-r--r--arch/x86_64/Makefile4
-rw-r--r--arch/x86_64/boot/Makefile2
-rw-r--r--arch/x86_64/boot/compressed/Makefile12
-rw-r--r--arch/x86_64/boot/compressed/head.S339
-rw-r--r--arch/x86_64/boot/compressed/misc.c247
-rw-r--r--arch/x86_64/boot/compressed/vmlinux.lds44
-rw-r--r--arch/x86_64/boot/compressed/vmlinux.scr9
-rw-r--r--arch/x86_64/boot/setup.S85
-rw-r--r--arch/x86_64/boot/video.S2043
-rw-r--r--arch/x86_64/defconfig183
-rw-r--r--arch/x86_64/ia32/ia32_binfmt.c10
-rw-r--r--arch/x86_64/ia32/ia32entry.S4
-rw-r--r--arch/x86_64/ia32/syscall32.c1
-rw-r--r--arch/x86_64/kernel/Makefile7
-rw-r--r--arch/x86_64/kernel/acpi/sleep.c24
-rw-r--r--arch/x86_64/kernel/acpi/wakeup.S286
-rw-r--r--arch/x86_64/kernel/aperture.c5
-rw-r--r--arch/x86_64/kernel/apic.c35
-rw-r--r--arch/x86_64/kernel/asm-offsets.c10
-rw-r--r--arch/x86_64/kernel/bugs.c21
-rw-r--r--arch/x86_64/kernel/e820.c5
-rw-r--r--arch/x86_64/kernel/early-quirks.c13
-rw-r--r--arch/x86_64/kernel/early_printk.c5
-rw-r--r--arch/x86_64/kernel/entry.S5
-rw-r--r--arch/x86_64/kernel/functionlist1284
-rw-r--r--arch/x86_64/kernel/genapic.c104
-rw-r--r--arch/x86_64/kernel/genapic_cluster.c137
-rw-r--r--arch/x86_64/kernel/genapic_flat.c25
-rw-r--r--arch/x86_64/kernel/head.S340
-rw-r--r--arch/x86_64/kernel/head64.c41
-rw-r--r--arch/x86_64/kernel/io_apic.c31
-rw-r--r--arch/x86_64/kernel/ioport.c1
-rw-r--r--arch/x86_64/kernel/machine_kexec.c14
-rw-r--r--arch/x86_64/kernel/mce.c32
-rw-r--r--arch/x86_64/kernel/mpparse.c2
-rw-r--r--arch/x86_64/kernel/nmi.c678
-rw-r--r--arch/x86_64/kernel/pci-calgary.c2
-rw-r--r--arch/x86_64/kernel/pci-gart.c2
-rw-r--r--arch/x86_64/kernel/pci-nommu.c2
-rw-r--r--arch/x86_64/kernel/pci-swiotlb.c2
-rw-r--r--arch/x86_64/kernel/process.c12
-rw-r--r--arch/x86_64/kernel/setup.c35
-rw-r--r--arch/x86_64/kernel/setup64.c5
-rw-r--r--arch/x86_64/kernel/signal.c6
-rw-r--r--arch/x86_64/kernel/smp.c30
-rw-r--r--arch/x86_64/kernel/smpboot.c47
-rw-r--r--arch/x86_64/kernel/suspend.c19
-rw-r--r--arch/x86_64/kernel/suspend_asm.S7
-rw-r--r--arch/x86_64/kernel/syscall.c1
-rw-r--r--arch/x86_64/kernel/time.c71
-rw-r--r--arch/x86_64/kernel/trampoline.S123
-rw-r--r--arch/x86_64/kernel/traps.c34
-rw-r--r--arch/x86_64/kernel/tsc.c17
-rw-r--r--arch/x86_64/kernel/tsc_sync.c4
-rw-r--r--arch/x86_64/kernel/verify_cpu.S119
-rw-r--r--arch/x86_64/kernel/vmlinux.lds.S20
-rw-r--r--arch/x86_64/kernel/vsyscall.c68
-rw-r--r--arch/x86_64/mm/fault.c5
-rw-r--r--arch/x86_64/mm/init.c172
-rw-r--r--arch/x86_64/mm/k8topology.c9
-rw-r--r--arch/x86_64/mm/numa.c306
-rw-r--r--arch/x86_64/mm/pageattr.c32
-rw-r--r--arch/x86_64/mm/srat.c8
64 files changed, 1798 insertions, 5509 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index e9b4f058a49a..145bb824b2a8 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -415,13 +415,13 @@ config OUT_OF_LINE_PFN_TO_PAGE
depends on DISCONTIGMEM
config NR_CPUS
- int "Maximum number of CPUs (2-256)"
+ int "Maximum number of CPUs (2-255)"
range 2 255
depends on SMP
default "8"
help
This allows you to specify the maximum number of CPUs which this
- kernel will support. Current maximum is 256 CPUs due to
+ kernel will support. Current maximum is 255 CPUs due to
APIC addressing limits. Less depending on the hardware.
This is purely to save memory - each supported CPU requires
@@ -565,23 +565,56 @@ config CRASH_DUMP
PHYSICAL_START.
For more details see Documentation/kdump/kdump.txt
+config RELOCATABLE
+ bool "Build a relocatable kernel(EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+ help
+ Builds a relocatable kernel. This enables loading and running
+ a kernel binary from a different physical address than it has
+ been compiled for.
+
+ One use is for the kexec on panic case where the recovery kernel
+ must live at a different physical address than the primary
+ kernel.
+
+ Note: If CONFIG_RELOCATABLE=y, then kernel run from the address
+ it has been loaded at and compile time physical address
+ (CONFIG_PHYSICAL_START) is ignored.
+
config PHYSICAL_START
hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
- default "0x1000000" if CRASH_DUMP
default "0x200000"
help
- This gives the physical address where the kernel is loaded. Normally
- for regular kernels this value is 0x200000 (2MB). But in the case
- of kexec on panic the fail safe kernel needs to run at a different
- address than the panic-ed kernel. This option is used to set the load
- address for kernels used to capture crash dump on being kexec'ed
- after panic. The default value for crash dump kernels is
- 0x1000000 (16MB). This can also be set based on the "X" value as
+ This gives the physical address where the kernel is loaded. It
+ should be aligned to 2MB boundary.
+
+ If kernel is a not relocatable (CONFIG_RELOCATABLE=n) then
+ bzImage will decompress itself to above physical address and
+ run from there. Otherwise, bzImage will run from the address where
+ it has been loaded by the boot loader and will ignore above physical
+ address.
+
+ In normal kdump cases one does not have to set/change this option
+ as now bzImage can be compiled as a completely relocatable image
+ (CONFIG_RELOCATABLE=y) and be used to load and run from a different
+ address. This option is mainly useful for the folks who don't want
+ to use a bzImage for capturing the crash dump and want to use a
+ vmlinux instead.
+
+ So if you are using bzImage for capturing the crash dump, leave
+ the value here unchanged to 0x200000 and set CONFIG_RELOCATABLE=y.
+ Otherwise if you plan to use vmlinux for capturing the crash dump
+ change this value to start of the reserved region (Typically 16MB
+ 0x1000000). In other words, it can be set based on the "X" value as
specified in the "crashkernel=YM@XM" command line boot parameter
passed to the panic-ed kernel. Typically this parameter is set as
crashkernel=64M@16M. Please take a look at
Documentation/kdump/kdump.txt for more details about crash dumps.
+ Usage of bzImage for capturing the crash dump is advantageous as
+ one does not have to build two kernels. Same kernel can be used
+ as production kernel and capture kernel.
+
Don't change this unless you know what you are doing.
config SECCOMP
@@ -627,14 +660,6 @@ config CC_STACKPROTECTOR_ALL
source kernel/Kconfig.hz
-config REORDER
- bool "Function reordering"
- default n
- help
- This option enables the toolchain to reorder functions for a more
- optimal TLB usage. If you have pretty much any version of binutils,
- this can increase your kernel build time by roughly one minute.
-
config K8_NB
def_bool y
depends on AGP_AMD64 || IOMMU || (PCI && NUMA)
diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile
index 2941a915d4ef..29617ae3926d 100644
--- a/arch/x86_64/Makefile
+++ b/arch/x86_64/Makefile
@@ -40,10 +40,6 @@ cflags-y += -m64
cflags-y += -mno-red-zone
cflags-y += -mcmodel=kernel
cflags-y += -pipe
-cflags-kernel-$(CONFIG_REORDER) += -ffunction-sections
-# this makes reading assembly source easier, but produces worse code
-# actually it makes the kernel smaller too.
-cflags-y += -fno-reorder-blocks
cflags-y += -Wno-sign-compare
cflags-y += -fno-asynchronous-unwind-tables
ifneq ($(CONFIG_DEBUG_INFO),y)
diff --git a/arch/x86_64/boot/Makefile b/arch/x86_64/boot/Makefile
index deb063e7762d..ee6f6505f95f 100644
--- a/arch/x86_64/boot/Makefile
+++ b/arch/x86_64/boot/Makefile
@@ -36,7 +36,7 @@ subdir- := compressed/ #Let make clean descend in compressed/
# ---------------------------------------------------------------------------
$(obj)/bzImage: IMAGE_OFFSET := 0x100000
-$(obj)/bzImage: EXTRA_AFLAGS := -traditional $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__
+$(obj)/bzImage: EXTRA_AFLAGS := $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__
$(obj)/bzImage: BUILDFLAGS := -b
quiet_cmd_image = BUILD $@
diff --git a/arch/x86_64/boot/compressed/Makefile b/arch/x86_64/boot/compressed/Makefile
index e70fa6e1da08..705a3e33d7e1 100644
--- a/arch/x86_64/boot/compressed/Makefile
+++ b/arch/x86_64/boot/compressed/Makefile
@@ -8,16 +8,14 @@
targets := vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o
EXTRA_AFLAGS := -traditional
-AFLAGS := $(subst -m64,-m32,$(AFLAGS))
# cannot use EXTRA_CFLAGS because base CFLAGS contains -mkernel which conflicts with
# -m32
-CFLAGS := -m32 -D__KERNEL__ -Iinclude -O2 -fno-strict-aliasing
-LDFLAGS := -m elf_i386
+CFLAGS := -m64 -D__KERNEL__ -Iinclude -O2 -fno-strict-aliasing -fPIC -mcmodel=small -fno-builtin
+LDFLAGS := -m elf_x86_64
-LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -e startup_32 -m elf_i386
-
-$(obj)/vmlinux: $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE
+LDFLAGS_vmlinux := -T
+$(obj)/vmlinux: $(src)/vmlinux.lds $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE
$(call if_changed,ld)
@:
@@ -27,7 +25,7 @@ $(obj)/vmlinux.bin: vmlinux FORCE
$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
$(call if_changed,gzip)
-LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T
+LDFLAGS_piggy.o := -r --format binary --oformat elf64-x86-64 -T
$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE
$(call if_changed,ld)
diff --git a/arch/x86_64/boot/compressed/head.S b/arch/x86_64/boot/compressed/head.S
index 6f55565e4d42..f9d5692a0106 100644
--- a/arch/x86_64/boot/compressed/head.S
+++ b/arch/x86_64/boot/compressed/head.S
@@ -26,116 +26,279 @@
#include <linux/linkage.h>
#include <asm/segment.h>
+#include <asm/pgtable.h>
#include <asm/page.h>
+#include <asm/msr.h>
+.section ".text.head"
.code32
.globl startup_32
-
+
startup_32:
cld
cli
- movl $(__KERNEL_DS),%eax
- movl %eax,%ds
- movl %eax,%es
- movl %eax,%fs
- movl %eax,%gs
-
- lss stack_start,%esp
- xorl %eax,%eax
-1: incl %eax # check that A20 really IS enabled
- movl %eax,0x000000 # loop forever if it isn't
- cmpl %eax,0x100000
- je 1b
+ movl $(__KERNEL_DS), %eax
+ movl %eax, %ds
+ movl %eax, %es
+ movl %eax, %ss
+
+/* Calculate the delta between where we were compiled to run
+ * at and where we were actually loaded at. This can only be done
+ * with a short local call on x86. Nothing else will tell us what
+ * address we are running at. The reserved chunk of the real-mode
+ * data at 0x34-0x3f are used as the stack for this calculation.
+ * Only 4 bytes are needed.
+ */
+ leal 0x40(%esi), %esp
+ call 1f
+1: popl %ebp
+ subl $1b, %ebp
+
+/* setup a stack and make sure cpu supports long mode. */
+ movl $user_stack_end, %eax
+ addl %ebp, %eax
+ movl %eax, %esp
+
+ call verify_cpu
+ testl %eax, %eax
+ jnz no_longmode
+
+/* Compute the delta between where we were compiled to run at
+ * and where the code will actually run at.
+ */
+/* %ebp contains the address we are loaded at by the boot loader and %ebx
+ * contains the address where we should move the kernel image temporarily
+ * for safe in-place decompression.
+ */
+
+#ifdef CONFIG_RELOCATABLE
+ movl %ebp, %ebx
+ addl $(LARGE_PAGE_SIZE -1), %ebx
+ andl $LARGE_PAGE_MASK, %ebx
+#else
+ movl $CONFIG_PHYSICAL_START, %ebx
+#endif
+
+ /* Replace the compressed data size with the uncompressed size */
+ subl input_len(%ebp), %ebx
+ movl output_len(%ebp), %eax
+ addl %eax, %ebx
+ /* Add 8 bytes for every 32K input block */
+ shrl $12, %eax
+ addl %eax, %ebx
+ /* Add 32K + 18 bytes of extra slack and align on a 4K boundary */
+ addl $(32768 + 18 + 4095), %ebx
+ andl $~4095, %ebx
/*
- * Initialize eflags. Some BIOS's leave bits like NT set. This would
- * confuse the debugger if this code is traced.
- * XXX - best to initialize before switching to protected mode.
+ * Prepare for entering 64 bit mode
*/
- pushl $0
- popfl
+
+ /* Load new GDT with the 64bit segments using 32bit descriptor */
+ leal gdt(%ebp), %eax
+ movl %eax, gdt+2(%ebp)
+ lgdt gdt(%ebp)
+
+ /* Enable PAE mode */
+ xorl %eax, %eax
+ orl $(1 << 5), %eax
+ movl %eax, %cr4
+
+ /*
+ * Build early 4G boot pagetable
+ */
+ /* Initialize Page tables to 0*/
+ leal pgtable(%ebx), %edi
+ xorl %eax, %eax
+ movl $((4096*6)/4), %ecx
+ rep stosl
+
+ /* Build Level 4 */
+ leal pgtable + 0(%ebx), %edi
+ leal 0x1007 (%edi), %eax
+ movl %eax, 0(%edi)
+
+ /* Build Level 3 */
+ leal pgtable + 0x1000(%ebx), %edi
+ leal 0x1007(%edi), %eax
+ movl $4, %ecx
+1: movl %eax, 0x00(%edi)
+ addl $0x00001000, %eax
+ addl $8, %edi
+ decl %ecx
+ jnz 1b
+
+ /* Build Level 2 */
+ leal pgtable + 0x2000(%ebx), %edi
+ movl $0x00000183, %eax
+ movl $2048, %ecx
+1: movl %eax, 0(%edi)
+ addl $0x00200000, %eax
+ addl $8, %edi
+ decl %ecx
+ jnz 1b
+
+ /* Enable the boot page tables */
+ leal pgtable(%ebx), %eax
+ movl %eax, %cr3
+
+ /* Enable Long mode in EFER (Extended Feature Enable Register) */
+ movl $MSR_EFER, %ecx
+ rdmsr
+ btsl $_EFER_LME, %eax
+ wrmsr
+
+ /* Setup for the jump to 64bit mode
+ *
+ * When the jump is performend we will be in long mode but
+ * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
+ * (and in turn EFER.LMA = 1). To jump into 64bit mode we use
+ * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
+ * We place all of the values on our mini stack so lret can
+ * used to perform that far jump.
+ */
+ pushl $__KERNEL_CS
+ leal startup_64(%ebp), %eax
+ pushl %eax
+
+ /* Enter paged protected Mode, activating Long Mode */
+ movl $0x80000001, %eax /* Enable Paging and Protected mode */
+ movl %eax, %cr0
+
+ /* Jump from 32bit compatibility mode into 64bit mode. */
+ lret
+
+no_longmode:
+ /* This isn't an x86-64 CPU so hang */
+1:
+ hlt
+ jmp 1b
+
+#include "../../kernel/verify_cpu.S"
+
+ /* Be careful here startup_64 needs to be at a predictable
+ * address so I can export it in an ELF header. Bootloaders
+ * should look at the ELF header to find this address, as
+ * it may change in the future.
+ */
+ .code64
+ .org 0x200
+ENTRY(startup_64)
+ /* We come here either from startup_32 or directly from a
+ * 64bit bootloader. If we come here from a bootloader we depend on
+ * an identity mapped page table being provied that maps our
+ * entire text+data+bss and hopefully all of memory.
+ */
+
+ /* Setup data segments. */
+ xorl %eax, %eax
+ movl %eax, %ds
+ movl %eax, %es
+ movl %eax, %ss
+
+ /* Compute the decompressed kernel start address. It is where
+ * we were loaded at aligned to a 2M boundary. %rbp contains the
+ * decompressed kernel start address.
+ *
+ * If it is a relocatable kernel then decompress and run the kernel
+ * from load address aligned to 2MB addr, otherwise decompress and
+ * run the kernel from CONFIG_PHYSICAL_START
+ */
+
+ /* Start with the delta to where the kernel will run at. */
+#ifdef CONFIG_RELOCATABLE
+ leaq startup_32(%rip) /* - $startup_32 */, %rbp
+ addq $(LARGE_PAGE_SIZE - 1), %rbp
+ andq $LARGE_PAGE_MASK, %rbp
+ movq %rbp, %rbx
+#else
+ movq $CONFIG_PHYSICAL_START, %rbp
+ movq %rbp, %rbx
+#endif
+
+ /* Replace the compressed data size with the uncompressed size */
+ movl input_len(%rip), %eax
+ subq %rax, %rbx
+ movl output_len(%rip), %eax
+ addq %rax, %rbx
+ /* Add 8 bytes for every 32K input block */
+ shrq $12, %rax
+ addq %rax, %rbx
+ /* Add 32K + 18 bytes of extra slack and align on a 4K boundary */
+ addq $(32768 + 18 + 4095), %rbx
+ andq $~4095, %rbx
+
+/* Copy the compressed kernel to the end of our buffer
+ * where decompression in place becomes safe.
+ */
+ leaq _end(%rip), %r8
+ leaq _end(%rbx), %r9
+ movq $_end /* - $startup_32 */, %rcx
+1: subq $8, %r8
+ subq $8, %r9
+ movq 0(%r8), %rax
+ movq %rax, 0(%r9)
+ subq $8, %rcx
+ jnz 1b
+
+/*
+ * Jump to the relocated address.
+ */
+ leaq relocated(%rbx), %rax
+ jmp *%rax
+
+.section ".text"
+relocated:
+
/*
* Clear BSS
*/
- xorl %eax,%eax
- movl $_edata,%edi
- movl $_end,%ecx
- subl %edi,%ecx
+ xorq %rax, %rax
+ leaq _edata(%rbx), %rdi
+ leaq _end(%rbx), %rcx
+ subq %rdi, %rcx
cld
rep
stosb
+
+ /* Setup the stack */
+ leaq user_stack_end(%rip), %rsp
+
+ /* zero EFLAGS after setting rsp */
+ pushq $0
+ popfq
+
/*
* Do the decompression, and jump to the new kernel..
*/
- subl $16,%esp # place for structure on the stack
- movl %esp,%eax
- pushl %esi # real mode pointer as second arg
- pushl %eax # address of structure as first arg
- call decompress_kernel
- orl %eax,%eax
- jnz 3f
- addl $8,%esp
- xorl %ebx,%ebx
- ljmp $(__KERNEL_CS), $__PHYSICAL_START
+ pushq %rsi # Save the real mode argument
+ movq %rsi, %rdi # real mode address
+ leaq _heap(%rip), %rsi # _heap
+ leaq input_data(%rip), %rdx # input_data
+ movl input_len(%rip), %eax
+ movq %rax, %rcx # input_len
+ movq %rbp, %r8 # output
+ call decompress_kernel
+ popq %rsi
-/*
- * We come here, if we were loaded high.
- * We need to move the move-in-place routine down to 0x1000
- * and then start it with the buffer addresses in registers,
- * which we got from the stack.
- */
-3:
- movl %esi,%ebx
- movl $move_routine_start,%esi
- movl $0x1000,%edi
- movl $move_routine_end,%ecx
- subl %esi,%ecx
- addl $3,%ecx
- shrl $2,%ecx
- cld
- rep
- movsl
-
- popl %esi # discard the address
- addl $4,%esp # real mode pointer
- popl %esi # low_buffer_start
- popl %ecx # lcount
- popl %edx # high_buffer_start
- popl %eax # hcount
- movl $__PHYSICAL_START,%edi
- cli # make sure we don't get interrupted
- ljmp $(__KERNEL_CS), $0x1000 # and jump to the move routine
/*
- * Routine (template) for moving the decompressed kernel in place,
- * if we were high loaded. This _must_ PIC-code !
+ * Jump to the decompressed kernel.
*/
-move_routine_start:
- movl %ecx,%ebp
- shrl $2,%ecx
- rep
- movsl
- movl %ebp,%ecx
- andl $3,%ecx
- rep
- movsb
- movl %edx,%esi
- movl %eax,%ecx # NOTE: rep movsb won't move if %ecx == 0
- addl $3,%ecx
- shrl $2,%ecx
- rep
- movsl
- movl %ebx,%esi # Restore setup pointer
- xorl %ebx,%ebx
- ljmp $(__KERNEL_CS), $__PHYSICAL_START
-move_routine_end:
+ jmp *%rbp
-
-/* Stack for uncompression */
- .align 32
-user_stack:
+ .data
+gdt:
+ .word gdt_end - gdt
+ .long gdt
+ .word 0
+ .quad 0x0000000000000000 /* NULL descriptor */
+ .quad 0x00af9a000000ffff /* __KERNEL_CS */
+ .quad 0x00cf92000000ffff /* __KERNEL_DS */
+gdt_end:
+ .bss
+/* Stack for uncompression */
+ .balign 4
+user_stack:
.fill 4096,4,0
-stack_start:
- .long user_stack+4096
- .word __KERNEL_DS
-
+user_stack_end:
diff --git a/arch/x86_64/boot/compressed/misc.c b/arch/x86_64/boot/compressed/misc.c
index 3755b2e394d0..f932b0e89096 100644
--- a/arch/x86_64/boot/compressed/misc.c
+++ b/arch/x86_64/boot/compressed/misc.c
@@ -9,10 +9,95 @@
* High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
*/
+#define _LINUX_STRING_H_ 1
+#define __LINUX_BITMAP_H 1
+
+#include <linux/linkage.h>
#include <linux/screen_info.h>
#include <asm/io.h>
#include <asm/page.h>
+/* WARNING!!
+ * This code is compiled with -fPIC and it is relocated dynamically
+ * at run time, but no relocation processing is performed.
+ * This means that it is not safe to place pointers in static structures.
+ */
+
+/*
+ * Getting to provable safe in place decompression is hard.
+ * Worst case behaviours need to be analized.
+ * Background information:
+ *
+ * The file layout is:
+ * magic[2]
+ * method[1]
+ * flags[1]
+ * timestamp[4]
+ * extraflags[1]
+ * os[1]
+ * compressed data blocks[N]
+ * crc[4] orig_len[4]
+ *
+ * resulting in 18 bytes of non compressed data overhead.
+ *
+ * Files divided into blocks
+ * 1 bit (last block flag)
+ * 2 bits (block type)
+ *
+ * 1 block occurs every 32K -1 bytes or when there 50% compression has been achieved.
+ * The smallest block type encoding is always used.
+ *
+ * stored:
+ * 32 bits length in bytes.
+ *
+ * fixed:
+ * magic fixed tree.
+ * symbols.
+ *
+ * dynamic:
+ * dynamic tree encoding.
+ * symbols.
+ *
+ *
+ * The buffer for decompression in place is the length of the
+ * uncompressed data, plus a small amount extra to keep the algorithm safe.
+ * The compressed data is placed at the end of the buffer. The output
+ * pointer is placed at the start of the buffer and the input pointer
+ * is placed where the compressed data starts. Problems will occur
+ * when the output pointer overruns the input pointer.
+ *
+ * The output pointer can only overrun the input pointer if the input
+ * pointer is moving faster than the output pointer. A condition only
+ * triggered by data whose compressed form is larger than the uncompressed
+ * form.
+ *
+ * The worst case at the block level is a growth of the compressed data
+ * of 5 bytes per 32767 bytes.
+ *
+ * The worst case internal to a compressed block is very hard to figure.
+ * The worst case can at least be boundined by having one bit that represents
+ * 32764 bytes and then all of the rest of the bytes representing the very
+ * very last byte.
+ *
+ * All of which is enough to compute an amount of extra data that is required
+ * to be safe. To avoid problems at the block level allocating 5 extra bytes
+ * per 32767 bytes of data is sufficient. To avoind problems internal to a block
+ * adding an extra 32767 bytes (the worst case uncompressed block size) is
+ * sufficient, to ensure that in the worst case the decompressed data for
+ * block will stop the byte before the compressed data for a block begins.
+ * To avoid problems with the compressed data's meta information an extra 18
+ * bytes are needed. Leading to the formula:
+ *
+ * extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size.
+ *
+ * Adding 8 bytes per 32K is a bit excessive but much easier to calculate.
+ * Adding 32768 instead of 32767 just makes for round numbers.
+ * Adding the decompressor_size is necessary as it musht live after all
+ * of the data as well. Last I measured the decompressor is about 14K.
+ * 10K of actuall data and 4K of bss.
+ *
+ */
+
/*
* gzip declarations
*/
@@ -28,15 +113,20 @@ typedef unsigned char uch;
typedef unsigned short ush;
typedef unsigned long ulg;
-#define WSIZE 0x8000 /* Window size must be at least 32k, */
- /* and a power of two */
+#define WSIZE 0x80000000 /* Window size must be at least 32k,
+ * and a power of two
+ * We don't actually have a window just
+ * a huge output buffer so I report
+ * a 2G windows size, as that should
+ * always be larger than our output buffer.
+ */
-static uch *inbuf; /* input buffer */
-static uch window[WSIZE]; /* Sliding window buffer */
+static uch *inbuf; /* input buffer */
+static uch *window; /* Sliding window buffer, (and final output buffer) */
-static unsigned insize = 0; /* valid bytes in inbuf */
-static unsigned inptr = 0; /* index of next byte to be processed in inbuf */
-static unsigned outcnt = 0; /* bytes in output buffer */
+static unsigned insize; /* valid bytes in inbuf */
+static unsigned inptr; /* index of next byte to be processed in inbuf */
+static unsigned outcnt; /* bytes in output buffer */
/* gzip flag byte */
#define ASCII_FLAG 0x01 /* bit 0 set: file probably ASCII text */
@@ -87,8 +177,6 @@ extern unsigned char input_data[];
extern int input_len;
static long bytes_out = 0;
-static uch *output_data;
-static unsigned long output_ptr = 0;
static void *malloc(int size);
static void free(void *where);
@@ -98,17 +186,10 @@ static void *memcpy(void *dest, const void *src, unsigned n);
static void putstr(const char *);
-extern int end;
-static long free_mem_ptr = (long)&end;
+static long free_mem_ptr;
static long free_mem_end_ptr;
-#define INPLACE_MOVE_ROUTINE 0x1000
-#define LOW_BUFFER_START 0x2000
-#define LOW_BUFFER_MAX 0x90000
-#define HEAP_SIZE 0x3000
-static unsigned int low_buffer_end, low_buffer_size;
-static int high_loaded =0;
-static uch *high_buffer_start /* = (uch *)(((ulg)&end) + HEAP_SIZE)*/;
+#define HEAP_SIZE 0x7000
static char *vidmem = (char *)0xb8000;
static int vidport;
@@ -218,58 +299,31 @@ static void* memcpy(void* dest, const void* src, unsigned n)
*/
static int fill_inbuf(void)
{
- if (insize != 0) {
- error("ran out of input data");
- }
-
- inbuf = input_data;
- insize = input_len;
- inptr = 1;
- return inbuf[0];
+ error("ran out of input data");
+ return 0;
}
/* ===========================================================================
* Write the output window window[0..outcnt-1] and update crc and bytes_out.
* (Used for the decompressed data only.)
*/
-static void flush_window_low(void)
-{
- ulg c = crc; /* temporary variable */
- unsigned n;
- uch *in, *out, ch;
-
- in = window;
- out = &output_data[output_ptr];
- for (n = 0; n < outcnt; n++) {
- ch = *out++ = *in++;
- c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
- }
- crc = c;
- bytes_out += (ulg)outcnt;
- output_ptr += (ulg)outcnt;
- outcnt = 0;
-}
-
-static void flush_window_high(void)
-{
- ulg c = crc; /* temporary variable */
- unsigned n;
- uch *in, ch;
- in = window;
- for (n = 0; n < outcnt; n++) {
- ch = *output_data++ = *in++;
- if ((ulg)output_data == low_buffer_end) output_data=high_buffer_start;
- c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
- }
- crc = c;
- bytes_out += (ulg)outcnt;
- outcnt = 0;
-}
-
static void flush_window(void)
{
- if (high_loaded) flush_window_high();
- else flush_window_low();
+ /* With my window equal to my output buffer
+ * I only need to compute the crc here.
+ */
+ ulg c = crc; /* temporary variable */
+ unsigned n;
+ uch *in, ch;
+
+ in = window;
+ for (n = 0; n < outcnt; n++) {
+ ch = *in++;
+ c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
+ }
+ crc = c;
+ bytes_out += (ulg)outcnt;
+ outcnt = 0;
}
static void error(char *x)
@@ -281,57 +335,8 @@ static void error(char *x)
while(1); /* Halt */
}
-static void setup_normal_output_buffer(void)
-{
-#ifdef STANDARD_MEMORY_BIOS_CALL
- if (RM_EXT_MEM_K < 1024) error("Less than 2MB of memory");
-#else
- if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory");
-#endif
- output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */
- free_mem_end_ptr = (long)real_mode;
-}
-
-struct moveparams {
- uch *low_buffer_start; int lcount;
- uch *high_buffer_start; int hcount;
-};
-
-static void setup_output_buffer_if_we_run_high(struct moveparams *mv)
-{
- high_buffer_start = (uch *)(((ulg)&end) + HEAP_SIZE);
-#ifdef STANDARD_MEMORY_BIOS_CALL
- if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory");
-#else
- if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory");
-#endif
- mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START;
- low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
- ? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff;
- low_buffer_size = low_buffer_end - LOW_BUFFER_START;
- high_loaded = 1;
- free_mem_end_ptr = (long)high_buffer_start;
- if ( (__PHYSICAL_START + low_buffer_size) > ((ulg)high_buffer_start)) {
- high_buffer_start = (uch *)(__PHYSICAL_START + low_buffer_size);
- mv->hcount = 0; /* say: we need not to move high_buffer */
- }
- else mv->hcount = -1;
- mv->high_buffer_start = high_buffer_start;
-}
-
-static void close_output_buffer_if_we_run_high(struct moveparams *mv)
-{
- if (bytes_out > low_buffer_size) {
- mv->lcount = low_buffer_size;
- if (mv->hcount)
- mv->hcount = bytes_out - low_buffer_size;
- } else {
- mv->lcount = bytes_out;
- mv->hcount = 0;
- }
-}
-
-int decompress_kernel(struct moveparams *mv, void *rmode)
+asmlinkage void decompress_kernel(void *rmode, unsigned long heap,
+ uch *input_data, unsigned long input_len, uch *output)
{
real_mode = rmode;
@@ -346,13 +351,21 @@ int decompress_kernel(struct moveparams *mv, void *rmode)
lines = RM_SCREEN_INFO.orig_video_lines;
cols = RM_SCREEN_INFO.orig_video_cols;
- if (free_mem_ptr < 0x100000) setup_normal_output_buffer();
- else setup_output_buffer_if_we_run_high(mv);
+ window = output; /* Output buffer (Normally at 1M) */
+ free_mem_ptr = heap; /* Heap */
+ free_mem_end_ptr = heap + HEAP_SIZE;
+ inbuf = input_data; /* Input buffer */
+ insize = input_len;
+ inptr = 0;
+
+ if ((ulg)output & (__KERNEL_ALIGN - 1))
+ error("Destination address not 2M aligned");
+ if ((ulg)output >= 0xffffffffffUL)
+ error("Destination address too large");
makecrc();
putstr(".\nDecompressing Linux...");
gunzip();
putstr("done.\nBooting the kernel.\n");
- if (high_loaded) close_output_buffer_if_we_run_high(mv);
- return high_loaded;
+ return;
}
diff --git a/arch/x86_64/boot/compressed/vmlinux.lds b/arch/x86_64/boot/compressed/vmlinux.lds
new file mode 100644
index 000000000000..94c13e557fb4
--- /dev/null
+++ b/arch/x86_64/boot/compressed/vmlinux.lds
@@ -0,0 +1,44 @@
+OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
+OUTPUT_ARCH(i386:x86-64)
+ENTRY(startup_64)
+SECTIONS
+{
+ /* Be careful parts of head.S assume startup_32 is at
+ * address 0.
+ */
+ . = 0;
+ .text : {
+ _head = . ;
+ *(.text.head)
+ _ehead = . ;
+ *(.text.compressed)
+ _text = .; /* Text */
+ *(.text)
+ *(.text.*)
+ _etext = . ;
+ }
+ .rodata : {
+ _rodata = . ;
+ *(.rodata) /* read-only data */
+ *(.rodata.*)
+ _erodata = . ;
+ }
+ .data : {
+ _data = . ;
+ *(.data)
+ *(.data.*)
+ _edata = . ;
+ }
+ .bss : {
+ _bss = . ;
+ *(.bss)
+ *(.bss.*)
+ *(COMMON)
+ . = ALIGN(8);
+ _end = . ;
+ . = ALIGN(4096);
+ pgtable = . ;
+ . = . + 4096 * 6;
+ _heap = .;
+ }
+}
diff --git a/arch/x86_64/boot/compressed/vmlinux.scr b/arch/x86_64/boot/compressed/vmlinux.scr
index 1ed9d791f863..bd1429ce193e 100644
--- a/arch/x86_64/boot/compressed/vmlinux.scr
+++ b/arch/x86_64/boot/compressed/vmlinux.scr
@@ -1,9 +1,10 @@
SECTIONS
{
- .data : {
+ .text.compressed : {
input_len = .;
- LONG(input_data_end - input_data) input_data = .;
- *(.data)
- input_data_end = .;
+ LONG(input_data_end - input_data) input_data = .;
+ *(.data)
+ output_len = . - 4;
+ input_data_end = .;
}
}
diff --git a/arch/x86_64/boot/setup.S b/arch/x86_64/boot/setup.S
index 770940cc0108..e9e33f949697 100644
--- a/arch/x86_64/boot/setup.S
+++ b/arch/x86_64/boot/setup.S
@@ -51,6 +51,7 @@
#include <asm/boot.h>
#include <asm/e820.h>
#include <asm/page.h>
+#include <asm/setup.h>
/* Signature words to ensure LILO loaded us right */
#define SIG1 0xAA55
@@ -80,7 +81,7 @@ start:
# This is the setup header, and it must start at %cs:2 (old 0x9020:2)
.ascii "HdrS" # header signature
- .word 0x0204 # header version number (>= 0x0105)
+ .word 0x0206 # header version number (>= 0x0105)
# or else old loadlin-1.5 will fail)
realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
start_sys_seg: .word SYSSEG
@@ -155,7 +156,20 @@ cmd_line_ptr: .long 0 # (Header version 0x0202 or later)
# low memory 0x10000 or higher.
ramdisk_max: .long 0xffffffff
-
+kernel_alignment: .long 0x200000 # physical addr alignment required for
+ # protected mode relocatable kernel
+#ifdef CONFIG_RELOCATABLE
+relocatable_kernel: .byte 1
+#else
+relocatable_kernel: .byte 0
+#endif
+pad2: .byte 0
+pad3: .word 0
+
+cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line,
+ #added with boot protocol
+ #version 2.06
+
trampoline: call start_of_setup
.align 16
# The offset at this point is 0x240
@@ -290,64 +304,10 @@ loader_ok:
movw %cs,%ax
movw %ax,%ds
- /* minimum CPUID flags for x86-64 */
- /* see http://www.x86-64.org/lists/discuss/msg02971.html */
-#define SSE_MASK ((1<<25)|(1<<26))
-#define REQUIRED_MASK1 ((1<<0)|(1<<3)|(1<<4)|(1<<5)|(1<<6)|(1<<8)|\
- (1<<13)|(1<<15)|(1<<24))
-#define REQUIRED_MASK2 (1<<29)
-
- pushfl /* standard way to check for cpuid */
- popl %eax
- movl %eax,%ebx
- xorl $0x200000,%eax
- pushl %eax
- popfl
- pushfl
- popl %eax
- cmpl %eax,%ebx
- jz no_longmode /* cpu has no cpuid */
- movl $0x0,%eax
- cpuid
- cmpl $0x1,%eax
- jb no_longmode /* no cpuid 1 */
- xor %di,%di
- cmpl $0x68747541,%ebx /* AuthenticAMD */
- jnz noamd
- cmpl $0x69746e65,%edx
- jnz noamd
- cmpl $0x444d4163,%ecx
- jnz noamd
- mov $1,%di /* cpu is from AMD */
-noamd:
- movl $0x1,%eax
- cpuid
- andl $REQUIRED_MASK1,%edx
- xorl $REQUIRED_MASK1,%edx
- jnz no_longmode
- movl $0x80000000,%eax
- cpuid
- cmpl $0x80000001,%eax
- jb no_longmode /* no extended cpuid */
- movl $0x80000001,%eax
- cpuid
- andl $REQUIRED_MASK2,%edx
- xorl $REQUIRED_MASK2,%edx
- jnz no_longmode
-sse_test:
- movl $1,%eax
- cpuid
- andl $SSE_MASK,%edx
- cmpl $SSE_MASK,%edx
- je sse_ok
- test %di,%di
- jz no_longmode /* only try to force SSE on AMD */
- movl $0xc0010015,%ecx /* HWCR */
- rdmsr
- btr $15,%eax /* enable SSE */
- wrmsr
- xor %di,%di /* don't loop */
- jmp sse_test /* try again */
+ call verify_cpu
+ testl %eax,%eax
+ jz sse_ok
+
no_longmode:
call beep
lea long_mode_panic,%si
@@ -357,7 +317,8 @@ no_longmode_loop:
long_mode_panic:
.string "Your CPU does not support long mode. Use a 32bit distribution."
.byte 0
-
+
+#include "../kernel/verify_cpu.S"
sse_ok:
popw %ds
@@ -846,7 +807,7 @@ gdt_48:
# Include video setup & detection code
-#include "video.S"
+#include "../../i386/boot/video.S"
# Setup signature -- must be last
setup_sig1: .word SIG1
diff --git a/arch/x86_64/boot/video.S b/arch/x86_64/boot/video.S
deleted file mode 100644
index 6090516c9c7f..000000000000
--- a/arch/x86_64/boot/video.S
+++ /dev/null
@@ -1,2043 +0,0 @@
-/* video.S
- *
- * Display adapter & video mode setup, version 2.13 (14-May-99)
- *
- * Copyright (C) 1995 -- 1998 Martin Mares <mj@ucw.cz>
- * Based on the original setup.S code (C) Linus Torvalds and Mats Anderson
- *
- * Rewritten to use GNU 'as' by Chris Noe <stiker@northlink.com> May 1999
- *
- * For further information, look at Documentation/svga.txt.
- *
- */
-
-/* Enable autodetection of SVGA adapters and modes. */
-#undef CONFIG_VIDEO_SVGA
-
-/* Enable autodetection of VESA modes */
-#define CONFIG_VIDEO_VESA
-
-/* Enable compacting of mode table */
-#define CONFIG_VIDEO_COMPACT
-
-/* Retain screen contents when switching modes */
-#define CONFIG_VIDEO_RETAIN
-
-/* Enable local mode list */
-#undef CONFIG_VIDEO_LOCAL
-
-/* Force 400 scan lines for standard modes (hack to fix bad BIOS behaviour */
-#undef CONFIG_VIDEO_400_HACK
-
-/* Hack that lets you force specific BIOS mode ID and specific dimensions */
-#undef CONFIG_VIDEO_GFX_HACK
-#define VIDEO_GFX_BIOS_AX 0x4f02 /* 800x600 on ThinkPad */
-#define VIDEO_GFX_BIOS_BX 0x0102
-#define VIDEO_GFX_DUMMY_RESOLUTION 0x6425 /* 100x37 */
-
-/* This code uses an extended set of video mode numbers. These include:
- * Aliases for standard modes
- * NORMAL_VGA (-1)
- * EXTENDED_VGA (-2)
- * ASK_VGA (-3)
- * Video modes numbered by menu position -- NOT RECOMMENDED because of lack
- * of compatibility when extending the table. These are between 0x00 and 0xff.
- */
-#define VIDEO_FIRST_MENU 0x0000
-
-/* Standard BIOS video modes (BIOS number + 0x0100) */
-#define VIDEO_FIRST_BIOS 0x0100
-
-/* VESA BIOS video modes (VESA number + 0x0200) */
-#define VIDEO_FIRST_VESA 0x0200
-
-/* Video7 special modes (BIOS number + 0x0900) */
-#define VIDEO_FIRST_V7 0x0900
-
-/* Special video modes */
-#define VIDEO_FIRST_SPECIAL 0x0f00
-#define VIDEO_80x25 0x0f00
-#define VIDEO_8POINT 0x0f01
-#define VIDEO_80x43 0x0f02
-#define VIDEO_80x28 0x0f03
-#define VIDEO_CURRENT_MODE 0x0f04
-#define VIDEO_80x30 0x0f05
-#define VIDEO_80x34 0x0f06
-#define VIDEO_80x60 0x0f07
-#define VIDEO_GFX_HACK 0x0f08
-#define VIDEO_LAST_SPECIAL 0x0f09
-
-/* Video modes given by resolution */
-#define VIDEO_FIRST_RESOLUTION 0x1000
-
-/* The "recalculate timings" flag */
-#define VIDEO_RECALC 0x8000
-
-/* Positions of various video parameters passed to the kernel */
-/* (see also include/linux/tty.h) */
-#define PARAM_CURSOR_POS 0x00
-#define PARAM_VIDEO_PAGE 0x04
-#define PARAM_VIDEO_MODE 0x06
-#define PARAM_VIDEO_COLS 0x07
-#define PARAM_VIDEO_EGA_BX 0x0a
-#define PARAM_VIDEO_LINES 0x0e
-#define PARAM_HAVE_VGA 0x0f
-#define PARAM_FONT_POINTS 0x10
-
-#define PARAM_LFB_WIDTH 0x12
-#define PARAM_LFB_HEIGHT 0x14
-#define PARAM_LFB_DEPTH 0x16
-#define PARAM_LFB_BASE 0x18
-#define PARAM_LFB_SIZE 0x1c
-#define PARAM_LFB_LINELENGTH 0x24
-#define PARAM_LFB_COLORS 0x26
-#define PARAM_VESAPM_SEG 0x2e
-#define PARAM_VESAPM_OFF 0x30
-#define PARAM_LFB_PAGES 0x32
-#define PARAM_VESA_ATTRIB 0x34
-#define PARAM_CAPABILITIES 0x36
-
-/* Define DO_STORE according to CONFIG_VIDEO_RETAIN */
-#ifdef CONFIG_VIDEO_RETAIN
-#define DO_STORE call store_screen
-#else
-#define DO_STORE
-#endif /* CONFIG_VIDEO_RETAIN */
-
-# This is the main entry point called by setup.S
-# %ds *must* be pointing to the bootsector
-video: pushw %ds # We use different segments
- pushw %ds # FS contains original DS
- popw %fs
- pushw %cs # DS is equal to CS
- popw %ds
- pushw %cs # ES is equal to CS
- popw %es
- xorw %ax, %ax
- movw %ax, %gs # GS is zero
- cld
- call basic_detect # Basic adapter type testing (EGA/VGA/MDA/CGA)
-#ifdef CONFIG_VIDEO_SELECT
- movw %fs:(0x01fa), %ax # User selected video mode
- cmpw $ASK_VGA, %ax # Bring up the menu
- jz vid2
-
- call mode_set # Set the mode
- jc vid1
-
- leaw badmdt, %si # Invalid mode ID
- call prtstr
-vid2: call mode_menu
-vid1:
-#ifdef CONFIG_VIDEO_RETAIN
- call restore_screen # Restore screen contents
-#endif /* CONFIG_VIDEO_RETAIN */
- call store_edid
-#endif /* CONFIG_VIDEO_SELECT */
- call mode_params # Store mode parameters
- popw %ds # Restore original DS
- ret
-
-# Detect if we have CGA, MDA, EGA or VGA and pass it to the kernel.
-basic_detect:
- movb $0, %fs:(PARAM_HAVE_VGA)
- movb $0x12, %ah # Check EGA/VGA
- movb $0x10, %bl
- int $0x10
- movw %bx, %fs:(PARAM_VIDEO_EGA_BX) # Identifies EGA to the kernel
- cmpb $0x10, %bl # No, it's a CGA/MDA/HGA card.
- je basret
-
- incb adapter
- movw $0x1a00, %ax # Check EGA or VGA?
- int $0x10
- cmpb $0x1a, %al # 1a means VGA...
- jne basret # anything else is EGA.
-
- incb %fs:(PARAM_HAVE_VGA) # We've detected a VGA
- incb adapter
-basret: ret
-
-# Store the video mode parameters for later usage by the kernel.
-# This is done by asking the BIOS except for the rows/columns
-# parameters in the default 80x25 mode -- these are set directly,
-# because some very obscure BIOSes supply insane values.
-mode_params:
-#ifdef CONFIG_VIDEO_SELECT
- cmpb $0, graphic_mode
- jnz mopar_gr
-#endif
- movb $0x03, %ah # Read cursor position
- xorb %bh, %bh
- int $0x10
- movw %dx, %fs:(PARAM_CURSOR_POS)
- movb $0x0f, %ah # Read page/mode/width
- int $0x10
- movw %bx, %fs:(PARAM_VIDEO_PAGE)
- movw %ax, %fs:(PARAM_VIDEO_MODE) # Video mode and screen width
- cmpb $0x7, %al # MDA/HGA => segment differs
- jnz mopar0
-
- movw $0xb000, video_segment
-mopar0: movw %gs:(0x485), %ax # Font size
- movw %ax, %fs:(PARAM_FONT_POINTS) # (valid only on EGA/VGA)
- movw force_size, %ax # Forced size?
- orw %ax, %ax
- jz mopar1
-
- movb %ah, %fs:(PARAM_VIDEO_COLS)
- movb %al, %fs:(PARAM_VIDEO_LINES)
- ret
-
-mopar1: movb $25, %al
- cmpb $0, adapter # If we are on CGA/MDA/HGA, the
- jz mopar2 # screen must have 25 lines.
-
- movb %gs:(0x484), %al # On EGA/VGA, use the EGA+ BIOS
- incb %al # location of max lines.
-mopar2: movb %al, %fs:(PARAM_VIDEO_LINES)
- ret
-
-#ifdef CONFIG_VIDEO_SELECT
-# Fetching of VESA frame buffer parameters
-mopar_gr:
- leaw modelist+1024, %di
- movb $0x23, %fs:(PARAM_HAVE_VGA)
- movw 16(%di), %ax
- movw %ax, %fs:(PARAM_LFB_LINELENGTH)
- movw 18(%di), %ax
- movw %ax, %fs:(PARAM_LFB_WIDTH)
- movw 20(%di), %ax
- movw %ax, %fs:(PARAM_LFB_HEIGHT)
- movb 25(%di), %al
- movb $0, %ah
- movw %ax, %fs:(PARAM_LFB_DEPTH)
- movb 29(%di), %al
- movb $0, %ah
- movw %ax, %fs:(PARAM_LFB_PAGES)
- movl 40(%di), %eax
- movl %eax, %fs:(PARAM_LFB_BASE)
- movl 31(%di), %eax
- movl %eax, %fs:(PARAM_LFB_COLORS)
- movl 35(%di), %eax
- movl %eax, %fs:(PARAM_LFB_COLORS+4)
- movw 0(%di), %ax
- movw %ax, %fs:(PARAM_VESA_ATTRIB)
-
-# get video mem size
- leaw modelist+1024, %di
- movw $0x4f00, %ax
- int $0x10
- xorl %eax, %eax
- movw 18(%di), %ax
- movl %eax, %fs:(PARAM_LFB_SIZE)
-
-# store mode capabilities
- movl 10(%di), %eax
- movl %eax, %fs:(PARAM_CAPABILITIES)
-
-# switching the DAC to 8-bit is for <= 8 bpp only
- movw %fs:(PARAM_LFB_DEPTH), %ax
- cmpw $8, %ax
- jg dac_done
-
-# get DAC switching capability
- xorl %eax, %eax
- movb 10(%di), %al
- testb $1, %al
- jz dac_set
-
-# attempt to switch DAC to 8-bit
- movw $0x4f08, %ax
- movw $0x0800, %bx
- int $0x10
- cmpw $0x004f, %ax
- jne dac_set
- movb %bh, dac_size # store actual DAC size
-
-dac_set:
-# set color size to DAC size
- movb dac_size, %al
- movb %al, %fs:(PARAM_LFB_COLORS+0)
- movb %al, %fs:(PARAM_LFB_COLORS+2)
- movb %al, %fs:(PARAM_LFB_COLORS+4)
- movb %al, %fs:(PARAM_LFB_COLORS+6)
-
-# set color offsets to 0
- movb $0, %fs:(PARAM_LFB_COLORS+1)
- movb $0, %fs:(PARAM_LFB_COLORS+3)
- movb $0, %fs:(PARAM_LFB_COLORS+5)
- movb $0, %fs:(PARAM_LFB_COLORS+7)
-
-dac_done:
-# get protected mode interface informations
- movw $0x4f0a, %ax
- xorw %bx, %bx
- xorw %di, %di
- int $0x10
- cmp $0x004f, %ax
- jnz no_pm
-
- movw %es, %fs:(PARAM_VESAPM_SEG)
- movw %di, %fs:(PARAM_VESAPM_OFF)
-no_pm: ret
-
-# The video mode menu
-mode_menu:
- leaw keymsg, %si # "Return/Space/Timeout" message
- call prtstr
- call flush
-nokey: call getkt
-
- cmpb $0x0d, %al # ENTER ?
- je listm # yes - manual mode selection
-
- cmpb $0x20, %al # SPACE ?
- je defmd1 # no - repeat
-
- call beep
- jmp nokey
-
-defmd1: ret # No mode chosen? Default 80x25
-
-listm: call mode_table # List mode table
-listm0: leaw name_bann, %si # Print adapter name
- call prtstr
- movw card_name, %si
- orw %si, %si
- jnz an2
-
- movb adapter, %al
- leaw old_name, %si
- orb %al, %al
- jz an1
-
- leaw ega_name, %si
- decb %al
- jz an1
-
- leaw vga_name, %si
- jmp an1
-
-an2: call prtstr
- leaw svga_name, %si
-an1: call prtstr
- leaw listhdr, %si # Table header
- call prtstr
- movb $0x30, %dl # DL holds mode number
- leaw modelist, %si
-lm1: cmpw $ASK_VGA, (%si) # End?
- jz lm2
-
- movb %dl, %al # Menu selection number
- call prtchr
- call prtsp2
- lodsw
- call prthw # Mode ID
- call prtsp2
- movb 0x1(%si), %al
- call prtdec # Rows
- movb $0x78, %al # the letter 'x'
- call prtchr
- lodsw
- call prtdec # Columns
- movb $0x0d, %al # New line
- call prtchr
- movb $0x0a, %al
- call prtchr
- incb %dl # Next character
- cmpb $0x3a, %dl
- jnz lm1
-
- movb $0x61, %dl
- jmp lm1
-
-lm2: leaw prompt, %si # Mode prompt
- call prtstr
- leaw edit_buf, %di # Editor buffer
-lm3: call getkey
- cmpb $0x0d, %al # Enter?
- jz lment
-
- cmpb $0x08, %al # Backspace?
- jz lmbs
-
- cmpb $0x20, %al # Printable?
- jc lm3
-
- cmpw $edit_buf+4, %di # Enough space?
- jz lm3
-
- stosb
- call prtchr
- jmp lm3
-
-lmbs: cmpw $edit_buf, %di # Backspace
- jz lm3
-
- decw %di
- movb $0x08, %al
- call prtchr
- call prtspc
- movb $0x08, %al
- call prtchr
- jmp lm3
-
-lment: movb $0, (%di)
- leaw crlft, %si
- call prtstr
- leaw edit_buf, %si
- cmpb $0, (%si) # Empty string = default mode
- jz lmdef
-
- cmpb $0, 1(%si) # One character = menu selection
- jz mnusel
-
- cmpw $0x6373, (%si) # "scan" => mode scanning
- jnz lmhx
-
- cmpw $0x6e61, 2(%si)
- jz lmscan
-
-lmhx: xorw %bx, %bx # Else => mode ID in hex
-lmhex: lodsb
- orb %al, %al
- jz lmuse1
-
- subb $0x30, %al
- jc lmbad
-
- cmpb $10, %al
- jc lmhx1
-
- subb $7, %al
- andb $0xdf, %al
- cmpb $10, %al
- jc lmbad
-
- cmpb $16, %al
- jnc lmbad
-
-lmhx1: shlw $4, %bx
- orb %al, %bl
- jmp lmhex
-
-lmuse1: movw %bx, %ax
- jmp lmuse
-
-mnusel: lodsb # Menu selection
- xorb %ah, %ah
- subb $0x30, %al
- jc lmbad
-
- cmpb $10, %al
- jc lmuse
-
- cmpb $0x61-0x30, %al
- jc lmbad
-
- subb $0x61-0x30-10, %al
- cmpb $36, %al
- jnc lmbad
-
-lmuse: call mode_set
- jc lmdef
-
-lmbad: leaw unknt, %si
- call prtstr
- jmp lm2
-lmscan: cmpb $0, adapter # Scanning only on EGA/VGA
- jz lmbad
-
- movw $0, mt_end # Scanning of modes is
- movb $1, scanning # done as new autodetection.
- call mode_table
- jmp listm0
-lmdef: ret
-
-# Additional parts of mode_set... (relative jumps, you know)
-setv7: # Video7 extended modes
- DO_STORE
- subb $VIDEO_FIRST_V7>>8, %bh
- movw $0x6f05, %ax
- int $0x10
- stc
- ret
-
-_setrec: jmp setrec # Ugly...
-_set_80x25: jmp set_80x25
-
-# Aliases for backward compatibility.
-setalias:
- movw $VIDEO_80x25, %ax
- incw %bx
- jz mode_set
-
- movb $VIDEO_8POINT-VIDEO_FIRST_SPECIAL, %al
- incw %bx
- jnz setbad # Fall-through!
-
-# Setting of user mode (AX=mode ID) => CF=success
-mode_set:
- movw %ax, %fs:(0x01fa) # Store mode for use in acpi_wakeup.S
- movw %ax, %bx
- cmpb $0xff, %ah
- jz setalias
-
- testb $VIDEO_RECALC>>8, %ah
- jnz _setrec
-
- cmpb $VIDEO_FIRST_RESOLUTION>>8, %ah
- jnc setres
-
- cmpb $VIDEO_FIRST_SPECIAL>>8, %ah
- jz setspc
-
- cmpb $VIDEO_FIRST_V7>>8, %ah
- jz setv7
-
- cmpb $VIDEO_FIRST_VESA>>8, %ah
- jnc check_vesa
-
- orb %ah, %ah
- jz setmenu
-
- decb %ah
- jz setbios
-
-setbad: clc
- movb $0, do_restore # The screen needn't be restored
- ret
-
-setvesa:
- DO_STORE
- subb $VIDEO_FIRST_VESA>>8, %bh
- movw $0x4f02, %ax # VESA BIOS mode set call
- int $0x10
- cmpw $0x004f, %ax # AL=4f if implemented
- jnz setbad # AH=0 if OK
-
- stc
- ret
-
-setbios:
- DO_STORE
- int $0x10 # Standard BIOS mode set call
- pushw %bx
- movb $0x0f, %ah # Check if really set
- int $0x10
- popw %bx
- cmpb %bl, %al
- jnz setbad
-
- stc
- ret
-
-setspc: xorb %bh, %bh # Set special mode
- cmpb $VIDEO_LAST_SPECIAL-VIDEO_FIRST_SPECIAL, %bl
- jnc setbad
-
- addw %bx, %bx
- jmp *spec_inits(%bx)
-
-setmenu:
- orb %al, %al # 80x25 is an exception
- jz _set_80x25
-
- pushw %bx # Set mode chosen from menu
- call mode_table # Build the mode table
- popw %ax
- shlw $2, %ax
- addw %ax, %si
- cmpw %di, %si
- jnc setbad
-
- movw (%si), %ax # Fetch mode ID
-_m_s: jmp mode_set
-
-setres: pushw %bx # Set mode chosen by resolution
- call mode_table
- popw %bx
- xchgb %bl, %bh
-setr1: lodsw
- cmpw $ASK_VGA, %ax # End of the list?
- jz setbad
-
- lodsw
- cmpw %bx, %ax
- jnz setr1
-
- movw -4(%si), %ax # Fetch mode ID
- jmp _m_s
-
-check_vesa:
-#ifdef CONFIG_FIRMWARE_EDID
- leaw modelist+1024, %di
- movw $0x4f00, %ax
- int $0x10
- cmpw $0x004f, %ax
- jnz setbad
-
- movw 4(%di), %ax
- movw %ax, vbe_version
-#endif
- leaw modelist+1024, %di
- subb $VIDEO_FIRST_VESA>>8, %bh
- movw %bx, %cx # Get mode information structure
- movw $0x4f01, %ax
- int $0x10
- addb $VIDEO_FIRST_VESA>>8, %bh
- cmpw $0x004f, %ax
- jnz setbad
-
- movb (%di), %al # Check capabilities.
- andb $0x19, %al
- cmpb $0x09, %al
- jz setvesa # This is a text mode
-
- movb (%di), %al # Check capabilities.
- andb $0x99, %al
- cmpb $0x99, %al
- jnz _setbad # Doh! No linear frame buffer.
-
- subb $VIDEO_FIRST_VESA>>8, %bh
- orw $0x4000, %bx # Use linear frame buffer
- movw $0x4f02, %ax # VESA BIOS mode set call
- int $0x10
- cmpw $0x004f, %ax # AL=4f if implemented
- jnz _setbad # AH=0 if OK
-
- movb $1, graphic_mode # flag graphic mode
- movb $0, do_restore # no screen restore
- stc
- ret
-
-_setbad: jmp setbad # Ugly...
-
-# Recalculate vertical display end registers -- this fixes various
-# inconsistencies of extended modes on many adapters. Called when
-# the VIDEO_RECALC flag is set in the mode ID.
-
-setrec: subb $VIDEO_RECALC>>8, %ah # Set the base mode
- call mode_set
- jnc rct3
-
- movw %gs:(0x485), %ax # Font size in pixels
- movb %gs:(0x484), %bl # Number of rows
- incb %bl
- mulb %bl # Number of visible
- decw %ax # scan lines - 1
- movw $0x3d4, %dx
- movw %ax, %bx
- movb $0x12, %al # Lower 8 bits
- movb %bl, %ah
- outw %ax, %dx
- movb $0x07, %al # Bits 8 and 9 in the overflow register
- call inidx
- xchgb %al, %ah
- andb $0xbd, %ah
- shrb %bh
- jnc rct1
- orb $0x02, %ah
-rct1: shrb %bh
- jnc rct2
- orb $0x40, %ah
-rct2: movb $0x07, %al
- outw %ax, %dx
- stc
-rct3: ret
-
-# Table of routines for setting of the special modes.
-spec_inits:
- .word set_80x25
- .word set_8pixel
- .word set_80x43
- .word set_80x28
- .word set_current
- .word set_80x30
- .word set_80x34
- .word set_80x60
- .word set_gfx
-
-# Set the 80x25 mode. If already set, do nothing.
-set_80x25:
- movw $0x5019, force_size # Override possibly broken BIOS
-use_80x25:
-#ifdef CONFIG_VIDEO_400_HACK
- movw $0x1202, %ax # Force 400 scan lines
- movb $0x30, %bl
- int $0x10
-#else
- movb $0x0f, %ah # Get current mode ID
- int $0x10
- cmpw $0x5007, %ax # Mode 7 (80x25 mono) is the only one available
- jz st80 # on CGA/MDA/HGA and is also available on EGAM
-
- cmpw $0x5003, %ax # Unknown mode, force 80x25 color
- jnz force3
-
-st80: cmpb $0, adapter # CGA/MDA/HGA => mode 3/7 is always 80x25
- jz set80
-
- movb %gs:(0x0484), %al # This is EGA+ -- beware of 80x50 etc.
- orb %al, %al # Some buggy BIOS'es set 0 rows
- jz set80
-
- cmpb $24, %al # It's hopefully correct
- jz set80
-#endif /* CONFIG_VIDEO_400_HACK */
-force3: DO_STORE
- movw $0x0003, %ax # Forced set
- int $0x10
-set80: stc
- ret
-
-# Set the 80x50/80x43 8-pixel mode. Simple BIOS calls.
-set_8pixel:
- DO_STORE
- call use_80x25 # The base is 80x25
-set_8pt:
- movw $0x1112, %ax # Use 8x8 font
- xorb %bl, %bl
- int $0x10
- movw $0x1200, %ax # Use alternate print screen
- movb $0x20, %bl
- int $0x10
- movw $0x1201, %ax # Turn off cursor emulation
- movb $0x34, %bl
- int $0x10
- movb $0x01, %ah # Define cursor scan lines 6-7
- movw $0x0607, %cx
- int $0x10
-set_current:
- stc
- ret
-
-# Set the 80x28 mode. This mode works on all VGA's, because it's a standard
-# 80x25 mode with 14-point fonts instead of 16-point.
-set_80x28:
- DO_STORE
- call use_80x25 # The base is 80x25
-set14: movw $0x1111, %ax # Use 9x14 font
- xorb %bl, %bl
- int $0x10
- movb $0x01, %ah # Define cursor scan lines 11-12
- movw $0x0b0c, %cx
- int $0x10
- stc
- ret
-
-# Set the 80x43 mode. This mode is works on all VGA's.
-# It's a 350-scanline mode with 8-pixel font.
-set_80x43:
- DO_STORE
- movw $0x1201, %ax # Set 350 scans
- movb $0x30, %bl
- int $0x10
- movw $0x0003, %ax # Reset video mode
- int $0x10
- jmp set_8pt # Use 8-pixel font
-
-# Set the 80x30 mode (all VGA's). 480 scanlines, 16-pixel font.
-set_80x30:
- call use_80x25 # Start with real 80x25
- DO_STORE
- movw $0x3cc, %dx # Get CRTC port
- inb %dx, %al
- movb $0xd4, %dl
- rorb %al # Mono or color?
- jc set48a
-
- movb $0xb4, %dl
-set48a: movw $0x0c11, %ax # Vertical sync end (also unlocks CR0-7)
- call outidx
- movw $0x0b06, %ax # Vertical total
- call outidx
- movw $0x3e07, %ax # (Vertical) overflow
- call outidx
- movw $0xea10, %ax # Vertical sync start
- call outidx
- movw $0xdf12, %ax # Vertical display end
- call outidx
- movw $0xe715, %ax # Vertical blank start
- call outidx
- movw $0x0416, %ax # Vertical blank end
- call outidx
- pushw %dx
- movb $0xcc, %dl # Misc output register (read)
- inb %dx, %al
- movb $0xc2, %dl # (write)
- andb $0x0d, %al # Preserve clock select bits and color bit
- orb $0xe2, %al # Set correct sync polarity
- outb %al, %dx
- popw %dx
- movw $0x501e, force_size
- stc # That's all.
- ret
-
-# Set the 80x34 mode (all VGA's). 480 scans, 14-pixel font.
-set_80x34:
- call set_80x30 # Set 480 scans
- call set14 # And 14-pt font
- movw $0xdb12, %ax # VGA vertical display end
- movw $0x5022, force_size
-setvde: call outidx
- stc
- ret
-
-# Set the 80x60 mode (all VGA's). 480 scans, 8-pixel font.
-set_80x60:
- call set_80x30 # Set 480 scans
- call set_8pt # And 8-pt font
- movw $0xdf12, %ax # VGA vertical display end
- movw $0x503c, force_size
- jmp setvde
-
-# Special hack for ThinkPad graphics
-set_gfx:
-#ifdef CONFIG_VIDEO_GFX_HACK
- movw $VIDEO_GFX_BIOS_AX, %ax
- movw $VIDEO_GFX_BIOS_BX, %bx
- int $0x10
- movw $VIDEO_GFX_DUMMY_RESOLUTION, force_size
- stc
-#endif
- ret
-
-#ifdef CONFIG_VIDEO_RETAIN
-
-# Store screen contents to temporary buffer.
-store_screen:
- cmpb $0, do_restore # Already stored?
- jnz stsr
-
- testb $CAN_USE_HEAP, loadflags # Have we space for storing?
- jz stsr
-
- pushw %ax
- pushw %bx
- pushw force_size # Don't force specific size
- movw $0, force_size
- call mode_params # Obtain params of current mode
- popw force_size
- movb %fs:(PARAM_VIDEO_LINES), %ah
- movb %fs:(PARAM_VIDEO_COLS), %al
- movw %ax, %bx # BX=dimensions
- mulb %ah
- movw %ax, %cx # CX=number of characters
- addw %ax, %ax # Calculate image size
- addw $modelist+1024+4, %ax
- cmpw heap_end_ptr, %ax
- jnc sts1 # Unfortunately, out of memory
-
- movw %fs:(PARAM_CURSOR_POS), %ax # Store mode params
- leaw modelist+1024, %di
- stosw
- movw %bx, %ax
- stosw
- pushw %ds # Store the screen
- movw video_segment, %ds
- xorw %si, %si
- rep
- movsw
- popw %ds
- incb do_restore # Screen will be restored later
-sts1: popw %bx
- popw %ax
-stsr: ret
-
-# Restore screen contents from temporary buffer.
-restore_screen:
- cmpb $0, do_restore # Has the screen been stored?
- jz res1
-
- call mode_params # Get parameters of current mode
- movb %fs:(PARAM_VIDEO_LINES), %cl
- movb %fs:(PARAM_VIDEO_COLS), %ch
- leaw modelist+1024, %si # Screen buffer
- lodsw # Set cursor position
- movw %ax, %dx
- cmpb %cl, %dh
- jc res2
-
- movb %cl, %dh
- decb %dh
-res2: cmpb %ch, %dl
- jc res3
-
- movb %ch, %dl
- decb %dl
-res3: movb $0x02, %ah
- movb $0x00, %bh
- int $0x10
- lodsw # Display size
- movb %ah, %dl # DL=number of lines
- movb $0, %ah # BX=phys. length of orig. line
- movw %ax, %bx
- cmpb %cl, %dl # Too many?
- jc res4
-
- pushw %ax
- movb %dl, %al
- subb %cl, %al
- mulb %bl
- addw %ax, %si
- addw %ax, %si
- popw %ax
- movb %cl, %dl
-res4: cmpb %ch, %al # Too wide?
- jc res5
-
- movb %ch, %al # AX=width of src. line
-res5: movb $0, %cl
- xchgb %ch, %cl
- movw %cx, %bp # BP=width of dest. line
- pushw %es
- movw video_segment, %es
- xorw %di, %di # Move the data
- addw %bx, %bx # Convert BX and BP to _bytes_
- addw %bp, %bp
-res6: pushw %si
- pushw %di
- movw %ax, %cx
- rep
- movsw
- popw %di
- popw %si
- addw %bp, %di
- addw %bx, %si
- decb %dl
- jnz res6
-
- popw %es # Done
-res1: ret
-#endif /* CONFIG_VIDEO_RETAIN */
-
-# Write to indexed VGA register (AL=index, AH=data, DX=index reg. port)
-outidx: outb %al, %dx
- pushw %ax
- movb %ah, %al
- incw %dx
- outb %al, %dx
- decw %dx
- popw %ax
- ret
-
-# Build the table of video modes (stored after the setup.S code at the
-# `modelist' label. Each video mode record looks like:
-# .word MODE-ID (our special mode ID (see above))
-# .byte rows (number of rows)
-# .byte columns (number of columns)
-# Returns address of the end of the table in DI, the end is marked
-# with a ASK_VGA ID.
-mode_table:
- movw mt_end, %di # Already filled?
- orw %di, %di
- jnz mtab1x
-
- leaw modelist, %di # Store standard modes:
- movl $VIDEO_80x25 + 0x50190000, %eax # The 80x25 mode (ALL)
- stosl
- movb adapter, %al # CGA/MDA/HGA -- no more modes
- orb %al, %al
- jz mtabe
-
- decb %al
- jnz mtabv
-
- movl $VIDEO_8POINT + 0x502b0000, %eax # The 80x43 EGA mode
- stosl
- jmp mtabe
-
-mtab1x: jmp mtab1
-
-mtabv: leaw vga_modes, %si # All modes for std VGA
- movw $vga_modes_end-vga_modes, %cx
- rep # I'm unable to use movsw as I don't know how to store a half
- movsb # of the expression above to cx without using explicit shr.
-
- cmpb $0, scanning # Mode scan requested?
- jz mscan1
-
- call mode_scan
-mscan1:
-
-#ifdef CONFIG_VIDEO_LOCAL
- call local_modes
-#endif /* CONFIG_VIDEO_LOCAL */
-
-#ifdef CONFIG_VIDEO_VESA
- call vesa_modes # Detect VESA VGA modes
-#endif /* CONFIG_VIDEO_VESA */
-
-#ifdef CONFIG_VIDEO_SVGA
- cmpb $0, scanning # Bypass when scanning
- jnz mscan2
-
- call svga_modes # Detect SVGA cards & modes
-mscan2:
-#endif /* CONFIG_VIDEO_SVGA */
-
-mtabe:
-
-#ifdef CONFIG_VIDEO_COMPACT
- leaw modelist, %si
- movw %di, %dx
- movw %si, %di
-cmt1: cmpw %dx, %si # Scan all modes
- jz cmt2
-
- leaw modelist, %bx # Find in previous entries
- movw 2(%si), %cx
-cmt3: cmpw %bx, %si
- jz cmt4
-
- cmpw 2(%bx), %cx # Found => don't copy this entry
- jz cmt5
-
- addw $4, %bx
- jmp cmt3
-
-cmt4: movsl # Copy entry
- jmp cmt1
-
-cmt5: addw $4, %si # Skip entry
- jmp cmt1
-
-cmt2:
-#endif /* CONFIG_VIDEO_COMPACT */
-
- movw $ASK_VGA, (%di) # End marker
- movw %di, mt_end
-mtab1: leaw modelist, %si # SI=mode list, DI=list end
-ret0: ret
-
-# Modes usable on all standard VGAs
-vga_modes:
- .word VIDEO_8POINT
- .word 0x5032 # 80x50
- .word VIDEO_80x43
- .word 0x502b # 80x43
- .word VIDEO_80x28
- .word 0x501c # 80x28
- .word VIDEO_80x30
- .word 0x501e # 80x30
- .word VIDEO_80x34
- .word 0x5022 # 80x34
- .word VIDEO_80x60
- .word 0x503c # 80x60
-#ifdef CONFIG_VIDEO_GFX_HACK
- .word VIDEO_GFX_HACK
- .word VIDEO_GFX_DUMMY_RESOLUTION
-#endif
-
-vga_modes_end:
-# Detect VESA modes.
-
-#ifdef CONFIG_VIDEO_VESA
-vesa_modes:
- cmpb $2, adapter # VGA only
- jnz ret0
-
- movw %di, %bp # BP=original mode table end
- addw $0x200, %di # Buffer space
- movw $0x4f00, %ax # VESA Get card info call
- int $0x10
- movw %bp, %di
- cmpw $0x004f, %ax # Successful?
- jnz ret0
-
- cmpw $0x4556, 0x200(%di)
- jnz ret0
-
- cmpw $0x4153, 0x202(%di)
- jnz ret0
-
- movw $vesa_name, card_name # Set name to "VESA VGA"
- pushw %gs
- lgsw 0x20e(%di), %si # GS:SI=mode list
- movw $128, %cx # Iteration limit
-vesa1:
-# gas version 2.9.1, using BFD version 2.9.1.0.23 buggers the next inst.
-# XXX: lodsw %gs:(%si), %ax # Get next mode in the list
- gs; lodsw
- cmpw $0xffff, %ax # End of the table?
- jz vesar
-
- cmpw $0x0080, %ax # Check validity of mode ID
- jc vesa2
-
- orb %ah, %ah # Valid IDs: 0x0000-0x007f/0x0100-0x07ff
- jz vesan # Certain BIOSes report 0x80-0xff!
-
- cmpw $0x0800, %ax
- jnc vesae
-
-vesa2: pushw %cx
- movw %ax, %cx # Get mode information structure
- movw $0x4f01, %ax
- int $0x10
- movw %cx, %bx # BX=mode number
- addb $VIDEO_FIRST_VESA>>8, %bh
- popw %cx
- cmpw $0x004f, %ax
- jnz vesan # Don't report errors (buggy BIOSES)
-
- movb (%di), %al # Check capabilities. We require
- andb $0x19, %al # a color text mode.
- cmpb $0x09, %al
- jnz vesan
-
- cmpw $0xb800, 8(%di) # Standard video memory address required
- jnz vesan
-
- testb $2, (%di) # Mode characteristics supplied?
- movw %bx, (%di) # Store mode number
- jz vesa3
-
- xorw %dx, %dx
- movw 0x12(%di), %bx # Width
- orb %bh, %bh
- jnz vesan
-
- movb %bl, 0x3(%di)
- movw 0x14(%di), %ax # Height
- orb %ah, %ah
- jnz vesan
-
- movb %al, 2(%di)
- mulb %bl
- cmpw $8193, %ax # Small enough for Linux console driver?
- jnc vesan
-
- jmp vesaok
-
-vesa3: subw $0x8108, %bx # This mode has no detailed info specified,
- jc vesan # so it must be a standard VESA mode.
-
- cmpw $5, %bx
- jnc vesan
-
- movw vesa_text_mode_table(%bx), %ax
- movw %ax, 2(%di)
-vesaok: addw $4, %di # The mode is valid. Store it.
-vesan: loop vesa1 # Next mode. Limit exceeded => error
-vesae: leaw vesaer, %si
- call prtstr
- movw %bp, %di # Discard already found modes.
-vesar: popw %gs
- ret
-
-# Dimensions of standard VESA text modes
-vesa_text_mode_table:
- .byte 60, 80 # 0108
- .byte 25, 132 # 0109
- .byte 43, 132 # 010A
- .byte 50, 132 # 010B
- .byte 60, 132 # 010C
-#endif /* CONFIG_VIDEO_VESA */
-
-# Scan for video modes. A bit dirty, but should work.
-mode_scan:
- movw $0x0100, %cx # Start with mode 0
-scm1: movb $0, %ah # Test the mode
- movb %cl, %al
- int $0x10
- movb $0x0f, %ah
- int $0x10
- cmpb %cl, %al
- jnz scm2 # Mode not set
-
- movw $0x3c0, %dx # Test if it's a text mode
- movb $0x10, %al # Mode bits
- call inidx
- andb $0x03, %al
- jnz scm2
-
- movb $0xce, %dl # Another set of mode bits
- movb $0x06, %al
- call inidx
- shrb %al
- jc scm2
-
- movb $0xd4, %dl # Cursor location
- movb $0x0f, %al
- call inidx
- orb %al, %al
- jnz scm2
-
- movw %cx, %ax # Ok, store the mode
- stosw
- movb %gs:(0x484), %al # Number of rows
- incb %al
- stosb
- movw %gs:(0x44a), %ax # Number of columns
- stosb
-scm2: incb %cl
- jns scm1
-
- movw $0x0003, %ax # Return back to mode 3
- int $0x10
- ret
-
-tstidx: outw %ax, %dx # OUT DX,AX and inidx
-inidx: outb %al, %dx # Read from indexed VGA register
- incw %dx # AL=index, DX=index reg port -> AL=data
- inb %dx, %al
- decw %dx
- ret
-
-# Try to detect type of SVGA card and supply (usually approximate) video
-# mode table for it.
-
-#ifdef CONFIG_VIDEO_SVGA
-svga_modes:
- leaw svga_table, %si # Test all known SVGA adapters
-dosvga: lodsw
- movw %ax, %bp # Default mode table
- orw %ax, %ax
- jz didsv1
-
- lodsw # Pointer to test routine
- pushw %si
- pushw %di
- pushw %es
- movw $0xc000, %bx
- movw %bx, %es
- call *%ax # Call test routine
- popw %es
- popw %di
- popw %si
- orw %bp, %bp
- jz dosvga
-
- movw %bp, %si # Found, copy the modes
- movb svga_prefix, %ah
-cpsvga: lodsb
- orb %al, %al
- jz didsv
-
- stosw
- movsw
- jmp cpsvga
-
-didsv: movw %si, card_name # Store pointer to card name
-didsv1: ret
-
-# Table of all known SVGA cards. For each card, we store a pointer to
-# a table of video modes supported by the card and a pointer to a routine
-# used for testing of presence of the card. The video mode table is always
-# followed by the name of the card or the chipset.
-svga_table:
- .word ati_md, ati_test
- .word oak_md, oak_test
- .word paradise_md, paradise_test
- .word realtek_md, realtek_test
- .word s3_md, s3_test
- .word chips_md, chips_test
- .word video7_md, video7_test
- .word cirrus5_md, cirrus5_test
- .word cirrus6_md, cirrus6_test
- .word cirrus1_md, cirrus1_test
- .word ahead_md, ahead_test
- .word everex_md, everex_test
- .word genoa_md, genoa_test
- .word trident_md, trident_test
- .word tseng_md, tseng_test
- .word 0
-
-# Test routines and mode tables:
-
-# S3 - The test algorithm was taken from the SuperProbe package
-# for XFree86 1.2.1. Report bugs to Christoph.Niemann@linux.org
-s3_test:
- movw $0x0f35, %cx # we store some constants in cl/ch
- movw $0x03d4, %dx
- movb $0x38, %al
- call inidx
- movb %al, %bh # store current CRT-register 0x38
- movw $0x0038, %ax
- call outidx # disable writing to special regs
- movb %cl, %al # check whether we can write special reg 0x35
- call inidx
- movb %al, %bl # save the current value of CRT reg 0x35
- andb $0xf0, %al # clear bits 0-3
- movb %al, %ah
- movb %cl, %al # and write it to CRT reg 0x35
- call outidx
- call inidx # now read it back
- andb %ch, %al # clear the upper 4 bits
- jz s3_2 # the first test failed. But we have a
-
- movb %bl, %ah # second chance
- movb %cl, %al
- call outidx
- jmp s3_1 # do the other tests
-
-s3_2: movw %cx, %ax # load ah with 0xf and al with 0x35
- orb %bl, %ah # set the upper 4 bits of ah with the orig value
- call outidx # write ...
- call inidx # ... and reread
- andb %cl, %al # turn off the upper 4 bits
- pushw %ax
- movb %bl, %ah # restore old value in register 0x35
- movb %cl, %al
- call outidx
- popw %ax
- cmpb %ch, %al # setting lower 4 bits was successful => bad
- je no_s3 # writing is allowed => this is not an S3
-
-s3_1: movw $0x4838, %ax # allow writing to special regs by putting
- call outidx # magic number into CRT-register 0x38
- movb %cl, %al # check whether we can write special reg 0x35
- call inidx
- movb %al, %bl
- andb $0xf0, %al
- movb %al, %ah
- movb %cl, %al
- call outidx
- call inidx
- andb %ch, %al
- jnz no_s3 # no, we can't write => no S3
-
- movw %cx, %ax
- orb %bl, %ah
- call outidx
- call inidx
- andb %ch, %al
- pushw %ax
- movb %bl, %ah # restore old value in register 0x35
- movb %cl, %al
- call outidx
- popw %ax
- cmpb %ch, %al
- jne no_s31 # writing not possible => no S3
- movb $0x30, %al
- call inidx # now get the S3 id ...
- leaw idS3, %di
- movw $0x10, %cx
- repne
- scasb
- je no_s31
-
- movb %bh, %ah
- movb $0x38, %al
- jmp s3rest
-
-no_s3: movb $0x35, %al # restore CRT register 0x35
- movb %bl, %ah
- call outidx
-no_s31: xorw %bp, %bp # Detection failed
-s3rest: movb %bh, %ah
- movb $0x38, %al # restore old value of CRT register 0x38
- jmp outidx
-
-idS3: .byte 0x81, 0x82, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95
- .byte 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa8, 0xb0
-
-s3_md: .byte 0x54, 0x2b, 0x84
- .byte 0x55, 0x19, 0x84
- .byte 0
- .ascii "S3"
- .byte 0
-
-# ATI cards.
-ati_test:
- leaw idati, %si
- movw $0x31, %di
- movw $0x09, %cx
- repe
- cmpsb
- je atiok
-
- xorw %bp, %bp
-atiok: ret
-
-idati: .ascii "761295520"
-
-ati_md: .byte 0x23, 0x19, 0x84
- .byte 0x33, 0x2c, 0x84
- .byte 0x22, 0x1e, 0x64
- .byte 0x21, 0x19, 0x64
- .byte 0x58, 0x21, 0x50
- .byte 0x5b, 0x1e, 0x50
- .byte 0
- .ascii "ATI"
- .byte 0
-
-# AHEAD
-ahead_test:
- movw $0x200f, %ax
- movw $0x3ce, %dx
- outw %ax, %dx
- incw %dx
- inb %dx, %al
- cmpb $0x20, %al
- je isahed
-
- cmpb $0x21, %al
- je isahed
-
- xorw %bp, %bp
-isahed: ret
-
-ahead_md:
- .byte 0x22, 0x2c, 0x84
- .byte 0x23, 0x19, 0x84
- .byte 0x24, 0x1c, 0x84
- .byte 0x2f, 0x32, 0xa0
- .byte 0x32, 0x22, 0x50
- .byte 0x34, 0x42, 0x50
- .byte 0
- .ascii "Ahead"
- .byte 0
-
-# Chips & Tech.
-chips_test:
- movw $0x3c3, %dx
- inb %dx, %al
- orb $0x10, %al
- outb %al, %dx
- movw $0x104, %dx
- inb %dx, %al
- movb %al, %bl
- movw $0x3c3, %dx
- inb %dx, %al
- andb $0xef, %al
- outb %al, %dx
- cmpb $0xa5, %bl
- je cantok
-
- xorw %bp, %bp
-cantok: ret
-
-chips_md:
- .byte 0x60, 0x19, 0x84
- .byte 0x61, 0x32, 0x84
- .byte 0
- .ascii "Chips & Technologies"
- .byte 0
-
-# Cirrus Logic 5X0
-cirrus1_test:
- movw $0x3d4, %dx
- movb $0x0c, %al
- outb %al, %dx
- incw %dx
- inb %dx, %al
- movb %al, %bl
- xorb %al, %al
- outb %al, %dx
- decw %dx
- movb $0x1f, %al
- outb %al, %dx
- incw %dx
- inb %dx, %al
- movb %al, %bh
- xorb %ah, %ah
- shlb $4, %al
- movw %ax, %cx
- movb %bh, %al
- shrb $4, %al
- addw %ax, %cx
- shlw $8, %cx
- addw $6, %cx
- movw %cx, %ax
- movw $0x3c4, %dx
- outw %ax, %dx
- incw %dx
- inb %dx, %al
- andb %al, %al
- jnz nocirr
-
- movb %bh, %al
- outb %al, %dx
- inb %dx, %al
- cmpb $0x01, %al
- je iscirr
-
-nocirr: xorw %bp, %bp
-iscirr: movw $0x3d4, %dx
- movb %bl, %al
- xorb %ah, %ah
- shlw $8, %ax
- addw $0x0c, %ax
- outw %ax, %dx
- ret
-
-cirrus1_md:
- .byte 0x1f, 0x19, 0x84
- .byte 0x20, 0x2c, 0x84
- .byte 0x22, 0x1e, 0x84
- .byte 0x31, 0x25, 0x64
- .byte 0
- .ascii "Cirrus Logic 5X0"
- .byte 0
-
-# Cirrus Logic 54XX
-cirrus5_test:
- movw $0x3c4, %dx
- movb $6, %al
- call inidx
- movb %al, %bl # BL=backup
- movw $6, %ax
- call tstidx
- cmpb $0x0f, %al
- jne c5fail
-
- movw $0x1206, %ax
- call tstidx
- cmpb $0x12, %al
- jne c5fail
-
- movb $0x1e, %al
- call inidx
- movb %al, %bh
- movb %bh, %ah
- andb $0xc0, %ah
- movb $0x1e, %al
- call tstidx
- andb $0x3f, %al
- jne c5xx
-
- movb $0x1e, %al
- movb %bh, %ah
- orb $0x3f, %ah
- call tstidx
- xorb $0x3f, %al
- andb $0x3f, %al
-c5xx: pushf
- movb $0x1e, %al
- movb %bh, %ah
- outw %ax, %dx
- popf
- je c5done
-
-c5fail: xorw %bp, %bp
-c5done: movb $6, %al
- movb %bl, %ah
- outw %ax, %dx
- ret
-
-cirrus5_md:
- .byte 0x14, 0x19, 0x84
- .byte 0x54, 0x2b, 0x84
- .byte 0
- .ascii "Cirrus Logic 54XX"
- .byte 0
-
-# Cirrus Logic 64XX -- no known extra modes, but must be identified, because
-# it's misidentified by the Ahead test.
-cirrus6_test:
- movw $0x3ce, %dx
- movb $0x0a, %al
- call inidx
- movb %al, %bl # BL=backup
- movw $0xce0a, %ax
- call tstidx
- orb %al, %al
- jne c2fail
-
- movw $0xec0a, %ax
- call tstidx
- cmpb $0x01, %al
- jne c2fail
-
- movb $0xaa, %al
- call inidx # 4X, 5X, 7X and 8X are valid 64XX chip ID's.
- shrb $4, %al
- subb $4, %al
- jz c6done
-
- decb %al
- jz c6done
-
- subb $2, %al
- jz c6done
-
- decb %al
- jz c6done
-
-c2fail: xorw %bp, %bp
-c6done: movb $0x0a, %al
- movb %bl, %ah
- outw %ax, %dx
- ret
-
-cirrus6_md:
- .byte 0
- .ascii "Cirrus Logic 64XX"
- .byte 0
-
-# Everex / Trident
-everex_test:
- movw $0x7000, %ax
- xorw %bx, %bx
- int $0x10
- cmpb $0x70, %al
- jne noevrx
-
- shrw $4, %dx
- cmpw $0x678, %dx
- je evtrid
-
- cmpw $0x236, %dx
- jne evrxok
-
-evtrid: leaw trident_md, %bp
-evrxok: ret
-
-noevrx: xorw %bp, %bp
- ret
-
-everex_md:
- .byte 0x03, 0x22, 0x50
- .byte 0x04, 0x3c, 0x50
- .byte 0x07, 0x2b, 0x64
- .byte 0x08, 0x4b, 0x64
- .byte 0x0a, 0x19, 0x84
- .byte 0x0b, 0x2c, 0x84
- .byte 0x16, 0x1e, 0x50
- .byte 0x18, 0x1b, 0x64
- .byte 0x21, 0x40, 0xa0
- .byte 0x40, 0x1e, 0x84
- .byte 0
- .ascii "Everex/Trident"
- .byte 0
-
-# Genoa.
-genoa_test:
- leaw idgenoa, %si # Check Genoa 'clues'
- xorw %ax, %ax
- movb %es:(0x37), %al
- movw %ax, %di
- movw $0x04, %cx
- decw %si
- decw %di
-l1: incw %si
- incw %di
- movb (%si), %al
- testb %al, %al
- jz l2
-
- cmpb %es:(%di), %al
-l2: loope l1
- orw %cx, %cx
- je isgen
-
- xorw %bp, %bp
-isgen: ret
-
-idgenoa: .byte 0x77, 0x00, 0x99, 0x66
-
-genoa_md:
- .byte 0x58, 0x20, 0x50
- .byte 0x5a, 0x2a, 0x64
- .byte 0x60, 0x19, 0x84
- .byte 0x61, 0x1d, 0x84
- .byte 0x62, 0x20, 0x84
- .byte 0x63, 0x2c, 0x84
- .byte 0x64, 0x3c, 0x84
- .byte 0x6b, 0x4f, 0x64
- .byte 0x72, 0x3c, 0x50
- .byte 0x74, 0x42, 0x50
- .byte 0x78, 0x4b, 0x64
- .byte 0
- .ascii "Genoa"
- .byte 0
-
-# OAK
-oak_test:
- leaw idoakvga, %si
- movw $0x08, %di
- movw $0x08, %cx
- repe
- cmpsb
- je isoak
-
- xorw %bp, %bp
-isoak: ret
-
-idoakvga: .ascii "OAK VGA "
-
-oak_md: .byte 0x4e, 0x3c, 0x50
- .byte 0x4f, 0x3c, 0x84
- .byte 0x50, 0x19, 0x84
- .byte 0x51, 0x2b, 0x84
- .byte 0
- .ascii "OAK"
- .byte 0
-
-# WD Paradise.
-paradise_test:
- leaw idparadise, %si
- movw $0x7d, %di
- movw $0x04, %cx
- repe
- cmpsb
- je ispara
-
- xorw %bp, %bp
-ispara: ret
-
-idparadise: .ascii "VGA="
-
-paradise_md:
- .byte 0x41, 0x22, 0x50
- .byte 0x47, 0x1c, 0x84
- .byte 0x55, 0x19, 0x84
- .byte 0x54, 0x2c, 0x84
- .byte 0
- .ascii "Paradise"
- .byte 0
-
-# Trident.
-trident_test:
- movw $0x3c4, %dx
- movb $0x0e, %al
- outb %al, %dx
- incw %dx
- inb %dx, %al
- xchgb %al, %ah
- xorb %al, %al
- outb %al, %dx
- inb %dx, %al
- xchgb %ah, %al
- movb %al, %bl # Strange thing ... in the book this wasn't
- andb $0x02, %bl # necessary but it worked on my card which
- jz setb2 # is a trident. Without it the screen goes
- # blurred ...
- andb $0xfd, %al
- jmp clrb2
-
-setb2: orb $0x02, %al
-clrb2: outb %al, %dx
- andb $0x0f, %ah
- cmpb $0x02, %ah
- je istrid
-
- xorw %bp, %bp
-istrid: ret
-
-trident_md:
- .byte 0x50, 0x1e, 0x50
- .byte 0x51, 0x2b, 0x50
- .byte 0x52, 0x3c, 0x50
- .byte 0x57, 0x19, 0x84
- .byte 0x58, 0x1e, 0x84
- .byte 0x59, 0x2b, 0x84
- .byte 0x5a, 0x3c, 0x84
- .byte 0
- .ascii "Trident"
- .byte 0
-
-# Tseng.
-tseng_test:
- movw $0x3cd, %dx
- inb %dx, %al # Could things be this simple ! :-)
- movb %al, %bl
- movb $0x55, %al
- outb %al, %dx
- inb %dx, %al
- movb %al, %ah
- movb %bl, %al
- outb %al, %dx
- cmpb $0x55, %ah
- je istsen
-
-isnot: xorw %bp, %bp
-istsen: ret
-
-tseng_md:
- .byte 0x26, 0x3c, 0x50
- .byte 0x2a, 0x28, 0x64
- .byte 0x23, 0x19, 0x84
- .byte 0x24, 0x1c, 0x84
- .byte 0x22, 0x2c, 0x84
- .byte 0x21, 0x3c, 0x84
- .byte 0
- .ascii "Tseng"
- .byte 0
-
-# Video7.
-video7_test:
- movw $0x3cc, %dx
- inb %dx, %al
- movw $0x3b4, %dx
- andb $0x01, %al
- jz even7
-
- movw $0x3d4, %dx
-even7: movb $0x0c, %al
- outb %al, %dx
- incw %dx
- inb %dx, %al
- movb %al, %bl
- movb $0x55, %al
- outb %al, %dx
- inb %dx, %al
- decw %dx
- movb $0x1f, %al
- outb %al, %dx
- incw %dx
- inb %dx, %al
- movb %al, %bh
- decw %dx
- movb $0x0c, %al
- outb %al, %dx
- incw %dx
- movb %bl, %al
- outb %al, %dx
- movb $0x55, %al
- xorb $0xea, %al
- cmpb %bh, %al
- jne isnot
-
- movb $VIDEO_FIRST_V7>>8, svga_prefix # Use special mode switching
- ret
-
-video7_md:
- .byte 0x40, 0x2b, 0x50
- .byte 0x43, 0x3c, 0x50
- .byte 0x44, 0x3c, 0x64
- .byte 0x41, 0x19, 0x84
- .byte 0x42, 0x2c, 0x84
- .byte 0x45, 0x1c, 0x84
- .byte 0
- .ascii "Video 7"
- .byte 0
-
-# Realtek VGA
-realtek_test:
- leaw idrtvga, %si
- movw $0x45, %di
- movw $0x0b, %cx
- repe
- cmpsb
- je isrt
-
- xorw %bp, %bp
-isrt: ret
-
-idrtvga: .ascii "REALTEK VGA"
-
-realtek_md:
- .byte 0x1a, 0x3c, 0x50
- .byte 0x1b, 0x19, 0x84
- .byte 0x1c, 0x1e, 0x84
- .byte 0x1d, 0x2b, 0x84
- .byte 0x1e, 0x3c, 0x84
- .byte 0
- .ascii "REALTEK"
- .byte 0
-
-#endif /* CONFIG_VIDEO_SVGA */
-
-# User-defined local mode table (VGA only)
-#ifdef CONFIG_VIDEO_LOCAL
-local_modes:
- leaw local_mode_table, %si
-locm1: lodsw
- orw %ax, %ax
- jz locm2
-
- stosw
- movsw
- jmp locm1
-
-locm2: ret
-
-# This is the table of local video modes which can be supplied manually
-# by the user. Each entry consists of mode ID (word) and dimensions
-# (byte for column count and another byte for row count). These modes
-# are placed before all SVGA and VESA modes and override them if table
-# compacting is enabled. The table must end with a zero word followed
-# by NUL-terminated video adapter name.
-local_mode_table:
- .word 0x0100 # Example: 40x25
- .byte 25,40
- .word 0
- .ascii "Local"
- .byte 0
-#endif /* CONFIG_VIDEO_LOCAL */
-
-# Read a key and return the ASCII code in al, scan code in ah
-getkey: xorb %ah, %ah
- int $0x16
- ret
-
-# Read a key with a timeout of 30 seconds.
-# The hardware clock is used to get the time.
-getkt: call gettime
- addb $30, %al # Wait 30 seconds
- cmpb $60, %al
- jl lminute
-
- subb $60, %al
-lminute:
- movb %al, %cl
-again: movb $0x01, %ah
- int $0x16
- jnz getkey # key pressed, so get it
-
- call gettime
- cmpb %cl, %al
- jne again
-
- movb $0x20, %al # timeout, return `space'
- ret
-
-# Flush the keyboard buffer
-flush: movb $0x01, %ah
- int $0x16
- jz empty
-
- xorb %ah, %ah
- int $0x16
- jmp flush
-
-empty: ret
-
-# Print hexadecimal number.
-prthw: pushw %ax
- movb %ah, %al
- call prthb
- popw %ax
-prthb: pushw %ax
- shrb $4, %al
- call prthn
- popw %ax
- andb $0x0f, %al
-prthn: cmpb $0x0a, %al
- jc prth1
-
- addb $0x07, %al
-prth1: addb $0x30, %al
- jmp prtchr
-
-# Print decimal number in al
-prtdec: pushw %ax
- pushw %cx
- xorb %ah, %ah
- movb $0x0a, %cl
- idivb %cl
- cmpb $0x09, %al
- jbe lt100
-
- call prtdec
- jmp skip10
-
-lt100: addb $0x30, %al
- call prtchr
-skip10: movb %ah, %al
- addb $0x30, %al
- call prtchr
- popw %cx
- popw %ax
- ret
-
-store_edid:
-#ifdef CONFIG_FIRMWARE_EDID
- pushw %es # just save all registers
- pushw %ax
- pushw %bx
- pushw %cx
- pushw %dx
- pushw %di
-
- pushw %fs
- popw %es
-
- movl $0x13131313, %eax # memset block with 0x13
- movw $32, %cx
- movw $0x140, %di
- cld
- rep
- stosl
-
- cmpw $0x0200, vbe_version # only do EDID on >= VBE2.0
- jl no_edid
-
- pushw %es # save ES
- xorw %di, %di # Report Capability
- pushw %di
- popw %es # ES:DI must be 0:0
- movw $0x4f15, %ax
- xorw %bx, %bx
- xorw %cx, %cx
- int $0x10
- popw %es # restore ES
-
- cmpb $0x00, %ah # call successful
- jne no_edid
-
- cmpb $0x4f, %al # function supported
- jne no_edid
-
- movw $0x4f15, %ax # do VBE/DDC
- movw $0x01, %bx
- movw $0x00, %cx
- movw $0x01, %dx
- movw $0x140, %di
- int $0x10
-
-no_edid:
- popw %di # restore all registers
- popw %dx
- popw %cx
- popw %bx
- popw %ax
- popw %es
-#endif
- ret
-
-# VIDEO_SELECT-only variables
-mt_end: .word 0 # End of video mode table if built
-edit_buf: .space 6 # Line editor buffer
-card_name: .word 0 # Pointer to adapter name
-scanning: .byte 0 # Performing mode scan
-do_restore: .byte 0 # Screen contents altered during mode change
-svga_prefix: .byte VIDEO_FIRST_BIOS>>8 # Default prefix for BIOS modes
-graphic_mode: .byte 0 # Graphic mode with a linear frame buffer
-dac_size: .byte 6 # DAC bit depth
-vbe_version: .word 0 # VBE bios version
-
-# Status messages
-keymsg: .ascii "Press <RETURN> to see video modes available, "
- .ascii "<SPACE> to continue or wait 30 secs"
- .byte 0x0d, 0x0a, 0
-
-listhdr: .byte 0x0d, 0x0a
- .ascii "Mode: COLSxROWS:"
-
-crlft: .byte 0x0d, 0x0a, 0
-
-prompt: .byte 0x0d, 0x0a
- .asciz "Enter mode number or `scan': "
-
-unknt: .asciz "Unknown mode ID. Try again."
-
-badmdt: .ascii "You passed an undefined mode number."
- .byte 0x0d, 0x0a, 0
-
-vesaer: .ascii "Error: Scanning of VESA modes failed. Please "
- .ascii "report to <mj@ucw.cz>."
- .byte 0x0d, 0x0a, 0
-
-old_name: .asciz "CGA/MDA/HGA"
-
-ega_name: .asciz "EGA"
-
-svga_name: .ascii " "
-
-vga_name: .asciz "VGA"
-
-vesa_name: .asciz "VESA"
-
-name_bann: .asciz "Video adapter: "
-#endif /* CONFIG_VIDEO_SELECT */
-
-# Other variables:
-adapter: .byte 0 # Video adapter: 0=CGA/MDA/HGA,1=EGA,2=VGA
-video_segment: .word 0xb800 # Video memory segment
-force_size: .word 0 # Use this size instead of the one in BIOS vars
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig
index b26378815b91..941a7e3aa5fb 100644
--- a/arch/x86_64/defconfig
+++ b/arch/x86_64/defconfig
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.21-rc3
-# Wed Mar 7 15:29:47 2007
+# Linux kernel version: 2.6.21-git3
+# Tue May 1 07:30:48 2007
#
CONFIG_X86_64=y
CONFIG_64BIT=y
@@ -118,11 +118,11 @@ CONFIG_X86_PC=y
# CONFIG_X86_VSMP is not set
# CONFIG_MK8 is not set
# CONFIG_MPSC is not set
-# CONFIG_MCORE2 is not set
-CONFIG_GENERIC_CPU=y
-CONFIG_X86_L1_CACHE_BYTES=128
-CONFIG_X86_L1_CACHE_SHIFT=7
-CONFIG_X86_INTERNODE_CACHE_BYTES=128
+CONFIG_MCORE2=y
+# CONFIG_GENERIC_CPU is not set
+CONFIG_X86_L1_CACHE_BYTES=64
+CONFIG_X86_L1_CACHE_SHIFT=6
+CONFIG_X86_INTERNODE_CACHE_BYTES=64
CONFIG_X86_TSC=y
CONFIG_X86_GOOD_APIC=y
# CONFIG_MICROCODE is not set
@@ -174,6 +174,7 @@ CONFIG_X86_MCE_INTEL=y
CONFIG_X86_MCE_AMD=y
# CONFIG_KEXEC is not set
# CONFIG_CRASH_DUMP is not set
+# CONFIG_RELOCATABLE is not set
CONFIG_PHYSICAL_START=0x200000
CONFIG_SECCOMP=y
# CONFIG_CC_STACKPROTECTOR is not set
@@ -182,7 +183,6 @@ CONFIG_HZ_250=y
# CONFIG_HZ_300 is not set
# CONFIG_HZ_1000 is not set
CONFIG_HZ=250
-# CONFIG_REORDER is not set
CONFIG_K8_NB=y
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
@@ -218,7 +218,6 @@ CONFIG_ACPI_HOTPLUG_CPU=y
CONFIG_ACPI_THERMAL=y
CONFIG_ACPI_NUMA=y
# CONFIG_ACPI_ASUS is not set
-# CONFIG_ACPI_IBM is not set
# CONFIG_ACPI_TOSHIBA is not set
CONFIG_ACPI_BLACKLIST_YEAR=0
# CONFIG_ACPI_DEBUG is not set
@@ -243,7 +242,7 @@ CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_GOV_ONDEMAND=y
-# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
#
# CPUFreq processor drivers
@@ -299,7 +298,6 @@ CONFIG_NET=y
#
# Networking options
#
-# CONFIG_NETDEBUG is not set
CONFIG_PACKET=y
# CONFIG_PACKET_MMAP is not set
CONFIG_UNIX=y
@@ -334,6 +332,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
CONFIG_IPV6=y
# CONFIG_IPV6_PRIVACY is not set
# CONFIG_IPV6_ROUTER_PREF is not set
+# CONFIG_IPV6_OPTIMISTIC_DAD is not set
# CONFIG_INET6_AH is not set
# CONFIG_INET6_ESP is not set
# CONFIG_INET6_IPCOMP is not set
@@ -389,6 +388,13 @@ CONFIG_IPV6_SIT=y
# CONFIG_HAMRADIO is not set
# CONFIG_IRDA is not set
# CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+
+#
+# Wireless
+#
+# CONFIG_CFG80211 is not set
+# CONFIG_WIRELESS_EXT is not set
# CONFIG_IEEE80211 is not set
#
@@ -409,10 +415,6 @@ CONFIG_FW_LOADER=y
# Connector - unified userspace <-> kernelspace linker
#
# CONFIG_CONNECTOR is not set
-
-#
-# Memory Technology Devices (MTD)
-#
# CONFIG_MTD is not set
#
@@ -459,6 +461,7 @@ CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024
# CONFIG_SGI_IOC4 is not set
# CONFIG_TIFM_CORE is not set
# CONFIG_SONY_LAPTOP is not set
+# CONFIG_THINKPAD_ACPI is not set
#
# ATA/ATAPI/MFM/RLL support
@@ -494,7 +497,6 @@ CONFIG_BLK_DEV_IDEPCI=y
# CONFIG_BLK_DEV_RZ1000 is not set
CONFIG_BLK_DEV_IDEDMA_PCI=y
# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
-CONFIG_IDEDMA_PCI_AUTO=y
# CONFIG_IDEDMA_ONLYDISK is not set
# CONFIG_BLK_DEV_AEC62XX is not set
# CONFIG_BLK_DEV_ALI15X3 is not set
@@ -525,7 +527,6 @@ CONFIG_BLK_DEV_PDC202XX_NEW=y
# CONFIG_IDE_ARM is not set
CONFIG_BLK_DEV_IDEDMA=y
# CONFIG_IDEDMA_IVB is not set
-CONFIG_IDEDMA_AUTO=y
# CONFIG_BLK_DEV_HD is not set
#
@@ -584,11 +585,9 @@ CONFIG_AIC79XX_DEBUG_MASK=0
# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set
# CONFIG_SCSI_AIC94XX is not set
# CONFIG_SCSI_ARCMSR is not set
-CONFIG_MEGARAID_NEWGEN=y
-CONFIG_MEGARAID_MM=y
-CONFIG_MEGARAID_MAILBOX=y
+# CONFIG_MEGARAID_NEWGEN is not set
# CONFIG_MEGARAID_LEGACY is not set
-CONFIG_MEGARAID_SAS=y
+# CONFIG_MEGARAID_SAS is not set
# CONFIG_SCSI_HPTIOP is not set
# CONFIG_SCSI_BUSLOGIC is not set
# CONFIG_SCSI_DMX3191D is not set
@@ -608,6 +607,7 @@ CONFIG_MEGARAID_SAS=y
# CONFIG_SCSI_DC395x is not set
# CONFIG_SCSI_DC390T is not set
# CONFIG_SCSI_DEBUG is not set
+# CONFIG_SCSI_ESP_CORE is not set
# CONFIG_SCSI_SRP is not set
#
@@ -636,6 +636,7 @@ CONFIG_SATA_ACPI=y
# CONFIG_PATA_AMD is not set
# CONFIG_PATA_ARTOP is not set
# CONFIG_PATA_ATIIXP is not set
+# CONFIG_PATA_CMD640_PCI is not set
# CONFIG_PATA_CMD64X is not set
# CONFIG_PATA_CS5520 is not set
# CONFIG_PATA_CS5530 is not set
@@ -687,7 +688,7 @@ CONFIG_BLK_DEV_DM=y
CONFIG_FUSION=y
CONFIG_FUSION_SPI=y
# CONFIG_FUSION_FC is not set
-CONFIG_FUSION_SAS=y
+# CONFIG_FUSION_SAS is not set
CONFIG_FUSION_MAX_SGE=128
# CONFIG_FUSION_CTL is not set
@@ -700,19 +701,22 @@ CONFIG_IEEE1394=y
# Subsystem Options
#
# CONFIG_IEEE1394_VERBOSEDEBUG is not set
-# CONFIG_IEEE1394_EXTRA_CONFIG_ROMS is not set
#
-# Device Drivers
+# Controllers
+#
+
+#
+# Texas Instruments PCILynx requires I2C
#
-# CONFIG_IEEE1394_PCILYNX is not set
CONFIG_IEEE1394_OHCI1394=y
#
-# Protocol Drivers
+# Protocols
#
# CONFIG_IEEE1394_VIDEO1394 is not set
# CONFIG_IEEE1394_SBP2 is not set
+# CONFIG_IEEE1394_ETH1394_ROM_ENTRY is not set
# CONFIG_IEEE1394_ETH1394 is not set
# CONFIG_IEEE1394_DV1394 is not set
CONFIG_IEEE1394_RAWIO=y
@@ -775,7 +779,8 @@ CONFIG_TULIP=y
# CONFIG_HP100 is not set
CONFIG_NET_PCI=y
# CONFIG_PCNET32 is not set
-# CONFIG_AMD8111_ETH is not set
+CONFIG_AMD8111_ETH=y
+# CONFIG_AMD8111E_NAPI is not set
# CONFIG_ADAPTEC_STARFIRE is not set
CONFIG_B44=y
CONFIG_FORCEDETH=y
@@ -837,9 +842,10 @@ CONFIG_S2IO=m
# CONFIG_TR is not set
#
-# Wireless LAN (non-hamradio)
+# Wireless LAN
#
-# CONFIG_NET_RADIO is not set
+# CONFIG_WLAN_PRE80211 is not set
+# CONFIG_WLAN_80211 is not set
#
# Wan interfaces
@@ -853,7 +859,6 @@ CONFIG_S2IO=m
# CONFIG_SHAPER is not set
CONFIG_NETCONSOLE=y
CONFIG_NETPOLL=y
-# CONFIG_NETPOLL_RX is not set
# CONFIG_NETPOLL_TRAP is not set
CONFIG_NET_POLL_CONTROLLER=y
@@ -987,57 +992,7 @@ CONFIG_HPET_MMAP=y
#
# I2C support
#
-CONFIG_I2C=m
-CONFIG_I2C_CHARDEV=m
-
-#
-# I2C Algorithms
-#
-# CONFIG_I2C_ALGOBIT is not set
-# CONFIG_I2C_ALGOPCF is not set
-# CONFIG_I2C_ALGOPCA is not set
-
-#
-# I2C Hardware Bus support
-#
-# CONFIG_I2C_ALI1535 is not set
-# CONFIG_I2C_ALI1563 is not set
-# CONFIG_I2C_ALI15X3 is not set
-# CONFIG_I2C_AMD756 is not set
-# CONFIG_I2C_AMD8111 is not set
-# CONFIG_I2C_I801 is not set
-# CONFIG_I2C_I810 is not set
-# CONFIG_I2C_PIIX4 is not set
-CONFIG_I2C_ISA=m
-# CONFIG_I2C_NFORCE2 is not set
-# CONFIG_I2C_OCORES is not set
-# CONFIG_I2C_PARPORT_LIGHT is not set
-# CONFIG_I2C_PASEMI is not set
-# CONFIG_I2C_PROSAVAGE is not set
-# CONFIG_I2C_SAVAGE4 is not set
-# CONFIG_I2C_SIS5595 is not set
-# CONFIG_I2C_SIS630 is not set
-# CONFIG_I2C_SIS96X is not set
-# CONFIG_I2C_STUB is not set
-# CONFIG_I2C_VIA is not set
-# CONFIG_I2C_VIAPRO is not set
-# CONFIG_I2C_VOODOO3 is not set
-# CONFIG_I2C_PCA_ISA is not set
-
-#
-# Miscellaneous I2C Chip support
-#
-# CONFIG_SENSORS_DS1337 is not set
-# CONFIG_SENSORS_DS1374 is not set
-# CONFIG_SENSORS_EEPROM is not set
-# CONFIG_SENSORS_PCF8574 is not set
-# CONFIG_SENSORS_PCA9539 is not set
-# CONFIG_SENSORS_PCF8591 is not set
-# CONFIG_SENSORS_MAX6875 is not set
-# CONFIG_I2C_DEBUG_CORE is not set
-# CONFIG_I2C_DEBUG_ALGO is not set
-# CONFIG_I2C_DEBUG_BUS is not set
-# CONFIG_I2C_DEBUG_CHIP is not set
+# CONFIG_I2C is not set
#
# SPI support
@@ -1053,54 +1008,8 @@ CONFIG_I2C_ISA=m
#
# Hardware Monitoring support
#
-CONFIG_HWMON=y
+# CONFIG_HWMON is not set
# CONFIG_HWMON_VID is not set
-# CONFIG_SENSORS_ABITUGURU is not set
-# CONFIG_SENSORS_ADM1021 is not set
-# CONFIG_SENSORS_ADM1025 is not set
-# CONFIG_SENSORS_ADM1026 is not set
-# CONFIG_SENSORS_ADM1029 is not set
-# CONFIG_SENSORS_ADM1031 is not set
-# CONFIG_SENSORS_ADM9240 is not set
-# CONFIG_SENSORS_K8TEMP is not set
-# CONFIG_SENSORS_ASB100 is not set
-# CONFIG_SENSORS_ATXP1 is not set
-# CONFIG_SENSORS_DS1621 is not set
-# CONFIG_SENSORS_F71805F is not set
-# CONFIG_SENSORS_FSCHER is not set
-# CONFIG_SENSORS_FSCPOS is not set
-# CONFIG_SENSORS_GL518SM is not set
-# CONFIG_SENSORS_GL520SM is not set
-# CONFIG_SENSORS_IT87 is not set
-# CONFIG_SENSORS_LM63 is not set
-# CONFIG_SENSORS_LM75 is not set
-# CONFIG_SENSORS_LM77 is not set
-# CONFIG_SENSORS_LM78 is not set
-# CONFIG_SENSORS_LM80 is not set
-# CONFIG_SENSORS_LM83 is not set
-# CONFIG_SENSORS_LM85 is not set
-# CONFIG_SENSORS_LM87 is not set
-# CONFIG_SENSORS_LM90 is not set
-# CONFIG_SENSORS_LM92 is not set
-# CONFIG_SENSORS_MAX1619 is not set
-# CONFIG_SENSORS_PC87360 is not set
-# CONFIG_SENSORS_PC87427 is not set
-# CONFIG_SENSORS_SIS5595 is not set
-# CONFIG_SENSORS_SMSC47M1 is not set
-# CONFIG_SENSORS_SMSC47M192 is not set
-CONFIG_SENSORS_SMSC47B397=m
-# CONFIG_SENSORS_VIA686A is not set
-# CONFIG_SENSORS_VT1211 is not set
-# CONFIG_SENSORS_VT8231 is not set
-# CONFIG_SENSORS_W83781D is not set
-# CONFIG_SENSORS_W83791D is not set
-# CONFIG_SENSORS_W83792D is not set
-# CONFIG_SENSORS_W83793 is not set
-# CONFIG_SENSORS_W83L785TS is not set
-# CONFIG_SENSORS_W83627HF is not set
-# CONFIG_SENSORS_W83627EHF is not set
-# CONFIG_SENSORS_HDAPS is not set
-# CONFIG_HWMON_DEBUG_CHIP is not set
#
# Multifunction device drivers
@@ -1147,8 +1056,9 @@ CONFIG_SOUND=y
# Open Sound System
#
CONFIG_SOUND_PRIME=y
-# CONFIG_OBSOLETE_OSS is not set
+CONFIG_OBSOLETE_OSS=y
# CONFIG_SOUND_BT878 is not set
+# CONFIG_SOUND_ES1371 is not set
CONFIG_SOUND_ICH=y
# CONFIG_SOUND_TRIDENT is not set
# CONFIG_SOUND_MSNDCLAS is not set
@@ -1163,6 +1073,14 @@ CONFIG_HID=y
# CONFIG_HID_DEBUG is not set
#
+# USB Input Devices
+#
+CONFIG_USB_HID=y
+# CONFIG_USB_HIDINPUT_POWERBOOK is not set
+# CONFIG_HID_FF is not set
+# CONFIG_USB_HIDDEV is not set
+
+#
# USB support
#
CONFIG_USB_ARCH_HAS_HCD=y
@@ -1175,6 +1093,7 @@ CONFIG_USB=y
# Miscellaneous USB options
#
CONFIG_USB_DEVICEFS=y
+# CONFIG_USB_DEVICE_CLASS is not set
# CONFIG_USB_DYNAMIC_MINORS is not set
# CONFIG_USB_SUSPEND is not set
# CONFIG_USB_OTG is not set
@@ -1225,10 +1144,6 @@ CONFIG_USB_STORAGE=y
#
# USB Input Devices
#
-CONFIG_USB_HID=y
-# CONFIG_USB_HIDINPUT_POWERBOOK is not set
-# CONFIG_HID_FF is not set
-# CONFIG_USB_HIDDEV is not set
# CONFIG_USB_AIPTEK is not set
# CONFIG_USB_WACOM is not set
# CONFIG_USB_ACECAD is not set
@@ -1556,7 +1471,7 @@ CONFIG_DEBUG_KERNEL=y
CONFIG_LOG_BUF_SHIFT=18
CONFIG_DETECT_SOFTLOCKUP=y
# CONFIG_SCHEDSTATS is not set
-# CONFIG_TIMER_STATS is not set
+CONFIG_TIMER_STATS=y
# CONFIG_DEBUG_SLAB is not set
# CONFIG_DEBUG_RT_MUTEXES is not set
# CONFIG_RT_MUTEX_TESTER is not set
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c
index 071100ea1251..185399baaf6d 100644
--- a/arch/x86_64/ia32/ia32_binfmt.c
+++ b/arch/x86_64/ia32/ia32_binfmt.c
@@ -5,6 +5,11 @@
* This tricks binfmt_elf.c into loading 32bit binaries using lots
* of ugly preprocessor tricks. Talk about very very poor man's inheritance.
*/
+#define __ASM_X86_64_ELF_H 1
+
+#undef ELF_CLASS
+#define ELF_CLASS ELFCLASS32
+
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/rwsem.h>
@@ -50,9 +55,6 @@ struct elf_phdr;
#undef ELF_ARCH
#define ELF_ARCH EM_386
-#undef ELF_CLASS
-#define ELF_CLASS ELFCLASS32
-
#define ELF_DATA ELFDATA2LSB
#define USE_ELF_CORE_DUMP 1
@@ -136,7 +138,7 @@ struct elf_prpsinfo
#define user user32
-#define __ASM_X86_64_ELF_H 1
+#undef elf_read_implies_exec
#define elf_read_implies_exec(ex, executable_stack) (executable_stack != EXSTACK_DISABLE_X)
//#include <asm/ia32.h>
#include <linux/elf.h>
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index 796df6992f62..c48087db6f75 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -481,11 +481,7 @@ ia32_sys_call_table:
.quad sys_symlink
.quad sys_lstat
.quad sys_readlink /* 85 */
-#ifdef CONFIG_IA32_AOUT
.quad sys_uselib
-#else
- .quad quiet_ni_syscall
-#endif
.quad sys_swapon
.quad sys_reboot
.quad compat_sys_old_readdir
diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c
index 568ff0df89e7..fc4419ff0355 100644
--- a/arch/x86_64/ia32/syscall32.c
+++ b/arch/x86_64/ia32/syscall32.c
@@ -13,6 +13,7 @@
#include <asm/proto.h>
#include <asm/tlbflush.h>
#include <asm/ia32_unistd.h>
+#include <asm/vsyscall32.h>
extern unsigned char syscall32_syscall[], syscall32_syscall_end[];
extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[];
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index bb47e86f3d02..4d94c51803d8 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -8,7 +8,8 @@ obj-y := process.o signal.o entry.o traps.o irq.o \
ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \
x8664_ksyms.o i387.o syscall.o vsyscall.o \
setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
- pci-dma.o pci-nommu.o alternative.o hpet.o tsc.o
+ pci-dma.o pci-nommu.o alternative.o hpet.o tsc.o bugs.o \
+ perfctr-watchdog.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_X86_MCE) += mce.o therm_throt.o
@@ -21,8 +22,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o
obj-$(CONFIG_X86_CPUID) += cpuid.o
obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o tsc_sync.o
obj-y += apic.o nmi.o
-obj-y += io_apic.o mpparse.o \
- genapic.o genapic_cluster.o genapic_flat.o
+obj-y += io_apic.o mpparse.o genapic.o genapic_flat.o
obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_PM) += suspend.o
@@ -58,3 +58,4 @@ i8237-y += ../../i386/kernel/i8237.o
msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o
alternative-y += ../../i386/kernel/alternative.o
pcspeaker-y += ../../i386/kernel/pcspeaker.o
+perfctr-watchdog-y += ../../i386/kernel/cpu/perfctr-watchdog.o
diff --git a/arch/x86_64/kernel/acpi/sleep.c b/arch/x86_64/kernel/acpi/sleep.c
index e1548fbe95ae..195b7034a148 100644
--- a/arch/x86_64/kernel/acpi/sleep.c
+++ b/arch/x86_64/kernel/acpi/sleep.c
@@ -60,19 +60,6 @@ extern char wakeup_start, wakeup_end;
extern unsigned long acpi_copy_wakeup_routine(unsigned long);
-static pgd_t low_ptr;
-
-static void init_low_mapping(void)
-{
- pgd_t *slot0 = pgd_offset(current->mm, 0UL);
- low_ptr = *slot0;
- /* FIXME: We're playing with the current task's page tables here, which
- * is potentially dangerous on SMP systems.
- */
- set_pgd(slot0, *pgd_offset(current->mm, PAGE_OFFSET));
- local_flush_tlb();
-}
-
/**
* acpi_save_state_mem - save kernel state
*
@@ -81,8 +68,6 @@ static void init_low_mapping(void)
*/
int acpi_save_state_mem(void)
{
- init_low_mapping();
-
memcpy((void *)acpi_wakeup_address, &wakeup_start,
&wakeup_end - &wakeup_start);
acpi_copy_wakeup_routine(acpi_wakeup_address);
@@ -95,8 +80,6 @@ int acpi_save_state_mem(void)
*/
void acpi_restore_state_mem(void)
{
- set_pgd(pgd_offset(current->mm, 0UL), low_ptr);
- local_flush_tlb();
}
/**
@@ -109,10 +92,11 @@ void acpi_restore_state_mem(void)
*/
void __init acpi_reserve_bootmem(void)
{
- acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE);
- if ((&wakeup_end - &wakeup_start) > PAGE_SIZE)
+ acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE*2);
+ if ((&wakeup_end - &wakeup_start) > (PAGE_SIZE*2))
printk(KERN_CRIT
- "ACPI: Wakeup code way too big, will crash on attempt to suspend\n");
+ "ACPI: Wakeup code way too big, will crash on attempt"
+ " to suspend\n");
}
static int __init acpi_sleep_setup(char *str)
diff --git a/arch/x86_64/kernel/acpi/wakeup.S b/arch/x86_64/kernel/acpi/wakeup.S
index 185faa911db5..8550a6ffa275 100644
--- a/arch/x86_64/kernel/acpi/wakeup.S
+++ b/arch/x86_64/kernel/acpi/wakeup.S
@@ -1,6 +1,7 @@
.text
#include <linux/linkage.h>
#include <asm/segment.h>
+#include <asm/pgtable.h>
#include <asm/page.h>
#include <asm/msr.h>
@@ -30,22 +31,28 @@ wakeup_code:
cld
# setup data segment
movw %cs, %ax
- movw %ax, %ds # Make ds:0 point to wakeup_start
+ movw %ax, %ds # Make ds:0 point to wakeup_start
movw %ax, %ss
- mov $(wakeup_stack - wakeup_code), %sp # Private stack is needed for ASUS board
+ # Private stack is needed for ASUS board
+ mov $(wakeup_stack - wakeup_code), %sp
- pushl $0 # Kill any dangerous flags
+ pushl $0 # Kill any dangerous flags
popfl
movl real_magic - wakeup_code, %eax
cmpl $0x12345678, %eax
jne bogus_real_magic
+ call verify_cpu # Verify the cpu supports long
+ # mode
+ testl %eax, %eax
+ jnz no_longmode
+
testl $1, video_flags - wakeup_code
jz 1f
lcall $0xc000,$3
movw %cs, %ax
- movw %ax, %ds # Bios might have played with that
+ movw %ax, %ds # Bios might have played with that
movw %ax, %ss
1:
@@ -61,12 +68,15 @@ wakeup_code:
movb $0xa2, %al ; outb %al, $0x80
- lidt %ds:idt_48a - wakeup_code
- xorl %eax, %eax
- movw %ds, %ax # (Convert %ds:gdt to a linear ptr)
- shll $4, %eax
- addl $(gdta - wakeup_code), %eax
- movl %eax, gdt_48a +2 - wakeup_code
+ mov %ds, %ax # Find 32bit wakeup_code addr
+ movzx %ax, %esi # (Convert %ds:gdt to a liner ptr)
+ shll $4, %esi
+ # Fix up the vectors
+ addl %esi, wakeup_32_vector - wakeup_code
+ addl %esi, wakeup_long64_vector - wakeup_code
+ addl %esi, gdt_48a + 2 - wakeup_code # Fixup the gdt pointer
+
+ lidtl %ds:idt_48a - wakeup_code
lgdtl %ds:gdt_48a - wakeup_code # load gdt with whatever is
# appropriate
@@ -75,86 +85,63 @@ wakeup_code:
jmp 1f
1:
- .byte 0x66, 0xea # prefix + jmpi-opcode
- .long wakeup_32 - __START_KERNEL_map
- .word __KERNEL_CS
+ ljmpl *(wakeup_32_vector - wakeup_code)
+
+ .balign 4
+wakeup_32_vector:
+ .long wakeup_32 - wakeup_code
+ .word __KERNEL32_CS, 0
.code32
wakeup_32:
# Running in this code, but at low address; paging is not yet turned on.
movb $0xa5, %al ; outb %al, $0x80
- /* Check if extended functions are implemented */
- movl $0x80000000, %eax
- cpuid
- cmpl $0x80000000, %eax
- jbe bogus_cpu
- wbinvd
- mov $0x80000001, %eax
- cpuid
- btl $29, %edx
- jnc bogus_cpu
- movl %edx,%edi
-
- movw $__KERNEL_DS, %ax
- movw %ax, %ds
- movw %ax, %es
- movw %ax, %fs
- movw %ax, %gs
-
- movw $__KERNEL_DS, %ax
- movw %ax, %ss
+ movl $__KERNEL_DS, %eax
+ movl %eax, %ds
- mov $(wakeup_stack - __START_KERNEL_map), %esp
- movl saved_magic - __START_KERNEL_map, %eax
- cmpl $0x9abcdef0, %eax
- jne bogus_32_magic
+ movw $0x0e00 + 'i', %ds:(0xb8012)
+ movb $0xa8, %al ; outb %al, $0x80;
/*
* Prepare for entering 64bits mode
*/
- /* Enable PAE mode and PGE */
+ /* Enable PAE */
xorl %eax, %eax
btsl $5, %eax
- btsl $7, %eax
movl %eax, %cr4
/* Setup early boot stage 4 level pagetables */
- movl $(wakeup_level4_pgt - __START_KERNEL_map), %eax
+ leal (wakeup_level4_pgt - wakeup_code)(%esi), %eax
movl %eax, %cr3
- /* Setup EFER (Extended Feature Enable Register) */
- movl $MSR_EFER, %ecx
- rdmsr
- /* Fool rdmsr and reset %eax to avoid dependences */
- xorl %eax, %eax
+ /* Check if nx is implemented */
+ movl $0x80000001, %eax
+ cpuid
+ movl %edx,%edi
+
/* Enable Long Mode */
+ xorl %eax, %eax
btsl $_EFER_LME, %eax
- /* Enable System Call */
- btsl $_EFER_SCE, %eax
- /* No Execute supported? */
+ /* No Execute supported? */
btl $20,%edi
jnc 1f
btsl $_EFER_NX, %eax
-1:
/* Make changes effective */
+1: movl $MSR_EFER, %ecx
+ xorl %edx, %edx
wrmsr
- wbinvd
xorl %eax, %eax
btsl $31, %eax /* Enable paging and in turn activate Long Mode */
btsl $0, %eax /* Enable protected mode */
- btsl $1, %eax /* Enable MP */
- btsl $4, %eax /* Enable ET */
- btsl $5, %eax /* Enable NE */
- btsl $16, %eax /* Enable WP */
- btsl $18, %eax /* Enable AM */
/* Make changes effective */
movl %eax, %cr0
+
/* At this point:
CR4.PAE must be 1
CS.L must be 0
@@ -162,11 +149,6 @@ wakeup_32:
Next instruction must be a branch
This must be on identity-mapped page
*/
- jmp reach_compatibility_mode
-reach_compatibility_mode:
- movw $0x0e00 + 'i', %ds:(0xb8012)
- movb $0xa8, %al ; outb %al, $0x80;
-
/*
* At this point we're in long mode but in 32bit compatibility mode
* with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn
@@ -174,24 +156,19 @@ reach_compatibility_mode:
* the new gdt/idt that has __KERNEL_CS with CS.L = 1.
*/
- movw $0x0e00 + 'n', %ds:(0xb8014)
- movb $0xa9, %al ; outb %al, $0x80
-
- /* Load new GDT with the 64bit segment using 32bit descriptor */
- movl $(pGDT32 - __START_KERNEL_map), %eax
- lgdt (%eax)
-
- movl $(wakeup_jumpvector - __START_KERNEL_map), %eax
/* Finally jump in 64bit mode */
- ljmp *(%eax)
+ ljmp *(wakeup_long64_vector - wakeup_code)(%esi)
-wakeup_jumpvector:
- .long wakeup_long64 - __START_KERNEL_map
- .word __KERNEL_CS
+ .balign 4
+wakeup_long64_vector:
+ .long wakeup_long64 - wakeup_code
+ .word __KERNEL_CS, 0
.code64
- /* Hooray, we are in Long 64-bit mode (but still running in low memory) */
+ /* Hooray, we are in Long 64-bit mode (but still running in
+ * low memory)
+ */
wakeup_long64:
/*
* We must switch to a new descriptor in kernel space for the GDT
@@ -199,7 +176,15 @@ wakeup_long64:
* addresses where we're currently running on. We have to do that here
* because in 32bit we couldn't load a 64bit linear address.
*/
- lgdt cpu_gdt_descr - __START_KERNEL_map
+ lgdt cpu_gdt_descr
+
+ movw $0x0e00 + 'n', %ds:(0xb8014)
+ movb $0xa9, %al ; outb %al, $0x80
+
+ movq saved_magic, %rax
+ movq $0x123456789abcdef0, %rdx
+ cmpq %rdx, %rax
+ jne bogus_64_magic
movw $0x0e00 + 'u', %ds:(0xb8016)
@@ -211,75 +196,58 @@ wakeup_long64:
movw %ax, %es
movw %ax, %fs
movw %ax, %gs
- movq saved_esp, %rsp
+ movq saved_rsp, %rsp
movw $0x0e00 + 'x', %ds:(0xb8018)
- movq saved_ebx, %rbx
- movq saved_edi, %rdi
- movq saved_esi, %rsi
- movq saved_ebp, %rbp
+ movq saved_rbx, %rbx
+ movq saved_rdi, %rdi
+ movq saved_rsi, %rsi
+ movq saved_rbp, %rbp
movw $0x0e00 + '!', %ds:(0xb801a)
- movq saved_eip, %rax
+ movq saved_rip, %rax
jmp *%rax
.code32
.align 64
gdta:
+ /* Its good to keep gdt in sync with one in trampoline.S */
.word 0, 0, 0, 0 # dummy
-
- .word 0, 0, 0, 0 # unused
-
- .word 0xFFFF # 4Gb - (0x100000*0x1000 = 4Gb)
- .word 0 # base address = 0
- .word 0x9B00 # code read/exec. ??? Why I need 0x9B00 (as opposed to 0x9A00 in order for this to work?)
- .word 0x00CF # granularity = 4096, 386
- # (+5th nibble of limit)
-
- .word 0xFFFF # 4Gb - (0x100000*0x1000 = 4Gb)
- .word 0 # base address = 0
- .word 0x9200 # data read/write
- .word 0x00CF # granularity = 4096, 386
- # (+5th nibble of limit)
-# this is 64bit descriptor for code
- .word 0xFFFF
- .word 0
- .word 0x9A00 # code read/exec
- .word 0x00AF # as above, but it is long mode and with D=0
+ /* ??? Why I need the accessed bit set in order for this to work? */
+ .quad 0x00cf9b000000ffff # __KERNEL32_CS
+ .quad 0x00af9b000000ffff # __KERNEL_CS
+ .quad 0x00cf93000000ffff # __KERNEL_DS
idt_48a:
.word 0 # idt limit = 0
.word 0, 0 # idt base = 0L
gdt_48a:
- .word 0x8000 # gdt limit=2048,
+ .word 0x800 # gdt limit=2048,
# 256 GDT entries
- .word 0, 0 # gdt base (filled in later)
-
+ .long gdta - wakeup_code # gdt base (relocated in later)
-real_save_gdt: .word 0
- .quad 0
real_magic: .quad 0
video_mode: .quad 0
video_flags: .quad 0
+.code16
bogus_real_magic:
- movb $0xba,%al ; outb %al,$0x80
+ movb $0xba,%al ; outb %al,$0x80
jmp bogus_real_magic
-bogus_32_magic:
+.code64
+bogus_64_magic:
movb $0xb3,%al ; outb %al,$0x80
- jmp bogus_32_magic
+ jmp bogus_64_magic
-bogus_31_magic:
- movb $0xb1,%al ; outb %al,$0x80
- jmp bogus_31_magic
-
-bogus_cpu:
- movb $0xbc,%al ; outb %al,$0x80
- jmp bogus_cpu
+.code16
+no_longmode:
+ movb $0xbc,%al ; outb %al,$0x80
+ jmp no_longmode
+#include "../verify_cpu.S"
/* This code uses an extended set of video mode numbers. These include:
* Aliases for standard modes
@@ -301,6 +269,7 @@ bogus_cpu:
#define VIDEO_FIRST_V7 0x0900
# Setting of user mode (AX=mode ID) => CF=success
+.code16
mode_seta:
movw %ax, %bx
#if 0
@@ -346,21 +315,18 @@ check_vesaa:
_setbada: jmp setbada
- .code64
-bogus_magic:
- movw $0x0e00 + 'B', %ds:(0xb8018)
- jmp bogus_magic
-
-bogus_magic2:
- movw $0x0e00 + '2', %ds:(0xb8018)
- jmp bogus_magic2
-
-
wakeup_stack_begin: # Stack grows down
.org 0xff0
wakeup_stack: # Just below end of page
+.org 0x1000
+ENTRY(wakeup_level4_pgt)
+ .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+ .fill 510,8,0
+ /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+ .quad level3_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
+
ENTRY(wakeup_end)
##
@@ -373,28 +339,11 @@ ENTRY(wakeup_end)
#
# Returned address is location of code in low memory (past data and stack)
#
+ .code64
ENTRY(acpi_copy_wakeup_routine)
pushq %rax
- pushq %rcx
pushq %rdx
- sgdt saved_gdt
- sidt saved_idt
- sldt saved_ldt
- str saved_tss
-
- movq %cr3, %rdx
- movq %rdx, saved_cr3
- movq %cr4, %rdx
- movq %rdx, saved_cr4
- movq %cr0, %rdx
- movq %rdx, saved_cr0
- sgdt real_save_gdt - wakeup_start (,%rdi)
- movl $MSR_EFER, %ecx
- rdmsr
- movl %eax, saved_efer
- movl %edx, saved_efer2
-
movl saved_video_mode, %edx
movl %edx, video_mode - wakeup_start (,%rdi)
movl acpi_video_flags, %edx
@@ -403,21 +352,13 @@ ENTRY(acpi_copy_wakeup_routine)
movq $0x123456789abcdef0, %rdx
movq %rdx, saved_magic
- movl saved_magic - __START_KERNEL_map, %eax
- cmpl $0x9abcdef0, %eax
- jne bogus_32_magic
-
- # make sure %cr4 is set correctly (features, etc)
- movl saved_cr4 - __START_KERNEL_map, %eax
- movq %rax, %cr4
+ movq saved_magic, %rax
+ movq $0x123456789abcdef0, %rdx
+ cmpq %rdx, %rax
+ jne bogus_64_magic
- movl saved_cr0 - __START_KERNEL_map, %eax
- movq %rax, %cr0
- jmp 1f # Flush pipelines
-1:
# restore the regs we used
popq %rdx
- popq %rcx
popq %rax
ENTRY(do_suspend_lowlevel_s4bios)
ret
@@ -450,13 +391,13 @@ do_suspend_lowlevel:
movq %r15, saved_context_r15(%rip)
pushfq ; popq saved_context_eflags(%rip)
- movq $.L97, saved_eip(%rip)
+ movq $.L97, saved_rip(%rip)
- movq %rsp,saved_esp
- movq %rbp,saved_ebp
- movq %rbx,saved_ebx
- movq %rdi,saved_edi
- movq %rsi,saved_esi
+ movq %rsp,saved_rsp
+ movq %rbp,saved_rbp
+ movq %rbx,saved_rbx
+ movq %rdi,saved_rdi
+ movq %rsi,saved_rsi
addq $8, %rsp
movl $3, %edi
@@ -503,25 +444,12 @@ do_suspend_lowlevel:
.data
ALIGN
-ENTRY(saved_ebp) .quad 0
-ENTRY(saved_esi) .quad 0
-ENTRY(saved_edi) .quad 0
-ENTRY(saved_ebx) .quad 0
+ENTRY(saved_rbp) .quad 0
+ENTRY(saved_rsi) .quad 0
+ENTRY(saved_rdi) .quad 0
+ENTRY(saved_rbx) .quad 0
-ENTRY(saved_eip) .quad 0
-ENTRY(saved_esp) .quad 0
+ENTRY(saved_rip) .quad 0
+ENTRY(saved_rsp) .quad 0
ENTRY(saved_magic) .quad 0
-
-ALIGN
-# saved registers
-saved_gdt: .quad 0,0
-saved_idt: .quad 0,0
-saved_ldt: .quad 0
-saved_tss: .quad 0
-
-saved_cr0: .quad 0
-saved_cr3: .quad 0
-saved_cr4: .quad 0
-saved_efer: .quad 0
-saved_efer2: .quad 0
diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c
index b487396c4c5b..a52af5820592 100644
--- a/arch/x86_64/kernel/aperture.c
+++ b/arch/x86_64/kernel/aperture.c
@@ -51,7 +51,6 @@ static void __init insert_aperture_resource(u32 aper_base, u32 aper_size)
static u32 __init allocate_aperture(void)
{
- pg_data_t *nd0 = NODE_DATA(0);
u32 aper_size;
void *p;
@@ -65,12 +64,12 @@ static u32 __init allocate_aperture(void)
* Unfortunately we cannot move it up because that would make the
* IOMMU useless.
*/
- p = __alloc_bootmem_node(nd0, aper_size, aper_size, 0);
+ p = __alloc_bootmem_nopanic(aper_size, aper_size, 0);
if (!p || __pa(p)+aper_size > 0xffffffff) {
printk("Cannot allocate aperture memory hole (%p,%uK)\n",
p, aper_size>>10);
if (p)
- free_bootmem_node(nd0, __pa(p), aper_size);
+ free_bootmem(__pa(p), aper_size);
return 0;
}
printk("Mapping aperture over %d KB of RAM @ %lx\n",
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index bd3e45d47c37..d198f7d82e5a 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -68,6 +68,28 @@ int using_apic_timer __read_mostly = 0;
static void apic_pm_activate(void);
+void apic_wait_icr_idle(void)
+{
+ while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
+ cpu_relax();
+}
+
+unsigned int safe_apic_wait_icr_idle(void)
+{
+ unsigned int send_status;
+ int timeout;
+
+ timeout = 0;
+ do {
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ if (!send_status)
+ break;
+ udelay(100);
+ } while (timeout++ < 1000);
+
+ return send_status;
+}
+
void enable_NMI_through_LVT0 (void * dummy)
{
unsigned int v;
@@ -817,14 +839,15 @@ static void setup_APIC_timer(unsigned int clocks)
static int __init calibrate_APIC_clock(void)
{
- int apic, apic_start, tsc, tsc_start;
+ unsigned apic, apic_start;
+ unsigned long tsc, tsc_start;
int result;
/*
* Put whatever arbitrary (but long enough) timeout
* value into the APIC clock, we just want to get the
* counter running for calibration.
*/
- __setup_APIC_LVTT(1000000000);
+ __setup_APIC_LVTT(4000000000);
apic_start = apic_read(APIC_TMCCT);
#ifdef CONFIG_X86_PM_TIMER
@@ -835,15 +858,15 @@ static int __init calibrate_APIC_clock(void)
} else
#endif
{
- rdtscl(tsc_start);
+ rdtscll(tsc_start);
do {
apic = apic_read(APIC_TMCCT);
- rdtscl(tsc);
+ rdtscll(tsc);
} while ((tsc - tsc_start) < TICK_COUNT &&
- (apic - apic_start) < TICK_COUNT);
+ (apic_start - apic) < TICK_COUNT);
- result = (apic_start - apic) * 1000L * cpu_khz /
+ result = (apic_start - apic) * 1000L * tsc_khz /
(tsc - tsc_start);
}
printk("result %d\n", result);
diff --git a/arch/x86_64/kernel/asm-offsets.c b/arch/x86_64/kernel/asm-offsets.c
index 96687e2beb2c..778953bc636c 100644
--- a/arch/x86_64/kernel/asm-offsets.c
+++ b/arch/x86_64/kernel/asm-offsets.c
@@ -21,6 +21,14 @@
#define BLANK() asm volatile("\n->" : : )
+#define __NO_STUBS 1
+#undef __SYSCALL
+#undef _ASM_X86_64_UNISTD_H_
+#define __SYSCALL(nr, sym) [nr] = 1,
+static char syscalls[] = {
+#include <asm/unistd.h>
+};
+
int main(void)
{
#define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry))
@@ -71,5 +79,7 @@ int main(void)
DEFINE(TSS_ist, offsetof(struct tss_struct, ist));
BLANK();
DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx));
+ BLANK();
+ DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
return 0;
}
diff --git a/arch/x86_64/kernel/bugs.c b/arch/x86_64/kernel/bugs.c
new file mode 100644
index 000000000000..12b585b5345d
--- /dev/null
+++ b/arch/x86_64/kernel/bugs.c
@@ -0,0 +1,21 @@
+/*
+ * arch/x86_64/kernel/bugs.c
+ *
+ * Copyright (C) 1994 Linus Torvalds
+ * Copyright (C) 2000 SuSE
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <asm/alternative.h>
+#include <asm/processor.h>
+
+void __init check_bugs(void)
+{
+ identify_cpu(&boot_cpu_data);
+#if !defined(CONFIG_SMP)
+ printk("CPU: ");
+ print_cpu_info(&boot_cpu_data);
+#endif
+ alternative_instructions();
+}
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index a490fabfcf47..be8965427a93 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -25,7 +25,7 @@
#include <asm/bootsetup.h>
#include <asm/sections.h>
-struct e820map e820 __initdata;
+struct e820map e820;
/*
* PFN of last memory page.
@@ -98,7 +98,7 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size)
* This function checks if any part of the range <start,end> is mapped
* with type.
*/
-int __meminit
+int
e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
{
int i;
@@ -112,6 +112,7 @@ e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
}
return 0;
}
+EXPORT_SYMBOL_GPL(e820_any_mapped);
/*
* This function checks if the entire range <start,end> is mapped with type.
diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c
index fede55a53995..990d9c218a5d 100644
--- a/arch/x86_64/kernel/early-quirks.c
+++ b/arch/x86_64/kernel/early-quirks.c
@@ -71,18 +71,6 @@ static void __init ati_bugs(void)
}
}
-static void intel_bugs(void)
-{
- u16 device = read_pci_config_16(0, 0, 0, PCI_DEVICE_ID);
-
-#ifdef CONFIG_SMP
- if (device == PCI_DEVICE_ID_INTEL_E7320_MCH ||
- device == PCI_DEVICE_ID_INTEL_E7520_MCH ||
- device == PCI_DEVICE_ID_INTEL_E7525_MCH)
- quirk_intel_irqbalance();
-#endif
-}
-
struct chipset {
u16 vendor;
void (*f)(void);
@@ -92,7 +80,6 @@ static struct chipset early_qrk[] __initdata = {
{ PCI_VENDOR_ID_NVIDIA, nvidia_bugs },
{ PCI_VENDOR_ID_VIA, via_bugs },
{ PCI_VENDOR_ID_ATI, ati_bugs },
- { PCI_VENDOR_ID_INTEL, intel_bugs},
{}
};
diff --git a/arch/x86_64/kernel/early_printk.c b/arch/x86_64/kernel/early_printk.c
index 47b6d90349da..92213d2b7c11 100644
--- a/arch/x86_64/kernel/early_printk.c
+++ b/arch/x86_64/kernel/early_printk.c
@@ -11,11 +11,10 @@
#ifdef __i386__
#include <asm/setup.h>
-#define VGABASE (__ISA_IO_base + 0xb8000)
#else
#include <asm/bootsetup.h>
-#define VGABASE ((void __iomem *)0xffffffff800b8000UL)
#endif
+#define VGABASE (__ISA_IO_base + 0xb8000)
static int max_ypos = 25, max_xpos = 80;
static int current_ypos = 25, current_xpos = 0;
@@ -176,7 +175,7 @@ static noinline long simnow(long cmd, long a, long b, long c)
return ret;
}
-void __init simnow_init(char *str)
+static void __init simnow_init(char *str)
{
char *fn = "klog";
if (*str == '=')
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index ed4350ced3d0..fa984b53e7e6 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -701,6 +701,7 @@ END(spurious_interrupt)
CFI_ADJUST_CFA_OFFSET 8
pushq %rax /* push real oldrax to the rdi slot */
CFI_ADJUST_CFA_OFFSET 8
+ CFI_REL_OFFSET rax,0
leaq \sym(%rip),%rax
jmp error_entry
CFI_ENDPROC
@@ -710,6 +711,7 @@ END(spurious_interrupt)
XCPT_FRAME
pushq %rax
CFI_ADJUST_CFA_OFFSET 8
+ CFI_REL_OFFSET rax,0
leaq \sym(%rip),%rax
jmp error_entry
CFI_ENDPROC
@@ -817,6 +819,7 @@ paranoid_schedule\trace:
*/
KPROBE_ENTRY(error_entry)
_frame RDI
+ CFI_REL_OFFSET rax,0
/* rdi slot contains rax, oldrax contains error code */
cld
subq $14*8,%rsp
@@ -824,6 +827,7 @@ KPROBE_ENTRY(error_entry)
movq %rsi,13*8(%rsp)
CFI_REL_OFFSET rsi,RSI
movq 14*8(%rsp),%rsi /* load rax from rdi slot */
+ CFI_REGISTER rax,rsi
movq %rdx,12*8(%rsp)
CFI_REL_OFFSET rdx,RDX
movq %rcx,11*8(%rsp)
@@ -857,6 +861,7 @@ error_swapgs:
swapgs
error_sti:
movq %rdi,RDI(%rsp)
+ CFI_REL_OFFSET rdi,RDI
movq %rsp,%rdi
movq ORIG_RAX(%rsp),%rsi /* get error code */
movq $-1,ORIG_RAX(%rsp)
diff --git a/arch/x86_64/kernel/functionlist b/arch/x86_64/kernel/functionlist
deleted file mode 100644
index 7ae18ec12454..000000000000
--- a/arch/x86_64/kernel/functionlist
+++ /dev/null
@@ -1,1284 +0,0 @@
-*(.text.flush_thread)
-*(.text.check_poison_obj)
-*(.text.copy_page)
-*(.text.__set_personality)
-*(.text.gart_map_sg)
-*(.text.kmem_cache_free)
-*(.text.find_get_page)
-*(.text._raw_spin_lock)
-*(.text.ide_outb)
-*(.text.unmap_vmas)
-*(.text.copy_page_range)
-*(.text.kprobe_handler)
-*(.text.__handle_mm_fault)
-*(.text.__d_lookup)
-*(.text.copy_user_generic)
-*(.text.__link_path_walk)
-*(.text.get_page_from_freelist)
-*(.text.kmem_cache_alloc)
-*(.text.drive_cmd_intr)
-*(.text.ia32_setup_sigcontext)
-*(.text.huge_pte_offset)
-*(.text.do_page_fault)
-*(.text.page_remove_rmap)
-*(.text.release_pages)
-*(.text.ide_end_request)
-*(.text.__mutex_lock_slowpath)
-*(.text.__find_get_block)
-*(.text.kfree)
-*(.text.vfs_read)
-*(.text._raw_spin_unlock)
-*(.text.free_hot_cold_page)
-*(.text.fget_light)
-*(.text.schedule)
-*(.text.memcmp)
-*(.text.touch_atime)
-*(.text.__might_sleep)
-*(.text.__down_read_trylock)
-*(.text.arch_pick_mmap_layout)
-*(.text.find_vma)
-*(.text.__make_request)
-*(.text.do_generic_mapping_read)
-*(.text.mutex_lock_interruptible)
-*(.text.__generic_file_aio_read)
-*(.text._atomic_dec_and_lock)
-*(.text.__wake_up_bit)
-*(.text.add_to_page_cache)
-*(.text.cache_alloc_debugcheck_after)
-*(.text.vm_normal_page)
-*(.text.mutex_debug_check_no_locks_freed)
-*(.text.net_rx_action)
-*(.text.__find_first_zero_bit)
-*(.text.put_page)
-*(.text._raw_read_lock)
-*(.text.__delay)
-*(.text.dnotify_parent)
-*(.text.do_path_lookup)
-*(.text.do_sync_read)
-*(.text.do_lookup)
-*(.text.bit_waitqueue)
-*(.text.file_read_actor)
-*(.text.strncpy_from_user)
-*(.text.__pagevec_lru_add_active)
-*(.text.fget)
-*(.text.dput)
-*(.text.__strnlen_user)
-*(.text.inotify_inode_queue_event)
-*(.text.rw_verify_area)
-*(.text.ide_intr)
-*(.text.inotify_dentry_parent_queue_event)
-*(.text.permission)
-*(.text.memscan)
-*(.text.hpet_rtc_interrupt)
-*(.text.do_mmap_pgoff)
-*(.text.current_fs_time)
-*(.text.vfs_getattr)
-*(.text.kmem_flagcheck)
-*(.text.mark_page_accessed)
-*(.text.free_pages_and_swap_cache)
-*(.text.generic_fillattr)
-*(.text.__block_prepare_write)
-*(.text.__set_page_dirty_nobuffers)
-*(.text.link_path_walk)
-*(.text.find_get_pages_tag)
-*(.text.ide_do_request)
-*(.text.__alloc_pages)
-*(.text.generic_permission)
-*(.text.mod_page_state_offset)
-*(.text.free_pgd_range)
-*(.text.generic_file_buffered_write)
-*(.text.number)
-*(.text.ide_do_rw_disk)
-*(.text.__brelse)
-*(.text.__mod_page_state_offset)
-*(.text.rotate_reclaimable_page)
-*(.text.find_vma_prepare)
-*(.text.find_vma_prev)
-*(.text.lru_cache_add_active)
-*(.text.__kmalloc_track_caller)
-*(.text.smp_invalidate_interrupt)
-*(.text.handle_IRQ_event)
-*(.text.__find_get_block_slow)
-*(.text.do_wp_page)
-*(.text.do_select)
-*(.text.set_user_nice)
-*(.text.sys_read)
-*(.text.do_munmap)
-*(.text.csum_partial)
-*(.text.__do_softirq)
-*(.text.may_open)
-*(.text.getname)
-*(.text.get_empty_filp)
-*(.text.__fput)
-*(.text.remove_mapping)
-*(.text.filp_ctor)
-*(.text.poison_obj)
-*(.text.unmap_region)
-*(.text.test_set_page_writeback)
-*(.text.__do_page_cache_readahead)
-*(.text.sock_def_readable)
-*(.text.ide_outl)
-*(.text.shrink_zone)
-*(.text.rb_insert_color)
-*(.text.get_request)
-*(.text.sys_pread64)
-*(.text.spin_bug)
-*(.text.ide_outsl)
-*(.text.mask_and_ack_8259A)
-*(.text.filemap_nopage)
-*(.text.page_add_file_rmap)
-*(.text.find_lock_page)
-*(.text.tcp_poll)
-*(.text.__mark_inode_dirty)
-*(.text.file_ra_state_init)
-*(.text.generic_file_llseek)
-*(.text.__pagevec_lru_add)
-*(.text.page_cache_readahead)
-*(.text.n_tty_receive_buf)
-*(.text.zonelist_policy)
-*(.text.vma_adjust)
-*(.text.test_clear_page_dirty)
-*(.text.sync_buffer)
-*(.text.do_exit)
-*(.text.__bitmap_weight)
-*(.text.alloc_pages_current)
-*(.text.get_unused_fd)
-*(.text.zone_watermark_ok)
-*(.text.cpuset_update_task_memory_state)
-*(.text.__bitmap_empty)
-*(.text.sys_munmap)
-*(.text.__inode_dir_notify)
-*(.text.__generic_file_aio_write_nolock)
-*(.text.__pte_alloc)
-*(.text.sys_select)
-*(.text.vm_acct_memory)
-*(.text.vfs_write)
-*(.text.__lru_add_drain)
-*(.text.prio_tree_insert)
-*(.text.generic_file_aio_read)
-*(.text.vma_merge)
-*(.text.block_write_full_page)
-*(.text.__page_set_anon_rmap)
-*(.text.apic_timer_interrupt)
-*(.text.release_console_sem)
-*(.text.sys_write)
-*(.text.sys_brk)
-*(.text.dup_mm)
-*(.text.read_current_timer)
-*(.text.ll_rw_block)
-*(.text.blk_rq_map_sg)
-*(.text.dbg_userword)
-*(.text.__block_commit_write)
-*(.text.cache_grow)
-*(.text.copy_strings)
-*(.text.release_task)
-*(.text.do_sync_write)
-*(.text.unlock_page)
-*(.text.load_elf_binary)
-*(.text.__follow_mount)
-*(.text.__getblk)
-*(.text.do_sys_open)
-*(.text.current_kernel_time)
-*(.text.call_rcu)
-*(.text.write_chan)
-*(.text.vsnprintf)
-*(.text.dummy_inode_setsecurity)
-*(.text.submit_bh)
-*(.text.poll_freewait)
-*(.text.bio_alloc_bioset)
-*(.text.skb_clone)
-*(.text.page_waitqueue)
-*(.text.__mutex_lock_interruptible_slowpath)
-*(.text.get_index)
-*(.text.csum_partial_copy_generic)
-*(.text.bad_range)
-*(.text.remove_vma)
-*(.text.cp_new_stat)
-*(.text.alloc_arraycache)
-*(.text.test_clear_page_writeback)
-*(.text.strsep)
-*(.text.open_namei)
-*(.text._raw_read_unlock)
-*(.text.get_vma_policy)
-*(.text.__down_write_trylock)
-*(.text.find_get_pages)
-*(.text.tcp_rcv_established)
-*(.text.generic_make_request)
-*(.text.__block_write_full_page)
-*(.text.cfq_set_request)
-*(.text.sys_inotify_init)
-*(.text.split_vma)
-*(.text.__mod_timer)
-*(.text.get_options)
-*(.text.vma_link)
-*(.text.mpage_writepages)
-*(.text.truncate_complete_page)
-*(.text.tcp_recvmsg)
-*(.text.sigprocmask)
-*(.text.filemap_populate)
-*(.text.sys_close)
-*(.text.inotify_dev_queue_event)
-*(.text.do_task_stat)
-*(.text.__dentry_open)
-*(.text.unlink_file_vma)
-*(.text.__pollwait)
-*(.text.packet_rcv_spkt)
-*(.text.drop_buffers)
-*(.text.free_pgtables)
-*(.text.generic_file_direct_write)
-*(.text.copy_process)
-*(.text.netif_receive_skb)
-*(.text.dnotify_flush)
-*(.text.print_bad_pte)
-*(.text.anon_vma_unlink)
-*(.text.sys_mprotect)
-*(.text.sync_sb_inodes)
-*(.text.find_inode_fast)
-*(.text.dummy_inode_readlink)
-*(.text.putname)
-*(.text.init_smp_flush)
-*(.text.dbg_redzone2)
-*(.text.sk_run_filter)
-*(.text.may_expand_vm)
-*(.text.generic_file_aio_write)
-*(.text.find_next_zero_bit)
-*(.text.file_kill)
-*(.text.audit_getname)
-*(.text.arch_unmap_area_topdown)
-*(.text.alloc_page_vma)
-*(.text.tcp_transmit_skb)
-*(.text.rb_next)
-*(.text.dbg_redzone1)
-*(.text.generic_file_mmap)
-*(.text.vfs_fstat)
-*(.text.sys_time)
-*(.text.page_lock_anon_vma)
-*(.text.get_unmapped_area)
-*(.text.remote_llseek)
-*(.text.__up_read)
-*(.text.fd_install)
-*(.text.eventpoll_init_file)
-*(.text.dma_alloc_coherent)
-*(.text.create_empty_buffers)
-*(.text.__mutex_unlock_slowpath)
-*(.text.dup_fd)
-*(.text.d_alloc)
-*(.text.tty_ldisc_try)
-*(.text.sys_stime)
-*(.text.__rb_rotate_right)
-*(.text.d_validate)
-*(.text.rb_erase)
-*(.text.path_release)
-*(.text.memmove)
-*(.text.invalidate_complete_page)
-*(.text.clear_inode)
-*(.text.cache_estimate)
-*(.text.alloc_buffer_head)
-*(.text.smp_call_function_interrupt)
-*(.text.flush_tlb_others)
-*(.text.file_move)
-*(.text.balance_dirty_pages_ratelimited)
-*(.text.vma_prio_tree_add)
-*(.text.timespec_trunc)
-*(.text.mempool_alloc)
-*(.text.iget_locked)
-*(.text.d_alloc_root)
-*(.text.cpuset_populate_dir)
-*(.text.anon_vma_prepare)
-*(.text.sys_newstat)
-*(.text.alloc_page_interleave)
-*(.text.__path_lookup_intent_open)
-*(.text.__pagevec_free)
-*(.text.inode_init_once)
-*(.text.free_vfsmnt)
-*(.text.__user_walk_fd)
-*(.text.cfq_idle_slice_timer)
-*(.text.sys_mmap)
-*(.text.sys_llseek)
-*(.text.prio_tree_remove)
-*(.text.filp_close)
-*(.text.file_permission)
-*(.text.vma_prio_tree_remove)
-*(.text.tcp_ack)
-*(.text.nameidata_to_filp)
-*(.text.sys_lseek)
-*(.text.percpu_counter_mod)
-*(.text.igrab)
-*(.text.__bread)
-*(.text.alloc_inode)
-*(.text.filldir)
-*(.text.__rb_rotate_left)
-*(.text.irq_affinity_write_proc)
-*(.text.init_request_from_bio)
-*(.text.find_or_create_page)
-*(.text.tty_poll)
-*(.text.tcp_sendmsg)
-*(.text.ide_wait_stat)
-*(.text.free_buffer_head)
-*(.text.flush_signal_handlers)
-*(.text.tcp_v4_rcv)
-*(.text.nr_blockdev_pages)
-*(.text.locks_remove_flock)
-*(.text.__iowrite32_copy)
-*(.text.do_filp_open)
-*(.text.try_to_release_page)
-*(.text.page_add_new_anon_rmap)
-*(.text.kmem_cache_size)
-*(.text.eth_type_trans)
-*(.text.try_to_free_buffers)
-*(.text.schedule_tail)
-*(.text.proc_lookup)
-*(.text.no_llseek)
-*(.text.kfree_skbmem)
-*(.text.do_wait)
-*(.text.do_mpage_readpage)
-*(.text.vfs_stat_fd)
-*(.text.tty_write)
-*(.text.705)
-*(.text.sync_page)
-*(.text.__remove_shared_vm_struct)
-*(.text.__kfree_skb)
-*(.text.sock_poll)
-*(.text.get_request_wait)
-*(.text.do_sigaction)
-*(.text.do_brk)
-*(.text.tcp_event_data_recv)
-*(.text.read_chan)
-*(.text.pipe_writev)
-*(.text.__emul_lookup_dentry)
-*(.text.rtc_get_rtc_time)
-*(.text.print_objinfo)
-*(.text.file_update_time)
-*(.text.do_signal)
-*(.text.disable_8259A_irq)
-*(.text.blk_queue_bounce)
-*(.text.__anon_vma_link)
-*(.text.__vma_link)
-*(.text.vfs_rename)
-*(.text.sys_newlstat)
-*(.text.sys_newfstat)
-*(.text.sys_mknod)
-*(.text.__show_regs)
-*(.text.iput)
-*(.text.get_signal_to_deliver)
-*(.text.flush_tlb_page)
-*(.text.debug_mutex_wake_waiter)
-*(.text.copy_thread)
-*(.text.clear_page_dirty_for_io)
-*(.text.buffer_io_error)
-*(.text.vfs_permission)
-*(.text.truncate_inode_pages_range)
-*(.text.sys_recvfrom)
-*(.text.remove_suid)
-*(.text.mark_buffer_dirty)
-*(.text.local_bh_enable)
-*(.text.get_zeroed_page)
-*(.text.get_vmalloc_info)
-*(.text.flush_old_exec)
-*(.text.dummy_inode_permission)
-*(.text.__bio_add_page)
-*(.text.prio_tree_replace)
-*(.text.notify_change)
-*(.text.mntput_no_expire)
-*(.text.fput)
-*(.text.__end_that_request_first)
-*(.text.wake_up_bit)
-*(.text.unuse_mm)
-*(.text.shrink_icache_memory)
-*(.text.sched_balance_self)
-*(.text.__pmd_alloc)
-*(.text.pipe_poll)
-*(.text.normal_poll)
-*(.text.__free_pages)
-*(.text.follow_mount)
-*(.text.cdrom_start_packet_command)
-*(.text.blk_recount_segments)
-*(.text.bio_put)
-*(.text.__alloc_skb)
-*(.text.__wake_up)
-*(.text.vm_stat_account)
-*(.text.sys_fcntl)
-*(.text.sys_fadvise64)
-*(.text._raw_write_unlock)
-*(.text.__pud_alloc)
-*(.text.alloc_page_buffers)
-*(.text.vfs_llseek)
-*(.text.sockfd_lookup)
-*(.text._raw_write_lock)
-*(.text.put_compound_page)
-*(.text.prune_dcache)
-*(.text.pipe_readv)
-*(.text.mempool_free)
-*(.text.make_ahead_window)
-*(.text.lru_add_drain)
-*(.text.constant_test_bit)
-*(.text.__clear_user)
-*(.text.arch_unmap_area)
-*(.text.anon_vma_link)
-*(.text.sys_chroot)
-*(.text.setup_arg_pages)
-*(.text.radix_tree_preload)
-*(.text.init_rwsem)
-*(.text.generic_osync_inode)
-*(.text.generic_delete_inode)
-*(.text.do_sys_poll)
-*(.text.dev_queue_xmit)
-*(.text.default_llseek)
-*(.text.__writeback_single_inode)
-*(.text.vfs_ioctl)
-*(.text.__up_write)
-*(.text.unix_poll)
-*(.text.sys_rt_sigprocmask)
-*(.text.sock_recvmsg)
-*(.text.recalc_bh_state)
-*(.text.__put_unused_fd)
-*(.text.process_backlog)
-*(.text.locks_remove_posix)
-*(.text.lease_modify)
-*(.text.expand_files)
-*(.text.end_buffer_read_nobh)
-*(.text.d_splice_alias)
-*(.text.debug_mutex_init_waiter)
-*(.text.copy_from_user)
-*(.text.cap_vm_enough_memory)
-*(.text.show_vfsmnt)
-*(.text.release_sock)
-*(.text.pfifo_fast_enqueue)
-*(.text.half_md4_transform)
-*(.text.fs_may_remount_ro)
-*(.text.do_fork)
-*(.text.copy_hugetlb_page_range)
-*(.text.cache_free_debugcheck)
-*(.text.__tcp_select_window)
-*(.text.task_handoff_register)
-*(.text.sys_open)
-*(.text.strlcpy)
-*(.text.skb_copy_datagram_iovec)
-*(.text.set_up_list3s)
-*(.text.release_open_intent)
-*(.text.qdisc_restart)
-*(.text.n_tty_chars_in_buffer)
-*(.text.inode_change_ok)
-*(.text.__downgrade_write)
-*(.text.debug_mutex_unlock)
-*(.text.add_timer_randomness)
-*(.text.sock_common_recvmsg)
-*(.text.set_bh_page)
-*(.text.printk_lock)
-*(.text.path_release_on_umount)
-*(.text.ip_output)
-*(.text.ide_build_dmatable)
-*(.text.__get_user_8)
-*(.text.end_buffer_read_sync)
-*(.text.__d_path)
-*(.text.d_move)
-*(.text.del_timer)
-*(.text.constant_test_bit)
-*(.text.blockable_page_cache_readahead)
-*(.text.tty_read)
-*(.text.sys_readlink)
-*(.text.sys_faccessat)
-*(.text.read_swap_cache_async)
-*(.text.pty_write_room)
-*(.text.page_address_in_vma)
-*(.text.kthread)
-*(.text.cfq_exit_io_context)
-*(.text.__tcp_push_pending_frames)
-*(.text.sys_pipe)
-*(.text.submit_bio)
-*(.text.pid_revalidate)
-*(.text.page_referenced_file)
-*(.text.lock_sock)
-*(.text.get_page_state_node)
-*(.text.generic_block_bmap)
-*(.text.do_setitimer)
-*(.text.dev_queue_xmit_nit)
-*(.text.copy_from_read_buf)
-*(.text.__const_udelay)
-*(.text.console_conditional_schedule)
-*(.text.wake_up_new_task)
-*(.text.wait_for_completion_interruptible)
-*(.text.tcp_rcv_rtt_update)
-*(.text.sys_mlockall)
-*(.text.set_fs_altroot)
-*(.text.schedule_timeout)
-*(.text.nr_free_pagecache_pages)
-*(.text.nf_iterate)
-*(.text.mapping_tagged)
-*(.text.ip_queue_xmit)
-*(.text.ip_local_deliver)
-*(.text.follow_page)
-*(.text.elf_map)
-*(.text.dummy_file_permission)
-*(.text.dispose_list)
-*(.text.dentry_open)
-*(.text.dentry_iput)
-*(.text.bio_alloc)
-*(.text.wait_on_page_bit)
-*(.text.vfs_readdir)
-*(.text.vfs_lstat)
-*(.text.seq_escape)
-*(.text.__posix_lock_file)
-*(.text.mm_release)
-*(.text.kref_put)
-*(.text.ip_rcv)
-*(.text.__iget)
-*(.text.free_pages)
-*(.text.find_mergeable_anon_vma)
-*(.text.find_extend_vma)
-*(.text.dummy_inode_listsecurity)
-*(.text.bio_add_page)
-*(.text.__vm_enough_memory)
-*(.text.vfs_stat)
-*(.text.tty_paranoia_check)
-*(.text.tcp_read_sock)
-*(.text.tcp_data_queue)
-*(.text.sys_uname)
-*(.text.sys_renameat)
-*(.text.__strncpy_from_user)
-*(.text.__mutex_init)
-*(.text.__lookup_hash)
-*(.text.kref_get)
-*(.text.ip_route_input)
-*(.text.__insert_inode_hash)
-*(.text.do_sock_write)
-*(.text.blk_done_softirq)
-*(.text.__wake_up_sync)
-*(.text.__vma_link_rb)
-*(.text.tty_ioctl)
-*(.text.tracesys)
-*(.text.sys_getdents)
-*(.text.sys_dup)
-*(.text.stub_execve)
-*(.text.sha_transform)
-*(.text.radix_tree_tag_clear)
-*(.text.put_unused_fd)
-*(.text.put_files_struct)
-*(.text.mpage_readpages)
-*(.text.may_delete)
-*(.text.kmem_cache_create)
-*(.text.ip_mc_output)
-*(.text.interleave_nodes)
-*(.text.groups_search)
-*(.text.generic_drop_inode)
-*(.text.generic_commit_write)
-*(.text.fcntl_setlk)
-*(.text.exit_mmap)
-*(.text.end_page_writeback)
-*(.text.__d_rehash)
-*(.text.debug_mutex_free_waiter)
-*(.text.csum_ipv6_magic)
-*(.text.count)
-*(.text.cleanup_rbuf)
-*(.text.check_spinlock_acquired_node)
-*(.text.can_vma_merge_after)
-*(.text.bio_endio)
-*(.text.alloc_pidmap)
-*(.text.write_ldt)
-*(.text.vmtruncate_range)
-*(.text.vfs_create)
-*(.text.__user_walk)
-*(.text.update_send_head)
-*(.text.unmap_underlying_metadata)
-*(.text.tty_ldisc_deref)
-*(.text.tcp_setsockopt)
-*(.text.tcp_send_ack)
-*(.text.sys_pause)
-*(.text.sys_gettimeofday)
-*(.text.sync_dirty_buffer)
-*(.text.strncmp)
-*(.text.release_posix_timer)
-*(.text.proc_file_read)
-*(.text.prepare_to_wait)
-*(.text.locks_mandatory_locked)
-*(.text.interruptible_sleep_on_timeout)
-*(.text.inode_sub_bytes)
-*(.text.in_group_p)
-*(.text.hrtimer_try_to_cancel)
-*(.text.filldir64)
-*(.text.fasync_helper)
-*(.text.dummy_sb_pivotroot)
-*(.text.d_lookup)
-*(.text.d_instantiate)
-*(.text.__d_find_alias)
-*(.text.cpu_idle_wait)
-*(.text.cond_resched_lock)
-*(.text.chown_common)
-*(.text.blk_congestion_wait)
-*(.text.activate_page)
-*(.text.unlock_buffer)
-*(.text.tty_wakeup)
-*(.text.tcp_v4_do_rcv)
-*(.text.tcp_current_mss)
-*(.text.sys_openat)
-*(.text.sys_fchdir)
-*(.text.strnlen_user)
-*(.text.strnlen)
-*(.text.strchr)
-*(.text.sock_common_getsockopt)
-*(.text.skb_checksum)
-*(.text.remove_wait_queue)
-*(.text.rb_replace_node)
-*(.text.radix_tree_node_ctor)
-*(.text.pty_chars_in_buffer)
-*(.text.profile_hit)
-*(.text.prio_tree_left)
-*(.text.pgd_clear_bad)
-*(.text.pfifo_fast_dequeue)
-*(.text.page_referenced)
-*(.text.open_exec)
-*(.text.mmput)
-*(.text.mm_init)
-*(.text.__ide_dma_off_quietly)
-*(.text.ide_dma_intr)
-*(.text.hrtimer_start)
-*(.text.get_io_context)
-*(.text.__get_free_pages)
-*(.text.find_first_zero_bit)
-*(.text.file_free_rcu)
-*(.text.dummy_socket_sendmsg)
-*(.text.do_unlinkat)
-*(.text.do_arch_prctl)
-*(.text.destroy_inode)
-*(.text.can_vma_merge_before)
-*(.text.block_sync_page)
-*(.text.block_prepare_write)
-*(.text.bio_init)
-*(.text.arch_ptrace)
-*(.text.wake_up_inode)
-*(.text.wait_on_retry_sync_kiocb)
-*(.text.vma_prio_tree_next)
-*(.text.tcp_rcv_space_adjust)
-*(.text.__tcp_ack_snd_check)
-*(.text.sys_utime)
-*(.text.sys_recvmsg)
-*(.text.sys_mremap)
-*(.text.sys_bdflush)
-*(.text.sleep_on)
-*(.text.set_page_dirty_lock)
-*(.text.seq_path)
-*(.text.schedule_timeout_interruptible)
-*(.text.sched_fork)
-*(.text.rt_run_flush)
-*(.text.profile_munmap)
-*(.text.prepare_binprm)
-*(.text.__pagevec_release_nonlru)
-*(.text.m_show)
-*(.text.lookup_mnt)
-*(.text.__lookup_mnt)
-*(.text.lock_timer_base)
-*(.text.is_subdir)
-*(.text.invalidate_bh_lru)
-*(.text.init_buffer_head)
-*(.text.ifind_fast)
-*(.text.ide_dma_start)
-*(.text.__get_page_state)
-*(.text.flock_to_posix_lock)
-*(.text.__find_symbol)
-*(.text.do_futex)
-*(.text.do_execve)
-*(.text.dirty_writeback_centisecs_handler)
-*(.text.dev_watchdog)
-*(.text.can_share_swap_page)
-*(.text.blkdev_put)
-*(.text.bio_get_nr_vecs)
-*(.text.xfrm_compile_policy)
-*(.text.vma_prio_tree_insert)
-*(.text.vfs_lstat_fd)
-*(.text.__user_path_lookup_open)
-*(.text.thread_return)
-*(.text.tcp_send_delayed_ack)
-*(.text.sock_def_error_report)
-*(.text.shrink_slab)
-*(.text.serial_out)
-*(.text.seq_read)
-*(.text.secure_ip_id)
-*(.text.search_binary_handler)
-*(.text.proc_pid_unhash)
-*(.text.pagevec_lookup)
-*(.text.new_inode)
-*(.text.memcpy_toiovec)
-*(.text.locks_free_lock)
-*(.text.__lock_page)
-*(.text.__lock_buffer)
-*(.text.load_module)
-*(.text.is_bad_inode)
-*(.text.invalidate_inode_buffers)
-*(.text.insert_vm_struct)
-*(.text.inode_setattr)
-*(.text.inode_add_bytes)
-*(.text.ide_read_24)
-*(.text.ide_get_error_location)
-*(.text.ide_do_drive_cmd)
-*(.text.get_locked_pte)
-*(.text.get_filesystem_list)
-*(.text.generic_file_open)
-*(.text.follow_down)
-*(.text.find_next_bit)
-*(.text.__find_first_bit)
-*(.text.exit_mm)
-*(.text.exec_keys)
-*(.text.end_buffer_write_sync)
-*(.text.end_bio_bh_io_sync)
-*(.text.dummy_socket_shutdown)
-*(.text.d_rehash)
-*(.text.d_path)
-*(.text.do_ioctl)
-*(.text.dget_locked)
-*(.text.copy_thread_group_keys)
-*(.text.cdrom_end_request)
-*(.text.cap_bprm_apply_creds)
-*(.text.blk_rq_bio_prep)
-*(.text.__bitmap_intersects)
-*(.text.bio_phys_segments)
-*(.text.bio_free)
-*(.text.arch_get_unmapped_area_topdown)
-*(.text.writeback_in_progress)
-*(.text.vfs_follow_link)
-*(.text.tcp_rcv_state_process)
-*(.text.tcp_check_space)
-*(.text.sys_stat)
-*(.text.sys_rt_sigreturn)
-*(.text.sys_rt_sigaction)
-*(.text.sys_remap_file_pages)
-*(.text.sys_pwrite64)
-*(.text.sys_fchownat)
-*(.text.sys_fchmodat)
-*(.text.strncat)
-*(.text.strlcat)
-*(.text.strcmp)
-*(.text.steal_locks)
-*(.text.sock_create)
-*(.text.sk_stream_rfree)
-*(.text.sk_stream_mem_schedule)
-*(.text.skip_atoi)
-*(.text.sk_alloc)
-*(.text.show_stat)
-*(.text.set_fs_pwd)
-*(.text.set_binfmt)
-*(.text.pty_unthrottle)
-*(.text.proc_symlink)
-*(.text.pipe_release)
-*(.text.pageout)
-*(.text.n_tty_write_wakeup)
-*(.text.n_tty_ioctl)
-*(.text.nr_free_zone_pages)
-*(.text.migration_thread)
-*(.text.mempool_free_slab)
-*(.text.meminfo_read_proc)
-*(.text.max_sane_readahead)
-*(.text.lru_cache_add)
-*(.text.kill_fasync)
-*(.text.kernel_read)
-*(.text.invalidate_mapping_pages)
-*(.text.inode_has_buffers)
-*(.text.init_once)
-*(.text.inet_sendmsg)
-*(.text.idedisk_issue_flush)
-*(.text.generic_file_write)
-*(.text.free_more_memory)
-*(.text.__free_fdtable)
-*(.text.filp_dtor)
-*(.text.exit_sem)
-*(.text.exit_itimers)
-*(.text.error_interrupt)
-*(.text.end_buffer_async_write)
-*(.text.eligible_child)
-*(.text.elf_map)
-*(.text.dump_task_regs)
-*(.text.dummy_task_setscheduler)
-*(.text.dummy_socket_accept)
-*(.text.dummy_file_free_security)
-*(.text.__down_read)
-*(.text.do_sock_read)
-*(.text.do_sigaltstack)
-*(.text.do_mremap)
-*(.text.current_io_context)
-*(.text.cpu_swap_callback)
-*(.text.copy_vma)
-*(.text.cap_bprm_set_security)
-*(.text.blk_insert_request)
-*(.text.bio_map_kern_endio)
-*(.text.bio_hw_segments)
-*(.text.bictcp_cong_avoid)
-*(.text.add_interrupt_randomness)
-*(.text.wait_for_completion)
-*(.text.version_read_proc)
-*(.text.unix_write_space)
-*(.text.tty_ldisc_ref_wait)
-*(.text.tty_ldisc_put)
-*(.text.try_to_wake_up)
-*(.text.tcp_v4_tw_remember_stamp)
-*(.text.tcp_try_undo_dsack)
-*(.text.tcp_may_send_now)
-*(.text.sys_waitid)
-*(.text.sys_sched_getparam)
-*(.text.sys_getppid)
-*(.text.sys_getcwd)
-*(.text.sys_dup2)
-*(.text.sys_chmod)
-*(.text.sys_chdir)
-*(.text.sprintf)
-*(.text.sock_wfree)
-*(.text.sock_aio_write)
-*(.text.skb_drop_fraglist)
-*(.text.skb_dequeue)
-*(.text.set_close_on_exec)
-*(.text.set_brk)
-*(.text.seq_puts)
-*(.text.SELECT_DRIVE)
-*(.text.sched_exec)
-*(.text.return_EIO)
-*(.text.remove_from_page_cache)
-*(.text.rcu_start_batch)
-*(.text.__put_task_struct)
-*(.text.proc_pid_readdir)
-*(.text.proc_get_inode)
-*(.text.prepare_to_wait_exclusive)
-*(.text.pipe_wait)
-*(.text.pipe_new)
-*(.text.pdflush_operation)
-*(.text.__pagevec_release)
-*(.text.pagevec_lookup_tag)
-*(.text.packet_rcv)
-*(.text.n_tty_set_room)
-*(.text.nr_free_pages)
-*(.text.__net_timestamp)
-*(.text.mpage_end_io_read)
-*(.text.mod_timer)
-*(.text.__memcpy)
-*(.text.mb_cache_shrink_fn)
-*(.text.lock_rename)
-*(.text.kstrdup)
-*(.text.is_ignored)
-*(.text.int_very_careful)
-*(.text.inotify_inode_is_dead)
-*(.text.inotify_get_cookie)
-*(.text.inode_get_bytes)
-*(.text.init_timer)
-*(.text.init_dev)
-*(.text.inet_getname)
-*(.text.ide_map_sg)
-*(.text.__ide_dma_end)
-*(.text.hrtimer_get_remaining)
-*(.text.get_task_mm)
-*(.text.get_random_int)
-*(.text.free_pipe_info)
-*(.text.filemap_write_and_wait_range)
-*(.text.exit_thread)
-*(.text.enter_idle)
-*(.text.end_that_request_first)
-*(.text.end_8259A_irq)
-*(.text.dummy_file_alloc_security)
-*(.text.do_group_exit)
-*(.text.debug_mutex_init)
-*(.text.cpuset_exit)
-*(.text.cpu_idle)
-*(.text.copy_semundo)
-*(.text.copy_files)
-*(.text.chrdev_open)
-*(.text.cdrom_transfer_packet_command)
-*(.text.cdrom_mode_sense)
-*(.text.blk_phys_contig_segment)
-*(.text.blk_get_queue)
-*(.text.bio_split)
-*(.text.audit_alloc)
-*(.text.anon_pipe_buf_release)
-*(.text.add_wait_queue_exclusive)
-*(.text.add_wait_queue)
-*(.text.acct_process)
-*(.text.account)
-*(.text.zeromap_page_range)
-*(.text.yield)
-*(.text.writeback_acquire)
-*(.text.worker_thread)
-*(.text.wait_on_page_writeback_range)
-*(.text.__wait_on_buffer)
-*(.text.vscnprintf)
-*(.text.vmalloc_to_pfn)
-*(.text.vgacon_save_screen)
-*(.text.vfs_unlink)
-*(.text.vfs_rmdir)
-*(.text.unregister_md_personality)
-*(.text.unlock_new_inode)
-*(.text.unix_stream_sendmsg)
-*(.text.unix_stream_recvmsg)
-*(.text.unhash_process)
-*(.text.udp_v4_lookup_longway)
-*(.text.tty_ldisc_flush)
-*(.text.tty_ldisc_enable)
-*(.text.tty_hung_up_p)
-*(.text.tty_buffer_free_all)
-*(.text.tso_fragment)
-*(.text.try_to_del_timer_sync)
-*(.text.tcp_v4_err)
-*(.text.tcp_unhash)
-*(.text.tcp_seq_next)
-*(.text.tcp_select_initial_window)
-*(.text.tcp_sacktag_write_queue)
-*(.text.tcp_cwnd_validate)
-*(.text.sys_vhangup)
-*(.text.sys_uselib)
-*(.text.sys_symlink)
-*(.text.sys_signal)
-*(.text.sys_poll)
-*(.text.sys_mount)
-*(.text.sys_kill)
-*(.text.sys_ioctl)
-*(.text.sys_inotify_add_watch)
-*(.text.sys_getuid)
-*(.text.sys_getrlimit)
-*(.text.sys_getitimer)
-*(.text.sys_getgroups)
-*(.text.sys_ftruncate)
-*(.text.sysfs_lookup)
-*(.text.sys_exit_group)
-*(.text.stub_fork)
-*(.text.sscanf)
-*(.text.sock_map_fd)
-*(.text.sock_get_timestamp)
-*(.text.__sock_create)
-*(.text.smp_call_function_single)
-*(.text.sk_stop_timer)
-*(.text.skb_copy_and_csum_datagram)
-*(.text.__skb_checksum_complete)
-*(.text.single_next)
-*(.text.sigqueue_alloc)
-*(.text.shrink_dcache_parent)
-*(.text.select_idle_routine)
-*(.text.run_workqueue)
-*(.text.run_local_timers)
-*(.text.remove_inode_hash)
-*(.text.remove_dquot_ref)
-*(.text.register_binfmt)
-*(.text.read_cache_pages)
-*(.text.rb_last)
-*(.text.pty_open)
-*(.text.proc_root_readdir)
-*(.text.proc_pid_flush)
-*(.text.proc_pident_lookup)
-*(.text.proc_fill_super)
-*(.text.proc_exe_link)
-*(.text.posix_locks_deadlock)
-*(.text.pipe_iov_copy_from_user)
-*(.text.opost)
-*(.text.nf_register_hook)
-*(.text.netif_rx_ni)
-*(.text.m_start)
-*(.text.mpage_writepage)
-*(.text.mm_alloc)
-*(.text.memory_open)
-*(.text.mark_buffer_async_write)
-*(.text.lru_add_drain_all)
-*(.text.locks_init_lock)
-*(.text.locks_delete_lock)
-*(.text.lock_hrtimer_base)
-*(.text.load_script)
-*(.text.__kill_fasync)
-*(.text.ip_mc_sf_allow)
-*(.text.__ioremap)
-*(.text.int_with_check)
-*(.text.int_sqrt)
-*(.text.install_thread_keyring)
-*(.text.init_page_buffers)
-*(.text.inet_sock_destruct)
-*(.text.idle_notifier_register)
-*(.text.ide_execute_command)
-*(.text.ide_end_drive_cmd)
-*(.text.__ide_dma_host_on)
-*(.text.hrtimer_run_queues)
-*(.text.hpet_mask_rtc_irq_bit)
-*(.text.__get_zone_counts)
-*(.text.get_zone_counts)
-*(.text.get_write_access)
-*(.text.get_fs_struct)
-*(.text.get_dirty_limits)
-*(.text.generic_readlink)
-*(.text.free_hot_page)
-*(.text.finish_wait)
-*(.text.find_inode)
-*(.text.find_first_bit)
-*(.text.__filemap_fdatawrite_range)
-*(.text.__filemap_copy_from_user_iovec)
-*(.text.exit_aio)
-*(.text.elv_set_request)
-*(.text.elv_former_request)
-*(.text.dup_namespace)
-*(.text.dupfd)
-*(.text.dummy_socket_getsockopt)
-*(.text.dummy_sb_post_mountroot)
-*(.text.dummy_quotactl)
-*(.text.dummy_inode_rename)
-*(.text.__do_SAK)
-*(.text.do_pipe)
-*(.text.do_fsync)
-*(.text.d_instantiate_unique)
-*(.text.d_find_alias)
-*(.text.deny_write_access)
-*(.text.dentry_unhash)
-*(.text.d_delete)
-*(.text.datagram_poll)
-*(.text.cpuset_fork)
-*(.text.cpuid_read)
-*(.text.copy_namespace)
-*(.text.cond_resched)
-*(.text.check_version)
-*(.text.__change_page_attr)
-*(.text.cfq_slab_kill)
-*(.text.cfq_completed_request)
-*(.text.cdrom_pc_intr)
-*(.text.cdrom_decode_status)
-*(.text.cap_capset_check)
-*(.text.blk_put_request)
-*(.text.bio_fs_destructor)
-*(.text.bictcp_min_cwnd)
-*(.text.alloc_chrdev_region)
-*(.text.add_element)
-*(.text.acct_update_integrals)
-*(.text.write_boundary_block)
-*(.text.writeback_release)
-*(.text.writeback_inodes)
-*(.text.wake_up_state)
-*(.text.__wake_up_locked)
-*(.text.wake_futex)
-*(.text.wait_task_inactive)
-*(.text.__wait_on_freeing_inode)
-*(.text.wait_noreap_copyout)
-*(.text.vmstat_start)
-*(.text.vgacon_do_font_op)
-*(.text.vfs_readv)
-*(.text.vfs_quota_sync)
-*(.text.update_queue)
-*(.text.unshare_files)
-*(.text.unmap_vm_area)
-*(.text.unix_socketpair)
-*(.text.unix_release_sock)
-*(.text.unix_detach_fds)
-*(.text.unix_create1)
-*(.text.unix_bind)
-*(.text.udp_sendmsg)
-*(.text.udp_rcv)
-*(.text.udp_queue_rcv_skb)
-*(.text.uart_write)
-*(.text.uart_startup)
-*(.text.uart_open)
-*(.text.tty_vhangup)
-*(.text.tty_termios_baud_rate)
-*(.text.tty_release)
-*(.text.tty_ldisc_ref)
-*(.text.throttle_vm_writeout)
-*(.text.058)
-*(.text.tcp_xmit_probe_skb)
-*(.text.tcp_v4_send_check)
-*(.text.tcp_v4_destroy_sock)
-*(.text.tcp_sync_mss)
-*(.text.tcp_snd_test)
-*(.text.tcp_slow_start)
-*(.text.tcp_send_fin)
-*(.text.tcp_rtt_estimator)
-*(.text.tcp_parse_options)
-*(.text.tcp_ioctl)
-*(.text.tcp_init_tso_segs)
-*(.text.tcp_init_cwnd)
-*(.text.tcp_getsockopt)
-*(.text.tcp_fin)
-*(.text.tcp_connect)
-*(.text.tcp_cong_avoid)
-*(.text.__tcp_checksum_complete_user)
-*(.text.task_dumpable)
-*(.text.sys_wait4)
-*(.text.sys_utimes)
-*(.text.sys_symlinkat)
-*(.text.sys_socketpair)
-*(.text.sys_rmdir)
-*(.text.sys_readahead)
-*(.text.sys_nanosleep)
-*(.text.sys_linkat)
-*(.text.sys_fstat)
-*(.text.sysfs_readdir)
-*(.text.sys_execve)
-*(.text.sysenter_tracesys)
-*(.text.sys_chown)
-*(.text.stub_clone)
-*(.text.strrchr)
-*(.text.strncpy)
-*(.text.stopmachine_set_state)
-*(.text.sock_sendmsg)
-*(.text.sock_release)
-*(.text.sock_fasync)
-*(.text.sock_close)
-*(.text.sk_stream_write_space)
-*(.text.sk_reset_timer)
-*(.text.skb_split)
-*(.text.skb_recv_datagram)
-*(.text.skb_queue_tail)
-*(.text.sk_attach_filter)
-*(.text.si_swapinfo)
-*(.text.simple_strtoll)
-*(.text.set_termios)
-*(.text.set_task_comm)
-*(.text.set_shrinker)
-*(.text.set_normalized_timespec)
-*(.text.set_brk)
-*(.text.serial_in)
-*(.text.seq_printf)
-*(.text.secure_dccp_sequence_number)
-*(.text.rwlock_bug)
-*(.text.rt_hash_code)
-*(.text.__rta_fill)
-*(.text.__request_resource)
-*(.text.relocate_new_kernel)
-*(.text.release_thread)
-*(.text.release_mem)
-*(.text.rb_prev)
-*(.text.rb_first)
-*(.text.random_poll)
-*(.text.__put_super_and_need_restart)
-*(.text.pty_write)
-*(.text.ptrace_stop)
-*(.text.proc_self_readlink)
-*(.text.proc_root_lookup)
-*(.text.proc_root_link)
-*(.text.proc_pid_make_inode)
-*(.text.proc_pid_attr_write)
-*(.text.proc_lookupfd)
-*(.text.proc_delete_inode)
-*(.text.posix_same_owner)
-*(.text.posix_block_lock)
-*(.text.poll_initwait)
-*(.text.pipe_write)
-*(.text.pipe_read_fasync)
-*(.text.pipe_ioctl)
-*(.text.pdflush)
-*(.text.pci_user_read_config_dword)
-*(.text.page_readlink)
-*(.text.null_lseek)
-*(.text.nf_hook_slow)
-*(.text.netlink_sock_destruct)
-*(.text.netlink_broadcast)
-*(.text.neigh_resolve_output)
-*(.text.name_to_int)
-*(.text.mwait_idle)
-*(.text.mutex_trylock)
-*(.text.mutex_debug_check_no_locks_held)
-*(.text.m_stop)
-*(.text.mpage_end_io_write)
-*(.text.mpage_alloc)
-*(.text.move_page_tables)
-*(.text.mounts_open)
-*(.text.__memset)
-*(.text.memcpy_fromiovec)
-*(.text.make_8259A_irq)
-*(.text.lookup_user_key_possessed)
-*(.text.lookup_create)
-*(.text.locks_insert_lock)
-*(.text.locks_alloc_lock)
-*(.text.kthread_should_stop)
-*(.text.kswapd)
-*(.text.kobject_uevent)
-*(.text.kobject_get_path)
-*(.text.kobject_get)
-*(.text.klist_children_put)
-*(.text.__ip_route_output_key)
-*(.text.ip_flush_pending_frames)
-*(.text.ip_compute_csum)
-*(.text.ip_append_data)
-*(.text.ioc_set_batching)
-*(.text.invalidate_inode_pages)
-*(.text.__invalidate_device)
-*(.text.install_arg_page)
-*(.text.in_sched_functions)
-*(.text.inotify_unmount_inodes)
-*(.text.init_once)
-*(.text.init_cdrom_command)
-*(.text.inet_stream_connect)
-*(.text.inet_sk_rebuild_header)
-*(.text.inet_csk_addr2sockaddr)
-*(.text.inet_create)
-*(.text.ifind)
-*(.text.ide_setup_dma)
-*(.text.ide_outsw)
-*(.text.ide_fixstring)
-*(.text.ide_dma_setup)
-*(.text.ide_cdrom_packet)
-*(.text.ide_cd_put)
-*(.text.ide_build_sglist)
-*(.text.i8259A_shutdown)
-*(.text.hung_up_tty_ioctl)
-*(.text.hrtimer_nanosleep)
-*(.text.hrtimer_init)
-*(.text.hrtimer_cancel)
-*(.text.hash_futex)
-*(.text.group_send_sig_info)
-*(.text.grab_cache_page_nowait)
-*(.text.get_wchan)
-*(.text.get_stack)
-*(.text.get_page_state)
-*(.text.getnstimeofday)
-*(.text.get_node)
-*(.text.get_kprobe)
-*(.text.generic_unplug_device)
-*(.text.free_task)
-*(.text.frag_show)
-*(.text.find_next_zero_string)
-*(.text.filp_open)
-*(.text.fillonedir)
-*(.text.exit_io_context)
-*(.text.exit_idle)
-*(.text.exact_lock)
-*(.text.eth_header)
-*(.text.dummy_unregister_security)
-*(.text.dummy_socket_post_create)
-*(.text.dummy_socket_listen)
-*(.text.dummy_quota_on)
-*(.text.dummy_inode_follow_link)
-*(.text.dummy_file_receive)
-*(.text.dummy_file_mprotect)
-*(.text.dummy_file_lock)
-*(.text.dummy_file_ioctl)
-*(.text.dummy_bprm_post_apply_creds)
-*(.text.do_writepages)
-*(.text.__down_interruptible)
-*(.text.do_notify_resume)
-*(.text.do_acct_process)
-*(.text.del_timer_sync)
-*(.text.default_rebuild_header)
-*(.text.d_callback)
-*(.text.dcache_readdir)
-*(.text.ctrl_dumpfamily)
-*(.text.cpuset_rmdir)
-*(.text.copy_strings_kernel)
-*(.text.con_write_room)
-*(.text.complete_all)
-*(.text.collect_sigign_sigcatch)
-*(.text.clear_user)
-*(.text.check_unthrottle)
-*(.text.cdrom_release)
-*(.text.cdrom_newpc_intr)
-*(.text.cdrom_ioctl)
-*(.text.cdrom_check_status)
-*(.text.cdev_put)
-*(.text.cdev_add)
-*(.text.cap_ptrace)
-*(.text.cap_bprm_secureexec)
-*(.text.cache_alloc_refill)
-*(.text.bmap)
-*(.text.blk_run_queue)
-*(.text.blk_queue_dma_alignment)
-*(.text.blk_ordered_req_seq)
-*(.text.blk_backing_dev_unplug)
-*(.text.__bitmap_subset)
-*(.text.__bitmap_and)
-*(.text.bio_unmap_user)
-*(.text.__bforget)
-*(.text.bd_forget)
-*(.text.bad_pipe_w)
-*(.text.bad_get_user)
-*(.text.audit_free)
-*(.text.anon_vma_ctor)
-*(.text.anon_pipe_buf_map)
-*(.text.alloc_sock_iocb)
-*(.text.alloc_fdset)
-*(.text.aio_kick_handler)
-*(.text.__add_entropy_words)
-*(.text.add_disk_randomness)
diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c
index 0b3603adf56d..47496a40e84f 100644
--- a/arch/x86_64/kernel/genapic.c
+++ b/arch/x86_64/kernel/genapic.c
@@ -11,120 +11,54 @@
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/string.h>
+#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/ctype.h>
#include <linux/init.h>
-#include <linux/module.h>
#include <asm/smp.h>
#include <asm/ipi.h>
+#include <asm/genapic.h>
-#if defined(CONFIG_ACPI)
+#ifdef CONFIG_ACPI
#include <acpi/acpi_bus.h>
#endif
/* which logical CPU number maps to which CPU (physical APIC ID) */
-u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
+u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly
+ = { [0 ... NR_CPUS-1] = BAD_APICID };
EXPORT_SYMBOL(x86_cpu_to_apicid);
-u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
-extern struct genapic apic_cluster;
-extern struct genapic apic_flat;
-extern struct genapic apic_physflat;
+u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
-struct genapic *genapic = &apic_flat;
-struct genapic *genapic_force;
+struct genapic __read_mostly *genapic = &apic_flat;
/*
* Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
*/
-void __init clustered_apic_check(void)
+void __init setup_apic_routing(void)
{
- long i;
- u8 clusters, max_cluster;
- u8 id;
- u8 cluster_cnt[NUM_APIC_CLUSTERS];
- int max_apic = 0;
-
- /* genapic selection can be forced because of certain quirks.
- */
- if (genapic_force) {
- genapic = genapic_force;
- goto print;
- }
-
-#if defined(CONFIG_ACPI)
+#ifdef CONFIG_ACPI
/*
- * Some x86_64 machines use physical APIC mode regardless of how many
- * procs/clusters are present (x86_64 ES7000 is an example).
+ * Quirk: some x86_64 machines can only use physical APIC mode
+ * regardless of how many processors are present (x86_64 ES7000
+ * is an example).
*/
- if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID)
- if (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) {
- genapic = &apic_cluster;
- goto print;
- }
-#endif
-
- memset(cluster_cnt, 0, sizeof(cluster_cnt));
- for (i = 0; i < NR_CPUS; i++) {
- id = bios_cpu_apicid[i];
- if (id == BAD_APICID)
- continue;
- if (id > max_apic)
- max_apic = id;
- cluster_cnt[APIC_CLUSTERID(id)]++;
- }
-
- /* Don't use clustered mode on AMD platforms. */
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+ if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
+ (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
genapic = &apic_physflat;
-#ifndef CONFIG_HOTPLUG_CPU
- /* In the CPU hotplug case we cannot use broadcast mode
- because that opens a race when a CPU is removed.
- Stay at physflat mode in this case.
- It is bad to do this unconditionally though. Once
- we have ACPI platform support for CPU hotplug
- we should detect hotplug capablity from ACPI tables and
- only do this when really needed. -AK */
- if (max_apic <= 8)
- genapic = &apic_flat;
+ else
#endif
- goto print;
- }
- clusters = 0;
- max_cluster = 0;
-
- for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
- if (cluster_cnt[i] > 0) {
- ++clusters;
- if (cluster_cnt[i] > max_cluster)
- max_cluster = cluster_cnt[i];
- }
- }
-
- /*
- * If we have clusters <= 1 and CPUs <= 8 in cluster 0, then flat mode,
- * else if max_cluster <= 4 and cluster_cnt[15] == 0, clustered logical
- * else physical mode.
- * (We don't use lowest priority delivery + HW APIC IRQ steering, so
- * can ignore the clustered logical case and go straight to physical.)
- */
- if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster) {
-#ifdef CONFIG_HOTPLUG_CPU
- /* Don't use APIC shortcuts in CPU hotplug to avoid races */
- genapic = &apic_physflat;
-#else
+ if (cpus_weight(cpu_possible_map) <= 8)
genapic = &apic_flat;
-#endif
- } else
- genapic = &apic_cluster;
+ else
+ genapic = &apic_physflat;
-print:
printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
}
-/* Same for both flat and clustered. */
+/* Same for both flat and physical. */
void send_IPI_self(int vector)
{
diff --git a/arch/x86_64/kernel/genapic_cluster.c b/arch/x86_64/kernel/genapic_cluster.c
deleted file mode 100644
index 73d76308b955..000000000000
--- a/arch/x86_64/kernel/genapic_cluster.c
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright 2004 James Cleverdon, IBM.
- * Subject to the GNU Public License, v.2
- *
- * Clustered APIC subarch code. Up to 255 CPUs, physical delivery.
- * (A more realistic maximum is around 230 CPUs.)
- *
- * Hacked for x86-64 by James Cleverdon from i386 architecture code by
- * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
- * James Cleverdon.
- */
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/ctype.h>
-#include <linux/init.h>
-#include <asm/smp.h>
-#include <asm/ipi.h>
-
-
-/*
- * Set up the logical destination ID.
- *
- * Intel recommends to set DFR, LDR and TPR before enabling
- * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
- * document number 292116). So here it goes...
- */
-static void cluster_init_apic_ldr(void)
-{
- unsigned long val, id;
- long i, count;
- u8 lid;
- u8 my_id = hard_smp_processor_id();
- u8 my_cluster = APIC_CLUSTER(my_id);
-
- /* Create logical APIC IDs by counting CPUs already in cluster. */
- for (count = 0, i = NR_CPUS; --i >= 0; ) {
- lid = x86_cpu_to_log_apicid[i];
- if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster)
- ++count;
- }
- /*
- * We only have a 4 wide bitmap in cluster mode. There's no way
- * to get above 60 CPUs and still give each one it's own bit.
- * But, we're using physical IRQ delivery, so we don't care.
- * Use bit 3 for the 4th through Nth CPU in each cluster.
- */
- if (count >= XAPIC_DEST_CPUS_SHIFT)
- count = 3;
- id = my_cluster | (1UL << count);
- x86_cpu_to_log_apicid[smp_processor_id()] = id;
- apic_write(APIC_DFR, APIC_DFR_CLUSTER);
- val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
- val |= SET_APIC_LOGICAL_ID(id);
- apic_write(APIC_LDR, val);
-}
-
-/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
-
-static cpumask_t cluster_target_cpus(void)
-{
- return cpumask_of_cpu(0);
-}
-
-static cpumask_t cluster_vector_allocation_domain(int cpu)
-{
- cpumask_t domain = CPU_MASK_NONE;
- cpu_set(cpu, domain);
- return domain;
-}
-
-static void cluster_send_IPI_mask(cpumask_t mask, int vector)
-{
- send_IPI_mask_sequence(mask, vector);
-}
-
-static void cluster_send_IPI_allbutself(int vector)
-{
- cpumask_t mask = cpu_online_map;
-
- cpu_clear(smp_processor_id(), mask);
-
- if (!cpus_empty(mask))
- cluster_send_IPI_mask(mask, vector);
-}
-
-static void cluster_send_IPI_all(int vector)
-{
- cluster_send_IPI_mask(cpu_online_map, vector);
-}
-
-static int cluster_apic_id_registered(void)
-{
- return 1;
-}
-
-static unsigned int cluster_cpu_mask_to_apicid(cpumask_t cpumask)
-{
- int cpu;
-
- /*
- * We're using fixed IRQ delivery, can only return one phys APIC ID.
- * May as well be the first.
- */
- cpu = first_cpu(cpumask);
- if ((unsigned)cpu < NR_CPUS)
- return x86_cpu_to_apicid[cpu];
- else
- return BAD_APICID;
-}
-
-/* cpuid returns the value latched in the HW at reset, not the APIC ID
- * register's value. For any box whose BIOS changes APIC IDs, like
- * clustered APIC systems, we must use hard_smp_processor_id.
- *
- * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID.
- */
-static unsigned int phys_pkg_id(int index_msb)
-{
- return hard_smp_processor_id() >> index_msb;
-}
-
-struct genapic apic_cluster = {
- .name = "clustered",
- .int_delivery_mode = dest_Fixed,
- .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
- .target_cpus = cluster_target_cpus,
- .vector_allocation_domain = cluster_vector_allocation_domain,
- .apic_id_registered = cluster_apic_id_registered,
- .init_apic_ldr = cluster_init_apic_ldr,
- .send_IPI_all = cluster_send_IPI_all,
- .send_IPI_allbutself = cluster_send_IPI_allbutself,
- .send_IPI_mask = cluster_send_IPI_mask,
- .cpu_mask_to_apicid = cluster_cpu_mask_to_apicid,
- .phys_pkg_id = phys_pkg_id,
-};
diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c
index 7c01db8fa9d1..ecb01eefdd27 100644
--- a/arch/x86_64/kernel/genapic_flat.c
+++ b/arch/x86_64/kernel/genapic_flat.c
@@ -8,6 +8,7 @@
* Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
* James Cleverdon.
*/
+#include <linux/errno.h>
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/string.h>
@@ -16,6 +17,7 @@
#include <linux/init.h>
#include <asm/smp.h>
#include <asm/ipi.h>
+#include <asm/genapic.h>
static cpumask_t flat_target_cpus(void)
{
@@ -60,31 +62,10 @@ static void flat_init_apic_ldr(void)
static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
{
unsigned long mask = cpus_addr(cpumask)[0];
- unsigned long cfg;
unsigned long flags;
local_irq_save(flags);
-
- /*
- * Wait for idle.
- */
- apic_wait_icr_idle();
-
- /*
- * prepare target chip field
- */
- cfg = __prepare_ICR2(mask);
- apic_write(APIC_ICR2, cfg);
-
- /*
- * program the ICR
- */
- cfg = __prepare_ICR(0, vector, APIC_DEST_LOGICAL);
-
- /*
- * Send the IPI. The write to APIC_ICR fires this off.
- */
- apic_write(APIC_ICR, cfg);
+ __send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL);
local_irq_restore(flags);
}
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S
index 598a4d0351fc..1fab487dee86 100644
--- a/arch/x86_64/kernel/head.S
+++ b/arch/x86_64/kernel/head.S
@@ -5,6 +5,7 @@
* Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
* Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
* Copyright (C) 2001,2002 Andi Kleen <ak@suse.de>
+ * Copyright (C) 2005 Eric Biederman <ebiederm@xmission.com>
*/
@@ -13,97 +14,131 @@
#include <linux/init.h>
#include <asm/desc.h>
#include <asm/segment.h>
+#include <asm/pgtable.h>
#include <asm/page.h>
#include <asm/msr.h>
#include <asm/cache.h>
-
+
/* we are not able to switch in one step to the final KERNEL ADRESS SPACE
- * because we need identity-mapped pages on setup so define __START_KERNEL to
- * 0x100000 for this stage
- *
+ * because we need identity-mapped pages.
+ *
*/
.text
.section .bootstrap.text
- .code32
- .globl startup_32
-/* %bx: 1 if coming from smp trampoline on secondary cpu */
-startup_32:
-
+ .code64
+ .globl startup_64
+startup_64:
+
/*
- * At this point the CPU runs in 32bit protected mode (CS.D = 1) with
- * paging disabled and the point of this file is to switch to 64bit
- * long mode with a kernel mapping for kerneland to jump into the
- * kernel virtual addresses.
- * There is no stack until we set one up.
+ * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1,
+ * and someone has loaded an identity mapped page table
+ * for us. These identity mapped page tables map all of the
+ * kernel pages and possibly all of memory.
+ *
+ * %esi holds a physical pointer to real_mode_data.
+ *
+ * We come here either directly from a 64bit bootloader, or from
+ * arch/x86_64/boot/compressed/head.S.
+ *
+ * We only come here initially at boot nothing else comes here.
+ *
+ * Since we may be loaded at an address different from what we were
+ * compiled to run at we first fixup the physical addresses in our page
+ * tables and then reload them.
*/
- /* Initialize the %ds segment register */
- movl $__KERNEL_DS,%eax
- movl %eax,%ds
-
- /* Load new GDT with the 64bit segments using 32bit descriptor */
- lgdt pGDT32 - __START_KERNEL_map
-
- /* If the CPU doesn't support CPUID this will double fault.
- * Unfortunately it is hard to check for CPUID without a stack.
+ /* Compute the delta between the address I am compiled to run at and the
+ * address I am actually running at.
*/
-
- /* Check if extended functions are implemented */
- movl $0x80000000, %eax
- cpuid
- cmpl $0x80000000, %eax
- jbe no_long_mode
- /* Check if long mode is implemented */
- mov $0x80000001, %eax
- cpuid
- btl $29, %edx
- jnc no_long_mode
-
- /*
- * Prepare for entering 64bits mode
+ leaq _text(%rip), %rbp
+ subq $_text - __START_KERNEL_map, %rbp
+
+ /* Is the address not 2M aligned? */
+ movq %rbp, %rax
+ andl $~LARGE_PAGE_MASK, %eax
+ testl %eax, %eax
+ jnz bad_address
+
+ /* Is the address too large? */
+ leaq _text(%rip), %rdx
+ movq $PGDIR_SIZE, %rax
+ cmpq %rax, %rdx
+ jae bad_address
+
+ /* Fixup the physical addresses in the page table
*/
+ addq %rbp, init_level4_pgt + 0(%rip)
+ addq %rbp, init_level4_pgt + (258*8)(%rip)
+ addq %rbp, init_level4_pgt + (511*8)(%rip)
+
+ addq %rbp, level3_ident_pgt + 0(%rip)
+ addq %rbp, level3_kernel_pgt + (510*8)(%rip)
+
+ /* Add an Identity mapping if I am above 1G */
+ leaq _text(%rip), %rdi
+ andq $LARGE_PAGE_MASK, %rdi
+
+ movq %rdi, %rax
+ shrq $PUD_SHIFT, %rax
+ andq $(PTRS_PER_PUD - 1), %rax
+ jz ident_complete
+
+ leaq (level2_spare_pgt - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx
+ leaq level3_ident_pgt(%rip), %rbx
+ movq %rdx, 0(%rbx, %rax, 8)
+
+ movq %rdi, %rax
+ shrq $PMD_SHIFT, %rax
+ andq $(PTRS_PER_PMD - 1), %rax
+ leaq __PAGE_KERNEL_LARGE_EXEC(%rdi), %rdx
+ leaq level2_spare_pgt(%rip), %rbx
+ movq %rdx, 0(%rbx, %rax, 8)
+ident_complete:
+
+ /* Fixup the kernel text+data virtual addresses
+ */
+ leaq level2_kernel_pgt(%rip), %rdi
+ leaq 4096(%rdi), %r8
+ /* See if it is a valid page table entry */
+1: testq $1, 0(%rdi)
+ jz 2f
+ addq %rbp, 0(%rdi)
+ /* Go to the next page */
+2: addq $8, %rdi
+ cmp %r8, %rdi
+ jne 1b
+
+ /* Fixup phys_base */
+ addq %rbp, phys_base(%rip)
- /* Enable PAE mode */
- xorl %eax, %eax
- btsl $5, %eax
- movl %eax, %cr4
-
- /* Setup early boot stage 4 level pagetables */
- movl $(boot_level4_pgt - __START_KERNEL_map), %eax
- movl %eax, %cr3
-
- /* Setup EFER (Extended Feature Enable Register) */
- movl $MSR_EFER, %ecx
- rdmsr
-
- /* Enable Long Mode */
- btsl $_EFER_LME, %eax
-
- /* Make changes effective */
- wrmsr
+#ifdef CONFIG_SMP
+ addq %rbp, trampoline_level4_pgt + 0(%rip)
+ addq %rbp, trampoline_level4_pgt + (511*8)(%rip)
+#endif
+#ifdef CONFIG_ACPI_SLEEP
+ addq %rbp, wakeup_level4_pgt + 0(%rip)
+ addq %rbp, wakeup_level4_pgt + (511*8)(%rip)
+#endif
- xorl %eax, %eax
- btsl $31, %eax /* Enable paging and in turn activate Long Mode */
- btsl $0, %eax /* Enable protected mode */
- /* Make changes effective */
- movl %eax, %cr0
- /*
- * At this point we're in long mode but in 32bit compatibility mode
- * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn
- * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we use
- * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
+ /* Due to ENTRY(), sometimes the empty space gets filled with
+ * zeros. Better take a jmp than relying on empty space being
+ * filled with 0x90 (nop)
*/
- ljmp $__KERNEL_CS, $(startup_64 - __START_KERNEL_map)
-
- .code64
- .org 0x100
- .globl startup_64
-startup_64:
- /* We come here either from startup_32
- * or directly from a 64bit bootloader.
- * Since we may have come directly from a bootloader we
- * reload the page tables here.
+ jmp secondary_startup_64
+ENTRY(secondary_startup_64)
+ /*
+ * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1,
+ * and someone has loaded a mapped page table.
+ *
+ * %esi holds a physical pointer to real_mode_data.
+ *
+ * We come here either from startup_64 (using physical addresses)
+ * or from trampoline.S (using virtual addresses).
+ *
+ * Using virtual addresses from trampoline.S removes the need
+ * to have any identity mapped pages in the kernel page table
+ * after the boot processor executes this code.
*/
/* Enable PAE mode and PGE */
@@ -113,9 +148,15 @@ startup_64:
movq %rax, %cr4
/* Setup early boot stage 4 level pagetables. */
- movq $(boot_level4_pgt - __START_KERNEL_map), %rax
+ movq $(init_level4_pgt - __START_KERNEL_map), %rax
+ addq phys_base(%rip), %rax
movq %rax, %cr3
+ /* Ensure I am executing from virtual addresses */
+ movq $1f, %rax
+ jmp *%rax
+1:
+
/* Check if nx is implemented */
movl $0x80000001, %eax
cpuid
@@ -124,17 +165,11 @@ startup_64:
/* Setup EFER (Extended Feature Enable Register) */
movl $MSR_EFER, %ecx
rdmsr
-
- /* Enable System Call */
- btsl $_EFER_SCE, %eax
-
- /* No Execute supported? */
- btl $20,%edi
+ btsl $_EFER_SCE, %eax /* Enable System Call */
+ btl $20,%edi /* No Execute supported? */
jnc 1f
btsl $_EFER_NX, %eax
-1:
- /* Make changes effective */
- wrmsr
+1: wrmsr /* Make changes effective */
/* Setup cr0 */
#define CR0_PM 1 /* protected mode */
@@ -161,7 +196,7 @@ startup_64:
* addresses where we're currently running on. We have to do that here
* because in 32bit we couldn't load a 64bit linear address.
*/
- lgdt cpu_gdt_descr
+ lgdt cpu_gdt_descr(%rip)
/* set up data segments. actually 0 would do too */
movl $__KERNEL_DS,%eax
@@ -212,6 +247,9 @@ initial_code:
init_rsp:
.quad init_thread_union+THREAD_SIZE-8
+bad_address:
+ jmp bad_address
+
ENTRY(early_idt_handler)
cmpl $2,early_recursion_flag(%rip)
jz 1f
@@ -240,110 +278,66 @@ early_idt_msg:
early_idt_ripmsg:
.asciz "RIP %s\n"
-.code32
-ENTRY(no_long_mode)
- /* This isn't an x86-64 CPU so hang */
-1:
- jmp 1b
-
-.org 0xf00
- .globl pGDT32
-pGDT32:
- .word gdt_end-cpu_gdt_table-1
- .long cpu_gdt_table-__START_KERNEL_map
-
-.org 0xf10
-ljumpvector:
- .long startup_64-__START_KERNEL_map
- .word __KERNEL_CS
+.balign PAGE_SIZE
-ENTRY(stext)
-ENTRY(_stext)
-
- $page = 0
#define NEXT_PAGE(name) \
- $page = $page + 1; \
- .org $page * 0x1000; \
- phys_/**/name = $page * 0x1000 + __PHYSICAL_START; \
+ .balign PAGE_SIZE; \
ENTRY(name)
+/* Automate the creation of 1 to 1 mapping pmd entries */
+#define PMDS(START, PERM, COUNT) \
+ i = 0 ; \
+ .rept (COUNT) ; \
+ .quad (START) + (i << 21) + (PERM) ; \
+ i = i + 1 ; \
+ .endr
+
+ /*
+ * This default setting generates an ident mapping at address 0x100000
+ * and a mapping for the kernel that precisely maps virtual address
+ * 0xffffffff80000000 to physical address 0x000000. (always using
+ * 2Mbyte large pages provided by PAE mode)
+ */
NEXT_PAGE(init_level4_pgt)
- /* This gets initialized in x86_64_start_kernel */
- .fill 512,8,0
+ .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+ .fill 257,8,0
+ .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+ .fill 252,8,0
+ /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+ .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
NEXT_PAGE(level3_ident_pgt)
- .quad phys_level2_ident_pgt | 0x007
+ .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
.fill 511,8,0
NEXT_PAGE(level3_kernel_pgt)
.fill 510,8,0
/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
- .quad phys_level2_kernel_pgt | 0x007
+ .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
.fill 1,8,0
NEXT_PAGE(level2_ident_pgt)
- /* 40MB for bootup. */
- i = 0
- .rept 20
- .quad i << 21 | 0x083
- i = i + 1
- .endr
- /* Temporary mappings for the super early allocator in arch/x86_64/mm/init.c */
- .globl temp_boot_pmds
-temp_boot_pmds:
- .fill 492,8,0
-
+ /* Since I easily can, map the first 1G.
+ * Don't set NX because code runs from these pages.
+ */
+ PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD)
+
NEXT_PAGE(level2_kernel_pgt)
/* 40MB kernel mapping. The kernel code cannot be bigger than that.
When you change this change KERNEL_TEXT_SIZE in page.h too. */
/* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */
- i = 0
- .rept 20
- .quad i << 21 | 0x183
- i = i + 1
- .endr
+ PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL,
+ KERNEL_TEXT_SIZE/PMD_SIZE)
/* Module mapping starts here */
- .fill 492,8,0
+ .fill (PTRS_PER_PMD - (KERNEL_TEXT_SIZE/PMD_SIZE)),8,0
-NEXT_PAGE(level3_physmem_pgt)
- .quad phys_level2_kernel_pgt | 0x007 /* so that __va works even before pagetable_init */
- .fill 511,8,0
+NEXT_PAGE(level2_spare_pgt)
+ .fill 512,8,0
+#undef PMDS
#undef NEXT_PAGE
.data
-
-#ifdef CONFIG_ACPI_SLEEP
- .align PAGE_SIZE
-ENTRY(wakeup_level4_pgt)
- .quad phys_level3_ident_pgt | 0x007
- .fill 255,8,0
- .quad phys_level3_physmem_pgt | 0x007
- .fill 254,8,0
- /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
- .quad phys_level3_kernel_pgt | 0x007
-#endif
-
-#ifndef CONFIG_HOTPLUG_CPU
- __INITDATA
-#endif
- /*
- * This default setting generates an ident mapping at address 0x100000
- * and a mapping for the kernel that precisely maps virtual address
- * 0xffffffff80000000 to physical address 0x000000. (always using
- * 2Mbyte large pages provided by PAE mode)
- */
- .align PAGE_SIZE
-ENTRY(boot_level4_pgt)
- .quad phys_level3_ident_pgt | 0x007
- .fill 255,8,0
- .quad phys_level3_physmem_pgt | 0x007
- .fill 254,8,0
- /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
- .quad phys_level3_kernel_pgt | 0x007
-
- .data
-
.align 16
.globl cpu_gdt_descr
cpu_gdt_descr:
@@ -357,6 +351,10 @@ gdt:
.endr
#endif
+ENTRY(phys_base)
+ /* This must match the first entry in level2_kernel_pgt */
+ .quad 0x0000000000000000
+
/* We need valid kernel segments for data and code in long mode too
* IRET will check the segment types kkeil 2000/10/28
* Also sysret mandates a special GDT layout
@@ -370,13 +368,13 @@ gdt:
ENTRY(cpu_gdt_table)
.quad 0x0000000000000000 /* NULL descriptor */
+ .quad 0x00cf9b000000ffff /* __KERNEL32_CS */
+ .quad 0x00af9b000000ffff /* __KERNEL_CS */
+ .quad 0x00cf93000000ffff /* __KERNEL_DS */
+ .quad 0x00cffb000000ffff /* __USER32_CS */
+ .quad 0x00cff3000000ffff /* __USER_DS, __USER32_DS */
+ .quad 0x00affb000000ffff /* __USER_CS */
.quad 0x0 /* unused */
- .quad 0x00af9a000000ffff /* __KERNEL_CS */
- .quad 0x00cf92000000ffff /* __KERNEL_DS */
- .quad 0x00cffa000000ffff /* __USER32_CS */
- .quad 0x00cff2000000ffff /* __USER_DS, __USER32_DS */
- .quad 0x00affa000000ffff /* __USER_CS */
- .quad 0x00cf9a000000ffff /* __KERNEL32_CS */
.quad 0,0 /* TSS */
.quad 0,0 /* LDT */
.quad 0,0,0 /* three TLS descriptors */
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index 5f197b0a330a..213d90e04755 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -18,8 +18,16 @@
#include <asm/setup.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
#include <asm/sections.h>
+static void __init zap_identity_mappings(void)
+{
+ pgd_t *pgd = pgd_offset_k(0UL);
+ pgd_clear(pgd);
+ __flush_tlb();
+}
+
/* Don't add a printk in there. printk relies on the PDA which is not initialized
yet. */
static void __init clear_bss(void)
@@ -29,25 +37,24 @@ static void __init clear_bss(void)
}
#define NEW_CL_POINTER 0x228 /* Relative to real mode data */
-#define OLD_CL_MAGIC_ADDR 0x90020
+#define OLD_CL_MAGIC_ADDR 0x20
#define OLD_CL_MAGIC 0xA33F
-#define OLD_CL_BASE_ADDR 0x90000
-#define OLD_CL_OFFSET 0x90022
+#define OLD_CL_OFFSET 0x22
static void __init copy_bootdata(char *real_mode_data)
{
- int new_data;
+ unsigned long new_data;
char * command_line;
memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE);
- new_data = *(int *) (x86_boot_params + NEW_CL_POINTER);
+ new_data = *(u32 *) (x86_boot_params + NEW_CL_POINTER);
if (!new_data) {
- if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) {
+ if (OLD_CL_MAGIC != *(u16 *)(real_mode_data + OLD_CL_MAGIC_ADDR)) {
return;
}
- new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET;
+ new_data = __pa(real_mode_data) + *(u16 *)(real_mode_data + OLD_CL_OFFSET);
}
- command_line = (char *) ((u64)(new_data));
+ command_line = __va(new_data);
memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
}
@@ -55,26 +62,30 @@ void __init x86_64_start_kernel(char * real_mode_data)
{
int i;
+ /*
+ * Make sure kernel is aligned to 2MB address. Catching it at compile
+ * time is better. Change your config file and compile the kernel
+ * for a 2MB aligned address (CONFIG_PHYSICAL_START)
+ */
+ BUILD_BUG_ON(CONFIG_PHYSICAL_START & (__KERNEL_ALIGN - 1));
+
/* clear bss before set_intr_gate with early_idt_handler */
clear_bss();
+ /* Make NULL pointers segfault */
+ zap_identity_mappings();
+
for (i = 0; i < IDT_ENTRIES; i++)
set_intr_gate(i, early_idt_handler);
asm volatile("lidt %0" :: "m" (idt_descr));
early_printk("Kernel alive\n");
- /*
- * switch to init_level4_pgt from boot_level4_pgt
- */
- memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t));
- asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
-
for (i = 0; i < NR_CPUS; i++)
cpu_pda(i) = &boot_cpu_pda[i];
pda_init(0);
- copy_bootdata(real_mode_data);
+ copy_bootdata(__va(real_mode_data));
#ifdef CONFIG_SMP
cpu_set(0, cpu_online_map);
#endif
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index b7d2b76b92d4..2a2df14dab7e 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -907,10 +907,6 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in
enable_8259A_irq(0);
}
-void __init UNEXPECTED_IO_APIC(void)
-{
-}
-
void __apicdebuginit print_IO_APIC(void)
{
int apic, i;
@@ -946,40 +942,16 @@ void __apicdebuginit print_IO_APIC(void)
printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
- if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
- UNEXPECTED_IO_APIC();
printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
- if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
- (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
- (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
- (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
- (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
- (reg_01.bits.entries != 0x2E) &&
- (reg_01.bits.entries != 0x3F) &&
- (reg_01.bits.entries != 0x03)
- )
- UNEXPECTED_IO_APIC();
printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
- if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
- (reg_01.bits.version != 0x02) && /* 82801BA IO-APICs (ICH2) */
- (reg_01.bits.version != 0x10) && /* oldest IO-APICs */
- (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
- (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
- (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */
- )
- UNEXPECTED_IO_APIC();
- if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
- UNEXPECTED_IO_APIC();
if (reg_01.bits.version >= 0x10) {
printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
- if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
- UNEXPECTED_IO_APIC();
}
printk(KERN_DEBUG ".... IRQ redirection table:\n");
@@ -1407,8 +1379,7 @@ static void irq_complete_move(unsigned int irq)
vector = ~get_irq_regs()->orig_rax;
me = smp_processor_id();
- if ((vector == cfg->vector) &&
- cpu_isset(smp_processor_id(), cfg->domain)) {
+ if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
cpumask_t cleanup_mask;
cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
diff --git a/arch/x86_64/kernel/ioport.c b/arch/x86_64/kernel/ioport.c
index 745b1f0f494e..387d347b0e07 100644
--- a/arch/x86_64/kernel/ioport.c
+++ b/arch/x86_64/kernel/ioport.c
@@ -16,6 +16,7 @@
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/thread_info.h>
+#include <linux/syscalls.h>
/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value)
diff --git a/arch/x86_64/kernel/machine_kexec.c b/arch/x86_64/kernel/machine_kexec.c
index 0497e3bd5bff..a8bb33c1a8f2 100644
--- a/arch/x86_64/kernel/machine_kexec.c
+++ b/arch/x86_64/kernel/machine_kexec.c
@@ -191,19 +191,19 @@ NORET_TYPE void machine_kexec(struct kimage *image)
page_list[PA_CONTROL_PAGE] = __pa(control_page);
page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
- page_list[PA_PGD] = __pa(kexec_pgd);
+ page_list[PA_PGD] = __pa_symbol(&kexec_pgd);
page_list[VA_PGD] = (unsigned long)kexec_pgd;
- page_list[PA_PUD_0] = __pa(kexec_pud0);
+ page_list[PA_PUD_0] = __pa_symbol(&kexec_pud0);
page_list[VA_PUD_0] = (unsigned long)kexec_pud0;
- page_list[PA_PMD_0] = __pa(kexec_pmd0);
+ page_list[PA_PMD_0] = __pa_symbol(&kexec_pmd0);
page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
- page_list[PA_PTE_0] = __pa(kexec_pte0);
+ page_list[PA_PTE_0] = __pa_symbol(&kexec_pte0);
page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
- page_list[PA_PUD_1] = __pa(kexec_pud1);
+ page_list[PA_PUD_1] = __pa_symbol(&kexec_pud1);
page_list[VA_PUD_1] = (unsigned long)kexec_pud1;
- page_list[PA_PMD_1] = __pa(kexec_pmd1);
+ page_list[PA_PMD_1] = __pa_symbol(&kexec_pmd1);
page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
- page_list[PA_PTE_1] = __pa(kexec_pte1);
+ page_list[PA_PTE_1] = __pa_symbol(&kexec_pte1);
page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
page_list[PA_TABLE_PAGE] =
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index 8011a8e1c7d4..fa2672682477 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -323,10 +323,13 @@ void mce_log_therm_throt_event(unsigned int cpu, __u64 status)
#endif /* CONFIG_X86_MCE_INTEL */
/*
- * Periodic polling timer for "silent" machine check errors.
+ * Periodic polling timer for "silent" machine check errors. If the
+ * poller finds an MCE, poll 2x faster. When the poller finds no more
+ * errors, poll 2x slower (up to check_interval seconds).
*/
static int check_interval = 5 * 60; /* 5 minutes */
+static int next_interval; /* in jiffies */
static void mcheck_timer(struct work_struct *work);
static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer);
@@ -339,7 +342,6 @@ static void mcheck_check_cpu(void *info)
static void mcheck_timer(struct work_struct *work)
{
on_each_cpu(mcheck_check_cpu, NULL, 1, 1);
- schedule_delayed_work(&mcheck_work, check_interval * HZ);
/*
* It's ok to read stale data here for notify_user and
@@ -349,17 +351,30 @@ static void mcheck_timer(struct work_struct *work)
* writes.
*/
if (notify_user && console_logged) {
+ static unsigned long last_print;
+ unsigned long now = jiffies;
+
+ /* if we logged an MCE, reduce the polling interval */
+ next_interval = max(next_interval/2, HZ/100);
notify_user = 0;
clear_bit(0, &console_logged);
- printk(KERN_INFO "Machine check events logged\n");
+ if (time_after_eq(now, last_print + (check_interval*HZ))) {
+ last_print = now;
+ printk(KERN_INFO "Machine check events logged\n");
+ }
+ } else {
+ next_interval = min(next_interval*2, check_interval*HZ);
}
+
+ schedule_delayed_work(&mcheck_work, next_interval);
}
static __init int periodic_mcheck_init(void)
{
- if (check_interval)
- schedule_delayed_work(&mcheck_work, check_interval*HZ);
+ next_interval = check_interval * HZ;
+ if (next_interval)
+ schedule_delayed_work(&mcheck_work, next_interval);
return 0;
}
__initcall(periodic_mcheck_init);
@@ -597,12 +612,13 @@ static int mce_resume(struct sys_device *dev)
/* Reinit MCEs after user configuration changes */
static void mce_restart(void)
{
- if (check_interval)
+ if (next_interval)
cancel_delayed_work(&mcheck_work);
/* Timer race is harmless here */
on_each_cpu(mce_init, NULL, 1, 1);
- if (check_interval)
- schedule_delayed_work(&mcheck_work, check_interval*HZ);
+ next_interval = check_interval * HZ;
+ if (next_interval)
+ schedule_delayed_work(&mcheck_work, next_interval);
}
static struct sysdev_class mce_sysclass = {
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index 455aa0b932f0..d0dc4891599b 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -300,7 +300,7 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
}
}
}
- clustered_apic_check();
+ setup_apic_routing();
if (!num_processors)
printk(KERN_ERR "MPTABLE: no processors registered!\n");
return num_processors;
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index dfab9f167366..6cd2b30e2ffc 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -27,28 +27,11 @@
#include <asm/proto.h>
#include <asm/kdebug.h>
#include <asm/mce.h>
-#include <asm/intel_arch_perfmon.h>
int unknown_nmi_panic;
int nmi_watchdog_enabled;
int panic_on_unrecovered_nmi;
-/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
- * evtsel_nmi_owner tracks the ownership of the event selection
- * - different performance counters/ event selection may be reserved for
- * different subsystems this reservation system just tries to coordinate
- * things a little
- */
-
-/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
- * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
- */
-#define NMI_MAX_COUNTER_BITS 66
-#define NMI_MAX_COUNTER_LONGS BITS_TO_LONGS(NMI_MAX_COUNTER_BITS)
-
-static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner[NMI_MAX_COUNTER_LONGS]);
-static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[NMI_MAX_COUNTER_LONGS]);
-
static cpumask_t backtrace_mask = CPU_MASK_NONE;
/* nmi_active:
@@ -63,191 +46,11 @@ int panic_on_timeout;
unsigned int nmi_watchdog = NMI_DEFAULT;
static unsigned int nmi_hz = HZ;
-struct nmi_watchdog_ctlblk {
- int enabled;
- u64 check_bit;
- unsigned int cccr_msr;
- unsigned int perfctr_msr; /* the MSR to reset in NMI handler */
- unsigned int evntsel_msr; /* the MSR to select the events to handle */
-};
-static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
+static DEFINE_PER_CPU(short, wd_enabled);
/* local prototypes */
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
-/* converts an msr to an appropriate reservation bit */
-static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
-{
- /* returns the bit offset of the performance counter register */
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- return (msr - MSR_K7_PERFCTR0);
- case X86_VENDOR_INTEL:
- if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
- return (msr - MSR_ARCH_PERFMON_PERFCTR0);
- else
- return (msr - MSR_P4_BPU_PERFCTR0);
- }
- return 0;
-}
-
-/* converts an msr to an appropriate reservation bit */
-static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
-{
- /* returns the bit offset of the event selection register */
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- return (msr - MSR_K7_EVNTSEL0);
- case X86_VENDOR_INTEL:
- if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
- return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
- else
- return (msr - MSR_P4_BSU_ESCR0);
- }
- return 0;
-}
-
-/* checks for a bit availability (hack for oprofile) */
-int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
-{
- int cpu;
- BUG_ON(counter > NMI_MAX_COUNTER_BITS);
- for_each_possible_cpu (cpu) {
- if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)))
- return 0;
- }
- return 1;
-}
-
-/* checks the an msr for availability */
-int avail_to_resrv_perfctr_nmi(unsigned int msr)
-{
- unsigned int counter;
- int cpu;
-
- counter = nmi_perfctr_msr_to_bit(msr);
- BUG_ON(counter > NMI_MAX_COUNTER_BITS);
-
- for_each_possible_cpu (cpu) {
- if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)))
- return 0;
- }
- return 1;
-}
-
-static int __reserve_perfctr_nmi(int cpu, unsigned int msr)
-{
- unsigned int counter;
- if (cpu < 0)
- cpu = smp_processor_id();
-
- counter = nmi_perfctr_msr_to_bit(msr);
- BUG_ON(counter > NMI_MAX_COUNTER_BITS);
-
- if (!test_and_set_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)))
- return 1;
- return 0;
-}
-
-static void __release_perfctr_nmi(int cpu, unsigned int msr)
-{
- unsigned int counter;
- if (cpu < 0)
- cpu = smp_processor_id();
-
- counter = nmi_perfctr_msr_to_bit(msr);
- BUG_ON(counter > NMI_MAX_COUNTER_BITS);
-
- clear_bit(counter, &per_cpu(perfctr_nmi_owner, cpu));
-}
-
-int reserve_perfctr_nmi(unsigned int msr)
-{
- int cpu, i;
- for_each_possible_cpu (cpu) {
- if (!__reserve_perfctr_nmi(cpu, msr)) {
- for_each_possible_cpu (i) {
- if (i >= cpu)
- break;
- __release_perfctr_nmi(i, msr);
- }
- return 0;
- }
- }
- return 1;
-}
-
-void release_perfctr_nmi(unsigned int msr)
-{
- int cpu;
- for_each_possible_cpu (cpu)
- __release_perfctr_nmi(cpu, msr);
-}
-
-int __reserve_evntsel_nmi(int cpu, unsigned int msr)
-{
- unsigned int counter;
- if (cpu < 0)
- cpu = smp_processor_id();
-
- counter = nmi_evntsel_msr_to_bit(msr);
- BUG_ON(counter > NMI_MAX_COUNTER_BITS);
-
- if (!test_and_set_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]))
- return 1;
- return 0;
-}
-
-static void __release_evntsel_nmi(int cpu, unsigned int msr)
-{
- unsigned int counter;
- if (cpu < 0)
- cpu = smp_processor_id();
-
- counter = nmi_evntsel_msr_to_bit(msr);
- BUG_ON(counter > NMI_MAX_COUNTER_BITS);
-
- clear_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]);
-}
-
-int reserve_evntsel_nmi(unsigned int msr)
-{
- int cpu, i;
- for_each_possible_cpu (cpu) {
- if (!__reserve_evntsel_nmi(cpu, msr)) {
- for_each_possible_cpu (i) {
- if (i >= cpu)
- break;
- __release_evntsel_nmi(i, msr);
- }
- return 0;
- }
- }
- return 1;
-}
-
-void release_evntsel_nmi(unsigned int msr)
-{
- int cpu;
- for_each_possible_cpu (cpu) {
- __release_evntsel_nmi(cpu, msr);
- }
-}
-
-static __cpuinit inline int nmi_known_cpu(void)
-{
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- return boot_cpu_data.x86 == 15 || boot_cpu_data.x86 == 16;
- case X86_VENDOR_INTEL:
- if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
- return 1;
- else
- return (boot_cpu_data.x86 == 15);
- }
- return 0;
-}
-
/* Run after command line and cpu_init init, but before all other checks */
void nmi_watchdog_default(void)
{
@@ -277,23 +80,6 @@ static __init void nmi_cpu_busy(void *data)
}
#endif
-static unsigned int adjust_for_32bit_ctr(unsigned int hz)
-{
- unsigned int retval = hz;
-
- /*
- * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter
- * are writable, with higher bits sign extending from bit 31.
- * So, we can only program the counter with 31 bit values and
- * 32nd bit should be 1, for 33.. to be 1.
- * Find the appropriate nmi_hz
- */
- if ((((u64)cpu_khz * 1000) / retval) > 0x7fffffffULL) {
- retval = ((u64)cpu_khz * 1000) / 0x7fffffffUL + 1;
- }
- return retval;
-}
-
int __init check_nmi_watchdog (void)
{
int *counts;
@@ -322,14 +108,14 @@ int __init check_nmi_watchdog (void)
mdelay((20*1000)/nmi_hz); // wait 20 ticks
for_each_online_cpu(cpu) {
- if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
+ if (!per_cpu(wd_enabled, cpu))
continue;
if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) {
printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
cpu,
counts[cpu],
cpu_pda(cpu)->__nmi_count);
- per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0;
+ per_cpu(wd_enabled, cpu) = 0;
atomic_dec(&nmi_active);
}
}
@@ -344,13 +130,8 @@ int __init check_nmi_watchdog (void)
/* now that we know it works we can reduce NMI frequency to
something more reasonable; makes a difference in some configs */
- if (nmi_watchdog == NMI_LOCAL_APIC) {
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
- nmi_hz = 1;
- if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0)
- nmi_hz = adjust_for_32bit_ctr(nmi_hz);
- }
+ if (nmi_watchdog == NMI_LOCAL_APIC)
+ nmi_hz = lapic_adjust_nmi_hz(1);
kfree(counts);
return 0;
@@ -379,57 +160,6 @@ int __init setup_nmi_watchdog(char *str)
__setup("nmi_watchdog=", setup_nmi_watchdog);
-static void disable_lapic_nmi_watchdog(void)
-{
- BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
-
- if (atomic_read(&nmi_active) <= 0)
- return;
-
- on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
-
- BUG_ON(atomic_read(&nmi_active) != 0);
-}
-
-static void enable_lapic_nmi_watchdog(void)
-{
- BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
-
- /* are we already enabled */
- if (atomic_read(&nmi_active) != 0)
- return;
-
- /* are we lapic aware */
- if (nmi_known_cpu() <= 0)
- return;
-
- on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
- touch_nmi_watchdog();
-}
-
-void disable_timer_nmi_watchdog(void)
-{
- BUG_ON(nmi_watchdog != NMI_IO_APIC);
-
- if (atomic_read(&nmi_active) <= 0)
- return;
-
- disable_irq(0);
- on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
-
- BUG_ON(atomic_read(&nmi_active) != 0);
-}
-
-void enable_timer_nmi_watchdog(void)
-{
- BUG_ON(nmi_watchdog != NMI_IO_APIC);
-
- if (atomic_read(&nmi_active) == 0) {
- touch_nmi_watchdog();
- on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
- enable_irq(0);
- }
-}
static void __acpi_nmi_disable(void *__unused)
{
@@ -515,275 +245,9 @@ late_initcall(init_lapic_nmi_sysfs);
#endif /* CONFIG_PM */
-/*
- * Activate the NMI watchdog via the local APIC.
- * Original code written by Keith Owens.
- */
-
-/* Note that these events don't tick when the CPU idles. This means
- the frequency varies with CPU load. */
-
-#define K7_EVNTSEL_ENABLE (1 << 22)
-#define K7_EVNTSEL_INT (1 << 20)
-#define K7_EVNTSEL_OS (1 << 17)
-#define K7_EVNTSEL_USR (1 << 16)
-#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
-#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
-
-static int setup_k7_watchdog(void)
-{
- unsigned int perfctr_msr, evntsel_msr;
- unsigned int evntsel;
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
- perfctr_msr = MSR_K7_PERFCTR0;
- evntsel_msr = MSR_K7_EVNTSEL0;
- if (!__reserve_perfctr_nmi(-1, perfctr_msr))
- goto fail;
-
- if (!__reserve_evntsel_nmi(-1, evntsel_msr))
- goto fail1;
-
- /* Simulator may not support it */
- if (checking_wrmsrl(evntsel_msr, 0UL))
- goto fail2;
- wrmsrl(perfctr_msr, 0UL);
-
- evntsel = K7_EVNTSEL_INT
- | K7_EVNTSEL_OS
- | K7_EVNTSEL_USR
- | K7_NMI_EVENT;
-
- /* setup the timer */
- wrmsr(evntsel_msr, evntsel, 0);
- wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- evntsel |= K7_EVNTSEL_ENABLE;
- wrmsr(evntsel_msr, evntsel, 0);
-
- wd->perfctr_msr = perfctr_msr;
- wd->evntsel_msr = evntsel_msr;
- wd->cccr_msr = 0; //unused
- wd->check_bit = 1ULL<<63;
- return 1;
-fail2:
- __release_evntsel_nmi(-1, evntsel_msr);
-fail1:
- __release_perfctr_nmi(-1, perfctr_msr);
-fail:
- return 0;
-}
-
-static void stop_k7_watchdog(void)
-{
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
- wrmsr(wd->evntsel_msr, 0, 0);
-
- __release_evntsel_nmi(-1, wd->evntsel_msr);
- __release_perfctr_nmi(-1, wd->perfctr_msr);
-}
-
-/* Note that these events don't tick when the CPU idles. This means
- the frequency varies with CPU load. */
-
-#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
-#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
-#define P4_ESCR_OS (1<<3)
-#define P4_ESCR_USR (1<<2)
-#define P4_CCCR_OVF_PMI0 (1<<26)
-#define P4_CCCR_OVF_PMI1 (1<<27)
-#define P4_CCCR_THRESHOLD(N) ((N)<<20)
-#define P4_CCCR_COMPLEMENT (1<<19)
-#define P4_CCCR_COMPARE (1<<18)
-#define P4_CCCR_REQUIRED (3<<16)
-#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
-#define P4_CCCR_ENABLE (1<<12)
-#define P4_CCCR_OVF (1<<31)
-/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
- CRU_ESCR0 (with any non-null event selector) through a complemented
- max threshold. [IA32-Vol3, Section 14.9.9] */
-
-static int setup_p4_watchdog(void)
-{
- unsigned int perfctr_msr, evntsel_msr, cccr_msr;
- unsigned int evntsel, cccr_val;
- unsigned int misc_enable, dummy;
- unsigned int ht_num;
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
- rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
- if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
- return 0;
-
-#ifdef CONFIG_SMP
- /* detect which hyperthread we are on */
- if (smp_num_siblings == 2) {
- unsigned int ebx, apicid;
-
- ebx = cpuid_ebx(1);
- apicid = (ebx >> 24) & 0xff;
- ht_num = apicid & 1;
- } else
-#endif
- ht_num = 0;
-
- /* performance counters are shared resources
- * assign each hyperthread its own set
- * (re-use the ESCR0 register, seems safe
- * and keeps the cccr_val the same)
- */
- if (!ht_num) {
- /* logical cpu 0 */
- perfctr_msr = MSR_P4_IQ_PERFCTR0;
- evntsel_msr = MSR_P4_CRU_ESCR0;
- cccr_msr = MSR_P4_IQ_CCCR0;
- cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
- } else {
- /* logical cpu 1 */
- perfctr_msr = MSR_P4_IQ_PERFCTR1;
- evntsel_msr = MSR_P4_CRU_ESCR0;
- cccr_msr = MSR_P4_IQ_CCCR1;
- cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
- }
-
- if (!__reserve_perfctr_nmi(-1, perfctr_msr))
- goto fail;
-
- if (!__reserve_evntsel_nmi(-1, evntsel_msr))
- goto fail1;
-
- evntsel = P4_ESCR_EVENT_SELECT(0x3F)
- | P4_ESCR_OS
- | P4_ESCR_USR;
-
- cccr_val |= P4_CCCR_THRESHOLD(15)
- | P4_CCCR_COMPLEMENT
- | P4_CCCR_COMPARE
- | P4_CCCR_REQUIRED;
-
- wrmsr(evntsel_msr, evntsel, 0);
- wrmsr(cccr_msr, cccr_val, 0);
- wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- cccr_val |= P4_CCCR_ENABLE;
- wrmsr(cccr_msr, cccr_val, 0);
-
- wd->perfctr_msr = perfctr_msr;
- wd->evntsel_msr = evntsel_msr;
- wd->cccr_msr = cccr_msr;
- wd->check_bit = 1ULL<<39;
- return 1;
-fail1:
- __release_perfctr_nmi(-1, perfctr_msr);
-fail:
- return 0;
-}
-
-static void stop_p4_watchdog(void)
-{
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
- wrmsr(wd->cccr_msr, 0, 0);
- wrmsr(wd->evntsel_msr, 0, 0);
-
- __release_evntsel_nmi(-1, wd->evntsel_msr);
- __release_perfctr_nmi(-1, wd->perfctr_msr);
-}
-
-#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
-#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
-
-static int setup_intel_arch_watchdog(void)
-{
- unsigned int ebx;
- union cpuid10_eax eax;
- unsigned int unused;
- unsigned int perfctr_msr, evntsel_msr;
- unsigned int evntsel;
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
- /*
- * Check whether the Architectural PerfMon supports
- * Unhalted Core Cycles Event or not.
- * NOTE: Corresponding bit = 0 in ebx indicates event present.
- */
- cpuid(10, &(eax.full), &ebx, &unused, &unused);
- if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
- (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
- goto fail;
-
- perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
- evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;
-
- if (!__reserve_perfctr_nmi(-1, perfctr_msr))
- goto fail;
-
- if (!__reserve_evntsel_nmi(-1, evntsel_msr))
- goto fail1;
-
- wrmsrl(perfctr_msr, 0UL);
-
- evntsel = ARCH_PERFMON_EVENTSEL_INT
- | ARCH_PERFMON_EVENTSEL_OS
- | ARCH_PERFMON_EVENTSEL_USR
- | ARCH_PERFMON_NMI_EVENT_SEL
- | ARCH_PERFMON_NMI_EVENT_UMASK;
-
- /* setup the timer */
- wrmsr(evntsel_msr, evntsel, 0);
-
- nmi_hz = adjust_for_32bit_ctr(nmi_hz);
- wrmsr(perfctr_msr, (u32)(-((u64)cpu_khz * 1000 / nmi_hz)), 0);
-
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
- wrmsr(evntsel_msr, evntsel, 0);
-
- wd->perfctr_msr = perfctr_msr;
- wd->evntsel_msr = evntsel_msr;
- wd->cccr_msr = 0; //unused
- wd->check_bit = 1ULL << (eax.split.bit_width - 1);
- return 1;
-fail1:
- __release_perfctr_nmi(-1, perfctr_msr);
-fail:
- return 0;
-}
-
-static void stop_intel_arch_watchdog(void)
-{
- unsigned int ebx;
- union cpuid10_eax eax;
- unsigned int unused;
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
- /*
- * Check whether the Architectural PerfMon supports
- * Unhalted Core Cycles Event or not.
- * NOTE: Corresponding bit = 0 in ebx indicates event present.
- */
- cpuid(10, &(eax.full), &ebx, &unused, &unused);
- if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
- (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
- return;
-
- wrmsr(wd->evntsel_msr, 0, 0);
-
- __release_evntsel_nmi(-1, wd->evntsel_msr);
- __release_perfctr_nmi(-1, wd->perfctr_msr);
-}
-
void setup_apic_nmi_watchdog(void *unused)
{
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
- /* only support LOCAL and IO APICs for now */
- if ((nmi_watchdog != NMI_LOCAL_APIC) &&
- (nmi_watchdog != NMI_IO_APIC))
- return;
-
- if (wd->enabled == 1)
+ if (__get_cpu_var(wd_enabled) == 1)
return;
/* cheap hack to support suspend/resume */
@@ -791,62 +255,31 @@ void setup_apic_nmi_watchdog(void *unused)
if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
return;
- if (nmi_watchdog == NMI_LOCAL_APIC) {
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- if (strstr(boot_cpu_data.x86_model_id, "Screwdriver"))
- return;
- if (!setup_k7_watchdog())
- return;
- break;
- case X86_VENDOR_INTEL:
- if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
- if (!setup_intel_arch_watchdog())
- return;
- break;
- }
- if (!setup_p4_watchdog())
- return;
- break;
- default:
+ switch (nmi_watchdog) {
+ case NMI_LOCAL_APIC:
+ __get_cpu_var(wd_enabled) = 1;
+ if (lapic_watchdog_init(nmi_hz) < 0) {
+ __get_cpu_var(wd_enabled) = 0;
return;
}
+ /* FALL THROUGH */
+ case NMI_IO_APIC:
+ __get_cpu_var(wd_enabled) = 1;
+ atomic_inc(&nmi_active);
}
- wd->enabled = 1;
- atomic_inc(&nmi_active);
}
void stop_apic_nmi_watchdog(void *unused)
{
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
/* only support LOCAL and IO APICs for now */
if ((nmi_watchdog != NMI_LOCAL_APIC) &&
(nmi_watchdog != NMI_IO_APIC))
return;
-
- if (wd->enabled == 0)
+ if (__get_cpu_var(wd_enabled) == 0)
return;
-
- if (nmi_watchdog == NMI_LOCAL_APIC) {
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- if (strstr(boot_cpu_data.x86_model_id, "Screwdriver"))
- return;
- stop_k7_watchdog();
- break;
- case X86_VENDOR_INTEL:
- if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
- stop_intel_arch_watchdog();
- break;
- }
- stop_p4_watchdog();
- break;
- default:
- return;
- }
- }
- wd->enabled = 0;
+ if (nmi_watchdog == NMI_LOCAL_APIC)
+ lapic_watchdog_stop();
+ __get_cpu_var(wd_enabled) = 0;
atomic_dec(&nmi_active);
}
@@ -885,9 +318,7 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
int sum;
int touched = 0;
int cpu = smp_processor_id();
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
- u64 dummy;
- int rc=0;
+ int rc = 0;
/* check for other users first */
if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
@@ -934,55 +365,20 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
}
/* see if the nmi watchdog went off */
- if (wd->enabled) {
- if (nmi_watchdog == NMI_LOCAL_APIC) {
- rdmsrl(wd->perfctr_msr, dummy);
- if (dummy & wd->check_bit){
- /* this wasn't a watchdog timer interrupt */
- goto done;
- }
-
- /* only Intel uses the cccr msr */
- if (wd->cccr_msr != 0) {
- /*
- * P4 quirks:
- * - An overflown perfctr will assert its interrupt
- * until the OVF flag in its CCCR is cleared.
- * - LVTPC is masked on interrupt and must be
- * unmasked by the LVTPC handler.
- */
- rdmsrl(wd->cccr_msr, dummy);
- dummy &= ~P4_CCCR_OVF;
- wrmsrl(wd->cccr_msr, dummy);
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- /* start the cycle over again */
- wrmsrl(wd->perfctr_msr,
- -((u64)cpu_khz * 1000 / nmi_hz));
- } else if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
- /*
- * ArchPerfom/Core Duo needs to re-unmask
- * the apic vector
- */
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- /* ARCH_PERFMON has 32 bit counter writes */
- wrmsr(wd->perfctr_msr,
- (u32)(-((u64)cpu_khz * 1000 / nmi_hz)), 0);
- } else {
- /* start the cycle over again */
- wrmsrl(wd->perfctr_msr,
- -((u64)cpu_khz * 1000 / nmi_hz));
- }
- rc = 1;
- } else if (nmi_watchdog == NMI_IO_APIC) {
- /* don't know how to accurately check for this.
- * just assume it was a watchdog timer interrupt
- * This matches the old behaviour.
- */
- rc = 1;
- } else
- printk(KERN_WARNING "Unknown enabled NMI hardware?!\n");
+ if (!__get_cpu_var(wd_enabled))
+ return rc;
+ switch (nmi_watchdog) {
+ case NMI_LOCAL_APIC:
+ rc |= lapic_wd_event(nmi_hz);
+ break;
+ case NMI_IO_APIC:
+ /* don't know how to accurately check for this.
+ * just assume it was a watchdog timer interrupt
+ * This matches the old behaviour.
+ */
+ rc = 1;
+ break;
}
-done:
return rc;
}
@@ -1067,12 +463,4 @@ void __trigger_all_cpu_backtrace(void)
EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);
-EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
-EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
-EXPORT_SYMBOL(reserve_perfctr_nmi);
-EXPORT_SYMBOL(release_perfctr_nmi);
-EXPORT_SYMBOL(reserve_evntsel_nmi);
-EXPORT_SYMBOL(release_evntsel_nmi);
-EXPORT_SYMBOL(disable_timer_nmi_watchdog);
-EXPORT_SYMBOL(enable_timer_nmi_watchdog);
EXPORT_SYMBOL(touch_nmi_watchdog);
diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c
index 04480c3b68f5..5bd20b542c1e 100644
--- a/arch/x86_64/kernel/pci-calgary.c
+++ b/arch/x86_64/kernel/pci-calgary.c
@@ -507,7 +507,7 @@ error:
return ret;
}
-static struct dma_mapping_ops calgary_dma_ops = {
+static const struct dma_mapping_ops calgary_dma_ops = {
.alloc_coherent = calgary_alloc_coherent,
.map_single = calgary_map_single,
.unmap_single = calgary_unmap_single,
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index 0bae862e9a55..0a762e10f2be 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -556,7 +556,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
extern int agp_amd64_init(void);
-static struct dma_mapping_ops gart_dma_ops = {
+static const struct dma_mapping_ops gart_dma_ops = {
.mapping_error = NULL,
.map_single = gart_map_single,
.map_simple = gart_map_simple,
diff --git a/arch/x86_64/kernel/pci-nommu.c b/arch/x86_64/kernel/pci-nommu.c
index df09ab05a1bd..6dade0c867cc 100644
--- a/arch/x86_64/kernel/pci-nommu.c
+++ b/arch/x86_64/kernel/pci-nommu.c
@@ -79,7 +79,7 @@ void nommu_unmap_sg(struct device *dev, struct scatterlist *sg,
{
}
-struct dma_mapping_ops nommu_dma_ops = {
+const struct dma_mapping_ops nommu_dma_ops = {
.map_single = nommu_map_single,
.unmap_single = nommu_unmap_single,
.map_sg = nommu_map_sg,
diff --git a/arch/x86_64/kernel/pci-swiotlb.c b/arch/x86_64/kernel/pci-swiotlb.c
index eb18be5a6569..4b4569abc60c 100644
--- a/arch/x86_64/kernel/pci-swiotlb.c
+++ b/arch/x86_64/kernel/pci-swiotlb.c
@@ -12,7 +12,7 @@
int swiotlb __read_mostly;
EXPORT_SYMBOL(swiotlb);
-struct dma_mapping_ops swiotlb_dma_ops = {
+const struct dma_mapping_ops swiotlb_dma_ops = {
.mapping_error = swiotlb_dma_mapping_error,
.alloc_coherent = swiotlb_alloc_coherent,
.free_coherent = swiotlb_free_coherent,
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index d8d5ccc245c8..4f21765078b7 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -288,16 +288,18 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
static int __init idle_setup (char *str)
{
- if (!strncmp(str, "poll", 4)) {
+ if (!strcmp(str, "poll")) {
printk("using polling idle threads.\n");
pm_idle = poll_idle;
- }
+ } else if (!strcmp(str, "mwait"))
+ force_mwait = 1;
+ else
+ return -1;
boot_option_idle_override = 1;
- return 1;
+ return 0;
}
-
-__setup("idle=", idle_setup);
+early_param("idle", idle_setup);
/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs * regs)
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 3d98b696881d..db30b5bcef61 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -79,6 +79,8 @@ int bootloader_type;
unsigned long saved_video_mode;
+int force_mwait __cpuinitdata;
+
/*
* Early DMI memory
*/
@@ -205,10 +207,10 @@ static void discover_ebda(void)
* there is a real-mode segmented pointer pointing to the
* 4K EBDA area at 0x40E
*/
- ebda_addr = *(unsigned short *)EBDA_ADDR_POINTER;
+ ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER);
ebda_addr <<= 4;
- ebda_size = *(unsigned short *)(unsigned long)ebda_addr;
+ ebda_size = *(unsigned short *)__va(ebda_addr);
/* Round EBDA up to pages */
if (ebda_size == 0)
@@ -243,11 +245,12 @@ void __init setup_arch(char **cmdline_p)
init_mm.end_code = (unsigned long) &_etext;
init_mm.end_data = (unsigned long) &_edata;
init_mm.brk = (unsigned long) &_end;
+ init_mm.pgd = __va(__pa_symbol(&init_level4_pgt));
- code_resource.start = virt_to_phys(&_text);
- code_resource.end = virt_to_phys(&_etext)-1;
- data_resource.start = virt_to_phys(&_etext);
- data_resource.end = virt_to_phys(&_edata)-1;
+ code_resource.start = __pa_symbol(&_text);
+ code_resource.end = __pa_symbol(&_etext)-1;
+ data_resource.start = __pa_symbol(&_etext);
+ data_resource.end = __pa_symbol(&_edata)-1;
early_identify_cpu(&boot_cpu_data);
@@ -274,8 +277,6 @@ void __init setup_arch(char **cmdline_p)
dmi_scan_machine();
- zap_low_mappings(0);
-
#ifdef CONFIG_ACPI
/*
* Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
@@ -329,15 +330,8 @@ void __init setup_arch(char **cmdline_p)
#endif
#ifdef CONFIG_SMP
- /*
- * But first pinch a few for the stack/trampoline stuff
- * FIXME: Don't need the extra page at 4K, but need to fix
- * trampoline before removing it. (see the GDT stuff)
- */
- reserve_bootmem_generic(PAGE_SIZE, PAGE_SIZE);
-
/* Reserve SMP trampoline */
- reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, PAGE_SIZE);
+ reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, 2*PAGE_SIZE);
#endif
#ifdef CONFIG_ACPI_SLEEP
@@ -612,6 +606,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
/* RDTSC can be speculated around */
clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
+
+ /* Family 10 doesn't support C states in MWAIT so don't use it */
+ if (c->x86 == 0x10 && !force_mwait)
+ clear_bit(X86_FEATURE_MWAIT, &c->x86_capability);
}
static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -987,9 +985,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
"stc",
"100mhzsteps",
"hwpstate",
- NULL, /* tsc invariant mapped to constant_tsc */
- NULL,
- /* nothing */ /* constant_tsc - moved to flags */
+ "", /* tsc invariant mapped to constant_tsc */
+ /* nothing */
};
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
index 6a70b55f719d..64379a80d763 100644
--- a/arch/x86_64/kernel/setup64.c
+++ b/arch/x86_64/kernel/setup64.c
@@ -103,9 +103,9 @@ void __init setup_per_cpu_areas(void)
if (!NODE_DATA(cpu_to_node(i))) {
printk("cpu with no node %d, num_online_nodes %d\n",
i, num_online_nodes());
- ptr = alloc_bootmem(size);
+ ptr = alloc_bootmem_pages(size);
} else {
- ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size);
+ ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size);
}
if (!ptr)
panic("Cannot allocate cpu data for CPU %d\n", i);
@@ -201,7 +201,6 @@ void __cpuinit cpu_init (void)
/* CPU 0 is initialised in head64.c */
if (cpu != 0) {
pda_init(cpu);
- zap_low_mappings(cpu);
} else
estacks = boot_exception_stacks;
diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c
index 49ec324cd141..c819625f3316 100644
--- a/arch/x86_64/kernel/signal.c
+++ b/arch/x86_64/kernel/signal.c
@@ -141,7 +141,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
goto badframe;
#ifdef DEBUG_SIG
- printk("%d sigreturn rip:%lx rsp:%lx frame:%p rax:%lx\n",current->pid,regs.rip,regs.rsp,frame,eax);
+ printk("%d sigreturn rip:%lx rsp:%lx frame:%p rax:%lx\n",current->pid,regs->rip,regs->rsp,frame,eax);
#endif
if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->rsp) == -EFAULT)
@@ -301,7 +301,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
if (test_thread_flag(TIF_SINGLESTEP))
ptrace_notify(SIGTRAP);
#ifdef DEBUG_SIG
- printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
+ printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%p\n",
current->comm, current->pid, frame, regs->rip, frame->pretcode);
#endif
@@ -463,7 +463,7 @@ void
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
{
#ifdef DEBUG_SIG
- printk("do_notify_resume flags:%x rip:%lx rsp:%lx caller:%lx pending:%lx\n",
+ printk("do_notify_resume flags:%x rip:%lx rsp:%lx caller:%p pending:%x\n",
thread_info_flags, regs->rip, regs->rsp, __builtin_return_address(0),signal_pending(current));
#endif
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index af1ec4d23cf8..22abae4e9f39 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -76,7 +76,7 @@ static inline void leave_mm(int cpu)
if (read_pda(mmu_state) == TLBSTATE_OK)
BUG();
cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
- load_cr3(swapper_pg_dir);
+ load_cr3(init_mm.pgd);
}
/*
@@ -452,42 +452,34 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
}
EXPORT_SYMBOL(smp_call_function);
-void smp_stop_cpu(void)
+static void stop_this_cpu(void *dummy)
{
- unsigned long flags;
+ local_irq_disable();
/*
* Remove this CPU:
*/
cpu_clear(smp_processor_id(), cpu_online_map);
- local_irq_save(flags);
disable_local_APIC();
- local_irq_restore(flags);
-}
-
-static void smp_really_stop_cpu(void *dummy)
-{
- smp_stop_cpu();
for (;;)
halt();
}
void smp_send_stop(void)
{
- int nolock = 0;
+ int nolock;
+ unsigned long flags;
+
if (reboot_force)
return;
+
/* Don't deadlock on the call lock in panic */
- if (!spin_trylock(&call_lock)) {
- /* ignore locking because we have panicked anyways */
- nolock = 1;
- }
- __smp_call_function(smp_really_stop_cpu, NULL, 0, 0);
+ nolock = !spin_trylock(&call_lock);
+ local_irq_save(flags);
+ __smp_call_function(stop_this_cpu, NULL, 0, 0);
if (!nolock)
spin_unlock(&call_lock);
-
- local_irq_disable();
disable_local_APIC();
- local_irq_enable();
+ local_irq_restore(flags);
}
/*
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index cd4643a37022..4d9dacfae575 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -60,7 +60,6 @@
#include <asm/irq.h>
#include <asm/hw_irq.h>
#include <asm/numa.h>
-#include <asm/genapic.h>
/* Number of siblings per CPU package */
int smp_num_siblings = 1;
@@ -68,7 +67,6 @@ EXPORT_SYMBOL(smp_num_siblings);
/* Last level cache ID of each logical CPU */
u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};
-EXPORT_SYMBOL(cpu_llc_id);
/* Bitmask of currently online CPUs */
cpumask_t cpu_online_map __read_mostly;
@@ -392,7 +390,8 @@ static void inquire_remote_apic(int apicid)
{
unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
char *names[] = { "ID", "VERSION", "SPIV" };
- int timeout, status;
+ int timeout;
+ unsigned int status;
printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid);
@@ -402,7 +401,9 @@ static void inquire_remote_apic(int apicid)
/*
* Wait for idle.
*/
- apic_wait_icr_idle();
+ status = safe_apic_wait_icr_idle();
+ if (status)
+ printk("a previous APIC delivery may have failed\n");
apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);
@@ -430,8 +431,8 @@ static void inquire_remote_apic(int apicid)
*/
static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip)
{
- unsigned long send_status = 0, accept_status = 0;
- int maxlvt, timeout, num_starts, j;
+ unsigned long send_status, accept_status = 0;
+ int maxlvt, num_starts, j;
Dprintk("Asserting INIT.\n");
@@ -447,12 +448,7 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta
| APIC_DM_INIT);
Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while (send_status && (timeout++ < 1000));
+ send_status = safe_apic_wait_icr_idle();
mdelay(10);
@@ -465,12 +461,7 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta
apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while (send_status && (timeout++ < 1000));
+ send_status = safe_apic_wait_icr_idle();
mb();
atomic_set(&init_deasserted, 1);
@@ -509,12 +500,7 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta
Dprintk("Startup point 1.\n");
Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while (send_status && (timeout++ < 1000));
+ send_status = safe_apic_wait_icr_idle();
/*
* Give the other CPU some time to accept the IPI.
@@ -945,6 +931,12 @@ int __cpuinit __cpu_up(unsigned int cpu)
return -ENOSYS;
}
+ /*
+ * Save current MTRR state in case it was changed since early boot
+ * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync:
+ */
+ mtrr_save_state();
+
per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
/* Boot it! */
err = do_boot_cpu(cpu, apicid);
@@ -965,13 +957,6 @@ int __cpuinit __cpu_up(unsigned int cpu)
while (!cpu_isset(cpu, cpu_online_map))
cpu_relax();
-
- if (num_online_cpus() > 8 && genapic == &apic_flat) {
- printk(KERN_WARNING
- "flat APIC routing can't be used with > 8 cpus\n");
- BUG();
- }
-
err = 0;
return err;
diff --git a/arch/x86_64/kernel/suspend.c b/arch/x86_64/kernel/suspend.c
index 91f7e678bae7..6a5a98f2a75c 100644
--- a/arch/x86_64/kernel/suspend.c
+++ b/arch/x86_64/kernel/suspend.c
@@ -12,6 +12,10 @@
#include <asm/proto.h>
#include <asm/page.h>
#include <asm/pgtable.h>
+#include <asm/mtrr.h>
+
+/* References to section boundaries */
+extern const void __nosave_begin, __nosave_end;
struct saved_context saved_context;
@@ -33,7 +37,6 @@ void __save_processor_state(struct saved_context *ctxt)
asm volatile ("str %0" : "=m" (ctxt->tr));
/* XMM0..XMM15 should be handled by kernel_fpu_begin(). */
- /* EFER should be constant for kernel version, no need to handle it. */
/*
* segment registers
*/
@@ -46,10 +49,12 @@ void __save_processor_state(struct saved_context *ctxt)
rdmsrl(MSR_FS_BASE, ctxt->fs_base);
rdmsrl(MSR_GS_BASE, ctxt->gs_base);
rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
+ mtrr_save_fixed_ranges(NULL);
/*
* control registers
*/
+ rdmsrl(MSR_EFER, ctxt->efer);
asm volatile ("movq %%cr0, %0" : "=r" (ctxt->cr0));
asm volatile ("movq %%cr2, %0" : "=r" (ctxt->cr2));
asm volatile ("movq %%cr3, %0" : "=r" (ctxt->cr3));
@@ -75,6 +80,7 @@ void __restore_processor_state(struct saved_context *ctxt)
/*
* control registers
*/
+ wrmsrl(MSR_EFER, ctxt->efer);
asm volatile ("movq %0, %%cr8" :: "r" (ctxt->cr8));
asm volatile ("movq %0, %%cr4" :: "r" (ctxt->cr4));
asm volatile ("movq %0, %%cr3" :: "r" (ctxt->cr3));
@@ -219,4 +225,15 @@ int swsusp_arch_resume(void)
restore_image();
return 0;
}
+
+/*
+ * pfn_is_nosave - check if given pfn is in the 'nosave' section
+ */
+
+int pfn_is_nosave(unsigned long pfn)
+{
+ unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
+ unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
+ return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
+}
#endif /* CONFIG_SOFTWARE_SUSPEND */
diff --git a/arch/x86_64/kernel/suspend_asm.S b/arch/x86_64/kernel/suspend_asm.S
index bfbe00763c68..16d183f67bc1 100644
--- a/arch/x86_64/kernel/suspend_asm.S
+++ b/arch/x86_64/kernel/suspend_asm.S
@@ -71,9 +71,10 @@ loop:
jmp loop
done:
/* go back to the original page tables */
- leaq init_level4_pgt(%rip), %rax
- subq $__START_KERNEL_map, %rax
- movq %rax, %cr3
+ movq $(init_level4_pgt - __START_KERNEL_map), %rax
+ addq phys_base(%rip), %rax
+ movq %rax, %cr3
+
/* Flush TLB, including "global" things (vmalloc) */
movq mmu_cr4_features(%rip), %rax
movq %rax, %rdx
diff --git a/arch/x86_64/kernel/syscall.c b/arch/x86_64/kernel/syscall.c
index 213fd6ab789d..63d592c276cc 100644
--- a/arch/x86_64/kernel/syscall.c
+++ b/arch/x86_64/kernel/syscall.c
@@ -3,6 +3,7 @@
#include <linux/linkage.h>
#include <linux/sys.h>
#include <linux/cache.h>
+#include <asm/asm-offsets.h>
#define __NO_STUBS
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 75d73a9aa9ff..0652e173813b 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -39,13 +39,11 @@
#include <asm/proto.h>
#include <asm/hpet.h>
#include <asm/sections.h>
-#include <linux/cpufreq.h>
#include <linux/hpet.h>
#include <asm/apic.h>
#include <asm/hpet.h>
-
-extern void i8254_timer_resume(void);
-extern int using_apic_timer;
+#include <asm/mpspec.h>
+#include <asm/nmi.h>
static char *timename = NULL;
@@ -252,6 +250,51 @@ static unsigned long get_cmos_time(void)
return mktime(year, mon, day, hour, min, sec);
}
+/* calibrate_cpu is used on systems with fixed rate TSCs to determine
+ * processor frequency */
+#define TICK_COUNT 100000000
+static unsigned int __init tsc_calibrate_cpu_khz(void)
+{
+ int tsc_start, tsc_now;
+ int i, no_ctr_free;
+ unsigned long evntsel3 = 0, pmc3 = 0, pmc_now = 0;
+ unsigned long flags;
+
+ for (i = 0; i < 4; i++)
+ if (avail_to_resrv_perfctr_nmi_bit(i))
+ break;
+ no_ctr_free = (i == 4);
+ if (no_ctr_free) {
+ i = 3;
+ rdmsrl(MSR_K7_EVNTSEL3, evntsel3);
+ wrmsrl(MSR_K7_EVNTSEL3, 0);
+ rdmsrl(MSR_K7_PERFCTR3, pmc3);
+ } else {
+ reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i);
+ reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
+ }
+ local_irq_save(flags);
+ /* start meauring cycles, incrementing from 0 */
+ wrmsrl(MSR_K7_PERFCTR0 + i, 0);
+ wrmsrl(MSR_K7_EVNTSEL0 + i, 1 << 22 | 3 << 16 | 0x76);
+ rdtscl(tsc_start);
+ do {
+ rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now);
+ tsc_now = get_cycles_sync();
+ } while ((tsc_now - tsc_start) < TICK_COUNT);
+
+ local_irq_restore(flags);
+ if (no_ctr_free) {
+ wrmsrl(MSR_K7_EVNTSEL3, 0);
+ wrmsrl(MSR_K7_PERFCTR3, pmc3);
+ wrmsrl(MSR_K7_EVNTSEL3, evntsel3);
+ } else {
+ release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
+ release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
+ }
+
+ return pmc_now * tsc_khz / (tsc_now - tsc_start);
+}
/*
* pit_calibrate_tsc() uses the speaker output (channel 2) of
@@ -285,7 +328,7 @@ static unsigned int __init pit_calibrate_tsc(void)
#define PIT_MODE 0x43
#define PIT_CH0 0x40
-static void __init __pit_init(int val, u8 mode)
+static void __pit_init(int val, u8 mode)
{
unsigned long flags;
@@ -301,12 +344,12 @@ void __init pit_init(void)
__pit_init(LATCH, 0x34); /* binary, mode 2, LSB/MSB, ch 0 */
}
-void __init pit_stop_interrupt(void)
+void pit_stop_interrupt(void)
{
__pit_init(0, 0x30); /* mode 0 */
}
-void __init stop_timer_interrupt(void)
+void stop_timer_interrupt(void)
{
char *name;
if (hpet_address) {
@@ -339,23 +382,29 @@ void __init time_init(void)
if (hpet_use_timer) {
/* set tick_nsec to use the proper rate for HPET */
tick_nsec = TICK_NSEC_HPET;
- cpu_khz = hpet_calibrate_tsc();
+ tsc_khz = hpet_calibrate_tsc();
timename = "HPET";
} else {
pit_init();
- cpu_khz = pit_calibrate_tsc();
+ tsc_khz = pit_calibrate_tsc();
timename = "PIT";
}
+ cpu_khz = tsc_khz;
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) &&
+ boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+ boot_cpu_data.x86 == 16)
+ cpu_khz = tsc_calibrate_cpu_khz();
+
if (unsynchronized_tsc())
- mark_tsc_unstable();
+ mark_tsc_unstable("TSCs unsynchronized");
if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
vgetcpu_mode = VGETCPU_RDTSCP;
else
vgetcpu_mode = VGETCPU_LSL;
- set_cyc2ns_scale(cpu_khz);
+ set_cyc2ns_scale(tsc_khz);
printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
cpu_khz / 1000, cpu_khz % 1000);
init_tsc_clocksource();
diff --git a/arch/x86_64/kernel/trampoline.S b/arch/x86_64/kernel/trampoline.S
index c79b99a9e2f6..e7e2764c461b 100644
--- a/arch/x86_64/kernel/trampoline.S
+++ b/arch/x86_64/kernel/trampoline.S
@@ -3,6 +3,7 @@
* Trampoline.S Derived from Setup.S by Linus Torvalds
*
* 4 Jan 1997 Michael Chastain: changed to gnu as.
+ * 15 Sept 2005 Eric Biederman: 64bit PIC support
*
* Entry: CS:IP point to the start of our code, we are
* in real mode with no stack, but the rest of the
@@ -17,15 +18,20 @@
* and IP is zero. Thus, data addresses need to be absolute
* (no relocation) and are taken with regard to r_base.
*
+ * With the addition of trampoline_level4_pgt this code can
+ * now enter a 64bit kernel that lives at arbitrary 64bit
+ * physical addresses.
+ *
* If you work on this file, check the object module with objdump
* --full-contents --reloc to make sure there are no relocation
- * entries. For the GDT entry we do hand relocation in smpboot.c
- * because of 64bit linker limitations.
+ * entries.
*/
#include <linux/linkage.h>
-#include <asm/segment.h>
+#include <asm/pgtable.h>
#include <asm/page.h>
+#include <asm/msr.h>
+#include <asm/segment.h>
.data
@@ -33,15 +39,33 @@
ENTRY(trampoline_data)
r_base = .
+ cli # We should be safe anyway
wbinvd
mov %cs, %ax # Code and data in the same place
mov %ax, %ds
+ mov %ax, %es
+ mov %ax, %ss
- cli # We should be safe anyway
movl $0xA5A5A5A5, trampoline_data - r_base
# write marker for master knows we're running
+ # Setup stack
+ movw $(trampoline_stack_end - r_base), %sp
+
+ call verify_cpu # Verify the cpu supports long mode
+ testl %eax, %eax # Check for return code
+ jnz no_longmode
+
+ mov %cs, %ax
+ movzx %ax, %esi # Find the 32bit trampoline location
+ shll $4, %esi
+
+ # Fixup the vectors
+ addl %esi, startup_32_vector - r_base
+ addl %esi, startup_64_vector - r_base
+ addl %esi, tgdt + 2 - r_base # Fixup the gdt pointer
+
/*
* GDT tables in non default location kernel can be beyond 16MB and
* lgdt will not be able to load the address as in real mode default
@@ -49,23 +73,94 @@ r_base = .
* to 32 bit.
*/
- lidtl idt_48 - r_base # load idt with 0, 0
- lgdtl gdt_48 - r_base # load gdt with whatever is appropriate
+ lidtl tidt - r_base # load idt with 0, 0
+ lgdtl tgdt - r_base # load gdt with whatever is appropriate
xor %ax, %ax
inc %ax # protected mode (PE) bit
lmsw %ax # into protected mode
- # flaush prefetch and jump to startup_32 in arch/x86_64/kernel/head.S
- ljmpl $__KERNEL32_CS, $(startup_32-__START_KERNEL_map)
+
+ # flush prefetch and jump to startup_32
+ ljmpl *(startup_32_vector - r_base)
+
+ .code32
+ .balign 4
+startup_32:
+ movl $__KERNEL_DS, %eax # Initialize the %ds segment register
+ movl %eax, %ds
+
+ xorl %eax, %eax
+ btsl $5, %eax # Enable PAE mode
+ movl %eax, %cr4
+
+ # Setup trampoline 4 level pagetables
+ leal (trampoline_level4_pgt - r_base)(%esi), %eax
+ movl %eax, %cr3
+
+ movl $MSR_EFER, %ecx
+ movl $(1 << _EFER_LME), %eax # Enable Long Mode
+ xorl %edx, %edx
+ wrmsr
+
+ xorl %eax, %eax
+ btsl $31, %eax # Enable paging and in turn activate Long Mode
+ btsl $0, %eax # Enable protected mode
+ movl %eax, %cr0
+
+ /*
+ * At this point we're in long mode but in 32bit compatibility mode
+ * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn
+ * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we use
+ * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
+ */
+ ljmp *(startup_64_vector - r_base)(%esi)
+
+ .code64
+ .balign 4
+startup_64:
+ # Now jump into the kernel using virtual addresses
+ movq $secondary_startup_64, %rax
+ jmp *%rax
+
+ .code16
+no_longmode:
+ hlt
+ jmp no_longmode
+#include "verify_cpu.S"
# Careful these need to be in the same 64K segment as the above;
-idt_48:
+tidt:
.word 0 # idt limit = 0
.word 0, 0 # idt base = 0L
-gdt_48:
- .short GDT_ENTRIES*8 - 1 # gdt limit
- .long cpu_gdt_table-__START_KERNEL_map
+ # Duplicate the global descriptor table
+ # so the kernel can live anywhere
+ .balign 4
+tgdt:
+ .short tgdt_end - tgdt # gdt limit
+ .long tgdt - r_base
+ .short 0
+ .quad 0x00cf9b000000ffff # __KERNEL32_CS
+ .quad 0x00af9b000000ffff # __KERNEL_CS
+ .quad 0x00cf93000000ffff # __KERNEL_DS
+tgdt_end:
+
+ .balign 4
+startup_32_vector:
+ .long startup_32 - r_base
+ .word __KERNEL32_CS, 0
+
+ .balign 4
+startup_64_vector:
+ .long startup_64 - r_base
+ .word __KERNEL_CS, 0
+
+trampoline_stack:
+ .org 0x1000
+trampoline_stack_end:
+ENTRY(trampoline_level4_pgt)
+ .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+ .fill 510,8,0
+ .quad level3_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
-.globl trampoline_end
-trampoline_end:
+ENTRY(trampoline_end)
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 09d2e8a10a49..d76fc32d4599 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -426,8 +426,7 @@ void show_registers(struct pt_regs *regs)
const int cpu = smp_processor_id();
struct task_struct *cur = cpu_pda(cpu)->pcurrent;
- rsp = regs->rsp;
-
+ rsp = regs->rsp;
printk("CPU %d ", cpu);
__show_regs(regs);
printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
@@ -438,7 +437,6 @@ void show_registers(struct pt_regs *regs)
* time of the fault..
*/
if (in_kernel) {
-
printk("Stack: ");
_show_stack(NULL, regs, (unsigned long*)rsp);
@@ -581,10 +579,20 @@ static void __kprobes do_trap(int trapnr, int signr, char *str,
{
struct task_struct *tsk = current;
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = trapnr;
-
if (user_mode(regs)) {
+ /*
+ * We want error_code and trap_no set for userspace
+ * faults and kernelspace faults which result in
+ * die(), but not kernelspace faults which are fixed
+ * up. die() gives the process no chance to handle
+ * the signal and notice the kernel fault information,
+ * so that won't result in polluting the information
+ * about previously queued, but not yet delivered,
+ * faults. See also do_general_protection below.
+ */
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = trapnr;
+
if (exception_trace && unhandled_signal(tsk, signr))
printk(KERN_INFO
"%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n",
@@ -605,8 +613,11 @@ static void __kprobes do_trap(int trapnr, int signr, char *str,
fixup = search_exception_tables(regs->rip);
if (fixup)
regs->rip = fixup->fixup;
- else
+ else {
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = trapnr;
die(str, regs, error_code);
+ }
return;
}
}
@@ -682,10 +693,10 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
conditional_sti(regs);
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = 13;
-
if (user_mode(regs)) {
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = 13;
+
if (exception_trace && unhandled_signal(tsk, SIGSEGV))
printk(KERN_INFO
"%s[%d] general protection rip:%lx rsp:%lx error:%lx\n",
@@ -704,6 +715,9 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
regs->rip = fixup->fixup;
return;
}
+
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = 13;
if (notify_die(DIE_GPF, "general protection fault", regs,
error_code, 13, SIGSEGV) == NOTIFY_STOP)
return;
diff --git a/arch/x86_64/kernel/tsc.c b/arch/x86_64/kernel/tsc.c
index 1a0edbbffaa0..48f9a8e6aa91 100644
--- a/arch/x86_64/kernel/tsc.c
+++ b/arch/x86_64/kernel/tsc.c
@@ -13,6 +13,8 @@ static int notsc __initdata = 0;
unsigned int cpu_khz; /* TSC clocks / usec, not used here */
EXPORT_SYMBOL(cpu_khz);
+unsigned int tsc_khz;
+EXPORT_SYMBOL(tsc_khz);
static unsigned int cyc2ns_scale __read_mostly;
@@ -77,7 +79,7 @@ static void handle_cpufreq_delayed_get(struct work_struct *v)
static unsigned int ref_freq = 0;
static unsigned long loops_per_jiffy_ref = 0;
-static unsigned long cpu_khz_ref = 0;
+static unsigned long tsc_khz_ref = 0;
static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
void *data)
@@ -99,7 +101,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
if (!ref_freq) {
ref_freq = freq->old;
loops_per_jiffy_ref = *lpj;
- cpu_khz_ref = cpu_khz;
+ tsc_khz_ref = tsc_khz;
}
if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
(val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
@@ -107,12 +109,12 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
*lpj =
cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
- cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
+ tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
if (!(freq->flags & CPUFREQ_CONST_LOOPS))
- mark_tsc_unstable();
+ mark_tsc_unstable("cpufreq changes");
}
- set_cyc2ns_scale(cpu_khz_ref);
+ set_cyc2ns_scale(tsc_khz_ref);
return 0;
}
@@ -197,10 +199,11 @@ static struct clocksource clocksource_tsc = {
.vread = vread_tsc,
};
-void mark_tsc_unstable(void)
+void mark_tsc_unstable(char *reason)
{
if (!tsc_unstable) {
tsc_unstable = 1;
+ printk("Marking TSC unstable due to %s\n", reason);
/* Change only the rating, when not registered */
if (clocksource_tsc.mult)
clocksource_change_rating(&clocksource_tsc, 0);
@@ -213,7 +216,7 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable);
void __init init_tsc_clocksource(void)
{
if (!notsc) {
- clocksource_tsc.mult = clocksource_khz2mult(cpu_khz,
+ clocksource_tsc.mult = clocksource_khz2mult(tsc_khz,
clocksource_tsc.shift);
if (check_tsc_unstable())
clocksource_tsc.rating = 0;
diff --git a/arch/x86_64/kernel/tsc_sync.c b/arch/x86_64/kernel/tsc_sync.c
index 014f0db45dfa..355f5f506c81 100644
--- a/arch/x86_64/kernel/tsc_sync.c
+++ b/arch/x86_64/kernel/tsc_sync.c
@@ -50,7 +50,7 @@ static __cpuinit void check_tsc_warp(void)
/*
* The measurement runs for 20 msecs:
*/
- end = start + cpu_khz * 20ULL;
+ end = start + tsc_khz * 20ULL;
now = start;
for (i = 0; ; i++) {
@@ -138,7 +138,7 @@ void __cpuinit check_tsc_sync_source(int cpu)
printk("\n");
printk(KERN_WARNING "Measured %Ld cycles TSC warp between CPUs,"
" turning off TSC clock.\n", max_warp);
- mark_tsc_unstable();
+ mark_tsc_unstable("check_tsc_sync_source failed");
nr_warps = 0;
max_warp = 0;
last_tsc = 0;
diff --git a/arch/x86_64/kernel/verify_cpu.S b/arch/x86_64/kernel/verify_cpu.S
new file mode 100644
index 000000000000..e035f5948199
--- /dev/null
+++ b/arch/x86_64/kernel/verify_cpu.S
@@ -0,0 +1,119 @@
+/*
+ *
+ * verify_cpu.S - Code for cpu long mode and SSE verification. This
+ * code has been borrowed from boot/setup.S and was introduced by
+ * Andi Kleen.
+ *
+ * Copyright (c) 2007 Andi Kleen (ak@suse.de)
+ * Copyright (c) 2007 Eric Biederman (ebiederm@xmission.com)
+ * Copyright (c) 2007 Vivek Goyal (vgoyal@in.ibm.com)
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ *
+ * This is a common code for verification whether CPU supports
+ * long mode and SSE or not. It is not called directly instead this
+ * file is included at various places and compiled in that context.
+ * Following are the current usage.
+ *
+ * This file is included by both 16bit and 32bit code.
+ *
+ * arch/x86_64/boot/setup.S : Boot cpu verification (16bit)
+ * arch/x86_64/boot/compressed/head.S: Boot cpu verification (32bit)
+ * arch/x86_64/kernel/trampoline.S: secondary processor verfication (16bit)
+ * arch/x86_64/kernel/acpi/wakeup.S:Verfication at resume (16bit)
+ *
+ * verify_cpu, returns the status of cpu check in register %eax.
+ * 0: Success 1: Failure
+ *
+ * The caller needs to check for the error code and take the action
+ * appropriately. Either display a message or halt.
+ */
+
+#include <asm/cpufeature.h>
+
+verify_cpu:
+ pushfl # Save caller passed flags
+ pushl $0 # Kill any dangerous flags
+ popfl
+
+ /* minimum CPUID flags for x86-64 as defined by AMD */
+#define M(x) (1<<(x))
+#define M2(a,b) M(a)|M(b)
+#define M4(a,b,c,d) M(a)|M(b)|M(c)|M(d)
+
+#define SSE_MASK \
+ (M2(X86_FEATURE_XMM,X86_FEATURE_XMM2))
+#define REQUIRED_MASK1 \
+ (M4(X86_FEATURE_FPU,X86_FEATURE_PSE,X86_FEATURE_TSC,X86_FEATURE_MSR)|\
+ M4(X86_FEATURE_PAE,X86_FEATURE_CX8,X86_FEATURE_PGE,X86_FEATURE_CMOV)|\
+ M(X86_FEATURE_FXSR))
+#define REQUIRED_MASK2 \
+ (M(X86_FEATURE_LM - 32))
+
+ pushfl # standard way to check for cpuid
+ popl %eax
+ movl %eax,%ebx
+ xorl $0x200000,%eax
+ pushl %eax
+ popfl
+ pushfl
+ popl %eax
+ cmpl %eax,%ebx
+ jz verify_cpu_no_longmode # cpu has no cpuid
+
+ movl $0x0,%eax # See if cpuid 1 is implemented
+ cpuid
+ cmpl $0x1,%eax
+ jb verify_cpu_no_longmode # no cpuid 1
+
+ xor %di,%di
+ cmpl $0x68747541,%ebx # AuthenticAMD
+ jnz verify_cpu_noamd
+ cmpl $0x69746e65,%edx
+ jnz verify_cpu_noamd
+ cmpl $0x444d4163,%ecx
+ jnz verify_cpu_noamd
+ mov $1,%di # cpu is from AMD
+
+verify_cpu_noamd:
+ movl $0x1,%eax # Does the cpu have what it takes
+ cpuid
+ andl $REQUIRED_MASK1,%edx
+ xorl $REQUIRED_MASK1,%edx
+ jnz verify_cpu_no_longmode
+
+ movl $0x80000000,%eax # See if extended cpuid is implemented
+ cpuid
+ cmpl $0x80000001,%eax
+ jb verify_cpu_no_longmode # no extended cpuid
+
+ movl $0x80000001,%eax # Does the cpu have what it takes
+ cpuid
+ andl $REQUIRED_MASK2,%edx
+ xorl $REQUIRED_MASK2,%edx
+ jnz verify_cpu_no_longmode
+
+verify_cpu_sse_test:
+ movl $1,%eax
+ cpuid
+ andl $SSE_MASK,%edx
+ cmpl $SSE_MASK,%edx
+ je verify_cpu_sse_ok
+ test %di,%di
+ jz verify_cpu_no_longmode # only try to force SSE on AMD
+ movl $0xc0010015,%ecx # HWCR
+ rdmsr
+ btr $15,%eax # enable SSE
+ wrmsr
+ xor %di,%di # don't loop
+ jmp verify_cpu_sse_test # try again
+
+verify_cpu_no_longmode:
+ popfl # Restore caller passed flags
+ movl $1,%eax
+ ret
+verify_cpu_sse_ok:
+ popfl # Restore caller passed flags
+ xorl %eax, %eax
+ ret
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index 5176ecf006ee..88cfa50b424d 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -29,9 +29,7 @@ SECTIONS
.text : AT(ADDR(.text) - LOAD_OFFSET) {
/* First the code that has to be first for bootstrapping */
*(.bootstrap.text)
- /* Then all the functions that are "hot" in profiles, to group them
- onto the same hugetlb entry */
- #include "functionlist"
+ _stext = .;
/* Then the rest */
*(.text)
SCHED_TEXT
@@ -50,10 +48,10 @@ SECTIONS
__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) }
__stop___ex_table = .;
- RODATA
-
BUG_TABLE
+ RODATA
+
. = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */
/* Data */
.data : AT(ADDR(.data) - LOAD_OFFSET) {
@@ -94,6 +92,12 @@ SECTIONS
{ *(.vsyscall_gtod_data) }
vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data);
+
+ .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1))
+ { *(.vsyscall_1) }
+ .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2))
+ { *(.vsyscall_2) }
+
.vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) }
vgetcpu_mode = VVIRT(.vgetcpu_mode);
@@ -101,10 +105,6 @@ SECTIONS
.jiffies : AT(VLOAD(.jiffies)) { *(.jiffies) }
jiffies = VVIRT(.jiffies);
- .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1))
- { *(.vsyscall_1) }
- .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2))
- { *(.vsyscall_2) }
.vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3))
{ *(.vsyscall_3) }
@@ -194,7 +194,7 @@ SECTIONS
__initramfs_end = .;
#endif
- . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+ . = ALIGN(4096);
__per_cpu_start = .;
.data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) }
__per_cpu_end = .;
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index b43c698cf7d3..dc32cef96195 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -45,14 +45,34 @@
#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
#define __syscall_clobber "r11","rcx","memory"
+#define __pa_vsymbol(x) \
+ ({unsigned long v; \
+ extern char __vsyscall_0; \
+ asm("" : "=r" (v) : "0" (x)); \
+ ((v - VSYSCALL_FIRST_PAGE) + __pa_symbol(&__vsyscall_0)); })
+/*
+ * vsyscall_gtod_data contains data that is :
+ * - readonly from vsyscalls
+ * - writen by timer interrupt or systcl (/proc/sys/kernel/vsyscall64)
+ * Try to keep this structure as small as possible to avoid cache line ping pongs
+ */
struct vsyscall_gtod_data_t {
- seqlock_t lock;
- int sysctl_enabled;
- struct timeval wall_time_tv;
+ seqlock_t lock;
+
+ /* open coded 'struct timespec' */
+ time_t wall_time_sec;
+ u32 wall_time_nsec;
+
+ int sysctl_enabled;
struct timezone sys_tz;
- cycle_t offset_base;
- struct clocksource clock;
+ struct { /* extract of a clocksource struct */
+ cycle_t (*vread)(void);
+ cycle_t cycle_last;
+ cycle_t mask;
+ u32 mult;
+ u32 shift;
+ } clock;
};
int __vgetcpu_mode __section_vgetcpu_mode;
@@ -68,9 +88,13 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
/* copy vsyscall data */
- vsyscall_gtod_data.clock = *clock;
- vsyscall_gtod_data.wall_time_tv.tv_sec = wall_time->tv_sec;
- vsyscall_gtod_data.wall_time_tv.tv_usec = wall_time->tv_nsec/1000;
+ vsyscall_gtod_data.clock.vread = clock->vread;
+ vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
+ vsyscall_gtod_data.clock.mask = clock->mask;
+ vsyscall_gtod_data.clock.mult = clock->mult;
+ vsyscall_gtod_data.clock.shift = clock->shift;
+ vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
+ vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
vsyscall_gtod_data.sys_tz = sys_tz;
write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
}
@@ -105,7 +129,8 @@ static __always_inline long time_syscall(long *t)
static __always_inline void do_vgettimeofday(struct timeval * tv)
{
cycle_t now, base, mask, cycle_delta;
- unsigned long seq, mult, shift, nsec_delta;
+ unsigned seq;
+ unsigned long mult, shift, nsec;
cycle_t (*vread)(void);
do {
seq = read_seqbegin(&__vsyscall_gtod_data.lock);
@@ -121,21 +146,20 @@ static __always_inline void do_vgettimeofday(struct timeval * tv)
mult = __vsyscall_gtod_data.clock.mult;
shift = __vsyscall_gtod_data.clock.shift;
- *tv = __vsyscall_gtod_data.wall_time_tv;
-
+ tv->tv_sec = __vsyscall_gtod_data.wall_time_sec;
+ nsec = __vsyscall_gtod_data.wall_time_nsec;
} while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
/* calculate interval: */
cycle_delta = (now - base) & mask;
/* convert to nsecs: */
- nsec_delta = (cycle_delta * mult) >> shift;
+ nsec += (cycle_delta * mult) >> shift;
- /* convert to usecs and add to timespec: */
- tv->tv_usec += nsec_delta / NSEC_PER_USEC;
- while (tv->tv_usec > USEC_PER_SEC) {
+ while (nsec >= NSEC_PER_SEC) {
tv->tv_sec += 1;
- tv->tv_usec -= USEC_PER_SEC;
+ nsec -= NSEC_PER_SEC;
}
+ tv->tv_usec = nsec / NSEC_PER_USEC;
}
int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
@@ -151,11 +175,13 @@ int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
* unlikely */
time_t __vsyscall(1) vtime(time_t *t)
{
+ time_t result;
if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
return time_syscall(t);
- else if (t)
- *t = __vsyscall_gtod_data.wall_time_tv.tv_sec;
- return __vsyscall_gtod_data.wall_time_tv.tv_sec;
+ result = __vsyscall_gtod_data.wall_time_sec;
+ if (t)
+ *t = result;
+ return result;
}
/* Fast way to get current CPU and node.
@@ -224,10 +250,10 @@ static int vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp,
return ret;
/* gcc has some trouble with __va(__pa()), so just do it this
way. */
- map1 = ioremap(__pa_symbol(&vsysc1), 2);
+ map1 = ioremap(__pa_vsymbol(&vsysc1), 2);
if (!map1)
return -ENOMEM;
- map2 = ioremap(__pa_symbol(&vsysc2), 2);
+ map2 = ioremap(__pa_vsymbol(&vsysc2), 2);
if (!map2) {
ret = -ENOMEM;
goto out;
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 6ada7231f3ab..de99dba2c515 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -585,7 +585,7 @@ do_sigbus:
}
DEFINE_SPINLOCK(pgd_lock);
-struct page *pgd_list;
+LIST_HEAD(pgd_list);
void vmalloc_sync_all(void)
{
@@ -605,8 +605,7 @@ void vmalloc_sync_all(void)
if (pgd_none(*pgd_ref))
continue;
spin_lock(&pgd_lock);
- for (page = pgd_list; page;
- page = (struct page *)page->index) {
+ list_for_each_entry(page, &pgd_list, lru) {
pgd_t *pgd;
pgd = (pgd_t *)page_address(page) + pgd_index(address);
if (pgd_none(*pgd))
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index ec31534eb104..282b0a8f00ad 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -22,10 +22,12 @@
#include <linux/bootmem.h>
#include <linux/proc_fs.h>
#include <linux/pci.h>
+#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/memory_hotplug.h>
+#include <linux/nmi.h>
#include <asm/processor.h>
#include <asm/system.h>
@@ -46,7 +48,7 @@
#define Dprintk(x...)
#endif
-struct dma_mapping_ops* dma_ops;
+const struct dma_mapping_ops* dma_ops;
EXPORT_SYMBOL(dma_ops);
static unsigned long dma_reserve __initdata;
@@ -72,6 +74,11 @@ void show_mem(void)
for_each_online_pgdat(pgdat) {
for (i = 0; i < pgdat->node_spanned_pages; ++i) {
+ /* this loop can take a while with 256 GB and 4k pages
+ so update the NMI watchdog */
+ if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) {
+ touch_nmi_watchdog();
+ }
page = pfn_to_page(pgdat->node_start_pfn + i);
total++;
if (PageReserved(page))
@@ -167,23 +174,9 @@ __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
unsigned long __initdata table_start, table_end;
-extern pmd_t temp_boot_pmds[];
-
-static struct temp_map {
- pmd_t *pmd;
- void *address;
- int allocated;
-} temp_mappings[] __initdata = {
- { &temp_boot_pmds[0], (void *)(40UL * 1024 * 1024) },
- { &temp_boot_pmds[1], (void *)(42UL * 1024 * 1024) },
- {}
-};
-
-static __meminit void *alloc_low_page(int *index, unsigned long *phys)
+static __meminit void *alloc_low_page(unsigned long *phys)
{
- struct temp_map *ti;
- int i;
- unsigned long pfn = table_end++, paddr;
+ unsigned long pfn = table_end++;
void *adr;
if (after_bootmem) {
@@ -194,57 +187,63 @@ static __meminit void *alloc_low_page(int *index, unsigned long *phys)
if (pfn >= end_pfn)
panic("alloc_low_page: ran out of memory");
- for (i = 0; temp_mappings[i].allocated; i++) {
- if (!temp_mappings[i].pmd)
- panic("alloc_low_page: ran out of temp mappings");
- }
- ti = &temp_mappings[i];
- paddr = (pfn << PAGE_SHIFT) & PMD_MASK;
- set_pmd(ti->pmd, __pmd(paddr | _KERNPG_TABLE | _PAGE_PSE));
- ti->allocated = 1;
- __flush_tlb();
- adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK);
+
+ adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE);
memset(adr, 0, PAGE_SIZE);
- *index = i;
- *phys = pfn * PAGE_SIZE;
- return adr;
-}
+ *phys = pfn * PAGE_SIZE;
+ return adr;
+}
-static __meminit void unmap_low_page(int i)
+static __meminit void unmap_low_page(void *adr)
{
- struct temp_map *ti;
if (after_bootmem)
return;
- ti = &temp_mappings[i];
- set_pmd(ti->pmd, __pmd(0));
- ti->allocated = 0;
+ early_iounmap(adr, PAGE_SIZE);
}
/* Must run before zap_low_mappings */
__init void *early_ioremap(unsigned long addr, unsigned long size)
{
- unsigned long map = round_down(addr, LARGE_PAGE_SIZE);
-
- /* actually usually some more */
- if (size >= LARGE_PAGE_SIZE) {
- return NULL;
+ unsigned long vaddr;
+ pmd_t *pmd, *last_pmd;
+ int i, pmds;
+
+ pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
+ vaddr = __START_KERNEL_map;
+ pmd = level2_kernel_pgt;
+ last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
+ for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
+ for (i = 0; i < pmds; i++) {
+ if (pmd_present(pmd[i]))
+ goto next;
+ }
+ vaddr += addr & ~PMD_MASK;
+ addr &= PMD_MASK;
+ for (i = 0; i < pmds; i++, addr += PMD_SIZE)
+ set_pmd(pmd + i,__pmd(addr | _KERNPG_TABLE | _PAGE_PSE));
+ __flush_tlb();
+ return (void *)vaddr;
+ next:
+ ;
}
- set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
- map += LARGE_PAGE_SIZE;
- set_pmd(temp_mappings[1].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
- __flush_tlb();
- return temp_mappings[0].address + (addr & (LARGE_PAGE_SIZE-1));
+ printk("early_ioremap(0x%lx, %lu) failed\n", addr, size);
+ return NULL;
}
/* To avoid virtual aliases later */
__init void early_iounmap(void *addr, unsigned long size)
{
- if ((void *)round_down((unsigned long)addr, LARGE_PAGE_SIZE) != temp_mappings[0].address)
- printk("early_iounmap: bad address %p\n", addr);
- set_pmd(temp_mappings[0].pmd, __pmd(0));
- set_pmd(temp_mappings[1].pmd, __pmd(0));
+ unsigned long vaddr;
+ pmd_t *pmd;
+ int i, pmds;
+
+ vaddr = (unsigned long)addr;
+ pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
+ pmd = level2_kernel_pgt + pmd_index(vaddr);
+ for (i = 0; i < pmds; i++)
+ pmd_clear(pmd + i);
__flush_tlb();
}
@@ -289,7 +288,6 @@ static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigne
for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) {
- int map;
unsigned long pmd_phys;
pud_t *pud = pud_page + pud_index(addr);
pmd_t *pmd;
@@ -307,12 +305,12 @@ static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigne
continue;
}
- pmd = alloc_low_page(&map, &pmd_phys);
+ pmd = alloc_low_page(&pmd_phys);
spin_lock(&init_mm.page_table_lock);
set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
phys_pmd_init(pmd, addr, end);
spin_unlock(&init_mm.page_table_lock);
- unmap_low_page(map);
+ unmap_low_page(pmd);
}
__flush_tlb();
}
@@ -364,7 +362,6 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end)
end = (unsigned long)__va(end);
for (; start < end; start = next) {
- int map;
unsigned long pud_phys;
pgd_t *pgd = pgd_offset_k(start);
pud_t *pud;
@@ -372,7 +369,7 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end)
if (after_bootmem)
pud = pud_offset(pgd, start & PGDIR_MASK);
else
- pud = alloc_low_page(&map, &pud_phys);
+ pud = alloc_low_page(&pud_phys);
next = start + PGDIR_SIZE;
if (next > end)
@@ -380,7 +377,7 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end)
phys_pud_init(pud, __pa(start), __pa(next));
if (!after_bootmem)
set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
- unmap_low_page(map);
+ unmap_low_page(pud);
}
if (!after_bootmem)
@@ -388,21 +385,6 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end)
__flush_tlb_all();
}
-void __cpuinit zap_low_mappings(int cpu)
-{
- if (cpu == 0) {
- pgd_t *pgd = pgd_offset_k(0UL);
- pgd_clear(pgd);
- } else {
- /*
- * For AP's, zap the low identity mappings by changing the cr3
- * to init_level4_pgt and doing local flush tlb all
- */
- asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
- }
- __flush_tlb_all();
-}
-
#ifndef CONFIG_NUMA
void __init paging_init(void)
{
@@ -579,15 +561,6 @@ void __init mem_init(void)
reservedpages << (PAGE_SHIFT-10),
datasize >> 10,
initsize >> 10);
-
-#ifdef CONFIG_SMP
- /*
- * Sync boot_level4_pgt mappings with the init_level4_pgt
- * except for the low identity mappings which are already zapped
- * in init_level4_pgt. This sync-up is essential for AP's bringup
- */
- memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t));
-#endif
}
void free_init_pages(char *what, unsigned long begin, unsigned long end)
@@ -597,37 +570,44 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
if (begin >= end)
return;
- printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
+ printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
for (addr = begin; addr < end; addr += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(addr));
- init_page_count(virt_to_page(addr));
- memset((void *)(addr & ~(PAGE_SIZE-1)),
- POISON_FREE_INITMEM, PAGE_SIZE);
- free_page(addr);
+ struct page *page = pfn_to_page(addr >> PAGE_SHIFT);
+ ClearPageReserved(page);
+ init_page_count(page);
+ memset(page_address(page), POISON_FREE_INITMEM, PAGE_SIZE);
+ if (addr >= __START_KERNEL_map)
+ change_page_attr_addr(addr, 1, __pgprot(0));
+ __free_page(page);
totalram_pages++;
}
+ if (addr > __START_KERNEL_map)
+ global_flush_tlb();
}
void free_initmem(void)
{
- memset(__initdata_begin, POISON_FREE_INITDATA,
- __initdata_end - __initdata_begin);
free_init_pages("unused kernel memory",
- (unsigned long)(&__init_begin),
- (unsigned long)(&__init_end));
+ __pa_symbol(&__init_begin),
+ __pa_symbol(&__init_end));
}
#ifdef CONFIG_DEBUG_RODATA
void mark_rodata_ro(void)
{
- unsigned long addr = (unsigned long)__start_rodata;
+ unsigned long start = PFN_ALIGN(__va(__pa_symbol(&_stext))), size;
- for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE)
- change_page_attr_addr(addr, 1, PAGE_KERNEL_RO);
+#ifdef CONFIG_HOTPLUG_CPU
+ /* It must still be possible to apply SMP alternatives. */
+ if (num_possible_cpus() > 1)
+ start = PFN_ALIGN(__va(__pa_symbol(&_etext)));
+#endif
+ size = (unsigned long)__va(__pa_symbol(&__end_rodata)) - start;
+ change_page_attr_addr(start, size >> PAGE_SHIFT, PAGE_KERNEL_RO);
- printk ("Write protecting the kernel read-only data: %luk\n",
- (__end_rodata - __start_rodata) >> 10);
+ printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
+ size >> 10);
/*
* change_page_attr_addr() requires a global_flush_tlb() call after it.
@@ -642,7 +622,7 @@ void mark_rodata_ro(void)
#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
- free_init_pages("initrd memory", start, end);
+ free_init_pages("initrd memory", __pa(start), __pa(end));
}
#endif
diff --git a/arch/x86_64/mm/k8topology.c b/arch/x86_64/mm/k8topology.c
index b5b8dba28b4e..f983c75825d0 100644
--- a/arch/x86_64/mm/k8topology.c
+++ b/arch/x86_64/mm/k8topology.c
@@ -49,11 +49,8 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
int found = 0;
u32 reg;
unsigned numnodes;
- nodemask_t nodes_parsed;
unsigned dualcore = 0;
- nodes_clear(nodes_parsed);
-
if (!early_pci_allowed())
return -1;
@@ -65,6 +62,8 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
reg = read_pci_config(0, nb, 0, 0x60);
numnodes = ((reg >> 4) & 0xF) + 1;
+ if (numnodes <= 1)
+ return -1;
printk(KERN_INFO "Number of nodes %d\n", numnodes);
@@ -102,7 +101,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
nodeid, (base>>8)&3, (limit>>8) & 3);
return -1;
}
- if (node_isset(nodeid, nodes_parsed)) {
+ if (node_isset(nodeid, node_possible_map)) {
printk(KERN_INFO "Node %d already present. Skipping\n",
nodeid);
continue;
@@ -155,7 +154,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
prevbase = base;
- node_set(nodeid, nodes_parsed);
+ node_set(nodeid, node_possible_map);
}
if (!found)
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
index 41b8fb069924..51548947ad3b 100644
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -273,125 +273,213 @@ void __init numa_init_array(void)
#ifdef CONFIG_NUMA_EMU
/* Numa emulation */
-int numa_fake __initdata = 0;
+#define E820_ADDR_HOLE_SIZE(start, end) \
+ (e820_hole_size((start) >> PAGE_SHIFT, (end) >> PAGE_SHIFT) << \
+ PAGE_SHIFT)
+char *cmdline __initdata;
/*
- * This function is used to find out if the start and end correspond to
- * different zones.
+ * Setups up nid to range from addr to addr + size. If the end boundary is
+ * greater than max_addr, then max_addr is used instead. The return value is 0
+ * if there is additional memory left for allocation past addr and -1 otherwise.
+ * addr is adjusted to be at the end of the node.
*/
-int zone_cross_over(unsigned long start, unsigned long end)
+static int __init setup_node_range(int nid, struct bootnode *nodes, u64 *addr,
+ u64 size, u64 max_addr)
{
- if ((start < (MAX_DMA32_PFN << PAGE_SHIFT)) &&
- (end >= (MAX_DMA32_PFN << PAGE_SHIFT)))
- return 1;
- return 0;
+ int ret = 0;
+ nodes[nid].start = *addr;
+ *addr += size;
+ if (*addr >= max_addr) {
+ *addr = max_addr;
+ ret = -1;
+ }
+ nodes[nid].end = *addr;
+ node_set(nid, node_possible_map);
+ printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid,
+ nodes[nid].start, nodes[nid].end,
+ (nodes[nid].end - nodes[nid].start) >> 20);
+ return ret;
}
-static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
+/*
+ * Splits num_nodes nodes up equally starting at node_start. The return value
+ * is the number of nodes split up and addr is adjusted to be at the end of the
+ * last node allocated.
+ */
+static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr,
+ u64 max_addr, int node_start,
+ int num_nodes)
{
- int i, big;
- struct bootnode nodes[MAX_NUMNODES];
- unsigned long sz, old_sz;
- unsigned long hole_size;
- unsigned long start, end;
- unsigned long max_addr = (end_pfn << PAGE_SHIFT);
-
- start = (start_pfn << PAGE_SHIFT);
- hole_size = e820_hole_size(start, max_addr);
- sz = (max_addr - start - hole_size) / numa_fake;
-
- /* Kludge needed for the hash function */
-
- old_sz = sz;
- /*
- * Round down to the nearest FAKE_NODE_MIN_SIZE.
- */
- sz &= FAKE_NODE_MIN_HASH_MASK;
+ unsigned int big;
+ u64 size;
+ int i;
+ if (num_nodes <= 0)
+ return -1;
+ if (num_nodes > MAX_NUMNODES)
+ num_nodes = MAX_NUMNODES;
+ size = (max_addr - *addr - E820_ADDR_HOLE_SIZE(*addr, max_addr)) /
+ num_nodes;
/*
- * We ensure that each node is at least 64MB big. Smaller than this
- * size can cause VM hiccups.
+ * Calculate the number of big nodes that can be allocated as a result
+ * of consolidating the leftovers.
*/
- if (sz == 0) {
- printk(KERN_INFO "Not enough memory for %d nodes. Reducing "
- "the number of nodes\n", numa_fake);
- numa_fake = (max_addr - start - hole_size) / FAKE_NODE_MIN_SIZE;
- printk(KERN_INFO "Number of fake nodes will be = %d\n",
- numa_fake);
- sz = FAKE_NODE_MIN_SIZE;
+ big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * num_nodes) /
+ FAKE_NODE_MIN_SIZE;
+
+ /* Round down to nearest FAKE_NODE_MIN_SIZE. */
+ size &= FAKE_NODE_MIN_HASH_MASK;
+ if (!size) {
+ printk(KERN_ERR "Not enough memory for each node. "
+ "NUMA emulation disabled.\n");
+ return -1;
}
- /*
- * Find out how many nodes can get an extra NODE_MIN_SIZE granule.
- * This logic ensures the extra memory gets distributed among as many
- * nodes as possible (as compared to one single node getting all that
- * extra memory.
- */
- big = ((old_sz - sz) * numa_fake) / FAKE_NODE_MIN_SIZE;
- printk(KERN_INFO "Fake node Size: %luMB hole_size: %luMB big nodes: "
- "%d\n",
- (sz >> 20), (hole_size >> 20), big);
- memset(&nodes,0,sizeof(nodes));
- end = start;
- for (i = 0; i < numa_fake; i++) {
- /*
- * In case we are not able to allocate enough memory for all
- * the nodes, we reduce the number of fake nodes.
- */
- if (end >= max_addr) {
- numa_fake = i - 1;
- break;
- }
- start = nodes[i].start = end;
- /*
- * Final node can have all the remaining memory.
- */
- if (i == numa_fake-1)
- sz = max_addr - start;
- end = nodes[i].start + sz;
- /*
- * Fir "big" number of nodes get extra granule.
- */
+
+ for (i = node_start; i < num_nodes + node_start; i++) {
+ u64 end = *addr + size;
if (i < big)
end += FAKE_NODE_MIN_SIZE;
/*
- * Iterate over the range to ensure that this node gets at
- * least sz amount of RAM (excluding holes)
+ * The final node can have the remaining system RAM. Other
+ * nodes receive roughly the same amount of available pages.
*/
- while ((end - start - e820_hole_size(start, end)) < sz) {
- end += FAKE_NODE_MIN_SIZE;
- if (end >= max_addr)
- break;
+ if (i == num_nodes + node_start - 1)
+ end = max_addr;
+ else
+ while (end - *addr - E820_ADDR_HOLE_SIZE(*addr, end) <
+ size) {
+ end += FAKE_NODE_MIN_SIZE;
+ if (end > max_addr) {
+ end = max_addr;
+ break;
+ }
+ }
+ if (setup_node_range(i, nodes, addr, end - *addr, max_addr) < 0)
+ break;
+ }
+ return i - node_start + 1;
+}
+
+/*
+ * Splits the remaining system RAM into chunks of size. The remaining memory is
+ * always assigned to a final node and can be asymmetric. Returns the number of
+ * nodes split.
+ */
+static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr,
+ u64 max_addr, int node_start, u64 size)
+{
+ int i = node_start;
+ size = (size << 20) & FAKE_NODE_MIN_HASH_MASK;
+ while (!setup_node_range(i++, nodes, addr, size, max_addr))
+ ;
+ return i - node_start;
+}
+
+/*
+ * Sets up the system RAM area from start_pfn to end_pfn according to the
+ * numa=fake command-line option.
+ */
+static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
+{
+ struct bootnode nodes[MAX_NUMNODES];
+ u64 addr = start_pfn << PAGE_SHIFT;
+ u64 max_addr = end_pfn << PAGE_SHIFT;
+ int num_nodes = 0;
+ int coeff_flag;
+ int coeff = -1;
+ int num = 0;
+ u64 size;
+ int i;
+
+ memset(&nodes, 0, sizeof(nodes));
+ /*
+ * If the numa=fake command-line is just a single number N, split the
+ * system RAM into N fake nodes.
+ */
+ if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) {
+ num_nodes = split_nodes_equally(nodes, &addr, max_addr, 0,
+ simple_strtol(cmdline, NULL, 0));
+ if (num_nodes < 0)
+ return num_nodes;
+ goto out;
+ }
+
+ /* Parse the command line. */
+ for (coeff_flag = 0; ; cmdline++) {
+ if (*cmdline && isdigit(*cmdline)) {
+ num = num * 10 + *cmdline - '0';
+ continue;
}
- /*
- * Look at the next node to make sure there is some real memory
- * to map. Bad things happen when the only memory present
- * in a zone on a fake node is IO hole.
- */
- while (e820_hole_size(end, end + FAKE_NODE_MIN_SIZE) > 0) {
- if (zone_cross_over(start, end + sz)) {
- end = (MAX_DMA32_PFN << PAGE_SHIFT);
+ if (*cmdline == '*') {
+ if (num > 0)
+ coeff = num;
+ coeff_flag = 1;
+ }
+ if (!*cmdline || *cmdline == ',') {
+ if (!coeff_flag)
+ coeff = 1;
+ /*
+ * Round down to the nearest FAKE_NODE_MIN_SIZE.
+ * Command-line coefficients are in megabytes.
+ */
+ size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK;
+ if (size)
+ for (i = 0; i < coeff; i++, num_nodes++)
+ if (setup_node_range(num_nodes, nodes,
+ &addr, size, max_addr) < 0)
+ goto done;
+ if (!*cmdline)
break;
- }
- if (end >= max_addr)
+ coeff_flag = 0;
+ coeff = -1;
+ }
+ num = 0;
+ }
+done:
+ if (!num_nodes)
+ return -1;
+ /* Fill remainder of system RAM, if appropriate. */
+ if (addr < max_addr) {
+ if (coeff_flag && coeff < 0) {
+ /* Split remaining nodes into num-sized chunks */
+ num_nodes += split_nodes_by_size(nodes, &addr, max_addr,
+ num_nodes, num);
+ goto out;
+ }
+ switch (*(cmdline - 1)) {
+ case '*':
+ /* Split remaining nodes into coeff chunks */
+ if (coeff <= 0)
break;
- end += FAKE_NODE_MIN_SIZE;
+ num_nodes += split_nodes_equally(nodes, &addr, max_addr,
+ num_nodes, coeff);
+ break;
+ case ',':
+ /* Do not allocate remaining system RAM */
+ break;
+ default:
+ /* Give one final node */
+ setup_node_range(num_nodes, nodes, &addr,
+ max_addr - addr, max_addr);
+ num_nodes++;
}
- if (end > max_addr)
- end = max_addr;
- nodes[i].end = end;
- printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n",
- i,
- nodes[i].start, nodes[i].end,
- (nodes[i].end - nodes[i].start) >> 20);
- node_set_online(i);
- }
- memnode_shift = compute_hash_shift(nodes, numa_fake);
- if (memnode_shift < 0) {
- memnode_shift = 0;
- printk(KERN_ERR "No NUMA hash function found. Emulation disabled.\n");
- return -1;
- }
- for_each_online_node(i) {
+ }
+out:
+ memnode_shift = compute_hash_shift(nodes, num_nodes);
+ if (memnode_shift < 0) {
+ memnode_shift = 0;
+ printk(KERN_ERR "No NUMA hash function found. NUMA emulation "
+ "disabled.\n");
+ return -1;
+ }
+
+ /*
+ * We need to vacate all active ranges that may have been registered by
+ * SRAT.
+ */
+ remove_all_active_ranges();
+ for_each_node_mask(i, node_possible_map) {
e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
nodes[i].end >> PAGE_SHIFT);
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
@@ -399,26 +487,32 @@ static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
numa_init_array();
return 0;
}
-#endif
+#undef E820_ADDR_HOLE_SIZE
+#endif /* CONFIG_NUMA_EMU */
void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
{
int i;
+ nodes_clear(node_possible_map);
+
#ifdef CONFIG_NUMA_EMU
- if (numa_fake && !numa_emulation(start_pfn, end_pfn))
+ if (cmdline && !numa_emulation(start_pfn, end_pfn))
return;
+ nodes_clear(node_possible_map);
#endif
#ifdef CONFIG_ACPI_NUMA
if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT,
end_pfn << PAGE_SHIFT))
return;
+ nodes_clear(node_possible_map);
#endif
#ifdef CONFIG_K8_NUMA
if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT))
return;
+ nodes_clear(node_possible_map);
#endif
printk(KERN_INFO "%s\n",
numa_off ? "NUMA turned off" : "No NUMA configuration found");
@@ -432,6 +526,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
memnodemap[0] = 0;
nodes_clear(node_online_map);
node_set_online(0);
+ node_set(0, node_possible_map);
for (i = 0; i < NR_CPUS; i++)
numa_set_node(i, 0);
node_to_cpumask[0] = cpumask_of_cpu(0);
@@ -486,11 +581,8 @@ static __init int numa_setup(char *opt)
if (!strncmp(opt,"off",3))
numa_off = 1;
#ifdef CONFIG_NUMA_EMU
- if(!strncmp(opt, "fake=", 5)) {
- numa_fake = simple_strtoul(opt+5,NULL,0); ;
- if (numa_fake >= MAX_NUMNODES)
- numa_fake = MAX_NUMNODES;
- }
+ if (!strncmp(opt, "fake=", 5))
+ cmdline = opt + 5;
#endif
#ifdef CONFIG_ACPI_NUMA
if (!strncmp(opt,"noacpi",6))
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c
index 081409aa3452..bf4aa8dd4254 100644
--- a/arch/x86_64/mm/pageattr.c
+++ b/arch/x86_64/mm/pageattr.c
@@ -51,7 +51,6 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot,
SetPagePrivate(base);
page_private(base) = 0;
- address = __pa(address);
addr = address & LARGE_PAGE_MASK;
pbase = (pte_t *)page_address(base);
for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
@@ -101,13 +100,12 @@ static inline void save_page(struct page *fpage)
* No more special protections in this 2/4MB area - revert to a
* large page again.
*/
-static void revert_page(unsigned long address, pgprot_t ref_prot)
+static void revert_page(unsigned long address, unsigned long pfn, pgprot_t ref_prot)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t large_pte;
- unsigned long pfn;
pgd = pgd_offset_k(address);
BUG_ON(pgd_none(*pgd));
@@ -115,7 +113,6 @@ static void revert_page(unsigned long address, pgprot_t ref_prot)
BUG_ON(pud_none(*pud));
pmd = pmd_offset(pud, address);
BUG_ON(pmd_val(*pmd) & _PAGE_PSE);
- pfn = (__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT;
large_pte = pfn_pte(pfn, ref_prot);
large_pte = pte_mkhuge(large_pte);
set_pte((pte_t *)pmd, large_pte);
@@ -141,7 +138,8 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
*/
struct page *split;
ref_prot2 = pte_pgprot(pte_clrhuge(*kpte));
- split = split_large_page(address, prot, ref_prot2);
+ split = split_large_page(pfn << PAGE_SHIFT, prot,
+ ref_prot2);
if (!split)
return -ENOMEM;
set_pte(kpte, mk_pte(split, ref_prot2));
@@ -160,7 +158,7 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
if (page_private(kpte_page) == 0) {
save_page(kpte_page);
- revert_page(address, ref_prot);
+ revert_page(address, pfn, ref_prot);
}
return 0;
}
@@ -180,22 +178,32 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
*/
int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
{
- int err = 0;
+ unsigned long phys_base_pfn = __pa_symbol(__START_KERNEL_map) >> PAGE_SHIFT;
+ int err = 0, kernel_map = 0;
int i;
+ if (address >= __START_KERNEL_map
+ && address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {
+ address = (unsigned long)__va(__pa(address));
+ kernel_map = 1;
+ }
+
down_write(&init_mm.mmap_sem);
for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
unsigned long pfn = __pa(address) >> PAGE_SHIFT;
- err = __change_page_attr(address, pfn, prot, PAGE_KERNEL);
- if (err)
- break;
+ if (!kernel_map || pte_present(pfn_pte(0, prot))) {
+ err = __change_page_attr(address, pfn, prot, PAGE_KERNEL);
+ if (err)
+ break;
+ }
/* Handle kernel mapping too which aliases part of the
* lowmem */
- if (__pa(address) < KERNEL_TEXT_SIZE) {
+ if ((pfn >= phys_base_pfn) &&
+ ((pfn - phys_base_pfn) < (KERNEL_TEXT_SIZE >> PAGE_SHIFT))) {
unsigned long addr2;
pgprot_t prot2;
- addr2 = __START_KERNEL_map + __pa(address);
+ addr2 = __START_KERNEL_map + ((pfn - phys_base_pfn) << PAGE_SHIFT);
/* Make sure the kernel mappings stay executable */
prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
err = __change_page_attr(addr2, pfn, prot2,
diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c
index 2efe215fc76a..1e76bb0a7277 100644
--- a/arch/x86_64/mm/srat.c
+++ b/arch/x86_64/mm/srat.c
@@ -419,19 +419,21 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
return -1;
}
+ node_possible_map = nodes_parsed;
+
/* Finally register nodes */
- for_each_node_mask(i, nodes_parsed)
+ for_each_node_mask(i, node_possible_map)
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
/* Try again in case setup_node_bootmem missed one due
to missing bootmem */
- for_each_node_mask(i, nodes_parsed)
+ for_each_node_mask(i, node_possible_map)
if (!node_online(i))
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
for (i = 0; i < NR_CPUS; i++) {
if (cpu_to_node[i] == NUMA_NO_NODE)
continue;
- if (!node_isset(cpu_to_node[i], nodes_parsed))
+ if (!node_isset(cpu_to_node[i], node_possible_map))
numa_set_node(i, NUMA_NO_NODE);
}
numa_init_array();