summaryrefslogtreecommitdiff
path: root/arch/x86_64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86_64')
-rw-r--r--arch/x86_64/Kconfig4
-rw-r--r--arch/x86_64/Makefile28
-rw-r--r--arch/x86_64/boot/.gitignore5
-rw-r--r--arch/x86_64/boot/Makefile9
-rw-r--r--arch/x86_64/boot/compressed/Makefile30
-rw-r--r--arch/x86_64/boot/compressed/head.S311
-rw-r--r--arch/x86_64/boot/compressed/misc.c371
-rw-r--r--arch/x86_64/boot/compressed/vmlinux.lds44
-rw-r--r--arch/x86_64/boot/compressed/vmlinux.scr10
-rw-r--r--arch/x86_64/boot/tools/.gitignore1
-rw-r--r--arch/x86_64/crypto/Makefile12
-rw-r--r--arch/x86_64/crypto/aes-x86_64-asm.S190
-rw-r--r--arch/x86_64/crypto/aes.c336
-rw-r--r--arch/x86_64/crypto/twofish-x86_64-asm.S324
-rw-r--r--arch/x86_64/crypto/twofish.c97
-rw-r--r--arch/x86_64/ia32/Makefile35
-rw-r--r--arch/x86_64/ia32/audit.c42
-rw-r--r--arch/x86_64/ia32/fpu32.c183
-rw-r--r--arch/x86_64/ia32/ia32_aout.c528
-rw-r--r--arch/x86_64/ia32/ia32_binfmt.c320
-rw-r--r--arch/x86_64/ia32/ia32_signal.c617
-rw-r--r--arch/x86_64/ia32/ia32entry.S736
-rw-r--r--arch/x86_64/ia32/ipc32.c57
-rw-r--r--arch/x86_64/ia32/mmap32.c79
-rw-r--r--arch/x86_64/ia32/ptrace32.c404
-rw-r--r--arch/x86_64/ia32/sys_ia32.c889
-rw-r--r--arch/x86_64/ia32/syscall32.c83
-rw-r--r--arch/x86_64/ia32/syscall32_syscall.S17
-rw-r--r--arch/x86_64/ia32/tls32.c163
-rw-r--r--arch/x86_64/ia32/vsyscall-sigreturn.S143
-rw-r--r--arch/x86_64/ia32/vsyscall-syscall.S69
-rw-r--r--arch/x86_64/ia32/vsyscall-sysenter.S95
-rw-r--r--arch/x86_64/ia32/vsyscall.lds80
-rw-r--r--arch/x86_64/kernel/Makefile63
-rw-r--r--arch/x86_64/kernel/acpi/Makefile9
-rw-r--r--arch/x86_64/kernel/acpi/sleep.c120
-rw-r--r--arch/x86_64/kernel/acpi/wakeup.S456
-rw-r--r--arch/x86_64/kernel/aperture.c298
-rw-r--r--arch/x86_64/kernel/apic.c1253
-rw-r--r--arch/x86_64/kernel/asm-offsets.c85
-rw-r--r--arch/x86_64/kernel/audit.c81
-rw-r--r--arch/x86_64/kernel/bugs.c24
-rw-r--r--arch/x86_64/kernel/cpufreq/Kconfig108
-rw-r--r--arch/x86_64/kernel/cpufreq/Makefile17
-rw-r--r--arch/x86_64/kernel/crash.c135
-rw-r--r--arch/x86_64/kernel/crash_dump.c47
-rw-r--r--arch/x86_64/kernel/e820.c725
-rw-r--r--arch/x86_64/kernel/early-quirks.c127
-rw-r--r--arch/x86_64/kernel/early_printk.c259
-rw-r--r--arch/x86_64/kernel/entry.S1172
-rw-r--r--arch/x86_64/kernel/genapic.c66
-rw-r--r--arch/x86_64/kernel/genapic_flat.c194
-rw-r--r--arch/x86_64/kernel/head.S416
-rw-r--r--arch/x86_64/kernel/head64.c86
-rw-r--r--arch/x86_64/kernel/hpet.c493
-rw-r--r--arch/x86_64/kernel/i387.c151
-rw-r--r--arch/x86_64/kernel/i8259.c544
-rw-r--r--arch/x86_64/kernel/init_task.c54
-rw-r--r--arch/x86_64/kernel/io_apic.c2202
-rw-r--r--arch/x86_64/kernel/ioport.c119
-rw-r--r--arch/x86_64/kernel/irq.c213
-rw-r--r--arch/x86_64/kernel/k8.c123
-rw-r--r--arch/x86_64/kernel/kprobes.c749
-rw-r--r--arch/x86_64/kernel/ldt.c252
-rw-r--r--arch/x86_64/kernel/machine_kexec.c259
-rw-r--r--arch/x86_64/kernel/mce.c875
-rw-r--r--arch/x86_64/kernel/mce_amd.c689
-rw-r--r--arch/x86_64/kernel/mce_intel.c89
-rw-r--r--arch/x86_64/kernel/module.c185
-rw-r--r--arch/x86_64/kernel/mpparse.c852
-rw-r--r--arch/x86_64/kernel/nmi.c483
-rw-r--r--arch/x86_64/kernel/pci-calgary.c1578
-rw-r--r--arch/x86_64/kernel/pci-dma.c346
-rw-r--r--arch/x86_64/kernel/pci-gart.c740
-rw-r--r--arch/x86_64/kernel/pci-nommu.c97
-rw-r--r--arch/x86_64/kernel/pci-swiotlb.c44
-rw-r--r--arch/x86_64/kernel/pmtimer.c69
-rw-r--r--arch/x86_64/kernel/process.c903
-rw-r--r--arch/x86_64/kernel/ptrace.c627
-rw-r--r--arch/x86_64/kernel/reboot.c171
-rw-r--r--arch/x86_64/kernel/relocate_kernel.S276
-rw-r--r--arch/x86_64/kernel/setup.c1117
-rw-r--r--arch/x86_64/kernel/setup64.c289
-rw-r--r--arch/x86_64/kernel/signal.c495
-rw-r--r--arch/x86_64/kernel/smp.c523
-rw-r--r--arch/x86_64/kernel/smpboot.c1085
-rw-r--r--arch/x86_64/kernel/stacktrace.c54
-rw-r--r--arch/x86_64/kernel/suspend.c239
-rw-r--r--arch/x86_64/kernel/suspend_asm.S110
-rw-r--r--arch/x86_64/kernel/sys_x86_64.c159
-rw-r--r--arch/x86_64/kernel/syscall.c26
-rw-r--r--arch/x86_64/kernel/tce.c189
-rw-r--r--arch/x86_64/kernel/time.c447
-rw-r--r--arch/x86_64/kernel/trampoline.S166
-rw-r--r--arch/x86_64/kernel/traps.c1138
-rw-r--r--arch/x86_64/kernel/tsc.c207
-rw-r--r--arch/x86_64/kernel/tsc_sync.c187
-rw-r--r--arch/x86_64/kernel/verify_cpu.S105
-rw-r--r--arch/x86_64/kernel/vmlinux.lds.S235
-rw-r--r--arch/x86_64/kernel/vsmp.c49
-rw-r--r--arch/x86_64/kernel/vsyscall.c349
-rw-r--r--arch/x86_64/kernel/x8664_ksyms.c62
-rw-r--r--arch/x86_64/lib/Makefile13
-rw-r--r--arch/x86_64/lib/bitops.c175
-rw-r--r--arch/x86_64/lib/bitstr.c28
-rw-r--r--arch/x86_64/lib/clear_page.S59
-rw-r--r--arch/x86_64/lib/copy_page.S119
-rw-r--r--arch/x86_64/lib/copy_user.S354
-rw-r--r--arch/x86_64/lib/copy_user_nocache.S217
-rw-r--r--arch/x86_64/lib/csum-copy.S249
-rw-r--r--arch/x86_64/lib/csum-partial.c150
-rw-r--r--arch/x86_64/lib/csum-wrappers.c135
-rw-r--r--arch/x86_64/lib/delay.c57
-rw-r--r--arch/x86_64/lib/getuser.S109
-rw-r--r--arch/x86_64/lib/io.c23
-rw-r--r--arch/x86_64/lib/iomap_copy.S30
-rw-r--r--arch/x86_64/lib/memcpy.S131
-rw-r--r--arch/x86_64/lib/memmove.c21
-rw-r--r--arch/x86_64/lib/memset.S133
-rw-r--r--arch/x86_64/lib/msr-on-cpu.c1
-rw-r--r--arch/x86_64/lib/putuser.S106
-rw-r--r--arch/x86_64/lib/rwlock.S38
-rw-r--r--arch/x86_64/lib/thunk.S67
-rw-r--r--arch/x86_64/lib/usercopy.c166
-rw-r--r--arch/x86_64/mm/Makefile11
-rw-r--r--arch/x86_64/mm/extable.c34
-rw-r--r--arch/x86_64/mm/fault.c636
-rw-r--r--arch/x86_64/mm/init.c750
-rw-r--r--arch/x86_64/mm/ioremap.c210
-rw-r--r--arch/x86_64/mm/k8topology.c182
-rw-r--r--arch/x86_64/mm/mmap.c29
-rw-r--r--arch/x86_64/mm/numa.c648
-rw-r--r--arch/x86_64/mm/pageattr.c249
-rw-r--r--arch/x86_64/mm/srat.c566
-rw-r--r--arch/x86_64/oprofile/Kconfig17
-rw-r--r--arch/x86_64/oprofile/Makefile19
-rw-r--r--arch/x86_64/pci/Makefile27
-rw-r--r--arch/x86_64/pci/k8-bus.c83
-rw-r--r--arch/x86_64/pci/mmconfig.c157
-rw-r--r--arch/x86_64/vdso/.gitignore1
-rw-r--r--arch/x86_64/vdso/Makefile49
-rw-r--r--arch/x86_64/vdso/vclock_gettime.c121
-rw-r--r--arch/x86_64/vdso/vdso-note.S12
-rw-r--r--arch/x86_64/vdso/vdso-start.S2
-rw-r--r--arch/x86_64/vdso/vdso.S2
-rw-r--r--arch/x86_64/vdso/vdso.lds.S77
-rw-r--r--arch/x86_64/vdso/vextern.h16
-rw-r--r--arch/x86_64/vdso/vgetcpu.c50
-rw-r--r--arch/x86_64/vdso/vma.c140
-rw-r--r--arch/x86_64/vdso/voffset.h1
-rw-r--r--arch/x86_64/vdso/vvar.c12
151 files changed, 19 insertions, 38930 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index b4d9089a6a06..b1b98e614f7c 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -704,7 +704,7 @@ source kernel/power/Kconfig
source "drivers/acpi/Kconfig"
-source "arch/x86_64/kernel/cpufreq/Kconfig"
+source "arch/x86/kernel/cpufreq/Kconfig"
endmenu
@@ -778,7 +778,7 @@ source fs/Kconfig
menu "Instrumentation Support"
depends on EXPERIMENTAL
-source "arch/x86_64/oprofile/Kconfig"
+source "arch/x86/oprofile/Kconfig"
config KPROBES
bool "Kprobes"
diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile
index b024e4a86895..8bffb94c71b5 100644
--- a/arch/x86_64/Makefile
+++ b/arch/x86_64/Makefile
@@ -21,6 +21,9 @@
#
# $Id: Makefile,v 1.31 2002/03/22 15:56:07 ak Exp $
+# Fill in SRCARCH
+SRCARCH := x86
+
LDFLAGS := -m elf_x86_64
OBJCOPYFLAGS := -O binary -R .note -R .comment -S
LDFLAGS_vmlinux :=
@@ -71,18 +74,18 @@ CFLAGS += $(cflags-y)
CFLAGS_KERNEL += $(cflags-kernel-y)
AFLAGS += -m64
-head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o
+head-y := arch/x86/kernel/head_64.o arch/x86/kernel/head64.o arch/x86/kernel/init_task_64.o
-libs-y += arch/x86_64/lib/
-core-y += arch/x86_64/kernel/ \
- arch/x86_64/mm/ \
- arch/x86_64/crypto/ \
- arch/x86_64/vdso/
-core-$(CONFIG_IA32_EMULATION) += arch/x86_64/ia32/
-drivers-$(CONFIG_PCI) += arch/x86_64/pci/
-drivers-$(CONFIG_OPROFILE) += arch/x86_64/oprofile/
+libs-y += arch/x86/lib/
+core-y += arch/x86/kernel/ \
+ arch/x86/mm/ \
+ arch/x86/crypto/ \
+ arch/x86/vdso/
+core-$(CONFIG_IA32_EMULATION) += arch/x86/ia32/
+drivers-$(CONFIG_PCI) += arch/x86/pci/
+drivers-$(CONFIG_OPROFILE) += arch/x86/oprofile/
-boot := arch/x86_64/boot
+boot := arch/x86/boot
PHONY += bzImage bzlilo install archmrproper \
fdimage fdimage144 fdimage288 isoimage archclean
@@ -90,10 +93,12 @@ PHONY += bzImage bzlilo install archmrproper \
#Default target when executing "make"
all: bzImage
-BOOTIMAGE := arch/x86_64/boot/bzImage
+BOOTIMAGE := arch/x86/boot/bzImage
KBUILD_IMAGE := $(BOOTIMAGE)
bzImage: vmlinux
+ $(Q)mkdir -p $(objtree)/arch/x86_64/boot
+ $(Q)ln -fsn $(objtree)/arch/x86/boot/bzImage $(objtree)/arch/x86_64/boot/bzImage
$(Q)$(MAKE) $(build)=$(boot) $(BOOTIMAGE)
bzlilo: vmlinux
@@ -109,6 +114,7 @@ install:
$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) $@
archclean:
+ $(Q)rm -rf $(objtree)/arch/x86_64/boot
$(Q)$(MAKE) $(clean)=$(boot)
define archhelp
diff --git a/arch/x86_64/boot/.gitignore b/arch/x86_64/boot/.gitignore
deleted file mode 100644
index 18465143cfa2..000000000000
--- a/arch/x86_64/boot/.gitignore
+++ /dev/null
@@ -1,5 +0,0 @@
-bootsect
-bzImage
-setup
-setup.bin
-setup.elf
diff --git a/arch/x86_64/boot/Makefile b/arch/x86_64/boot/Makefile
deleted file mode 100644
index 67096389de1f..000000000000
--- a/arch/x86_64/boot/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
-#
-# arch/x86_64/boot/Makefile
-#
-# The actual boot code is shared with i386 including the Makefile.
-# So tell kbuild that we fetch the code from i386 and include the
-# Makefile from i386 too.
-
-src := arch/i386/boot
-include $(src)/Makefile
diff --git a/arch/x86_64/boot/compressed/Makefile b/arch/x86_64/boot/compressed/Makefile
deleted file mode 100644
index 877c0bdbbc67..000000000000
--- a/arch/x86_64/boot/compressed/Makefile
+++ /dev/null
@@ -1,30 +0,0 @@
-#
-# linux/arch/x86_64/boot/compressed/Makefile
-#
-# create a compressed vmlinux image from the original vmlinux
-#
-
-targets := vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o
-
-CFLAGS := -m64 -D__KERNEL__ $(LINUXINCLUDE) -O2 \
- -fno-strict-aliasing -fPIC -mcmodel=small \
- $(call cc-option, -ffreestanding) \
- $(call cc-option, -fno-stack-protector)
-AFLAGS := $(CFLAGS) -D__ASSEMBLY__
-LDFLAGS := -m elf_x86_64
-
-LDFLAGS_vmlinux := -T
-$(obj)/vmlinux: $(src)/vmlinux.lds $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE
- $(call if_changed,ld)
- @:
-
-$(obj)/vmlinux.bin: vmlinux FORCE
- $(call if_changed,objcopy)
-
-$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
- $(call if_changed,gzip)
-
-LDFLAGS_piggy.o := -r --format binary --oformat elf64-x86-64 -T
-
-$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE
- $(call if_changed,ld)
diff --git a/arch/x86_64/boot/compressed/head.S b/arch/x86_64/boot/compressed/head.S
deleted file mode 100644
index 9fd8030cc54f..000000000000
--- a/arch/x86_64/boot/compressed/head.S
+++ /dev/null
@@ -1,311 +0,0 @@
-/*
- * linux/boot/head.S
- *
- * Copyright (C) 1991, 1992, 1993 Linus Torvalds
- */
-
-/*
- * head.S contains the 32-bit startup code.
- *
- * NOTE!!! Startup happens at absolute address 0x00001000, which is also where
- * the page directory will exist. The startup code will be overwritten by
- * the page directory. [According to comments etc elsewhere on a compressed
- * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC]
- *
- * Page 0 is deliberately kept safe, since System Management Mode code in
- * laptops may need to access the BIOS data stored there. This is also
- * useful for future device drivers that either access the BIOS via VM86
- * mode.
- */
-
-/*
- * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
- */
-.code32
-.text
-
-#include <linux/linkage.h>
-#include <asm/segment.h>
-#include <asm/pgtable.h>
-#include <asm/page.h>
-#include <asm/msr.h>
-
-.section ".text.head"
- .code32
- .globl startup_32
-
-startup_32:
- cld
- cli
- movl $(__KERNEL_DS), %eax
- movl %eax, %ds
- movl %eax, %es
- movl %eax, %ss
-
-/* Calculate the delta between where we were compiled to run
- * at and where we were actually loaded at. This can only be done
- * with a short local call on x86. Nothing else will tell us what
- * address we are running at. The reserved chunk of the real-mode
- * data at 0x1e4 (defined as a scratch field) are used as the stack
- * for this calculation. Only 4 bytes are needed.
- */
- leal (0x1e4+4)(%esi), %esp
- call 1f
-1: popl %ebp
- subl $1b, %ebp
-
-/* setup a stack and make sure cpu supports long mode. */
- movl $user_stack_end, %eax
- addl %ebp, %eax
- movl %eax, %esp
-
- call verify_cpu
- testl %eax, %eax
- jnz no_longmode
-
-/* Compute the delta between where we were compiled to run at
- * and where the code will actually run at.
- */
-/* %ebp contains the address we are loaded at by the boot loader and %ebx
- * contains the address where we should move the kernel image temporarily
- * for safe in-place decompression.
- */
-
-#ifdef CONFIG_RELOCATABLE
- movl %ebp, %ebx
- addl $(LARGE_PAGE_SIZE -1), %ebx
- andl $LARGE_PAGE_MASK, %ebx
-#else
- movl $CONFIG_PHYSICAL_START, %ebx
-#endif
-
- /* Replace the compressed data size with the uncompressed size */
- subl input_len(%ebp), %ebx
- movl output_len(%ebp), %eax
- addl %eax, %ebx
- /* Add 8 bytes for every 32K input block */
- shrl $12, %eax
- addl %eax, %ebx
- /* Add 32K + 18 bytes of extra slack and align on a 4K boundary */
- addl $(32768 + 18 + 4095), %ebx
- andl $~4095, %ebx
-
-/*
- * Prepare for entering 64 bit mode
- */
-
- /* Load new GDT with the 64bit segments using 32bit descriptor */
- leal gdt(%ebp), %eax
- movl %eax, gdt+2(%ebp)
- lgdt gdt(%ebp)
-
- /* Enable PAE mode */
- xorl %eax, %eax
- orl $(1 << 5), %eax
- movl %eax, %cr4
-
- /*
- * Build early 4G boot pagetable
- */
- /* Initialize Page tables to 0*/
- leal pgtable(%ebx), %edi
- xorl %eax, %eax
- movl $((4096*6)/4), %ecx
- rep stosl
-
- /* Build Level 4 */
- leal pgtable + 0(%ebx), %edi
- leal 0x1007 (%edi), %eax
- movl %eax, 0(%edi)
-
- /* Build Level 3 */
- leal pgtable + 0x1000(%ebx), %edi
- leal 0x1007(%edi), %eax
- movl $4, %ecx
-1: movl %eax, 0x00(%edi)
- addl $0x00001000, %eax
- addl $8, %edi
- decl %ecx
- jnz 1b
-
- /* Build Level 2 */
- leal pgtable + 0x2000(%ebx), %edi
- movl $0x00000183, %eax
- movl $2048, %ecx
-1: movl %eax, 0(%edi)
- addl $0x00200000, %eax
- addl $8, %edi
- decl %ecx
- jnz 1b
-
- /* Enable the boot page tables */
- leal pgtable(%ebx), %eax
- movl %eax, %cr3
-
- /* Enable Long mode in EFER (Extended Feature Enable Register) */
- movl $MSR_EFER, %ecx
- rdmsr
- btsl $_EFER_LME, %eax
- wrmsr
-
- /* Setup for the jump to 64bit mode
- *
- * When the jump is performend we will be in long mode but
- * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
- * (and in turn EFER.LMA = 1). To jump into 64bit mode we use
- * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
- * We place all of the values on our mini stack so lret can
- * used to perform that far jump.
- */
- pushl $__KERNEL_CS
- leal startup_64(%ebp), %eax
- pushl %eax
-
- /* Enter paged protected Mode, activating Long Mode */
- movl $0x80000001, %eax /* Enable Paging and Protected mode */
- movl %eax, %cr0
-
- /* Jump from 32bit compatibility mode into 64bit mode. */
- lret
-
-no_longmode:
- /* This isn't an x86-64 CPU so hang */
-1:
- hlt
- jmp 1b
-
-#include "../../kernel/verify_cpu.S"
-
- /* Be careful here startup_64 needs to be at a predictable
- * address so I can export it in an ELF header. Bootloaders
- * should look at the ELF header to find this address, as
- * it may change in the future.
- */
- .code64
- .org 0x200
-ENTRY(startup_64)
- /* We come here either from startup_32 or directly from a
- * 64bit bootloader. If we come here from a bootloader we depend on
- * an identity mapped page table being provied that maps our
- * entire text+data+bss and hopefully all of memory.
- */
-
- /* Setup data segments. */
- xorl %eax, %eax
- movl %eax, %ds
- movl %eax, %es
- movl %eax, %ss
- movl %eax, %fs
- movl %eax, %gs
- lldt %ax
- movl $0x20, %eax
- ltr %ax
-
- /* Compute the decompressed kernel start address. It is where
- * we were loaded at aligned to a 2M boundary. %rbp contains the
- * decompressed kernel start address.
- *
- * If it is a relocatable kernel then decompress and run the kernel
- * from load address aligned to 2MB addr, otherwise decompress and
- * run the kernel from CONFIG_PHYSICAL_START
- */
-
- /* Start with the delta to where the kernel will run at. */
-#ifdef CONFIG_RELOCATABLE
- leaq startup_32(%rip) /* - $startup_32 */, %rbp
- addq $(LARGE_PAGE_SIZE - 1), %rbp
- andq $LARGE_PAGE_MASK, %rbp
- movq %rbp, %rbx
-#else
- movq $CONFIG_PHYSICAL_START, %rbp
- movq %rbp, %rbx
-#endif
-
- /* Replace the compressed data size with the uncompressed size */
- movl input_len(%rip), %eax
- subq %rax, %rbx
- movl output_len(%rip), %eax
- addq %rax, %rbx
- /* Add 8 bytes for every 32K input block */
- shrq $12, %rax
- addq %rax, %rbx
- /* Add 32K + 18 bytes of extra slack and align on a 4K boundary */
- addq $(32768 + 18 + 4095), %rbx
- andq $~4095, %rbx
-
-/* Copy the compressed kernel to the end of our buffer
- * where decompression in place becomes safe.
- */
- leaq _end(%rip), %r8
- leaq _end(%rbx), %r9
- movq $_end /* - $startup_32 */, %rcx
-1: subq $8, %r8
- subq $8, %r9
- movq 0(%r8), %rax
- movq %rax, 0(%r9)
- subq $8, %rcx
- jnz 1b
-
-/*
- * Jump to the relocated address.
- */
- leaq relocated(%rbx), %rax
- jmp *%rax
-
-.section ".text"
-relocated:
-
-/*
- * Clear BSS
- */
- xorq %rax, %rax
- leaq _edata(%rbx), %rdi
- leaq _end(%rbx), %rcx
- subq %rdi, %rcx
- cld
- rep
- stosb
-
- /* Setup the stack */
- leaq user_stack_end(%rip), %rsp
-
- /* zero EFLAGS after setting rsp */
- pushq $0
- popfq
-
-/*
- * Do the decompression, and jump to the new kernel..
- */
- pushq %rsi # Save the real mode argument
- movq %rsi, %rdi # real mode address
- leaq _heap(%rip), %rsi # _heap
- leaq input_data(%rip), %rdx # input_data
- movl input_len(%rip), %eax
- movq %rax, %rcx # input_len
- movq %rbp, %r8 # output
- call decompress_kernel
- popq %rsi
-
-
-/*
- * Jump to the decompressed kernel.
- */
- jmp *%rbp
-
- .data
-gdt:
- .word gdt_end - gdt
- .long gdt
- .word 0
- .quad 0x0000000000000000 /* NULL descriptor */
- .quad 0x00af9a000000ffff /* __KERNEL_CS */
- .quad 0x00cf92000000ffff /* __KERNEL_DS */
- .quad 0x0080890000000000 /* TS descriptor */
- .quad 0x0000000000000000 /* TS continued */
-gdt_end:
- .bss
-/* Stack for uncompression */
- .balign 4
-user_stack:
- .fill 4096,4,0
-user_stack_end:
diff --git a/arch/x86_64/boot/compressed/misc.c b/arch/x86_64/boot/compressed/misc.c
deleted file mode 100644
index f932b0e89096..000000000000
--- a/arch/x86_64/boot/compressed/misc.c
+++ /dev/null
@@ -1,371 +0,0 @@
-/*
- * misc.c
- *
- * This is a collection of several routines from gzip-1.0.3
- * adapted for Linux.
- *
- * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
- * puts by Nick Holloway 1993, better puts by Martin Mares 1995
- * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
- */
-
-#define _LINUX_STRING_H_ 1
-#define __LINUX_BITMAP_H 1
-
-#include <linux/linkage.h>
-#include <linux/screen_info.h>
-#include <asm/io.h>
-#include <asm/page.h>
-
-/* WARNING!!
- * This code is compiled with -fPIC and it is relocated dynamically
- * at run time, but no relocation processing is performed.
- * This means that it is not safe to place pointers in static structures.
- */
-
-/*
- * Getting to provable safe in place decompression is hard.
- * Worst case behaviours need to be analized.
- * Background information:
- *
- * The file layout is:
- * magic[2]
- * method[1]
- * flags[1]
- * timestamp[4]
- * extraflags[1]
- * os[1]
- * compressed data blocks[N]
- * crc[4] orig_len[4]
- *
- * resulting in 18 bytes of non compressed data overhead.
- *
- * Files divided into blocks
- * 1 bit (last block flag)
- * 2 bits (block type)
- *
- * 1 block occurs every 32K -1 bytes or when there 50% compression has been achieved.
- * The smallest block type encoding is always used.
- *
- * stored:
- * 32 bits length in bytes.
- *
- * fixed:
- * magic fixed tree.
- * symbols.
- *
- * dynamic:
- * dynamic tree encoding.
- * symbols.
- *
- *
- * The buffer for decompression in place is the length of the
- * uncompressed data, plus a small amount extra to keep the algorithm safe.
- * The compressed data is placed at the end of the buffer. The output
- * pointer is placed at the start of the buffer and the input pointer
- * is placed where the compressed data starts. Problems will occur
- * when the output pointer overruns the input pointer.
- *
- * The output pointer can only overrun the input pointer if the input
- * pointer is moving faster than the output pointer. A condition only
- * triggered by data whose compressed form is larger than the uncompressed
- * form.
- *
- * The worst case at the block level is a growth of the compressed data
- * of 5 bytes per 32767 bytes.
- *
- * The worst case internal to a compressed block is very hard to figure.
- * The worst case can at least be boundined by having one bit that represents
- * 32764 bytes and then all of the rest of the bytes representing the very
- * very last byte.
- *
- * All of which is enough to compute an amount of extra data that is required
- * to be safe. To avoid problems at the block level allocating 5 extra bytes
- * per 32767 bytes of data is sufficient. To avoind problems internal to a block
- * adding an extra 32767 bytes (the worst case uncompressed block size) is
- * sufficient, to ensure that in the worst case the decompressed data for
- * block will stop the byte before the compressed data for a block begins.
- * To avoid problems with the compressed data's meta information an extra 18
- * bytes are needed. Leading to the formula:
- *
- * extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size.
- *
- * Adding 8 bytes per 32K is a bit excessive but much easier to calculate.
- * Adding 32768 instead of 32767 just makes for round numbers.
- * Adding the decompressor_size is necessary as it musht live after all
- * of the data as well. Last I measured the decompressor is about 14K.
- * 10K of actuall data and 4K of bss.
- *
- */
-
-/*
- * gzip declarations
- */
-
-#define OF(args) args
-#define STATIC static
-
-#undef memset
-#undef memcpy
-#define memzero(s, n) memset ((s), 0, (n))
-
-typedef unsigned char uch;
-typedef unsigned short ush;
-typedef unsigned long ulg;
-
-#define WSIZE 0x80000000 /* Window size must be at least 32k,
- * and a power of two
- * We don't actually have a window just
- * a huge output buffer so I report
- * a 2G windows size, as that should
- * always be larger than our output buffer.
- */
-
-static uch *inbuf; /* input buffer */
-static uch *window; /* Sliding window buffer, (and final output buffer) */
-
-static unsigned insize; /* valid bytes in inbuf */
-static unsigned inptr; /* index of next byte to be processed in inbuf */
-static unsigned outcnt; /* bytes in output buffer */
-
-/* gzip flag byte */
-#define ASCII_FLAG 0x01 /* bit 0 set: file probably ASCII text */
-#define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gzip file */
-#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */
-#define ORIG_NAME 0x08 /* bit 3 set: original file name present */
-#define COMMENT 0x10 /* bit 4 set: file comment present */
-#define ENCRYPTED 0x20 /* bit 5 set: file is encrypted */
-#define RESERVED 0xC0 /* bit 6,7: reserved */
-
-#define get_byte() (inptr < insize ? inbuf[inptr++] : fill_inbuf())
-
-/* Diagnostic functions */
-#ifdef DEBUG
-# define Assert(cond,msg) {if(!(cond)) error(msg);}
-# define Trace(x) fprintf x
-# define Tracev(x) {if (verbose) fprintf x ;}
-# define Tracevv(x) {if (verbose>1) fprintf x ;}
-# define Tracec(c,x) {if (verbose && (c)) fprintf x ;}
-# define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;}
-#else
-# define Assert(cond,msg)
-# define Trace(x)
-# define Tracev(x)
-# define Tracevv(x)
-# define Tracec(c,x)
-# define Tracecv(c,x)
-#endif
-
-static int fill_inbuf(void);
-static void flush_window(void);
-static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
-
-/*
- * This is set up by the setup-routine at boot-time
- */
-static unsigned char *real_mode; /* Pointer to real-mode data */
-
-#define RM_EXT_MEM_K (*(unsigned short *)(real_mode + 0x2))
-#ifndef STANDARD_MEMORY_BIOS_CALL
-#define RM_ALT_MEM_K (*(unsigned long *)(real_mode + 0x1e0))
-#endif
-#define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0))
-
-extern unsigned char input_data[];
-extern int input_len;
-
-static long bytes_out = 0;
-
-static void *malloc(int size);
-static void free(void *where);
-
-static void *memset(void *s, int c, unsigned n);
-static void *memcpy(void *dest, const void *src, unsigned n);
-
-static void putstr(const char *);
-
-static long free_mem_ptr;
-static long free_mem_end_ptr;
-
-#define HEAP_SIZE 0x7000
-
-static char *vidmem = (char *)0xb8000;
-static int vidport;
-static int lines, cols;
-
-#include "../../../../lib/inflate.c"
-
-static void *malloc(int size)
-{
- void *p;
-
- if (size <0) error("Malloc error");
- if (free_mem_ptr <= 0) error("Memory error");
-
- free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
- p = (void *)free_mem_ptr;
- free_mem_ptr += size;
-
- if (free_mem_ptr >= free_mem_end_ptr)
- error("Out of memory");
-
- return p;
-}
-
-static void free(void *where)
-{ /* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
- *ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
- free_mem_ptr = (long) *ptr;
-}
-
-static void scroll(void)
-{
- int i;
-
- memcpy ( vidmem, vidmem + cols * 2, ( lines - 1 ) * cols * 2 );
- for ( i = ( lines - 1 ) * cols * 2; i < lines * cols * 2; i += 2 )
- vidmem[i] = ' ';
-}
-
-static void putstr(const char *s)
-{
- int x,y,pos;
- char c;
-
- x = RM_SCREEN_INFO.orig_x;
- y = RM_SCREEN_INFO.orig_y;
-
- while ( ( c = *s++ ) != '\0' ) {
- if ( c == '\n' ) {
- x = 0;
- if ( ++y >= lines ) {
- scroll();
- y--;
- }
- } else {
- vidmem [ ( x + cols * y ) * 2 ] = c;
- if ( ++x >= cols ) {
- x = 0;
- if ( ++y >= lines ) {
- scroll();
- y--;
- }
- }
- }
- }
-
- RM_SCREEN_INFO.orig_x = x;
- RM_SCREEN_INFO.orig_y = y;
-
- pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
-}
-
-static void* memset(void* s, int c, unsigned n)
-{
- int i;
- char *ss = (char*)s;
-
- for (i=0;i<n;i++) ss[i] = c;
- return s;
-}
-
-static void* memcpy(void* dest, const void* src, unsigned n)
-{
- int i;
- char *d = (char *)dest, *s = (char *)src;
-
- for (i=0;i<n;i++) d[i] = s[i];
- return dest;
-}
-
-/* ===========================================================================
- * Fill the input buffer. This is called only when the buffer is empty
- * and at least one byte is really needed.
- */
-static int fill_inbuf(void)
-{
- error("ran out of input data");
- return 0;
-}
-
-/* ===========================================================================
- * Write the output window window[0..outcnt-1] and update crc and bytes_out.
- * (Used for the decompressed data only.)
- */
-static void flush_window(void)
-{
- /* With my window equal to my output buffer
- * I only need to compute the crc here.
- */
- ulg c = crc; /* temporary variable */
- unsigned n;
- uch *in, ch;
-
- in = window;
- for (n = 0; n < outcnt; n++) {
- ch = *in++;
- c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
- }
- crc = c;
- bytes_out += (ulg)outcnt;
- outcnt = 0;
-}
-
-static void error(char *x)
-{
- putstr("\n\n");
- putstr(x);
- putstr("\n\n -- System halted");
-
- while(1); /* Halt */
-}
-
-asmlinkage void decompress_kernel(void *rmode, unsigned long heap,
- uch *input_data, unsigned long input_len, uch *output)
-{
- real_mode = rmode;
-
- if (RM_SCREEN_INFO.orig_video_mode == 7) {
- vidmem = (char *) 0xb0000;
- vidport = 0x3b4;
- } else {
- vidmem = (char *) 0xb8000;
- vidport = 0x3d4;
- }
-
- lines = RM_SCREEN_INFO.orig_video_lines;
- cols = RM_SCREEN_INFO.orig_video_cols;
-
- window = output; /* Output buffer (Normally at 1M) */
- free_mem_ptr = heap; /* Heap */
- free_mem_end_ptr = heap + HEAP_SIZE;
- inbuf = input_data; /* Input buffer */
- insize = input_len;
- inptr = 0;
-
- if ((ulg)output & (__KERNEL_ALIGN - 1))
- error("Destination address not 2M aligned");
- if ((ulg)output >= 0xffffffffffUL)
- error("Destination address too large");
-
- makecrc();
- putstr(".\nDecompressing Linux...");
- gunzip();
- putstr("done.\nBooting the kernel.\n");
- return;
-}
diff --git a/arch/x86_64/boot/compressed/vmlinux.lds b/arch/x86_64/boot/compressed/vmlinux.lds
deleted file mode 100644
index 94c13e557fb4..000000000000
--- a/arch/x86_64/boot/compressed/vmlinux.lds
+++ /dev/null
@@ -1,44 +0,0 @@
-OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
-OUTPUT_ARCH(i386:x86-64)
-ENTRY(startup_64)
-SECTIONS
-{
- /* Be careful parts of head.S assume startup_32 is at
- * address 0.
- */
- . = 0;
- .text : {
- _head = . ;
- *(.text.head)
- _ehead = . ;
- *(.text.compressed)
- _text = .; /* Text */
- *(.text)
- *(.text.*)
- _etext = . ;
- }
- .rodata : {
- _rodata = . ;
- *(.rodata) /* read-only data */
- *(.rodata.*)
- _erodata = . ;
- }
- .data : {
- _data = . ;
- *(.data)
- *(.data.*)
- _edata = . ;
- }
- .bss : {
- _bss = . ;
- *(.bss)
- *(.bss.*)
- *(COMMON)
- . = ALIGN(8);
- _end = . ;
- . = ALIGN(4096);
- pgtable = . ;
- . = . + 4096 * 6;
- _heap = .;
- }
-}
diff --git a/arch/x86_64/boot/compressed/vmlinux.scr b/arch/x86_64/boot/compressed/vmlinux.scr
deleted file mode 100644
index bd1429ce193e..000000000000
--- a/arch/x86_64/boot/compressed/vmlinux.scr
+++ /dev/null
@@ -1,10 +0,0 @@
-SECTIONS
-{
- .text.compressed : {
- input_len = .;
- LONG(input_data_end - input_data) input_data = .;
- *(.data)
- output_len = . - 4;
- input_data_end = .;
- }
-}
diff --git a/arch/x86_64/boot/tools/.gitignore b/arch/x86_64/boot/tools/.gitignore
deleted file mode 100644
index 378eac25d311..000000000000
--- a/arch/x86_64/boot/tools/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-build
diff --git a/arch/x86_64/crypto/Makefile b/arch/x86_64/crypto/Makefile
deleted file mode 100644
index 15b538a8b7f7..000000000000
--- a/arch/x86_64/crypto/Makefile
+++ /dev/null
@@ -1,12 +0,0 @@
-#
-# x86_64/crypto/Makefile
-#
-# Arch-specific CryptoAPI modules.
-#
-
-obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
-obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
-
-aes-x86_64-y := aes-x86_64-asm.o aes.o
-twofish-x86_64-y := twofish-x86_64-asm.o twofish.o
-
diff --git a/arch/x86_64/crypto/aes-x86_64-asm.S b/arch/x86_64/crypto/aes-x86_64-asm.S
deleted file mode 100644
index 26b40de4d0b0..000000000000
--- a/arch/x86_64/crypto/aes-x86_64-asm.S
+++ /dev/null
@@ -1,190 +0,0 @@
-/* AES (Rijndael) implementation (FIPS PUB 197) for x86_64
- *
- * Copyright (C) 2005 Andreas Steinmetz, <ast@domdv.de>
- *
- * License:
- * This code can be distributed under the terms of the GNU General Public
- * License (GPL) Version 2 provided that the above header down to and
- * including this sentence is retained in full.
- */
-
-.extern aes_ft_tab
-.extern aes_it_tab
-.extern aes_fl_tab
-.extern aes_il_tab
-
-.text
-
-#include <asm/asm-offsets.h>
-
-#define BASE crypto_tfm_ctx_offset
-
-#define R1 %rax
-#define R1E %eax
-#define R1X %ax
-#define R1H %ah
-#define R1L %al
-#define R2 %rbx
-#define R2E %ebx
-#define R2X %bx
-#define R2H %bh
-#define R2L %bl
-#define R3 %rcx
-#define R3E %ecx
-#define R3X %cx
-#define R3H %ch
-#define R3L %cl
-#define R4 %rdx
-#define R4E %edx
-#define R4X %dx
-#define R4H %dh
-#define R4L %dl
-#define R5 %rsi
-#define R5E %esi
-#define R6 %rdi
-#define R6E %edi
-#define R7 %rbp
-#define R7E %ebp
-#define R8 %r8
-#define R9 %r9
-#define R10 %r10
-#define R11 %r11
-
-#define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
- .global FUNC; \
- .type FUNC,@function; \
- .align 8; \
-FUNC: movq r1,r2; \
- movq r3,r4; \
- leaq BASE+KEY+52(r8),r9; \
- movq r10,r11; \
- movl (r7),r5 ## E; \
- movl 4(r7),r1 ## E; \
- movl 8(r7),r6 ## E; \
- movl 12(r7),r7 ## E; \
- movl BASE(r8),r10 ## E; \
- xorl -48(r9),r5 ## E; \
- xorl -44(r9),r1 ## E; \
- xorl -40(r9),r6 ## E; \
- xorl -36(r9),r7 ## E; \
- cmpl $24,r10 ## E; \
- jb B128; \
- leaq 32(r9),r9; \
- je B192; \
- leaq 32(r9),r9;
-
-#define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \
- movq r1,r2; \
- movq r3,r4; \
- movl r5 ## E,(r9); \
- movl r6 ## E,4(r9); \
- movl r7 ## E,8(r9); \
- movl r8 ## E,12(r9); \
- ret;
-
-#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
- movzbl r2 ## H,r5 ## E; \
- movzbl r2 ## L,r6 ## E; \
- movl TAB+1024(,r5,4),r5 ## E;\
- movw r4 ## X,r2 ## X; \
- movl TAB(,r6,4),r6 ## E; \
- roll $16,r2 ## E; \
- shrl $16,r4 ## E; \
- movzbl r4 ## H,r7 ## E; \
- movzbl r4 ## L,r4 ## E; \
- xorl OFFSET(r8),ra ## E; \
- xorl OFFSET+4(r8),rb ## E; \
- xorl TAB+3072(,r7,4),r5 ## E;\
- xorl TAB+2048(,r4,4),r6 ## E;\
- movzbl r1 ## L,r7 ## E; \
- movzbl r1 ## H,r4 ## E; \
- movl TAB+1024(,r4,4),r4 ## E;\
- movw r3 ## X,r1 ## X; \
- roll $16,r1 ## E; \
- shrl $16,r3 ## E; \
- xorl TAB(,r7,4),r5 ## E; \
- movzbl r3 ## H,r7 ## E; \
- movzbl r3 ## L,r3 ## E; \
- xorl TAB+3072(,r7,4),r4 ## E;\
- xorl TAB+2048(,r3,4),r5 ## E;\
- movzbl r1 ## H,r7 ## E; \
- movzbl r1 ## L,r3 ## E; \
- shrl $16,r1 ## E; \
- xorl TAB+3072(,r7,4),r6 ## E;\
- movl TAB+2048(,r3,4),r3 ## E;\
- movzbl r1 ## H,r7 ## E; \
- movzbl r1 ## L,r1 ## E; \
- xorl TAB+1024(,r7,4),r6 ## E;\
- xorl TAB(,r1,4),r3 ## E; \
- movzbl r2 ## H,r1 ## E; \
- movzbl r2 ## L,r7 ## E; \
- shrl $16,r2 ## E; \
- xorl TAB+3072(,r1,4),r3 ## E;\
- xorl TAB+2048(,r7,4),r4 ## E;\
- movzbl r2 ## H,r1 ## E; \
- movzbl r2 ## L,r2 ## E; \
- xorl OFFSET+8(r8),rc ## E; \
- xorl OFFSET+12(r8),rd ## E; \
- xorl TAB+1024(,r1,4),r3 ## E;\
- xorl TAB(,r2,4),r4 ## E;
-
-#define move_regs(r1,r2,r3,r4) \
- movl r3 ## E,r1 ## E; \
- movl r4 ## E,r2 ## E;
-
-#define entry(FUNC,KEY,B128,B192) \
- prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
-
-#define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11)
-
-#define encrypt_round(TAB,OFFSET) \
- round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
- move_regs(R1,R2,R5,R6)
-
-#define encrypt_final(TAB,OFFSET) \
- round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4)
-
-#define decrypt_round(TAB,OFFSET) \
- round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \
- move_regs(R1,R2,R5,R6)
-
-#define decrypt_final(TAB,OFFSET) \
- round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4)
-
-/* void aes_enc_blk(stuct crypto_tfm *tfm, u8 *out, const u8 *in) */
-
- entry(aes_enc_blk,0,enc128,enc192)
- encrypt_round(aes_ft_tab,-96)
- encrypt_round(aes_ft_tab,-80)
-enc192: encrypt_round(aes_ft_tab,-64)
- encrypt_round(aes_ft_tab,-48)
-enc128: encrypt_round(aes_ft_tab,-32)
- encrypt_round(aes_ft_tab,-16)
- encrypt_round(aes_ft_tab, 0)
- encrypt_round(aes_ft_tab, 16)
- encrypt_round(aes_ft_tab, 32)
- encrypt_round(aes_ft_tab, 48)
- encrypt_round(aes_ft_tab, 64)
- encrypt_round(aes_ft_tab, 80)
- encrypt_round(aes_ft_tab, 96)
- encrypt_final(aes_fl_tab,112)
- return
-
-/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */
-
- entry(aes_dec_blk,240,dec128,dec192)
- decrypt_round(aes_it_tab,-96)
- decrypt_round(aes_it_tab,-80)
-dec192: decrypt_round(aes_it_tab,-64)
- decrypt_round(aes_it_tab,-48)
-dec128: decrypt_round(aes_it_tab,-32)
- decrypt_round(aes_it_tab,-16)
- decrypt_round(aes_it_tab, 0)
- decrypt_round(aes_it_tab, 16)
- decrypt_round(aes_it_tab, 32)
- decrypt_round(aes_it_tab, 48)
- decrypt_round(aes_it_tab, 64)
- decrypt_round(aes_it_tab, 80)
- decrypt_round(aes_it_tab, 96)
- decrypt_final(aes_il_tab,112)
- return
diff --git a/arch/x86_64/crypto/aes.c b/arch/x86_64/crypto/aes.c
deleted file mode 100644
index 5cdb13ea5cc2..000000000000
--- a/arch/x86_64/crypto/aes.c
+++ /dev/null
@@ -1,336 +0,0 @@
-/*
- * Cryptographic API.
- *
- * AES Cipher Algorithm.
- *
- * Based on Brian Gladman's code.
- *
- * Linux developers:
- * Alexander Kjeldaas <astor@fast.no>
- * Herbert Valerio Riedel <hvr@hvrlab.org>
- * Kyle McMartin <kyle@debian.org>
- * Adam J. Richter <adam@yggdrasil.com> (conversion to 2.5 API).
- * Andreas Steinmetz <ast@domdv.de> (adapted to x86_64 assembler)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * ---------------------------------------------------------------------------
- * Copyright (c) 2002, Dr Brian Gladman <brg@gladman.me.uk>, Worcester, UK.
- * All rights reserved.
- *
- * LICENSE TERMS
- *
- * The free distribution and use of this software in both source and binary
- * form is allowed (with or without changes) provided that:
- *
- * 1. distributions of this source code include the above copyright
- * notice, this list of conditions and the following disclaimer;
- *
- * 2. distributions in binary form include the above copyright
- * notice, this list of conditions and the following disclaimer
- * in the documentation and/or other associated materials;
- *
- * 3. the copyright holder's name is not used to endorse products
- * built using this software without specific written permission.
- *
- * ALTERNATIVELY, provided that this notice is retained in full, this product
- * may be distributed under the terms of the GNU General Public License (GPL),
- * in which case the provisions of the GPL apply INSTEAD OF those given above.
- *
- * DISCLAIMER
- *
- * This software is provided 'as is' with no explicit or implied warranties
- * in respect of its properties, including, but not limited to, correctness
- * and/or fitness for purpose.
- * ---------------------------------------------------------------------------
- */
-
-/* Some changes from the Gladman version:
- s/RIJNDAEL(e_key)/E_KEY/g
- s/RIJNDAEL(d_key)/D_KEY/g
-*/
-
-#include <asm/byteorder.h>
-#include <linux/bitops.h>
-#include <linux/crypto.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/types.h>
-
-#define AES_MIN_KEY_SIZE 16
-#define AES_MAX_KEY_SIZE 32
-
-#define AES_BLOCK_SIZE 16
-
-/*
- * #define byte(x, nr) ((unsigned char)((x) >> (nr*8)))
- */
-static inline u8 byte(const u32 x, const unsigned n)
-{
- return x >> (n << 3);
-}
-
-struct aes_ctx
-{
- u32 key_length;
- u32 buf[120];
-};
-
-#define E_KEY (&ctx->buf[0])
-#define D_KEY (&ctx->buf[60])
-
-static u8 pow_tab[256] __initdata;
-static u8 log_tab[256] __initdata;
-static u8 sbx_tab[256] __initdata;
-static u8 isb_tab[256] __initdata;
-static u32 rco_tab[10];
-u32 aes_ft_tab[4][256];
-u32 aes_it_tab[4][256];
-
-u32 aes_fl_tab[4][256];
-u32 aes_il_tab[4][256];
-
-static inline u8 f_mult(u8 a, u8 b)
-{
- u8 aa = log_tab[a], cc = aa + log_tab[b];
-
- return pow_tab[cc + (cc < aa ? 1 : 0)];
-}
-
-#define ff_mult(a, b) (a && b ? f_mult(a, b) : 0)
-
-#define ls_box(x) \
- (aes_fl_tab[0][byte(x, 0)] ^ \
- aes_fl_tab[1][byte(x, 1)] ^ \
- aes_fl_tab[2][byte(x, 2)] ^ \
- aes_fl_tab[3][byte(x, 3)])
-
-static void __init gen_tabs(void)
-{
- u32 i, t;
- u8 p, q;
-
- /* log and power tables for GF(2**8) finite field with
- 0x011b as modular polynomial - the simplest primitive
- root is 0x03, used here to generate the tables */
-
- for (i = 0, p = 1; i < 256; ++i) {
- pow_tab[i] = (u8)p;
- log_tab[p] = (u8)i;
-
- p ^= (p << 1) ^ (p & 0x80 ? 0x01b : 0);
- }
-
- log_tab[1] = 0;
-
- for (i = 0, p = 1; i < 10; ++i) {
- rco_tab[i] = p;
-
- p = (p << 1) ^ (p & 0x80 ? 0x01b : 0);
- }
-
- for (i = 0; i < 256; ++i) {
- p = (i ? pow_tab[255 - log_tab[i]] : 0);
- q = ((p >> 7) | (p << 1)) ^ ((p >> 6) | (p << 2));
- p ^= 0x63 ^ q ^ ((q >> 6) | (q << 2));
- sbx_tab[i] = p;
- isb_tab[p] = (u8)i;
- }
-
- for (i = 0; i < 256; ++i) {
- p = sbx_tab[i];
-
- t = p;
- aes_fl_tab[0][i] = t;
- aes_fl_tab[1][i] = rol32(t, 8);
- aes_fl_tab[2][i] = rol32(t, 16);
- aes_fl_tab[3][i] = rol32(t, 24);
-
- t = ((u32)ff_mult(2, p)) |
- ((u32)p << 8) |
- ((u32)p << 16) | ((u32)ff_mult(3, p) << 24);
-
- aes_ft_tab[0][i] = t;
- aes_ft_tab[1][i] = rol32(t, 8);
- aes_ft_tab[2][i] = rol32(t, 16);
- aes_ft_tab[3][i] = rol32(t, 24);
-
- p = isb_tab[i];
-
- t = p;
- aes_il_tab[0][i] = t;
- aes_il_tab[1][i] = rol32(t, 8);
- aes_il_tab[2][i] = rol32(t, 16);
- aes_il_tab[3][i] = rol32(t, 24);
-
- t = ((u32)ff_mult(14, p)) |
- ((u32)ff_mult(9, p) << 8) |
- ((u32)ff_mult(13, p) << 16) |
- ((u32)ff_mult(11, p) << 24);
-
- aes_it_tab[0][i] = t;
- aes_it_tab[1][i] = rol32(t, 8);
- aes_it_tab[2][i] = rol32(t, 16);
- aes_it_tab[3][i] = rol32(t, 24);
- }
-}
-
-#define star_x(x) (((x) & 0x7f7f7f7f) << 1) ^ ((((x) & 0x80808080) >> 7) * 0x1b)
-
-#define imix_col(y, x) \
- u = star_x(x); \
- v = star_x(u); \
- w = star_x(v); \
- t = w ^ (x); \
- (y) = u ^ v ^ w; \
- (y) ^= ror32(u ^ t, 8) ^ \
- ror32(v ^ t, 16) ^ \
- ror32(t, 24)
-
-/* initialise the key schedule from the user supplied key */
-
-#define loop4(i) \
-{ \
- t = ror32(t, 8); t = ls_box(t) ^ rco_tab[i]; \
- t ^= E_KEY[4 * i]; E_KEY[4 * i + 4] = t; \
- t ^= E_KEY[4 * i + 1]; E_KEY[4 * i + 5] = t; \
- t ^= E_KEY[4 * i + 2]; E_KEY[4 * i + 6] = t; \
- t ^= E_KEY[4 * i + 3]; E_KEY[4 * i + 7] = t; \
-}
-
-#define loop6(i) \
-{ \
- t = ror32(t, 8); t = ls_box(t) ^ rco_tab[i]; \
- t ^= E_KEY[6 * i]; E_KEY[6 * i + 6] = t; \
- t ^= E_KEY[6 * i + 1]; E_KEY[6 * i + 7] = t; \
- t ^= E_KEY[6 * i + 2]; E_KEY[6 * i + 8] = t; \
- t ^= E_KEY[6 * i + 3]; E_KEY[6 * i + 9] = t; \
- t ^= E_KEY[6 * i + 4]; E_KEY[6 * i + 10] = t; \
- t ^= E_KEY[6 * i + 5]; E_KEY[6 * i + 11] = t; \
-}
-
-#define loop8(i) \
-{ \
- t = ror32(t, 8); ; t = ls_box(t) ^ rco_tab[i]; \
- t ^= E_KEY[8 * i]; E_KEY[8 * i + 8] = t; \
- t ^= E_KEY[8 * i + 1]; E_KEY[8 * i + 9] = t; \
- t ^= E_KEY[8 * i + 2]; E_KEY[8 * i + 10] = t; \
- t ^= E_KEY[8 * i + 3]; E_KEY[8 * i + 11] = t; \
- t = E_KEY[8 * i + 4] ^ ls_box(t); \
- E_KEY[8 * i + 12] = t; \
- t ^= E_KEY[8 * i + 5]; E_KEY[8 * i + 13] = t; \
- t ^= E_KEY[8 * i + 6]; E_KEY[8 * i + 14] = t; \
- t ^= E_KEY[8 * i + 7]; E_KEY[8 * i + 15] = t; \
-}
-
-static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
- unsigned int key_len)
-{
- struct aes_ctx *ctx = crypto_tfm_ctx(tfm);
- const __le32 *key = (const __le32 *)in_key;
- u32 *flags = &tfm->crt_flags;
- u32 i, j, t, u, v, w;
-
- if (key_len % 8) {
- *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
- return -EINVAL;
- }
-
- ctx->key_length = key_len;
-
- D_KEY[key_len + 24] = E_KEY[0] = le32_to_cpu(key[0]);
- D_KEY[key_len + 25] = E_KEY[1] = le32_to_cpu(key[1]);
- D_KEY[key_len + 26] = E_KEY[2] = le32_to_cpu(key[2]);
- D_KEY[key_len + 27] = E_KEY[3] = le32_to_cpu(key[3]);
-
- switch (key_len) {
- case 16:
- t = E_KEY[3];
- for (i = 0; i < 10; ++i)
- loop4(i);
- break;
-
- case 24:
- E_KEY[4] = le32_to_cpu(key[4]);
- t = E_KEY[5] = le32_to_cpu(key[5]);
- for (i = 0; i < 8; ++i)
- loop6 (i);
- break;
-
- case 32:
- E_KEY[4] = le32_to_cpu(key[4]);
- E_KEY[5] = le32_to_cpu(key[5]);
- E_KEY[6] = le32_to_cpu(key[6]);
- t = E_KEY[7] = le32_to_cpu(key[7]);
- for (i = 0; i < 7; ++i)
- loop8(i);
- break;
- }
-
- D_KEY[0] = E_KEY[key_len + 24];
- D_KEY[1] = E_KEY[key_len + 25];
- D_KEY[2] = E_KEY[key_len + 26];
- D_KEY[3] = E_KEY[key_len + 27];
-
- for (i = 4; i < key_len + 24; ++i) {
- j = key_len + 24 - (i & ~3) + (i & 3);
- imix_col(D_KEY[j], E_KEY[i]);
- }
-
- return 0;
-}
-
-asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in);
-asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in);
-
-static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- aes_enc_blk(tfm, dst, src);
-}
-
-static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- aes_dec_blk(tfm, dst, src);
-}
-
-static struct crypto_alg aes_alg = {
- .cra_name = "aes",
- .cra_driver_name = "aes-x86_64",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct aes_ctx),
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(aes_alg.cra_list),
- .cra_u = {
- .cipher = {
- .cia_min_keysize = AES_MIN_KEY_SIZE,
- .cia_max_keysize = AES_MAX_KEY_SIZE,
- .cia_setkey = aes_set_key,
- .cia_encrypt = aes_encrypt,
- .cia_decrypt = aes_decrypt
- }
- }
-};
-
-static int __init aes_init(void)
-{
- gen_tabs();
- return crypto_register_alg(&aes_alg);
-}
-
-static void __exit aes_fini(void)
-{
- crypto_unregister_alg(&aes_alg);
-}
-
-module_init(aes_init);
-module_exit(aes_fini);
-
-MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("aes");
diff --git a/arch/x86_64/crypto/twofish-x86_64-asm.S b/arch/x86_64/crypto/twofish-x86_64-asm.S
deleted file mode 100644
index 35974a586615..000000000000
--- a/arch/x86_64/crypto/twofish-x86_64-asm.S
+++ /dev/null
@@ -1,324 +0,0 @@
-/***************************************************************************
-* Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de> *
-* *
-* This program is free software; you can redistribute it and/or modify *
-* it under the terms of the GNU General Public License as published by *
-* the Free Software Foundation; either version 2 of the License, or *
-* (at your option) any later version. *
-* *
-* This program is distributed in the hope that it will be useful, *
-* but WITHOUT ANY WARRANTY; without even the implied warranty of *
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
-* GNU General Public License for more details. *
-* *
-* You should have received a copy of the GNU General Public License *
-* along with this program; if not, write to the *
-* Free Software Foundation, Inc., *
-* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
-***************************************************************************/
-
-.file "twofish-x86_64-asm.S"
-.text
-
-#include <asm/asm-offsets.h>
-
-#define a_offset 0
-#define b_offset 4
-#define c_offset 8
-#define d_offset 12
-
-/* Structure of the crypto context struct*/
-
-#define s0 0 /* S0 Array 256 Words each */
-#define s1 1024 /* S1 Array */
-#define s2 2048 /* S2 Array */
-#define s3 3072 /* S3 Array */
-#define w 4096 /* 8 whitening keys (word) */
-#define k 4128 /* key 1-32 ( word ) */
-
-/* define a few register aliases to allow macro substitution */
-
-#define R0 %rax
-#define R0D %eax
-#define R0B %al
-#define R0H %ah
-
-#define R1 %rbx
-#define R1D %ebx
-#define R1B %bl
-#define R1H %bh
-
-#define R2 %rcx
-#define R2D %ecx
-#define R2B %cl
-#define R2H %ch
-
-#define R3 %rdx
-#define R3D %edx
-#define R3B %dl
-#define R3H %dh
-
-
-/* performs input whitening */
-#define input_whitening(src,context,offset)\
- xor w+offset(context), src;
-
-/* performs input whitening */
-#define output_whitening(src,context,offset)\
- xor w+16+offset(context), src;
-
-
-/*
- * a input register containing a (rotated 16)
- * b input register containing b
- * c input register containing c
- * d input register containing d (already rol $1)
- * operations on a and b are interleaved to increase performance
- */
-#define encrypt_round(a,b,c,d,round)\
- movzx b ## B, %edi;\
- mov s1(%r11,%rdi,4),%r8d;\
- movzx a ## B, %edi;\
- mov s2(%r11,%rdi,4),%r9d;\
- movzx b ## H, %edi;\
- ror $16, b ## D;\
- xor s2(%r11,%rdi,4),%r8d;\
- movzx a ## H, %edi;\
- ror $16, a ## D;\
- xor s3(%r11,%rdi,4),%r9d;\
- movzx b ## B, %edi;\
- xor s3(%r11,%rdi,4),%r8d;\
- movzx a ## B, %edi;\
- xor (%r11,%rdi,4), %r9d;\
- movzx b ## H, %edi;\
- ror $15, b ## D;\
- xor (%r11,%rdi,4), %r8d;\
- movzx a ## H, %edi;\
- xor s1(%r11,%rdi,4),%r9d;\
- add %r8d, %r9d;\
- add %r9d, %r8d;\
- add k+round(%r11), %r9d;\
- xor %r9d, c ## D;\
- rol $15, c ## D;\
- add k+4+round(%r11),%r8d;\
- xor %r8d, d ## D;
-
-/*
- * a input register containing a(rotated 16)
- * b input register containing b
- * c input register containing c
- * d input register containing d (already rol $1)
- * operations on a and b are interleaved to increase performance
- * during the round a and b are prepared for the output whitening
- */
-#define encrypt_last_round(a,b,c,d,round)\
- mov b ## D, %r10d;\
- shl $32, %r10;\
- movzx b ## B, %edi;\
- mov s1(%r11,%rdi,4),%r8d;\
- movzx a ## B, %edi;\
- mov s2(%r11,%rdi,4),%r9d;\
- movzx b ## H, %edi;\
- ror $16, b ## D;\
- xor s2(%r11,%rdi,4),%r8d;\
- movzx a ## H, %edi;\
- ror $16, a ## D;\
- xor s3(%r11,%rdi,4),%r9d;\
- movzx b ## B, %edi;\
- xor s3(%r11,%rdi,4),%r8d;\
- movzx a ## B, %edi;\
- xor (%r11,%rdi,4), %r9d;\
- xor a, %r10;\
- movzx b ## H, %edi;\
- xor (%r11,%rdi,4), %r8d;\
- movzx a ## H, %edi;\
- xor s1(%r11,%rdi,4),%r9d;\
- add %r8d, %r9d;\
- add %r9d, %r8d;\
- add k+round(%r11), %r9d;\
- xor %r9d, c ## D;\
- ror $1, c ## D;\
- add k+4+round(%r11),%r8d;\
- xor %r8d, d ## D
-
-/*
- * a input register containing a
- * b input register containing b (rotated 16)
- * c input register containing c (already rol $1)
- * d input register containing d
- * operations on a and b are interleaved to increase performance
- */
-#define decrypt_round(a,b,c,d,round)\
- movzx a ## B, %edi;\
- mov (%r11,%rdi,4), %r9d;\
- movzx b ## B, %edi;\
- mov s3(%r11,%rdi,4),%r8d;\
- movzx a ## H, %edi;\
- ror $16, a ## D;\
- xor s1(%r11,%rdi,4),%r9d;\
- movzx b ## H, %edi;\
- ror $16, b ## D;\
- xor (%r11,%rdi,4), %r8d;\
- movzx a ## B, %edi;\
- xor s2(%r11,%rdi,4),%r9d;\
- movzx b ## B, %edi;\
- xor s1(%r11,%rdi,4),%r8d;\
- movzx a ## H, %edi;\
- ror $15, a ## D;\
- xor s3(%r11,%rdi,4),%r9d;\
- movzx b ## H, %edi;\
- xor s2(%r11,%rdi,4),%r8d;\
- add %r8d, %r9d;\
- add %r9d, %r8d;\
- add k+round(%r11), %r9d;\
- xor %r9d, c ## D;\
- add k+4+round(%r11),%r8d;\
- xor %r8d, d ## D;\
- rol $15, d ## D;
-
-/*
- * a input register containing a
- * b input register containing b
- * c input register containing c (already rol $1)
- * d input register containing d
- * operations on a and b are interleaved to increase performance
- * during the round a and b are prepared for the output whitening
- */
-#define decrypt_last_round(a,b,c,d,round)\
- movzx a ## B, %edi;\
- mov (%r11,%rdi,4), %r9d;\
- movzx b ## B, %edi;\
- mov s3(%r11,%rdi,4),%r8d;\
- movzx b ## H, %edi;\
- ror $16, b ## D;\
- xor (%r11,%rdi,4), %r8d;\
- movzx a ## H, %edi;\
- mov b ## D, %r10d;\
- shl $32, %r10;\
- xor a, %r10;\
- ror $16, a ## D;\
- xor s1(%r11,%rdi,4),%r9d;\
- movzx b ## B, %edi;\
- xor s1(%r11,%rdi,4),%r8d;\
- movzx a ## B, %edi;\
- xor s2(%r11,%rdi,4),%r9d;\
- movzx b ## H, %edi;\
- xor s2(%r11,%rdi,4),%r8d;\
- movzx a ## H, %edi;\
- xor s3(%r11,%rdi,4),%r9d;\
- add %r8d, %r9d;\
- add %r9d, %r8d;\
- add k+round(%r11), %r9d;\
- xor %r9d, c ## D;\
- add k+4+round(%r11),%r8d;\
- xor %r8d, d ## D;\
- ror $1, d ## D;
-
-.align 8
-.global twofish_enc_blk
-.global twofish_dec_blk
-
-twofish_enc_blk:
- pushq R1
-
- /* %rdi contains the crypto tfm adress */
- /* %rsi contains the output adress */
- /* %rdx contains the input adress */
- add $crypto_tfm_ctx_offset, %rdi /* set ctx adress */
- /* ctx adress is moved to free one non-rex register
- as target for the 8bit high operations */
- mov %rdi, %r11
-
- movq (R3), R1
- movq 8(R3), R3
- input_whitening(R1,%r11,a_offset)
- input_whitening(R3,%r11,c_offset)
- mov R1D, R0D
- rol $16, R0D
- shr $32, R1
- mov R3D, R2D
- shr $32, R3
- rol $1, R3D
-
- encrypt_round(R0,R1,R2,R3,0);
- encrypt_round(R2,R3,R0,R1,8);
- encrypt_round(R0,R1,R2,R3,2*8);
- encrypt_round(R2,R3,R0,R1,3*8);
- encrypt_round(R0,R1,R2,R3,4*8);
- encrypt_round(R2,R3,R0,R1,5*8);
- encrypt_round(R0,R1,R2,R3,6*8);
- encrypt_round(R2,R3,R0,R1,7*8);
- encrypt_round(R0,R1,R2,R3,8*8);
- encrypt_round(R2,R3,R0,R1,9*8);
- encrypt_round(R0,R1,R2,R3,10*8);
- encrypt_round(R2,R3,R0,R1,11*8);
- encrypt_round(R0,R1,R2,R3,12*8);
- encrypt_round(R2,R3,R0,R1,13*8);
- encrypt_round(R0,R1,R2,R3,14*8);
- encrypt_last_round(R2,R3,R0,R1,15*8);
-
-
- output_whitening(%r10,%r11,a_offset)
- movq %r10, (%rsi)
-
- shl $32, R1
- xor R0, R1
-
- output_whitening(R1,%r11,c_offset)
- movq R1, 8(%rsi)
-
- popq R1
- movq $1,%rax
- ret
-
-twofish_dec_blk:
- pushq R1
-
- /* %rdi contains the crypto tfm adress */
- /* %rsi contains the output adress */
- /* %rdx contains the input adress */
- add $crypto_tfm_ctx_offset, %rdi /* set ctx adress */
- /* ctx adress is moved to free one non-rex register
- as target for the 8bit high operations */
- mov %rdi, %r11
-
- movq (R3), R1
- movq 8(R3), R3
- output_whitening(R1,%r11,a_offset)
- output_whitening(R3,%r11,c_offset)
- mov R1D, R0D
- shr $32, R1
- rol $16, R1D
- mov R3D, R2D
- shr $32, R3
- rol $1, R2D
-
- decrypt_round(R0,R1,R2,R3,15*8);
- decrypt_round(R2,R3,R0,R1,14*8);
- decrypt_round(R0,R1,R2,R3,13*8);
- decrypt_round(R2,R3,R0,R1,12*8);
- decrypt_round(R0,R1,R2,R3,11*8);
- decrypt_round(R2,R3,R0,R1,10*8);
- decrypt_round(R0,R1,R2,R3,9*8);
- decrypt_round(R2,R3,R0,R1,8*8);
- decrypt_round(R0,R1,R2,R3,7*8);
- decrypt_round(R2,R3,R0,R1,6*8);
- decrypt_round(R0,R1,R2,R3,5*8);
- decrypt_round(R2,R3,R0,R1,4*8);
- decrypt_round(R0,R1,R2,R3,3*8);
- decrypt_round(R2,R3,R0,R1,2*8);
- decrypt_round(R0,R1,R2,R3,1*8);
- decrypt_last_round(R2,R3,R0,R1,0);
-
- input_whitening(%r10,%r11,a_offset)
- movq %r10, (%rsi)
-
- shl $32, R1
- xor R0, R1
-
- input_whitening(R1,%r11,c_offset)
- movq R1, 8(%rsi)
-
- popq R1
- movq $1,%rax
- ret
diff --git a/arch/x86_64/crypto/twofish.c b/arch/x86_64/crypto/twofish.c
deleted file mode 100644
index 182d91d5cfb9..000000000000
--- a/arch/x86_64/crypto/twofish.c
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Glue Code for optimized x86_64 assembler version of TWOFISH
- *
- * Originally Twofish for GPG
- * By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
- * 256-bit key length added March 20, 1999
- * Some modifications to reduce the text size by Werner Koch, April, 1998
- * Ported to the kerneli patch by Marc Mutz <Marc@Mutz.com>
- * Ported to CryptoAPI by Colin Slater <hoho@tacomeat.net>
- *
- * The original author has disclaimed all copyright interest in this
- * code and thus put it in the public domain. The subsequent authors
- * have put this under the GNU General Public License.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- *
- * This code is a "clean room" implementation, written from the paper
- * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
- * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available
- * through http://www.counterpane.com/twofish.html
- *
- * For background information on multiplication in finite fields, used for
- * the matrix operations in the key schedule, see the book _Contemporary
- * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the
- * Third Edition.
- */
-
-#include <crypto/twofish.h>
-#include <linux/crypto.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/types.h>
-
-asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
-asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
-
-static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- twofish_enc_blk(tfm, dst, src);
-}
-
-static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- twofish_dec_blk(tfm, dst, src);
-}
-
-static struct crypto_alg alg = {
- .cra_name = "twofish",
- .cra_driver_name = "twofish-x86_64",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct twofish_ctx),
- .cra_alignmask = 3,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(alg.cra_list),
- .cra_u = {
- .cipher = {
- .cia_min_keysize = TF_MIN_KEY_SIZE,
- .cia_max_keysize = TF_MAX_KEY_SIZE,
- .cia_setkey = twofish_setkey,
- .cia_encrypt = twofish_encrypt,
- .cia_decrypt = twofish_decrypt
- }
- }
-};
-
-static int __init init(void)
-{
- return crypto_register_alg(&alg);
-}
-
-static void __exit fini(void)
-{
- crypto_unregister_alg(&alg);
-}
-
-module_init(init);
-module_exit(fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION ("Twofish Cipher Algorithm, x86_64 asm optimized");
-MODULE_ALIAS("twofish");
diff --git a/arch/x86_64/ia32/Makefile b/arch/x86_64/ia32/Makefile
deleted file mode 100644
index cdae36435e21..000000000000
--- a/arch/x86_64/ia32/Makefile
+++ /dev/null
@@ -1,35 +0,0 @@
-#
-# Makefile for the ia32 kernel emulation subsystem.
-#
-
-obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o tls32.o \
- ia32_binfmt.o fpu32.o ptrace32.o syscall32.o syscall32_syscall.o \
- mmap32.o
-
-sysv-$(CONFIG_SYSVIPC) := ipc32.o
-obj-$(CONFIG_IA32_EMULATION) += $(sysv-y)
-
-obj-$(CONFIG_IA32_AOUT) += ia32_aout.o
-
-audit-class-$(CONFIG_AUDIT) := audit.o
-obj-$(CONFIG_IA32_EMULATION) += $(audit-class-y)
-
-$(obj)/syscall32_syscall.o: \
- $(foreach F,sysenter syscall,$(obj)/vsyscall-$F.so)
-
-# Teach kbuild about targets
-targets := $(foreach F,sysenter syscall,vsyscall-$F.o vsyscall-$F.so)
-
-# The DSO images are built using a special linker script
-quiet_cmd_syscall = SYSCALL $@
- cmd_syscall = $(CC) -m32 -nostdlib -shared -s \
- $(call ld-option, -Wl$(comma)--hash-style=sysv) \
- -Wl,-soname=linux-gate.so.1 -o $@ \
- -Wl,-T,$(filter-out FORCE,$^)
-
-$(obj)/vsyscall-sysenter.so $(obj)/vsyscall-syscall.so: \
-$(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE
- $(call if_changed,syscall)
-
-AFLAGS_vsyscall-sysenter.o = -m32 -Wa,-32
-AFLAGS_vsyscall-syscall.o = -m32 -Wa,-32
diff --git a/arch/x86_64/ia32/audit.c b/arch/x86_64/ia32/audit.c
deleted file mode 100644
index 8850fe40ea34..000000000000
--- a/arch/x86_64/ia32/audit.c
+++ /dev/null
@@ -1,42 +0,0 @@
-#include <asm-i386/unistd.h>
-
-unsigned ia32_dir_class[] = {
-#include <asm-generic/audit_dir_write.h>
-~0U
-};
-
-unsigned ia32_chattr_class[] = {
-#include <asm-generic/audit_change_attr.h>
-~0U
-};
-
-unsigned ia32_write_class[] = {
-#include <asm-generic/audit_write.h>
-~0U
-};
-
-unsigned ia32_read_class[] = {
-#include <asm-generic/audit_read.h>
-~0U
-};
-
-unsigned ia32_signal_class[] = {
-#include <asm-generic/audit_signal.h>
-~0U
-};
-
-int ia32_classify_syscall(unsigned syscall)
-{
- switch(syscall) {
- case __NR_open:
- return 2;
- case __NR_openat:
- return 3;
- case __NR_socketcall:
- return 4;
- case __NR_execve:
- return 5;
- default:
- return 1;
- }
-}
diff --git a/arch/x86_64/ia32/fpu32.c b/arch/x86_64/ia32/fpu32.c
deleted file mode 100644
index 2c8209a3605a..000000000000
--- a/arch/x86_64/ia32/fpu32.c
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Copyright 2002 Andi Kleen, SuSE Labs.
- * FXSAVE<->i387 conversion support. Based on code by Gareth Hughes.
- * This is used for ptrace, signals and coredumps in 32bit emulation.
- */
-
-#include <linux/sched.h>
-#include <asm/sigcontext32.h>
-#include <asm/processor.h>
-#include <asm/uaccess.h>
-#include <asm/i387.h>
-
-static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
-{
- unsigned int tmp; /* to avoid 16 bit prefixes in the code */
-
- /* Transform each pair of bits into 01 (valid) or 00 (empty) */
- tmp = ~twd;
- tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
- /* and move the valid bits to the lower byte. */
- tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
- tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
- tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
- return tmp;
-}
-
-static inline unsigned long twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
-{
- struct _fpxreg *st = NULL;
- unsigned long tos = (fxsave->swd >> 11) & 7;
- unsigned long twd = (unsigned long) fxsave->twd;
- unsigned long tag;
- unsigned long ret = 0xffff0000;
- int i;
-
-#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16);
-
- for (i = 0 ; i < 8 ; i++) {
- if (twd & 0x1) {
- st = FPREG_ADDR( fxsave, (i - tos) & 7 );
-
- switch (st->exponent & 0x7fff) {
- case 0x7fff:
- tag = 2; /* Special */
- break;
- case 0x0000:
- if ( !st->significand[0] &&
- !st->significand[1] &&
- !st->significand[2] &&
- !st->significand[3] ) {
- tag = 1; /* Zero */
- } else {
- tag = 2; /* Special */
- }
- break;
- default:
- if (st->significand[3] & 0x8000) {
- tag = 0; /* Valid */
- } else {
- tag = 2; /* Special */
- }
- break;
- }
- } else {
- tag = 3; /* Empty */
- }
- ret |= (tag << (2 * i));
- twd = twd >> 1;
- }
- return ret;
-}
-
-
-static inline int convert_fxsr_from_user(struct i387_fxsave_struct *fxsave,
- struct _fpstate_ia32 __user *buf)
-{
- struct _fpxreg *to;
- struct _fpreg __user *from;
- int i;
- u32 v;
- int err = 0;
-
-#define G(num,val) err |= __get_user(val, num + (u32 __user *)buf)
- G(0, fxsave->cwd);
- G(1, fxsave->swd);
- G(2, fxsave->twd);
- fxsave->twd = twd_i387_to_fxsr(fxsave->twd);
- G(3, fxsave->rip);
- G(4, v);
- fxsave->fop = v>>16; /* cs ignored */
- G(5, fxsave->rdp);
- /* 6: ds ignored */
-#undef G
- if (err)
- return -1;
-
- to = (struct _fpxreg *)&fxsave->st_space[0];
- from = &buf->_st[0];
- for (i = 0 ; i < 8 ; i++, to++, from++) {
- if (__copy_from_user(to, from, sizeof(*from)))
- return -1;
- }
- return 0;
-}
-
-
-static inline int convert_fxsr_to_user(struct _fpstate_ia32 __user *buf,
- struct i387_fxsave_struct *fxsave,
- struct pt_regs *regs,
- struct task_struct *tsk)
-{
- struct _fpreg __user *to;
- struct _fpxreg *from;
- int i;
- u16 cs,ds;
- int err = 0;
-
- if (tsk == current) {
- /* should be actually ds/cs at fpu exception time,
- but that information is not available in 64bit mode. */
- asm("movw %%ds,%0 " : "=r" (ds));
- asm("movw %%cs,%0 " : "=r" (cs));
- } else { /* ptrace. task has stopped. */
- ds = tsk->thread.ds;
- cs = regs->cs;
- }
-
-#define P(num,val) err |= __put_user(val, num + (u32 __user *)buf)
- P(0, (u32)fxsave->cwd | 0xffff0000);
- P(1, (u32)fxsave->swd | 0xffff0000);
- P(2, twd_fxsr_to_i387(fxsave));
- P(3, (u32)fxsave->rip);
- P(4, cs | ((u32)fxsave->fop) << 16);
- P(5, fxsave->rdp);
- P(6, 0xffff0000 | ds);
-#undef P
-
- if (err)
- return -1;
-
- to = &buf->_st[0];
- from = (struct _fpxreg *) &fxsave->st_space[0];
- for ( i = 0 ; i < 8 ; i++, to++, from++ ) {
- if (__copy_to_user(to, from, sizeof(*to)))
- return -1;
- }
- return 0;
-}
-
-int restore_i387_ia32(struct task_struct *tsk, struct _fpstate_ia32 __user *buf, int fsave)
-{
- clear_fpu(tsk);
- if (!fsave) {
- if (__copy_from_user(&tsk->thread.i387.fxsave,
- &buf->_fxsr_env[0],
- sizeof(struct i387_fxsave_struct)))
- return -1;
- tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
- set_stopped_child_used_math(tsk);
- }
- return convert_fxsr_from_user(&tsk->thread.i387.fxsave, buf);
-}
-
-int save_i387_ia32(struct task_struct *tsk,
- struct _fpstate_ia32 __user *buf,
- struct pt_regs *regs,
- int fsave)
-{
- int err = 0;
-
- init_fpu(tsk);
- if (convert_fxsr_to_user(buf, &tsk->thread.i387.fxsave, regs, tsk))
- return -1;
- if (fsave)
- return 0;
- err |= __put_user(tsk->thread.i387.fxsave.swd, &buf->status);
- if (fsave)
- return err ? -1 : 1;
- err |= __put_user(X86_FXSR_MAGIC, &buf->magic);
- err |= __copy_to_user(&buf->_fxsr_env[0], &tsk->thread.i387.fxsave,
- sizeof(struct i387_fxsave_struct));
- return err ? -1 : 1;
-}
diff --git a/arch/x86_64/ia32/ia32_aout.c b/arch/x86_64/ia32/ia32_aout.c
deleted file mode 100644
index 08781370256d..000000000000
--- a/arch/x86_64/ia32/ia32_aout.c
+++ /dev/null
@@ -1,528 +0,0 @@
-/*
- * a.out loader for x86-64
- *
- * Copyright (C) 1991, 1992, 1996 Linus Torvalds
- * Hacked together by Andi Kleen
- */
-
-#include <linux/module.h>
-
-#include <linux/time.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/mman.h>
-#include <linux/a.out.h>
-#include <linux/errno.h>
-#include <linux/signal.h>
-#include <linux/string.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/fcntl.h>
-#include <linux/ptrace.h>
-#include <linux/user.h>
-#include <linux/slab.h>
-#include <linux/binfmts.h>
-#include <linux/personality.h>
-#include <linux/init.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/pgalloc.h>
-#include <asm/cacheflush.h>
-#include <asm/user32.h>
-#include <asm/ia32.h>
-
-#undef WARN_OLD
-#undef CORE_DUMP /* probably broken */
-
-static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
-static int load_aout_library(struct file*);
-
-#ifdef CORE_DUMP
-static int aout_core_dump(long signr, struct pt_regs * regs, struct file *file);
-
-/*
- * fill in the user structure for a core dump..
- */
-static void dump_thread32(struct pt_regs * regs, struct user32 * dump)
-{
- u32 fs,gs;
-
-/* changed the size calculations - should hopefully work better. lbt */
- dump->magic = CMAGIC;
- dump->start_code = 0;
- dump->start_stack = regs->rsp & ~(PAGE_SIZE - 1);
- dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT;
- dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
- dump->u_dsize -= dump->u_tsize;
- dump->u_ssize = 0;
- dump->u_debugreg[0] = current->thread.debugreg0;
- dump->u_debugreg[1] = current->thread.debugreg1;
- dump->u_debugreg[2] = current->thread.debugreg2;
- dump->u_debugreg[3] = current->thread.debugreg3;
- dump->u_debugreg[4] = 0;
- dump->u_debugreg[5] = 0;
- dump->u_debugreg[6] = current->thread.debugreg6;
- dump->u_debugreg[7] = current->thread.debugreg7;
-
- if (dump->start_stack < 0xc0000000)
- dump->u_ssize = ((unsigned long) (0xc0000000 - dump->start_stack)) >> PAGE_SHIFT;
-
- dump->regs.ebx = regs->rbx;
- dump->regs.ecx = regs->rcx;
- dump->regs.edx = regs->rdx;
- dump->regs.esi = regs->rsi;
- dump->regs.edi = regs->rdi;
- dump->regs.ebp = regs->rbp;
- dump->regs.eax = regs->rax;
- dump->regs.ds = current->thread.ds;
- dump->regs.es = current->thread.es;
- asm("movl %%fs,%0" : "=r" (fs)); dump->regs.fs = fs;
- asm("movl %%gs,%0" : "=r" (gs)); dump->regs.gs = gs;
- dump->regs.orig_eax = regs->orig_rax;
- dump->regs.eip = regs->rip;
- dump->regs.cs = regs->cs;
- dump->regs.eflags = regs->eflags;
- dump->regs.esp = regs->rsp;
- dump->regs.ss = regs->ss;
-
-#if 1 /* FIXME */
- dump->u_fpvalid = 0;
-#else
- dump->u_fpvalid = dump_fpu (regs, &dump->i387);
-#endif
-}
-
-#endif
-
-static struct linux_binfmt aout_format = {
- .module = THIS_MODULE,
- .load_binary = load_aout_binary,
- .load_shlib = load_aout_library,
-#ifdef CORE_DUMP
- .core_dump = aout_core_dump,
-#endif
- .min_coredump = PAGE_SIZE
-};
-
-static void set_brk(unsigned long start, unsigned long end)
-{
- start = PAGE_ALIGN(start);
- end = PAGE_ALIGN(end);
- if (end <= start)
- return;
- down_write(&current->mm->mmap_sem);
- do_brk(start, end - start);
- up_write(&current->mm->mmap_sem);
-}
-
-#ifdef CORE_DUMP
-/*
- * These are the only things you should do on a core-file: use only these
- * macros to write out all the necessary info.
- */
-
-static int dump_write(struct file *file, const void *addr, int nr)
-{
- return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
-}
-
-#define DUMP_WRITE(addr, nr) \
- if (!dump_write(file, (void *)(addr), (nr))) \
- goto end_coredump;
-
-#define DUMP_SEEK(offset) \
-if (file->f_op->llseek) { \
- if (file->f_op->llseek(file,(offset),0) != (offset)) \
- goto end_coredump; \
-} else file->f_pos = (offset)
-
-/*
- * Routine writes a core dump image in the current directory.
- * Currently only a stub-function.
- *
- * Note that setuid/setgid files won't make a core-dump if the uid/gid
- * changed due to the set[u|g]id. It's enforced by the "current->mm->dumpable"
- * field, which also makes sure the core-dumps won't be recursive if the
- * dumping of the process results in another error..
- */
-
-static int aout_core_dump(long signr, struct pt_regs * regs, struct file *file)
-{
- mm_segment_t fs;
- int has_dumped = 0;
- unsigned long dump_start, dump_size;
- struct user32 dump;
-# define START_DATA(u) (u.u_tsize << PAGE_SHIFT)
-# define START_STACK(u) (u.start_stack)
-
- fs = get_fs();
- set_fs(KERNEL_DS);
- has_dumped = 1;
- current->flags |= PF_DUMPCORE;
- strncpy(dump.u_comm, current->comm, sizeof(current->comm));
- dump.u_ar0 = (u32)(((unsigned long)(&dump.regs)) - ((unsigned long)(&dump)));
- dump.signal = signr;
- dump_thread32(regs, &dump);
-
-/* If the size of the dump file exceeds the rlimit, then see what would happen
- if we wrote the stack, but not the data area. */
- if ((dump.u_dsize+dump.u_ssize+1) * PAGE_SIZE >
- current->signal->rlim[RLIMIT_CORE].rlim_cur)
- dump.u_dsize = 0;
-
-/* Make sure we have enough room to write the stack and data areas. */
- if ((dump.u_ssize+1) * PAGE_SIZE >
- current->signal->rlim[RLIMIT_CORE].rlim_cur)
- dump.u_ssize = 0;
-
-/* make sure we actually have a data and stack area to dump */
- set_fs(USER_DS);
- if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_DATA(dump), dump.u_dsize << PAGE_SHIFT))
- dump.u_dsize = 0;
- if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_STACK(dump), dump.u_ssize << PAGE_SHIFT))
- dump.u_ssize = 0;
-
- set_fs(KERNEL_DS);
-/* struct user */
- DUMP_WRITE(&dump,sizeof(dump));
-/* Now dump all of the user data. Include malloced stuff as well */
- DUMP_SEEK(PAGE_SIZE);
-/* now we start writing out the user space info */
- set_fs(USER_DS);
-/* Dump the data area */
- if (dump.u_dsize != 0) {
- dump_start = START_DATA(dump);
- dump_size = dump.u_dsize << PAGE_SHIFT;
- DUMP_WRITE(dump_start,dump_size);
- }
-/* Now prepare to dump the stack area */
- if (dump.u_ssize != 0) {
- dump_start = START_STACK(dump);
- dump_size = dump.u_ssize << PAGE_SHIFT;
- DUMP_WRITE(dump_start,dump_size);
- }
-/* Finally dump the task struct. Not be used by gdb, but could be useful */
- set_fs(KERNEL_DS);
- DUMP_WRITE(current,sizeof(*current));
-end_coredump:
- set_fs(fs);
- return has_dumped;
-}
-#endif
-
-/*
- * create_aout_tables() parses the env- and arg-strings in new user
- * memory and creates the pointer tables from them, and puts their
- * addresses on the "stack", returning the new stack pointer value.
- */
-static u32 __user *create_aout_tables(char __user *p, struct linux_binprm *bprm)
-{
- u32 __user *argv;
- u32 __user *envp;
- u32 __user *sp;
- int argc = bprm->argc;
- int envc = bprm->envc;
-
- sp = (u32 __user *) ((-(unsigned long)sizeof(u32)) & (unsigned long) p);
- sp -= envc+1;
- envp = sp;
- sp -= argc+1;
- argv = sp;
- put_user((unsigned long) envp,--sp);
- put_user((unsigned long) argv,--sp);
- put_user(argc,--sp);
- current->mm->arg_start = (unsigned long) p;
- while (argc-->0) {
- char c;
- put_user((u32)(unsigned long)p,argv++);
- do {
- get_user(c,p++);
- } while (c);
- }
- put_user(0, argv);
- current->mm->arg_end = current->mm->env_start = (unsigned long) p;
- while (envc-->0) {
- char c;
- put_user((u32)(unsigned long)p,envp++);
- do {
- get_user(c,p++);
- } while (c);
- }
- put_user(0, envp);
- current->mm->env_end = (unsigned long) p;
- return sp;
-}
-
-/*
- * These are the functions used to load a.out style executables and shared
- * libraries. There is no binary dependent code anywhere else.
- */
-
-static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
-{
- struct exec ex;
- unsigned long error;
- unsigned long fd_offset;
- unsigned long rlim;
- int retval;
-
- ex = *((struct exec *) bprm->buf); /* exec-header */
- if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
- N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) ||
- N_TRSIZE(ex) || N_DRSIZE(ex) ||
- i_size_read(bprm->file->f_path.dentry->d_inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
- return -ENOEXEC;
- }
-
- fd_offset = N_TXTOFF(ex);
-
- /* Check initial limits. This avoids letting people circumvent
- * size limits imposed on them by creating programs with large
- * arrays in the data or bss.
- */
- rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
- if (rlim >= RLIM_INFINITY)
- rlim = ~0;
- if (ex.a_data + ex.a_bss > rlim)
- return -ENOMEM;
-
- /* Flush all traces of the currently running executable */
- retval = flush_old_exec(bprm);
- if (retval)
- return retval;
-
- regs->cs = __USER32_CS;
- regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 =
- regs->r13 = regs->r14 = regs->r15 = 0;
-
- /* OK, This is the point of no return */
- set_personality(PER_LINUX);
- set_thread_flag(TIF_IA32);
- clear_thread_flag(TIF_ABI_PENDING);
-
- current->mm->end_code = ex.a_text +
- (current->mm->start_code = N_TXTADDR(ex));
- current->mm->end_data = ex.a_data +
- (current->mm->start_data = N_DATADDR(ex));
- current->mm->brk = ex.a_bss +
- (current->mm->start_brk = N_BSSADDR(ex));
- current->mm->free_area_cache = TASK_UNMAPPED_BASE;
- current->mm->cached_hole_size = 0;
-
- current->mm->mmap = NULL;
- compute_creds(bprm);
- current->flags &= ~PF_FORKNOEXEC;
-
- if (N_MAGIC(ex) == OMAGIC) {
- unsigned long text_addr, map_size;
- loff_t pos;
-
- text_addr = N_TXTADDR(ex);
-
- pos = 32;
- map_size = ex.a_text+ex.a_data;
-
- down_write(&current->mm->mmap_sem);
- error = do_brk(text_addr & PAGE_MASK, map_size);
- up_write(&current->mm->mmap_sem);
-
- if (error != (text_addr & PAGE_MASK)) {
- send_sig(SIGKILL, current, 0);
- return error;
- }
-
- error = bprm->file->f_op->read(bprm->file,
- (char __user *)text_addr,
- ex.a_text+ex.a_data, &pos);
- if ((signed long)error < 0) {
- send_sig(SIGKILL, current, 0);
- return error;
- }
-
- flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data);
- } else {
-#ifdef WARN_OLD
- static unsigned long error_time, error_time2;
- if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
- (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ)
- {
- printk(KERN_NOTICE "executable not page aligned\n");
- error_time2 = jiffies;
- }
-
- if ((fd_offset & ~PAGE_MASK) != 0 &&
- (jiffies-error_time) > 5*HZ)
- {
- printk(KERN_WARNING
- "fd_offset is not page aligned. Please convert program: %s\n",
- bprm->file->f_path.dentry->d_name.name);
- error_time = jiffies;
- }
-#endif
-
- if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
- loff_t pos = fd_offset;
- down_write(&current->mm->mmap_sem);
- do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
- up_write(&current->mm->mmap_sem);
- bprm->file->f_op->read(bprm->file,
- (char __user *)N_TXTADDR(ex),
- ex.a_text+ex.a_data, &pos);
- flush_icache_range((unsigned long) N_TXTADDR(ex),
- (unsigned long) N_TXTADDR(ex) +
- ex.a_text+ex.a_data);
- goto beyond_if;
- }
-
- down_write(&current->mm->mmap_sem);
- error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
- PROT_READ | PROT_EXEC,
- MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE | MAP_32BIT,
- fd_offset);
- up_write(&current->mm->mmap_sem);
-
- if (error != N_TXTADDR(ex)) {
- send_sig(SIGKILL, current, 0);
- return error;
- }
-
- down_write(&current->mm->mmap_sem);
- error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
- PROT_READ | PROT_WRITE | PROT_EXEC,
- MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE | MAP_32BIT,
- fd_offset + ex.a_text);
- up_write(&current->mm->mmap_sem);
- if (error != N_DATADDR(ex)) {
- send_sig(SIGKILL, current, 0);
- return error;
- }
- }
-beyond_if:
- set_binfmt(&aout_format);
-
- set_brk(current->mm->start_brk, current->mm->brk);
-
- retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT);
- if (retval < 0) {
- /* Someone check-me: is this error path enough? */
- send_sig(SIGKILL, current, 0);
- return retval;
- }
-
- current->mm->start_stack =
- (unsigned long)create_aout_tables((char __user *)bprm->p, bprm);
- /* start thread */
- asm volatile("movl %0,%%fs" :: "r" (0)); \
- asm volatile("movl %0,%%es; movl %0,%%ds": :"r" (__USER32_DS));
- load_gs_index(0);
- (regs)->rip = ex.a_entry;
- (regs)->rsp = current->mm->start_stack;
- (regs)->eflags = 0x200;
- (regs)->cs = __USER32_CS;
- (regs)->ss = __USER32_DS;
- set_fs(USER_DS);
- if (unlikely(current->ptrace & PT_PTRACED)) {
- if (current->ptrace & PT_TRACE_EXEC)
- ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
- else
- send_sig(SIGTRAP, current, 0);
- }
- return 0;
-}
-
-static int load_aout_library(struct file *file)
-{
- struct inode * inode;
- unsigned long bss, start_addr, len;
- unsigned long error;
- int retval;
- struct exec ex;
-
- inode = file->f_path.dentry->d_inode;
-
- retval = -ENOEXEC;
- error = kernel_read(file, 0, (char *) &ex, sizeof(ex));
- if (error != sizeof(ex))
- goto out;
-
- /* We come in here for the regular a.out style of shared libraries */
- if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) ||
- N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) ||
- i_size_read(inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
- goto out;
- }
-
- if (N_FLAGS(ex))
- goto out;
-
- /* For QMAGIC, the starting address is 0x20 into the page. We mask
- this off to get the starting address for the page */
-
- start_addr = ex.a_entry & 0xfffff000;
-
- if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) {
- loff_t pos = N_TXTOFF(ex);
-
-#ifdef WARN_OLD
- static unsigned long error_time;
- if ((jiffies-error_time) > 5*HZ)
- {
- printk(KERN_WARNING
- "N_TXTOFF is not page aligned. Please convert library: %s\n",
- file->f_path.dentry->d_name.name);
- error_time = jiffies;
- }
-#endif
- down_write(&current->mm->mmap_sem);
- do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
- up_write(&current->mm->mmap_sem);
-
- file->f_op->read(file, (char __user *)start_addr,
- ex.a_text + ex.a_data, &pos);
- flush_icache_range((unsigned long) start_addr,
- (unsigned long) start_addr + ex.a_text + ex.a_data);
-
- retval = 0;
- goto out;
- }
- /* Now use mmap to map the library into memory. */
- down_write(&current->mm->mmap_sem);
- error = do_mmap(file, start_addr, ex.a_text + ex.a_data,
- PROT_READ | PROT_WRITE | PROT_EXEC,
- MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_32BIT,
- N_TXTOFF(ex));
- up_write(&current->mm->mmap_sem);
- retval = error;
- if (error != start_addr)
- goto out;
-
- len = PAGE_ALIGN(ex.a_text + ex.a_data);
- bss = ex.a_text + ex.a_data + ex.a_bss;
- if (bss > len) {
- down_write(&current->mm->mmap_sem);
- error = do_brk(start_addr + len, bss - len);
- up_write(&current->mm->mmap_sem);
- retval = error;
- if (error != start_addr + len)
- goto out;
- }
- retval = 0;
-out:
- return retval;
-}
-
-static int __init init_aout_binfmt(void)
-{
- return register_binfmt(&aout_format);
-}
-
-static void __exit exit_aout_binfmt(void)
-{
- unregister_binfmt(&aout_format);
-}
-
-module_init(init_aout_binfmt);
-module_exit(exit_aout_binfmt);
-MODULE_LICENSE("GPL");
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c
deleted file mode 100644
index dffd2ac72747..000000000000
--- a/arch/x86_64/ia32/ia32_binfmt.c
+++ /dev/null
@@ -1,320 +0,0 @@
-/*
- * Written 2000,2002 by Andi Kleen.
- *
- * Loosely based on the sparc64 and IA64 32bit emulation loaders.
- * This tricks binfmt_elf.c into loading 32bit binaries using lots
- * of ugly preprocessor tricks. Talk about very very poor man's inheritance.
- */
-#define __ASM_X86_64_ELF_H 1
-
-#undef ELF_CLASS
-#define ELF_CLASS ELFCLASS32
-
-#include <linux/types.h>
-#include <linux/stddef.h>
-#include <linux/rwsem.h>
-#include <linux/sched.h>
-#include <linux/compat.h>
-#include <linux/string.h>
-#include <linux/binfmts.h>
-#include <linux/mm.h>
-#include <linux/security.h>
-
-#include <asm/segment.h>
-#include <asm/ptrace.h>
-#include <asm/processor.h>
-#include <asm/user32.h>
-#include <asm/sigcontext32.h>
-#include <asm/fpu32.h>
-#include <asm/i387.h>
-#include <asm/uaccess.h>
-#include <asm/ia32.h>
-#include <asm/vsyscall32.h>
-
-#define ELF_NAME "elf/i386"
-
-#define AT_SYSINFO 32
-#define AT_SYSINFO_EHDR 33
-
-int sysctl_vsyscall32 = 1;
-
-#undef ARCH_DLINFO
-#define ARCH_DLINFO do { \
- if (sysctl_vsyscall32) { \
- current->mm->context.vdso = (void *)VSYSCALL32_BASE; \
- NEW_AUX_ENT(AT_SYSINFO, (u32)(u64)VSYSCALL32_VSYSCALL); \
- NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL32_BASE); \
- } \
-} while(0)
-
-struct file;
-struct elf_phdr;
-
-#define IA32_EMULATOR 1
-
-#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000)
-
-#undef ELF_ARCH
-#define ELF_ARCH EM_386
-
-#define ELF_DATA ELFDATA2LSB
-
-#define USE_ELF_CORE_DUMP 1
-
-/* Override elfcore.h */
-#define _LINUX_ELFCORE_H 1
-typedef unsigned int elf_greg_t;
-
-#define ELF_NGREG (sizeof (struct user_regs_struct32) / sizeof(elf_greg_t))
-typedef elf_greg_t elf_gregset_t[ELF_NGREG];
-
-struct elf_siginfo
-{
- int si_signo; /* signal number */
- int si_code; /* extra code */
- int si_errno; /* errno */
-};
-
-#define jiffies_to_timeval(a,b) do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; }while(0)
-
-struct elf_prstatus
-{
- struct elf_siginfo pr_info; /* Info associated with signal */
- short pr_cursig; /* Current signal */
- unsigned int pr_sigpend; /* Set of pending signals */
- unsigned int pr_sighold; /* Set of held signals */
- pid_t pr_pid;
- pid_t pr_ppid;
- pid_t pr_pgrp;
- pid_t pr_sid;
- struct compat_timeval pr_utime; /* User time */
- struct compat_timeval pr_stime; /* System time */
- struct compat_timeval pr_cutime; /* Cumulative user time */
- struct compat_timeval pr_cstime; /* Cumulative system time */
- elf_gregset_t pr_reg; /* GP registers */
- int pr_fpvalid; /* True if math co-processor being used. */
-};
-
-#define ELF_PRARGSZ (80) /* Number of chars for args */
-
-struct elf_prpsinfo
-{
- char pr_state; /* numeric process state */
- char pr_sname; /* char for pr_state */
- char pr_zomb; /* zombie */
- char pr_nice; /* nice val */
- unsigned int pr_flag; /* flags */
- __u16 pr_uid;
- __u16 pr_gid;
- pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid;
- /* Lots missing */
- char pr_fname[16]; /* filename of executable */
- char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */
-};
-
-#define __STR(x) #x
-#define STR(x) __STR(x)
-
-#define _GET_SEG(x) \
- ({ __u32 seg; asm("movl %%" STR(x) ",%0" : "=r"(seg)); seg; })
-
-/* Assumes current==process to be dumped */
-#define ELF_CORE_COPY_REGS(pr_reg, regs) \
- pr_reg[0] = regs->rbx; \
- pr_reg[1] = regs->rcx; \
- pr_reg[2] = regs->rdx; \
- pr_reg[3] = regs->rsi; \
- pr_reg[4] = regs->rdi; \
- pr_reg[5] = regs->rbp; \
- pr_reg[6] = regs->rax; \
- pr_reg[7] = _GET_SEG(ds); \
- pr_reg[8] = _GET_SEG(es); \
- pr_reg[9] = _GET_SEG(fs); \
- pr_reg[10] = _GET_SEG(gs); \
- pr_reg[11] = regs->orig_rax; \
- pr_reg[12] = regs->rip; \
- pr_reg[13] = regs->cs; \
- pr_reg[14] = regs->eflags; \
- pr_reg[15] = regs->rsp; \
- pr_reg[16] = regs->ss;
-
-#define user user32
-
-#undef elf_read_implies_exec
-#define elf_read_implies_exec(ex, executable_stack) (executable_stack != EXSTACK_DISABLE_X)
-//#include <asm/ia32.h>
-#include <linux/elf.h>
-
-typedef struct user_i387_ia32_struct elf_fpregset_t;
-typedef struct user32_fxsr_struct elf_fpxregset_t;
-
-
-static inline void elf_core_copy_regs(elf_gregset_t *elfregs, struct pt_regs *regs)
-{
- ELF_CORE_COPY_REGS((*elfregs), regs)
-}
-
-static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t* elfregs)
-{
- struct pt_regs *pp = task_pt_regs(t);
- ELF_CORE_COPY_REGS((*elfregs), pp);
- /* fix wrong segments */
- (*elfregs)[7] = t->thread.ds;
- (*elfregs)[9] = t->thread.fsindex;
- (*elfregs)[10] = t->thread.gsindex;
- (*elfregs)[8] = t->thread.es;
- return 1;
-}
-
-static inline int
-elf_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *regs, elf_fpregset_t *fpu)
-{
- struct _fpstate_ia32 *fpstate = (void*)fpu;
- mm_segment_t oldfs = get_fs();
-
- if (!tsk_used_math(tsk))
- return 0;
- if (!regs)
- regs = task_pt_regs(tsk);
- if (tsk == current)
- unlazy_fpu(tsk);
- set_fs(KERNEL_DS);
- save_i387_ia32(tsk, fpstate, regs, 1);
- /* Correct for i386 bug. It puts the fop into the upper 16bits of
- the tag word (like FXSAVE), not into the fcs*/
- fpstate->cssel |= fpstate->tag & 0xffff0000;
- set_fs(oldfs);
- return 1;
-}
-
-#define ELF_CORE_COPY_XFPREGS 1
-static inline int
-elf_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregset_t *xfpu)
-{
- struct pt_regs *regs = task_pt_regs(t);
- if (!tsk_used_math(t))
- return 0;
- if (t == current)
- unlazy_fpu(t);
- memcpy(xfpu, &t->thread.i387.fxsave, sizeof(elf_fpxregset_t));
- xfpu->fcs = regs->cs;
- xfpu->fos = t->thread.ds; /* right? */
- return 1;
-}
-
-#undef elf_check_arch
-#define elf_check_arch(x) \
- ((x)->e_machine == EM_386)
-
-extern int force_personality32;
-
-#define ELF_EXEC_PAGESIZE PAGE_SIZE
-#define ELF_HWCAP (boot_cpu_data.x86_capability[0])
-#define ELF_PLATFORM ("i686")
-#define SET_PERSONALITY(ex, ibcs2) \
-do { \
- unsigned long new_flags = 0; \
- if ((ex).e_ident[EI_CLASS] == ELFCLASS32) \
- new_flags = _TIF_IA32; \
- if ((current_thread_info()->flags & _TIF_IA32) \
- != new_flags) \
- set_thread_flag(TIF_ABI_PENDING); \
- else \
- clear_thread_flag(TIF_ABI_PENDING); \
- /* XXX This overwrites the user set personality */ \
- current->personality |= force_personality32; \
-} while (0)
-
-/* Override some function names */
-#define elf_format elf32_format
-
-#define init_elf_binfmt init_elf32_binfmt
-#define exit_elf_binfmt exit_elf32_binfmt
-
-#define load_elf_binary load_elf32_binary
-
-#define ELF_PLAT_INIT(r, load_addr) elf32_init(r)
-
-#undef start_thread
-#define start_thread(regs,new_rip,new_rsp) do { \
- asm volatile("movl %0,%%fs" :: "r" (0)); \
- asm volatile("movl %0,%%es; movl %0,%%ds": :"r" (__USER32_DS)); \
- load_gs_index(0); \
- (regs)->rip = (new_rip); \
- (regs)->rsp = (new_rsp); \
- (regs)->eflags = 0x200; \
- (regs)->cs = __USER32_CS; \
- (regs)->ss = __USER32_DS; \
- set_fs(USER_DS); \
-} while(0)
-
-
-#include <linux/module.h>
-
-MODULE_DESCRIPTION("Binary format loader for compatibility with IA32 ELF binaries.");
-MODULE_AUTHOR("Eric Youngdale, Andi Kleen");
-
-#undef MODULE_DESCRIPTION
-#undef MODULE_AUTHOR
-
-static void elf32_init(struct pt_regs *);
-
-#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
-#define arch_setup_additional_pages syscall32_setup_pages
-extern int syscall32_setup_pages(struct linux_binprm *, int exstack);
-
-#include "../../../fs/binfmt_elf.c"
-
-static void elf32_init(struct pt_regs *regs)
-{
- struct task_struct *me = current;
- regs->rdi = 0;
- regs->rsi = 0;
- regs->rdx = 0;
- regs->rcx = 0;
- regs->rax = 0;
- regs->rbx = 0;
- regs->rbp = 0;
- regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 =
- regs->r13 = regs->r14 = regs->r15 = 0;
- me->thread.fs = 0;
- me->thread.gs = 0;
- me->thread.fsindex = 0;
- me->thread.gsindex = 0;
- me->thread.ds = __USER_DS;
- me->thread.es = __USER_DS;
-}
-
-#ifdef CONFIG_SYSCTL
-/* Register vsyscall32 into the ABI table */
-#include <linux/sysctl.h>
-
-static ctl_table abi_table2[] = {
- {
- .ctl_name = 99,
- .procname = "vsyscall32",
- .data = &sysctl_vsyscall32,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {}
-};
-
-static ctl_table abi_root_table2[] = {
- {
- .ctl_name = CTL_ABI,
- .procname = "abi",
- .mode = 0555,
- .child = abi_table2
- },
- {}
-};
-
-static __init int ia32_binfmt_init(void)
-{
- register_sysctl_table(abi_root_table2);
- return 0;
-}
-__initcall(ia32_binfmt_init);
-#endif
diff --git a/arch/x86_64/ia32/ia32_signal.c b/arch/x86_64/ia32/ia32_signal.c
deleted file mode 100644
index 6ea19c25f90d..000000000000
--- a/arch/x86_64/ia32/ia32_signal.c
+++ /dev/null
@@ -1,617 +0,0 @@
-/*
- * linux/arch/x86_64/ia32/ia32_signal.c
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- *
- * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
- * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
- * 2000-12-* x86-64 compatibility mode signal handling by Andi Kleen
- */
-
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/kernel.h>
-#include <linux/signal.h>
-#include <linux/errno.h>
-#include <linux/wait.h>
-#include <linux/ptrace.h>
-#include <linux/unistd.h>
-#include <linux/stddef.h>
-#include <linux/personality.h>
-#include <linux/compat.h>
-#include <linux/binfmts.h>
-#include <asm/ucontext.h>
-#include <asm/uaccess.h>
-#include <asm/i387.h>
-#include <asm/ia32.h>
-#include <asm/ptrace.h>
-#include <asm/ia32_unistd.h>
-#include <asm/user32.h>
-#include <asm/sigcontext32.h>
-#include <asm/fpu32.h>
-#include <asm/proto.h>
-#include <asm/vsyscall32.h>
-
-#define DEBUG_SIG 0
-
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
-asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset);
-void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
-
-int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
-{
- int err;
- if (!access_ok (VERIFY_WRITE, to, sizeof(compat_siginfo_t)))
- return -EFAULT;
-
- /* If you change siginfo_t structure, please make sure that
- this code is fixed accordingly.
- It should never copy any pad contained in the structure
- to avoid security leaks, but must copy the generic
- 3 ints plus the relevant union member. */
- err = __put_user(from->si_signo, &to->si_signo);
- err |= __put_user(from->si_errno, &to->si_errno);
- err |= __put_user((short)from->si_code, &to->si_code);
-
- if (from->si_code < 0) {
- err |= __put_user(from->si_pid, &to->si_pid);
- err |= __put_user(from->si_uid, &to->si_uid);
- err |= __put_user(ptr_to_compat(from->si_ptr), &to->si_ptr);
- } else {
- /* First 32bits of unions are always present:
- * si_pid === si_band === si_tid === si_addr(LS half) */
- err |= __put_user(from->_sifields._pad[0], &to->_sifields._pad[0]);
- switch (from->si_code >> 16) {
- case __SI_FAULT >> 16:
- break;
- case __SI_CHLD >> 16:
- err |= __put_user(from->si_utime, &to->si_utime);
- err |= __put_user(from->si_stime, &to->si_stime);
- err |= __put_user(from->si_status, &to->si_status);
- /* FALL THROUGH */
- default:
- case __SI_KILL >> 16:
- err |= __put_user(from->si_uid, &to->si_uid);
- break;
- case __SI_POLL >> 16:
- err |= __put_user(from->si_fd, &to->si_fd);
- break;
- case __SI_TIMER >> 16:
- err |= __put_user(from->si_overrun, &to->si_overrun);
- err |= __put_user(ptr_to_compat(from->si_ptr),
- &to->si_ptr);
- break;
- case __SI_RT >> 16: /* This is not generated by the kernel as of now. */
- case __SI_MESGQ >> 16:
- err |= __put_user(from->si_uid, &to->si_uid);
- err |= __put_user(from->si_int, &to->si_int);
- break;
- }
- }
- return err;
-}
-
-int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
-{
- int err;
- u32 ptr32;
- if (!access_ok (VERIFY_READ, from, sizeof(compat_siginfo_t)))
- return -EFAULT;
-
- err = __get_user(to->si_signo, &from->si_signo);
- err |= __get_user(to->si_errno, &from->si_errno);
- err |= __get_user(to->si_code, &from->si_code);
-
- err |= __get_user(to->si_pid, &from->si_pid);
- err |= __get_user(to->si_uid, &from->si_uid);
- err |= __get_user(ptr32, &from->si_ptr);
- to->si_ptr = compat_ptr(ptr32);
-
- return err;
-}
-
-asmlinkage long
-sys32_sigsuspend(int history0, int history1, old_sigset_t mask)
-{
- mask &= _BLOCKABLE;
- spin_lock_irq(&current->sighand->siglock);
- current->saved_sigmask = current->blocked;
- siginitset(&current->blocked, mask);
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
-
- current->state = TASK_INTERRUPTIBLE;
- schedule();
- set_thread_flag(TIF_RESTORE_SIGMASK);
- return -ERESTARTNOHAND;
-}
-
-asmlinkage long
-sys32_sigaltstack(const stack_ia32_t __user *uss_ptr,
- stack_ia32_t __user *uoss_ptr,
- struct pt_regs *regs)
-{
- stack_t uss,uoss;
- int ret;
- mm_segment_t seg;
- if (uss_ptr) {
- u32 ptr;
- memset(&uss,0,sizeof(stack_t));
- if (!access_ok(VERIFY_READ,uss_ptr,sizeof(stack_ia32_t)) ||
- __get_user(ptr, &uss_ptr->ss_sp) ||
- __get_user(uss.ss_flags, &uss_ptr->ss_flags) ||
- __get_user(uss.ss_size, &uss_ptr->ss_size))
- return -EFAULT;
- uss.ss_sp = compat_ptr(ptr);
- }
- seg = get_fs();
- set_fs(KERNEL_DS);
- ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, regs->rsp);
- set_fs(seg);
- if (ret >= 0 && uoss_ptr) {
- if (!access_ok(VERIFY_WRITE,uoss_ptr,sizeof(stack_ia32_t)) ||
- __put_user(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp) ||
- __put_user(uoss.ss_flags, &uoss_ptr->ss_flags) ||
- __put_user(uoss.ss_size, &uoss_ptr->ss_size))
- ret = -EFAULT;
- }
- return ret;
-}
-
-/*
- * Do a signal return; undo the signal stack.
- */
-
-struct sigframe
-{
- u32 pretcode;
- int sig;
- struct sigcontext_ia32 sc;
- struct _fpstate_ia32 fpstate;
- unsigned int extramask[_COMPAT_NSIG_WORDS-1];
- char retcode[8];
-};
-
-struct rt_sigframe
-{
- u32 pretcode;
- int sig;
- u32 pinfo;
- u32 puc;
- compat_siginfo_t info;
- struct ucontext_ia32 uc;
- struct _fpstate_ia32 fpstate;
- char retcode[8];
-};
-
-static int
-ia32_restore_sigcontext(struct pt_regs *regs, struct sigcontext_ia32 __user *sc, unsigned int *peax)
-{
- unsigned int err = 0;
-
- /* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
-
-#if DEBUG_SIG
- printk("SIG restore_sigcontext: sc=%p err(%x) eip(%x) cs(%x) flg(%x)\n",
- sc, sc->err, sc->eip, sc->cs, sc->eflags);
-#endif
-#define COPY(x) { \
- unsigned int reg; \
- err |= __get_user(reg, &sc->e ##x); \
- regs->r ## x = reg; \
-}
-
-#define RELOAD_SEG(seg,mask) \
- { unsigned int cur; \
- unsigned short pre; \
- err |= __get_user(pre, &sc->seg); \
- asm volatile("movl %%" #seg ",%0" : "=r" (cur)); \
- pre |= mask; \
- if (pre != cur) loadsegment(seg,pre); }
-
- /* Reload fs and gs if they have changed in the signal handler.
- This does not handle long fs/gs base changes in the handler, but
- does not clobber them at least in the normal case. */
-
- {
- unsigned gs, oldgs;
- err |= __get_user(gs, &sc->gs);
- gs |= 3;
- asm("movl %%gs,%0" : "=r" (oldgs));
- if (gs != oldgs)
- load_gs_index(gs);
- }
- RELOAD_SEG(fs,3);
- RELOAD_SEG(ds,3);
- RELOAD_SEG(es,3);
-
- COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
- COPY(dx); COPY(cx); COPY(ip);
- /* Don't touch extended registers */
-
- err |= __get_user(regs->cs, &sc->cs);
- regs->cs |= 3;
- err |= __get_user(regs->ss, &sc->ss);
- regs->ss |= 3;
-
- {
- unsigned int tmpflags;
- err |= __get_user(tmpflags, &sc->eflags);
- regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5);
- regs->orig_rax = -1; /* disable syscall checks */
- }
-
- {
- u32 tmp;
- struct _fpstate_ia32 __user * buf;
- err |= __get_user(tmp, &sc->fpstate);
- buf = compat_ptr(tmp);
- if (buf) {
- if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
- goto badframe;
- err |= restore_i387_ia32(current, buf, 0);
- } else {
- struct task_struct *me = current;
- if (used_math()) {
- clear_fpu(me);
- clear_used_math();
- }
- }
- }
-
- {
- u32 tmp;
- err |= __get_user(tmp, &sc->eax);
- *peax = tmp;
- }
- return err;
-
-badframe:
- return 1;
-}
-
-asmlinkage long sys32_sigreturn(struct pt_regs *regs)
-{
- struct sigframe __user *frame = (struct sigframe __user *)(regs->rsp-8);
- sigset_t set;
- unsigned int eax;
-
- if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
- goto badframe;
- if (__get_user(set.sig[0], &frame->sc.oldmask)
- || (_COMPAT_NSIG_WORDS > 1
- && __copy_from_user((((char *) &set.sig) + 4), &frame->extramask,
- sizeof(frame->extramask))))
- goto badframe;
-
- sigdelsetmask(&set, ~_BLOCKABLE);
- spin_lock_irq(&current->sighand->siglock);
- current->blocked = set;
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
-
- if (ia32_restore_sigcontext(regs, &frame->sc, &eax))
- goto badframe;
- return eax;
-
-badframe:
- signal_fault(regs, frame, "32bit sigreturn");
- return 0;
-}
-
-asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs)
-{
- struct rt_sigframe __user *frame;
- sigset_t set;
- unsigned int eax;
- struct pt_regs tregs;
-
- frame = (struct rt_sigframe __user *)(regs->rsp - 4);
-
- if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
- goto badframe;
- if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
- goto badframe;
-
- sigdelsetmask(&set, ~_BLOCKABLE);
- spin_lock_irq(&current->sighand->siglock);
- current->blocked = set;
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
-
- if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax))
- goto badframe;
-
- tregs = *regs;
- if (sys32_sigaltstack(&frame->uc.uc_stack, NULL, &tregs) == -EFAULT)
- goto badframe;
-
- return eax;
-
-badframe:
- signal_fault(regs,frame,"32bit rt sigreturn");
- return 0;
-}
-
-/*
- * Set up a signal frame.
- */
-
-static int
-ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, struct _fpstate_ia32 __user *fpstate,
- struct pt_regs *regs, unsigned int mask)
-{
- int tmp, err = 0;
-
- tmp = 0;
- __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp));
- err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
- __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp));
- err |= __put_user(tmp, (unsigned int __user *)&sc->fs);
- __asm__("movl %%ds,%0" : "=r"(tmp): "0"(tmp));
- err |= __put_user(tmp, (unsigned int __user *)&sc->ds);
- __asm__("movl %%es,%0" : "=r"(tmp): "0"(tmp));
- err |= __put_user(tmp, (unsigned int __user *)&sc->es);
-
- err |= __put_user((u32)regs->rdi, &sc->edi);
- err |= __put_user((u32)regs->rsi, &sc->esi);
- err |= __put_user((u32)regs->rbp, &sc->ebp);
- err |= __put_user((u32)regs->rsp, &sc->esp);
- err |= __put_user((u32)regs->rbx, &sc->ebx);
- err |= __put_user((u32)regs->rdx, &sc->edx);
- err |= __put_user((u32)regs->rcx, &sc->ecx);
- err |= __put_user((u32)regs->rax, &sc->eax);
- err |= __put_user((u32)regs->cs, &sc->cs);
- err |= __put_user((u32)regs->ss, &sc->ss);
- err |= __put_user(current->thread.trap_no, &sc->trapno);
- err |= __put_user(current->thread.error_code, &sc->err);
- err |= __put_user((u32)regs->rip, &sc->eip);
- err |= __put_user((u32)regs->eflags, &sc->eflags);
- err |= __put_user((u32)regs->rsp, &sc->esp_at_signal);
-
- tmp = save_i387_ia32(current, fpstate, regs, 0);
- if (tmp < 0)
- err = -EFAULT;
- else {
- clear_used_math();
- stts();
- err |= __put_user(ptr_to_compat(tmp ? fpstate : NULL),
- &sc->fpstate);
- }
-
- /* non-iBCS2 extensions.. */
- err |= __put_user(mask, &sc->oldmask);
- err |= __put_user(current->thread.cr2, &sc->cr2);
-
- return err;
-}
-
-/*
- * Determine which stack to use..
- */
-static void __user *
-get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
-{
- unsigned long rsp;
-
- /* Default to using normal stack */
- rsp = regs->rsp;
-
- /* This is the X/Open sanctioned signal stack switching. */
- if (ka->sa.sa_flags & SA_ONSTACK) {
- if (sas_ss_flags(rsp) == 0)
- rsp = current->sas_ss_sp + current->sas_ss_size;
- }
-
- /* This is the legacy signal stack switching. */
- else if ((regs->ss & 0xffff) != __USER_DS &&
- !(ka->sa.sa_flags & SA_RESTORER) &&
- ka->sa.sa_restorer) {
- rsp = (unsigned long) ka->sa.sa_restorer;
- }
-
- rsp -= frame_size;
- /* Align the stack pointer according to the i386 ABI,
- * i.e. so that on function entry ((sp + 4) & 15) == 0. */
- rsp = ((rsp + 4) & -16ul) - 4;
- return (void __user *) rsp;
-}
-
-int ia32_setup_frame(int sig, struct k_sigaction *ka,
- compat_sigset_t *set, struct pt_regs * regs)
-{
- struct sigframe __user *frame;
- int err = 0;
-
- frame = get_sigframe(ka, regs, sizeof(*frame));
-
- if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
- goto give_sigsegv;
-
- err |= __put_user(sig, &frame->sig);
- if (err)
- goto give_sigsegv;
-
- err |= ia32_setup_sigcontext(&frame->sc, &frame->fpstate, regs,
- set->sig[0]);
- if (err)
- goto give_sigsegv;
-
- if (_COMPAT_NSIG_WORDS > 1) {
- err |= __copy_to_user(frame->extramask, &set->sig[1],
- sizeof(frame->extramask));
- }
- if (err)
- goto give_sigsegv;
-
- /* Return stub is in 32bit vsyscall page */
- {
- void __user *restorer;
- if (current->binfmt->hasvdso)
- restorer = VSYSCALL32_SIGRETURN;
- else
- restorer = (void *)&frame->retcode;
- if (ka->sa.sa_flags & SA_RESTORER)
- restorer = ka->sa.sa_restorer;
- err |= __put_user(ptr_to_compat(restorer), &frame->pretcode);
- }
- /* These are actually not used anymore, but left because some
- gdb versions depend on them as a marker. */
- {
- /* copy_to_user optimizes that into a single 8 byte store */
- static const struct {
- u16 poplmovl;
- u32 val;
- u16 int80;
- u16 pad;
- } __attribute__((packed)) code = {
- 0xb858, /* popl %eax ; movl $...,%eax */
- __NR_ia32_sigreturn,
- 0x80cd, /* int $0x80 */
- 0,
- };
- err |= __copy_to_user(frame->retcode, &code, 8);
- }
- if (err)
- goto give_sigsegv;
-
- /* Set up registers for signal handler */
- regs->rsp = (unsigned long) frame;
- regs->rip = (unsigned long) ka->sa.sa_handler;
-
- /* Make -mregparm=3 work */
- regs->rax = sig;
- regs->rdx = 0;
- regs->rcx = 0;
-
- asm volatile("movl %0,%%ds" :: "r" (__USER32_DS));
- asm volatile("movl %0,%%es" :: "r" (__USER32_DS));
-
- regs->cs = __USER32_CS;
- regs->ss = __USER32_DS;
-
- set_fs(USER_DS);
- regs->eflags &= ~TF_MASK;
- if (test_thread_flag(TIF_SINGLESTEP))
- ptrace_notify(SIGTRAP);
-
-#if DEBUG_SIG
- printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n",
- current->comm, current->pid, frame, regs->rip, frame->pretcode);
-#endif
-
- return 0;
-
-give_sigsegv:
- force_sigsegv(sig, current);
- return -EFAULT;
-}
-
-int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
- compat_sigset_t *set, struct pt_regs * regs)
-{
- struct rt_sigframe __user *frame;
- int err = 0;
-
- frame = get_sigframe(ka, regs, sizeof(*frame));
-
- if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
- goto give_sigsegv;
-
- {
- struct exec_domain *ed = current_thread_info()->exec_domain;
- err |= __put_user((ed
- && ed->signal_invmap
- && sig < 32
- ? ed->signal_invmap[sig]
- : sig),
- &frame->sig);
- }
- err |= __put_user(ptr_to_compat(&frame->info), &frame->pinfo);
- err |= __put_user(ptr_to_compat(&frame->uc), &frame->puc);
- err |= copy_siginfo_to_user32(&frame->info, info);
- if (err)
- goto give_sigsegv;
-
- /* Create the ucontext. */
- err |= __put_user(0, &frame->uc.uc_flags);
- err |= __put_user(0, &frame->uc.uc_link);
- err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
- err |= __put_user(sas_ss_flags(regs->rsp),
- &frame->uc.uc_stack.ss_flags);
- err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
- err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate,
- regs, set->sig[0]);
- err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
- if (err)
- goto give_sigsegv;
-
-
- {
- void __user *restorer = VSYSCALL32_RTSIGRETURN;
- if (ka->sa.sa_flags & SA_RESTORER)
- restorer = ka->sa.sa_restorer;
- err |= __put_user(ptr_to_compat(restorer), &frame->pretcode);
- }
-
- /* This is movl $,%eax ; int $0x80 */
- /* Not actually used anymore, but left because some gdb versions
- need it. */
- {
- /* __copy_to_user optimizes that into a single 8 byte store */
- static const struct {
- u8 movl;
- u32 val;
- u16 int80;
- u16 pad;
- u8 pad2;
- } __attribute__((packed)) code = {
- 0xb8,
- __NR_ia32_rt_sigreturn,
- 0x80cd,
- 0,
- };
- err |= __copy_to_user(frame->retcode, &code, 8);
- }
- if (err)
- goto give_sigsegv;
-
- /* Set up registers for signal handler */
- regs->rsp = (unsigned long) frame;
- regs->rip = (unsigned long) ka->sa.sa_handler;
-
- /* Make -mregparm=3 work */
- regs->rax = sig;
- regs->rdx = (unsigned long) &frame->info;
- regs->rcx = (unsigned long) &frame->uc;
-
- /* Make -mregparm=3 work */
- regs->rax = sig;
- regs->rdx = (unsigned long) &frame->info;
- regs->rcx = (unsigned long) &frame->uc;
-
- asm volatile("movl %0,%%ds" :: "r" (__USER32_DS));
- asm volatile("movl %0,%%es" :: "r" (__USER32_DS));
-
- regs->cs = __USER32_CS;
- regs->ss = __USER32_DS;
-
- set_fs(USER_DS);
- regs->eflags &= ~TF_MASK;
- if (test_thread_flag(TIF_SINGLESTEP))
- ptrace_notify(SIGTRAP);
-
-#if DEBUG_SIG
- printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n",
- current->comm, current->pid, frame, regs->rip, frame->pretcode);
-#endif
-
- return 0;
-
-give_sigsegv:
- force_sigsegv(sig, current);
- return -EFAULT;
-}
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
deleted file mode 100644
index 18b231810908..000000000000
--- a/arch/x86_64/ia32/ia32entry.S
+++ /dev/null
@@ -1,736 +0,0 @@
-/*
- * Compatibility mode system call entry point for x86-64.
- *
- * Copyright 2000-2002 Andi Kleen, SuSE Labs.
- */
-
-#include <asm/dwarf2.h>
-#include <asm/calling.h>
-#include <asm/asm-offsets.h>
-#include <asm/current.h>
-#include <asm/errno.h>
-#include <asm/ia32_unistd.h>
-#include <asm/thread_info.h>
-#include <asm/segment.h>
-#include <asm/vsyscall32.h>
-#include <asm/irqflags.h>
-#include <linux/linkage.h>
-
-#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
-
- .macro IA32_ARG_FIXUP noebp=0
- movl %edi,%r8d
- .if \noebp
- .else
- movl %ebp,%r9d
- .endif
- xchg %ecx,%esi
- movl %ebx,%edi
- movl %edx,%edx /* zero extension */
- .endm
-
- /* clobbers %eax */
- .macro CLEAR_RREGS
- xorl %eax,%eax
- movq %rax,R11(%rsp)
- movq %rax,R10(%rsp)
- movq %rax,R9(%rsp)
- movq %rax,R8(%rsp)
- .endm
-
- .macro LOAD_ARGS32 offset
- movl \offset(%rsp),%r11d
- movl \offset+8(%rsp),%r10d
- movl \offset+16(%rsp),%r9d
- movl \offset+24(%rsp),%r8d
- movl \offset+40(%rsp),%ecx
- movl \offset+48(%rsp),%edx
- movl \offset+56(%rsp),%esi
- movl \offset+64(%rsp),%edi
- movl \offset+72(%rsp),%eax
- .endm
-
- .macro CFI_STARTPROC32 simple
- CFI_STARTPROC \simple
- CFI_UNDEFINED r8
- CFI_UNDEFINED r9
- CFI_UNDEFINED r10
- CFI_UNDEFINED r11
- CFI_UNDEFINED r12
- CFI_UNDEFINED r13
- CFI_UNDEFINED r14
- CFI_UNDEFINED r15
- .endm
-
-/*
- * 32bit SYSENTER instruction entry.
- *
- * Arguments:
- * %eax System call number.
- * %ebx Arg1
- * %ecx Arg2
- * %edx Arg3
- * %esi Arg4
- * %edi Arg5
- * %ebp user stack
- * 0(%ebp) Arg6
- *
- * Interrupts off.
- *
- * This is purely a fast path. For anything complicated we use the int 0x80
- * path below. Set up a complete hardware stack frame to share code
- * with the int 0x80 path.
- */
-ENTRY(ia32_sysenter_target)
- CFI_STARTPROC32 simple
- CFI_SIGNAL_FRAME
- CFI_DEF_CFA rsp,0
- CFI_REGISTER rsp,rbp
- swapgs
- movq %gs:pda_kernelstack, %rsp
- addq $(PDA_STACKOFFSET),%rsp
- /*
- * No need to follow this irqs on/off section: the syscall
- * disabled irqs, here we enable it straight after entry:
- */
- sti
- movl %ebp,%ebp /* zero extension */
- pushq $__USER32_DS
- CFI_ADJUST_CFA_OFFSET 8
- /*CFI_REL_OFFSET ss,0*/
- pushq %rbp
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rsp,0
- pushfq
- CFI_ADJUST_CFA_OFFSET 8
- /*CFI_REL_OFFSET rflags,0*/
- movl $VSYSCALL32_SYSEXIT, %r10d
- CFI_REGISTER rip,r10
- pushq $__USER32_CS
- CFI_ADJUST_CFA_OFFSET 8
- /*CFI_REL_OFFSET cs,0*/
- movl %eax, %eax
- pushq %r10
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rip,0
- pushq %rax
- CFI_ADJUST_CFA_OFFSET 8
- cld
- SAVE_ARGS 0,0,1
- /* no need to do an access_ok check here because rbp has been
- 32bit zero extended */
-1: movl (%rbp),%r9d
- .section __ex_table,"a"
- .quad 1b,ia32_badarg
- .previous
- GET_THREAD_INFO(%r10)
- orl $TS_COMPAT,threadinfo_status(%r10)
- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
- CFI_REMEMBER_STATE
- jnz sysenter_tracesys
-sysenter_do_call:
- cmpl $(IA32_NR_syscalls-1),%eax
- ja ia32_badsys
- IA32_ARG_FIXUP 1
- call *ia32_sys_call_table(,%rax,8)
- movq %rax,RAX-ARGOFFSET(%rsp)
- GET_THREAD_INFO(%r10)
- cli
- TRACE_IRQS_OFF
- testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
- jnz int_ret_from_sys_call
- andl $~TS_COMPAT,threadinfo_status(%r10)
- /* clear IF, that popfq doesn't enable interrupts early */
- andl $~0x200,EFLAGS-R11(%rsp)
- RESTORE_ARGS 1,24,1,1,1,1
- popfq
- CFI_ADJUST_CFA_OFFSET -8
- /*CFI_RESTORE rflags*/
- popq %rcx /* User %esp */
- CFI_ADJUST_CFA_OFFSET -8
- CFI_REGISTER rsp,rcx
- movl $VSYSCALL32_SYSEXIT,%edx /* User %eip */
- CFI_REGISTER rip,rdx
- TRACE_IRQS_ON
- swapgs
- sti /* sti only takes effect after the next instruction */
- /* sysexit */
- .byte 0xf, 0x35
-
-sysenter_tracesys:
- CFI_RESTORE_STATE
- SAVE_REST
- CLEAR_RREGS
- movq $-ENOSYS,RAX(%rsp) /* really needed? */
- movq %rsp,%rdi /* &pt_regs -> arg1 */
- call syscall_trace_enter
- LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
- RESTORE_REST
- movl %ebp, %ebp
- /* no need to do an access_ok check here because rbp has been
- 32bit zero extended */
-1: movl (%rbp),%r9d
- .section __ex_table,"a"
- .quad 1b,ia32_badarg
- .previous
- jmp sysenter_do_call
- CFI_ENDPROC
-ENDPROC(ia32_sysenter_target)
-
-/*
- * 32bit SYSCALL instruction entry.
- *
- * Arguments:
- * %eax System call number.
- * %ebx Arg1
- * %ecx return EIP
- * %edx Arg3
- * %esi Arg4
- * %edi Arg5
- * %ebp Arg2 [note: not saved in the stack frame, should not be touched]
- * %esp user stack
- * 0(%esp) Arg6
- *
- * Interrupts off.
- *
- * This is purely a fast path. For anything complicated we use the int 0x80
- * path below. Set up a complete hardware stack frame to share code
- * with the int 0x80 path.
- */
-ENTRY(ia32_cstar_target)
- CFI_STARTPROC32 simple
- CFI_SIGNAL_FRAME
- CFI_DEF_CFA rsp,PDA_STACKOFFSET
- CFI_REGISTER rip,rcx
- /*CFI_REGISTER rflags,r11*/
- swapgs
- movl %esp,%r8d
- CFI_REGISTER rsp,r8
- movq %gs:pda_kernelstack,%rsp
- /*
- * No need to follow this irqs on/off section: the syscall
- * disabled irqs and here we enable it straight after entry:
- */
- sti
- SAVE_ARGS 8,1,1
- movl %eax,%eax /* zero extension */
- movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
- movq %rcx,RIP-ARGOFFSET(%rsp)
- CFI_REL_OFFSET rip,RIP-ARGOFFSET
- movq %rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */
- movl %ebp,%ecx
- movq $__USER32_CS,CS-ARGOFFSET(%rsp)
- movq $__USER32_DS,SS-ARGOFFSET(%rsp)
- movq %r11,EFLAGS-ARGOFFSET(%rsp)
- /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
- movq %r8,RSP-ARGOFFSET(%rsp)
- CFI_REL_OFFSET rsp,RSP-ARGOFFSET
- /* no need to do an access_ok check here because r8 has been
- 32bit zero extended */
- /* hardware stack frame is complete now */
-1: movl (%r8),%r9d
- .section __ex_table,"a"
- .quad 1b,ia32_badarg
- .previous
- GET_THREAD_INFO(%r10)
- orl $TS_COMPAT,threadinfo_status(%r10)
- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
- CFI_REMEMBER_STATE
- jnz cstar_tracesys
-cstar_do_call:
- cmpl $IA32_NR_syscalls-1,%eax
- ja ia32_badsys
- IA32_ARG_FIXUP 1
- call *ia32_sys_call_table(,%rax,8)
- movq %rax,RAX-ARGOFFSET(%rsp)
- GET_THREAD_INFO(%r10)
- cli
- TRACE_IRQS_OFF
- testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
- jnz int_ret_from_sys_call
- andl $~TS_COMPAT,threadinfo_status(%r10)
- RESTORE_ARGS 1,-ARG_SKIP,1,1,1
- movl RIP-ARGOFFSET(%rsp),%ecx
- CFI_REGISTER rip,rcx
- movl EFLAGS-ARGOFFSET(%rsp),%r11d
- /*CFI_REGISTER rflags,r11*/
- TRACE_IRQS_ON
- movl RSP-ARGOFFSET(%rsp),%esp
- CFI_RESTORE rsp
- swapgs
- sysretl
-
-cstar_tracesys:
- CFI_RESTORE_STATE
- SAVE_REST
- CLEAR_RREGS
- movq $-ENOSYS,RAX(%rsp) /* really needed? */
- movq %rsp,%rdi /* &pt_regs -> arg1 */
- call syscall_trace_enter
- LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
- RESTORE_REST
- movl RSP-ARGOFFSET(%rsp), %r8d
- /* no need to do an access_ok check here because r8 has been
- 32bit zero extended */
-1: movl (%r8),%r9d
- .section __ex_table,"a"
- .quad 1b,ia32_badarg
- .previous
- jmp cstar_do_call
-END(ia32_cstar_target)
-
-ia32_badarg:
- movq $-EFAULT,%rax
- jmp ia32_sysret
- CFI_ENDPROC
-
-/*
- * Emulated IA32 system calls via int 0x80.
- *
- * Arguments:
- * %eax System call number.
- * %ebx Arg1
- * %ecx Arg2
- * %edx Arg3
- * %esi Arg4
- * %edi Arg5
- * %ebp Arg6 [note: not saved in the stack frame, should not be touched]
- *
- * Notes:
- * Uses the same stack frame as the x86-64 version.
- * All registers except %eax must be saved (but ptrace may violate that)
- * Arguments are zero extended. For system calls that want sign extension and
- * take long arguments a wrapper is needed. Most calls can just be called
- * directly.
- * Assumes it is only called from user space and entered with interrupts off.
- */
-
-ENTRY(ia32_syscall)
- CFI_STARTPROC32 simple
- CFI_SIGNAL_FRAME
- CFI_DEF_CFA rsp,SS+8-RIP
- /*CFI_REL_OFFSET ss,SS-RIP*/
- CFI_REL_OFFSET rsp,RSP-RIP
- /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/
- /*CFI_REL_OFFSET cs,CS-RIP*/
- CFI_REL_OFFSET rip,RIP-RIP
- swapgs
- /*
- * No need to follow this irqs on/off section: the syscall
- * disabled irqs and here we enable it straight after entry:
- */
- sti
- movl %eax,%eax
- pushq %rax
- CFI_ADJUST_CFA_OFFSET 8
- cld
- /* note the registers are not zero extended to the sf.
- this could be a problem. */
- SAVE_ARGS 0,0,1
- GET_THREAD_INFO(%r10)
- orl $TS_COMPAT,threadinfo_status(%r10)
- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
- jnz ia32_tracesys
-ia32_do_syscall:
- cmpl $(IA32_NR_syscalls-1),%eax
- ja ia32_badsys
- IA32_ARG_FIXUP
- call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
-ia32_sysret:
- movq %rax,RAX-ARGOFFSET(%rsp)
- jmp int_ret_from_sys_call
-
-ia32_tracesys:
- SAVE_REST
- CLEAR_RREGS
- movq $-ENOSYS,RAX(%rsp) /* really needed? */
- movq %rsp,%rdi /* &pt_regs -> arg1 */
- call syscall_trace_enter
- LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
- RESTORE_REST
- jmp ia32_do_syscall
-END(ia32_syscall)
-
-ia32_badsys:
- movq $0,ORIG_RAX-ARGOFFSET(%rsp)
- movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
- jmp int_ret_from_sys_call
-
-quiet_ni_syscall:
- movq $-ENOSYS,%rax
- ret
- CFI_ENDPROC
-
- .macro PTREGSCALL label, func, arg
- .globl \label
-\label:
- leaq \func(%rip),%rax
- leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
- jmp ia32_ptregs_common
- .endm
-
- CFI_STARTPROC32
-
- PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi
- PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi
- PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx
- PTREGSCALL stub32_sigsuspend, sys32_sigsuspend, %rcx
- PTREGSCALL stub32_execve, sys32_execve, %rcx
- PTREGSCALL stub32_fork, sys_fork, %rdi
- PTREGSCALL stub32_clone, sys32_clone, %rdx
- PTREGSCALL stub32_vfork, sys_vfork, %rdi
- PTREGSCALL stub32_iopl, sys_iopl, %rsi
- PTREGSCALL stub32_rt_sigsuspend, sys_rt_sigsuspend, %rdx
-
-ENTRY(ia32_ptregs_common)
- popq %r11
- CFI_ENDPROC
- CFI_STARTPROC32 simple
- CFI_SIGNAL_FRAME
- CFI_DEF_CFA rsp,SS+8-ARGOFFSET
- CFI_REL_OFFSET rax,RAX-ARGOFFSET
- CFI_REL_OFFSET rcx,RCX-ARGOFFSET
- CFI_REL_OFFSET rdx,RDX-ARGOFFSET
- CFI_REL_OFFSET rsi,RSI-ARGOFFSET
- CFI_REL_OFFSET rdi,RDI-ARGOFFSET
- CFI_REL_OFFSET rip,RIP-ARGOFFSET
-/* CFI_REL_OFFSET cs,CS-ARGOFFSET*/
-/* CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
- CFI_REL_OFFSET rsp,RSP-ARGOFFSET
-/* CFI_REL_OFFSET ss,SS-ARGOFFSET*/
- SAVE_REST
- call *%rax
- RESTORE_REST
- jmp ia32_sysret /* misbalances the return cache */
- CFI_ENDPROC
-END(ia32_ptregs_common)
-
- .section .rodata,"a"
- .align 8
-ia32_sys_call_table:
- .quad sys_restart_syscall
- .quad sys_exit
- .quad stub32_fork
- .quad sys_read
- .quad sys_write
- .quad compat_sys_open /* 5 */
- .quad sys_close
- .quad sys32_waitpid
- .quad sys_creat
- .quad sys_link
- .quad sys_unlink /* 10 */
- .quad stub32_execve
- .quad sys_chdir
- .quad compat_sys_time
- .quad sys_mknod
- .quad sys_chmod /* 15 */
- .quad sys_lchown16
- .quad quiet_ni_syscall /* old break syscall holder */
- .quad sys_stat
- .quad sys32_lseek
- .quad sys_getpid /* 20 */
- .quad compat_sys_mount /* mount */
- .quad sys_oldumount /* old_umount */
- .quad sys_setuid16
- .quad sys_getuid16
- .quad compat_sys_stime /* stime */ /* 25 */
- .quad sys32_ptrace /* ptrace */
- .quad sys_alarm
- .quad sys_fstat /* (old)fstat */
- .quad sys_pause
- .quad compat_sys_utime /* 30 */
- .quad quiet_ni_syscall /* old stty syscall holder */
- .quad quiet_ni_syscall /* old gtty syscall holder */
- .quad sys_access
- .quad sys_nice
- .quad quiet_ni_syscall /* 35 */ /* old ftime syscall holder */
- .quad sys_sync
- .quad sys32_kill
- .quad sys_rename
- .quad sys_mkdir
- .quad sys_rmdir /* 40 */
- .quad sys_dup
- .quad sys32_pipe
- .quad compat_sys_times
- .quad quiet_ni_syscall /* old prof syscall holder */
- .quad sys_brk /* 45 */
- .quad sys_setgid16
- .quad sys_getgid16
- .quad sys_signal
- .quad sys_geteuid16
- .quad sys_getegid16 /* 50 */
- .quad sys_acct
- .quad sys_umount /* new_umount */
- .quad quiet_ni_syscall /* old lock syscall holder */
- .quad compat_sys_ioctl
- .quad compat_sys_fcntl64 /* 55 */
- .quad quiet_ni_syscall /* old mpx syscall holder */
- .quad sys_setpgid
- .quad quiet_ni_syscall /* old ulimit syscall holder */
- .quad sys32_olduname
- .quad sys_umask /* 60 */
- .quad sys_chroot
- .quad sys32_ustat
- .quad sys_dup2
- .quad sys_getppid
- .quad sys_getpgrp /* 65 */
- .quad sys_setsid
- .quad sys32_sigaction
- .quad sys_sgetmask
- .quad sys_ssetmask
- .quad sys_setreuid16 /* 70 */
- .quad sys_setregid16
- .quad stub32_sigsuspend
- .quad compat_sys_sigpending
- .quad sys_sethostname
- .quad compat_sys_setrlimit /* 75 */
- .quad compat_sys_old_getrlimit /* old_getrlimit */
- .quad compat_sys_getrusage
- .quad sys32_gettimeofday
- .quad sys32_settimeofday
- .quad sys_getgroups16 /* 80 */
- .quad sys_setgroups16
- .quad sys32_old_select
- .quad sys_symlink
- .quad sys_lstat
- .quad sys_readlink /* 85 */
- .quad sys_uselib
- .quad sys_swapon
- .quad sys_reboot
- .quad compat_sys_old_readdir
- .quad sys32_mmap /* 90 */
- .quad sys_munmap
- .quad sys_truncate
- .quad sys_ftruncate
- .quad sys_fchmod
- .quad sys_fchown16 /* 95 */
- .quad sys_getpriority
- .quad sys_setpriority
- .quad quiet_ni_syscall /* old profil syscall holder */
- .quad compat_sys_statfs
- .quad compat_sys_fstatfs /* 100 */
- .quad sys_ioperm
- .quad compat_sys_socketcall
- .quad sys_syslog
- .quad compat_sys_setitimer
- .quad compat_sys_getitimer /* 105 */
- .quad compat_sys_newstat
- .quad compat_sys_newlstat
- .quad compat_sys_newfstat
- .quad sys32_uname
- .quad stub32_iopl /* 110 */
- .quad sys_vhangup
- .quad quiet_ni_syscall /* old "idle" system call */
- .quad sys32_vm86_warning /* vm86old */
- .quad compat_sys_wait4
- .quad sys_swapoff /* 115 */
- .quad compat_sys_sysinfo
- .quad sys32_ipc
- .quad sys_fsync
- .quad stub32_sigreturn
- .quad stub32_clone /* 120 */
- .quad sys_setdomainname
- .quad sys_uname
- .quad sys_modify_ldt
- .quad compat_sys_adjtimex
- .quad sys32_mprotect /* 125 */
- .quad compat_sys_sigprocmask
- .quad quiet_ni_syscall /* create_module */
- .quad sys_init_module
- .quad sys_delete_module
- .quad quiet_ni_syscall /* 130 get_kernel_syms */
- .quad sys32_quotactl
- .quad sys_getpgid
- .quad sys_fchdir
- .quad quiet_ni_syscall /* bdflush */
- .quad sys_sysfs /* 135 */
- .quad sys_personality
- .quad quiet_ni_syscall /* for afs_syscall */
- .quad sys_setfsuid16
- .quad sys_setfsgid16
- .quad sys_llseek /* 140 */
- .quad compat_sys_getdents
- .quad compat_sys_select
- .quad sys_flock
- .quad sys_msync
- .quad compat_sys_readv /* 145 */
- .quad compat_sys_writev
- .quad sys_getsid
- .quad sys_fdatasync
- .quad sys32_sysctl /* sysctl */
- .quad sys_mlock /* 150 */
- .quad sys_munlock
- .quad sys_mlockall
- .quad sys_munlockall
- .quad sys_sched_setparam
- .quad sys_sched_getparam /* 155 */
- .quad sys_sched_setscheduler
- .quad sys_sched_getscheduler
- .quad sys_sched_yield
- .quad sys_sched_get_priority_max
- .quad sys_sched_get_priority_min /* 160 */
- .quad sys32_sched_rr_get_interval
- .quad compat_sys_nanosleep
- .quad sys_mremap
- .quad sys_setresuid16
- .quad sys_getresuid16 /* 165 */
- .quad sys32_vm86_warning /* vm86 */
- .quad quiet_ni_syscall /* query_module */
- .quad sys_poll
- .quad compat_sys_nfsservctl
- .quad sys_setresgid16 /* 170 */
- .quad sys_getresgid16
- .quad sys_prctl
- .quad stub32_rt_sigreturn
- .quad sys32_rt_sigaction
- .quad sys32_rt_sigprocmask /* 175 */
- .quad sys32_rt_sigpending
- .quad compat_sys_rt_sigtimedwait
- .quad sys32_rt_sigqueueinfo
- .quad stub32_rt_sigsuspend
- .quad sys32_pread /* 180 */
- .quad sys32_pwrite
- .quad sys_chown16
- .quad sys_getcwd
- .quad sys_capget
- .quad sys_capset
- .quad stub32_sigaltstack
- .quad sys32_sendfile
- .quad quiet_ni_syscall /* streams1 */
- .quad quiet_ni_syscall /* streams2 */
- .quad stub32_vfork /* 190 */
- .quad compat_sys_getrlimit
- .quad sys32_mmap2
- .quad sys32_truncate64
- .quad sys32_ftruncate64
- .quad sys32_stat64 /* 195 */
- .quad sys32_lstat64
- .quad sys32_fstat64
- .quad sys_lchown
- .quad sys_getuid
- .quad sys_getgid /* 200 */
- .quad sys_geteuid
- .quad sys_getegid
- .quad sys_setreuid
- .quad sys_setregid
- .quad sys_getgroups /* 205 */
- .quad sys_setgroups
- .quad sys_fchown
- .quad sys_setresuid
- .quad sys_getresuid
- .quad sys_setresgid /* 210 */
- .quad sys_getresgid
- .quad sys_chown
- .quad sys_setuid
- .quad sys_setgid
- .quad sys_setfsuid /* 215 */
- .quad sys_setfsgid
- .quad sys_pivot_root
- .quad sys_mincore
- .quad sys_madvise
- .quad compat_sys_getdents64 /* 220 getdents64 */
- .quad compat_sys_fcntl64
- .quad quiet_ni_syscall /* tux */
- .quad quiet_ni_syscall /* security */
- .quad sys_gettid
- .quad sys32_readahead /* 225 */
- .quad sys_setxattr
- .quad sys_lsetxattr
- .quad sys_fsetxattr
- .quad sys_getxattr
- .quad sys_lgetxattr /* 230 */
- .quad sys_fgetxattr
- .quad sys_listxattr
- .quad sys_llistxattr
- .quad sys_flistxattr
- .quad sys_removexattr /* 235 */
- .quad sys_lremovexattr
- .quad sys_fremovexattr
- .quad sys_tkill
- .quad sys_sendfile64
- .quad compat_sys_futex /* 240 */
- .quad compat_sys_sched_setaffinity
- .quad compat_sys_sched_getaffinity
- .quad sys32_set_thread_area
- .quad sys32_get_thread_area
- .quad compat_sys_io_setup /* 245 */
- .quad sys_io_destroy
- .quad compat_sys_io_getevents
- .quad compat_sys_io_submit
- .quad sys_io_cancel
- .quad sys32_fadvise64 /* 250 */
- .quad quiet_ni_syscall /* free_huge_pages */
- .quad sys_exit_group
- .quad sys32_lookup_dcookie
- .quad sys_epoll_create
- .quad sys_epoll_ctl /* 255 */
- .quad sys_epoll_wait
- .quad sys_remap_file_pages
- .quad sys_set_tid_address
- .quad compat_sys_timer_create
- .quad compat_sys_timer_settime /* 260 */
- .quad compat_sys_timer_gettime
- .quad sys_timer_getoverrun
- .quad sys_timer_delete
- .quad compat_sys_clock_settime
- .quad compat_sys_clock_gettime /* 265 */
- .quad compat_sys_clock_getres
- .quad compat_sys_clock_nanosleep
- .quad compat_sys_statfs64
- .quad compat_sys_fstatfs64
- .quad sys_tgkill /* 270 */
- .quad compat_sys_utimes
- .quad sys32_fadvise64_64
- .quad quiet_ni_syscall /* sys_vserver */
- .quad sys_mbind
- .quad compat_sys_get_mempolicy /* 275 */
- .quad sys_set_mempolicy
- .quad compat_sys_mq_open
- .quad sys_mq_unlink
- .quad compat_sys_mq_timedsend
- .quad compat_sys_mq_timedreceive /* 280 */
- .quad compat_sys_mq_notify
- .quad compat_sys_mq_getsetattr
- .quad compat_sys_kexec_load /* reserved for kexec */
- .quad compat_sys_waitid
- .quad quiet_ni_syscall /* 285: sys_altroot */
- .quad sys_add_key
- .quad sys_request_key
- .quad sys_keyctl
- .quad sys_ioprio_set
- .quad sys_ioprio_get /* 290 */
- .quad sys_inotify_init
- .quad sys_inotify_add_watch
- .quad sys_inotify_rm_watch
- .quad sys_migrate_pages
- .quad compat_sys_openat /* 295 */
- .quad sys_mkdirat
- .quad sys_mknodat
- .quad sys_fchownat
- .quad compat_sys_futimesat
- .quad sys32_fstatat /* 300 */
- .quad sys_unlinkat
- .quad sys_renameat
- .quad sys_linkat
- .quad sys_symlinkat
- .quad sys_readlinkat /* 305 */
- .quad sys_fchmodat
- .quad sys_faccessat
- .quad compat_sys_pselect6
- .quad compat_sys_ppoll
- .quad sys_unshare /* 310 */
- .quad compat_sys_set_robust_list
- .quad compat_sys_get_robust_list
- .quad sys_splice
- .quad sys32_sync_file_range
- .quad sys_tee /* 315 */
- .quad compat_sys_vmsplice
- .quad compat_sys_move_pages
- .quad sys_getcpu
- .quad sys_epoll_pwait
- .quad compat_sys_utimensat /* 320 */
- .quad compat_sys_signalfd
- .quad compat_sys_timerfd
- .quad sys_eventfd
- .quad sys32_fallocate
-ia32_syscall_end:
diff --git a/arch/x86_64/ia32/ipc32.c b/arch/x86_64/ia32/ipc32.c
deleted file mode 100644
index 369151dc3213..000000000000
--- a/arch/x86_64/ia32/ipc32.c
+++ /dev/null
@@ -1,57 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/spinlock.h>
-#include <linux/list.h>
-#include <linux/syscalls.h>
-#include <linux/time.h>
-#include <linux/sem.h>
-#include <linux/msg.h>
-#include <linux/shm.h>
-#include <linux/ipc.h>
-#include <linux/compat.h>
-
-#include <asm-i386/ipc.h>
-
-asmlinkage long
-sys32_ipc(u32 call, int first, int second, int third,
- compat_uptr_t ptr, u32 fifth)
-{
- int version;
-
- version = call >> 16; /* hack for backward compatibility */
- call &= 0xffff;
-
- switch (call) {
- case SEMOP:
- /* struct sembuf is the same on 32 and 64bit :)) */
- return sys_semtimedop(first, compat_ptr(ptr), second, NULL);
- case SEMTIMEDOP:
- return compat_sys_semtimedop(first, compat_ptr(ptr), second,
- compat_ptr(fifth));
- case SEMGET:
- return sys_semget(first, second, third);
- case SEMCTL:
- return compat_sys_semctl(first, second, third, compat_ptr(ptr));
-
- case MSGSND:
- return compat_sys_msgsnd(first, second, third, compat_ptr(ptr));
- case MSGRCV:
- return compat_sys_msgrcv(first, second, fifth, third,
- version, compat_ptr(ptr));
- case MSGGET:
- return sys_msgget((key_t) first, second);
- case MSGCTL:
- return compat_sys_msgctl(first, second, compat_ptr(ptr));
-
- case SHMAT:
- return compat_sys_shmat(first, second, third, version,
- compat_ptr(ptr));
- break;
- case SHMDT:
- return sys_shmdt(compat_ptr(ptr));
- case SHMGET:
- return sys_shmget(first, (unsigned)second, third);
- case SHMCTL:
- return compat_sys_shmctl(first, second, compat_ptr(ptr));
- }
- return -ENOSYS;
-}
diff --git a/arch/x86_64/ia32/mmap32.c b/arch/x86_64/ia32/mmap32.c
deleted file mode 100644
index e4b84b4a417a..000000000000
--- a/arch/x86_64/ia32/mmap32.c
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * linux/arch/x86_64/ia32/mm/mmap.c
- *
- * flexible mmap layout support
- *
- * Based on the i386 version which was
- *
- * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- *
- * Started by Ingo Molnar <mingo@elte.hu>
- */
-
-#include <linux/personality.h>
-#include <linux/mm.h>
-#include <linux/random.h>
-#include <linux/sched.h>
-
-/*
- * Top of mmap area (just below the process stack).
- *
- * Leave an at least ~128 MB hole.
- */
-#define MIN_GAP (128*1024*1024)
-#define MAX_GAP (TASK_SIZE/6*5)
-
-static inline unsigned long mmap_base(struct mm_struct *mm)
-{
- unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
- unsigned long random_factor = 0;
-
- if (current->flags & PF_RANDOMIZE)
- random_factor = get_random_int() % (1024*1024);
-
- if (gap < MIN_GAP)
- gap = MIN_GAP;
- else if (gap > MAX_GAP)
- gap = MAX_GAP;
-
- return PAGE_ALIGN(TASK_SIZE - gap - random_factor);
-}
-
-/*
- * This function, called very early during the creation of a new
- * process VM image, sets up which VM layout function to use:
- */
-void ia32_pick_mmap_layout(struct mm_struct *mm)
-{
- /*
- * Fall back to the standard layout if the personality
- * bit is set, or if the expected stack growth is unlimited:
- */
- if (sysctl_legacy_va_layout ||
- (current->personality & ADDR_COMPAT_LAYOUT) ||
- current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) {
- mm->mmap_base = TASK_UNMAPPED_BASE;
- mm->get_unmapped_area = arch_get_unmapped_area;
- mm->unmap_area = arch_unmap_area;
- } else {
- mm->mmap_base = mmap_base(mm);
- mm->get_unmapped_area = arch_get_unmapped_area_topdown;
- mm->unmap_area = arch_unmap_area_topdown;
- }
-}
diff --git a/arch/x86_64/ia32/ptrace32.c b/arch/x86_64/ia32/ptrace32.c
deleted file mode 100644
index 4a233ad6269c..000000000000
--- a/arch/x86_64/ia32/ptrace32.c
+++ /dev/null
@@ -1,404 +0,0 @@
-/*
- * 32bit ptrace for x86-64.
- *
- * Copyright 2001,2002 Andi Kleen, SuSE Labs.
- * Some parts copied from arch/i386/kernel/ptrace.c. See that file for earlier
- * copyright.
- *
- * This allows to access 64bit processes too; but there is no way to see the extended
- * register contents.
- */
-
-#include <linux/kernel.h>
-#include <linux/stddef.h>
-#include <linux/sched.h>
-#include <linux/syscalls.h>
-#include <linux/unistd.h>
-#include <linux/mm.h>
-#include <linux/err.h>
-#include <linux/ptrace.h>
-#include <asm/ptrace.h>
-#include <asm/compat.h>
-#include <asm/uaccess.h>
-#include <asm/user32.h>
-#include <asm/user.h>
-#include <asm/errno.h>
-#include <asm/debugreg.h>
-#include <asm/i387.h>
-#include <asm/fpu32.h>
-#include <asm/ia32.h>
-
-/*
- * Determines which flags the user has access to [1 = access, 0 = no access].
- * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), IOPL(12-13), IF(9).
- * Also masks reserved bits (31-22, 15, 5, 3, 1).
- */
-#define FLAG_MASK 0x54dd5UL
-
-#define R32(l,q) \
- case offsetof(struct user32, regs.l): stack[offsetof(struct pt_regs, q)/8] = val; break
-
-static int putreg32(struct task_struct *child, unsigned regno, u32 val)
-{
- int i;
- __u64 *stack = (__u64 *)task_pt_regs(child);
-
- switch (regno) {
- case offsetof(struct user32, regs.fs):
- if (val && (val & 3) != 3) return -EIO;
- child->thread.fsindex = val & 0xffff;
- break;
- case offsetof(struct user32, regs.gs):
- if (val && (val & 3) != 3) return -EIO;
- child->thread.gsindex = val & 0xffff;
- break;
- case offsetof(struct user32, regs.ds):
- if (val && (val & 3) != 3) return -EIO;
- child->thread.ds = val & 0xffff;
- break;
- case offsetof(struct user32, regs.es):
- child->thread.es = val & 0xffff;
- break;
- case offsetof(struct user32, regs.ss):
- if ((val & 3) != 3) return -EIO;
- stack[offsetof(struct pt_regs, ss)/8] = val & 0xffff;
- break;
- case offsetof(struct user32, regs.cs):
- if ((val & 3) != 3) return -EIO;
- stack[offsetof(struct pt_regs, cs)/8] = val & 0xffff;
- break;
-
- R32(ebx, rbx);
- R32(ecx, rcx);
- R32(edx, rdx);
- R32(edi, rdi);
- R32(esi, rsi);
- R32(ebp, rbp);
- R32(eax, rax);
- R32(orig_eax, orig_rax);
- R32(eip, rip);
- R32(esp, rsp);
-
- case offsetof(struct user32, regs.eflags): {
- __u64 *flags = &stack[offsetof(struct pt_regs, eflags)/8];
- val &= FLAG_MASK;
- *flags = val | (*flags & ~FLAG_MASK);
- break;
- }
-
- case offsetof(struct user32, u_debugreg[4]):
- case offsetof(struct user32, u_debugreg[5]):
- return -EIO;
-
- case offsetof(struct user32, u_debugreg[0]):
- child->thread.debugreg0 = val;
- break;
-
- case offsetof(struct user32, u_debugreg[1]):
- child->thread.debugreg1 = val;
- break;
-
- case offsetof(struct user32, u_debugreg[2]):
- child->thread.debugreg2 = val;
- break;
-
- case offsetof(struct user32, u_debugreg[3]):
- child->thread.debugreg3 = val;
- break;
-
- case offsetof(struct user32, u_debugreg[6]):
- child->thread.debugreg6 = val;
- break;
-
- case offsetof(struct user32, u_debugreg[7]):
- val &= ~DR_CONTROL_RESERVED;
- /* See arch/i386/kernel/ptrace.c for an explanation of
- * this awkward check.*/
- for(i=0; i<4; i++)
- if ((0x5454 >> ((val >> (16 + 4*i)) & 0xf)) & 1)
- return -EIO;
- child->thread.debugreg7 = val;
- if (val)
- set_tsk_thread_flag(child, TIF_DEBUG);
- else
- clear_tsk_thread_flag(child, TIF_DEBUG);
- break;
-
- default:
- if (regno > sizeof(struct user32) || (regno & 3))
- return -EIO;
-
- /* Other dummy fields in the virtual user structure are ignored */
- break;
- }
- return 0;
-}
-
-#undef R32
-
-#define R32(l,q) \
- case offsetof(struct user32, regs.l): *val = stack[offsetof(struct pt_regs, q)/8]; break
-
-static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
-{
- __u64 *stack = (__u64 *)task_pt_regs(child);
-
- switch (regno) {
- case offsetof(struct user32, regs.fs):
- *val = child->thread.fsindex;
- break;
- case offsetof(struct user32, regs.gs):
- *val = child->thread.gsindex;
- break;
- case offsetof(struct user32, regs.ds):
- *val = child->thread.ds;
- break;
- case offsetof(struct user32, regs.es):
- *val = child->thread.es;
- break;
-
- R32(cs, cs);
- R32(ss, ss);
- R32(ebx, rbx);
- R32(ecx, rcx);
- R32(edx, rdx);
- R32(edi, rdi);
- R32(esi, rsi);
- R32(ebp, rbp);
- R32(eax, rax);
- R32(orig_eax, orig_rax);
- R32(eip, rip);
- R32(eflags, eflags);
- R32(esp, rsp);
-
- case offsetof(struct user32, u_debugreg[0]):
- *val = child->thread.debugreg0;
- break;
- case offsetof(struct user32, u_debugreg[1]):
- *val = child->thread.debugreg1;
- break;
- case offsetof(struct user32, u_debugreg[2]):
- *val = child->thread.debugreg2;
- break;
- case offsetof(struct user32, u_debugreg[3]):
- *val = child->thread.debugreg3;
- break;
- case offsetof(struct user32, u_debugreg[6]):
- *val = child->thread.debugreg6;
- break;
- case offsetof(struct user32, u_debugreg[7]):
- *val = child->thread.debugreg7;
- break;
-
- default:
- if (regno > sizeof(struct user32) || (regno & 3))
- return -EIO;
-
- /* Other dummy fields in the virtual user structure are ignored */
- *val = 0;
- break;
- }
- return 0;
-}
-
-#undef R32
-
-static long ptrace32_siginfo(unsigned request, u32 pid, u32 addr, u32 data)
-{
- int ret;
- compat_siginfo_t __user *si32 = compat_ptr(data);
- siginfo_t ssi;
- siginfo_t __user *si = compat_alloc_user_space(sizeof(siginfo_t));
- if (request == PTRACE_SETSIGINFO) {
- memset(&ssi, 0, sizeof(siginfo_t));
- ret = copy_siginfo_from_user32(&ssi, si32);
- if (ret)
- return ret;
- if (copy_to_user(si, &ssi, sizeof(siginfo_t)))
- return -EFAULT;
- }
- ret = sys_ptrace(request, pid, addr, (unsigned long)si);
- if (ret)
- return ret;
- if (request == PTRACE_GETSIGINFO) {
- if (copy_from_user(&ssi, si, sizeof(siginfo_t)))
- return -EFAULT;
- ret = copy_siginfo_to_user32(si32, &ssi);
- }
- return ret;
-}
-
-asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
-{
- struct task_struct *child;
- struct pt_regs *childregs;
- void __user *datap = compat_ptr(data);
- int ret;
- __u32 val;
-
- switch (request) {
- case PTRACE_TRACEME:
- case PTRACE_ATTACH:
- case PTRACE_KILL:
- case PTRACE_CONT:
- case PTRACE_SINGLESTEP:
- case PTRACE_DETACH:
- case PTRACE_SYSCALL:
- case PTRACE_OLDSETOPTIONS:
- case PTRACE_SETOPTIONS:
- case PTRACE_SET_THREAD_AREA:
- case PTRACE_GET_THREAD_AREA:
- return sys_ptrace(request, pid, addr, data);
-
- default:
- return -EINVAL;
-
- case PTRACE_PEEKTEXT:
- case PTRACE_PEEKDATA:
- case PTRACE_POKEDATA:
- case PTRACE_POKETEXT:
- case PTRACE_POKEUSR:
- case PTRACE_PEEKUSR:
- case PTRACE_GETREGS:
- case PTRACE_SETREGS:
- case PTRACE_SETFPREGS:
- case PTRACE_GETFPREGS:
- case PTRACE_SETFPXREGS:
- case PTRACE_GETFPXREGS:
- case PTRACE_GETEVENTMSG:
- break;
-
- case PTRACE_SETSIGINFO:
- case PTRACE_GETSIGINFO:
- return ptrace32_siginfo(request, pid, addr, data);
- }
-
- child = ptrace_get_task_struct(pid);
- if (IS_ERR(child))
- return PTR_ERR(child);
-
- ret = ptrace_check_attach(child, request == PTRACE_KILL);
- if (ret < 0)
- goto out;
-
- childregs = task_pt_regs(child);
-
- switch (request) {
- case PTRACE_PEEKDATA:
- case PTRACE_PEEKTEXT:
- ret = 0;
- if (access_process_vm(child, addr, &val, sizeof(u32), 0)!=sizeof(u32))
- ret = -EIO;
- else
- ret = put_user(val, (unsigned int __user *)datap);
- break;
-
- case PTRACE_POKEDATA:
- case PTRACE_POKETEXT:
- ret = 0;
- if (access_process_vm(child, addr, &data, sizeof(u32), 1)!=sizeof(u32))
- ret = -EIO;
- break;
-
- case PTRACE_PEEKUSR:
- ret = getreg32(child, addr, &val);
- if (ret == 0)
- ret = put_user(val, (__u32 __user *)datap);
- break;
-
- case PTRACE_POKEUSR:
- ret = putreg32(child, addr, data);
- break;
-
- case PTRACE_GETREGS: { /* Get all gp regs from the child. */
- int i;
- if (!access_ok(VERIFY_WRITE, datap, 16*4)) {
- ret = -EIO;
- break;
- }
- ret = 0;
- for ( i = 0; i <= 16*4 ; i += sizeof(__u32) ) {
- getreg32(child, i, &val);
- ret |= __put_user(val,(u32 __user *)datap);
- datap += sizeof(u32);
- }
- break;
- }
-
- case PTRACE_SETREGS: { /* Set all gp regs in the child. */
- unsigned long tmp;
- int i;
- if (!access_ok(VERIFY_READ, datap, 16*4)) {
- ret = -EIO;
- break;
- }
- ret = 0;
- for ( i = 0; i <= 16*4; i += sizeof(u32) ) {
- ret |= __get_user(tmp, (u32 __user *)datap);
- putreg32(child, i, tmp);
- datap += sizeof(u32);
- }
- break;
- }
-
- case PTRACE_GETFPREGS:
- ret = -EIO;
- if (!access_ok(VERIFY_READ, compat_ptr(data),
- sizeof(struct user_i387_struct)))
- break;
- save_i387_ia32(child, datap, childregs, 1);
- ret = 0;
- break;
-
- case PTRACE_SETFPREGS:
- ret = -EIO;
- if (!access_ok(VERIFY_WRITE, datap,
- sizeof(struct user_i387_struct)))
- break;
- ret = 0;
- /* don't check EFAULT to be bug-to-bug compatible to i386 */
- restore_i387_ia32(child, datap, 1);
- break;
-
- case PTRACE_GETFPXREGS: {
- struct user32_fxsr_struct __user *u = datap;
- init_fpu(child);
- ret = -EIO;
- if (!access_ok(VERIFY_WRITE, u, sizeof(*u)))
- break;
- ret = -EFAULT;
- if (__copy_to_user(u, &child->thread.i387.fxsave, sizeof(*u)))
- break;
- ret = __put_user(childregs->cs, &u->fcs);
- ret |= __put_user(child->thread.ds, &u->fos);
- break;
- }
- case PTRACE_SETFPXREGS: {
- struct user32_fxsr_struct __user *u = datap;
- unlazy_fpu(child);
- ret = -EIO;
- if (!access_ok(VERIFY_READ, u, sizeof(*u)))
- break;
- /* no checking to be bug-to-bug compatible with i386. */
- /* but silence warning */
- if (__copy_from_user(&child->thread.i387.fxsave, u, sizeof(*u)))
- ;
- set_stopped_child_used_math(child);
- child->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
- ret = 0;
- break;
- }
-
- case PTRACE_GETEVENTMSG:
- ret = put_user(child->ptrace_message,(unsigned int __user *)compat_ptr(data));
- break;
-
- default:
- BUG();
- }
-
- out:
- put_task_struct(child);
- return ret;
-}
-
diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c
deleted file mode 100644
index bee96d614432..000000000000
--- a/arch/x86_64/ia32/sys_ia32.c
+++ /dev/null
@@ -1,889 +0,0 @@
-/*
- * sys_ia32.c: Conversion between 32bit and 64bit native syscalls. Based on
- * sys_sparc32
- *
- * Copyright (C) 2000 VA Linux Co
- * Copyright (C) 2000 Don Dugger <n0ano@valinux.com>
- * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com>
- * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
- * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
- * Copyright (C) 2000 Hewlett-Packard Co.
- * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
- * Copyright (C) 2000,2001,2002 Andi Kleen, SuSE Labs (x86-64 port)
- *
- * These routines maintain argument size conversion between 32bit and 64bit
- * environment. In 2.5 most of this should be moved to a generic directory.
- *
- * This file assumes that there is a hole at the end of user address space.
- *
- * Some of the functions are LE specific currently. These are hopefully all marked.
- * This should be fixed.
- */
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/signal.h>
-#include <linux/syscalls.h>
-#include <linux/resource.h>
-#include <linux/times.h>
-#include <linux/utsname.h>
-#include <linux/smp.h>
-#include <linux/smp_lock.h>
-#include <linux/sem.h>
-#include <linux/msg.h>
-#include <linux/mm.h>
-#include <linux/shm.h>
-#include <linux/slab.h>
-#include <linux/uio.h>
-#include <linux/nfs_fs.h>
-#include <linux/quota.h>
-#include <linux/module.h>
-#include <linux/sunrpc/svc.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
-#include <linux/nfsd/xdr.h>
-#include <linux/nfsd/syscall.h>
-#include <linux/poll.h>
-#include <linux/personality.h>
-#include <linux/stat.h>
-#include <linux/ipc.h>
-#include <linux/rwsem.h>
-#include <linux/binfmts.h>
-#include <linux/init.h>
-#include <linux/aio_abi.h>
-#include <linux/aio.h>
-#include <linux/compat.h>
-#include <linux/vfs.h>
-#include <linux/ptrace.h>
-#include <linux/highuid.h>
-#include <linux/vmalloc.h>
-#include <linux/fsnotify.h>
-#include <linux/sysctl.h>
-#include <asm/mman.h>
-#include <asm/types.h>
-#include <asm/uaccess.h>
-#include <asm/semaphore.h>
-#include <asm/atomic.h>
-#include <asm/ldt.h>
-
-#include <net/scm.h>
-#include <net/sock.h>
-#include <asm/ia32.h>
-
-#define AA(__x) ((unsigned long)(__x))
-
-int cp_compat_stat(struct kstat *kbuf, struct compat_stat __user *ubuf)
-{
- compat_ino_t ino;
-
- typeof(ubuf->st_uid) uid = 0;
- typeof(ubuf->st_gid) gid = 0;
- SET_UID(uid, kbuf->uid);
- SET_GID(gid, kbuf->gid);
- if (!old_valid_dev(kbuf->dev) || !old_valid_dev(kbuf->rdev))
- return -EOVERFLOW;
- if (kbuf->size >= 0x7fffffff)
- return -EOVERFLOW;
- ino = kbuf->ino;
- if (sizeof(ino) < sizeof(kbuf->ino) && ino != kbuf->ino)
- return -EOVERFLOW;
- if (!access_ok(VERIFY_WRITE, ubuf, sizeof(struct compat_stat)) ||
- __put_user (old_encode_dev(kbuf->dev), &ubuf->st_dev) ||
- __put_user (ino, &ubuf->st_ino) ||
- __put_user (kbuf->mode, &ubuf->st_mode) ||
- __put_user (kbuf->nlink, &ubuf->st_nlink) ||
- __put_user (uid, &ubuf->st_uid) ||
- __put_user (gid, &ubuf->st_gid) ||
- __put_user (old_encode_dev(kbuf->rdev), &ubuf->st_rdev) ||
- __put_user (kbuf->size, &ubuf->st_size) ||
- __put_user (kbuf->atime.tv_sec, &ubuf->st_atime) ||
- __put_user (kbuf->atime.tv_nsec, &ubuf->st_atime_nsec) ||
- __put_user (kbuf->mtime.tv_sec, &ubuf->st_mtime) ||
- __put_user (kbuf->mtime.tv_nsec, &ubuf->st_mtime_nsec) ||
- __put_user (kbuf->ctime.tv_sec, &ubuf->st_ctime) ||
- __put_user (kbuf->ctime.tv_nsec, &ubuf->st_ctime_nsec) ||
- __put_user (kbuf->blksize, &ubuf->st_blksize) ||
- __put_user (kbuf->blocks, &ubuf->st_blocks))
- return -EFAULT;
- return 0;
-}
-
-asmlinkage long
-sys32_truncate64(char __user * filename, unsigned long offset_low, unsigned long offset_high)
-{
- return sys_truncate(filename, ((loff_t) offset_high << 32) | offset_low);
-}
-
-asmlinkage long
-sys32_ftruncate64(unsigned int fd, unsigned long offset_low, unsigned long offset_high)
-{
- return sys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low);
-}
-
-/* Another set for IA32/LFS -- x86_64 struct stat is different due to
- support for 64bit inode numbers. */
-
-static int
-cp_stat64(struct stat64 __user *ubuf, struct kstat *stat)
-{
- typeof(ubuf->st_uid) uid = 0;
- typeof(ubuf->st_gid) gid = 0;
- SET_UID(uid, stat->uid);
- SET_GID(gid, stat->gid);
- if (!access_ok(VERIFY_WRITE, ubuf, sizeof(struct stat64)) ||
- __put_user(huge_encode_dev(stat->dev), &ubuf->st_dev) ||
- __put_user (stat->ino, &ubuf->__st_ino) ||
- __put_user (stat->ino, &ubuf->st_ino) ||
- __put_user (stat->mode, &ubuf->st_mode) ||
- __put_user (stat->nlink, &ubuf->st_nlink) ||
- __put_user (uid, &ubuf->st_uid) ||
- __put_user (gid, &ubuf->st_gid) ||
- __put_user (huge_encode_dev(stat->rdev), &ubuf->st_rdev) ||
- __put_user (stat->size, &ubuf->st_size) ||
- __put_user (stat->atime.tv_sec, &ubuf->st_atime) ||
- __put_user (stat->atime.tv_nsec, &ubuf->st_atime_nsec) ||
- __put_user (stat->mtime.tv_sec, &ubuf->st_mtime) ||
- __put_user (stat->mtime.tv_nsec, &ubuf->st_mtime_nsec) ||
- __put_user (stat->ctime.tv_sec, &ubuf->st_ctime) ||
- __put_user (stat->ctime.tv_nsec, &ubuf->st_ctime_nsec) ||
- __put_user (stat->blksize, &ubuf->st_blksize) ||
- __put_user (stat->blocks, &ubuf->st_blocks))
- return -EFAULT;
- return 0;
-}
-
-asmlinkage long
-sys32_stat64(char __user * filename, struct stat64 __user *statbuf)
-{
- struct kstat stat;
- int ret = vfs_stat(filename, &stat);
- if (!ret)
- ret = cp_stat64(statbuf, &stat);
- return ret;
-}
-
-asmlinkage long
-sys32_lstat64(char __user * filename, struct stat64 __user *statbuf)
-{
- struct kstat stat;
- int ret = vfs_lstat(filename, &stat);
- if (!ret)
- ret = cp_stat64(statbuf, &stat);
- return ret;
-}
-
-asmlinkage long
-sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf)
-{
- struct kstat stat;
- int ret = vfs_fstat(fd, &stat);
- if (!ret)
- ret = cp_stat64(statbuf, &stat);
- return ret;
-}
-
-asmlinkage long
-sys32_fstatat(unsigned int dfd, char __user *filename,
- struct stat64 __user* statbuf, int flag)
-{
- struct kstat stat;
- int error = -EINVAL;
-
- if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
- goto out;
-
- if (flag & AT_SYMLINK_NOFOLLOW)
- error = vfs_lstat_fd(dfd, filename, &stat);
- else
- error = vfs_stat_fd(dfd, filename, &stat);
-
- if (!error)
- error = cp_stat64(statbuf, &stat);
-
-out:
- return error;
-}
-
-/*
- * Linux/i386 didn't use to be able to handle more than
- * 4 system call parameters, so these system calls used a memory
- * block for parameter passing..
- */
-
-struct mmap_arg_struct {
- unsigned int addr;
- unsigned int len;
- unsigned int prot;
- unsigned int flags;
- unsigned int fd;
- unsigned int offset;
-};
-
-asmlinkage long
-sys32_mmap(struct mmap_arg_struct __user *arg)
-{
- struct mmap_arg_struct a;
- struct file *file = NULL;
- unsigned long retval;
- struct mm_struct *mm ;
-
- if (copy_from_user(&a, arg, sizeof(a)))
- return -EFAULT;
-
- if (a.offset & ~PAGE_MASK)
- return -EINVAL;
-
- if (!(a.flags & MAP_ANONYMOUS)) {
- file = fget(a.fd);
- if (!file)
- return -EBADF;
- }
-
- mm = current->mm;
- down_write(&mm->mmap_sem);
- retval = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags, a.offset>>PAGE_SHIFT);
- if (file)
- fput(file);
-
- up_write(&mm->mmap_sem);
-
- return retval;
-}
-
-asmlinkage long
-sys32_mprotect(unsigned long start, size_t len, unsigned long prot)
-{
- return sys_mprotect(start,len,prot);
-}
-
-asmlinkage long
-sys32_pipe(int __user *fd)
-{
- int retval;
- int fds[2];
-
- retval = do_pipe(fds);
- if (retval)
- goto out;
- if (copy_to_user(fd, fds, sizeof(fds)))
- retval = -EFAULT;
- out:
- return retval;
-}
-
-asmlinkage long
-sys32_rt_sigaction(int sig, struct sigaction32 __user *act,
- struct sigaction32 __user *oact, unsigned int sigsetsize)
-{
- struct k_sigaction new_ka, old_ka;
- int ret;
- compat_sigset_t set32;
-
- /* XXX: Don't preclude handling different sized sigset_t's. */
- if (sigsetsize != sizeof(compat_sigset_t))
- return -EINVAL;
-
- if (act) {
- compat_uptr_t handler, restorer;
-
- if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
- __get_user(handler, &act->sa_handler) ||
- __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
- __get_user(restorer, &act->sa_restorer)||
- __copy_from_user(&set32, &act->sa_mask, sizeof(compat_sigset_t)))
- return -EFAULT;
- new_ka.sa.sa_handler = compat_ptr(handler);
- new_ka.sa.sa_restorer = compat_ptr(restorer);
- /* FIXME: here we rely on _COMPAT_NSIG_WORS to be >= than _NSIG_WORDS << 1 */
- switch (_NSIG_WORDS) {
- case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6]
- | (((long)set32.sig[7]) << 32);
- case 3: new_ka.sa.sa_mask.sig[2] = set32.sig[4]
- | (((long)set32.sig[5]) << 32);
- case 2: new_ka.sa.sa_mask.sig[1] = set32.sig[2]
- | (((long)set32.sig[3]) << 32);
- case 1: new_ka.sa.sa_mask.sig[0] = set32.sig[0]
- | (((long)set32.sig[1]) << 32);
- }
- }
-
- ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
-
- if (!ret && oact) {
- /* FIXME: here we rely on _COMPAT_NSIG_WORS to be >= than _NSIG_WORDS << 1 */
- switch (_NSIG_WORDS) {
- case 4:
- set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32);
- set32.sig[6] = old_ka.sa.sa_mask.sig[3];
- case 3:
- set32.sig[5] = (old_ka.sa.sa_mask.sig[2] >> 32);
- set32.sig[4] = old_ka.sa.sa_mask.sig[2];
- case 2:
- set32.sig[3] = (old_ka.sa.sa_mask.sig[1] >> 32);
- set32.sig[2] = old_ka.sa.sa_mask.sig[1];
- case 1:
- set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32);
- set32.sig[0] = old_ka.sa.sa_mask.sig[0];
- }
- if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
- __put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler) ||
- __put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer) ||
- __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
- __copy_to_user(&oact->sa_mask, &set32, sizeof(compat_sigset_t)))
- return -EFAULT;
- }
-
- return ret;
-}
-
-asmlinkage long
-sys32_sigaction (int sig, struct old_sigaction32 __user *act, struct old_sigaction32 __user *oact)
-{
- struct k_sigaction new_ka, old_ka;
- int ret;
-
- if (act) {
- compat_old_sigset_t mask;
- compat_uptr_t handler, restorer;
-
- if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
- __get_user(handler, &act->sa_handler) ||
- __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
- __get_user(restorer, &act->sa_restorer) ||
- __get_user(mask, &act->sa_mask))
- return -EFAULT;
-
- new_ka.sa.sa_handler = compat_ptr(handler);
- new_ka.sa.sa_restorer = compat_ptr(restorer);
-
- siginitset(&new_ka.sa.sa_mask, mask);
- }
-
- ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
-
- if (!ret && oact) {
- if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
- __put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler) ||
- __put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer) ||
- __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
- __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask))
- return -EFAULT;
- }
-
- return ret;
-}
-
-asmlinkage long
-sys32_rt_sigprocmask(int how, compat_sigset_t __user *set,
- compat_sigset_t __user *oset, unsigned int sigsetsize)
-{
- sigset_t s;
- compat_sigset_t s32;
- int ret;
- mm_segment_t old_fs = get_fs();
-
- if (set) {
- if (copy_from_user (&s32, set, sizeof(compat_sigset_t)))
- return -EFAULT;
- switch (_NSIG_WORDS) {
- case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32);
- case 3: s.sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32);
- case 2: s.sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32);
- case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32);
- }
- }
- set_fs (KERNEL_DS);
- ret = sys_rt_sigprocmask(how,
- set ? (sigset_t __user *)&s : NULL,
- oset ? (sigset_t __user *)&s : NULL,
- sigsetsize);
- set_fs (old_fs);
- if (ret) return ret;
- if (oset) {
- switch (_NSIG_WORDS) {
- case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3];
- case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2];
- case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1];
- case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0];
- }
- if (copy_to_user (oset, &s32, sizeof(compat_sigset_t)))
- return -EFAULT;
- }
- return 0;
-}
-
-static inline long
-get_tv32(struct timeval *o, struct compat_timeval __user *i)
-{
- int err = -EFAULT;
- if (access_ok(VERIFY_READ, i, sizeof(*i))) {
- err = __get_user(o->tv_sec, &i->tv_sec);
- err |= __get_user(o->tv_usec, &i->tv_usec);
- }
- return err;
-}
-
-static inline long
-put_tv32(struct compat_timeval __user *o, struct timeval *i)
-{
- int err = -EFAULT;
- if (access_ok(VERIFY_WRITE, o, sizeof(*o))) {
- err = __put_user(i->tv_sec, &o->tv_sec);
- err |= __put_user(i->tv_usec, &o->tv_usec);
- }
- return err;
-}
-
-extern unsigned int alarm_setitimer(unsigned int seconds);
-
-asmlinkage long
-sys32_alarm(unsigned int seconds)
-{
- return alarm_setitimer(seconds);
-}
-
-/* Translations due to time_t size differences. Which affects all
- sorts of things, like timeval and itimerval. */
-
-extern struct timezone sys_tz;
-
-asmlinkage long
-sys32_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz)
-{
- if (tv) {
- struct timeval ktv;
- do_gettimeofday(&ktv);
- if (put_tv32(tv, &ktv))
- return -EFAULT;
- }
- if (tz) {
- if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
- return -EFAULT;
- }
- return 0;
-}
-
-asmlinkage long
-sys32_settimeofday(struct compat_timeval __user *tv, struct timezone __user *tz)
-{
- struct timeval ktv;
- struct timespec kts;
- struct timezone ktz;
-
- if (tv) {
- if (get_tv32(&ktv, tv))
- return -EFAULT;
- kts.tv_sec = ktv.tv_sec;
- kts.tv_nsec = ktv.tv_usec * NSEC_PER_USEC;
- }
- if (tz) {
- if (copy_from_user(&ktz, tz, sizeof(ktz)))
- return -EFAULT;
- }
-
- return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL);
-}
-
-struct sel_arg_struct {
- unsigned int n;
- unsigned int inp;
- unsigned int outp;
- unsigned int exp;
- unsigned int tvp;
-};
-
-asmlinkage long
-sys32_old_select(struct sel_arg_struct __user *arg)
-{
- struct sel_arg_struct a;
-
- if (copy_from_user(&a, arg, sizeof(a)))
- return -EFAULT;
- return compat_sys_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp),
- compat_ptr(a.exp), compat_ptr(a.tvp));
-}
-
-extern asmlinkage long
-compat_sys_wait4(compat_pid_t pid, compat_uint_t * stat_addr, int options,
- struct compat_rusage *ru);
-
-asmlinkage long
-sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr, int options)
-{
- return compat_sys_wait4(pid, stat_addr, options, NULL);
-}
-
-/* 32-bit timeval and related flotsam. */
-
-asmlinkage long
-sys32_sysfs(int option, u32 arg1, u32 arg2)
-{
- return sys_sysfs(option, arg1, arg2);
-}
-
-asmlinkage long
-sys32_sched_rr_get_interval(compat_pid_t pid, struct compat_timespec __user *interval)
-{
- struct timespec t;
- int ret;
- mm_segment_t old_fs = get_fs ();
-
- set_fs (KERNEL_DS);
- ret = sys_sched_rr_get_interval(pid, (struct timespec __user *)&t);
- set_fs (old_fs);
- if (put_compat_timespec(&t, interval))
- return -EFAULT;
- return ret;
-}
-
-asmlinkage long
-sys32_rt_sigpending(compat_sigset_t __user *set, compat_size_t sigsetsize)
-{
- sigset_t s;
- compat_sigset_t s32;
- int ret;
- mm_segment_t old_fs = get_fs();
-
- set_fs (KERNEL_DS);
- ret = sys_rt_sigpending((sigset_t __user *)&s, sigsetsize);
- set_fs (old_fs);
- if (!ret) {
- switch (_NSIG_WORDS) {
- case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3];
- case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2];
- case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1];
- case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0];
- }
- if (copy_to_user (set, &s32, sizeof(compat_sigset_t)))
- return -EFAULT;
- }
- return ret;
-}
-
-asmlinkage long
-sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo)
-{
- siginfo_t info;
- int ret;
- mm_segment_t old_fs = get_fs();
-
- if (copy_siginfo_from_user32(&info, uinfo))
- return -EFAULT;
- set_fs (KERNEL_DS);
- ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __user *)&info);
- set_fs (old_fs);
- return ret;
-}
-
-/* These are here just in case some old ia32 binary calls it. */
-asmlinkage long
-sys32_pause(void)
-{
- current->state = TASK_INTERRUPTIBLE;
- schedule();
- return -ERESTARTNOHAND;
-}
-
-
-#ifdef CONFIG_SYSCTL_SYSCALL
-struct sysctl_ia32 {
- unsigned int name;
- int nlen;
- unsigned int oldval;
- unsigned int oldlenp;
- unsigned int newval;
- unsigned int newlen;
- unsigned int __unused[4];
-};
-
-
-asmlinkage long
-sys32_sysctl(struct sysctl_ia32 __user *args32)
-{
- struct sysctl_ia32 a32;
- mm_segment_t old_fs = get_fs ();
- void __user *oldvalp, *newvalp;
- size_t oldlen;
- int __user *namep;
- long ret;
-
- if (copy_from_user(&a32, args32, sizeof (a32)))
- return -EFAULT;
-
- /*
- * We need to pre-validate these because we have to disable address checking
- * before calling do_sysctl() because of OLDLEN but we can't run the risk of the
- * user specifying bad addresses here. Well, since we're dealing with 32 bit
- * addresses, we KNOW that access_ok() will always succeed, so this is an
- * expensive NOP, but so what...
- */
- namep = compat_ptr(a32.name);
- oldvalp = compat_ptr(a32.oldval);
- newvalp = compat_ptr(a32.newval);
-
- if ((oldvalp && get_user(oldlen, (int __user *)compat_ptr(a32.oldlenp)))
- || !access_ok(VERIFY_WRITE, namep, 0)
- || !access_ok(VERIFY_WRITE, oldvalp, 0)
- || !access_ok(VERIFY_WRITE, newvalp, 0))
- return -EFAULT;
-
- set_fs(KERNEL_DS);
- lock_kernel();
- ret = do_sysctl(namep, a32.nlen, oldvalp, (size_t __user *)&oldlen,
- newvalp, (size_t) a32.newlen);
- unlock_kernel();
- set_fs(old_fs);
-
- if (oldvalp && put_user (oldlen, (int __user *)compat_ptr(a32.oldlenp)))
- return -EFAULT;
-
- return ret;
-}
-#endif
-
-/* warning: next two assume little endian */
-asmlinkage long
-sys32_pread(unsigned int fd, char __user *ubuf, u32 count, u32 poslo, u32 poshi)
-{
- return sys_pread64(fd, ubuf, count,
- ((loff_t)AA(poshi) << 32) | AA(poslo));
-}
-
-asmlinkage long
-sys32_pwrite(unsigned int fd, char __user *ubuf, u32 count, u32 poslo, u32 poshi)
-{
- return sys_pwrite64(fd, ubuf, count,
- ((loff_t)AA(poshi) << 32) | AA(poslo));
-}
-
-
-asmlinkage long
-sys32_personality(unsigned long personality)
-{
- int ret;
- if (personality(current->personality) == PER_LINUX32 &&
- personality == PER_LINUX)
- personality = PER_LINUX32;
- ret = sys_personality(personality);
- if (ret == PER_LINUX32)
- ret = PER_LINUX;
- return ret;
-}
-
-asmlinkage long
-sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, s32 count)
-{
- mm_segment_t old_fs = get_fs();
- int ret;
- off_t of;
-
- if (offset && get_user(of, offset))
- return -EFAULT;
-
- set_fs(KERNEL_DS);
- ret = sys_sendfile(out_fd, in_fd, offset ? (off_t __user *)&of : NULL,
- count);
- set_fs(old_fs);
-
- if (offset && put_user(of, offset))
- return -EFAULT;
-
- return ret;
-}
-
-asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long pgoff)
-{
- struct mm_struct *mm = current->mm;
- unsigned long error;
- struct file * file = NULL;
-
- flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
- if (!(flags & MAP_ANONYMOUS)) {
- file = fget(fd);
- if (!file)
- return -EBADF;
- }
-
- down_write(&mm->mmap_sem);
- error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
- up_write(&mm->mmap_sem);
-
- if (file)
- fput(file);
- return error;
-}
-
-asmlinkage long sys32_olduname(struct oldold_utsname __user * name)
-{
- int err;
-
- if (!name)
- return -EFAULT;
- if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
- return -EFAULT;
-
- down_read(&uts_sem);
-
- err = __copy_to_user(&name->sysname,&utsname()->sysname,
- __OLD_UTS_LEN);
- err |= __put_user(0,name->sysname+__OLD_UTS_LEN);
- err |= __copy_to_user(&name->nodename,&utsname()->nodename,
- __OLD_UTS_LEN);
- err |= __put_user(0,name->nodename+__OLD_UTS_LEN);
- err |= __copy_to_user(&name->release,&utsname()->release,
- __OLD_UTS_LEN);
- err |= __put_user(0,name->release+__OLD_UTS_LEN);
- err |= __copy_to_user(&name->version,&utsname()->version,
- __OLD_UTS_LEN);
- err |= __put_user(0,name->version+__OLD_UTS_LEN);
- {
- char *arch = "x86_64";
- if (personality(current->personality) == PER_LINUX32)
- arch = "i686";
-
- err |= __copy_to_user(&name->machine, arch, strlen(arch)+1);
- }
-
- up_read(&uts_sem);
-
- err = err ? -EFAULT : 0;
-
- return err;
-}
-
-long sys32_uname(struct old_utsname __user * name)
-{
- int err;
- if (!name)
- return -EFAULT;
- down_read(&uts_sem);
- err = copy_to_user(name, utsname(), sizeof (*name));
- up_read(&uts_sem);
- if (personality(current->personality) == PER_LINUX32)
- err |= copy_to_user(&name->machine, "i686", 5);
- return err?-EFAULT:0;
-}
-
-long sys32_ustat(unsigned dev, struct ustat32 __user *u32p)
-{
- struct ustat u;
- mm_segment_t seg;
- int ret;
-
- seg = get_fs();
- set_fs(KERNEL_DS);
- ret = sys_ustat(dev, (struct ustat __user *)&u);
- set_fs(seg);
- if (ret >= 0) {
- if (!access_ok(VERIFY_WRITE,u32p,sizeof(struct ustat32)) ||
- __put_user((__u32) u.f_tfree, &u32p->f_tfree) ||
- __put_user((__u32) u.f_tinode, &u32p->f_tfree) ||
- __copy_to_user(&u32p->f_fname, u.f_fname, sizeof(u.f_fname)) ||
- __copy_to_user(&u32p->f_fpack, u.f_fpack, sizeof(u.f_fpack)))
- ret = -EFAULT;
- }
- return ret;
-}
-
-asmlinkage long sys32_execve(char __user *name, compat_uptr_t __user *argv,
- compat_uptr_t __user *envp, struct pt_regs *regs)
-{
- long error;
- char * filename;
-
- filename = getname(name);
- error = PTR_ERR(filename);
- if (IS_ERR(filename))
- return error;
- error = compat_do_execve(filename, argv, envp, regs);
- if (error == 0) {
- task_lock(current);
- current->ptrace &= ~PT_DTRACE;
- task_unlock(current);
- }
- putname(filename);
- return error;
-}
-
-asmlinkage long sys32_clone(unsigned int clone_flags, unsigned int newsp,
- struct pt_regs *regs)
-{
- void __user *parent_tid = (void __user *)regs->rdx;
- void __user *child_tid = (void __user *)regs->rdi;
- if (!newsp)
- newsp = regs->rsp;
- return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
-}
-
-/*
- * Some system calls that need sign extended arguments. This could be done by a generic wrapper.
- */
-
-long sys32_lseek (unsigned int fd, int offset, unsigned int whence)
-{
- return sys_lseek(fd, offset, whence);
-}
-
-long sys32_kill(int pid, int sig)
-{
- return sys_kill(pid, sig);
-}
-
-long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
- __u32 len_low, __u32 len_high, int advice)
-{
- return sys_fadvise64_64(fd,
- (((u64)offset_high)<<32) | offset_low,
- (((u64)len_high)<<32) | len_low,
- advice);
-}
-
-long sys32_vm86_warning(void)
-{
- struct task_struct *me = current;
- static char lastcomm[sizeof(me->comm)];
- if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
- compat_printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n",
- me->comm);
- strncpy(lastcomm, me->comm, sizeof(lastcomm));
- }
- return -ENOSYS;
-}
-
-long sys32_lookup_dcookie(u32 addr_low, u32 addr_high,
- char __user * buf, size_t len)
-{
- return sys_lookup_dcookie(((u64)addr_high << 32) | addr_low, buf, len);
-}
-
-asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi, size_t count)
-{
- return sys_readahead(fd, ((u64)off_hi << 32) | off_lo, count);
-}
-
-asmlinkage long sys32_sync_file_range(int fd, unsigned off_low, unsigned off_hi,
- unsigned n_low, unsigned n_hi, int flags)
-{
- return sys_sync_file_range(fd,
- ((u64)off_hi << 32) | off_low,
- ((u64)n_hi << 32) | n_low, flags);
-}
-
-asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi, size_t len,
- int advice)
-{
- return sys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo,
- len, advice);
-}
-
-asmlinkage long sys32_fallocate(int fd, int mode, unsigned offset_lo,
- unsigned offset_hi, unsigned len_lo,
- unsigned len_hi)
-{
- return sys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo,
- ((u64)len_hi << 32) | len_lo);
-}
diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c
deleted file mode 100644
index 15013bac181c..000000000000
--- a/arch/x86_64/ia32/syscall32.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/* Copyright 2002,2003 Andi Kleen, SuSE Labs */
-
-/* vsyscall handling for 32bit processes. Map a stub page into it
- on demand because 32bit cannot reach the kernel's fixmaps */
-
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/gfp.h>
-#include <linux/init.h>
-#include <linux/stringify.h>
-#include <linux/security.h>
-#include <asm/proto.h>
-#include <asm/tlbflush.h>
-#include <asm/ia32_unistd.h>
-#include <asm/vsyscall32.h>
-
-extern unsigned char syscall32_syscall[], syscall32_syscall_end[];
-extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[];
-extern int sysctl_vsyscall32;
-
-static struct page *syscall32_pages[1];
-static int use_sysenter = -1;
-
-struct linux_binprm;
-
-/* Setup a VMA at program startup for the vsyscall page */
-int syscall32_setup_pages(struct linux_binprm *bprm, int exstack)
-{
- struct mm_struct *mm = current->mm;
- int ret;
-
- down_write(&mm->mmap_sem);
- /*
- * MAYWRITE to allow gdb to COW and set breakpoints
- *
- * Make sure the vDSO gets into every core dump.
- * Dumping its contents makes post-mortem fully interpretable later
- * without matching up the same kernel and hardware config to see
- * what PC values meant.
- */
- /* Could randomize here */
- ret = install_special_mapping(mm, VSYSCALL32_BASE, PAGE_SIZE,
- VM_READ|VM_EXEC|
- VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
- VM_ALWAYSDUMP,
- syscall32_pages);
- up_write(&mm->mmap_sem);
- return ret;
-}
-
-static int __init init_syscall32(void)
-{
- char *syscall32_page = (void *)get_zeroed_page(GFP_KERNEL);
- if (!syscall32_page)
- panic("Cannot allocate syscall32 page");
- syscall32_pages[0] = virt_to_page(syscall32_page);
- if (use_sysenter > 0) {
- memcpy(syscall32_page, syscall32_sysenter,
- syscall32_sysenter_end - syscall32_sysenter);
- } else {
- memcpy(syscall32_page, syscall32_syscall,
- syscall32_syscall_end - syscall32_syscall);
- }
- return 0;
-}
-
-__initcall(init_syscall32);
-
-/* May not be __init: called during resume */
-void syscall32_cpu_init(void)
-{
- if (use_sysenter < 0)
- use_sysenter = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL);
-
- /* Load these always in case some future AMD CPU supports
- SYSENTER from compat mode too. */
- checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
- checking_wrmsrl(MSR_IA32_SYSENTER_ESP, 0ULL);
- checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
-
- wrmsrl(MSR_CSTAR, ia32_cstar_target);
-}
diff --git a/arch/x86_64/ia32/syscall32_syscall.S b/arch/x86_64/ia32/syscall32_syscall.S
deleted file mode 100644
index 8f8271bdf135..000000000000
--- a/arch/x86_64/ia32/syscall32_syscall.S
+++ /dev/null
@@ -1,17 +0,0 @@
-/* 32bit VDSOs mapped into user space. */
-
- .section ".init.data","aw"
-
- .globl syscall32_syscall
- .globl syscall32_syscall_end
-
-syscall32_syscall:
- .incbin "arch/x86_64/ia32/vsyscall-syscall.so"
-syscall32_syscall_end:
-
- .globl syscall32_sysenter
- .globl syscall32_sysenter_end
-
-syscall32_sysenter:
- .incbin "arch/x86_64/ia32/vsyscall-sysenter.so"
-syscall32_sysenter_end:
diff --git a/arch/x86_64/ia32/tls32.c b/arch/x86_64/ia32/tls32.c
deleted file mode 100644
index 1cc4340de3ca..000000000000
--- a/arch/x86_64/ia32/tls32.c
+++ /dev/null
@@ -1,163 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/user.h>
-
-#include <asm/uaccess.h>
-#include <asm/desc.h>
-#include <asm/system.h>
-#include <asm/ldt.h>
-#include <asm/processor.h>
-#include <asm/proto.h>
-
-/*
- * sys_alloc_thread_area: get a yet unused TLS descriptor index.
- */
-static int get_free_idx(void)
-{
- struct thread_struct *t = &current->thread;
- int idx;
-
- for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++)
- if (desc_empty((struct n_desc_struct *)(t->tls_array) + idx))
- return idx + GDT_ENTRY_TLS_MIN;
- return -ESRCH;
-}
-
-/*
- * Set a given TLS descriptor:
- * When you want addresses > 32bit use arch_prctl()
- */
-int do_set_thread_area(struct thread_struct *t, struct user_desc __user *u_info)
-{
- struct user_desc info;
- struct n_desc_struct *desc;
- int cpu, idx;
-
- if (copy_from_user(&info, u_info, sizeof(info)))
- return -EFAULT;
-
- idx = info.entry_number;
-
- /*
- * index -1 means the kernel should try to find and
- * allocate an empty descriptor:
- */
- if (idx == -1) {
- idx = get_free_idx();
- if (idx < 0)
- return idx;
- if (put_user(idx, &u_info->entry_number))
- return -EFAULT;
- }
-
- if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
- return -EINVAL;
-
- desc = ((struct n_desc_struct *)t->tls_array) + idx - GDT_ENTRY_TLS_MIN;
-
- /*
- * We must not get preempted while modifying the TLS.
- */
- cpu = get_cpu();
-
- if (LDT_empty(&info)) {
- desc->a = 0;
- desc->b = 0;
- } else {
- desc->a = LDT_entry_a(&info);
- desc->b = LDT_entry_b(&info);
- }
- if (t == &current->thread)
- load_TLS(t, cpu);
-
- put_cpu();
- return 0;
-}
-
-asmlinkage long sys32_set_thread_area(struct user_desc __user *u_info)
-{
- return do_set_thread_area(&current->thread, u_info);
-}
-
-
-/*
- * Get the current Thread-Local Storage area:
- */
-
-#define GET_BASE(desc) ( \
- (((desc)->a >> 16) & 0x0000ffff) | \
- (((desc)->b << 16) & 0x00ff0000) | \
- ( (desc)->b & 0xff000000) )
-
-#define GET_LIMIT(desc) ( \
- ((desc)->a & 0x0ffff) | \
- ((desc)->b & 0xf0000) )
-
-#define GET_32BIT(desc) (((desc)->b >> 22) & 1)
-#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3)
-#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1)
-#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1)
-#define GET_PRESENT(desc) (((desc)->b >> 15) & 1)
-#define GET_USEABLE(desc) (((desc)->b >> 20) & 1)
-#define GET_LONGMODE(desc) (((desc)->b >> 21) & 1)
-
-int do_get_thread_area(struct thread_struct *t, struct user_desc __user *u_info)
-{
- struct user_desc info;
- struct n_desc_struct *desc;
- int idx;
-
- if (get_user(idx, &u_info->entry_number))
- return -EFAULT;
- if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
- return -EINVAL;
-
- desc = ((struct n_desc_struct *)t->tls_array) + idx - GDT_ENTRY_TLS_MIN;
-
- memset(&info, 0, sizeof(struct user_desc));
- info.entry_number = idx;
- info.base_addr = GET_BASE(desc);
- info.limit = GET_LIMIT(desc);
- info.seg_32bit = GET_32BIT(desc);
- info.contents = GET_CONTENTS(desc);
- info.read_exec_only = !GET_WRITABLE(desc);
- info.limit_in_pages = GET_LIMIT_PAGES(desc);
- info.seg_not_present = !GET_PRESENT(desc);
- info.useable = GET_USEABLE(desc);
- info.lm = GET_LONGMODE(desc);
-
- if (copy_to_user(u_info, &info, sizeof(info)))
- return -EFAULT;
- return 0;
-}
-
-asmlinkage long sys32_get_thread_area(struct user_desc __user *u_info)
-{
- return do_get_thread_area(&current->thread, u_info);
-}
-
-
-int ia32_child_tls(struct task_struct *p, struct pt_regs *childregs)
-{
- struct n_desc_struct *desc;
- struct user_desc info;
- struct user_desc __user *cp;
- int idx;
-
- cp = (void __user *)childregs->rsi;
- if (copy_from_user(&info, cp, sizeof(info)))
- return -EFAULT;
- if (LDT_empty(&info))
- return -EINVAL;
-
- idx = info.entry_number;
- if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
- return -EINVAL;
-
- desc = (struct n_desc_struct *)(p->thread.tls_array) + idx - GDT_ENTRY_TLS_MIN;
- desc->a = LDT_entry_a(&info);
- desc->b = LDT_entry_b(&info);
-
- return 0;
-}
diff --git a/arch/x86_64/ia32/vsyscall-sigreturn.S b/arch/x86_64/ia32/vsyscall-sigreturn.S
deleted file mode 100644
index 1384367cdbe1..000000000000
--- a/arch/x86_64/ia32/vsyscall-sigreturn.S
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Common code for the sigreturn entry points on the vsyscall page.
- * This code uses SYSCALL_ENTER_KERNEL (either syscall or int $0x80)
- * to enter the kernel.
- * This file is #include'd by vsyscall-*.S to define them after the
- * vsyscall entry point. The addresses we get for these entry points
- * by doing ".balign 32" must match in both versions of the page.
- */
-
- .code32
- .section .text.sigreturn,"ax"
- .balign 32
- .globl __kernel_sigreturn
- .type __kernel_sigreturn,@function
-__kernel_sigreturn:
-.LSTART_sigreturn:
- popl %eax
- movl $__NR_ia32_sigreturn, %eax
- SYSCALL_ENTER_KERNEL
-.LEND_sigreturn:
- .size __kernel_sigreturn,.-.LSTART_sigreturn
-
- .section .text.rtsigreturn,"ax"
- .balign 32
- .globl __kernel_rt_sigreturn
- .type __kernel_rt_sigreturn,@function
-__kernel_rt_sigreturn:
-.LSTART_rt_sigreturn:
- movl $__NR_ia32_rt_sigreturn, %eax
- SYSCALL_ENTER_KERNEL
-.LEND_rt_sigreturn:
- .size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn
-
- .section .eh_frame,"a",@progbits
-.LSTARTFRAMES:
- .long .LENDCIES-.LSTARTCIES
-.LSTARTCIES:
- .long 0 /* CIE ID */
- .byte 1 /* Version number */
- .string "zRS" /* NUL-terminated augmentation string */
- .uleb128 1 /* Code alignment factor */
- .sleb128 -4 /* Data alignment factor */
- .byte 8 /* Return address register column */
- .uleb128 1 /* Augmentation value length */
- .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
- .byte 0x0c /* DW_CFA_def_cfa */
- .uleb128 4
- .uleb128 4
- .byte 0x88 /* DW_CFA_offset, column 0x8 */
- .uleb128 1
- .align 4
-.LENDCIES:
-
- .long .LENDFDE2-.LSTARTFDE2 /* Length FDE */
-.LSTARTFDE2:
- .long .LSTARTFDE2-.LSTARTFRAMES /* CIE pointer */
- /* HACK: The dwarf2 unwind routines will subtract 1 from the
- return address to get an address in the middle of the
- presumed call instruction. Since we didn't get here via
- a call, we need to include the nop before the real start
- to make up for it. */
- .long .LSTART_sigreturn-1-. /* PC-relative start address */
- .long .LEND_sigreturn-.LSTART_sigreturn+1
- .uleb128 0 /* Augmentation length */
- /* What follows are the instructions for the table generation.
- We record the locations of each register saved. This is
- complicated by the fact that the "CFA" is always assumed to
- be the value of the stack pointer in the caller. This means
- that we must define the CFA of this body of code to be the
- saved value of the stack pointer in the sigcontext. Which
- also means that there is no fixed relation to the other
- saved registers, which means that we must use DW_CFA_expression
- to compute their addresses. It also means that when we
- adjust the stack with the popl, we have to do it all over again. */
-
-#define do_cfa_expr(offset) \
- .byte 0x0f; /* DW_CFA_def_cfa_expression */ \
- .uleb128 1f-0f; /* length */ \
-0: .byte 0x74; /* DW_OP_breg4 */ \
- .sleb128 offset; /* offset */ \
- .byte 0x06; /* DW_OP_deref */ \
-1:
-
-#define do_expr(regno, offset) \
- .byte 0x10; /* DW_CFA_expression */ \
- .uleb128 regno; /* regno */ \
- .uleb128 1f-0f; /* length */ \
-0: .byte 0x74; /* DW_OP_breg4 */ \
- .sleb128 offset; /* offset */ \
-1:
-
- do_cfa_expr(IA32_SIGCONTEXT_esp+4)
- do_expr(0, IA32_SIGCONTEXT_eax+4)
- do_expr(1, IA32_SIGCONTEXT_ecx+4)
- do_expr(2, IA32_SIGCONTEXT_edx+4)
- do_expr(3, IA32_SIGCONTEXT_ebx+4)
- do_expr(5, IA32_SIGCONTEXT_ebp+4)
- do_expr(6, IA32_SIGCONTEXT_esi+4)
- do_expr(7, IA32_SIGCONTEXT_edi+4)
- do_expr(8, IA32_SIGCONTEXT_eip+4)
-
- .byte 0x42 /* DW_CFA_advance_loc 2 -- nop; popl eax. */
-
- do_cfa_expr(IA32_SIGCONTEXT_esp)
- do_expr(0, IA32_SIGCONTEXT_eax)
- do_expr(1, IA32_SIGCONTEXT_ecx)
- do_expr(2, IA32_SIGCONTEXT_edx)
- do_expr(3, IA32_SIGCONTEXT_ebx)
- do_expr(5, IA32_SIGCONTEXT_ebp)
- do_expr(6, IA32_SIGCONTEXT_esi)
- do_expr(7, IA32_SIGCONTEXT_edi)
- do_expr(8, IA32_SIGCONTEXT_eip)
-
- .align 4
-.LENDFDE2:
-
- .long .LENDFDE3-.LSTARTFDE3 /* Length FDE */
-.LSTARTFDE3:
- .long .LSTARTFDE3-.LSTARTFRAMES /* CIE pointer */
- /* HACK: See above wrt unwind library assumptions. */
- .long .LSTART_rt_sigreturn-1-. /* PC-relative start address */
- .long .LEND_rt_sigreturn-.LSTART_rt_sigreturn+1
- .uleb128 0 /* Augmentation */
- /* What follows are the instructions for the table generation.
- We record the locations of each register saved. This is
- slightly less complicated than the above, since we don't
- modify the stack pointer in the process. */
-
- do_cfa_expr(IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_esp)
- do_expr(0, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_eax)
- do_expr(1, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ecx)
- do_expr(2, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_edx)
- do_expr(3, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ebx)
- do_expr(5, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ebp)
- do_expr(6, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_esi)
- do_expr(7, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_edi)
- do_expr(8, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_eip)
-
- .align 4
-.LENDFDE3:
-
-#include "../../i386/kernel/vsyscall-note.S"
-
diff --git a/arch/x86_64/ia32/vsyscall-syscall.S b/arch/x86_64/ia32/vsyscall-syscall.S
deleted file mode 100644
index cf9ef678de3e..000000000000
--- a/arch/x86_64/ia32/vsyscall-syscall.S
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Code for the vsyscall page. This version uses the syscall instruction.
- */
-
-#include <asm/ia32_unistd.h>
-#include <asm/asm-offsets.h>
-#include <asm/segment.h>
-
- .code32
- .text
- .section .text.vsyscall,"ax"
- .globl __kernel_vsyscall
- .type __kernel_vsyscall,@function
-__kernel_vsyscall:
-.LSTART_vsyscall:
- push %ebp
-.Lpush_ebp:
- movl %ecx, %ebp
- syscall
- movl $__USER32_DS, %ecx
- movl %ecx, %ss
- movl %ebp, %ecx
- popl %ebp
-.Lpop_ebp:
- ret
-.LEND_vsyscall:
- .size __kernel_vsyscall,.-.LSTART_vsyscall
-
- .section .eh_frame,"a",@progbits
-.LSTARTFRAME:
- .long .LENDCIE-.LSTARTCIE
-.LSTARTCIE:
- .long 0 /* CIE ID */
- .byte 1 /* Version number */
- .string "zR" /* NUL-terminated augmentation string */
- .uleb128 1 /* Code alignment factor */
- .sleb128 -4 /* Data alignment factor */
- .byte 8 /* Return address register column */
- .uleb128 1 /* Augmentation value length */
- .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
- .byte 0x0c /* DW_CFA_def_cfa */
- .uleb128 4
- .uleb128 4
- .byte 0x88 /* DW_CFA_offset, column 0x8 */
- .uleb128 1
- .align 4
-.LENDCIE:
-
- .long .LENDFDE1-.LSTARTFDE1 /* Length FDE */
-.LSTARTFDE1:
- .long .LSTARTFDE1-.LSTARTFRAME /* CIE pointer */
- .long .LSTART_vsyscall-. /* PC-relative start address */
- .long .LEND_vsyscall-.LSTART_vsyscall
- .uleb128 0 /* Augmentation length */
- /* What follows are the instructions for the table generation.
- We have to record all changes of the stack pointer. */
- .byte 0x40 + .Lpush_ebp-.LSTART_vsyscall /* DW_CFA_advance_loc */
- .byte 0x0e /* DW_CFA_def_cfa_offset */
- .uleb128 8
- .byte 0x85, 0x02 /* DW_CFA_offset %ebp -8 */
- .byte 0x40 + .Lpop_ebp-.Lpush_ebp /* DW_CFA_advance_loc */
- .byte 0xc5 /* DW_CFA_restore %ebp */
- .byte 0x0e /* DW_CFA_def_cfa_offset */
- .uleb128 4
- .align 4
-.LENDFDE1:
-
-#define SYSCALL_ENTER_KERNEL syscall
-#include "vsyscall-sigreturn.S"
diff --git a/arch/x86_64/ia32/vsyscall-sysenter.S b/arch/x86_64/ia32/vsyscall-sysenter.S
deleted file mode 100644
index ae056e553d13..000000000000
--- a/arch/x86_64/ia32/vsyscall-sysenter.S
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Code for the vsyscall page. This version uses the sysenter instruction.
- */
-
-#include <asm/ia32_unistd.h>
-#include <asm/asm-offsets.h>
-
- .code32
- .text
- .section .text.vsyscall,"ax"
- .globl __kernel_vsyscall
- .type __kernel_vsyscall,@function
-__kernel_vsyscall:
-.LSTART_vsyscall:
- push %ecx
-.Lpush_ecx:
- push %edx
-.Lpush_edx:
- push %ebp
-.Lenter_kernel:
- movl %esp,%ebp
- sysenter
- .space 7,0x90
- jmp .Lenter_kernel
- /* 16: System call normal return point is here! */
- pop %ebp
-.Lpop_ebp:
- pop %edx
-.Lpop_edx:
- pop %ecx
-.Lpop_ecx:
- ret
-.LEND_vsyscall:
- .size __kernel_vsyscall,.-.LSTART_vsyscall
-
- .section .eh_frame,"a",@progbits
-.LSTARTFRAME:
- .long .LENDCIE-.LSTARTCIE
-.LSTARTCIE:
- .long 0 /* CIE ID */
- .byte 1 /* Version number */
- .string "zR" /* NUL-terminated augmentation string */
- .uleb128 1 /* Code alignment factor */
- .sleb128 -4 /* Data alignment factor */
- .byte 8 /* Return address register column */
- .uleb128 1 /* Augmentation value length */
- .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
- .byte 0x0c /* DW_CFA_def_cfa */
- .uleb128 4
- .uleb128 4
- .byte 0x88 /* DW_CFA_offset, column 0x8 */
- .uleb128 1
- .align 4
-.LENDCIE:
-
- .long .LENDFDE1-.LSTARTFDE1 /* Length FDE */
-.LSTARTFDE1:
- .long .LSTARTFDE1-.LSTARTFRAME /* CIE pointer */
- .long .LSTART_vsyscall-. /* PC-relative start address */
- .long .LEND_vsyscall-.LSTART_vsyscall
- .uleb128 0 /* Augmentation length */
- /* What follows are the instructions for the table generation.
- We have to record all changes of the stack pointer. */
- .byte 0x04 /* DW_CFA_advance_loc4 */
- .long .Lpush_ecx-.LSTART_vsyscall
- .byte 0x0e /* DW_CFA_def_cfa_offset */
- .byte 0x08 /* RA at offset 8 now */
- .byte 0x04 /* DW_CFA_advance_loc4 */
- .long .Lpush_edx-.Lpush_ecx
- .byte 0x0e /* DW_CFA_def_cfa_offset */
- .byte 0x0c /* RA at offset 12 now */
- .byte 0x04 /* DW_CFA_advance_loc4 */
- .long .Lenter_kernel-.Lpush_edx
- .byte 0x0e /* DW_CFA_def_cfa_offset */
- .byte 0x10 /* RA at offset 16 now */
- .byte 0x85, 0x04 /* DW_CFA_offset %ebp -16 */
- /* Finally the epilogue. */
- .byte 0x04 /* DW_CFA_advance_loc4 */
- .long .Lpop_ebp-.Lenter_kernel
- .byte 0x0e /* DW_CFA_def_cfa_offset */
- .byte 0x12 /* RA at offset 12 now */
- .byte 0xc5 /* DW_CFA_restore %ebp */
- .byte 0x04 /* DW_CFA_advance_loc4 */
- .long .Lpop_edx-.Lpop_ebp
- .byte 0x0e /* DW_CFA_def_cfa_offset */
- .byte 0x08 /* RA at offset 8 now */
- .byte 0x04 /* DW_CFA_advance_loc4 */
- .long .Lpop_ecx-.Lpop_edx
- .byte 0x0e /* DW_CFA_def_cfa_offset */
- .byte 0x04 /* RA at offset 4 now */
- .align 4
-.LENDFDE1:
-
-#define SYSCALL_ENTER_KERNEL int $0x80
-#include "vsyscall-sigreturn.S"
diff --git a/arch/x86_64/ia32/vsyscall.lds b/arch/x86_64/ia32/vsyscall.lds
deleted file mode 100644
index 1dc86ff5bcb9..000000000000
--- a/arch/x86_64/ia32/vsyscall.lds
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Linker script for vsyscall DSO. The vsyscall page is an ELF shared
- * object prelinked to its virtual address. This script controls its layout.
- */
-
-/* This must match <asm/fixmap.h>. */
-VSYSCALL_BASE = 0xffffe000;
-
-SECTIONS
-{
- . = VSYSCALL_BASE + SIZEOF_HEADERS;
-
- .hash : { *(.hash) } :text
- .gnu.hash : { *(.gnu.hash) }
- .dynsym : { *(.dynsym) }
- .dynstr : { *(.dynstr) }
- .gnu.version : { *(.gnu.version) }
- .gnu.version_d : { *(.gnu.version_d) }
- .gnu.version_r : { *(.gnu.version_r) }
-
- /* This linker script is used both with -r and with -shared.
- For the layouts to match, we need to skip more than enough
- space for the dynamic symbol table et al. If this amount
- is insufficient, ld -shared will barf. Just increase it here. */
- . = VSYSCALL_BASE + 0x400;
-
- .text.vsyscall : { *(.text.vsyscall) } :text =0x90909090
-
- /* This is an 32bit object and we cannot easily get the offsets
- into the 64bit kernel. Just hardcode them here. This assumes
- that all the stubs don't need more than 0x100 bytes. */
- . = VSYSCALL_BASE + 0x500;
-
- .text.sigreturn : { *(.text.sigreturn) } :text =0x90909090
-
- . = VSYSCALL_BASE + 0x600;
-
- .text.rtsigreturn : { *(.text.rtsigreturn) } :text =0x90909090
-
- .note : { *(.note.*) } :text :note
- .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
- .eh_frame : { KEEP (*(.eh_frame)) } :text
- .dynamic : { *(.dynamic) } :text :dynamic
- .useless : {
- *(.got.plt) *(.got)
- *(.data .data.* .gnu.linkonce.d.*)
- *(.dynbss)
- *(.bss .bss.* .gnu.linkonce.b.*)
- } :text
-}
-
-/*
- * We must supply the ELF program headers explicitly to get just one
- * PT_LOAD segment, and set the flags explicitly to make segments read-only.
- */
-PHDRS
-{
- text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
- dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
- note PT_NOTE FLAGS(4); /* PF_R */
- eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
-}
-
-/*
- * This controls what symbols we export from the DSO.
- */
-VERSION
-{
- LINUX_2.5 {
- global:
- __kernel_vsyscall;
- __kernel_sigreturn;
- __kernel_rt_sigreturn;
-
- local: *;
- };
-}
-
-/* The ELF entry point can be used to set the AT_SYSINFO value. */
-ENTRY(__kernel_vsyscall);
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
deleted file mode 100644
index ff5d8c9b96d9..000000000000
--- a/arch/x86_64/kernel/Makefile
+++ /dev/null
@@ -1,63 +0,0 @@
-#
-# Makefile for the linux kernel.
-#
-
-extra-y := head.o head64.o init_task.o vmlinux.lds
-EXTRA_AFLAGS := -traditional
-obj-y := process.o signal.o entry.o traps.o irq.o \
- ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \
- x8664_ksyms.o i387.o syscall.o vsyscall.o \
- setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
- pci-dma.o pci-nommu.o alternative.o hpet.o tsc.o bugs.o \
- perfctr-watchdog.o
-
-obj-$(CONFIG_STACKTRACE) += stacktrace.o
-obj-$(CONFIG_X86_MCE) += mce.o therm_throt.o
-obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
-obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o
-obj-$(CONFIG_MTRR) += ../../i386/kernel/cpu/mtrr/
-obj-$(CONFIG_ACPI) += acpi/
-obj-$(CONFIG_X86_MSR) += msr.o
-obj-$(CONFIG_MICROCODE) += microcode.o
-obj-$(CONFIG_X86_CPUID) += cpuid.o
-obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o tsc_sync.o
-obj-y += apic.o nmi.o
-obj-y += io_apic.o mpparse.o genapic.o genapic_flat.o
-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
-obj-$(CONFIG_PM) += suspend.o
-obj-$(CONFIG_HIBERNATION) += suspend_asm.o
-obj-$(CONFIG_CPU_FREQ) += cpufreq/
-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
-obj-$(CONFIG_IOMMU) += pci-gart.o aperture.o
-obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary.o tce.o
-obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
-obj-$(CONFIG_KPROBES) += kprobes.o
-obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o
-obj-$(CONFIG_X86_VSMP) += vsmp.o
-obj-$(CONFIG_K8_NB) += k8.o
-obj-$(CONFIG_AUDIT) += audit.o
-
-obj-$(CONFIG_MODULES) += module.o
-obj-$(CONFIG_PCI) += early-quirks.o
-
-obj-y += topology.o
-obj-y += intel_cacheinfo.o
-obj-y += addon_cpuid_features.o
-obj-y += pcspeaker.o
-
-CFLAGS_vsyscall.o := $(PROFILING) -g0
-
-therm_throt-y += ../../i386/kernel/cpu/mcheck/therm_throt.o
-bootflag-y += ../../i386/kernel/bootflag.o
-cpuid-$(subst m,y,$(CONFIG_X86_CPUID)) += ../../i386/kernel/cpuid.o
-topology-y += ../../i386/kernel/topology.o
-microcode-$(subst m,y,$(CONFIG_MICROCODE)) += ../../i386/kernel/microcode.o
-intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o
-addon_cpuid_features-y += ../../i386/kernel/cpu/addon_cpuid_features.o
-quirks-y += ../../i386/kernel/quirks.o
-i8237-y += ../../i386/kernel/i8237.o
-msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o
-alternative-y += ../../i386/kernel/alternative.o
-pcspeaker-y += ../../i386/kernel/pcspeaker.o
-perfctr-watchdog-y += ../../i386/kernel/cpu/perfctr-watchdog.o
diff --git a/arch/x86_64/kernel/acpi/Makefile b/arch/x86_64/kernel/acpi/Makefile
deleted file mode 100644
index 080b9963f1bc..000000000000
--- a/arch/x86_64/kernel/acpi/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
-obj-y := boot.o
-boot-y := ../../../i386/kernel/acpi/boot.o
-obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o
-
-ifneq ($(CONFIG_ACPI_PROCESSOR),)
-obj-y += processor.o
-processor-y := ../../../i386/kernel/acpi/processor.o ../../../i386/kernel/acpi/cstate.o
-endif
-
diff --git a/arch/x86_64/kernel/acpi/sleep.c b/arch/x86_64/kernel/acpi/sleep.c
deleted file mode 100644
index 79475d237071..000000000000
--- a/arch/x86_64/kernel/acpi/sleep.c
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * acpi.c - Architecture-Specific Low-Level ACPI Support
- *
- * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
- * Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com>
- * Copyright (C) 2001 Patrick Mochel <mochel@osdl.org>
- * Copyright (C) 2002 Andi Kleen, SuSE Labs (x86-64 port)
- * Copyright (C) 2003 Pavel Machek, SuSE Labs
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/stddef.h>
-#include <linux/slab.h>
-#include <linux/pci.h>
-#include <linux/bootmem.h>
-#include <linux/acpi.h>
-#include <linux/cpumask.h>
-
-#include <asm/mpspec.h>
-#include <asm/io.h>
-#include <asm/apic.h>
-#include <asm/apicdef.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
-#include <asm/io_apic.h>
-#include <asm/proto.h>
-#include <asm/tlbflush.h>
-
-/* --------------------------------------------------------------------------
- Low-Level Sleep Support
- -------------------------------------------------------------------------- */
-
-/* address in low memory of the wakeup routine. */
-unsigned long acpi_wakeup_address = 0;
-unsigned long acpi_realmode_flags;
-extern char wakeup_start, wakeup_end;
-
-extern unsigned long acpi_copy_wakeup_routine(unsigned long);
-
-/**
- * acpi_save_state_mem - save kernel state
- *
- * Create an identity mapped page table and copy the wakeup routine to
- * low memory.
- */
-int acpi_save_state_mem(void)
-{
- memcpy((void *)acpi_wakeup_address, &wakeup_start,
- &wakeup_end - &wakeup_start);
- acpi_copy_wakeup_routine(acpi_wakeup_address);
-
- return 0;
-}
-
-/*
- * acpi_restore_state
- */
-void acpi_restore_state_mem(void)
-{
-}
-
-/**
- * acpi_reserve_bootmem - do _very_ early ACPI initialisation
- *
- * We allocate a page in low memory for the wakeup
- * routine for when we come back from a sleep state. The
- * runtime allocator allows specification of <16M pages, but not
- * <1M pages.
- */
-void __init acpi_reserve_bootmem(void)
-{
- acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE*2);
- if ((&wakeup_end - &wakeup_start) > (PAGE_SIZE*2))
- printk(KERN_CRIT
- "ACPI: Wakeup code way too big, will crash on attempt"
- " to suspend\n");
-}
-
-static int __init acpi_sleep_setup(char *str)
-{
- while ((str != NULL) && (*str != '\0')) {
- if (strncmp(str, "s3_bios", 7) == 0)
- acpi_realmode_flags |= 1;
- if (strncmp(str, "s3_mode", 7) == 0)
- acpi_realmode_flags |= 2;
- if (strncmp(str, "s3_beep", 7) == 0)
- acpi_realmode_flags |= 4;
- str = strchr(str, ',');
- if (str != NULL)
- str += strspn(str, ", \t");
- }
- return 1;
-}
-
-__setup("acpi_sleep=", acpi_sleep_setup);
-
-void acpi_pci_link_exit(void)
-{
-}
diff --git a/arch/x86_64/kernel/acpi/wakeup.S b/arch/x86_64/kernel/acpi/wakeup.S
deleted file mode 100644
index a06f2bcabef9..000000000000
--- a/arch/x86_64/kernel/acpi/wakeup.S
+++ /dev/null
@@ -1,456 +0,0 @@
-.text
-#include <linux/linkage.h>
-#include <asm/segment.h>
-#include <asm/pgtable.h>
-#include <asm/page.h>
-#include <asm/msr.h>
-
-# Copyright 2003 Pavel Machek <pavel@suse.cz>, distribute under GPLv2
-#
-# wakeup_code runs in real mode, and at unknown address (determined at run-time).
-# Therefore it must only use relative jumps/calls.
-#
-# Do we need to deal with A20? It is okay: ACPI specs says A20 must be enabled
-#
-# If physical address of wakeup_code is 0x12345, BIOS should call us with
-# cs = 0x1234, eip = 0x05
-#
-
-#define BEEP \
- inb $97, %al; \
- outb %al, $0x80; \
- movb $3, %al; \
- outb %al, $97; \
- outb %al, $0x80; \
- movb $-74, %al; \
- outb %al, $67; \
- outb %al, $0x80; \
- movb $-119, %al; \
- outb %al, $66; \
- outb %al, $0x80; \
- movb $15, %al; \
- outb %al, $66;
-
-
-ALIGN
- .align 16
-ENTRY(wakeup_start)
-wakeup_code:
- wakeup_code_start = .
- .code16
-
-# Running in *copy* of this code, somewhere in low 1MB.
-
- movb $0xa1, %al ; outb %al, $0x80
- cli
- cld
- # setup data segment
- movw %cs, %ax
- movw %ax, %ds # Make ds:0 point to wakeup_start
- movw %ax, %ss
-
- # Data segment must be set up before we can see whether to beep.
- testl $4, realmode_flags - wakeup_code
- jz 1f
- BEEP
-1:
-
- # Private stack is needed for ASUS board
- mov $(wakeup_stack - wakeup_code), %sp
-
- pushl $0 # Kill any dangerous flags
- popfl
-
- movl real_magic - wakeup_code, %eax
- cmpl $0x12345678, %eax
- jne bogus_real_magic
-
- call verify_cpu # Verify the cpu supports long
- # mode
- testl %eax, %eax
- jnz no_longmode
-
- testl $1, realmode_flags - wakeup_code
- jz 1f
- lcall $0xc000,$3
- movw %cs, %ax
- movw %ax, %ds # Bios might have played with that
- movw %ax, %ss
-1:
-
- testl $2, realmode_flags - wakeup_code
- jz 1f
- mov video_mode - wakeup_code, %ax
- call mode_set
-1:
-
- movw $0xb800, %ax
- movw %ax,%fs
- movw $0x0e00 + 'L', %fs:(0x10)
-
- movb $0xa2, %al ; outb %al, $0x80
-
- mov %ds, %ax # Find 32bit wakeup_code addr
- movzx %ax, %esi # (Convert %ds:gdt to a liner ptr)
- shll $4, %esi
- # Fix up the vectors
- addl %esi, wakeup_32_vector - wakeup_code
- addl %esi, wakeup_long64_vector - wakeup_code
- addl %esi, gdt_48a + 2 - wakeup_code # Fixup the gdt pointer
-
- lidtl %ds:idt_48a - wakeup_code
- lgdtl %ds:gdt_48a - wakeup_code # load gdt with whatever is
- # appropriate
-
- movl $1, %eax # protected mode (PE) bit
- lmsw %ax # This is it!
- jmp 1f
-1:
-
- ljmpl *(wakeup_32_vector - wakeup_code)
-
- .balign 4
-wakeup_32_vector:
- .long wakeup_32 - wakeup_code
- .word __KERNEL32_CS, 0
-
- .code32
-wakeup_32:
-# Running in this code, but at low address; paging is not yet turned on.
- movb $0xa5, %al ; outb %al, $0x80
-
- movl $__KERNEL_DS, %eax
- movl %eax, %ds
-
- movw $0x0e00 + 'i', %ds:(0xb8012)
- movb $0xa8, %al ; outb %al, $0x80;
-
- /*
- * Prepare for entering 64bits mode
- */
-
- /* Enable PAE */
- xorl %eax, %eax
- btsl $5, %eax
- movl %eax, %cr4
-
- /* Setup early boot stage 4 level pagetables */
- leal (wakeup_level4_pgt - wakeup_code)(%esi), %eax
- movl %eax, %cr3
-
- /* Check if nx is implemented */
- movl $0x80000001, %eax
- cpuid
- movl %edx,%edi
-
- /* Enable Long Mode */
- xorl %eax, %eax
- btsl $_EFER_LME, %eax
-
- /* No Execute supported? */
- btl $20,%edi
- jnc 1f
- btsl $_EFER_NX, %eax
-
- /* Make changes effective */
-1: movl $MSR_EFER, %ecx
- xorl %edx, %edx
- wrmsr
-
- xorl %eax, %eax
- btsl $31, %eax /* Enable paging and in turn activate Long Mode */
- btsl $0, %eax /* Enable protected mode */
-
- /* Make changes effective */
- movl %eax, %cr0
-
- /* At this point:
- CR4.PAE must be 1
- CS.L must be 0
- CR3 must point to PML4
- Next instruction must be a branch
- This must be on identity-mapped page
- */
- /*
- * At this point we're in long mode but in 32bit compatibility mode
- * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn
- * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we load
- * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
- */
-
- /* Finally jump in 64bit mode */
- ljmp *(wakeup_long64_vector - wakeup_code)(%esi)
-
- .balign 4
-wakeup_long64_vector:
- .long wakeup_long64 - wakeup_code
- .word __KERNEL_CS, 0
-
-.code64
-
- /* Hooray, we are in Long 64-bit mode (but still running in
- * low memory)
- */
-wakeup_long64:
- /*
- * We must switch to a new descriptor in kernel space for the GDT
- * because soon the kernel won't have access anymore to the userspace
- * addresses where we're currently running on. We have to do that here
- * because in 32bit we couldn't load a 64bit linear address.
- */
- lgdt cpu_gdt_descr
-
- movw $0x0e00 + 'n', %ds:(0xb8014)
- movb $0xa9, %al ; outb %al, $0x80
-
- movq saved_magic, %rax
- movq $0x123456789abcdef0, %rdx
- cmpq %rdx, %rax
- jne bogus_64_magic
-
- movw $0x0e00 + 'u', %ds:(0xb8016)
-
- nop
- nop
- movw $__KERNEL_DS, %ax
- movw %ax, %ss
- movw %ax, %ds
- movw %ax, %es
- movw %ax, %fs
- movw %ax, %gs
- movq saved_rsp, %rsp
-
- movw $0x0e00 + 'x', %ds:(0xb8018)
- movq saved_rbx, %rbx
- movq saved_rdi, %rdi
- movq saved_rsi, %rsi
- movq saved_rbp, %rbp
-
- movw $0x0e00 + '!', %ds:(0xb801a)
- movq saved_rip, %rax
- jmp *%rax
-
-.code32
-
- .align 64
-gdta:
- /* Its good to keep gdt in sync with one in trampoline.S */
- .word 0, 0, 0, 0 # dummy
- /* ??? Why I need the accessed bit set in order for this to work? */
- .quad 0x00cf9b000000ffff # __KERNEL32_CS
- .quad 0x00af9b000000ffff # __KERNEL_CS
- .quad 0x00cf93000000ffff # __KERNEL_DS
-
-idt_48a:
- .word 0 # idt limit = 0
- .word 0, 0 # idt base = 0L
-
-gdt_48a:
- .word 0x800 # gdt limit=2048,
- # 256 GDT entries
- .long gdta - wakeup_code # gdt base (relocated in later)
-
-real_magic: .quad 0
-video_mode: .quad 0
-realmode_flags: .quad 0
-
-.code16
-bogus_real_magic:
- movb $0xba,%al ; outb %al,$0x80
- jmp bogus_real_magic
-
-.code64
-bogus_64_magic:
- movb $0xb3,%al ; outb %al,$0x80
- jmp bogus_64_magic
-
-.code16
-no_longmode:
- movb $0xbc,%al ; outb %al,$0x80
- jmp no_longmode
-
-#include "../verify_cpu.S"
-
-/* This code uses an extended set of video mode numbers. These include:
- * Aliases for standard modes
- * NORMAL_VGA (-1)
- * EXTENDED_VGA (-2)
- * ASK_VGA (-3)
- * Video modes numbered by menu position -- NOT RECOMMENDED because of lack
- * of compatibility when extending the table. These are between 0x00 and 0xff.
- */
-#define VIDEO_FIRST_MENU 0x0000
-
-/* Standard BIOS video modes (BIOS number + 0x0100) */
-#define VIDEO_FIRST_BIOS 0x0100
-
-/* VESA BIOS video modes (VESA number + 0x0200) */
-#define VIDEO_FIRST_VESA 0x0200
-
-/* Video7 special modes (BIOS number + 0x0900) */
-#define VIDEO_FIRST_V7 0x0900
-
-# Setting of user mode (AX=mode ID) => CF=success
-
-# For now, we only handle VESA modes (0x0200..0x03ff). To handle other
-# modes, we should probably compile in the video code from the boot
-# directory.
-.code16
-mode_set:
- movw %ax, %bx
- subb $VIDEO_FIRST_VESA>>8, %bh
- cmpb $2, %bh
- jb check_vesa
-
-setbad:
- clc
- ret
-
-check_vesa:
- orw $0x4000, %bx # Use linear frame buffer
- movw $0x4f02, %ax # VESA BIOS mode set call
- int $0x10
- cmpw $0x004f, %ax # AL=4f if implemented
- jnz setbad # AH=0 if OK
-
- stc
- ret
-
-wakeup_stack_begin: # Stack grows down
-
-.org 0xff0
-wakeup_stack: # Just below end of page
-
-.org 0x1000
-ENTRY(wakeup_level4_pgt)
- .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
- .fill 510,8,0
- /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
- .quad level3_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
-
-ENTRY(wakeup_end)
-
-##
-# acpi_copy_wakeup_routine
-#
-# Copy the above routine to low memory.
-#
-# Parameters:
-# %rdi: place to copy wakeup routine to
-#
-# Returned address is location of code in low memory (past data and stack)
-#
- .code64
-ENTRY(acpi_copy_wakeup_routine)
- pushq %rax
- pushq %rdx
-
- movl saved_video_mode, %edx
- movl %edx, video_mode - wakeup_start (,%rdi)
- movl acpi_realmode_flags, %edx
- movl %edx, realmode_flags - wakeup_start (,%rdi)
- movq $0x12345678, real_magic - wakeup_start (,%rdi)
- movq $0x123456789abcdef0, %rdx
- movq %rdx, saved_magic
-
- movq saved_magic, %rax
- movq $0x123456789abcdef0, %rdx
- cmpq %rdx, %rax
- jne bogus_64_magic
-
- # restore the regs we used
- popq %rdx
- popq %rax
-ENTRY(do_suspend_lowlevel_s4bios)
- ret
-
- .align 2
- .p2align 4,,15
-.globl do_suspend_lowlevel
- .type do_suspend_lowlevel,@function
-do_suspend_lowlevel:
-.LFB5:
- subq $8, %rsp
- xorl %eax, %eax
- call save_processor_state
-
- movq %rsp, saved_context_esp(%rip)
- movq %rax, saved_context_eax(%rip)
- movq %rbx, saved_context_ebx(%rip)
- movq %rcx, saved_context_ecx(%rip)
- movq %rdx, saved_context_edx(%rip)
- movq %rbp, saved_context_ebp(%rip)
- movq %rsi, saved_context_esi(%rip)
- movq %rdi, saved_context_edi(%rip)
- movq %r8, saved_context_r08(%rip)
- movq %r9, saved_context_r09(%rip)
- movq %r10, saved_context_r10(%rip)
- movq %r11, saved_context_r11(%rip)
- movq %r12, saved_context_r12(%rip)
- movq %r13, saved_context_r13(%rip)
- movq %r14, saved_context_r14(%rip)
- movq %r15, saved_context_r15(%rip)
- pushfq ; popq saved_context_eflags(%rip)
-
- movq $.L97, saved_rip(%rip)
-
- movq %rsp,saved_rsp
- movq %rbp,saved_rbp
- movq %rbx,saved_rbx
- movq %rdi,saved_rdi
- movq %rsi,saved_rsi
-
- addq $8, %rsp
- movl $3, %edi
- xorl %eax, %eax
- jmp acpi_enter_sleep_state
-.L97:
- .p2align 4,,7
-.L99:
- .align 4
- movl $24, %eax
- movw %ax, %ds
- movq saved_context+58(%rip), %rax
- movq %rax, %cr4
- movq saved_context+50(%rip), %rax
- movq %rax, %cr3
- movq saved_context+42(%rip), %rax
- movq %rax, %cr2
- movq saved_context+34(%rip), %rax
- movq %rax, %cr0
- pushq saved_context_eflags(%rip) ; popfq
- movq saved_context_esp(%rip), %rsp
- movq saved_context_ebp(%rip), %rbp
- movq saved_context_eax(%rip), %rax
- movq saved_context_ebx(%rip), %rbx
- movq saved_context_ecx(%rip), %rcx
- movq saved_context_edx(%rip), %rdx
- movq saved_context_esi(%rip), %rsi
- movq saved_context_edi(%rip), %rdi
- movq saved_context_r08(%rip), %r8
- movq saved_context_r09(%rip), %r9
- movq saved_context_r10(%rip), %r10
- movq saved_context_r11(%rip), %r11
- movq saved_context_r12(%rip), %r12
- movq saved_context_r13(%rip), %r13
- movq saved_context_r14(%rip), %r14
- movq saved_context_r15(%rip), %r15
-
- xorl %eax, %eax
- addq $8, %rsp
- jmp restore_processor_state
-.LFE5:
-.Lfe5:
- .size do_suspend_lowlevel,.Lfe5-do_suspend_lowlevel
-
-.data
-ALIGN
-ENTRY(saved_rbp) .quad 0
-ENTRY(saved_rsi) .quad 0
-ENTRY(saved_rdi) .quad 0
-ENTRY(saved_rbx) .quad 0
-
-ENTRY(saved_rip) .quad 0
-ENTRY(saved_rsp) .quad 0
-
-ENTRY(saved_magic) .quad 0
diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c
deleted file mode 100644
index 8f681cae7bf7..000000000000
--- a/arch/x86_64/kernel/aperture.c
+++ /dev/null
@@ -1,298 +0,0 @@
-/*
- * Firmware replacement code.
- *
- * Work around broken BIOSes that don't set an aperture or only set the
- * aperture in the AGP bridge.
- * If all fails map the aperture over some low memory. This is cheaper than
- * doing bounce buffering. The memory is lost. This is done at early boot
- * because only the bootmem allocator can allocate 32+MB.
- *
- * Copyright 2002 Andi Kleen, SuSE Labs.
- */
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/mmzone.h>
-#include <linux/pci_ids.h>
-#include <linux/pci.h>
-#include <linux/bitops.h>
-#include <linux/ioport.h>
-#include <asm/e820.h>
-#include <asm/io.h>
-#include <asm/iommu.h>
-#include <asm/pci-direct.h>
-#include <asm/dma.h>
-#include <asm/k8.h>
-
-int iommu_aperture;
-int iommu_aperture_disabled __initdata = 0;
-int iommu_aperture_allowed __initdata = 0;
-
-int fallback_aper_order __initdata = 1; /* 64MB */
-int fallback_aper_force __initdata = 0;
-
-int fix_aperture __initdata = 1;
-
-static struct resource gart_resource = {
- .name = "GART",
- .flags = IORESOURCE_MEM,
-};
-
-static void __init insert_aperture_resource(u32 aper_base, u32 aper_size)
-{
- gart_resource.start = aper_base;
- gart_resource.end = aper_base + aper_size - 1;
- insert_resource(&iomem_resource, &gart_resource);
-}
-
-/* This code runs before the PCI subsystem is initialized, so just
- access the northbridge directly. */
-
-static u32 __init allocate_aperture(void)
-{
- u32 aper_size;
- void *p;
-
- if (fallback_aper_order > 7)
- fallback_aper_order = 7;
- aper_size = (32 * 1024 * 1024) << fallback_aper_order;
-
- /*
- * Aperture has to be naturally aligned. This means an 2GB aperture won't
- * have much chance of finding a place in the lower 4GB of memory.
- * Unfortunately we cannot move it up because that would make the
- * IOMMU useless.
- */
- p = __alloc_bootmem_nopanic(aper_size, aper_size, 0);
- if (!p || __pa(p)+aper_size > 0xffffffff) {
- printk("Cannot allocate aperture memory hole (%p,%uK)\n",
- p, aper_size>>10);
- if (p)
- free_bootmem(__pa(p), aper_size);
- return 0;
- }
- printk("Mapping aperture over %d KB of RAM @ %lx\n",
- aper_size >> 10, __pa(p));
- insert_aperture_resource((u32)__pa(p), aper_size);
- return (u32)__pa(p);
-}
-
-static int __init aperture_valid(u64 aper_base, u32 aper_size)
-{
- if (!aper_base)
- return 0;
- if (aper_size < 64*1024*1024) {
- printk("Aperture too small (%d MB)\n", aper_size>>20);
- return 0;
- }
- if (aper_base + aper_size > 0x100000000UL) {
- printk("Aperture beyond 4GB. Ignoring.\n");
- return 0;
- }
- if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
- printk("Aperture pointing to e820 RAM. Ignoring.\n");
- return 0;
- }
- return 1;
-}
-
-/* Find a PCI capability */
-static __u32 __init find_cap(int num, int slot, int func, int cap)
-{
- u8 pos;
- int bytes;
- if (!(read_pci_config_16(num,slot,func,PCI_STATUS) & PCI_STATUS_CAP_LIST))
- return 0;
- pos = read_pci_config_byte(num,slot,func,PCI_CAPABILITY_LIST);
- for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) {
- u8 id;
- pos &= ~3;
- id = read_pci_config_byte(num,slot,func,pos+PCI_CAP_LIST_ID);
- if (id == 0xff)
- break;
- if (id == cap)
- return pos;
- pos = read_pci_config_byte(num,slot,func,pos+PCI_CAP_LIST_NEXT);
- }
- return 0;
-}
-
-/* Read a standard AGPv3 bridge header */
-static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order)
-{
- u32 apsize;
- u32 apsizereg;
- int nbits;
- u32 aper_low, aper_hi;
- u64 aper;
-
- printk("AGP bridge at %02x:%02x:%02x\n", num, slot, func);
- apsizereg = read_pci_config_16(num,slot,func, cap + 0x14);
- if (apsizereg == 0xffffffff) {
- printk("APSIZE in AGP bridge unreadable\n");
- return 0;
- }
-
- apsize = apsizereg & 0xfff;
- /* Some BIOS use weird encodings not in the AGPv3 table. */
- if (apsize & 0xff)
- apsize |= 0xf00;
- nbits = hweight16(apsize);
- *order = 7 - nbits;
- if ((int)*order < 0) /* < 32MB */
- *order = 0;
-
- aper_low = read_pci_config(num,slot,func, 0x10);
- aper_hi = read_pci_config(num,slot,func,0x14);
- aper = (aper_low & ~((1<<22)-1)) | ((u64)aper_hi << 32);
-
- printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n",
- aper, 32 << *order, apsizereg);
-
- if (!aperture_valid(aper, (32*1024*1024) << *order))
- return 0;
- return (u32)aper;
-}
-
-/* Look for an AGP bridge. Windows only expects the aperture in the
- AGP bridge and some BIOS forget to initialize the Northbridge too.
- Work around this here.
-
- Do an PCI bus scan by hand because we're running before the PCI
- subsystem.
-
- All K8 AGP bridges are AGPv3 compliant, so we can do this scan
- generically. It's probably overkill to always scan all slots because
- the AGP bridges should be always an own bus on the HT hierarchy,
- but do it here for future safety. */
-static __u32 __init search_agp_bridge(u32 *order, int *valid_agp)
-{
- int num, slot, func;
-
- /* Poor man's PCI discovery */
- for (num = 0; num < 256; num++) {
- for (slot = 0; slot < 32; slot++) {
- for (func = 0; func < 8; func++) {
- u32 class, cap;
- u8 type;
- class = read_pci_config(num,slot,func,
- PCI_CLASS_REVISION);
- if (class == 0xffffffff)
- break;
-
- switch (class >> 16) {
- case PCI_CLASS_BRIDGE_HOST:
- case PCI_CLASS_BRIDGE_OTHER: /* needed? */
- /* AGP bridge? */
- cap = find_cap(num,slot,func,PCI_CAP_ID_AGP);
- if (!cap)
- break;
- *valid_agp = 1;
- return read_agp(num,slot,func,cap,order);
- }
-
- /* No multi-function device? */
- type = read_pci_config_byte(num,slot,func,
- PCI_HEADER_TYPE);
- if (!(type & 0x80))
- break;
- }
- }
- }
- printk("No AGP bridge found\n");
- return 0;
-}
-
-void __init iommu_hole_init(void)
-{
- int fix, num;
- u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0;
- u64 aper_base, last_aper_base = 0;
- int valid_agp = 0;
-
- if (iommu_aperture_disabled || !fix_aperture || !early_pci_allowed())
- return;
-
- printk(KERN_INFO "Checking aperture...\n");
-
- fix = 0;
- for (num = 24; num < 32; num++) {
- if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
- continue;
-
- iommu_detected = 1;
- iommu_aperture = 1;
-
- aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7;
- aper_size = (32 * 1024 * 1024) << aper_order;
- aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff;
- aper_base <<= 25;
-
- printk("CPU %d: aperture @ %Lx size %u MB\n", num-24,
- aper_base, aper_size>>20);
-
- if (!aperture_valid(aper_base, aper_size)) {
- fix = 1;
- break;
- }
-
- if ((last_aper_order && aper_order != last_aper_order) ||
- (last_aper_base && aper_base != last_aper_base)) {
- fix = 1;
- break;
- }
- last_aper_order = aper_order;
- last_aper_base = aper_base;
- }
-
- if (!fix && !fallback_aper_force) {
- if (last_aper_base) {
- unsigned long n = (32 * 1024 * 1024) << last_aper_order;
- insert_aperture_resource((u32)last_aper_base, n);
- }
- return;
- }
-
- if (!fallback_aper_force)
- aper_alloc = search_agp_bridge(&aper_order, &valid_agp);
-
- if (aper_alloc) {
- /* Got the aperture from the AGP bridge */
- } else if (swiotlb && !valid_agp) {
- /* Do nothing */
- } else if ((!no_iommu && end_pfn > MAX_DMA32_PFN) ||
- force_iommu ||
- valid_agp ||
- fallback_aper_force) {
- printk("Your BIOS doesn't leave a aperture memory hole\n");
- printk("Please enable the IOMMU option in the BIOS setup\n");
- printk("This costs you %d MB of RAM\n",
- 32 << fallback_aper_order);
-
- aper_order = fallback_aper_order;
- aper_alloc = allocate_aperture();
- if (!aper_alloc) {
- /* Could disable AGP and IOMMU here, but it's probably
- not worth it. But the later users cannot deal with
- bad apertures and turning on the aperture over memory
- causes very strange problems, so it's better to
- panic early. */
- panic("Not enough memory for aperture");
- }
- } else {
- return;
- }
-
- /* Fix up the north bridges */
- for (num = 24; num < 32; num++) {
- if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
- continue;
-
- /* Don't enable translation yet. That is done later.
- Assume this BIOS didn't initialise the GART so
- just overwrite all previous bits */
- write_pci_config(0, num, 3, 0x90, aper_order<<1);
- write_pci_config(0, num, 3, 0x94, aper_alloc>>25);
- }
-}
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
deleted file mode 100644
index 925758dbca0c..000000000000
--- a/arch/x86_64/kernel/apic.c
+++ /dev/null
@@ -1,1253 +0,0 @@
-/*
- * Local APIC handling, local APIC timers
- *
- * (c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
- *
- * Fixes
- * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
- * thanks to Eric Gilmore
- * and Rolf G. Tews
- * for testing these extensively.
- * Maciej W. Rozycki : Various updates and fixes.
- * Mikael Pettersson : Power Management for UP-APIC.
- * Pavel Machek and
- * Mikael Pettersson : PM converted to driver model.
- */
-
-#include <linux/init.h>
-
-#include <linux/mm.h>
-#include <linux/delay.h>
-#include <linux/bootmem.h>
-#include <linux/interrupt.h>
-#include <linux/mc146818rtc.h>
-#include <linux/kernel_stat.h>
-#include <linux/sysdev.h>
-#include <linux/module.h>
-#include <linux/ioport.h>
-
-#include <asm/atomic.h>
-#include <asm/smp.h>
-#include <asm/mtrr.h>
-#include <asm/mpspec.h>
-#include <asm/pgalloc.h>
-#include <asm/mach_apic.h>
-#include <asm/nmi.h>
-#include <asm/idle.h>
-#include <asm/proto.h>
-#include <asm/timex.h>
-#include <asm/hpet.h>
-#include <asm/apic.h>
-
-int apic_mapped;
-int apic_verbosity;
-int apic_runs_main_timer;
-int apic_calibrate_pmtmr __initdata;
-
-int disable_apic_timer __initdata;
-
-/* Local APIC timer works in C2? */
-int local_apic_timer_c2_ok;
-EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
-
-static struct resource *ioapic_resources;
-static struct resource lapic_resource = {
- .name = "Local APIC",
- .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
-};
-
-/*
- * cpu_mask that denotes the CPUs that needs timer interrupt coming in as
- * IPIs in place of local APIC timers
- */
-static cpumask_t timer_interrupt_broadcast_ipi_mask;
-
-/* Using APIC to generate smp_local_timer_interrupt? */
-int using_apic_timer __read_mostly = 0;
-
-static void apic_pm_activate(void);
-
-void apic_wait_icr_idle(void)
-{
- while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
- cpu_relax();
-}
-
-unsigned int safe_apic_wait_icr_idle(void)
-{
- unsigned int send_status;
- int timeout;
-
- timeout = 0;
- do {
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- if (!send_status)
- break;
- udelay(100);
- } while (timeout++ < 1000);
-
- return send_status;
-}
-
-void enable_NMI_through_LVT0 (void * dummy)
-{
- unsigned int v;
-
- /* unmask and set to NMI */
- v = APIC_DM_NMI;
- apic_write(APIC_LVT0, v);
-}
-
-int get_maxlvt(void)
-{
- unsigned int v, maxlvt;
-
- v = apic_read(APIC_LVR);
- maxlvt = GET_APIC_MAXLVT(v);
- return maxlvt;
-}
-
-/*
- * 'what should we do if we get a hw irq event on an illegal vector'.
- * each architecture has to answer this themselves.
- */
-void ack_bad_irq(unsigned int irq)
-{
- printk("unexpected IRQ trap at vector %02x\n", irq);
- /*
- * Currently unexpected vectors happen only on SMP and APIC.
- * We _must_ ack these because every local APIC has only N
- * irq slots per priority level, and a 'hanging, unacked' IRQ
- * holds up an irq slot - in excessive cases (when multiple
- * unexpected vectors occur) that might lock up the APIC
- * completely.
- * But don't ack when the APIC is disabled. -AK
- */
- if (!disable_apic)
- ack_APIC_irq();
-}
-
-void clear_local_APIC(void)
-{
- int maxlvt;
- unsigned int v;
-
- maxlvt = get_maxlvt();
-
- /*
- * Masking an LVT entry can trigger a local APIC error
- * if the vector is zero. Mask LVTERR first to prevent this.
- */
- if (maxlvt >= 3) {
- v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
- apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
- }
- /*
- * Careful: we have to set masks only first to deassert
- * any level-triggered sources.
- */
- v = apic_read(APIC_LVTT);
- apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
- v = apic_read(APIC_LVT0);
- apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
- v = apic_read(APIC_LVT1);
- apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
- if (maxlvt >= 4) {
- v = apic_read(APIC_LVTPC);
- apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
- }
-
- /*
- * Clean APIC state for other OSs:
- */
- apic_write(APIC_LVTT, APIC_LVT_MASKED);
- apic_write(APIC_LVT0, APIC_LVT_MASKED);
- apic_write(APIC_LVT1, APIC_LVT_MASKED);
- if (maxlvt >= 3)
- apic_write(APIC_LVTERR, APIC_LVT_MASKED);
- if (maxlvt >= 4)
- apic_write(APIC_LVTPC, APIC_LVT_MASKED);
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
-}
-
-void disconnect_bsp_APIC(int virt_wire_setup)
-{
- /* Go back to Virtual Wire compatibility mode */
- unsigned long value;
-
- /* For the spurious interrupt use vector F, and enable it */
- value = apic_read(APIC_SPIV);
- value &= ~APIC_VECTOR_MASK;
- value |= APIC_SPIV_APIC_ENABLED;
- value |= 0xf;
- apic_write(APIC_SPIV, value);
-
- if (!virt_wire_setup) {
- /* For LVT0 make it edge triggered, active high, external and enabled */
- value = apic_read(APIC_LVT0);
- value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
- APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
- APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED );
- value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
- value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
- apic_write(APIC_LVT0, value);
- } else {
- /* Disable LVT0 */
- apic_write(APIC_LVT0, APIC_LVT_MASKED);
- }
-
- /* For LVT1 make it edge triggered, active high, nmi and enabled */
- value = apic_read(APIC_LVT1);
- value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
- APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
- APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
- value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
- value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
- apic_write(APIC_LVT1, value);
-}
-
-void disable_local_APIC(void)
-{
- unsigned int value;
-
- clear_local_APIC();
-
- /*
- * Disable APIC (implies clearing of registers
- * for 82489DX!).
- */
- value = apic_read(APIC_SPIV);
- value &= ~APIC_SPIV_APIC_ENABLED;
- apic_write(APIC_SPIV, value);
-}
-
-/*
- * This is to verify that we're looking at a real local APIC.
- * Check these against your board if the CPUs aren't getting
- * started for no apparent reason.
- */
-int __init verify_local_APIC(void)
-{
- unsigned int reg0, reg1;
-
- /*
- * The version register is read-only in a real APIC.
- */
- reg0 = apic_read(APIC_LVR);
- apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);
- apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
- reg1 = apic_read(APIC_LVR);
- apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);
-
- /*
- * The two version reads above should print the same
- * numbers. If the second one is different, then we
- * poke at a non-APIC.
- */
- if (reg1 != reg0)
- return 0;
-
- /*
- * Check if the version looks reasonably.
- */
- reg1 = GET_APIC_VERSION(reg0);
- if (reg1 == 0x00 || reg1 == 0xff)
- return 0;
- reg1 = get_maxlvt();
- if (reg1 < 0x02 || reg1 == 0xff)
- return 0;
-
- /*
- * The ID register is read/write in a real APIC.
- */
- reg0 = apic_read(APIC_ID);
- apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
- apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
- reg1 = apic_read(APIC_ID);
- apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
- apic_write(APIC_ID, reg0);
- if (reg1 != (reg0 ^ APIC_ID_MASK))
- return 0;
-
- /*
- * The next two are just to see if we have sane values.
- * They're only really relevant if we're in Virtual Wire
- * compatibility mode, but most boxes are anymore.
- */
- reg0 = apic_read(APIC_LVT0);
- apic_printk(APIC_DEBUG,"Getting LVT0: %x\n", reg0);
- reg1 = apic_read(APIC_LVT1);
- apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);
-
- return 1;
-}
-
-void __init sync_Arb_IDs(void)
-{
- /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */
- unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR));
- if (ver >= 0x14) /* P4 or higher */
- return;
-
- /*
- * Wait for idle.
- */
- apic_wait_icr_idle();
-
- apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
- apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG
- | APIC_DM_INIT);
-}
-
-/*
- * An initial setup of the virtual wire mode.
- */
-void __init init_bsp_APIC(void)
-{
- unsigned int value;
-
- /*
- * Don't do the setup now if we have a SMP BIOS as the
- * through-I/O-APIC virtual wire mode might be active.
- */
- if (smp_found_config || !cpu_has_apic)
- return;
-
- value = apic_read(APIC_LVR);
-
- /*
- * Do not trust the local APIC being empty at bootup.
- */
- clear_local_APIC();
-
- /*
- * Enable APIC.
- */
- value = apic_read(APIC_SPIV);
- value &= ~APIC_VECTOR_MASK;
- value |= APIC_SPIV_APIC_ENABLED;
- value |= APIC_SPIV_FOCUS_DISABLED;
- value |= SPURIOUS_APIC_VECTOR;
- apic_write(APIC_SPIV, value);
-
- /*
- * Set up the virtual wire mode.
- */
- apic_write(APIC_LVT0, APIC_DM_EXTINT);
- value = APIC_DM_NMI;
- apic_write(APIC_LVT1, value);
-}
-
-void __cpuinit setup_local_APIC (void)
-{
- unsigned int value, maxlvt;
- int i, j;
-
- value = apic_read(APIC_LVR);
-
- BUILD_BUG_ON((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f);
-
- /*
- * Double-check whether this APIC is really registered.
- * This is meaningless in clustered apic mode, so we skip it.
- */
- if (!apic_id_registered())
- BUG();
-
- /*
- * Intel recommends to set DFR, LDR and TPR before enabling
- * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
- * document number 292116). So here it goes...
- */
- init_apic_ldr();
-
- /*
- * Set Task Priority to 'accept all'. We never change this
- * later on.
- */
- value = apic_read(APIC_TASKPRI);
- value &= ~APIC_TPRI_MASK;
- apic_write(APIC_TASKPRI, value);
-
- /*
- * After a crash, we no longer service the interrupts and a pending
- * interrupt from previous kernel might still have ISR bit set.
- *
- * Most probably by now CPU has serviced that pending interrupt and
- * it might not have done the ack_APIC_irq() because it thought,
- * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it
- * does not clear the ISR bit and cpu thinks it has already serivced
- * the interrupt. Hence a vector might get locked. It was noticed
- * for timer irq (vector 0x31). Issue an extra EOI to clear ISR.
- */
- for (i = APIC_ISR_NR - 1; i >= 0; i--) {
- value = apic_read(APIC_ISR + i*0x10);
- for (j = 31; j >= 0; j--) {
- if (value & (1<<j))
- ack_APIC_irq();
- }
- }
-
- /*
- * Now that we are all set up, enable the APIC
- */
- value = apic_read(APIC_SPIV);
- value &= ~APIC_VECTOR_MASK;
- /*
- * Enable APIC
- */
- value |= APIC_SPIV_APIC_ENABLED;
-
- /* We always use processor focus */
-
- /*
- * Set spurious IRQ vector
- */
- value |= SPURIOUS_APIC_VECTOR;
- apic_write(APIC_SPIV, value);
-
- /*
- * Set up LVT0, LVT1:
- *
- * set up through-local-APIC on the BP's LINT0. This is not
- * strictly necessary in pure symmetric-IO mode, but sometimes
- * we delegate interrupts to the 8259A.
- */
- /*
- * TODO: set up through-local-APIC from through-I/O-APIC? --macro
- */
- value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
- if (!smp_processor_id() && !value) {
- value = APIC_DM_EXTINT;
- apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", smp_processor_id());
- } else {
- value = APIC_DM_EXTINT | APIC_LVT_MASKED;
- apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", smp_processor_id());
- }
- apic_write(APIC_LVT0, value);
-
- /*
- * only the BP should see the LINT1 NMI signal, obviously.
- */
- if (!smp_processor_id())
- value = APIC_DM_NMI;
- else
- value = APIC_DM_NMI | APIC_LVT_MASKED;
- apic_write(APIC_LVT1, value);
-
- {
- unsigned oldvalue;
- maxlvt = get_maxlvt();
- oldvalue = apic_read(APIC_ESR);
- value = ERROR_APIC_VECTOR; // enables sending errors
- apic_write(APIC_LVTERR, value);
- /*
- * spec says clear errors after enabling vector.
- */
- if (maxlvt > 3)
- apic_write(APIC_ESR, 0);
- value = apic_read(APIC_ESR);
- if (value != oldvalue)
- apic_printk(APIC_VERBOSE,
- "ESR value after enabling vector: %08x, after %08x\n",
- oldvalue, value);
- }
-
- nmi_watchdog_default();
- setup_apic_nmi_watchdog(NULL);
- apic_pm_activate();
-}
-
-#ifdef CONFIG_PM
-
-static struct {
- /* 'active' is true if the local APIC was enabled by us and
- not the BIOS; this signifies that we are also responsible
- for disabling it before entering apm/acpi suspend */
- int active;
- /* r/w apic fields */
- unsigned int apic_id;
- unsigned int apic_taskpri;
- unsigned int apic_ldr;
- unsigned int apic_dfr;
- unsigned int apic_spiv;
- unsigned int apic_lvtt;
- unsigned int apic_lvtpc;
- unsigned int apic_lvt0;
- unsigned int apic_lvt1;
- unsigned int apic_lvterr;
- unsigned int apic_tmict;
- unsigned int apic_tdcr;
- unsigned int apic_thmr;
-} apic_pm_state;
-
-static int lapic_suspend(struct sys_device *dev, pm_message_t state)
-{
- unsigned long flags;
- int maxlvt;
-
- if (!apic_pm_state.active)
- return 0;
-
- maxlvt = get_maxlvt();
-
- apic_pm_state.apic_id = apic_read(APIC_ID);
- apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
- apic_pm_state.apic_ldr = apic_read(APIC_LDR);
- apic_pm_state.apic_dfr = apic_read(APIC_DFR);
- apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
- apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
- if (maxlvt >= 4)
- apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
- apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
- apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
- apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
- apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
- apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
-#ifdef CONFIG_X86_MCE_INTEL
- if (maxlvt >= 5)
- apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
-#endif
- local_irq_save(flags);
- disable_local_APIC();
- local_irq_restore(flags);
- return 0;
-}
-
-static int lapic_resume(struct sys_device *dev)
-{
- unsigned int l, h;
- unsigned long flags;
- int maxlvt;
-
- if (!apic_pm_state.active)
- return 0;
-
- maxlvt = get_maxlvt();
-
- local_irq_save(flags);
- rdmsr(MSR_IA32_APICBASE, l, h);
- l &= ~MSR_IA32_APICBASE_BASE;
- l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
- wrmsr(MSR_IA32_APICBASE, l, h);
- apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
- apic_write(APIC_ID, apic_pm_state.apic_id);
- apic_write(APIC_DFR, apic_pm_state.apic_dfr);
- apic_write(APIC_LDR, apic_pm_state.apic_ldr);
- apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
- apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
- apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
- apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
-#ifdef CONFIG_X86_MCE_INTEL
- if (maxlvt >= 5)
- apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
-#endif
- if (maxlvt >= 4)
- apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
- apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
- apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
- apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
- apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
- local_irq_restore(flags);
- return 0;
-}
-
-static struct sysdev_class lapic_sysclass = {
- set_kset_name("lapic"),
- .resume = lapic_resume,
- .suspend = lapic_suspend,
-};
-
-static struct sys_device device_lapic = {
- .id = 0,
- .cls = &lapic_sysclass,
-};
-
-static void __cpuinit apic_pm_activate(void)
-{
- apic_pm_state.active = 1;
-}
-
-static int __init init_lapic_sysfs(void)
-{
- int error;
- if (!cpu_has_apic)
- return 0;
- /* XXX: remove suspend/resume procs if !apic_pm_state.active? */
- error = sysdev_class_register(&lapic_sysclass);
- if (!error)
- error = sysdev_register(&device_lapic);
- return error;
-}
-device_initcall(init_lapic_sysfs);
-
-#else /* CONFIG_PM */
-
-static void apic_pm_activate(void) { }
-
-#endif /* CONFIG_PM */
-
-static int __init apic_set_verbosity(char *str)
-{
- if (str == NULL) {
- skip_ioapic_setup = 0;
- ioapic_force = 1;
- return 0;
- }
- if (strcmp("debug", str) == 0)
- apic_verbosity = APIC_DEBUG;
- else if (strcmp("verbose", str) == 0)
- apic_verbosity = APIC_VERBOSE;
- else {
- printk(KERN_WARNING "APIC Verbosity level %s not recognised"
- " use apic=verbose or apic=debug\n", str);
- return -EINVAL;
- }
-
- return 0;
-}
-early_param("apic", apic_set_verbosity);
-
-/*
- * Detect and enable local APICs on non-SMP boards.
- * Original code written by Keir Fraser.
- * On AMD64 we trust the BIOS - if it says no APIC it is likely
- * not correctly set up (usually the APIC timer won't work etc.)
- */
-
-static int __init detect_init_APIC (void)
-{
- if (!cpu_has_apic) {
- printk(KERN_INFO "No local APIC present\n");
- return -1;
- }
-
- mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
- boot_cpu_id = 0;
- return 0;
-}
-
-#ifdef CONFIG_X86_IO_APIC
-static struct resource * __init ioapic_setup_resources(void)
-{
-#define IOAPIC_RESOURCE_NAME_SIZE 11
- unsigned long n;
- struct resource *res;
- char *mem;
- int i;
-
- if (nr_ioapics <= 0)
- return NULL;
-
- n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
- n *= nr_ioapics;
-
- mem = alloc_bootmem(n);
- res = (void *)mem;
-
- if (mem != NULL) {
- memset(mem, 0, n);
- mem += sizeof(struct resource) * nr_ioapics;
-
- for (i = 0; i < nr_ioapics; i++) {
- res[i].name = mem;
- res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
- sprintf(mem, "IOAPIC %u", i);
- mem += IOAPIC_RESOURCE_NAME_SIZE;
- }
- }
-
- ioapic_resources = res;
-
- return res;
-}
-
-static int __init ioapic_insert_resources(void)
-{
- int i;
- struct resource *r = ioapic_resources;
-
- if (!r) {
- printk("IO APIC resources could be not be allocated.\n");
- return -1;
- }
-
- for (i = 0; i < nr_ioapics; i++) {
- insert_resource(&iomem_resource, r);
- r++;
- }
-
- return 0;
-}
-
-/* Insert the IO APIC resources after PCI initialization has occured to handle
- * IO APICS that are mapped in on a BAR in PCI space. */
-late_initcall(ioapic_insert_resources);
-#endif
-
-void __init init_apic_mappings(void)
-{
- unsigned long apic_phys;
-
- /*
- * If no local APIC can be found then set up a fake all
- * zeroes page to simulate the local APIC and another
- * one for the IO-APIC.
- */
- if (!smp_found_config && detect_init_APIC()) {
- apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
- apic_phys = __pa(apic_phys);
- } else
- apic_phys = mp_lapic_addr;
-
- set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
- apic_mapped = 1;
- apic_printk(APIC_VERBOSE,"mapped APIC to %16lx (%16lx)\n", APIC_BASE, apic_phys);
-
- /* Put local APIC into the resource map. */
- lapic_resource.start = apic_phys;
- lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
- insert_resource(&iomem_resource, &lapic_resource);
-
- /*
- * Fetch the APIC ID of the BSP in case we have a
- * default configuration (or the MP table is broken).
- */
- boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
-
- {
- unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
- int i;
- struct resource *ioapic_res;
-
- ioapic_res = ioapic_setup_resources();
- for (i = 0; i < nr_ioapics; i++) {
- if (smp_found_config) {
- ioapic_phys = mp_ioapics[i].mpc_apicaddr;
- } else {
- ioapic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
- ioapic_phys = __pa(ioapic_phys);
- }
- set_fixmap_nocache(idx, ioapic_phys);
- apic_printk(APIC_VERBOSE,"mapped IOAPIC to %016lx (%016lx)\n",
- __fix_to_virt(idx), ioapic_phys);
- idx++;
-
- if (ioapic_res != NULL) {
- ioapic_res->start = ioapic_phys;
- ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
- ioapic_res++;
- }
- }
- }
-}
-
-/*
- * This function sets up the local APIC timer, with a timeout of
- * 'clocks' APIC bus clock. During calibration we actually call
- * this function twice on the boot CPU, once with a bogus timeout
- * value, second time for real. The other (noncalibrating) CPUs
- * call this function only once, with the real, calibrated value.
- *
- * We do reads before writes even if unnecessary, to get around the
- * P5 APIC double write bug.
- */
-
-#define APIC_DIVISOR 16
-
-static void __setup_APIC_LVTT(unsigned int clocks)
-{
- unsigned int lvtt_value, tmp_value;
- int cpu = smp_processor_id();
-
- lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
-
- if (cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask))
- lvtt_value |= APIC_LVT_MASKED;
-
- apic_write(APIC_LVTT, lvtt_value);
-
- /*
- * Divide PICLK by 16
- */
- tmp_value = apic_read(APIC_TDCR);
- apic_write(APIC_TDCR, (tmp_value
- & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
- | APIC_TDR_DIV_16);
-
- apic_write(APIC_TMICT, clocks/APIC_DIVISOR);
-}
-
-static void setup_APIC_timer(unsigned int clocks)
-{
- unsigned long flags;
-
- local_irq_save(flags);
-
- /* wait for irq slice */
- if (hpet_address && hpet_use_timer) {
- u32 trigger = hpet_readl(HPET_T0_CMP);
- while (hpet_readl(HPET_T0_CMP) == trigger)
- /* do nothing */ ;
- } else {
- int c1, c2;
- outb_p(0x00, 0x43);
- c2 = inb_p(0x40);
- c2 |= inb_p(0x40) << 8;
- do {
- c1 = c2;
- outb_p(0x00, 0x43);
- c2 = inb_p(0x40);
- c2 |= inb_p(0x40) << 8;
- } while (c2 - c1 < 300);
- }
- __setup_APIC_LVTT(clocks);
- /* Turn off PIT interrupt if we use APIC timer as main timer.
- Only works with the PM timer right now
- TBD fix it for HPET too. */
- if ((pmtmr_ioport != 0) &&
- smp_processor_id() == boot_cpu_id &&
- apic_runs_main_timer == 1 &&
- !cpu_isset(boot_cpu_id, timer_interrupt_broadcast_ipi_mask)) {
- stop_timer_interrupt();
- apic_runs_main_timer++;
- }
- local_irq_restore(flags);
-}
-
-/*
- * In this function we calibrate APIC bus clocks to the external
- * timer. Unfortunately we cannot use jiffies and the timer irq
- * to calibrate, since some later bootup code depends on getting
- * the first irq? Ugh.
- *
- * We want to do the calibration only once since we
- * want to have local timer irqs syncron. CPUs connected
- * by the same APIC bus have the very same bus frequency.
- * And we want to have irqs off anyways, no accidental
- * APIC irq that way.
- */
-
-#define TICK_COUNT 100000000
-
-static int __init calibrate_APIC_clock(void)
-{
- unsigned apic, apic_start;
- unsigned long tsc, tsc_start;
- int result;
- /*
- * Put whatever arbitrary (but long enough) timeout
- * value into the APIC clock, we just want to get the
- * counter running for calibration.
- */
- __setup_APIC_LVTT(4000000000);
-
- apic_start = apic_read(APIC_TMCCT);
-#ifdef CONFIG_X86_PM_TIMER
- if (apic_calibrate_pmtmr && pmtmr_ioport) {
- pmtimer_wait(5000); /* 5ms wait */
- apic = apic_read(APIC_TMCCT);
- result = (apic_start - apic) * 1000L / 5;
- } else
-#endif
- {
- rdtscll(tsc_start);
-
- do {
- apic = apic_read(APIC_TMCCT);
- rdtscll(tsc);
- } while ((tsc - tsc_start) < TICK_COUNT &&
- (apic_start - apic) < TICK_COUNT);
-
- result = (apic_start - apic) * 1000L * tsc_khz /
- (tsc - tsc_start);
- }
- printk("result %d\n", result);
-
-
- printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n",
- result / 1000 / 1000, result / 1000 % 1000);
-
- return result * APIC_DIVISOR / HZ;
-}
-
-static unsigned int calibration_result;
-
-void __init setup_boot_APIC_clock (void)
-{
- if (disable_apic_timer) {
- printk(KERN_INFO "Disabling APIC timer\n");
- return;
- }
-
- printk(KERN_INFO "Using local APIC timer interrupts.\n");
- using_apic_timer = 1;
-
- local_irq_disable();
-
- calibration_result = calibrate_APIC_clock();
- /*
- * Now set up the timer for real.
- */
- setup_APIC_timer(calibration_result);
-
- local_irq_enable();
-}
-
-void __cpuinit setup_secondary_APIC_clock(void)
-{
- local_irq_disable(); /* FIXME: Do we need this? --RR */
- setup_APIC_timer(calibration_result);
- local_irq_enable();
-}
-
-void disable_APIC_timer(void)
-{
- if (using_apic_timer) {
- unsigned long v;
-
- v = apic_read(APIC_LVTT);
- /*
- * When an illegal vector value (0-15) is written to an LVT
- * entry and delivery mode is Fixed, the APIC may signal an
- * illegal vector error, with out regard to whether the mask
- * bit is set or whether an interrupt is actually seen on input.
- *
- * Boot sequence might call this function when the LVTT has
- * '0' vector value. So make sure vector field is set to
- * valid value.
- */
- v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
- apic_write(APIC_LVTT, v);
- }
-}
-
-void enable_APIC_timer(void)
-{
- int cpu = smp_processor_id();
-
- if (using_apic_timer &&
- !cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) {
- unsigned long v;
-
- v = apic_read(APIC_LVTT);
- apic_write(APIC_LVTT, v & ~APIC_LVT_MASKED);
- }
-}
-
-void switch_APIC_timer_to_ipi(void *cpumask)
-{
- cpumask_t mask = *(cpumask_t *)cpumask;
- int cpu = smp_processor_id();
-
- if (cpu_isset(cpu, mask) &&
- !cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) {
- disable_APIC_timer();
- cpu_set(cpu, timer_interrupt_broadcast_ipi_mask);
- }
-}
-EXPORT_SYMBOL(switch_APIC_timer_to_ipi);
-
-void smp_send_timer_broadcast_ipi(void)
-{
- int cpu = smp_processor_id();
- cpumask_t mask;
-
- cpus_and(mask, cpu_online_map, timer_interrupt_broadcast_ipi_mask);
-
- if (cpu_isset(cpu, mask)) {
- cpu_clear(cpu, mask);
- add_pda(apic_timer_irqs, 1);
- smp_local_timer_interrupt();
- }
-
- if (!cpus_empty(mask)) {
- send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
- }
-}
-
-void switch_ipi_to_APIC_timer(void *cpumask)
-{
- cpumask_t mask = *(cpumask_t *)cpumask;
- int cpu = smp_processor_id();
-
- if (cpu_isset(cpu, mask) &&
- cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) {
- cpu_clear(cpu, timer_interrupt_broadcast_ipi_mask);
- enable_APIC_timer();
- }
-}
-EXPORT_SYMBOL(switch_ipi_to_APIC_timer);
-
-int setup_profiling_timer(unsigned int multiplier)
-{
- return -EINVAL;
-}
-
-void setup_APIC_extended_lvt(unsigned char lvt_off, unsigned char vector,
- unsigned char msg_type, unsigned char mask)
-{
- unsigned long reg = (lvt_off << 4) + K8_APIC_EXT_LVT_BASE;
- unsigned int v = (mask << 16) | (msg_type << 8) | vector;
- apic_write(reg, v);
-}
-
-#undef APIC_DIVISOR
-
-/*
- * Local timer interrupt handler. It does both profiling and
- * process statistics/rescheduling.
- *
- * We do profiling in every local tick, statistics/rescheduling
- * happen only every 'profiling multiplier' ticks. The default
- * multiplier is 1 and it can be changed by writing the new multiplier
- * value into /proc/profile.
- */
-
-void smp_local_timer_interrupt(void)
-{
- profile_tick(CPU_PROFILING);
-#ifdef CONFIG_SMP
- update_process_times(user_mode(get_irq_regs()));
-#endif
- if (apic_runs_main_timer > 1 && smp_processor_id() == boot_cpu_id)
- main_timer_handler();
- /*
- * We take the 'long' return path, and there every subsystem
- * grabs the appropriate locks (kernel lock/ irq lock).
- *
- * We might want to decouple profiling from the 'long path',
- * and do the profiling totally in assembly.
- *
- * Currently this isn't too much of an issue (performance wise),
- * we can take more than 100K local irqs per second on a 100 MHz P5.
- */
-}
-
-/*
- * Local APIC timer interrupt. This is the most natural way for doing
- * local interrupts, but local timer interrupts can be emulated by
- * broadcast interrupts too. [in case the hw doesn't support APIC timers]
- *
- * [ if a single-CPU system runs an SMP kernel then we call the local
- * interrupt as well. Thus we cannot inline the local irq ... ]
- */
-void smp_apic_timer_interrupt(struct pt_regs *regs)
-{
- struct pt_regs *old_regs = set_irq_regs(regs);
-
- /*
- * the NMI deadlock-detector uses this.
- */
- add_pda(apic_timer_irqs, 1);
-
- /*
- * NOTE! We'd better ACK the irq immediately,
- * because timer handling can be slow.
- */
- ack_APIC_irq();
- /*
- * update_process_times() expects us to have done irq_enter().
- * Besides, if we don't timer interrupts ignore the global
- * interrupt lock, which is the WrongThing (tm) to do.
- */
- exit_idle();
- irq_enter();
- smp_local_timer_interrupt();
- irq_exit();
- set_irq_regs(old_regs);
-}
-
-/*
- * apic_is_clustered_box() -- Check if we can expect good TSC
- *
- * Thus far, the major user of this is IBM's Summit2 series:
- *
- * Clustered boxes may have unsynced TSC problems if they are
- * multi-chassis. Use available data to take a good guess.
- * If in doubt, go HPET.
- */
-__cpuinit int apic_is_clustered_box(void)
-{
- int i, clusters, zeros;
- unsigned id;
- DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
-
- bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
-
- for (i = 0; i < NR_CPUS; i++) {
- id = bios_cpu_apicid[i];
- if (id != BAD_APICID)
- __set_bit(APIC_CLUSTERID(id), clustermap);
- }
-
- /* Problem: Partially populated chassis may not have CPUs in some of
- * the APIC clusters they have been allocated. Only present CPUs have
- * bios_cpu_apicid entries, thus causing zeroes in the bitmap. Since
- * clusters are allocated sequentially, count zeros only if they are
- * bounded by ones.
- */
- clusters = 0;
- zeros = 0;
- for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
- if (test_bit(i, clustermap)) {
- clusters += 1 + zeros;
- zeros = 0;
- } else
- ++zeros;
- }
-
- /*
- * If clusters > 2, then should be multi-chassis.
- * May have to revisit this when multi-core + hyperthreaded CPUs come
- * out, but AFAIK this will work even for them.
- */
- return (clusters > 2);
-}
-
-/*
- * This interrupt should _never_ happen with our APIC/SMP architecture
- */
-asmlinkage void smp_spurious_interrupt(void)
-{
- unsigned int v;
- exit_idle();
- irq_enter();
- /*
- * Check if this really is a spurious interrupt and ACK it
- * if it is a vectored one. Just in case...
- * Spurious interrupts should not be ACKed.
- */
- v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
- if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
- ack_APIC_irq();
-
- irq_exit();
-}
-
-/*
- * This interrupt should never happen with our APIC/SMP architecture
- */
-
-asmlinkage void smp_error_interrupt(void)
-{
- unsigned int v, v1;
-
- exit_idle();
- irq_enter();
- /* First tickle the hardware, only then report what went on. -- REW */
- v = apic_read(APIC_ESR);
- apic_write(APIC_ESR, 0);
- v1 = apic_read(APIC_ESR);
- ack_APIC_irq();
- atomic_inc(&irq_err_count);
-
- /* Here is what the APIC error bits mean:
- 0: Send CS error
- 1: Receive CS error
- 2: Send accept error
- 3: Receive accept error
- 4: Reserved
- 5: Send illegal vector
- 6: Received illegal vector
- 7: Illegal register address
- */
- printk (KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
- smp_processor_id(), v , v1);
- irq_exit();
-}
-
-int disable_apic;
-
-/*
- * This initializes the IO-APIC and APIC hardware if this is
- * a UP kernel.
- */
-int __init APIC_init_uniprocessor (void)
-{
- if (disable_apic) {
- printk(KERN_INFO "Apic disabled\n");
- return -1;
- }
- if (!cpu_has_apic) {
- disable_apic = 1;
- printk(KERN_INFO "Apic disabled by BIOS\n");
- return -1;
- }
-
- verify_local_APIC();
-
- phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
- apic_write(APIC_ID, SET_APIC_ID(boot_cpu_id));
-
- setup_local_APIC();
-
- if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
- setup_IO_APIC();
- else
- nr_ioapics = 0;
- setup_boot_APIC_clock();
- check_nmi_watchdog();
- return 0;
-}
-
-static __init int setup_disableapic(char *str)
-{
- disable_apic = 1;
- clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
- return 0;
-}
-early_param("disableapic", setup_disableapic);
-
-/* same as disableapic, for compatibility */
-static __init int setup_nolapic(char *str)
-{
- return setup_disableapic(str);
-}
-early_param("nolapic", setup_nolapic);
-
-static int __init parse_lapic_timer_c2_ok(char *arg)
-{
- local_apic_timer_c2_ok = 1;
- return 0;
-}
-early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
-
-static __init int setup_noapictimer(char *str)
-{
- if (str[0] != ' ' && str[0] != 0)
- return 0;
- disable_apic_timer = 1;
- return 1;
-}
-
-static __init int setup_apicmaintimer(char *str)
-{
- apic_runs_main_timer = 1;
- nohpet = 1;
- return 1;
-}
-__setup("apicmaintimer", setup_apicmaintimer);
-
-static __init int setup_noapicmaintimer(char *str)
-{
- apic_runs_main_timer = -1;
- return 1;
-}
-__setup("noapicmaintimer", setup_noapicmaintimer);
-
-static __init int setup_apicpmtimer(char *s)
-{
- apic_calibrate_pmtmr = 1;
- notsc_setup(NULL);
- return setup_apicmaintimer(NULL);
-}
-__setup("apicpmtimer", setup_apicpmtimer);
-
-__setup("noapictimer", setup_noapictimer);
-
diff --git a/arch/x86_64/kernel/asm-offsets.c b/arch/x86_64/kernel/asm-offsets.c
deleted file mode 100644
index 778953bc636c..000000000000
--- a/arch/x86_64/kernel/asm-offsets.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Generate definitions needed by assembly language modules.
- * This code generates raw asm output which is post-processed to extract
- * and format the required data.
- */
-
-#include <linux/crypto.h>
-#include <linux/sched.h>
-#include <linux/stddef.h>
-#include <linux/errno.h>
-#include <linux/hardirq.h>
-#include <linux/suspend.h>
-#include <asm/pda.h>
-#include <asm/processor.h>
-#include <asm/segment.h>
-#include <asm/thread_info.h>
-#include <asm/ia32.h>
-
-#define DEFINE(sym, val) \
- asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
-
-#define __NO_STUBS 1
-#undef __SYSCALL
-#undef _ASM_X86_64_UNISTD_H_
-#define __SYSCALL(nr, sym) [nr] = 1,
-static char syscalls[] = {
-#include <asm/unistd.h>
-};
-
-int main(void)
-{
-#define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry))
- ENTRY(state);
- ENTRY(flags);
- ENTRY(thread);
- ENTRY(pid);
- BLANK();
-#undef ENTRY
-#define ENTRY(entry) DEFINE(threadinfo_ ## entry, offsetof(struct thread_info, entry))
- ENTRY(flags);
- ENTRY(addr_limit);
- ENTRY(preempt_count);
- ENTRY(status);
- BLANK();
-#undef ENTRY
-#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
- ENTRY(kernelstack);
- ENTRY(oldrsp);
- ENTRY(pcurrent);
- ENTRY(irqcount);
- ENTRY(cpunumber);
- ENTRY(irqstackptr);
- ENTRY(data_offset);
- BLANK();
-#undef ENTRY
-#ifdef CONFIG_IA32_EMULATION
-#define ENTRY(entry) DEFINE(IA32_SIGCONTEXT_ ## entry, offsetof(struct sigcontext_ia32, entry))
- ENTRY(eax);
- ENTRY(ebx);
- ENTRY(ecx);
- ENTRY(edx);
- ENTRY(esi);
- ENTRY(edi);
- ENTRY(ebp);
- ENTRY(esp);
- ENTRY(eip);
- BLANK();
-#undef ENTRY
- DEFINE(IA32_RT_SIGFRAME_sigcontext,
- offsetof (struct rt_sigframe32, uc.uc_mcontext));
- BLANK();
-#endif
- DEFINE(pbe_address, offsetof(struct pbe, address));
- DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
- DEFINE(pbe_next, offsetof(struct pbe, next));
- BLANK();
- DEFINE(TSS_ist, offsetof(struct tss_struct, ist));
- BLANK();
- DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx));
- BLANK();
- DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
- return 0;
-}
diff --git a/arch/x86_64/kernel/audit.c b/arch/x86_64/kernel/audit.c
deleted file mode 100644
index 06d3e5a14d9d..000000000000
--- a/arch/x86_64/kernel/audit.c
+++ /dev/null
@@ -1,81 +0,0 @@
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/audit.h>
-#include <asm/unistd.h>
-
-static unsigned dir_class[] = {
-#include <asm-generic/audit_dir_write.h>
-~0U
-};
-
-static unsigned read_class[] = {
-#include <asm-generic/audit_read.h>
-~0U
-};
-
-static unsigned write_class[] = {
-#include <asm-generic/audit_write.h>
-~0U
-};
-
-static unsigned chattr_class[] = {
-#include <asm-generic/audit_change_attr.h>
-~0U
-};
-
-static unsigned signal_class[] = {
-#include <asm-generic/audit_signal.h>
-~0U
-};
-
-int audit_classify_arch(int arch)
-{
-#ifdef CONFIG_IA32_EMULATION
- if (arch == AUDIT_ARCH_I386)
- return 1;
-#endif
- return 0;
-}
-
-int audit_classify_syscall(int abi, unsigned syscall)
-{
-#ifdef CONFIG_IA32_EMULATION
- extern int ia32_classify_syscall(unsigned);
- if (abi == AUDIT_ARCH_I386)
- return ia32_classify_syscall(syscall);
-#endif
- switch(syscall) {
- case __NR_open:
- return 2;
- case __NR_openat:
- return 3;
- case __NR_execve:
- return 5;
- default:
- return 0;
- }
-}
-
-static int __init audit_classes_init(void)
-{
-#ifdef CONFIG_IA32_EMULATION
- extern __u32 ia32_dir_class[];
- extern __u32 ia32_write_class[];
- extern __u32 ia32_read_class[];
- extern __u32 ia32_chattr_class[];
- extern __u32 ia32_signal_class[];
- audit_register_class(AUDIT_CLASS_WRITE_32, ia32_write_class);
- audit_register_class(AUDIT_CLASS_READ_32, ia32_read_class);
- audit_register_class(AUDIT_CLASS_DIR_WRITE_32, ia32_dir_class);
- audit_register_class(AUDIT_CLASS_CHATTR_32, ia32_chattr_class);
- audit_register_class(AUDIT_CLASS_SIGNAL_32, ia32_signal_class);
-#endif
- audit_register_class(AUDIT_CLASS_WRITE, write_class);
- audit_register_class(AUDIT_CLASS_READ, read_class);
- audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class);
- audit_register_class(AUDIT_CLASS_CHATTR, chattr_class);
- audit_register_class(AUDIT_CLASS_SIGNAL, signal_class);
- return 0;
-}
-
-__initcall(audit_classes_init);
diff --git a/arch/x86_64/kernel/bugs.c b/arch/x86_64/kernel/bugs.c
deleted file mode 100644
index 4e5e9d364d63..000000000000
--- a/arch/x86_64/kernel/bugs.c
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * arch/x86_64/kernel/bugs.c
- *
- * Copyright (C) 1994 Linus Torvalds
- * Copyright (C) 2000 SuSE
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <asm/alternative.h>
-#include <asm/bugs.h>
-#include <asm/processor.h>
-#include <asm/mtrr.h>
-
-void __init check_bugs(void)
-{
- identify_cpu(&boot_cpu_data);
- mtrr_bp_init();
-#if !defined(CONFIG_SMP)
- printk("CPU: ");
- print_cpu_info(&boot_cpu_data);
-#endif
- alternative_instructions();
-}
diff --git a/arch/x86_64/kernel/cpufreq/Kconfig b/arch/x86_64/kernel/cpufreq/Kconfig
deleted file mode 100644
index a3fd51926cbd..000000000000
--- a/arch/x86_64/kernel/cpufreq/Kconfig
+++ /dev/null
@@ -1,108 +0,0 @@
-#
-# CPU Frequency scaling
-#
-
-menu "CPU Frequency scaling"
-
-source "drivers/cpufreq/Kconfig"
-
-if CPU_FREQ
-
-comment "CPUFreq processor drivers"
-
-config X86_POWERNOW_K8
- tristate "AMD Opteron/Athlon64 PowerNow!"
- select CPU_FREQ_TABLE
- help
- This adds the CPUFreq driver for mobile AMD Opteron/Athlon64 processors.
-
- To compile this driver as a module, choose M here: the
- module will be called powernow-k8.
-
- For details, take a look at <file:Documentation/cpu-freq/>.
-
- If in doubt, say N.
-
-config X86_POWERNOW_K8_ACPI
- bool
- depends on X86_POWERNOW_K8 && ACPI_PROCESSOR
- depends on !(X86_POWERNOW_K8 = y && ACPI_PROCESSOR = m)
- default y
-
-config X86_SPEEDSTEP_CENTRINO
- tristate "Intel Enhanced SpeedStep (deprecated)"
- select CPU_FREQ_TABLE
- depends on ACPI_PROCESSOR
- help
- This is deprecated and this functionality is now merged into
- acpi_cpufreq (X86_ACPI_CPUFREQ). Use that driver instead of
- speedstep_centrino.
- This adds the CPUFreq driver for Enhanced SpeedStep enabled
- mobile CPUs. This means Intel Pentium M (Centrino) CPUs
- or 64bit enabled Intel Xeons.
-
- To compile this driver as a module, choose M here: the
- module will be called speedstep-centrino.
-
- For details, take a look at <file:Documentation/cpu-freq/>.
-
- If in doubt, say N.
-
-config X86_ACPI_CPUFREQ
- tristate "ACPI Processor P-States driver"
- select CPU_FREQ_TABLE
- depends on ACPI_PROCESSOR
- help
- This driver adds a CPUFreq driver which utilizes the ACPI
- Processor Performance States.
- This driver also supports Intel Enhanced Speedstep.
-
- To compile this driver as a module, choose M here: the
- module will be called acpi-cpufreq.
-
- For details, take a look at <file:Documentation/cpu-freq/>.
-
- If in doubt, say N.
-
-comment "shared options"
-
-config X86_ACPI_CPUFREQ_PROC_INTF
- bool "/proc/acpi/processor/../performance interface (deprecated)"
- depends on PROC_FS
- depends on X86_ACPI_CPUFREQ || X86_POWERNOW_K8_ACPI
- help
- This enables the deprecated /proc/acpi/processor/../performance
- interface. While it is helpful for debugging, the generic,
- cross-architecture cpufreq interfaces should be used.
-
- If in doubt, say N.
-
-config X86_P4_CLOCKMOD
- tristate "Intel Pentium 4 clock modulation"
- depends on EMBEDDED
- select CPU_FREQ_TABLE
- help
- This adds the clock modulation driver for Intel Pentium 4 / XEON
- processors. When enabled it will lower CPU temperature by skipping
- clocks.
-
- This driver should be only used in exceptional
- circumstances when very low power is needed because it causes severe
- slowdowns and noticeable latencies. Normally Speedstep should be used
- instead.
-
- To compile this driver as a module, choose M here: the
- module will be called p4-clockmod.
-
- For details, take a look at <file:Documentation/cpu-freq/>.
-
- Unless you are absolutely sure say N.
-
-
-config X86_SPEEDSTEP_LIB
- tristate
- default X86_P4_CLOCKMOD
-
-endif
-
-endmenu
diff --git a/arch/x86_64/kernel/cpufreq/Makefile b/arch/x86_64/kernel/cpufreq/Makefile
deleted file mode 100644
index 753ce1dd418e..000000000000
--- a/arch/x86_64/kernel/cpufreq/Makefile
+++ /dev/null
@@ -1,17 +0,0 @@
-#
-# Reuse the i386 cpufreq drivers
-#
-
-SRCDIR := ../../../i386/kernel/cpu/cpufreq
-
-obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o
-obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o
-obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o
-obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o
-obj-$(CONFIG_X86_SPEEDSTEP_LIB) += speedstep-lib.o
-
-powernow-k8-objs := ${SRCDIR}/powernow-k8.o
-speedstep-centrino-objs := ${SRCDIR}/speedstep-centrino.o
-acpi-cpufreq-objs := ${SRCDIR}/acpi-cpufreq.o
-p4-clockmod-objs := ${SRCDIR}/p4-clockmod.o
-speedstep-lib-objs := ${SRCDIR}/speedstep-lib.o
diff --git a/arch/x86_64/kernel/crash.c b/arch/x86_64/kernel/crash.c
deleted file mode 100644
index 13432a1ae904..000000000000
--- a/arch/x86_64/kernel/crash.c
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Architecture specific (x86_64) functions for kexec based crash dumps.
- *
- * Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
- *
- * Copyright (C) IBM Corporation, 2004. All rights reserved.
- *
- */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/smp.h>
-#include <linux/irq.h>
-#include <linux/reboot.h>
-#include <linux/kexec.h>
-#include <linux/delay.h>
-#include <linux/elf.h>
-#include <linux/elfcore.h>
-#include <linux/kdebug.h>
-
-#include <asm/processor.h>
-#include <asm/hardirq.h>
-#include <asm/nmi.h>
-#include <asm/hw_irq.h>
-#include <asm/mach_apic.h>
-
-/* This keeps a track of which one is crashing cpu. */
-static int crashing_cpu;
-
-#ifdef CONFIG_SMP
-static atomic_t waiting_for_crash_ipi;
-
-static int crash_nmi_callback(struct notifier_block *self,
- unsigned long val, void *data)
-{
- struct pt_regs *regs;
- int cpu;
-
- if (val != DIE_NMI_IPI)
- return NOTIFY_OK;
-
- regs = ((struct die_args *)data)->regs;
- cpu = raw_smp_processor_id();
-
- /*
- * Don't do anything if this handler is invoked on crashing cpu.
- * Otherwise, system will completely hang. Crashing cpu can get
- * an NMI if system was initially booted with nmi_watchdog parameter.
- */
- if (cpu == crashing_cpu)
- return NOTIFY_STOP;
- local_irq_disable();
-
- crash_save_cpu(regs, cpu);
- disable_local_APIC();
- atomic_dec(&waiting_for_crash_ipi);
- /* Assume hlt works */
- for(;;)
- halt();
-
- return 1;
-}
-
-static void smp_send_nmi_allbutself(void)
-{
- send_IPI_allbutself(NMI_VECTOR);
-}
-
-/*
- * This code is a best effort heuristic to get the
- * other cpus to stop executing. So races with
- * cpu hotplug shouldn't matter.
- */
-
-static struct notifier_block crash_nmi_nb = {
- .notifier_call = crash_nmi_callback,
-};
-
-static void nmi_shootdown_cpus(void)
-{
- unsigned long msecs;
-
- atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
- if (register_die_notifier(&crash_nmi_nb))
- return; /* return what? */
-
- /*
- * Ensure the new callback function is set before sending
- * out the NMI
- */
- wmb();
-
- smp_send_nmi_allbutself();
-
- msecs = 1000; /* Wait at most a second for the other cpus to stop */
- while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
- mdelay(1);
- msecs--;
- }
- /* Leave the nmi callback set */
- disable_local_APIC();
-}
-#else
-static void nmi_shootdown_cpus(void)
-{
- /* There are no cpus to shootdown */
-}
-#endif
-
-void machine_crash_shutdown(struct pt_regs *regs)
-{
- /*
- * This function is only called after the system
- * has panicked or is otherwise in a critical state.
- * The minimum amount of code to allow a kexec'd kernel
- * to run successfully needs to happen here.
- *
- * In practice this means shooting down the other cpus in
- * an SMP system.
- */
- /* The kernel is broken so disable interrupts */
- local_irq_disable();
-
- /* Make a note of crashing cpu. Will be used in NMI callback.*/
- crashing_cpu = smp_processor_id();
- nmi_shootdown_cpus();
-
- if(cpu_has_apic)
- disable_local_APIC();
-
- disable_IO_APIC();
-
- crash_save_cpu(regs, smp_processor_id());
-}
diff --git a/arch/x86_64/kernel/crash_dump.c b/arch/x86_64/kernel/crash_dump.c
deleted file mode 100644
index 942deac4d43a..000000000000
--- a/arch/x86_64/kernel/crash_dump.c
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * kernel/crash_dump.c - Memory preserving reboot related code.
- *
- * Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
- * Copyright (C) IBM Corporation, 2004. All rights reserved
- */
-
-#include <linux/errno.h>
-#include <linux/crash_dump.h>
-
-#include <asm/uaccess.h>
-#include <asm/io.h>
-
-/**
- * copy_oldmem_page - copy one page from "oldmem"
- * @pfn: page frame number to be copied
- * @buf: target memory address for the copy; this can be in kernel address
- * space or user address space (see @userbuf)
- * @csize: number of bytes to copy
- * @offset: offset in bytes into the page (based on pfn) to begin the copy
- * @userbuf: if set, @buf is in user address space, use copy_to_user(),
- * otherwise @buf is in kernel address space, use memcpy().
- *
- * Copy a page from "oldmem". For this page, there is no pte mapped
- * in the current kernel. We stitch up a pte, similar to kmap_atomic.
- */
-ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
- size_t csize, unsigned long offset, int userbuf)
-{
- void *vaddr;
-
- if (!csize)
- return 0;
-
- vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE);
-
- if (userbuf) {
- if (copy_to_user(buf, (vaddr + offset), csize)) {
- iounmap(vaddr);
- return -EFAULT;
- }
- } else
- memcpy(buf, (vaddr + offset), csize);
-
- iounmap(vaddr);
- return csize;
-}
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
deleted file mode 100644
index 0f4d5e209e9b..000000000000
--- a/arch/x86_64/kernel/e820.c
+++ /dev/null
@@ -1,725 +0,0 @@
-/*
- * Handle the memory map.
- * The functions here do the job until bootmem takes over.
- *
- * Getting sanitize_e820_map() in sync with i386 version by applying change:
- * - Provisions for empty E820 memory regions (reported by certain BIOSes).
- * Alex Achenbach <xela@slit.de>, December 2002.
- * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
- *
- */
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/ioport.h>
-#include <linux/string.h>
-#include <linux/kexec.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/suspend.h>
-#include <linux/pfn.h>
-
-#include <asm/pgtable.h>
-#include <asm/page.h>
-#include <asm/e820.h>
-#include <asm/proto.h>
-#include <asm/bootsetup.h>
-#include <asm/sections.h>
-
-struct e820map e820;
-
-/*
- * PFN of last memory page.
- */
-unsigned long end_pfn;
-EXPORT_SYMBOL(end_pfn);
-
-/*
- * end_pfn only includes RAM, while end_pfn_map includes all e820 entries.
- * The direct mapping extends to end_pfn_map, so that we can directly access
- * apertures, ACPI and other tables without having to play with fixmaps.
- */
-unsigned long end_pfn_map;
-
-/*
- * Last pfn which the user wants to use.
- */
-static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
-
-extern struct resource code_resource, data_resource;
-
-/* Check for some hardcoded bad areas that early boot is not allowed to touch */
-static inline int bad_addr(unsigned long *addrp, unsigned long size)
-{
- unsigned long addr = *addrp, last = addr + size;
-
- /* various gunk below that needed for SMP startup */
- if (addr < 0x8000) {
- *addrp = PAGE_ALIGN(0x8000);
- return 1;
- }
-
- /* direct mapping tables of the kernel */
- if (last >= table_start<<PAGE_SHIFT && addr < table_end<<PAGE_SHIFT) {
- *addrp = PAGE_ALIGN(table_end << PAGE_SHIFT);
- return 1;
- }
-
- /* initrd */
-#ifdef CONFIG_BLK_DEV_INITRD
- if (LOADER_TYPE && INITRD_START && last >= INITRD_START &&
- addr < INITRD_START+INITRD_SIZE) {
- *addrp = PAGE_ALIGN(INITRD_START + INITRD_SIZE);
- return 1;
- }
-#endif
- /* kernel code */
- if (last >= __pa_symbol(&_text) && addr < __pa_symbol(&_end)) {
- *addrp = PAGE_ALIGN(__pa_symbol(&_end));
- return 1;
- }
-
- if (last >= ebda_addr && addr < ebda_addr + ebda_size) {
- *addrp = PAGE_ALIGN(ebda_addr + ebda_size);
- return 1;
- }
-
-#ifdef CONFIG_NUMA
- /* NUMA memory to node map */
- if (last >= nodemap_addr && addr < nodemap_addr + nodemap_size) {
- *addrp = nodemap_addr + nodemap_size;
- return 1;
- }
-#endif
- /* XXX ramdisk image here? */
- return 0;
-}
-
-/*
- * This function checks if any part of the range <start,end> is mapped
- * with type.
- */
-int
-e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
-{
- int i;
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- if (type && ei->type != type)
- continue;
- if (ei->addr >= end || ei->addr + ei->size <= start)
- continue;
- return 1;
- }
- return 0;
-}
-EXPORT_SYMBOL_GPL(e820_any_mapped);
-
-/*
- * This function checks if the entire range <start,end> is mapped with type.
- *
- * Note: this function only works correct if the e820 table is sorted and
- * not-overlapping, which is the case
- */
-int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type)
-{
- int i;
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- if (type && ei->type != type)
- continue;
- /* is the region (part) in overlap with the current region ?*/
- if (ei->addr >= end || ei->addr + ei->size <= start)
- continue;
-
- /* if the region is at the beginning of <start,end> we move
- * start to the end of the region since it's ok until there
- */
- if (ei->addr <= start)
- start = ei->addr + ei->size;
- /* if start is now at or beyond end, we're done, full coverage */
- if (start >= end)
- return 1; /* we're done */
- }
- return 0;
-}
-
-/*
- * Find a free area in a specific range.
- */
-unsigned long __init find_e820_area(unsigned long start, unsigned long end, unsigned size)
-{
- int i;
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- unsigned long addr = ei->addr, last;
- if (ei->type != E820_RAM)
- continue;
- if (addr < start)
- addr = start;
- if (addr > ei->addr + ei->size)
- continue;
- while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size)
- ;
- last = PAGE_ALIGN(addr) + size;
- if (last > ei->addr + ei->size)
- continue;
- if (last > end)
- continue;
- return addr;
- }
- return -1UL;
-}
-
-/*
- * Find the highest page frame number we have available
- */
-unsigned long __init e820_end_of_ram(void)
-{
- unsigned long end_pfn = 0;
- end_pfn = find_max_pfn_with_active_regions();
-
- if (end_pfn > end_pfn_map)
- end_pfn_map = end_pfn;
- if (end_pfn_map > MAXMEM>>PAGE_SHIFT)
- end_pfn_map = MAXMEM>>PAGE_SHIFT;
- if (end_pfn > end_user_pfn)
- end_pfn = end_user_pfn;
- if (end_pfn > end_pfn_map)
- end_pfn = end_pfn_map;
-
- printk("end_pfn_map = %lu\n", end_pfn_map);
- return end_pfn;
-}
-
-/*
- * Mark e820 reserved areas as busy for the resource manager.
- */
-void __init e820_reserve_resources(void)
-{
- int i;
- for (i = 0; i < e820.nr_map; i++) {
- struct resource *res;
- res = alloc_bootmem_low(sizeof(struct resource));
- switch (e820.map[i].type) {
- case E820_RAM: res->name = "System RAM"; break;
- case E820_ACPI: res->name = "ACPI Tables"; break;
- case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
- default: res->name = "reserved";
- }
- res->start = e820.map[i].addr;
- res->end = res->start + e820.map[i].size - 1;
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
- request_resource(&iomem_resource, res);
- if (e820.map[i].type == E820_RAM) {
- /*
- * We don't know which RAM region contains kernel data,
- * so we try it repeatedly and let the resource manager
- * test it.
- */
- request_resource(res, &code_resource);
- request_resource(res, &data_resource);
-#ifdef CONFIG_KEXEC
- request_resource(res, &crashk_res);
-#endif
- }
- }
-}
-
-/*
- * Find the ranges of physical addresses that do not correspond to
- * e820 RAM areas and mark the corresponding pages as nosave for software
- * suspend and suspend to RAM.
- *
- * This function requires the e820 map to be sorted and without any
- * overlapping entries and assumes the first e820 area to be RAM.
- */
-void __init e820_mark_nosave_regions(void)
-{
- int i;
- unsigned long paddr;
-
- paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE);
- for (i = 1; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
-
- if (paddr < ei->addr)
- register_nosave_region(PFN_DOWN(paddr),
- PFN_UP(ei->addr));
-
- paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
- if (ei->type != E820_RAM)
- register_nosave_region(PFN_UP(ei->addr),
- PFN_DOWN(paddr));
-
- if (paddr >= (end_pfn << PAGE_SHIFT))
- break;
- }
-}
-
-/*
- * Finds an active region in the address range from start_pfn to end_pfn and
- * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
- */
-static int __init e820_find_active_region(const struct e820entry *ei,
- unsigned long start_pfn,
- unsigned long end_pfn,
- unsigned long *ei_startpfn,
- unsigned long *ei_endpfn)
-{
- *ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT;
- *ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE) >> PAGE_SHIFT;
-
- /* Skip map entries smaller than a page */
- if (*ei_startpfn >= *ei_endpfn)
- return 0;
-
- /* Check if end_pfn_map should be updated */
- if (ei->type != E820_RAM && *ei_endpfn > end_pfn_map)
- end_pfn_map = *ei_endpfn;
-
- /* Skip if map is outside the node */
- if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
- *ei_startpfn >= end_pfn)
- return 0;
-
- /* Check for overlaps */
- if (*ei_startpfn < start_pfn)
- *ei_startpfn = start_pfn;
- if (*ei_endpfn > end_pfn)
- *ei_endpfn = end_pfn;
-
- /* Obey end_user_pfn to save on memmap */
- if (*ei_startpfn >= end_user_pfn)
- return 0;
- if (*ei_endpfn > end_user_pfn)
- *ei_endpfn = end_user_pfn;
-
- return 1;
-}
-
-/* Walk the e820 map and register active regions within a node */
-void __init
-e820_register_active_regions(int nid, unsigned long start_pfn,
- unsigned long end_pfn)
-{
- unsigned long ei_startpfn;
- unsigned long ei_endpfn;
- int i;
-
- for (i = 0; i < e820.nr_map; i++)
- if (e820_find_active_region(&e820.map[i],
- start_pfn, end_pfn,
- &ei_startpfn, &ei_endpfn))
- add_active_range(nid, ei_startpfn, ei_endpfn);
-}
-
-/*
- * Add a memory region to the kernel e820 map.
- */
-void __init add_memory_region(unsigned long start, unsigned long size, int type)
-{
- int x = e820.nr_map;
-
- if (x == E820MAX) {
- printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
- return;
- }
-
- e820.map[x].addr = start;
- e820.map[x].size = size;
- e820.map[x].type = type;
- e820.nr_map++;
-}
-
-/*
- * Find the hole size (in bytes) in the memory range.
- * @start: starting address of the memory range to scan
- * @end: ending address of the memory range to scan
- */
-unsigned long __init e820_hole_size(unsigned long start, unsigned long end)
-{
- unsigned long start_pfn = start >> PAGE_SHIFT;
- unsigned long end_pfn = end >> PAGE_SHIFT;
- unsigned long ei_startpfn;
- unsigned long ei_endpfn;
- unsigned long ram = 0;
- int i;
-
- for (i = 0; i < e820.nr_map; i++) {
- if (e820_find_active_region(&e820.map[i],
- start_pfn, end_pfn,
- &ei_startpfn, &ei_endpfn))
- ram += ei_endpfn - ei_startpfn;
- }
- return end - start - (ram << PAGE_SHIFT);
-}
-
-void __init e820_print_map(char *who)
-{
- int i;
-
- for (i = 0; i < e820.nr_map; i++) {
- printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
- (unsigned long long) e820.map[i].addr,
- (unsigned long long) (e820.map[i].addr + e820.map[i].size));
- switch (e820.map[i].type) {
- case E820_RAM: printk("(usable)\n");
- break;
- case E820_RESERVED:
- printk("(reserved)\n");
- break;
- case E820_ACPI:
- printk("(ACPI data)\n");
- break;
- case E820_NVS:
- printk("(ACPI NVS)\n");
- break;
- default: printk("type %u\n", e820.map[i].type);
- break;
- }
- }
-}
-
-/*
- * Sanitize the BIOS e820 map.
- *
- * Some e820 responses include overlapping entries. The following
- * replaces the original e820 map with a new one, removing overlaps.
- *
- */
-static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
-{
- struct change_member {
- struct e820entry *pbios; /* pointer to original bios entry */
- unsigned long long addr; /* address for this change point */
- };
- static struct change_member change_point_list[2*E820MAX] __initdata;
- static struct change_member *change_point[2*E820MAX] __initdata;
- static struct e820entry *overlap_list[E820MAX] __initdata;
- static struct e820entry new_bios[E820MAX] __initdata;
- struct change_member *change_tmp;
- unsigned long current_type, last_type;
- unsigned long long last_addr;
- int chgidx, still_changing;
- int overlap_entries;
- int new_bios_entry;
- int old_nr, new_nr, chg_nr;
- int i;
-
- /*
- Visually we're performing the following (1,2,3,4 = memory types)...
-
- Sample memory map (w/overlaps):
- ____22__________________
- ______________________4_
- ____1111________________
- _44_____________________
- 11111111________________
- ____________________33__
- ___________44___________
- __________33333_________
- ______________22________
- ___________________2222_
- _________111111111______
- _____________________11_
- _________________4______
-
- Sanitized equivalent (no overlap):
- 1_______________________
- _44_____________________
- ___1____________________
- ____22__________________
- ______11________________
- _________1______________
- __________3_____________
- ___________44___________
- _____________33_________
- _______________2________
- ________________1_______
- _________________4______
- ___________________2____
- ____________________33__
- ______________________4_
- */
-
- /* if there's only one memory region, don't bother */
- if (*pnr_map < 2)
- return -1;
-
- old_nr = *pnr_map;
-
- /* bail out if we find any unreasonable addresses in bios map */
- for (i=0; i<old_nr; i++)
- if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
- return -1;
-
- /* create pointers for initial change-point information (for sorting) */
- for (i=0; i < 2*old_nr; i++)
- change_point[i] = &change_point_list[i];
-
- /* record all known change-points (starting and ending addresses),
- omitting those that are for empty memory regions */
- chgidx = 0;
- for (i=0; i < old_nr; i++) {
- if (biosmap[i].size != 0) {
- change_point[chgidx]->addr = biosmap[i].addr;
- change_point[chgidx++]->pbios = &biosmap[i];
- change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
- change_point[chgidx++]->pbios = &biosmap[i];
- }
- }
- chg_nr = chgidx;
-
- /* sort change-point list by memory addresses (low -> high) */
- still_changing = 1;
- while (still_changing) {
- still_changing = 0;
- for (i=1; i < chg_nr; i++) {
- /* if <current_addr> > <last_addr>, swap */
- /* or, if current=<start_addr> & last=<end_addr>, swap */
- if ((change_point[i]->addr < change_point[i-1]->addr) ||
- ((change_point[i]->addr == change_point[i-1]->addr) &&
- (change_point[i]->addr == change_point[i]->pbios->addr) &&
- (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
- )
- {
- change_tmp = change_point[i];
- change_point[i] = change_point[i-1];
- change_point[i-1] = change_tmp;
- still_changing=1;
- }
- }
- }
-
- /* create a new bios memory map, removing overlaps */
- overlap_entries=0; /* number of entries in the overlap table */
- new_bios_entry=0; /* index for creating new bios map entries */
- last_type = 0; /* start with undefined memory type */
- last_addr = 0; /* start with 0 as last starting address */
- /* loop through change-points, determining affect on the new bios map */
- for (chgidx=0; chgidx < chg_nr; chgidx++)
- {
- /* keep track of all overlapping bios entries */
- if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
- {
- /* add map entry to overlap list (> 1 entry implies an overlap) */
- overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
- }
- else
- {
- /* remove entry from list (order independent, so swap with last) */
- for (i=0; i<overlap_entries; i++)
- {
- if (overlap_list[i] == change_point[chgidx]->pbios)
- overlap_list[i] = overlap_list[overlap_entries-1];
- }
- overlap_entries--;
- }
- /* if there are overlapping entries, decide which "type" to use */
- /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
- current_type = 0;
- for (i=0; i<overlap_entries; i++)
- if (overlap_list[i]->type > current_type)
- current_type = overlap_list[i]->type;
- /* continue building up new bios map based on this information */
- if (current_type != last_type) {
- if (last_type != 0) {
- new_bios[new_bios_entry].size =
- change_point[chgidx]->addr - last_addr;
- /* move forward only if the new size was non-zero */
- if (new_bios[new_bios_entry].size != 0)
- if (++new_bios_entry >= E820MAX)
- break; /* no more space left for new bios entries */
- }
- if (current_type != 0) {
- new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
- new_bios[new_bios_entry].type = current_type;
- last_addr=change_point[chgidx]->addr;
- }
- last_type = current_type;
- }
- }
- new_nr = new_bios_entry; /* retain count for new bios entries */
-
- /* copy new bios mapping into original location */
- memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
- *pnr_map = new_nr;
-
- return 0;
-}
-
-/*
- * Copy the BIOS e820 map into a safe place.
- *
- * Sanity-check it while we're at it..
- *
- * If we're lucky and live on a modern system, the setup code
- * will have given us a memory map that we can use to properly
- * set up memory. If we aren't, we'll fake a memory map.
- */
-static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
-{
- /* Only one memory region (or negative)? Ignore it */
- if (nr_map < 2)
- return -1;
-
- do {
- unsigned long start = biosmap->addr;
- unsigned long size = biosmap->size;
- unsigned long end = start + size;
- unsigned long type = biosmap->type;
-
- /* Overflow in 64 bits? Ignore the memory map. */
- if (start > end)
- return -1;
-
- add_memory_region(start, size, type);
- } while (biosmap++,--nr_map);
- return 0;
-}
-
-void early_panic(char *msg)
-{
- early_printk(msg);
- panic(msg);
-}
-
-void __init setup_memory_region(void)
-{
- /*
- * Try to copy the BIOS-supplied E820-map.
- *
- * Otherwise fake a memory map; one section from 0k->640k,
- * the next section from 1mb->appropriate_mem_k
- */
- sanitize_e820_map(E820_MAP, &E820_MAP_NR);
- if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0)
- early_panic("Cannot find a valid memory map");
- printk(KERN_INFO "BIOS-provided physical RAM map:\n");
- e820_print_map("BIOS-e820");
-}
-
-static int __init parse_memopt(char *p)
-{
- if (!p)
- return -EINVAL;
- end_user_pfn = memparse(p, &p);
- end_user_pfn >>= PAGE_SHIFT;
- return 0;
-}
-early_param("mem", parse_memopt);
-
-static int userdef __initdata;
-
-static int __init parse_memmap_opt(char *p)
-{
- char *oldp;
- unsigned long long start_at, mem_size;
-
- if (!strcmp(p, "exactmap")) {
-#ifdef CONFIG_CRASH_DUMP
- /* If we are doing a crash dump, we
- * still need to know the real mem
- * size before original memory map is
- * reset.
- */
- e820_register_active_regions(0, 0, -1UL);
- saved_max_pfn = e820_end_of_ram();
- remove_all_active_ranges();
-#endif
- end_pfn_map = 0;
- e820.nr_map = 0;
- userdef = 1;
- return 0;
- }
-
- oldp = p;
- mem_size = memparse(p, &p);
- if (p == oldp)
- return -EINVAL;
- if (*p == '@') {
- start_at = memparse(p+1, &p);
- add_memory_region(start_at, mem_size, E820_RAM);
- } else if (*p == '#') {
- start_at = memparse(p+1, &p);
- add_memory_region(start_at, mem_size, E820_ACPI);
- } else if (*p == '$') {
- start_at = memparse(p+1, &p);
- add_memory_region(start_at, mem_size, E820_RESERVED);
- } else {
- end_user_pfn = (mem_size >> PAGE_SHIFT);
- }
- return *p == '\0' ? 0 : -EINVAL;
-}
-early_param("memmap", parse_memmap_opt);
-
-void __init finish_e820_parsing(void)
-{
- if (userdef) {
- printk(KERN_INFO "user-defined physical RAM map:\n");
- e820_print_map("user");
- }
-}
-
-unsigned long pci_mem_start = 0xaeedbabe;
-EXPORT_SYMBOL(pci_mem_start);
-
-/*
- * Search for the biggest gap in the low 32 bits of the e820
- * memory space. We pass this space to PCI to assign MMIO resources
- * for hotplug or unconfigured devices in.
- * Hopefully the BIOS let enough space left.
- */
-__init void e820_setup_gap(void)
-{
- unsigned long gapstart, gapsize, round;
- unsigned long last;
- int i;
- int found = 0;
-
- last = 0x100000000ull;
- gapstart = 0x10000000;
- gapsize = 0x400000;
- i = e820.nr_map;
- while (--i >= 0) {
- unsigned long long start = e820.map[i].addr;
- unsigned long long end = start + e820.map[i].size;
-
- /*
- * Since "last" is at most 4GB, we know we'll
- * fit in 32 bits if this condition is true
- */
- if (last > end) {
- unsigned long gap = last - end;
-
- if (gap > gapsize) {
- gapsize = gap;
- gapstart = end;
- found = 1;
- }
- }
- if (start < last)
- last = start;
- }
-
- if (!found) {
- gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024;
- printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit address range\n"
- KERN_ERR "PCI: Unassigned devices with 32bit resource registers may break!\n");
- }
-
- /*
- * See how much we want to round up: start off with
- * rounding to the next 1MB area.
- */
- round = 0x100000;
- while ((gapsize >> 4) > round)
- round += round;
- /* Fun with two's complement */
- pci_mem_start = (gapstart + round) & -round;
-
- printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
- pci_mem_start, gapstart, gapsize);
-}
diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c
deleted file mode 100644
index 13aa4fd728f3..000000000000
--- a/arch/x86_64/kernel/early-quirks.c
+++ /dev/null
@@ -1,127 +0,0 @@
-/* Various workarounds for chipset bugs.
- This code runs very early and can't use the regular PCI subsystem
- The entries are keyed to PCI bridges which usually identify chipsets
- uniquely.
- This is only for whole classes of chipsets with specific problems which
- need early invasive action (e.g. before the timers are initialized).
- Most PCI device specific workarounds can be done later and should be
- in standard PCI quirks
- Mainboard specific bugs should be handled by DMI entries.
- CPU specific bugs in setup.c */
-
-#include <linux/pci.h>
-#include <linux/acpi.h>
-#include <linux/pci_ids.h>
-#include <asm/pci-direct.h>
-#include <asm/proto.h>
-#include <asm/iommu.h>
-#include <asm/dma.h>
-
-static void __init via_bugs(void)
-{
-#ifdef CONFIG_IOMMU
- if ((end_pfn > MAX_DMA32_PFN || force_iommu) &&
- !iommu_aperture_allowed) {
- printk(KERN_INFO
- "Looks like a VIA chipset. Disabling IOMMU. Override with iommu=allowed\n");
- iommu_aperture_disabled = 1;
- }
-#endif
-}
-
-#ifdef CONFIG_ACPI
-
-static int __init nvidia_hpet_check(struct acpi_table_header *header)
-{
- return 0;
-}
-#endif
-
-static void __init nvidia_bugs(void)
-{
-#ifdef CONFIG_ACPI
- /*
- * All timer overrides on Nvidia are
- * wrong unless HPET is enabled.
- * Unfortunately that's not true on many Asus boards.
- * We don't know yet how to detect this automatically, but
- * at least allow a command line override.
- */
- if (acpi_use_timer_override)
- return;
-
- if (acpi_table_parse(ACPI_SIG_HPET, nvidia_hpet_check)) {
- acpi_skip_timer_override = 1;
- printk(KERN_INFO "Nvidia board "
- "detected. Ignoring ACPI "
- "timer override.\n");
- printk(KERN_INFO "If you got timer trouble "
- "try acpi_use_timer_override\n");
- }
-#endif
- /* RED-PEN skip them on mptables too? */
-
-}
-
-static void __init ati_bugs(void)
-{
- if (timer_over_8254 == 1) {
- timer_over_8254 = 0;
- printk(KERN_INFO
- "ATI board detected. Disabling timer routing over 8254.\n");
- }
-}
-
-struct chipset {
- u16 vendor;
- void (*f)(void);
-};
-
-static struct chipset early_qrk[] __initdata = {
- { PCI_VENDOR_ID_NVIDIA, nvidia_bugs },
- { PCI_VENDOR_ID_VIA, via_bugs },
- { PCI_VENDOR_ID_ATI, ati_bugs },
- {}
-};
-
-void __init early_quirks(void)
-{
- int num, slot, func;
-
- if (!early_pci_allowed())
- return;
-
- /* Poor man's PCI discovery */
- for (num = 0; num < 32; num++) {
- for (slot = 0; slot < 32; slot++) {
- for (func = 0; func < 8; func++) {
- u32 class;
- u32 vendor;
- u8 type;
- int i;
- class = read_pci_config(num,slot,func,
- PCI_CLASS_REVISION);
- if (class == 0xffffffff)
- break;
-
- if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
- continue;
-
- vendor = read_pci_config(num, slot, func,
- PCI_VENDOR_ID);
- vendor &= 0xffff;
-
- for (i = 0; early_qrk[i].f; i++)
- if (early_qrk[i].vendor == vendor) {
- early_qrk[i].f();
- return;
- }
-
- type = read_pci_config_byte(num, slot, func,
- PCI_HEADER_TYPE);
- if (!(type & 0x80))
- break;
- }
- }
- }
-}
diff --git a/arch/x86_64/kernel/early_printk.c b/arch/x86_64/kernel/early_printk.c
deleted file mode 100644
index fd9aff3f3890..000000000000
--- a/arch/x86_64/kernel/early_printk.c
+++ /dev/null
@@ -1,259 +0,0 @@
-#include <linux/console.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/screen_info.h>
-#include <asm/io.h>
-#include <asm/processor.h>
-#include <asm/fcntl.h>
-#include <xen/hvc-console.h>
-
-/* Simple VGA output */
-
-#ifdef __i386__
-#include <asm/setup.h>
-#else
-#include <asm/bootsetup.h>
-#endif
-#define VGABASE (__ISA_IO_base + 0xb8000)
-
-static int max_ypos = 25, max_xpos = 80;
-static int current_ypos = 25, current_xpos = 0;
-
-static void early_vga_write(struct console *con, const char *str, unsigned n)
-{
- char c;
- int i, k, j;
-
- while ((c = *str++) != '\0' && n-- > 0) {
- if (current_ypos >= max_ypos) {
- /* scroll 1 line up */
- for (k = 1, j = 0; k < max_ypos; k++, j++) {
- for (i = 0; i < max_xpos; i++) {
- writew(readw(VGABASE+2*(max_xpos*k+i)),
- VGABASE + 2*(max_xpos*j + i));
- }
- }
- for (i = 0; i < max_xpos; i++)
- writew(0x720, VGABASE + 2*(max_xpos*j + i));
- current_ypos = max_ypos-1;
- }
- if (c == '\n') {
- current_xpos = 0;
- current_ypos++;
- } else if (c != '\r') {
- writew(((0x7 << 8) | (unsigned short) c),
- VGABASE + 2*(max_xpos*current_ypos +
- current_xpos++));
- if (current_xpos >= max_xpos) {
- current_xpos = 0;
- current_ypos++;
- }
- }
- }
-}
-
-static struct console early_vga_console = {
- .name = "earlyvga",
- .write = early_vga_write,
- .flags = CON_PRINTBUFFER,
- .index = -1,
-};
-
-/* Serial functions loosely based on a similar package from Klaus P. Gerlicher */
-
-static int early_serial_base = 0x3f8; /* ttyS0 */
-
-#define XMTRDY 0x20
-
-#define DLAB 0x80
-
-#define TXR 0 /* Transmit register (WRITE) */
-#define RXR 0 /* Receive register (READ) */
-#define IER 1 /* Interrupt Enable */
-#define IIR 2 /* Interrupt ID */
-#define FCR 2 /* FIFO control */
-#define LCR 3 /* Line control */
-#define MCR 4 /* Modem control */
-#define LSR 5 /* Line Status */
-#define MSR 6 /* Modem Status */
-#define DLL 0 /* Divisor Latch Low */
-#define DLH 1 /* Divisor latch High */
-
-static int early_serial_putc(unsigned char ch)
-{
- unsigned timeout = 0xffff;
- while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout)
- cpu_relax();
- outb(ch, early_serial_base + TXR);
- return timeout ? 0 : -1;
-}
-
-static void early_serial_write(struct console *con, const char *s, unsigned n)
-{
- while (*s && n-- > 0) {
- if (*s == '\n')
- early_serial_putc('\r');
- early_serial_putc(*s);
- s++;
- }
-}
-
-#define DEFAULT_BAUD 9600
-
-static __init void early_serial_init(char *s)
-{
- unsigned char c;
- unsigned divisor;
- unsigned baud = DEFAULT_BAUD;
- char *e;
-
- if (*s == ',')
- ++s;
-
- if (*s) {
- unsigned port;
- if (!strncmp(s,"0x",2)) {
- early_serial_base = simple_strtoul(s, &e, 16);
- } else {
- static int bases[] = { 0x3f8, 0x2f8 };
-
- if (!strncmp(s,"ttyS",4))
- s += 4;
- port = simple_strtoul(s, &e, 10);
- if (port > 1 || s == e)
- port = 0;
- early_serial_base = bases[port];
- }
- s += strcspn(s, ",");
- if (*s == ',')
- s++;
- }
-
- outb(0x3, early_serial_base + LCR); /* 8n1 */
- outb(0, early_serial_base + IER); /* no interrupt */
- outb(0, early_serial_base + FCR); /* no fifo */
- outb(0x3, early_serial_base + MCR); /* DTR + RTS */
-
- if (*s) {
- baud = simple_strtoul(s, &e, 0);
- if (baud == 0 || s == e)
- baud = DEFAULT_BAUD;
- }
-
- divisor = 115200 / baud;
- c = inb(early_serial_base + LCR);
- outb(c | DLAB, early_serial_base + LCR);
- outb(divisor & 0xff, early_serial_base + DLL);
- outb((divisor >> 8) & 0xff, early_serial_base + DLH);
- outb(c & ~DLAB, early_serial_base + LCR);
-}
-
-static struct console early_serial_console = {
- .name = "earlyser",
- .write = early_serial_write,
- .flags = CON_PRINTBUFFER,
- .index = -1,
-};
-
-/* Console interface to a host file on AMD's SimNow! */
-
-static int simnow_fd;
-
-enum {
- MAGIC1 = 0xBACCD00A,
- MAGIC2 = 0xCA110000,
- XOPEN = 5,
- XWRITE = 4,
-};
-
-static noinline long simnow(long cmd, long a, long b, long c)
-{
- long ret;
- asm volatile("cpuid" :
- "=a" (ret) :
- "b" (a), "c" (b), "d" (c), "0" (MAGIC1), "D" (cmd + MAGIC2));
- return ret;
-}
-
-static void __init simnow_init(char *str)
-{
- char *fn = "klog";
- if (*str == '=')
- fn = ++str;
- /* error ignored */
- simnow_fd = simnow(XOPEN, (unsigned long)fn, O_WRONLY|O_APPEND|O_CREAT, 0644);
-}
-
-static void simnow_write(struct console *con, const char *s, unsigned n)
-{
- simnow(XWRITE, simnow_fd, (unsigned long)s, n);
-}
-
-static struct console simnow_console = {
- .name = "simnow",
- .write = simnow_write,
- .flags = CON_PRINTBUFFER,
- .index = -1,
-};
-
-/* Direct interface for emergencies */
-struct console *early_console = &early_vga_console;
-static int early_console_initialized = 0;
-
-void early_printk(const char *fmt, ...)
-{
- char buf[512];
- int n;
- va_list ap;
-
- va_start(ap,fmt);
- n = vscnprintf(buf,512,fmt,ap);
- early_console->write(early_console,buf,n);
- va_end(ap);
-}
-
-static int __initdata keep_early;
-
-static int __init setup_early_printk(char *buf)
-{
- if (!buf)
- return 0;
-
- if (early_console_initialized)
- return 0;
- early_console_initialized = 1;
-
- if (strstr(buf, "keep"))
- keep_early = 1;
-
- if (!strncmp(buf, "serial", 6)) {
- early_serial_init(buf + 6);
- early_console = &early_serial_console;
- } else if (!strncmp(buf, "ttyS", 4)) {
- early_serial_init(buf);
- early_console = &early_serial_console;
- } else if (!strncmp(buf, "vga", 3)
- && SCREEN_INFO.orig_video_isVGA == 1) {
- max_xpos = SCREEN_INFO.orig_video_cols;
- max_ypos = SCREEN_INFO.orig_video_lines;
- current_ypos = SCREEN_INFO.orig_y;
- early_console = &early_vga_console;
- } else if (!strncmp(buf, "simnow", 6)) {
- simnow_init(buf + 6);
- early_console = &simnow_console;
- keep_early = 1;
-#ifdef CONFIG_HVC_XEN
- } else if (!strncmp(buf, "xen", 3)) {
- early_console = &xenboot_console;
-#endif
- }
-
- if (keep_early)
- early_console->flags &= ~CON_BOOT;
- else
- early_console->flags |= CON_BOOT;
- register_console(early_console);
- return 0;
-}
-early_param("earlyprintk", setup_early_printk);
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
deleted file mode 100644
index 1d232e5f5658..000000000000
--- a/arch/x86_64/kernel/entry.S
+++ /dev/null
@@ -1,1172 +0,0 @@
-/*
- * linux/arch/x86_64/entry.S
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
- * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
- */
-
-/*
- * entry.S contains the system-call and fault low-level handling routines.
- *
- * NOTE: This code handles signal-recognition, which happens every time
- * after an interrupt and after each system call.
- *
- * Normal syscalls and interrupts don't save a full stack frame, this is
- * only done for syscall tracing, signals or fork/exec et.al.
- *
- * A note on terminology:
- * - top of stack: Architecture defined interrupt frame from SS to RIP
- * at the top of the kernel process stack.
- * - partial stack frame: partially saved registers upto R11.
- * - full stack frame: Like partial stack frame, but all register saved.
- *
- * Some macro usage:
- * - CFI macros are used to generate dwarf2 unwind information for better
- * backtraces. They don't change any code.
- * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
- * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
- * There are unfortunately lots of special cases where some registers
- * not touched. The macro is a big mess that should be cleaned up.
- * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
- * Gives a full stack frame.
- * - ENTRY/END Define functions in the symbol table.
- * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
- * frame that is otherwise undefined after a SYSCALL
- * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
- * - errorentry/paranoidentry/zeroentry - Define exception entry points.
- */
-
-#include <linux/linkage.h>
-#include <asm/segment.h>
-#include <asm/cache.h>
-#include <asm/errno.h>
-#include <asm/dwarf2.h>
-#include <asm/calling.h>
-#include <asm/asm-offsets.h>
-#include <asm/msr.h>
-#include <asm/unistd.h>
-#include <asm/thread_info.h>
-#include <asm/hw_irq.h>
-#include <asm/page.h>
-#include <asm/irqflags.h>
-
- .code64
-
-#ifndef CONFIG_PREEMPT
-#define retint_kernel retint_restore_args
-#endif
-
-
-.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
-#ifdef CONFIG_TRACE_IRQFLAGS
- bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
- jnc 1f
- TRACE_IRQS_ON
-1:
-#endif
-.endm
-
-/*
- * C code is not supposed to know about undefined top of stack. Every time
- * a C function with an pt_regs argument is called from the SYSCALL based
- * fast path FIXUP_TOP_OF_STACK is needed.
- * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
- * manipulation.
- */
-
- /* %rsp:at FRAMEEND */
- .macro FIXUP_TOP_OF_STACK tmp
- movq %gs:pda_oldrsp,\tmp
- movq \tmp,RSP(%rsp)
- movq $__USER_DS,SS(%rsp)
- movq $__USER_CS,CS(%rsp)
- movq $-1,RCX(%rsp)
- movq R11(%rsp),\tmp /* get eflags */
- movq \tmp,EFLAGS(%rsp)
- .endm
-
- .macro RESTORE_TOP_OF_STACK tmp,offset=0
- movq RSP-\offset(%rsp),\tmp
- movq \tmp,%gs:pda_oldrsp
- movq EFLAGS-\offset(%rsp),\tmp
- movq \tmp,R11-\offset(%rsp)
- .endm
-
- .macro FAKE_STACK_FRAME child_rip
- /* push in order ss, rsp, eflags, cs, rip */
- xorl %eax, %eax
- pushq %rax /* ss */
- CFI_ADJUST_CFA_OFFSET 8
- /*CFI_REL_OFFSET ss,0*/
- pushq %rax /* rsp */
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rsp,0
- pushq $(1<<9) /* eflags - interrupts on */
- CFI_ADJUST_CFA_OFFSET 8
- /*CFI_REL_OFFSET rflags,0*/
- pushq $__KERNEL_CS /* cs */
- CFI_ADJUST_CFA_OFFSET 8
- /*CFI_REL_OFFSET cs,0*/
- pushq \child_rip /* rip */
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rip,0
- pushq %rax /* orig rax */
- CFI_ADJUST_CFA_OFFSET 8
- .endm
-
- .macro UNFAKE_STACK_FRAME
- addq $8*6, %rsp
- CFI_ADJUST_CFA_OFFSET -(6*8)
- .endm
-
- .macro CFI_DEFAULT_STACK start=1
- .if \start
- CFI_STARTPROC simple
- CFI_SIGNAL_FRAME
- CFI_DEF_CFA rsp,SS+8
- .else
- CFI_DEF_CFA_OFFSET SS+8
- .endif
- CFI_REL_OFFSET r15,R15
- CFI_REL_OFFSET r14,R14
- CFI_REL_OFFSET r13,R13
- CFI_REL_OFFSET r12,R12
- CFI_REL_OFFSET rbp,RBP
- CFI_REL_OFFSET rbx,RBX
- CFI_REL_OFFSET r11,R11
- CFI_REL_OFFSET r10,R10
- CFI_REL_OFFSET r9,R9
- CFI_REL_OFFSET r8,R8
- CFI_REL_OFFSET rax,RAX
- CFI_REL_OFFSET rcx,RCX
- CFI_REL_OFFSET rdx,RDX
- CFI_REL_OFFSET rsi,RSI
- CFI_REL_OFFSET rdi,RDI
- CFI_REL_OFFSET rip,RIP
- /*CFI_REL_OFFSET cs,CS*/
- /*CFI_REL_OFFSET rflags,EFLAGS*/
- CFI_REL_OFFSET rsp,RSP
- /*CFI_REL_OFFSET ss,SS*/
- .endm
-/*
- * A newly forked process directly context switches into this.
- */
-/* rdi: prev */
-ENTRY(ret_from_fork)
- CFI_DEFAULT_STACK
- push kernel_eflags(%rip)
- CFI_ADJUST_CFA_OFFSET 4
- popf # reset kernel eflags
- CFI_ADJUST_CFA_OFFSET -4
- call schedule_tail
- GET_THREAD_INFO(%rcx)
- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
- jnz rff_trace
-rff_action:
- RESTORE_REST
- testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
- je int_ret_from_sys_call
- testl $_TIF_IA32,threadinfo_flags(%rcx)
- jnz int_ret_from_sys_call
- RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
- jmp ret_from_sys_call
-rff_trace:
- movq %rsp,%rdi
- call syscall_trace_leave
- GET_THREAD_INFO(%rcx)
- jmp rff_action
- CFI_ENDPROC
-END(ret_from_fork)
-
-/*
- * System call entry. Upto 6 arguments in registers are supported.
- *
- * SYSCALL does not save anything on the stack and does not change the
- * stack pointer.
- */
-
-/*
- * Register setup:
- * rax system call number
- * rdi arg0
- * rcx return address for syscall/sysret, C arg3
- * rsi arg1
- * rdx arg2
- * r10 arg3 (--> moved to rcx for C)
- * r8 arg4
- * r9 arg5
- * r11 eflags for syscall/sysret, temporary for C
- * r12-r15,rbp,rbx saved by C code, not touched.
- *
- * Interrupts are off on entry.
- * Only called from user space.
- *
- * XXX if we had a free scratch register we could save the RSP into the stack frame
- * and report it properly in ps. Unfortunately we haven't.
- *
- * When user can change the frames always force IRET. That is because
- * it deals with uncanonical addresses better. SYSRET has trouble
- * with them due to bugs in both AMD and Intel CPUs.
- */
-
-ENTRY(system_call)
- CFI_STARTPROC simple
- CFI_SIGNAL_FRAME
- CFI_DEF_CFA rsp,PDA_STACKOFFSET
- CFI_REGISTER rip,rcx
- /*CFI_REGISTER rflags,r11*/
- swapgs
- movq %rsp,%gs:pda_oldrsp
- movq %gs:pda_kernelstack,%rsp
- /*
- * No need to follow this irqs off/on section - it's straight
- * and short:
- */
- sti
- SAVE_ARGS 8,1
- movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
- movq %rcx,RIP-ARGOFFSET(%rsp)
- CFI_REL_OFFSET rip,RIP-ARGOFFSET
- GET_THREAD_INFO(%rcx)
- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
- jnz tracesys
- cmpq $__NR_syscall_max,%rax
- ja badsys
- movq %r10,%rcx
- call *sys_call_table(,%rax,8) # XXX: rip relative
- movq %rax,RAX-ARGOFFSET(%rsp)
-/*
- * Syscall return path ending with SYSRET (fast path)
- * Has incomplete stack frame and undefined top of stack.
- */
-ret_from_sys_call:
- movl $_TIF_ALLWORK_MASK,%edi
- /* edi: flagmask */
-sysret_check:
- GET_THREAD_INFO(%rcx)
- cli
- TRACE_IRQS_OFF
- movl threadinfo_flags(%rcx),%edx
- andl %edi,%edx
- jnz sysret_careful
- CFI_REMEMBER_STATE
- /*
- * sysretq will re-enable interrupts:
- */
- TRACE_IRQS_ON
- movq RIP-ARGOFFSET(%rsp),%rcx
- CFI_REGISTER rip,rcx
- RESTORE_ARGS 0,-ARG_SKIP,1
- /*CFI_REGISTER rflags,r11*/
- movq %gs:pda_oldrsp,%rsp
- swapgs
- sysretq
-
- CFI_RESTORE_STATE
- /* Handle reschedules */
- /* edx: work, edi: workmask */
-sysret_careful:
- bt $TIF_NEED_RESCHED,%edx
- jnc sysret_signal
- TRACE_IRQS_ON
- sti
- pushq %rdi
- CFI_ADJUST_CFA_OFFSET 8
- call schedule
- popq %rdi
- CFI_ADJUST_CFA_OFFSET -8
- jmp sysret_check
-
- /* Handle a signal */
-sysret_signal:
- TRACE_IRQS_ON
- sti
- testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
- jz 1f
-
- /* Really a signal */
- /* edx: work flags (arg3) */
- leaq do_notify_resume(%rip),%rax
- leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
- xorl %esi,%esi # oldset -> arg2
- call ptregscall_common
-1: movl $_TIF_NEED_RESCHED,%edi
- /* Use IRET because user could have changed frame. This
- works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
- cli
- TRACE_IRQS_OFF
- jmp int_with_check
-
-badsys:
- movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
- jmp ret_from_sys_call
-
- /* Do syscall tracing */
-tracesys:
- SAVE_REST
- movq $-ENOSYS,RAX(%rsp)
- FIXUP_TOP_OF_STACK %rdi
- movq %rsp,%rdi
- call syscall_trace_enter
- LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
- RESTORE_REST
- cmpq $__NR_syscall_max,%rax
- movq $-ENOSYS,%rcx
- cmova %rcx,%rax
- ja 1f
- movq %r10,%rcx /* fixup for C */
- call *sys_call_table(,%rax,8)
-1: movq %rax,RAX-ARGOFFSET(%rsp)
- /* Use IRET because user could have changed frame */
-
-/*
- * Syscall return path ending with IRET.
- * Has correct top of stack, but partial stack frame.
- */
- .globl int_ret_from_sys_call
-int_ret_from_sys_call:
- cli
- TRACE_IRQS_OFF
- testl $3,CS-ARGOFFSET(%rsp)
- je retint_restore_args
- movl $_TIF_ALLWORK_MASK,%edi
- /* edi: mask to check */
-int_with_check:
- GET_THREAD_INFO(%rcx)
- movl threadinfo_flags(%rcx),%edx
- andl %edi,%edx
- jnz int_careful
- andl $~TS_COMPAT,threadinfo_status(%rcx)
- jmp retint_swapgs
-
- /* Either reschedule or signal or syscall exit tracking needed. */
- /* First do a reschedule test. */
- /* edx: work, edi: workmask */
-int_careful:
- bt $TIF_NEED_RESCHED,%edx
- jnc int_very_careful
- TRACE_IRQS_ON
- sti
- pushq %rdi
- CFI_ADJUST_CFA_OFFSET 8
- call schedule
- popq %rdi
- CFI_ADJUST_CFA_OFFSET -8
- cli
- TRACE_IRQS_OFF
- jmp int_with_check
-
- /* handle signals and tracing -- both require a full stack frame */
-int_very_careful:
- TRACE_IRQS_ON
- sti
- SAVE_REST
- /* Check for syscall exit trace */
- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
- jz int_signal
- pushq %rdi
- CFI_ADJUST_CFA_OFFSET 8
- leaq 8(%rsp),%rdi # &ptregs -> arg1
- call syscall_trace_leave
- popq %rdi
- CFI_ADJUST_CFA_OFFSET -8
- andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
- jmp int_restore_rest
-
-int_signal:
- testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
- jz 1f
- movq %rsp,%rdi # &ptregs -> arg1
- xorl %esi,%esi # oldset -> arg2
- call do_notify_resume
-1: movl $_TIF_NEED_RESCHED,%edi
-int_restore_rest:
- RESTORE_REST
- cli
- TRACE_IRQS_OFF
- jmp int_with_check
- CFI_ENDPROC
-END(system_call)
-
-/*
- * Certain special system calls that need to save a complete full stack frame.
- */
-
- .macro PTREGSCALL label,func,arg
- .globl \label
-\label:
- leaq \func(%rip),%rax
- leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
- jmp ptregscall_common
-END(\label)
- .endm
-
- CFI_STARTPROC
-
- PTREGSCALL stub_clone, sys_clone, %r8
- PTREGSCALL stub_fork, sys_fork, %rdi
- PTREGSCALL stub_vfork, sys_vfork, %rdi
- PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
- PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
- PTREGSCALL stub_iopl, sys_iopl, %rsi
-
-ENTRY(ptregscall_common)
- popq %r11
- CFI_ADJUST_CFA_OFFSET -8
- CFI_REGISTER rip, r11
- SAVE_REST
- movq %r11, %r15
- CFI_REGISTER rip, r15
- FIXUP_TOP_OF_STACK %r11
- call *%rax
- RESTORE_TOP_OF_STACK %r11
- movq %r15, %r11
- CFI_REGISTER rip, r11
- RESTORE_REST
- pushq %r11
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rip, 0
- ret
- CFI_ENDPROC
-END(ptregscall_common)
-
-ENTRY(stub_execve)
- CFI_STARTPROC
- popq %r11
- CFI_ADJUST_CFA_OFFSET -8
- CFI_REGISTER rip, r11
- SAVE_REST
- FIXUP_TOP_OF_STACK %r11
- call sys_execve
- RESTORE_TOP_OF_STACK %r11
- movq %rax,RAX(%rsp)
- RESTORE_REST
- jmp int_ret_from_sys_call
- CFI_ENDPROC
-END(stub_execve)
-
-/*
- * sigreturn is special because it needs to restore all registers on return.
- * This cannot be done with SYSRET, so use the IRET return path instead.
- */
-ENTRY(stub_rt_sigreturn)
- CFI_STARTPROC
- addq $8, %rsp
- CFI_ADJUST_CFA_OFFSET -8
- SAVE_REST
- movq %rsp,%rdi
- FIXUP_TOP_OF_STACK %r11
- call sys_rt_sigreturn
- movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
- RESTORE_REST
- jmp int_ret_from_sys_call
- CFI_ENDPROC
-END(stub_rt_sigreturn)
-
-/*
- * initial frame state for interrupts and exceptions
- */
- .macro _frame ref
- CFI_STARTPROC simple
- CFI_SIGNAL_FRAME
- CFI_DEF_CFA rsp,SS+8-\ref
- /*CFI_REL_OFFSET ss,SS-\ref*/
- CFI_REL_OFFSET rsp,RSP-\ref
- /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
- /*CFI_REL_OFFSET cs,CS-\ref*/
- CFI_REL_OFFSET rip,RIP-\ref
- .endm
-
-/* initial frame state for interrupts (and exceptions without error code) */
-#define INTR_FRAME _frame RIP
-/* initial frame state for exceptions with error code (and interrupts with
- vector already pushed) */
-#define XCPT_FRAME _frame ORIG_RAX
-
-/*
- * Interrupt entry/exit.
- *
- * Interrupt entry points save only callee clobbered registers in fast path.
- *
- * Entry runs with interrupts off.
- */
-
-/* 0(%rsp): interrupt number */
- .macro interrupt func
- cld
- SAVE_ARGS
- leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
- pushq %rbp
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rbp, 0
- movq %rsp,%rbp
- CFI_DEF_CFA_REGISTER rbp
- testl $3,CS(%rdi)
- je 1f
- swapgs
- /* irqcount is used to check if a CPU is already on an interrupt
- stack or not. While this is essentially redundant with preempt_count
- it is a little cheaper to use a separate counter in the PDA
- (short of moving irq_enter into assembly, which would be too
- much work) */
-1: incl %gs:pda_irqcount
- cmoveq %gs:pda_irqstackptr,%rsp
- push %rbp # backlink for old unwinder
- /*
- * We entered an interrupt context - irqs are off:
- */
- TRACE_IRQS_OFF
- call \func
- .endm
-
-ENTRY(common_interrupt)
- XCPT_FRAME
- interrupt do_IRQ
- /* 0(%rsp): oldrsp-ARGOFFSET */
-ret_from_intr:
- cli
- TRACE_IRQS_OFF
- decl %gs:pda_irqcount
- leaveq
- CFI_DEF_CFA_REGISTER rsp
- CFI_ADJUST_CFA_OFFSET -8
-exit_intr:
- GET_THREAD_INFO(%rcx)
- testl $3,CS-ARGOFFSET(%rsp)
- je retint_kernel
-
- /* Interrupt came from user space */
- /*
- * Has a correct top of stack, but a partial stack frame
- * %rcx: thread info. Interrupts off.
- */
-retint_with_reschedule:
- movl $_TIF_WORK_MASK,%edi
-retint_check:
- movl threadinfo_flags(%rcx),%edx
- andl %edi,%edx
- CFI_REMEMBER_STATE
- jnz retint_careful
-retint_swapgs:
- /*
- * The iretq could re-enable interrupts:
- */
- cli
- TRACE_IRQS_IRETQ
- swapgs
- jmp restore_args
-
-retint_restore_args:
- cli
- /*
- * The iretq could re-enable interrupts:
- */
- TRACE_IRQS_IRETQ
-restore_args:
- RESTORE_ARGS 0,8,0
-iret_label:
- iretq
-
- .section __ex_table,"a"
- .quad iret_label,bad_iret
- .previous
- .section .fixup,"ax"
- /* force a signal here? this matches i386 behaviour */
- /* running with kernel gs */
-bad_iret:
- movq $11,%rdi /* SIGSEGV */
- TRACE_IRQS_ON
- sti
- jmp do_exit
- .previous
-
- /* edi: workmask, edx: work */
-retint_careful:
- CFI_RESTORE_STATE
- bt $TIF_NEED_RESCHED,%edx
- jnc retint_signal
- TRACE_IRQS_ON
- sti
- pushq %rdi
- CFI_ADJUST_CFA_OFFSET 8
- call schedule
- popq %rdi
- CFI_ADJUST_CFA_OFFSET -8
- GET_THREAD_INFO(%rcx)
- cli
- TRACE_IRQS_OFF
- jmp retint_check
-
-retint_signal:
- testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
- jz retint_swapgs
- TRACE_IRQS_ON
- sti
- SAVE_REST
- movq $-1,ORIG_RAX(%rsp)
- xorl %esi,%esi # oldset
- movq %rsp,%rdi # &pt_regs
- call do_notify_resume
- RESTORE_REST
- cli
- TRACE_IRQS_OFF
- movl $_TIF_NEED_RESCHED,%edi
- GET_THREAD_INFO(%rcx)
- jmp retint_check
-
-#ifdef CONFIG_PREEMPT
- /* Returning to kernel space. Check if we need preemption */
- /* rcx: threadinfo. interrupts off. */
-ENTRY(retint_kernel)
- cmpl $0,threadinfo_preempt_count(%rcx)
- jnz retint_restore_args
- bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
- jnc retint_restore_args
- bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
- jnc retint_restore_args
- call preempt_schedule_irq
- jmp exit_intr
-#endif
-
- CFI_ENDPROC
-END(common_interrupt)
-
-/*
- * APIC interrupts.
- */
- .macro apicinterrupt num,func
- INTR_FRAME
- pushq $~(\num)
- CFI_ADJUST_CFA_OFFSET 8
- interrupt \func
- jmp ret_from_intr
- CFI_ENDPROC
- .endm
-
-ENTRY(thermal_interrupt)
- apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
-END(thermal_interrupt)
-
-ENTRY(threshold_interrupt)
- apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
-END(threshold_interrupt)
-
-#ifdef CONFIG_SMP
-ENTRY(reschedule_interrupt)
- apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
-END(reschedule_interrupt)
-
- .macro INVALIDATE_ENTRY num
-ENTRY(invalidate_interrupt\num)
- apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
-END(invalidate_interrupt\num)
- .endm
-
- INVALIDATE_ENTRY 0
- INVALIDATE_ENTRY 1
- INVALIDATE_ENTRY 2
- INVALIDATE_ENTRY 3
- INVALIDATE_ENTRY 4
- INVALIDATE_ENTRY 5
- INVALIDATE_ENTRY 6
- INVALIDATE_ENTRY 7
-
-ENTRY(call_function_interrupt)
- apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
-END(call_function_interrupt)
-ENTRY(irq_move_cleanup_interrupt)
- apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
-END(irq_move_cleanup_interrupt)
-#endif
-
-ENTRY(apic_timer_interrupt)
- apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
-END(apic_timer_interrupt)
-
-ENTRY(error_interrupt)
- apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
-END(error_interrupt)
-
-ENTRY(spurious_interrupt)
- apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
-END(spurious_interrupt)
-
-/*
- * Exception entry points.
- */
- .macro zeroentry sym
- INTR_FRAME
- pushq $0 /* push error code/oldrax */
- CFI_ADJUST_CFA_OFFSET 8
- pushq %rax /* push real oldrax to the rdi slot */
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rax,0
- leaq \sym(%rip),%rax
- jmp error_entry
- CFI_ENDPROC
- .endm
-
- .macro errorentry sym
- XCPT_FRAME
- pushq %rax
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rax,0
- leaq \sym(%rip),%rax
- jmp error_entry
- CFI_ENDPROC
- .endm
-
- /* error code is on the stack already */
- /* handle NMI like exceptions that can happen everywhere */
- .macro paranoidentry sym, ist=0, irqtrace=1
- SAVE_ALL
- cld
- movl $1,%ebx
- movl $MSR_GS_BASE,%ecx
- rdmsr
- testl %edx,%edx
- js 1f
- swapgs
- xorl %ebx,%ebx
-1:
- .if \ist
- movq %gs:pda_data_offset, %rbp
- .endif
- movq %rsp,%rdi
- movq ORIG_RAX(%rsp),%rsi
- movq $-1,ORIG_RAX(%rsp)
- .if \ist
- subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
- .endif
- call \sym
- .if \ist
- addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
- .endif
- cli
- .if \irqtrace
- TRACE_IRQS_OFF
- .endif
- .endm
-
- /*
- * "Paranoid" exit path from exception stack.
- * Paranoid because this is used by NMIs and cannot take
- * any kernel state for granted.
- * We don't do kernel preemption checks here, because only
- * NMI should be common and it does not enable IRQs and
- * cannot get reschedule ticks.
- *
- * "trace" is 0 for the NMI handler only, because irq-tracing
- * is fundamentally NMI-unsafe. (we cannot change the soft and
- * hard flags at once, atomically)
- */
- .macro paranoidexit trace=1
- /* ebx: no swapgs flag */
-paranoid_exit\trace:
- testl %ebx,%ebx /* swapgs needed? */
- jnz paranoid_restore\trace
- testl $3,CS(%rsp)
- jnz paranoid_userspace\trace
-paranoid_swapgs\trace:
- .if \trace
- TRACE_IRQS_IRETQ 0
- .endif
- swapgs
-paranoid_restore\trace:
- RESTORE_ALL 8
- iretq
-paranoid_userspace\trace:
- GET_THREAD_INFO(%rcx)
- movl threadinfo_flags(%rcx),%ebx
- andl $_TIF_WORK_MASK,%ebx
- jz paranoid_swapgs\trace
- movq %rsp,%rdi /* &pt_regs */
- call sync_regs
- movq %rax,%rsp /* switch stack for scheduling */
- testl $_TIF_NEED_RESCHED,%ebx
- jnz paranoid_schedule\trace
- movl %ebx,%edx /* arg3: thread flags */
- .if \trace
- TRACE_IRQS_ON
- .endif
- sti
- xorl %esi,%esi /* arg2: oldset */
- movq %rsp,%rdi /* arg1: &pt_regs */
- call do_notify_resume
- cli
- .if \trace
- TRACE_IRQS_OFF
- .endif
- jmp paranoid_userspace\trace
-paranoid_schedule\trace:
- .if \trace
- TRACE_IRQS_ON
- .endif
- sti
- call schedule
- cli
- .if \trace
- TRACE_IRQS_OFF
- .endif
- jmp paranoid_userspace\trace
- CFI_ENDPROC
- .endm
-
-/*
- * Exception entry point. This expects an error code/orig_rax on the stack
- * and the exception handler in %rax.
- */
-KPROBE_ENTRY(error_entry)
- _frame RDI
- CFI_REL_OFFSET rax,0
- /* rdi slot contains rax, oldrax contains error code */
- cld
- subq $14*8,%rsp
- CFI_ADJUST_CFA_OFFSET (14*8)
- movq %rsi,13*8(%rsp)
- CFI_REL_OFFSET rsi,RSI
- movq 14*8(%rsp),%rsi /* load rax from rdi slot */
- CFI_REGISTER rax,rsi
- movq %rdx,12*8(%rsp)
- CFI_REL_OFFSET rdx,RDX
- movq %rcx,11*8(%rsp)
- CFI_REL_OFFSET rcx,RCX
- movq %rsi,10*8(%rsp) /* store rax */
- CFI_REL_OFFSET rax,RAX
- movq %r8, 9*8(%rsp)
- CFI_REL_OFFSET r8,R8
- movq %r9, 8*8(%rsp)
- CFI_REL_OFFSET r9,R9
- movq %r10,7*8(%rsp)
- CFI_REL_OFFSET r10,R10
- movq %r11,6*8(%rsp)
- CFI_REL_OFFSET r11,R11
- movq %rbx,5*8(%rsp)
- CFI_REL_OFFSET rbx,RBX
- movq %rbp,4*8(%rsp)
- CFI_REL_OFFSET rbp,RBP
- movq %r12,3*8(%rsp)
- CFI_REL_OFFSET r12,R12
- movq %r13,2*8(%rsp)
- CFI_REL_OFFSET r13,R13
- movq %r14,1*8(%rsp)
- CFI_REL_OFFSET r14,R14
- movq %r15,(%rsp)
- CFI_REL_OFFSET r15,R15
- xorl %ebx,%ebx
- testl $3,CS(%rsp)
- je error_kernelspace
-error_swapgs:
- swapgs
-error_sti:
- movq %rdi,RDI(%rsp)
- CFI_REL_OFFSET rdi,RDI
- movq %rsp,%rdi
- movq ORIG_RAX(%rsp),%rsi /* get error code */
- movq $-1,ORIG_RAX(%rsp)
- call *%rax
- /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
-error_exit:
- movl %ebx,%eax
- RESTORE_REST
- cli
- TRACE_IRQS_OFF
- GET_THREAD_INFO(%rcx)
- testl %eax,%eax
- jne retint_kernel
- movl threadinfo_flags(%rcx),%edx
- movl $_TIF_WORK_MASK,%edi
- andl %edi,%edx
- jnz retint_careful
- /*
- * The iret might restore flags:
- */
- TRACE_IRQS_IRETQ
- swapgs
- RESTORE_ARGS 0,8,0
- jmp iret_label
- CFI_ENDPROC
-
-error_kernelspace:
- incl %ebx
- /* There are two places in the kernel that can potentially fault with
- usergs. Handle them here. The exception handlers after
- iret run with kernel gs again, so don't set the user space flag.
- B stepping K8s sometimes report an truncated RIP for IRET
- exceptions returning to compat mode. Check for these here too. */
- leaq iret_label(%rip),%rbp
- cmpq %rbp,RIP(%rsp)
- je error_swapgs
- movl %ebp,%ebp /* zero extend */
- cmpq %rbp,RIP(%rsp)
- je error_swapgs
- cmpq $gs_change,RIP(%rsp)
- je error_swapgs
- jmp error_sti
-KPROBE_END(error_entry)
-
- /* Reload gs selector with exception handling */
- /* edi: new selector */
-ENTRY(load_gs_index)
- CFI_STARTPROC
- pushf
- CFI_ADJUST_CFA_OFFSET 8
- cli
- swapgs
-gs_change:
- movl %edi,%gs
-2: mfence /* workaround */
- swapgs
- popf
- CFI_ADJUST_CFA_OFFSET -8
- ret
- CFI_ENDPROC
-ENDPROC(load_gs_index)
-
- .section __ex_table,"a"
- .align 8
- .quad gs_change,bad_gs
- .previous
- .section .fixup,"ax"
- /* running with kernelgs */
-bad_gs:
- swapgs /* switch back to user gs */
- xorl %eax,%eax
- movl %eax,%gs
- jmp 2b
- .previous
-
-/*
- * Create a kernel thread.
- *
- * C extern interface:
- * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
- *
- * asm input arguments:
- * rdi: fn, rsi: arg, rdx: flags
- */
-ENTRY(kernel_thread)
- CFI_STARTPROC
- FAKE_STACK_FRAME $child_rip
- SAVE_ALL
-
- # rdi: flags, rsi: usp, rdx: will be &pt_regs
- movq %rdx,%rdi
- orq kernel_thread_flags(%rip),%rdi
- movq $-1, %rsi
- movq %rsp, %rdx
-
- xorl %r8d,%r8d
- xorl %r9d,%r9d
-
- # clone now
- call do_fork
- movq %rax,RAX(%rsp)
- xorl %edi,%edi
-
- /*
- * It isn't worth to check for reschedule here,
- * so internally to the x86_64 port you can rely on kernel_thread()
- * not to reschedule the child before returning, this avoids the need
- * of hacks for example to fork off the per-CPU idle tasks.
- * [Hopefully no generic code relies on the reschedule -AK]
- */
- RESTORE_ALL
- UNFAKE_STACK_FRAME
- ret
- CFI_ENDPROC
-ENDPROC(kernel_thread)
-
-child_rip:
- pushq $0 # fake return address
- CFI_STARTPROC
- /*
- * Here we are in the child and the registers are set as they were
- * at kernel_thread() invocation in the parent.
- */
- movq %rdi, %rax
- movq %rsi, %rdi
- call *%rax
- # exit
- xorl %edi, %edi
- call do_exit
- CFI_ENDPROC
-ENDPROC(child_rip)
-
-/*
- * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
- *
- * C extern interface:
- * extern long execve(char *name, char **argv, char **envp)
- *
- * asm input arguments:
- * rdi: name, rsi: argv, rdx: envp
- *
- * We want to fallback into:
- * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
- *
- * do_sys_execve asm fallback arguments:
- * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
- */
-ENTRY(kernel_execve)
- CFI_STARTPROC
- FAKE_STACK_FRAME $0
- SAVE_ALL
- call sys_execve
- movq %rax, RAX(%rsp)
- RESTORE_REST
- testq %rax,%rax
- je int_ret_from_sys_call
- RESTORE_ARGS
- UNFAKE_STACK_FRAME
- ret
- CFI_ENDPROC
-ENDPROC(kernel_execve)
-
-KPROBE_ENTRY(page_fault)
- errorentry do_page_fault
-KPROBE_END(page_fault)
-
-ENTRY(coprocessor_error)
- zeroentry do_coprocessor_error
-END(coprocessor_error)
-
-ENTRY(simd_coprocessor_error)
- zeroentry do_simd_coprocessor_error
-END(simd_coprocessor_error)
-
-ENTRY(device_not_available)
- zeroentry math_state_restore
-END(device_not_available)
-
- /* runs on exception stack */
-KPROBE_ENTRY(debug)
- INTR_FRAME
- pushq $0
- CFI_ADJUST_CFA_OFFSET 8
- paranoidentry do_debug, DEBUG_STACK
- paranoidexit
-KPROBE_END(debug)
-
- /* runs on exception stack */
-KPROBE_ENTRY(nmi)
- INTR_FRAME
- pushq $-1
- CFI_ADJUST_CFA_OFFSET 8
- paranoidentry do_nmi, 0, 0
-#ifdef CONFIG_TRACE_IRQFLAGS
- paranoidexit 0
-#else
- jmp paranoid_exit1
- CFI_ENDPROC
-#endif
-KPROBE_END(nmi)
-
-KPROBE_ENTRY(int3)
- INTR_FRAME
- pushq $0
- CFI_ADJUST_CFA_OFFSET 8
- paranoidentry do_int3, DEBUG_STACK
- jmp paranoid_exit1
- CFI_ENDPROC
-KPROBE_END(int3)
-
-ENTRY(overflow)
- zeroentry do_overflow
-END(overflow)
-
-ENTRY(bounds)
- zeroentry do_bounds
-END(bounds)
-
-ENTRY(invalid_op)
- zeroentry do_invalid_op
-END(invalid_op)
-
-ENTRY(coprocessor_segment_overrun)
- zeroentry do_coprocessor_segment_overrun
-END(coprocessor_segment_overrun)
-
-ENTRY(reserved)
- zeroentry do_reserved
-END(reserved)
-
- /* runs on exception stack */
-ENTRY(double_fault)
- XCPT_FRAME
- paranoidentry do_double_fault
- jmp paranoid_exit1
- CFI_ENDPROC
-END(double_fault)
-
-ENTRY(invalid_TSS)
- errorentry do_invalid_TSS
-END(invalid_TSS)
-
-ENTRY(segment_not_present)
- errorentry do_segment_not_present
-END(segment_not_present)
-
- /* runs on exception stack */
-ENTRY(stack_segment)
- XCPT_FRAME
- paranoidentry do_stack_segment
- jmp paranoid_exit1
- CFI_ENDPROC
-END(stack_segment)
-
-KPROBE_ENTRY(general_protection)
- errorentry do_general_protection
-KPROBE_END(general_protection)
-
-ENTRY(alignment_check)
- errorentry do_alignment_check
-END(alignment_check)
-
-ENTRY(divide_error)
- zeroentry do_divide_error
-END(divide_error)
-
-ENTRY(spurious_interrupt_bug)
- zeroentry do_spurious_interrupt_bug
-END(spurious_interrupt_bug)
-
-#ifdef CONFIG_X86_MCE
- /* runs on exception stack */
-ENTRY(machine_check)
- INTR_FRAME
- pushq $0
- CFI_ADJUST_CFA_OFFSET 8
- paranoidentry do_machine_check
- jmp paranoid_exit1
- CFI_ENDPROC
-END(machine_check)
-#endif
-
-/* Call softirq on interrupt stack. Interrupts are off. */
-ENTRY(call_softirq)
- CFI_STARTPROC
- push %rbp
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rbp,0
- mov %rsp,%rbp
- CFI_DEF_CFA_REGISTER rbp
- incl %gs:pda_irqcount
- cmove %gs:pda_irqstackptr,%rsp
- push %rbp # backlink for old unwinder
- call __do_softirq
- leaveq
- CFI_DEF_CFA_REGISTER rsp
- CFI_ADJUST_CFA_OFFSET -8
- decl %gs:pda_irqcount
- ret
- CFI_ENDPROC
-ENDPROC(call_softirq)
-
-KPROBE_ENTRY(ignore_sysret)
- CFI_STARTPROC
- mov $-ENOSYS,%eax
- sysret
- CFI_ENDPROC
-ENDPROC(ignore_sysret)
diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c
deleted file mode 100644
index 47496a40e84f..000000000000
--- a/arch/x86_64/kernel/genapic.c
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright 2004 James Cleverdon, IBM.
- * Subject to the GNU Public License, v.2
- *
- * Generic APIC sub-arch probe layer.
- *
- * Hacked for x86-64 by James Cleverdon from i386 architecture code by
- * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
- * James Cleverdon.
- */
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <linux/string.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/ctype.h>
-#include <linux/init.h>
-
-#include <asm/smp.h>
-#include <asm/ipi.h>
-#include <asm/genapic.h>
-
-#ifdef CONFIG_ACPI
-#include <acpi/acpi_bus.h>
-#endif
-
-/* which logical CPU number maps to which CPU (physical APIC ID) */
-u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly
- = { [0 ... NR_CPUS-1] = BAD_APICID };
-EXPORT_SYMBOL(x86_cpu_to_apicid);
-
-u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
-
-struct genapic __read_mostly *genapic = &apic_flat;
-
-/*
- * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
- */
-void __init setup_apic_routing(void)
-{
-#ifdef CONFIG_ACPI
- /*
- * Quirk: some x86_64 machines can only use physical APIC mode
- * regardless of how many processors are present (x86_64 ES7000
- * is an example).
- */
- if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
- (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
- genapic = &apic_physflat;
- else
-#endif
-
- if (cpus_weight(cpu_possible_map) <= 8)
- genapic = &apic_flat;
- else
- genapic = &apic_physflat;
-
- printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
-}
-
-/* Same for both flat and physical. */
-
-void send_IPI_self(int vector)
-{
- __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
-}
diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c
deleted file mode 100644
index ecb01eefdd27..000000000000
--- a/arch/x86_64/kernel/genapic_flat.c
+++ /dev/null
@@ -1,194 +0,0 @@
-/*
- * Copyright 2004 James Cleverdon, IBM.
- * Subject to the GNU Public License, v.2
- *
- * Flat APIC subarch code.
- *
- * Hacked for x86-64 by James Cleverdon from i386 architecture code by
- * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
- * James Cleverdon.
- */
-#include <linux/errno.h>
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/ctype.h>
-#include <linux/init.h>
-#include <asm/smp.h>
-#include <asm/ipi.h>
-#include <asm/genapic.h>
-
-static cpumask_t flat_target_cpus(void)
-{
- return cpu_online_map;
-}
-
-static cpumask_t flat_vector_allocation_domain(int cpu)
-{
- /* Careful. Some cpus do not strictly honor the set of cpus
- * specified in the interrupt destination when using lowest
- * priority interrupt delivery mode.
- *
- * In particular there was a hyperthreading cpu observed to
- * deliver interrupts to the wrong hyperthread when only one
- * hyperthread was specified in the interrupt desitination.
- */
- cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
- return domain;
-}
-
-/*
- * Set up the logical destination ID.
- *
- * Intel recommends to set DFR, LDR and TPR before enabling
- * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
- * document number 292116). So here it goes...
- */
-static void flat_init_apic_ldr(void)
-{
- unsigned long val;
- unsigned long num, id;
-
- num = smp_processor_id();
- id = 1UL << num;
- x86_cpu_to_log_apicid[num] = id;
- apic_write(APIC_DFR, APIC_DFR_FLAT);
- val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
- val |= SET_APIC_LOGICAL_ID(id);
- apic_write(APIC_LDR, val);
-}
-
-static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
-{
- unsigned long mask = cpus_addr(cpumask)[0];
- unsigned long flags;
-
- local_irq_save(flags);
- __send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL);
- local_irq_restore(flags);
-}
-
-static void flat_send_IPI_allbutself(int vector)
-{
-#ifdef CONFIG_HOTPLUG_CPU
- int hotplug = 1;
-#else
- int hotplug = 0;
-#endif
- if (hotplug || vector == NMI_VECTOR) {
- cpumask_t allbutme = cpu_online_map;
-
- cpu_clear(smp_processor_id(), allbutme);
-
- if (!cpus_empty(allbutme))
- flat_send_IPI_mask(allbutme, vector);
- } else if (num_online_cpus() > 1) {
- __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
- }
-}
-
-static void flat_send_IPI_all(int vector)
-{
- if (vector == NMI_VECTOR)
- flat_send_IPI_mask(cpu_online_map, vector);
- else
- __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
-}
-
-static int flat_apic_id_registered(void)
-{
- return physid_isset(GET_APIC_ID(apic_read(APIC_ID)), phys_cpu_present_map);
-}
-
-static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
-{
- return cpus_addr(cpumask)[0] & APIC_ALL_CPUS;
-}
-
-static unsigned int phys_pkg_id(int index_msb)
-{
- return hard_smp_processor_id() >> index_msb;
-}
-
-struct genapic apic_flat = {
- .name = "flat",
- .int_delivery_mode = dest_LowestPrio,
- .int_dest_mode = (APIC_DEST_LOGICAL != 0),
- .target_cpus = flat_target_cpus,
- .vector_allocation_domain = flat_vector_allocation_domain,
- .apic_id_registered = flat_apic_id_registered,
- .init_apic_ldr = flat_init_apic_ldr,
- .send_IPI_all = flat_send_IPI_all,
- .send_IPI_allbutself = flat_send_IPI_allbutself,
- .send_IPI_mask = flat_send_IPI_mask,
- .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
- .phys_pkg_id = phys_pkg_id,
-};
-
-/*
- * Physflat mode is used when there are more than 8 CPUs on a AMD system.
- * We cannot use logical delivery in this case because the mask
- * overflows, so use physical mode.
- */
-
-static cpumask_t physflat_target_cpus(void)
-{
- return cpu_online_map;
-}
-
-static cpumask_t physflat_vector_allocation_domain(int cpu)
-{
- cpumask_t domain = CPU_MASK_NONE;
- cpu_set(cpu, domain);
- return domain;
-}
-
-
-static void physflat_send_IPI_mask(cpumask_t cpumask, int vector)
-{
- send_IPI_mask_sequence(cpumask, vector);
-}
-
-static void physflat_send_IPI_allbutself(int vector)
-{
- cpumask_t allbutme = cpu_online_map;
-
- cpu_clear(smp_processor_id(), allbutme);
- physflat_send_IPI_mask(allbutme, vector);
-}
-
-static void physflat_send_IPI_all(int vector)
-{
- physflat_send_IPI_mask(cpu_online_map, vector);
-}
-
-static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
-{
- int cpu;
-
- /*
- * We're using fixed IRQ delivery, can only return one phys APIC ID.
- * May as well be the first.
- */
- cpu = first_cpu(cpumask);
- if ((unsigned)cpu < NR_CPUS)
- return x86_cpu_to_apicid[cpu];
- else
- return BAD_APICID;
-}
-
-struct genapic apic_physflat = {
- .name = "physical flat",
- .int_delivery_mode = dest_Fixed,
- .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
- .target_cpus = physflat_target_cpus,
- .vector_allocation_domain = physflat_vector_allocation_domain,
- .apic_id_registered = flat_apic_id_registered,
- .init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/
- .send_IPI_all = physflat_send_IPI_all,
- .send_IPI_allbutself = physflat_send_IPI_allbutself,
- .send_IPI_mask = physflat_send_IPI_mask,
- .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
- .phys_pkg_id = phys_pkg_id,
-};
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S
deleted file mode 100644
index b6167fe3330e..000000000000
--- a/arch/x86_64/kernel/head.S
+++ /dev/null
@@ -1,416 +0,0 @@
-/*
- * linux/arch/x86_64/kernel/head.S -- start in 32bit and switch to 64bit
- *
- * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
- * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
- * Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
- * Copyright (C) 2001,2002 Andi Kleen <ak@suse.de>
- * Copyright (C) 2005 Eric Biederman <ebiederm@xmission.com>
- */
-
-
-#include <linux/linkage.h>
-#include <linux/threads.h>
-#include <linux/init.h>
-#include <asm/desc.h>
-#include <asm/segment.h>
-#include <asm/pgtable.h>
-#include <asm/page.h>
-#include <asm/msr.h>
-#include <asm/cache.h>
-
-/* we are not able to switch in one step to the final KERNEL ADRESS SPACE
- * because we need identity-mapped pages.
- *
- */
-
- .text
- .section .text.head
- .code64
- .globl startup_64
-startup_64:
-
- /*
- * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1,
- * and someone has loaded an identity mapped page table
- * for us. These identity mapped page tables map all of the
- * kernel pages and possibly all of memory.
- *
- * %esi holds a physical pointer to real_mode_data.
- *
- * We come here either directly from a 64bit bootloader, or from
- * arch/x86_64/boot/compressed/head.S.
- *
- * We only come here initially at boot nothing else comes here.
- *
- * Since we may be loaded at an address different from what we were
- * compiled to run at we first fixup the physical addresses in our page
- * tables and then reload them.
- */
-
- /* Compute the delta between the address I am compiled to run at and the
- * address I am actually running at.
- */
- leaq _text(%rip), %rbp
- subq $_text - __START_KERNEL_map, %rbp
-
- /* Is the address not 2M aligned? */
- movq %rbp, %rax
- andl $~LARGE_PAGE_MASK, %eax
- testl %eax, %eax
- jnz bad_address
-
- /* Is the address too large? */
- leaq _text(%rip), %rdx
- movq $PGDIR_SIZE, %rax
- cmpq %rax, %rdx
- jae bad_address
-
- /* Fixup the physical addresses in the page table
- */
- addq %rbp, init_level4_pgt + 0(%rip)
- addq %rbp, init_level4_pgt + (258*8)(%rip)
- addq %rbp, init_level4_pgt + (511*8)(%rip)
-
- addq %rbp, level3_ident_pgt + 0(%rip)
-
- addq %rbp, level3_kernel_pgt + (510*8)(%rip)
- addq %rbp, level3_kernel_pgt + (511*8)(%rip)
-
- addq %rbp, level2_fixmap_pgt + (506*8)(%rip)
-
- /* Add an Identity mapping if I am above 1G */
- leaq _text(%rip), %rdi
- andq $LARGE_PAGE_MASK, %rdi
-
- movq %rdi, %rax
- shrq $PUD_SHIFT, %rax
- andq $(PTRS_PER_PUD - 1), %rax
- jz ident_complete
-
- leaq (level2_spare_pgt - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx
- leaq level3_ident_pgt(%rip), %rbx
- movq %rdx, 0(%rbx, %rax, 8)
-
- movq %rdi, %rax
- shrq $PMD_SHIFT, %rax
- andq $(PTRS_PER_PMD - 1), %rax
- leaq __PAGE_KERNEL_LARGE_EXEC(%rdi), %rdx
- leaq level2_spare_pgt(%rip), %rbx
- movq %rdx, 0(%rbx, %rax, 8)
-ident_complete:
-
- /* Fixup the kernel text+data virtual addresses
- */
- leaq level2_kernel_pgt(%rip), %rdi
- leaq 4096(%rdi), %r8
- /* See if it is a valid page table entry */
-1: testq $1, 0(%rdi)
- jz 2f
- addq %rbp, 0(%rdi)
- /* Go to the next page */
-2: addq $8, %rdi
- cmp %r8, %rdi
- jne 1b
-
- /* Fixup phys_base */
- addq %rbp, phys_base(%rip)
-
-#ifdef CONFIG_SMP
- addq %rbp, trampoline_level4_pgt + 0(%rip)
- addq %rbp, trampoline_level4_pgt + (511*8)(%rip)
-#endif
-#ifdef CONFIG_ACPI_SLEEP
- addq %rbp, wakeup_level4_pgt + 0(%rip)
- addq %rbp, wakeup_level4_pgt + (511*8)(%rip)
-#endif
-
- /* Due to ENTRY(), sometimes the empty space gets filled with
- * zeros. Better take a jmp than relying on empty space being
- * filled with 0x90 (nop)
- */
- jmp secondary_startup_64
-ENTRY(secondary_startup_64)
- /*
- * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1,
- * and someone has loaded a mapped page table.
- *
- * %esi holds a physical pointer to real_mode_data.
- *
- * We come here either from startup_64 (using physical addresses)
- * or from trampoline.S (using virtual addresses).
- *
- * Using virtual addresses from trampoline.S removes the need
- * to have any identity mapped pages in the kernel page table
- * after the boot processor executes this code.
- */
-
- /* Enable PAE mode and PGE */
- xorq %rax, %rax
- btsq $5, %rax
- btsq $7, %rax
- movq %rax, %cr4
-
- /* Setup early boot stage 4 level pagetables. */
- movq $(init_level4_pgt - __START_KERNEL_map), %rax
- addq phys_base(%rip), %rax
- movq %rax, %cr3
-
- /* Ensure I am executing from virtual addresses */
- movq $1f, %rax
- jmp *%rax
-1:
-
- /* Check if nx is implemented */
- movl $0x80000001, %eax
- cpuid
- movl %edx,%edi
-
- /* Setup EFER (Extended Feature Enable Register) */
- movl $MSR_EFER, %ecx
- rdmsr
- btsl $_EFER_SCE, %eax /* Enable System Call */
- btl $20,%edi /* No Execute supported? */
- jnc 1f
- btsl $_EFER_NX, %eax
-1: wrmsr /* Make changes effective */
-
- /* Setup cr0 */
-#define CR0_PM 1 /* protected mode */
-#define CR0_MP (1<<1)
-#define CR0_ET (1<<4)
-#define CR0_NE (1<<5)
-#define CR0_WP (1<<16)
-#define CR0_AM (1<<18)
-#define CR0_PAGING (1<<31)
- movl $CR0_PM|CR0_MP|CR0_ET|CR0_NE|CR0_WP|CR0_AM|CR0_PAGING,%eax
- /* Make changes effective */
- movq %rax, %cr0
-
- /* Setup a boot time stack */
- movq init_rsp(%rip),%rsp
-
- /* zero EFLAGS after setting rsp */
- pushq $0
- popfq
-
- /*
- * We must switch to a new descriptor in kernel space for the GDT
- * because soon the kernel won't have access anymore to the userspace
- * addresses where we're currently running on. We have to do that here
- * because in 32bit we couldn't load a 64bit linear address.
- */
- lgdt cpu_gdt_descr(%rip)
-
- /* set up data segments. actually 0 would do too */
- movl $__KERNEL_DS,%eax
- movl %eax,%ds
- movl %eax,%ss
- movl %eax,%es
-
- /*
- * We don't really need to load %fs or %gs, but load them anyway
- * to kill any stale realmode selectors. This allows execution
- * under VT hardware.
- */
- movl %eax,%fs
- movl %eax,%gs
-
- /*
- * Setup up a dummy PDA. this is just for some early bootup code
- * that does in_interrupt()
- */
- movl $MSR_GS_BASE,%ecx
- movq $empty_zero_page,%rax
- movq %rax,%rdx
- shrq $32,%rdx
- wrmsr
-
- /* esi is pointer to real mode structure with interesting info.
- pass it to C */
- movl %esi, %edi
-
- /* Finally jump to run C code and to be on real kernel address
- * Since we are running on identity-mapped space we have to jump
- * to the full 64bit address, this is only possible as indirect
- * jump. In addition we need to ensure %cs is set so we make this
- * a far return.
- */
- movq initial_code(%rip),%rax
- pushq $0 # fake return address to stop unwinder
- pushq $__KERNEL_CS # set correct cs
- pushq %rax # target address in negative space
- lretq
-
- /* SMP bootup changes these two */
-#ifndef CONFIG_HOTPLUG_CPU
- .pushsection .init.data
-#endif
- .align 8
- .globl initial_code
-initial_code:
- .quad x86_64_start_kernel
-#ifndef CONFIG_HOTPLUG_CPU
- .popsection
-#endif
- .globl init_rsp
-init_rsp:
- .quad init_thread_union+THREAD_SIZE-8
-
-bad_address:
- jmp bad_address
-
-ENTRY(early_idt_handler)
- cmpl $2,early_recursion_flag(%rip)
- jz 1f
- incl early_recursion_flag(%rip)
- xorl %eax,%eax
- movq 8(%rsp),%rsi # get rip
- movq (%rsp),%rdx
- movq %cr2,%rcx
- leaq early_idt_msg(%rip),%rdi
- call early_printk
- cmpl $2,early_recursion_flag(%rip)
- jz 1f
- call dump_stack
-#ifdef CONFIG_KALLSYMS
- leaq early_idt_ripmsg(%rip),%rdi
- movq 8(%rsp),%rsi # get rip again
- call __print_symbol
-#endif
-1: hlt
- jmp 1b
-early_recursion_flag:
- .long 0
-
-early_idt_msg:
- .asciz "PANIC: early exception rip %lx error %lx cr2 %lx\n"
-early_idt_ripmsg:
- .asciz "RIP %s\n"
-
-.balign PAGE_SIZE
-
-#define NEXT_PAGE(name) \
- .balign PAGE_SIZE; \
-ENTRY(name)
-
-/* Automate the creation of 1 to 1 mapping pmd entries */
-#define PMDS(START, PERM, COUNT) \
- i = 0 ; \
- .rept (COUNT) ; \
- .quad (START) + (i << 21) + (PERM) ; \
- i = i + 1 ; \
- .endr
-
- /*
- * This default setting generates an ident mapping at address 0x100000
- * and a mapping for the kernel that precisely maps virtual address
- * 0xffffffff80000000 to physical address 0x000000. (always using
- * 2Mbyte large pages provided by PAE mode)
- */
-NEXT_PAGE(init_level4_pgt)
- .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
- .fill 257,8,0
- .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
- .fill 252,8,0
- /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
- .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
-
-NEXT_PAGE(level3_ident_pgt)
- .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
- .fill 511,8,0
-
-NEXT_PAGE(level3_kernel_pgt)
- .fill 510,8,0
- /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
- .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
- .quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
-
-NEXT_PAGE(level2_fixmap_pgt)
- .fill 506,8,0
- .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
- /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */
- .fill 5,8,0
-
-NEXT_PAGE(level1_fixmap_pgt)
- .fill 512,8,0
-
-NEXT_PAGE(level2_ident_pgt)
- /* Since I easily can, map the first 1G.
- * Don't set NX because code runs from these pages.
- */
- PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD)
-
-NEXT_PAGE(level2_kernel_pgt)
- /* 40MB kernel mapping. The kernel code cannot be bigger than that.
- When you change this change KERNEL_TEXT_SIZE in page.h too. */
- /* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */
- PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL, KERNEL_TEXT_SIZE/PMD_SIZE)
- /* Module mapping starts here */
- .fill (PTRS_PER_PMD - (KERNEL_TEXT_SIZE/PMD_SIZE)),8,0
-
-NEXT_PAGE(level2_spare_pgt)
- .fill 512,8,0
-
-#undef PMDS
-#undef NEXT_PAGE
-
- .data
- .align 16
- .globl cpu_gdt_descr
-cpu_gdt_descr:
- .word gdt_end-cpu_gdt_table-1
-gdt:
- .quad cpu_gdt_table
-#ifdef CONFIG_SMP
- .rept NR_CPUS-1
- .word 0
- .quad 0
- .endr
-#endif
-
-ENTRY(phys_base)
- /* This must match the first entry in level2_kernel_pgt */
- .quad 0x0000000000000000
-
-/* We need valid kernel segments for data and code in long mode too
- * IRET will check the segment types kkeil 2000/10/28
- * Also sysret mandates a special GDT layout
- */
-
- .section .data.page_aligned, "aw"
- .align PAGE_SIZE
-
-/* The TLS descriptors are currently at a different place compared to i386.
- Hopefully nobody expects them at a fixed place (Wine?) */
-
-ENTRY(cpu_gdt_table)
- .quad 0x0000000000000000 /* NULL descriptor */
- .quad 0x00cf9b000000ffff /* __KERNEL32_CS */
- .quad 0x00af9b000000ffff /* __KERNEL_CS */
- .quad 0x00cf93000000ffff /* __KERNEL_DS */
- .quad 0x00cffb000000ffff /* __USER32_CS */
- .quad 0x00cff3000000ffff /* __USER_DS, __USER32_DS */
- .quad 0x00affb000000ffff /* __USER_CS */
- .quad 0x0 /* unused */
- .quad 0,0 /* TSS */
- .quad 0,0 /* LDT */
- .quad 0,0,0 /* three TLS descriptors */
- .quad 0x0000f40000000000 /* node/CPU stored in limit */
-gdt_end:
- /* asm/segment.h:GDT_ENTRIES must match this */
- /* This should be a multiple of the cache line size */
- /* GDTs of other CPUs are now dynamically allocated */
-
- /* zero the remaining page */
- .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0
-
- .section .bss, "aw", @nobits
- .align L1_CACHE_BYTES
-ENTRY(idt_table)
- .skip 256 * 16
-
- .section .bss.page_aligned, "aw", @nobits
- .align PAGE_SIZE
-ENTRY(empty_zero_page)
- .skip PAGE_SIZE
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
deleted file mode 100644
index 6c34bdd22e26..000000000000
--- a/arch/x86_64/kernel/head64.c
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * linux/arch/x86_64/kernel/head64.c -- prepare to run common code
- *
- * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
- */
-
-#include <linux/init.h>
-#include <linux/linkage.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/percpu.h>
-
-#include <asm/processor.h>
-#include <asm/proto.h>
-#include <asm/smp.h>
-#include <asm/bootsetup.h>
-#include <asm/setup.h>
-#include <asm/desc.h>
-#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
-#include <asm/sections.h>
-
-static void __init zap_identity_mappings(void)
-{
- pgd_t *pgd = pgd_offset_k(0UL);
- pgd_clear(pgd);
- __flush_tlb();
-}
-
-/* Don't add a printk in there. printk relies on the PDA which is not initialized
- yet. */
-static void __init clear_bss(void)
-{
- memset(__bss_start, 0,
- (unsigned long) __bss_stop - (unsigned long) __bss_start);
-}
-
-#define NEW_CL_POINTER 0x228 /* Relative to real mode data */
-#define OLD_CL_MAGIC_ADDR 0x20
-#define OLD_CL_MAGIC 0xA33F
-#define OLD_CL_OFFSET 0x22
-
-static void __init copy_bootdata(char *real_mode_data)
-{
- unsigned long new_data;
- char * command_line;
-
- memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE);
- new_data = *(u32 *) (x86_boot_params + NEW_CL_POINTER);
- if (!new_data) {
- if (OLD_CL_MAGIC != *(u16 *)(real_mode_data + OLD_CL_MAGIC_ADDR)) {
- return;
- }
- new_data = __pa(real_mode_data) + *(u16 *)(real_mode_data + OLD_CL_OFFSET);
- }
- command_line = __va(new_data);
- memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
-}
-
-void __init x86_64_start_kernel(char * real_mode_data)
-{
- int i;
-
- /* clear bss before set_intr_gate with early_idt_handler */
- clear_bss();
-
- /* Make NULL pointers segfault */
- zap_identity_mappings();
-
- for (i = 0; i < IDT_ENTRIES; i++)
- set_intr_gate(i, early_idt_handler);
- asm volatile("lidt %0" :: "m" (idt_descr));
-
- early_printk("Kernel alive\n");
-
- for (i = 0; i < NR_CPUS; i++)
- cpu_pda(i) = &boot_cpu_pda[i];
-
- pda_init(0);
- copy_bootdata(__va(real_mode_data));
-#ifdef CONFIG_SMP
- cpu_set(0, cpu_online_map);
-#endif
- start_kernel();
-}
diff --git a/arch/x86_64/kernel/hpet.c b/arch/x86_64/kernel/hpet.c
deleted file mode 100644
index e2d1b912e154..000000000000
--- a/arch/x86_64/kernel/hpet.c
+++ /dev/null
@@ -1,493 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/mc146818rtc.h>
-#include <linux/time.h>
-#include <linux/clocksource.h>
-#include <linux/ioport.h>
-#include <linux/acpi.h>
-#include <linux/hpet.h>
-#include <asm/pgtable.h>
-#include <asm/vsyscall.h>
-#include <asm/timex.h>
-#include <asm/hpet.h>
-
-#define HPET_MASK 0xFFFFFFFF
-#define HPET_SHIFT 22
-
-/* FSEC = 10^-15 NSEC = 10^-9 */
-#define FSEC_PER_NSEC 1000000
-
-int nohpet __initdata;
-
-unsigned long hpet_address;
-unsigned long hpet_period; /* fsecs / HPET clock */
-unsigned long hpet_tick; /* HPET clocks / interrupt */
-
-int hpet_use_timer; /* Use counter of hpet for time keeping,
- * otherwise PIT
- */
-
-#ifdef CONFIG_HPET
-static __init int late_hpet_init(void)
-{
- struct hpet_data hd;
- unsigned int ntimer;
-
- if (!hpet_address)
- return 0;
-
- memset(&hd, 0, sizeof(hd));
-
- ntimer = hpet_readl(HPET_ID);
- ntimer = (ntimer & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT;
- ntimer++;
-
- /*
- * Register with driver.
- * Timer0 and Timer1 is used by platform.
- */
- hd.hd_phys_address = hpet_address;
- hd.hd_address = (void __iomem *)fix_to_virt(FIX_HPET_BASE);
- hd.hd_nirqs = ntimer;
- hd.hd_flags = HPET_DATA_PLATFORM;
- hpet_reserve_timer(&hd, 0);
-#ifdef CONFIG_HPET_EMULATE_RTC
- hpet_reserve_timer(&hd, 1);
-#endif
- hd.hd_irq[0] = HPET_LEGACY_8254;
- hd.hd_irq[1] = HPET_LEGACY_RTC;
- if (ntimer > 2) {
- struct hpet *hpet;
- struct hpet_timer *timer;
- int i;
-
- hpet = (struct hpet *) fix_to_virt(FIX_HPET_BASE);
- timer = &hpet->hpet_timers[2];
- for (i = 2; i < ntimer; timer++, i++)
- hd.hd_irq[i] = (timer->hpet_config &
- Tn_INT_ROUTE_CNF_MASK) >>
- Tn_INT_ROUTE_CNF_SHIFT;
-
- }
-
- hpet_alloc(&hd);
- return 0;
-}
-fs_initcall(late_hpet_init);
-#endif
-
-int hpet_timer_stop_set_go(unsigned long tick)
-{
- unsigned int cfg;
-
-/*
- * Stop the timers and reset the main counter.
- */
-
- cfg = hpet_readl(HPET_CFG);
- cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
- hpet_writel(cfg, HPET_CFG);
- hpet_writel(0, HPET_COUNTER);
- hpet_writel(0, HPET_COUNTER + 4);
-
-/*
- * Set up timer 0, as periodic with first interrupt to happen at hpet_tick,
- * and period also hpet_tick.
- */
- if (hpet_use_timer) {
- hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
- HPET_TN_32BIT, HPET_T0_CFG);
- hpet_writel(hpet_tick, HPET_T0_CMP); /* next interrupt */
- hpet_writel(hpet_tick, HPET_T0_CMP); /* period */
- cfg |= HPET_CFG_LEGACY;
- }
-/*
- * Go!
- */
-
- cfg |= HPET_CFG_ENABLE;
- hpet_writel(cfg, HPET_CFG);
-
- return 0;
-}
-
-static cycle_t read_hpet(void)
-{
- return (cycle_t)hpet_readl(HPET_COUNTER);
-}
-
-static cycle_t __vsyscall_fn vread_hpet(void)
-{
- return readl((void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0);
-}
-
-struct clocksource clocksource_hpet = {
- .name = "hpet",
- .rating = 250,
- .read = read_hpet,
- .mask = (cycle_t)HPET_MASK,
- .mult = 0, /* set below */
- .shift = HPET_SHIFT,
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
- .vread = vread_hpet,
-};
-
-int __init hpet_arch_init(void)
-{
- unsigned int id;
- u64 tmp;
-
- if (!hpet_address)
- return -1;
- set_fixmap_nocache(FIX_HPET_BASE, hpet_address);
- __set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE);
-
-/*
- * Read the period, compute tick and quotient.
- */
-
- id = hpet_readl(HPET_ID);
-
- if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER))
- return -1;
-
- hpet_period = hpet_readl(HPET_PERIOD);
- if (hpet_period < 100000 || hpet_period > 100000000)
- return -1;
-
- hpet_tick = (FSEC_PER_TICK + hpet_period / 2) / hpet_period;
-
- hpet_use_timer = (id & HPET_ID_LEGSUP);
-
- /*
- * hpet period is in femto seconds per cycle
- * so we need to convert this to ns/cyc units
- * aproximated by mult/2^shift
- *
- * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift
- * fsec/cyc * 1ns/1000000fsec * 2^shift = mult
- * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult
- * (fsec/cyc << shift)/1000000 = mult
- * (hpet_period << shift)/FSEC_PER_NSEC = mult
- */
- tmp = (u64)hpet_period << HPET_SHIFT;
- do_div(tmp, FSEC_PER_NSEC);
- clocksource_hpet.mult = (u32)tmp;
- clocksource_register(&clocksource_hpet);
-
- return hpet_timer_stop_set_go(hpet_tick);
-}
-
-int hpet_reenable(void)
-{
- return hpet_timer_stop_set_go(hpet_tick);
-}
-
-/*
- * calibrate_tsc() calibrates the processor TSC in a very simple way, comparing
- * it to the HPET timer of known frequency.
- */
-
-#define TICK_COUNT 100000000
-#define SMI_THRESHOLD 50000
-#define MAX_TRIES 5
-
-/*
- * Some platforms take periodic SMI interrupts with 5ms duration. Make sure none
- * occurs between the reads of the hpet & TSC.
- */
-static void __init read_hpet_tsc(int *hpet, int *tsc)
-{
- int tsc1, tsc2, hpet1, i;
-
- for (i = 0; i < MAX_TRIES; i++) {
- tsc1 = get_cycles_sync();
- hpet1 = hpet_readl(HPET_COUNTER);
- tsc2 = get_cycles_sync();
- if ((tsc2 - tsc1) < SMI_THRESHOLD)
- break;
- }
- *hpet = hpet1;
- *tsc = tsc2;
-}
-
-unsigned int __init hpet_calibrate_tsc(void)
-{
- int tsc_start, hpet_start;
- int tsc_now, hpet_now;
- unsigned long flags;
-
- local_irq_save(flags);
-
- read_hpet_tsc(&hpet_start, &tsc_start);
-
- do {
- local_irq_disable();
- read_hpet_tsc(&hpet_now, &tsc_now);
- local_irq_restore(flags);
- } while ((tsc_now - tsc_start) < TICK_COUNT &&
- (hpet_now - hpet_start) < TICK_COUNT);
-
- return (tsc_now - tsc_start) * 1000000000L
- / ((hpet_now - hpet_start) * hpet_period / 1000);
-}
-
-#ifdef CONFIG_HPET_EMULATE_RTC
-/* HPET in LegacyReplacement Mode eats up RTC interrupt line. When, HPET
- * is enabled, we support RTC interrupt functionality in software.
- * RTC has 3 kinds of interrupts:
- * 1) Update Interrupt - generate an interrupt, every sec, when RTC clock
- * is updated
- * 2) Alarm Interrupt - generate an interrupt at a specific time of day
- * 3) Periodic Interrupt - generate periodic interrupt, with frequencies
- * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2)
- * (1) and (2) above are implemented using polling at a frequency of
- * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt
- * overhead. (DEFAULT_RTC_INT_FREQ)
- * For (3), we use interrupts at 64Hz or user specified periodic
- * frequency, whichever is higher.
- */
-#include <linux/rtc.h>
-
-#define DEFAULT_RTC_INT_FREQ 64
-#define RTC_NUM_INTS 1
-
-static unsigned long UIE_on;
-static unsigned long prev_update_sec;
-
-static unsigned long AIE_on;
-static struct rtc_time alarm_time;
-
-static unsigned long PIE_on;
-static unsigned long PIE_freq = DEFAULT_RTC_INT_FREQ;
-static unsigned long PIE_count;
-
-static unsigned long hpet_rtc_int_freq; /* RTC interrupt frequency */
-static unsigned int hpet_t1_cmp; /* cached comparator register */
-
-int is_hpet_enabled(void)
-{
- return hpet_address != 0;
-}
-
-/*
- * Timer 1 for RTC, we do not use periodic interrupt feature,
- * even if HPET supports periodic interrupts on Timer 1.
- * The reason being, to set up a periodic interrupt in HPET, we need to
- * stop the main counter. And if we do that everytime someone diables/enables
- * RTC, we will have adverse effect on main kernel timer running on Timer 0.
- * So, for the time being, simulate the periodic interrupt in software.
- *
- * hpet_rtc_timer_init() is called for the first time and during subsequent
- * interuppts reinit happens through hpet_rtc_timer_reinit().
- */
-int hpet_rtc_timer_init(void)
-{
- unsigned int cfg, cnt;
- unsigned long flags;
-
- if (!is_hpet_enabled())
- return 0;
- /*
- * Set the counter 1 and enable the interrupts.
- */
- if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
- hpet_rtc_int_freq = PIE_freq;
- else
- hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
-
- local_irq_save(flags);
-
- cnt = hpet_readl(HPET_COUNTER);
- cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq);
- hpet_writel(cnt, HPET_T1_CMP);
- hpet_t1_cmp = cnt;
-
- cfg = hpet_readl(HPET_T1_CFG);
- cfg &= ~HPET_TN_PERIODIC;
- cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
- hpet_writel(cfg, HPET_T1_CFG);
-
- local_irq_restore(flags);
-
- return 1;
-}
-
-static void hpet_rtc_timer_reinit(void)
-{
- unsigned int cfg, cnt, ticks_per_int, lost_ints;
-
- if (unlikely(!(PIE_on | AIE_on | UIE_on))) {
- cfg = hpet_readl(HPET_T1_CFG);
- cfg &= ~HPET_TN_ENABLE;
- hpet_writel(cfg, HPET_T1_CFG);
- return;
- }
-
- if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
- hpet_rtc_int_freq = PIE_freq;
- else
- hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
-
- /* It is more accurate to use the comparator value than current count.*/
- ticks_per_int = hpet_tick * HZ / hpet_rtc_int_freq;
- hpet_t1_cmp += ticks_per_int;
- hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
-
- /*
- * If the interrupt handler was delayed too long, the write above tries
- * to schedule the next interrupt in the past and the hardware would
- * not interrupt until the counter had wrapped around.
- * So we have to check that the comparator wasn't set to a past time.
- */
- cnt = hpet_readl(HPET_COUNTER);
- if (unlikely((int)(cnt - hpet_t1_cmp) > 0)) {
- lost_ints = (cnt - hpet_t1_cmp) / ticks_per_int + 1;
- /* Make sure that, even with the time needed to execute
- * this code, the next scheduled interrupt has been moved
- * back to the future: */
- lost_ints++;
-
- hpet_t1_cmp += lost_ints * ticks_per_int;
- hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
-
- if (PIE_on)
- PIE_count += lost_ints;
-
- if (printk_ratelimit())
- printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n",
- hpet_rtc_int_freq);
- }
-}
-
-/*
- * The functions below are called from rtc driver.
- * Return 0 if HPET is not being used.
- * Otherwise do the necessary changes and return 1.
- */
-int hpet_mask_rtc_irq_bit(unsigned long bit_mask)
-{
- if (!is_hpet_enabled())
- return 0;
-
- if (bit_mask & RTC_UIE)
- UIE_on = 0;
- if (bit_mask & RTC_PIE)
- PIE_on = 0;
- if (bit_mask & RTC_AIE)
- AIE_on = 0;
-
- return 1;
-}
-
-int hpet_set_rtc_irq_bit(unsigned long bit_mask)
-{
- int timer_init_reqd = 0;
-
- if (!is_hpet_enabled())
- return 0;
-
- if (!(PIE_on | AIE_on | UIE_on))
- timer_init_reqd = 1;
-
- if (bit_mask & RTC_UIE) {
- UIE_on = 1;
- }
- if (bit_mask & RTC_PIE) {
- PIE_on = 1;
- PIE_count = 0;
- }
- if (bit_mask & RTC_AIE) {
- AIE_on = 1;
- }
-
- if (timer_init_reqd)
- hpet_rtc_timer_init();
-
- return 1;
-}
-
-int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec)
-{
- if (!is_hpet_enabled())
- return 0;
-
- alarm_time.tm_hour = hrs;
- alarm_time.tm_min = min;
- alarm_time.tm_sec = sec;
-
- return 1;
-}
-
-int hpet_set_periodic_freq(unsigned long freq)
-{
- if (!is_hpet_enabled())
- return 0;
-
- PIE_freq = freq;
- PIE_count = 0;
-
- return 1;
-}
-
-int hpet_rtc_dropped_irq(void)
-{
- if (!is_hpet_enabled())
- return 0;
-
- return 1;
-}
-
-irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
-{
- struct rtc_time curr_time;
- unsigned long rtc_int_flag = 0;
- int call_rtc_interrupt = 0;
-
- hpet_rtc_timer_reinit();
-
- if (UIE_on | AIE_on) {
- rtc_get_rtc_time(&curr_time);
- }
- if (UIE_on) {
- if (curr_time.tm_sec != prev_update_sec) {
- /* Set update int info, call real rtc int routine */
- call_rtc_interrupt = 1;
- rtc_int_flag = RTC_UF;
- prev_update_sec = curr_time.tm_sec;
- }
- }
- if (PIE_on) {
- PIE_count++;
- if (PIE_count >= hpet_rtc_int_freq/PIE_freq) {
- /* Set periodic int info, call real rtc int routine */
- call_rtc_interrupt = 1;
- rtc_int_flag |= RTC_PF;
- PIE_count = 0;
- }
- }
- if (AIE_on) {
- if ((curr_time.tm_sec == alarm_time.tm_sec) &&
- (curr_time.tm_min == alarm_time.tm_min) &&
- (curr_time.tm_hour == alarm_time.tm_hour)) {
- /* Set alarm int info, call real rtc int routine */
- call_rtc_interrupt = 1;
- rtc_int_flag |= RTC_AF;
- }
- }
- if (call_rtc_interrupt) {
- rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8));
- rtc_interrupt(rtc_int_flag, dev_id);
- }
- return IRQ_HANDLED;
-}
-#endif
-
-static int __init nohpet_setup(char *s)
-{
- nohpet = 1;
- return 1;
-}
-
-__setup("nohpet", nohpet_setup);
diff --git a/arch/x86_64/kernel/i387.c b/arch/x86_64/kernel/i387.c
deleted file mode 100644
index 1d58c13bc6bc..000000000000
--- a/arch/x86_64/kernel/i387.c
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * linux/arch/x86_64/kernel/i387.c
- *
- * Copyright (C) 1994 Linus Torvalds
- * Copyright (C) 2002 Andi Kleen, SuSE Labs
- *
- * Pentium III FXSR, SSE support
- * General FPU state handling cleanups
- * Gareth Hughes <gareth@valinux.com>, May 2000
- *
- * x86-64 rework 2002 Andi Kleen.
- * Does direct fxsave in and out of user space now for signal handlers.
- * All the FSAVE<->FXSAVE conversion code has been moved to the 32bit emulation,
- * the 64bit user space sees a FXSAVE frame directly.
- */
-
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <asm/processor.h>
-#include <asm/i387.h>
-#include <asm/sigcontext.h>
-#include <asm/user.h>
-#include <asm/ptrace.h>
-#include <asm/uaccess.h>
-
-unsigned int mxcsr_feature_mask __read_mostly = 0xffffffff;
-
-void mxcsr_feature_mask_init(void)
-{
- unsigned int mask;
- clts();
- memset(&current->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct));
- asm volatile("fxsave %0" : : "m" (current->thread.i387.fxsave));
- mask = current->thread.i387.fxsave.mxcsr_mask;
- if (mask == 0) mask = 0x0000ffbf;
- mxcsr_feature_mask &= mask;
- stts();
-}
-
-/*
- * Called at bootup to set up the initial FPU state that is later cloned
- * into all processes.
- */
-void __cpuinit fpu_init(void)
-{
- unsigned long oldcr0 = read_cr0();
- extern void __bad_fxsave_alignment(void);
-
- if (offsetof(struct task_struct, thread.i387.fxsave) & 15)
- __bad_fxsave_alignment();
- set_in_cr4(X86_CR4_OSFXSR);
- set_in_cr4(X86_CR4_OSXMMEXCPT);
-
- write_cr0(oldcr0 & ~((1UL<<3)|(1UL<<2))); /* clear TS and EM */
-
- mxcsr_feature_mask_init();
- /* clean state in init */
- current_thread_info()->status = 0;
- clear_used_math();
-}
-
-void init_fpu(struct task_struct *child)
-{
- if (tsk_used_math(child)) {
- if (child == current)
- unlazy_fpu(child);
- return;
- }
- memset(&child->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct));
- child->thread.i387.fxsave.cwd = 0x37f;
- child->thread.i387.fxsave.mxcsr = 0x1f80;
- /* only the device not available exception or ptrace can call init_fpu */
- set_stopped_child_used_math(child);
-}
-
-/*
- * Signal frame handlers.
- */
-
-int save_i387(struct _fpstate __user *buf)
-{
- struct task_struct *tsk = current;
- int err = 0;
-
- BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
- sizeof(tsk->thread.i387.fxsave));
-
- if ((unsigned long)buf % 16)
- printk("save_i387: bad fpstate %p\n",buf);
-
- if (!used_math())
- return 0;
- clear_used_math(); /* trigger finit */
- if (task_thread_info(tsk)->status & TS_USEDFPU) {
- err = save_i387_checking((struct i387_fxsave_struct __user *)buf);
- if (err) return err;
- stts();
- } else {
- if (__copy_to_user(buf, &tsk->thread.i387.fxsave,
- sizeof(struct i387_fxsave_struct)))
- return -1;
- }
- return 1;
-}
-
-/*
- * ptrace request handlers.
- */
-
-int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *tsk)
-{
- init_fpu(tsk);
- return __copy_to_user(buf, &tsk->thread.i387.fxsave,
- sizeof(struct user_i387_struct)) ? -EFAULT : 0;
-}
-
-int set_fpregs(struct task_struct *tsk, struct user_i387_struct __user *buf)
-{
- if (__copy_from_user(&tsk->thread.i387.fxsave, buf,
- sizeof(struct user_i387_struct)))
- return -EFAULT;
- return 0;
-}
-
-/*
- * FPU state for core dumps.
- */
-
-int dump_fpu( struct pt_regs *regs, struct user_i387_struct *fpu )
-{
- struct task_struct *tsk = current;
-
- if (!used_math())
- return 0;
-
- unlazy_fpu(tsk);
- memcpy(fpu, &tsk->thread.i387.fxsave, sizeof(struct user_i387_struct));
- return 1;
-}
-
-int dump_task_fpu(struct task_struct *tsk, struct user_i387_struct *fpu)
-{
- int fpvalid = !!tsk_used_math(tsk);
-
- if (fpvalid) {
- if (tsk == current)
- unlazy_fpu(tsk);
- memcpy(fpu, &tsk->thread.i387.fxsave, sizeof(struct user_i387_struct));
-}
- return fpvalid;
-}
diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c
deleted file mode 100644
index 948cae646099..000000000000
--- a/arch/x86_64/kernel/i8259.c
+++ /dev/null
@@ -1,544 +0,0 @@
-#include <linux/linkage.h>
-#include <linux/errno.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/ioport.h>
-#include <linux/interrupt.h>
-#include <linux/timex.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/init.h>
-#include <linux/kernel_stat.h>
-#include <linux/sysdev.h>
-#include <linux/bitops.h>
-
-#include <asm/acpi.h>
-#include <asm/atomic.h>
-#include <asm/system.h>
-#include <asm/io.h>
-#include <asm/hw_irq.h>
-#include <asm/pgtable.h>
-#include <asm/delay.h>
-#include <asm/desc.h>
-#include <asm/apic.h>
-
-/*
- * Common place to define all x86 IRQ vectors
- *
- * This builds up the IRQ handler stubs using some ugly macros in irq.h
- *
- * These macros create the low-level assembly IRQ routines that save
- * register context and call do_IRQ(). do_IRQ() then does all the
- * operations that are needed to keep the AT (or SMP IOAPIC)
- * interrupt-controller happy.
- */
-
-#define BI(x,y) \
- BUILD_IRQ(x##y)
-
-#define BUILD_16_IRQS(x) \
- BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
- BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
- BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
- BI(x,c) BI(x,d) BI(x,e) BI(x,f)
-
-/*
- * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
- * (these are usually mapped to vectors 0x30-0x3f)
- */
-
-/*
- * The IO-APIC gives us many more interrupt sources. Most of these
- * are unused but an SMP system is supposed to have enough memory ...
- * sometimes (mostly wrt. hw bugs) we get corrupted vectors all
- * across the spectrum, so we really want to be prepared to get all
- * of these. Plus, more powerful systems might have more than 64
- * IO-APIC registers.
- *
- * (these are usually mapped into the 0x30-0xff vector range)
- */
- BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
-BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
-BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
-BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf)
-
-#undef BUILD_16_IRQS
-#undef BI
-
-
-#define IRQ(x,y) \
- IRQ##x##y##_interrupt
-
-#define IRQLIST_16(x) \
- IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
- IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
- IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
- IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
-
-/* for the irq vectors */
-static void (*interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = {
- IRQLIST_16(0x2), IRQLIST_16(0x3),
- IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
- IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
- IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf)
-};
-
-#undef IRQ
-#undef IRQLIST_16
-
-/*
- * This is the 'legacy' 8259A Programmable Interrupt Controller,
- * present in the majority of PC/AT boxes.
- * plus some generic x86 specific things if generic specifics makes
- * any sense at all.
- * this file should become arch/i386/kernel/irq.c when the old irq.c
- * moves to arch independent land
- */
-
-static int i8259A_auto_eoi;
-DEFINE_SPINLOCK(i8259A_lock);
-static void mask_and_ack_8259A(unsigned int);
-
-static struct irq_chip i8259A_chip = {
- .name = "XT-PIC",
- .mask = disable_8259A_irq,
- .disable = disable_8259A_irq,
- .unmask = enable_8259A_irq,
- .mask_ack = mask_and_ack_8259A,
-};
-
-/*
- * 8259A PIC functions to handle ISA devices:
- */
-
-/*
- * This contains the irq mask for both 8259A irq controllers,
- */
-static unsigned int cached_irq_mask = 0xffff;
-
-#define __byte(x,y) (((unsigned char *)&(y))[x])
-#define cached_21 (__byte(0,cached_irq_mask))
-#define cached_A1 (__byte(1,cached_irq_mask))
-
-/*
- * Not all IRQs can be routed through the IO-APIC, eg. on certain (older)
- * boards the timer interrupt is not really connected to any IO-APIC pin,
- * it's fed to the master 8259A's IR0 line only.
- *
- * Any '1' bit in this mask means the IRQ is routed through the IO-APIC.
- * this 'mixed mode' IRQ handling costs nothing because it's only used
- * at IRQ setup time.
- */
-unsigned long io_apic_irqs;
-
-void disable_8259A_irq(unsigned int irq)
-{
- unsigned int mask = 1 << irq;
- unsigned long flags;
-
- spin_lock_irqsave(&i8259A_lock, flags);
- cached_irq_mask |= mask;
- if (irq & 8)
- outb(cached_A1,0xA1);
- else
- outb(cached_21,0x21);
- spin_unlock_irqrestore(&i8259A_lock, flags);
-}
-
-void enable_8259A_irq(unsigned int irq)
-{
- unsigned int mask = ~(1 << irq);
- unsigned long flags;
-
- spin_lock_irqsave(&i8259A_lock, flags);
- cached_irq_mask &= mask;
- if (irq & 8)
- outb(cached_A1,0xA1);
- else
- outb(cached_21,0x21);
- spin_unlock_irqrestore(&i8259A_lock, flags);
-}
-
-int i8259A_irq_pending(unsigned int irq)
-{
- unsigned int mask = 1<<irq;
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&i8259A_lock, flags);
- if (irq < 8)
- ret = inb(0x20) & mask;
- else
- ret = inb(0xA0) & (mask >> 8);
- spin_unlock_irqrestore(&i8259A_lock, flags);
-
- return ret;
-}
-
-void make_8259A_irq(unsigned int irq)
-{
- disable_irq_nosync(irq);
- io_apic_irqs &= ~(1<<irq);
- set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq,
- "XT");
- enable_irq(irq);
-}
-
-/*
- * This function assumes to be called rarely. Switching between
- * 8259A registers is slow.
- * This has to be protected by the irq controller spinlock
- * before being called.
- */
-static inline int i8259A_irq_real(unsigned int irq)
-{
- int value;
- int irqmask = 1<<irq;
-
- if (irq < 8) {
- outb(0x0B,0x20); /* ISR register */
- value = inb(0x20) & irqmask;
- outb(0x0A,0x20); /* back to the IRR register */
- return value;
- }
- outb(0x0B,0xA0); /* ISR register */
- value = inb(0xA0) & (irqmask >> 8);
- outb(0x0A,0xA0); /* back to the IRR register */
- return value;
-}
-
-/*
- * Careful! The 8259A is a fragile beast, it pretty
- * much _has_ to be done exactly like this (mask it
- * first, _then_ send the EOI, and the order of EOI
- * to the two 8259s is important!
- */
-static void mask_and_ack_8259A(unsigned int irq)
-{
- unsigned int irqmask = 1 << irq;
- unsigned long flags;
-
- spin_lock_irqsave(&i8259A_lock, flags);
- /*
- * Lightweight spurious IRQ detection. We do not want
- * to overdo spurious IRQ handling - it's usually a sign
- * of hardware problems, so we only do the checks we can
- * do without slowing down good hardware unnecessarily.
- *
- * Note that IRQ7 and IRQ15 (the two spurious IRQs
- * usually resulting from the 8259A-1|2 PICs) occur
- * even if the IRQ is masked in the 8259A. Thus we
- * can check spurious 8259A IRQs without doing the
- * quite slow i8259A_irq_real() call for every IRQ.
- * This does not cover 100% of spurious interrupts,
- * but should be enough to warn the user that there
- * is something bad going on ...
- */
- if (cached_irq_mask & irqmask)
- goto spurious_8259A_irq;
- cached_irq_mask |= irqmask;
-
-handle_real_irq:
- if (irq & 8) {
- inb(0xA1); /* DUMMY - (do we need this?) */
- outb(cached_A1,0xA1);
- outb(0x60+(irq&7),0xA0);/* 'Specific EOI' to slave */
- outb(0x62,0x20); /* 'Specific EOI' to master-IRQ2 */
- } else {
- inb(0x21); /* DUMMY - (do we need this?) */
- outb(cached_21,0x21);
- outb(0x60+irq,0x20); /* 'Specific EOI' to master */
- }
- spin_unlock_irqrestore(&i8259A_lock, flags);
- return;
-
-spurious_8259A_irq:
- /*
- * this is the slow path - should happen rarely.
- */
- if (i8259A_irq_real(irq))
- /*
- * oops, the IRQ _is_ in service according to the
- * 8259A - not spurious, go handle it.
- */
- goto handle_real_irq;
-
- {
- static int spurious_irq_mask;
- /*
- * At this point we can be sure the IRQ is spurious,
- * lets ACK and report it. [once per IRQ]
- */
- if (!(spurious_irq_mask & irqmask)) {
- printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq);
- spurious_irq_mask |= irqmask;
- }
- atomic_inc(&irq_err_count);
- /*
- * Theoretically we do not have to handle this IRQ,
- * but in Linux this does not cause problems and is
- * simpler for us.
- */
- goto handle_real_irq;
- }
-}
-
-void init_8259A(int auto_eoi)
-{
- unsigned long flags;
-
- i8259A_auto_eoi = auto_eoi;
-
- spin_lock_irqsave(&i8259A_lock, flags);
-
- outb(0xff, 0x21); /* mask all of 8259A-1 */
- outb(0xff, 0xA1); /* mask all of 8259A-2 */
-
- /*
- * outb_p - this has to work on a wide range of PC hardware.
- */
- outb_p(0x11, 0x20); /* ICW1: select 8259A-1 init */
- outb_p(IRQ0_VECTOR, 0x21); /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */
- outb_p(0x04, 0x21); /* 8259A-1 (the master) has a slave on IR2 */
- if (auto_eoi)
- outb_p(0x03, 0x21); /* master does Auto EOI */
- else
- outb_p(0x01, 0x21); /* master expects normal EOI */
-
- outb_p(0x11, 0xA0); /* ICW1: select 8259A-2 init */
- outb_p(IRQ8_VECTOR, 0xA1); /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */
- outb_p(0x02, 0xA1); /* 8259A-2 is a slave on master's IR2 */
- outb_p(0x01, 0xA1); /* (slave's support for AEOI in flat mode
- is to be investigated) */
-
- if (auto_eoi)
- /*
- * in AEOI mode we just have to mask the interrupt
- * when acking.
- */
- i8259A_chip.mask_ack = disable_8259A_irq;
- else
- i8259A_chip.mask_ack = mask_and_ack_8259A;
-
- udelay(100); /* wait for 8259A to initialize */
-
- outb(cached_21, 0x21); /* restore master IRQ mask */
- outb(cached_A1, 0xA1); /* restore slave IRQ mask */
-
- spin_unlock_irqrestore(&i8259A_lock, flags);
-}
-
-static char irq_trigger[2];
-/**
- * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ
- */
-static void restore_ELCR(char *trigger)
-{
- outb(trigger[0], 0x4d0);
- outb(trigger[1], 0x4d1);
-}
-
-static void save_ELCR(char *trigger)
-{
- /* IRQ 0,1,2,8,13 are marked as reserved */
- trigger[0] = inb(0x4d0) & 0xF8;
- trigger[1] = inb(0x4d1) & 0xDE;
-}
-
-static int i8259A_resume(struct sys_device *dev)
-{
- init_8259A(i8259A_auto_eoi);
- restore_ELCR(irq_trigger);
- return 0;
-}
-
-static int i8259A_suspend(struct sys_device *dev, pm_message_t state)
-{
- save_ELCR(irq_trigger);
- return 0;
-}
-
-static int i8259A_shutdown(struct sys_device *dev)
-{
- /* Put the i8259A into a quiescent state that
- * the kernel initialization code can get it
- * out of.
- */
- outb(0xff, 0x21); /* mask all of 8259A-1 */
- outb(0xff, 0xA1); /* mask all of 8259A-1 */
- return 0;
-}
-
-static struct sysdev_class i8259_sysdev_class = {
- set_kset_name("i8259"),
- .suspend = i8259A_suspend,
- .resume = i8259A_resume,
- .shutdown = i8259A_shutdown,
-};
-
-static struct sys_device device_i8259A = {
- .id = 0,
- .cls = &i8259_sysdev_class,
-};
-
-static int __init i8259A_init_sysfs(void)
-{
- int error = sysdev_class_register(&i8259_sysdev_class);
- if (!error)
- error = sysdev_register(&device_i8259A);
- return error;
-}
-
-device_initcall(i8259A_init_sysfs);
-
-/*
- * IRQ2 is cascade interrupt to second interrupt controller
- */
-
-static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL};
-DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
- [0 ... IRQ0_VECTOR - 1] = -1,
- [IRQ0_VECTOR] = 0,
- [IRQ1_VECTOR] = 1,
- [IRQ2_VECTOR] = 2,
- [IRQ3_VECTOR] = 3,
- [IRQ4_VECTOR] = 4,
- [IRQ5_VECTOR] = 5,
- [IRQ6_VECTOR] = 6,
- [IRQ7_VECTOR] = 7,
- [IRQ8_VECTOR] = 8,
- [IRQ9_VECTOR] = 9,
- [IRQ10_VECTOR] = 10,
- [IRQ11_VECTOR] = 11,
- [IRQ12_VECTOR] = 12,
- [IRQ13_VECTOR] = 13,
- [IRQ14_VECTOR] = 14,
- [IRQ15_VECTOR] = 15,
- [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
-};
-
-void __init init_ISA_irqs (void)
-{
- int i;
-
- init_bsp_APIC();
- init_8259A(0);
-
- for (i = 0; i < NR_IRQS; i++) {
- irq_desc[i].status = IRQ_DISABLED;
- irq_desc[i].action = NULL;
- irq_desc[i].depth = 1;
-
- if (i < 16) {
- /*
- * 16 old-style INTA-cycle interrupts:
- */
- set_irq_chip_and_handler_name(i, &i8259A_chip,
- handle_level_irq, "XT");
- } else {
- /*
- * 'high' PCI IRQs filled in on demand
- */
- irq_desc[i].chip = &no_irq_chip;
- }
- }
-}
-
-static void setup_timer_hardware(void)
-{
- outb_p(0x34,0x43); /* binary, mode 2, LSB/MSB, ch 0 */
- udelay(10);
- outb_p(LATCH & 0xff , 0x40); /* LSB */
- udelay(10);
- outb(LATCH >> 8 , 0x40); /* MSB */
-}
-
-static int timer_resume(struct sys_device *dev)
-{
- setup_timer_hardware();
- return 0;
-}
-
-void i8254_timer_resume(void)
-{
- setup_timer_hardware();
-}
-
-static struct sysdev_class timer_sysclass = {
- set_kset_name("timer_pit"),
- .resume = timer_resume,
-};
-
-static struct sys_device device_timer = {
- .id = 0,
- .cls = &timer_sysclass,
-};
-
-static int __init init_timer_sysfs(void)
-{
- int error = sysdev_class_register(&timer_sysclass);
- if (!error)
- error = sysdev_register(&device_timer);
- return error;
-}
-
-device_initcall(init_timer_sysfs);
-
-void __init init_IRQ(void)
-{
- int i;
-
- init_ISA_irqs();
- /*
- * Cover the whole vector space, no vector can escape
- * us. (some of these will be overridden and become
- * 'special' SMP interrupts)
- */
- for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
- int vector = FIRST_EXTERNAL_VECTOR + i;
- if (vector != IA32_SYSCALL_VECTOR)
- set_intr_gate(vector, interrupt[i]);
- }
-
-#ifdef CONFIG_SMP
- /*
- * The reschedule interrupt is a CPU-to-CPU reschedule-helper
- * IPI, driven by wakeup.
- */
- set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
-
- /* IPIs for invalidation */
- set_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
- set_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
- set_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
- set_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
- set_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
- set_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
- set_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
- set_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
-
- /* IPI for generic function call */
- set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
-
- /* Low priority IPI to cleanup after moving an irq */
- set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
-#endif
- set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
- set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
-
- /* self generated IPI for local APIC timer */
- set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
-
- /* IPI vectors for APIC spurious and error interrupts */
- set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
- set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
-
- /*
- * Set the clock to HZ Hz, we already have a valid
- * vector now:
- */
- setup_timer_hardware();
-
- if (!acpi_ioapic)
- setup_irq(2, &irq2);
-}
diff --git a/arch/x86_64/kernel/init_task.c b/arch/x86_64/kernel/init_task.c
deleted file mode 100644
index 4ff33d4f8551..000000000000
--- a/arch/x86_64/kernel/init_task.c
+++ /dev/null
@@ -1,54 +0,0 @@
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/init_task.h>
-#include <linux/fs.h>
-#include <linux/mqueue.h>
-
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/desc.h>
-
-static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
-static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
-static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
-
-EXPORT_SYMBOL(init_mm);
-
-/*
- * Initial task structure.
- *
- * We need to make sure that this is 8192-byte aligned due to the
- * way process stacks are handled. This is done by having a special
- * "init_task" linker map entry..
- */
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
-
-/*
- * Initial task structure.
- *
- * All other task structs will be allocated on slabs in fork.c
- */
-struct task_struct init_task = INIT_TASK(init_task);
-
-EXPORT_SYMBOL(init_task);
-/*
- * per-CPU TSS segments. Threads are completely 'soft' on Linux,
- * no more per-task TSS's. The TSS size is kept cacheline-aligned
- * so they are allowed to end up in the .data.cacheline_aligned
- * section. Since TSS's are completely CPU-local, we want them
- * on exact cacheline boundaries, to eliminate cacheline ping-pong.
- */
-DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
-
-/* Copies of the original ist values from the tss are only accessed during
- * debugging, no special alignment required.
- */
-DEFINE_PER_CPU(struct orig_ist, orig_ist);
-
-#define ALIGN_TO_4K __attribute__((section(".data.init_task")))
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
deleted file mode 100644
index 966fa1062491..000000000000
--- a/arch/x86_64/kernel/io_apic.c
+++ /dev/null
@@ -1,2202 +0,0 @@
-/*
- * Intel IO-APIC support for multi-Pentium hosts.
- *
- * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
- *
- * Many thanks to Stig Venaas for trying out countless experimental
- * patches and reporting/debugging problems patiently!
- *
- * (c) 1999, Multiple IO-APIC support, developed by
- * Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
- * Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
- * further tested and cleaned up by Zach Brown <zab@redhat.com>
- * and Ingo Molnar <mingo@redhat.com>
- *
- * Fixes
- * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
- * thanks to Eric Gilmore
- * and Rolf G. Tews
- * for testing these extensively
- * Paul Diefenbaugh : Added full ACPI support
- */
-
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/sched.h>
-#include <linux/pci.h>
-#include <linux/mc146818rtc.h>
-#include <linux/acpi.h>
-#include <linux/sysdev.h>
-#include <linux/msi.h>
-#include <linux/htirq.h>
-#ifdef CONFIG_ACPI
-#include <acpi/acpi_bus.h>
-#endif
-
-#include <asm/idle.h>
-#include <asm/io.h>
-#include <asm/smp.h>
-#include <asm/desc.h>
-#include <asm/proto.h>
-#include <asm/mach_apic.h>
-#include <asm/acpi.h>
-#include <asm/dma.h>
-#include <asm/nmi.h>
-#include <asm/msidef.h>
-#include <asm/hypertransport.h>
-
-struct irq_cfg {
- cpumask_t domain;
- cpumask_t old_domain;
- unsigned move_cleanup_count;
- u8 vector;
- u8 move_in_progress : 1;
-};
-
-/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
-struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = {
- [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, },
- [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, },
- [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, },
- [3] = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, },
- [4] = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, },
- [5] = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, },
- [6] = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, },
- [7] = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, },
- [8] = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, },
- [9] = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, },
- [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
- [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
- [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
- [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
- [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
- [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
-};
-
-static int assign_irq_vector(int irq, cpumask_t mask);
-
-#define __apicdebuginit __init
-
-int sis_apic_bug; /* not actually supported, dummy for compile */
-
-static int no_timer_check;
-
-static int disable_timer_pin_1 __initdata;
-
-int timer_over_8254 __initdata = 1;
-
-/* Where if anywhere is the i8259 connect in external int mode */
-static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
-
-static DEFINE_SPINLOCK(ioapic_lock);
-DEFINE_SPINLOCK(vector_lock);
-
-/*
- * # of IRQ routing registers
- */
-int nr_ioapic_registers[MAX_IO_APICS];
-
-/*
- * Rough estimation of how many shared IRQs there are, can
- * be changed anytime.
- */
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
-#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
-
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * the indexing order of this array favors 1:1 mappings
- * between pins and IRQs.
- */
-
-static struct irq_pin_list {
- short apic, pin, next;
-} irq_2_pin[PIN_MAP_SIZE];
-
-struct io_apic {
- unsigned int index;
- unsigned int unused[3];
- unsigned int data;
-};
-
-static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
-{
- return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
- + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
-}
-
-static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
-{
- struct io_apic __iomem *io_apic = io_apic_base(apic);
- writel(reg, &io_apic->index);
- return readl(&io_apic->data);
-}
-
-static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
-{
- struct io_apic __iomem *io_apic = io_apic_base(apic);
- writel(reg, &io_apic->index);
- writel(value, &io_apic->data);
-}
-
-/*
- * Re-write a value: to be used for read-modify-write
- * cycles where the read already set up the index register.
- */
-static inline void io_apic_modify(unsigned int apic, unsigned int value)
-{
- struct io_apic __iomem *io_apic = io_apic_base(apic);
- writel(value, &io_apic->data);
-}
-
-static int io_apic_level_ack_pending(unsigned int irq)
-{
- struct irq_pin_list *entry;
- unsigned long flags;
- int pending = 0;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- entry = irq_2_pin + irq;
- for (;;) {
- unsigned int reg;
- int pin;
-
- pin = entry->pin;
- if (pin == -1)
- break;
- reg = io_apic_read(entry->apic, 0x10 + pin*2);
- /* Is the remote IRR bit set? */
- pending |= (reg >> 14) & 1;
- if (!entry->next)
- break;
- entry = irq_2_pin + entry->next;
- }
- spin_unlock_irqrestore(&ioapic_lock, flags);
- return pending;
-}
-
-/*
- * Synchronize the IO-APIC and the CPU by doing
- * a dummy read from the IO-APIC
- */
-static inline void io_apic_sync(unsigned int apic)
-{
- struct io_apic __iomem *io_apic = io_apic_base(apic);
- readl(&io_apic->data);
-}
-
-#define __DO_ACTION(R, ACTION, FINAL) \
- \
-{ \
- int pin; \
- struct irq_pin_list *entry = irq_2_pin + irq; \
- \
- BUG_ON(irq >= NR_IRQS); \
- for (;;) { \
- unsigned int reg; \
- pin = entry->pin; \
- if (pin == -1) \
- break; \
- reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \
- reg ACTION; \
- io_apic_modify(entry->apic, reg); \
- FINAL; \
- if (!entry->next) \
- break; \
- entry = irq_2_pin + entry->next; \
- } \
-}
-
-union entry_union {
- struct { u32 w1, w2; };
- struct IO_APIC_route_entry entry;
-};
-
-static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
-{
- union entry_union eu;
- unsigned long flags;
- spin_lock_irqsave(&ioapic_lock, flags);
- eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
- eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
- spin_unlock_irqrestore(&ioapic_lock, flags);
- return eu.entry;
-}
-
-/*
- * When we write a new IO APIC routing entry, we need to write the high
- * word first! If the mask bit in the low word is clear, we will enable
- * the interrupt, and we need to make sure the entry is fully populated
- * before that happens.
- */
-static void
-__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
-{
- union entry_union eu;
- eu.entry = e;
- io_apic_write(apic, 0x11 + 2*pin, eu.w2);
- io_apic_write(apic, 0x10 + 2*pin, eu.w1);
-}
-
-static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
-{
- unsigned long flags;
- spin_lock_irqsave(&ioapic_lock, flags);
- __ioapic_write_entry(apic, pin, e);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-/*
- * When we mask an IO APIC routing entry, we need to write the low
- * word first, in order to set the mask bit before we change the
- * high bits!
- */
-static void ioapic_mask_entry(int apic, int pin)
-{
- unsigned long flags;
- union entry_union eu = { .entry.mask = 1 };
-
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x10 + 2*pin, eu.w1);
- io_apic_write(apic, 0x11 + 2*pin, eu.w2);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-#ifdef CONFIG_SMP
-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
-{
- int apic, pin;
- struct irq_pin_list *entry = irq_2_pin + irq;
-
- BUG_ON(irq >= NR_IRQS);
- for (;;) {
- unsigned int reg;
- apic = entry->apic;
- pin = entry->pin;
- if (pin == -1)
- break;
- io_apic_write(apic, 0x11 + pin*2, dest);
- reg = io_apic_read(apic, 0x10 + pin*2);
- reg &= ~0x000000ff;
- reg |= vector;
- io_apic_modify(apic, reg);
- if (!entry->next)
- break;
- entry = irq_2_pin + entry->next;
- }
-}
-
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
-{
- struct irq_cfg *cfg = irq_cfg + irq;
- unsigned long flags;
- unsigned int dest;
- cpumask_t tmp;
-
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
- return;
-
- if (assign_irq_vector(irq, mask))
- return;
-
- cpus_and(tmp, cfg->domain, mask);
- dest = cpu_mask_to_apicid(tmp);
-
- /*
- * Only the high 8 bits are valid.
- */
- dest = SET_APIC_LOGICAL_ID(dest);
-
- spin_lock_irqsave(&ioapic_lock, flags);
- __target_IO_APIC_irq(irq, dest, cfg->vector);
- irq_desc[irq].affinity = mask;
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-#endif
-
-/*
- * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
- * shared ISA-space IRQs, so we have to support them. We are super
- * fast in the common case, and fast for shared ISA-space IRQs.
- */
-static void add_pin_to_irq(unsigned int irq, int apic, int pin)
-{
- static int first_free_entry = NR_IRQS;
- struct irq_pin_list *entry = irq_2_pin + irq;
-
- BUG_ON(irq >= NR_IRQS);
- while (entry->next)
- entry = irq_2_pin + entry->next;
-
- if (entry->pin != -1) {
- entry->next = first_free_entry;
- entry = irq_2_pin + entry->next;
- if (++first_free_entry >= PIN_MAP_SIZE)
- panic("io_apic.c: ran out of irq_2_pin entries!");
- }
- entry->apic = apic;
- entry->pin = pin;
-}
-
-
-#define DO_ACTION(name,R,ACTION, FINAL) \
- \
- static void name##_IO_APIC_irq (unsigned int irq) \
- __DO_ACTION(R, ACTION, FINAL)
-
-DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic) )
- /* mask = 1 */
-DO_ACTION( __unmask, 0, &= 0xfffeffff, )
- /* mask = 0 */
-
-static void mask_IO_APIC_irq (unsigned int irq)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- __mask_IO_APIC_irq(irq);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void unmask_IO_APIC_irq (unsigned int irq)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- __unmask_IO_APIC_irq(irq);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
-{
- struct IO_APIC_route_entry entry;
-
- /* Check delivery_mode to be sure we're not clearing an SMI pin */
- entry = ioapic_read_entry(apic, pin);
- if (entry.delivery_mode == dest_SMI)
- return;
- /*
- * Disable it in the IO-APIC irq-routing table:
- */
- ioapic_mask_entry(apic, pin);
-}
-
-static void clear_IO_APIC (void)
-{
- int apic, pin;
-
- for (apic = 0; apic < nr_ioapics; apic++)
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
- clear_IO_APIC_pin(apic, pin);
-}
-
-int skip_ioapic_setup;
-int ioapic_force;
-
-static int __init parse_noapic(char *str)
-{
- disable_ioapic_setup();
- return 0;
-}
-early_param("noapic", parse_noapic);
-
-/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
-static int __init disable_timer_pin_setup(char *arg)
-{
- disable_timer_pin_1 = 1;
- return 1;
-}
-__setup("disable_timer_pin_1", disable_timer_pin_setup);
-
-static int __init setup_disable_8254_timer(char *s)
-{
- timer_over_8254 = -1;
- return 1;
-}
-static int __init setup_enable_8254_timer(char *s)
-{
- timer_over_8254 = 2;
- return 1;
-}
-
-__setup("disable_8254_timer", setup_disable_8254_timer);
-__setup("enable_8254_timer", setup_enable_8254_timer);
-
-
-/*
- * Find the IRQ entry number of a certain pin.
- */
-static int find_irq_entry(int apic, int pin, int type)
-{
- int i;
-
- for (i = 0; i < mp_irq_entries; i++)
- if (mp_irqs[i].mpc_irqtype == type &&
- (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
- mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
- mp_irqs[i].mpc_dstirq == pin)
- return i;
-
- return -1;
-}
-
-/*
- * Find the pin to which IRQ[irq] (ISA) is connected
- */
-static int __init find_isa_irq_pin(int irq, int type)
-{
- int i;
-
- for (i = 0; i < mp_irq_entries; i++) {
- int lbus = mp_irqs[i].mpc_srcbus;
-
- if (test_bit(lbus, mp_bus_not_pci) &&
- (mp_irqs[i].mpc_irqtype == type) &&
- (mp_irqs[i].mpc_srcbusirq == irq))
-
- return mp_irqs[i].mpc_dstirq;
- }
- return -1;
-}
-
-static int __init find_isa_irq_apic(int irq, int type)
-{
- int i;
-
- for (i = 0; i < mp_irq_entries; i++) {
- int lbus = mp_irqs[i].mpc_srcbus;
-
- if (test_bit(lbus, mp_bus_not_pci) &&
- (mp_irqs[i].mpc_irqtype == type) &&
- (mp_irqs[i].mpc_srcbusirq == irq))
- break;
- }
- if (i < mp_irq_entries) {
- int apic;
- for(apic = 0; apic < nr_ioapics; apic++) {
- if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic)
- return apic;
- }
- }
-
- return -1;
-}
-
-/*
- * Find a specific PCI IRQ entry.
- * Not an __init, possibly needed by modules
- */
-static int pin_2_irq(int idx, int apic, int pin);
-
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
-{
- int apic, i, best_guess = -1;
-
- apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
- bus, slot, pin);
- if (mp_bus_id_to_pci_bus[bus] == -1) {
- apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
- return -1;
- }
- for (i = 0; i < mp_irq_entries; i++) {
- int lbus = mp_irqs[i].mpc_srcbus;
-
- for (apic = 0; apic < nr_ioapics; apic++)
- if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
- mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
- break;
-
- if (!test_bit(lbus, mp_bus_not_pci) &&
- !mp_irqs[i].mpc_irqtype &&
- (bus == lbus) &&
- (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
- int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
-
- if (!(apic || IO_APIC_IRQ(irq)))
- continue;
-
- if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
- return irq;
- /*
- * Use the first all-but-pin matching entry as a
- * best-guess fuzzy result for broken mptables.
- */
- if (best_guess < 0)
- best_guess = irq;
- }
- }
- BUG_ON(best_guess >= NR_IRQS);
- return best_guess;
-}
-
-/* ISA interrupts are always polarity zero edge triggered,
- * when listed as conforming in the MP table. */
-
-#define default_ISA_trigger(idx) (0)
-#define default_ISA_polarity(idx) (0)
-
-/* PCI interrupts are always polarity one level triggered,
- * when listed as conforming in the MP table. */
-
-#define default_PCI_trigger(idx) (1)
-#define default_PCI_polarity(idx) (1)
-
-static int __init MPBIOS_polarity(int idx)
-{
- int bus = mp_irqs[idx].mpc_srcbus;
- int polarity;
-
- /*
- * Determine IRQ line polarity (high active or low active):
- */
- switch (mp_irqs[idx].mpc_irqflag & 3)
- {
- case 0: /* conforms, ie. bus-type dependent polarity */
- if (test_bit(bus, mp_bus_not_pci))
- polarity = default_ISA_polarity(idx);
- else
- polarity = default_PCI_polarity(idx);
- break;
- case 1: /* high active */
- {
- polarity = 0;
- break;
- }
- case 2: /* reserved */
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- polarity = 1;
- break;
- }
- case 3: /* low active */
- {
- polarity = 1;
- break;
- }
- default: /* invalid */
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- polarity = 1;
- break;
- }
- }
- return polarity;
-}
-
-static int MPBIOS_trigger(int idx)
-{
- int bus = mp_irqs[idx].mpc_srcbus;
- int trigger;
-
- /*
- * Determine IRQ trigger mode (edge or level sensitive):
- */
- switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
- {
- case 0: /* conforms, ie. bus-type dependent */
- if (test_bit(bus, mp_bus_not_pci))
- trigger = default_ISA_trigger(idx);
- else
- trigger = default_PCI_trigger(idx);
- break;
- case 1: /* edge */
- {
- trigger = 0;
- break;
- }
- case 2: /* reserved */
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- trigger = 1;
- break;
- }
- case 3: /* level */
- {
- trigger = 1;
- break;
- }
- default: /* invalid */
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- trigger = 0;
- break;
- }
- }
- return trigger;
-}
-
-static inline int irq_polarity(int idx)
-{
- return MPBIOS_polarity(idx);
-}
-
-static inline int irq_trigger(int idx)
-{
- return MPBIOS_trigger(idx);
-}
-
-static int pin_2_irq(int idx, int apic, int pin)
-{
- int irq, i;
- int bus = mp_irqs[idx].mpc_srcbus;
-
- /*
- * Debugging check, we are in big trouble if this message pops up!
- */
- if (mp_irqs[idx].mpc_dstirq != pin)
- printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
-
- if (test_bit(bus, mp_bus_not_pci)) {
- irq = mp_irqs[idx].mpc_srcbusirq;
- } else {
- /*
- * PCI IRQs are mapped in order
- */
- i = irq = 0;
- while (i < apic)
- irq += nr_ioapic_registers[i++];
- irq += pin;
- }
- BUG_ON(irq >= NR_IRQS);
- return irq;
-}
-
-static int __assign_irq_vector(int irq, cpumask_t mask)
-{
- /*
- * NOTE! The local APIC isn't very good at handling
- * multiple interrupts at the same interrupt level.
- * As the interrupt level is determined by taking the
- * vector number and shifting that right by 4, we
- * want to spread these out a bit so that they don't
- * all fall in the same interrupt level.
- *
- * Also, we've got to be careful not to trash gate
- * 0x80, because int 0x80 is hm, kind of importantish. ;)
- */
- static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
- unsigned int old_vector;
- int cpu;
- struct irq_cfg *cfg;
-
- BUG_ON((unsigned)irq >= NR_IRQS);
- cfg = &irq_cfg[irq];
-
- /* Only try and allocate irqs on cpus that are present */
- cpus_and(mask, mask, cpu_online_map);
-
- if ((cfg->move_in_progress) || cfg->move_cleanup_count)
- return -EBUSY;
-
- old_vector = cfg->vector;
- if (old_vector) {
- cpumask_t tmp;
- cpus_and(tmp, cfg->domain, mask);
- if (!cpus_empty(tmp))
- return 0;
- }
-
- for_each_cpu_mask(cpu, mask) {
- cpumask_t domain, new_mask;
- int new_cpu;
- int vector, offset;
-
- domain = vector_allocation_domain(cpu);
- cpus_and(new_mask, domain, cpu_online_map);
-
- vector = current_vector;
- offset = current_offset;
-next:
- vector += 8;
- if (vector >= FIRST_SYSTEM_VECTOR) {
- /* If we run out of vectors on large boxen, must share them. */
- offset = (offset + 1) % 8;
- vector = FIRST_DEVICE_VECTOR + offset;
- }
- if (unlikely(current_vector == vector))
- continue;
- if (vector == IA32_SYSCALL_VECTOR)
- goto next;
- for_each_cpu_mask(new_cpu, new_mask)
- if (per_cpu(vector_irq, new_cpu)[vector] != -1)
- goto next;
- /* Found one! */
- current_vector = vector;
- current_offset = offset;
- if (old_vector) {
- cfg->move_in_progress = 1;
- cfg->old_domain = cfg->domain;
- }
- for_each_cpu_mask(new_cpu, new_mask)
- per_cpu(vector_irq, new_cpu)[vector] = irq;
- cfg->vector = vector;
- cfg->domain = domain;
- return 0;
- }
- return -ENOSPC;
-}
-
-static int assign_irq_vector(int irq, cpumask_t mask)
-{
- int err;
- unsigned long flags;
-
- spin_lock_irqsave(&vector_lock, flags);
- err = __assign_irq_vector(irq, mask);
- spin_unlock_irqrestore(&vector_lock, flags);
- return err;
-}
-
-static void __clear_irq_vector(int irq)
-{
- struct irq_cfg *cfg;
- cpumask_t mask;
- int cpu, vector;
-
- BUG_ON((unsigned)irq >= NR_IRQS);
- cfg = &irq_cfg[irq];
- BUG_ON(!cfg->vector);
-
- vector = cfg->vector;
- cpus_and(mask, cfg->domain, cpu_online_map);
- for_each_cpu_mask(cpu, mask)
- per_cpu(vector_irq, cpu)[vector] = -1;
-
- cfg->vector = 0;
- cfg->domain = CPU_MASK_NONE;
-}
-
-void __setup_vector_irq(int cpu)
-{
- /* Initialize vector_irq on a new cpu */
- /* This function must be called with vector_lock held */
- int irq, vector;
-
- /* Mark the inuse vectors */
- for (irq = 0; irq < NR_IRQS; ++irq) {
- if (!cpu_isset(cpu, irq_cfg[irq].domain))
- continue;
- vector = irq_cfg[irq].vector;
- per_cpu(vector_irq, cpu)[vector] = irq;
- }
- /* Mark the free vectors */
- for (vector = 0; vector < NR_VECTORS; ++vector) {
- irq = per_cpu(vector_irq, cpu)[vector];
- if (irq < 0)
- continue;
- if (!cpu_isset(cpu, irq_cfg[irq].domain))
- per_cpu(vector_irq, cpu)[vector] = -1;
- }
-}
-
-
-static struct irq_chip ioapic_chip;
-
-static void ioapic_register_intr(int irq, unsigned long trigger)
-{
- if (trigger) {
- irq_desc[irq].status |= IRQ_LEVEL;
- set_irq_chip_and_handler_name(irq, &ioapic_chip,
- handle_fasteoi_irq, "fasteoi");
- } else {
- irq_desc[irq].status &= ~IRQ_LEVEL;
- set_irq_chip_and_handler_name(irq, &ioapic_chip,
- handle_edge_irq, "edge");
- }
-}
-
-static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
- int trigger, int polarity)
-{
- struct irq_cfg *cfg = irq_cfg + irq;
- struct IO_APIC_route_entry entry;
- cpumask_t mask;
-
- if (!IO_APIC_IRQ(irq))
- return;
-
- mask = TARGET_CPUS;
- if (assign_irq_vector(irq, mask))
- return;
-
- cpus_and(mask, cfg->domain, mask);
-
- apic_printk(APIC_VERBOSE,KERN_DEBUG
- "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
- "IRQ %d Mode:%i Active:%i)\n",
- apic, mp_ioapics[apic].mpc_apicid, pin, cfg->vector,
- irq, trigger, polarity);
-
- /*
- * add it to the IO-APIC irq-routing table:
- */
- memset(&entry,0,sizeof(entry));
-
- entry.delivery_mode = INT_DELIVERY_MODE;
- entry.dest_mode = INT_DEST_MODE;
- entry.dest = cpu_mask_to_apicid(mask);
- entry.mask = 0; /* enable IRQ */
- entry.trigger = trigger;
- entry.polarity = polarity;
- entry.vector = cfg->vector;
-
- /* Mask level triggered irqs.
- * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
- */
- if (trigger)
- entry.mask = 1;
-
- ioapic_register_intr(irq, trigger);
- if (irq < 16)
- disable_8259A_irq(irq);
-
- ioapic_write_entry(apic, pin, entry);
-}
-
-static void __init setup_IO_APIC_irqs(void)
-{
- int apic, pin, idx, irq, first_notcon = 1;
-
- apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
-
- for (apic = 0; apic < nr_ioapics; apic++) {
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-
- idx = find_irq_entry(apic,pin,mp_INT);
- if (idx == -1) {
- if (first_notcon) {
- apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin);
- first_notcon = 0;
- } else
- apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mpc_apicid, pin);
- continue;
- }
-
- irq = pin_2_irq(idx, apic, pin);
- add_pin_to_irq(irq, apic, pin);
-
- setup_IO_APIC_irq(apic, pin, irq,
- irq_trigger(idx), irq_polarity(idx));
- }
- }
-
- if (!first_notcon)
- apic_printk(APIC_VERBOSE," not connected.\n");
-}
-
-/*
- * Set up the 8259A-master output pin as broadcast to all
- * CPUs.
- */
-static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
-{
- struct IO_APIC_route_entry entry;
- unsigned long flags;
-
- memset(&entry,0,sizeof(entry));
-
- disable_8259A_irq(0);
-
- /* mask LVT0 */
- apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
-
- /*
- * We use logical delivery to get the timer IRQ
- * to the first CPU.
- */
- entry.dest_mode = INT_DEST_MODE;
- entry.mask = 0; /* unmask IRQ now */
- entry.dest = cpu_mask_to_apicid(TARGET_CPUS);
- entry.delivery_mode = INT_DELIVERY_MODE;
- entry.polarity = 0;
- entry.trigger = 0;
- entry.vector = vector;
-
- /*
- * The timer IRQ doesn't have to know that behind the
- * scene we have a 8259A-master in AEOI mode ...
- */
- set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
-
- /*
- * Add it to the IO-APIC irq-routing table:
- */
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
- io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- enable_8259A_irq(0);
-}
-
-void __apicdebuginit print_IO_APIC(void)
-{
- int apic, i;
- union IO_APIC_reg_00 reg_00;
- union IO_APIC_reg_01 reg_01;
- union IO_APIC_reg_02 reg_02;
- unsigned long flags;
-
- if (apic_verbosity == APIC_QUIET)
- return;
-
- printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
- for (i = 0; i < nr_ioapics; i++)
- printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
- mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
-
- /*
- * We are a bit conservative about what we expect. We have to
- * know about every hardware change ASAP.
- */
- printk(KERN_INFO "testing the IO APIC.......................\n");
-
- for (apic = 0; apic < nr_ioapics; apic++) {
-
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(apic, 0);
- reg_01.raw = io_apic_read(apic, 1);
- if (reg_01.bits.version >= 0x10)
- reg_02.raw = io_apic_read(apic, 2);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- printk("\n");
- printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
- printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
- printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
-
- printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
- printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
-
- printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
- printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
-
- if (reg_01.bits.version >= 0x10) {
- printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
- printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
- }
-
- printk(KERN_DEBUG ".... IRQ redirection table:\n");
-
- printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
- " Stat Dmod Deli Vect: \n");
-
- for (i = 0; i <= reg_01.bits.entries; i++) {
- struct IO_APIC_route_entry entry;
-
- entry = ioapic_read_entry(apic, i);
-
- printk(KERN_DEBUG " %02x %03X ",
- i,
- entry.dest
- );
-
- printk("%1d %1d %1d %1d %1d %1d %1d %02X\n",
- entry.mask,
- entry.trigger,
- entry.irr,
- entry.polarity,
- entry.delivery_status,
- entry.dest_mode,
- entry.delivery_mode,
- entry.vector
- );
- }
- }
- printk(KERN_DEBUG "IRQ to pin mappings:\n");
- for (i = 0; i < NR_IRQS; i++) {
- struct irq_pin_list *entry = irq_2_pin + i;
- if (entry->pin < 0)
- continue;
- printk(KERN_DEBUG "IRQ%d ", i);
- for (;;) {
- printk("-> %d:%d", entry->apic, entry->pin);
- if (!entry->next)
- break;
- entry = irq_2_pin + entry->next;
- }
- printk("\n");
- }
-
- printk(KERN_INFO ".................................... done.\n");
-
- return;
-}
-
-#if 0
-
-static __apicdebuginit void print_APIC_bitfield (int base)
-{
- unsigned int v;
- int i, j;
-
- if (apic_verbosity == APIC_QUIET)
- return;
-
- printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
- for (i = 0; i < 8; i++) {
- v = apic_read(base + i*0x10);
- for (j = 0; j < 32; j++) {
- if (v & (1<<j))
- printk("1");
- else
- printk("0");
- }
- printk("\n");
- }
-}
-
-void __apicdebuginit print_local_APIC(void * dummy)
-{
- unsigned int v, ver, maxlvt;
-
- if (apic_verbosity == APIC_QUIET)
- return;
-
- printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
- smp_processor_id(), hard_smp_processor_id());
- v = apic_read(APIC_ID);
- printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(v));
- v = apic_read(APIC_LVR);
- printk(KERN_INFO "... APIC VERSION: %08x\n", v);
- ver = GET_APIC_VERSION(v);
- maxlvt = get_maxlvt();
-
- v = apic_read(APIC_TASKPRI);
- printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
-
- v = apic_read(APIC_ARBPRI);
- printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
- v & APIC_ARBPRI_MASK);
- v = apic_read(APIC_PROCPRI);
- printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
-
- v = apic_read(APIC_EOI);
- printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
- v = apic_read(APIC_RRR);
- printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
- v = apic_read(APIC_LDR);
- printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
- v = apic_read(APIC_DFR);
- printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
- v = apic_read(APIC_SPIV);
- printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
-
- printk(KERN_DEBUG "... APIC ISR field:\n");
- print_APIC_bitfield(APIC_ISR);
- printk(KERN_DEBUG "... APIC TMR field:\n");
- print_APIC_bitfield(APIC_TMR);
- printk(KERN_DEBUG "... APIC IRR field:\n");
- print_APIC_bitfield(APIC_IRR);
-
- v = apic_read(APIC_ESR);
- printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
-
- v = apic_read(APIC_ICR);
- printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
- v = apic_read(APIC_ICR2);
- printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
-
- v = apic_read(APIC_LVTT);
- printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
-
- if (maxlvt > 3) { /* PC is LVT#4. */
- v = apic_read(APIC_LVTPC);
- printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
- }
- v = apic_read(APIC_LVT0);
- printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
- v = apic_read(APIC_LVT1);
- printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
-
- if (maxlvt > 2) { /* ERR is LVT#3. */
- v = apic_read(APIC_LVTERR);
- printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
- }
-
- v = apic_read(APIC_TMICT);
- printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
- v = apic_read(APIC_TMCCT);
- printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
- v = apic_read(APIC_TDCR);
- printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
- printk("\n");
-}
-
-void print_all_local_APICs (void)
-{
- on_each_cpu(print_local_APIC, NULL, 1, 1);
-}
-
-void __apicdebuginit print_PIC(void)
-{
- unsigned int v;
- unsigned long flags;
-
- if (apic_verbosity == APIC_QUIET)
- return;
-
- printk(KERN_DEBUG "\nprinting PIC contents\n");
-
- spin_lock_irqsave(&i8259A_lock, flags);
-
- v = inb(0xa1) << 8 | inb(0x21);
- printk(KERN_DEBUG "... PIC IMR: %04x\n", v);
-
- v = inb(0xa0) << 8 | inb(0x20);
- printk(KERN_DEBUG "... PIC IRR: %04x\n", v);
-
- outb(0x0b,0xa0);
- outb(0x0b,0x20);
- v = inb(0xa0) << 8 | inb(0x20);
- outb(0x0a,0xa0);
- outb(0x0a,0x20);
-
- spin_unlock_irqrestore(&i8259A_lock, flags);
-
- printk(KERN_DEBUG "... PIC ISR: %04x\n", v);
-
- v = inb(0x4d1) << 8 | inb(0x4d0);
- printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
-}
-
-#endif /* 0 */
-
-static void __init enable_IO_APIC(void)
-{
- union IO_APIC_reg_01 reg_01;
- int i8259_apic, i8259_pin;
- int i, apic;
- unsigned long flags;
-
- for (i = 0; i < PIN_MAP_SIZE; i++) {
- irq_2_pin[i].pin = -1;
- irq_2_pin[i].next = 0;
- }
-
- /*
- * The number of IO-APIC IRQ registers (== #pins):
- */
- for (apic = 0; apic < nr_ioapics; apic++) {
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_01.raw = io_apic_read(apic, 1);
- spin_unlock_irqrestore(&ioapic_lock, flags);
- nr_ioapic_registers[apic] = reg_01.bits.entries+1;
- }
- for(apic = 0; apic < nr_ioapics; apic++) {
- int pin;
- /* See if any of the pins is in ExtINT mode */
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
- struct IO_APIC_route_entry entry;
- entry = ioapic_read_entry(apic, pin);
-
- /* If the interrupt line is enabled and in ExtInt mode
- * I have found the pin where the i8259 is connected.
- */
- if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
- ioapic_i8259.apic = apic;
- ioapic_i8259.pin = pin;
- goto found_i8259;
- }
- }
- }
- found_i8259:
- /* Look to see what if the MP table has reported the ExtINT */
- i8259_pin = find_isa_irq_pin(0, mp_ExtINT);
- i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
- /* Trust the MP table if nothing is setup in the hardware */
- if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
- printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
- ioapic_i8259.pin = i8259_pin;
- ioapic_i8259.apic = i8259_apic;
- }
- /* Complain if the MP table and the hardware disagree */
- if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
- (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
- {
- printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
- }
-
- /*
- * Do not trust the IO-APIC being empty at bootup
- */
- clear_IO_APIC();
-}
-
-/*
- * Not an __init, needed by the reboot code
- */
-void disable_IO_APIC(void)
-{
- /*
- * Clear the IO-APIC before rebooting:
- */
- clear_IO_APIC();
-
- /*
- * If the i8259 is routed through an IOAPIC
- * Put that IOAPIC in virtual wire mode
- * so legacy interrupts can be delivered.
- */
- if (ioapic_i8259.pin != -1) {
- struct IO_APIC_route_entry entry;
-
- memset(&entry, 0, sizeof(entry));
- entry.mask = 0; /* Enabled */
- entry.trigger = 0; /* Edge */
- entry.irr = 0;
- entry.polarity = 0; /* High */
- entry.delivery_status = 0;
- entry.dest_mode = 0; /* Physical */
- entry.delivery_mode = dest_ExtINT; /* ExtInt */
- entry.vector = 0;
- entry.dest = GET_APIC_ID(apic_read(APIC_ID));
-
- /*
- * Add it to the IO-APIC irq-routing table:
- */
- ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
- }
-
- disconnect_bsp_APIC(ioapic_i8259.pin != -1);
-}
-
-/*
- * There is a nasty bug in some older SMP boards, their mptable lies
- * about the timer IRQ. We do the following to work around the situation:
- *
- * - timer IRQ defaults to IO-APIC IRQ
- * - if this function detects that timer IRQs are defunct, then we fall
- * back to ISA timer IRQs
- */
-static int __init timer_irq_works(void)
-{
- unsigned long t1 = jiffies;
-
- local_irq_enable();
- /* Let ten ticks pass... */
- mdelay((10 * 1000) / HZ);
-
- /*
- * Expect a few ticks at least, to be sure some possible
- * glue logic does not lock up after one or two first
- * ticks in a non-ExtINT mode. Also the local APIC
- * might have cached one ExtINT interrupt. Finally, at
- * least one tick may be lost due to delays.
- */
-
- /* jiffies wrap? */
- if (jiffies - t1 > 4)
- return 1;
- return 0;
-}
-
-/*
- * In the SMP+IOAPIC case it might happen that there are an unspecified
- * number of pending IRQ events unhandled. These cases are very rare,
- * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
- * better to do it this way as thus we do not have to be aware of
- * 'pending' interrupts in the IRQ path, except at this point.
- */
-/*
- * Edge triggered needs to resend any interrupt
- * that was delayed but this is now handled in the device
- * independent code.
- */
-
-/*
- * Starting up a edge-triggered IO-APIC interrupt is
- * nasty - we need to make sure that we get the edge.
- * If it is already asserted for some reason, we need
- * return 1 to indicate that is was pending.
- *
- * This is not complete - we should be able to fake
- * an edge even if it isn't on the 8259A...
- */
-
-static unsigned int startup_ioapic_irq(unsigned int irq)
-{
- int was_pending = 0;
- unsigned long flags;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- if (irq < 16) {
- disable_8259A_irq(irq);
- if (i8259A_irq_pending(irq))
- was_pending = 1;
- }
- __unmask_IO_APIC_irq(irq);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- return was_pending;
-}
-
-static int ioapic_retrigger_irq(unsigned int irq)
-{
- struct irq_cfg *cfg = &irq_cfg[irq];
- cpumask_t mask;
- unsigned long flags;
-
- spin_lock_irqsave(&vector_lock, flags);
- cpus_clear(mask);
- cpu_set(first_cpu(cfg->domain), mask);
-
- send_IPI_mask(mask, cfg->vector);
- spin_unlock_irqrestore(&vector_lock, flags);
-
- return 1;
-}
-
-/*
- * Level and edge triggered IO-APIC interrupts need different handling,
- * so we use two separate IRQ descriptors. Edge triggered IRQs can be
- * handled with the level-triggered descriptor, but that one has slightly
- * more overhead. Level-triggered interrupts cannot be handled with the
- * edge-triggered handler, without risking IRQ storms and other ugly
- * races.
- */
-
-#ifdef CONFIG_SMP
-asmlinkage void smp_irq_move_cleanup_interrupt(void)
-{
- unsigned vector, me;
- ack_APIC_irq();
- exit_idle();
- irq_enter();
-
- me = smp_processor_id();
- for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
- unsigned int irq;
- struct irq_desc *desc;
- struct irq_cfg *cfg;
- irq = __get_cpu_var(vector_irq)[vector];
- if (irq >= NR_IRQS)
- continue;
-
- desc = irq_desc + irq;
- cfg = irq_cfg + irq;
- spin_lock(&desc->lock);
- if (!cfg->move_cleanup_count)
- goto unlock;
-
- if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
- goto unlock;
-
- __get_cpu_var(vector_irq)[vector] = -1;
- cfg->move_cleanup_count--;
-unlock:
- spin_unlock(&desc->lock);
- }
-
- irq_exit();
-}
-
-static void irq_complete_move(unsigned int irq)
-{
- struct irq_cfg *cfg = irq_cfg + irq;
- unsigned vector, me;
-
- if (likely(!cfg->move_in_progress))
- return;
-
- vector = ~get_irq_regs()->orig_rax;
- me = smp_processor_id();
- if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
- cpumask_t cleanup_mask;
-
- cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
- cfg->move_cleanup_count = cpus_weight(cleanup_mask);
- send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
- cfg->move_in_progress = 0;
- }
-}
-#else
-static inline void irq_complete_move(unsigned int irq) {}
-#endif
-
-static void ack_apic_edge(unsigned int irq)
-{
- irq_complete_move(irq);
- move_native_irq(irq);
- ack_APIC_irq();
-}
-
-static void ack_apic_level(unsigned int irq)
-{
- int do_unmask_irq = 0;
-
- irq_complete_move(irq);
-#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
- /* If we are moving the irq we need to mask it */
- if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
- do_unmask_irq = 1;
- mask_IO_APIC_irq(irq);
- }
-#endif
-
- /*
- * We must acknowledge the irq before we move it or the acknowledge will
- * not propagate properly.
- */
- ack_APIC_irq();
-
- /* Now we can move and renable the irq */
- if (unlikely(do_unmask_irq)) {
- /* Only migrate the irq if the ack has been received.
- *
- * On rare occasions the broadcast level triggered ack gets
- * delayed going to ioapics, and if we reprogram the
- * vector while Remote IRR is still set the irq will never
- * fire again.
- *
- * To prevent this scenario we read the Remote IRR bit
- * of the ioapic. This has two effects.
- * - On any sane system the read of the ioapic will
- * flush writes (and acks) going to the ioapic from
- * this cpu.
- * - We get to see if the ACK has actually been delivered.
- *
- * Based on failed experiments of reprogramming the
- * ioapic entry from outside of irq context starting
- * with masking the ioapic entry and then polling until
- * Remote IRR was clear before reprogramming the
- * ioapic I don't trust the Remote IRR bit to be
- * completey accurate.
- *
- * However there appears to be no other way to plug
- * this race, so if the Remote IRR bit is not
- * accurate and is causing problems then it is a hardware bug
- * and you can go talk to the chipset vendor about it.
- */
- if (!io_apic_level_ack_pending(irq))
- move_masked_irq(irq);
- unmask_IO_APIC_irq(irq);
- }
-}
-
-static struct irq_chip ioapic_chip __read_mostly = {
- .name = "IO-APIC",
- .startup = startup_ioapic_irq,
- .mask = mask_IO_APIC_irq,
- .unmask = unmask_IO_APIC_irq,
- .ack = ack_apic_edge,
- .eoi = ack_apic_level,
-#ifdef CONFIG_SMP
- .set_affinity = set_ioapic_affinity_irq,
-#endif
- .retrigger = ioapic_retrigger_irq,
-};
-
-static inline void init_IO_APIC_traps(void)
-{
- int irq;
-
- /*
- * NOTE! The local APIC isn't very good at handling
- * multiple interrupts at the same interrupt level.
- * As the interrupt level is determined by taking the
- * vector number and shifting that right by 4, we
- * want to spread these out a bit so that they don't
- * all fall in the same interrupt level.
- *
- * Also, we've got to be careful not to trash gate
- * 0x80, because int 0x80 is hm, kind of importantish. ;)
- */
- for (irq = 0; irq < NR_IRQS ; irq++) {
- int tmp = irq;
- if (IO_APIC_IRQ(tmp) && !irq_cfg[tmp].vector) {
- /*
- * Hmm.. We don't have an entry for this,
- * so default to an old-fashioned 8259
- * interrupt if we can..
- */
- if (irq < 16)
- make_8259A_irq(irq);
- else
- /* Strange. Oh, well.. */
- irq_desc[irq].chip = &no_irq_chip;
- }
- }
-}
-
-static void enable_lapic_irq (unsigned int irq)
-{
- unsigned long v;
-
- v = apic_read(APIC_LVT0);
- apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
-}
-
-static void disable_lapic_irq (unsigned int irq)
-{
- unsigned long v;
-
- v = apic_read(APIC_LVT0);
- apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
-}
-
-static void ack_lapic_irq (unsigned int irq)
-{
- ack_APIC_irq();
-}
-
-static void end_lapic_irq (unsigned int i) { /* nothing */ }
-
-static struct hw_interrupt_type lapic_irq_type __read_mostly = {
- .name = "local-APIC",
- .typename = "local-APIC-edge",
- .startup = NULL, /* startup_irq() not used for IRQ0 */
- .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
- .enable = enable_lapic_irq,
- .disable = disable_lapic_irq,
- .ack = ack_lapic_irq,
- .end = end_lapic_irq,
-};
-
-static void setup_nmi (void)
-{
- /*
- * Dirty trick to enable the NMI watchdog ...
- * We put the 8259A master into AEOI mode and
- * unmask on all local APICs LVT0 as NMI.
- *
- * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
- * is from Maciej W. Rozycki - so we do not have to EOI from
- * the NMI handler or the timer interrupt.
- */
- printk(KERN_INFO "activating NMI Watchdog ...");
-
- enable_NMI_through_LVT0(NULL);
-
- printk(" done.\n");
-}
-
-/*
- * This looks a bit hackish but it's about the only one way of sending
- * a few INTA cycles to 8259As and any associated glue logic. ICR does
- * not support the ExtINT mode, unfortunately. We need to send these
- * cycles as some i82489DX-based boards have glue logic that keeps the
- * 8259A interrupt line asserted until INTA. --macro
- */
-static inline void unlock_ExtINT_logic(void)
-{
- int apic, pin, i;
- struct IO_APIC_route_entry entry0, entry1;
- unsigned char save_control, save_freq_select;
- unsigned long flags;
-
- pin = find_isa_irq_pin(8, mp_INT);
- apic = find_isa_irq_apic(8, mp_INT);
- if (pin == -1)
- return;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
- *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
- spin_unlock_irqrestore(&ioapic_lock, flags);
- clear_IO_APIC_pin(apic, pin);
-
- memset(&entry1, 0, sizeof(entry1));
-
- entry1.dest_mode = 0; /* physical delivery */
- entry1.mask = 0; /* unmask IRQ now */
- entry1.dest = hard_smp_processor_id();
- entry1.delivery_mode = dest_ExtINT;
- entry1.polarity = entry0.polarity;
- entry1.trigger = 0;
- entry1.vector = 0;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- save_control = CMOS_READ(RTC_CONTROL);
- save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
- CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
- RTC_FREQ_SELECT);
- CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
-
- i = 100;
- while (i-- > 0) {
- mdelay(10);
- if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
- i -= 10;
- }
-
- CMOS_WRITE(save_control, RTC_CONTROL);
- CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
- clear_IO_APIC_pin(apic, pin);
-
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-/*
- * This code may look a bit paranoid, but it's supposed to cooperate with
- * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
- * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
- * fanatically on his truly buggy board.
- *
- * FIXME: really need to revamp this for modern platforms only.
- */
-static inline void check_timer(void)
-{
- struct irq_cfg *cfg = irq_cfg + 0;
- int apic1, pin1, apic2, pin2;
-
- /*
- * get/set the timer IRQ vector:
- */
- disable_8259A_irq(0);
- assign_irq_vector(0, TARGET_CPUS);
-
- /*
- * Subtle, code in do_timer_interrupt() expects an AEOI
- * mode for the 8259A whenever interrupts are routed
- * through I/O APICs. Also IRQ0 has to be enabled in
- * the 8259A which implies the virtual wire has to be
- * disabled in the local APIC.
- */
- apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
- init_8259A(1);
- if (timer_over_8254 > 0)
- enable_8259A_irq(0);
-
- pin1 = find_isa_irq_pin(0, mp_INT);
- apic1 = find_isa_irq_apic(0, mp_INT);
- pin2 = ioapic_i8259.pin;
- apic2 = ioapic_i8259.apic;
-
- apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
- cfg->vector, apic1, pin1, apic2, pin2);
-
- if (pin1 != -1) {
- /*
- * Ok, does IRQ0 through the IOAPIC work?
- */
- unmask_IO_APIC_irq(0);
- if (!no_timer_check && timer_irq_works()) {
- nmi_watchdog_default();
- if (nmi_watchdog == NMI_IO_APIC) {
- disable_8259A_irq(0);
- setup_nmi();
- enable_8259A_irq(0);
- }
- if (disable_timer_pin_1 > 0)
- clear_IO_APIC_pin(0, pin1);
- return;
- }
- clear_IO_APIC_pin(apic1, pin1);
- apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not "
- "connected to IO-APIC\n");
- }
-
- apic_printk(APIC_VERBOSE,KERN_INFO "...trying to set up timer (IRQ0) "
- "through the 8259A ... ");
- if (pin2 != -1) {
- apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...",
- apic2, pin2);
- /*
- * legacy devices should be connected to IO APIC #0
- */
- setup_ExtINT_IRQ0_pin(apic2, pin2, cfg->vector);
- if (timer_irq_works()) {
- apic_printk(APIC_VERBOSE," works.\n");
- nmi_watchdog_default();
- if (nmi_watchdog == NMI_IO_APIC) {
- setup_nmi();
- }
- return;
- }
- /*
- * Cleanup, just in case ...
- */
- clear_IO_APIC_pin(apic2, pin2);
- }
- apic_printk(APIC_VERBOSE," failed.\n");
-
- if (nmi_watchdog == NMI_IO_APIC) {
- printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
- nmi_watchdog = 0;
- }
-
- apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
-
- disable_8259A_irq(0);
- irq_desc[0].chip = &lapic_irq_type;
- apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
- enable_8259A_irq(0);
-
- if (timer_irq_works()) {
- apic_printk(APIC_VERBOSE," works.\n");
- return;
- }
- apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
- apic_printk(APIC_VERBOSE," failed.\n");
-
- apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as ExtINT IRQ...");
-
- init_8259A(0);
- make_8259A_irq(0);
- apic_write(APIC_LVT0, APIC_DM_EXTINT);
-
- unlock_ExtINT_logic();
-
- if (timer_irq_works()) {
- apic_printk(APIC_VERBOSE," works.\n");
- return;
- }
- apic_printk(APIC_VERBOSE," failed :(.\n");
- panic("IO-APIC + timer doesn't work! Try using the 'noapic' kernel parameter\n");
-}
-
-static int __init notimercheck(char *s)
-{
- no_timer_check = 1;
- return 1;
-}
-__setup("no_timer_check", notimercheck);
-
-/*
- *
- * IRQ's that are handled by the PIC in the MPS IOAPIC case.
- * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
- * Linux doesn't really care, as it's not actually used
- * for any interrupt handling anyway.
- */
-#define PIC_IRQS (1<<2)
-
-void __init setup_IO_APIC(void)
-{
- enable_IO_APIC();
-
- if (acpi_ioapic)
- io_apic_irqs = ~0; /* all IRQs go through IOAPIC */
- else
- io_apic_irqs = ~PIC_IRQS;
-
- apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
-
- sync_Arb_IDs();
- setup_IO_APIC_irqs();
- init_IO_APIC_traps();
- check_timer();
- if (!acpi_ioapic)
- print_IO_APIC();
-}
-
-struct sysfs_ioapic_data {
- struct sys_device dev;
- struct IO_APIC_route_entry entry[0];
-};
-static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
-
-static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
-{
- struct IO_APIC_route_entry *entry;
- struct sysfs_ioapic_data *data;
- int i;
-
- data = container_of(dev, struct sysfs_ioapic_data, dev);
- entry = data->entry;
- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ )
- *entry = ioapic_read_entry(dev->id, i);
-
- return 0;
-}
-
-static int ioapic_resume(struct sys_device *dev)
-{
- struct IO_APIC_route_entry *entry;
- struct sysfs_ioapic_data *data;
- unsigned long flags;
- union IO_APIC_reg_00 reg_00;
- int i;
-
- data = container_of(dev, struct sysfs_ioapic_data, dev);
- entry = data->entry;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(dev->id, 0);
- if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
- reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
- io_apic_write(dev->id, 0, reg_00.raw);
- }
- spin_unlock_irqrestore(&ioapic_lock, flags);
- for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
- ioapic_write_entry(dev->id, i, entry[i]);
-
- return 0;
-}
-
-static struct sysdev_class ioapic_sysdev_class = {
- set_kset_name("ioapic"),
- .suspend = ioapic_suspend,
- .resume = ioapic_resume,
-};
-
-static int __init ioapic_init_sysfs(void)
-{
- struct sys_device * dev;
- int i, size, error = 0;
-
- error = sysdev_class_register(&ioapic_sysdev_class);
- if (error)
- return error;
-
- for (i = 0; i < nr_ioapics; i++ ) {
- size = sizeof(struct sys_device) + nr_ioapic_registers[i]
- * sizeof(struct IO_APIC_route_entry);
- mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL);
- if (!mp_ioapic_data[i]) {
- printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
- continue;
- }
- memset(mp_ioapic_data[i], 0, size);
- dev = &mp_ioapic_data[i]->dev;
- dev->id = i;
- dev->cls = &ioapic_sysdev_class;
- error = sysdev_register(dev);
- if (error) {
- kfree(mp_ioapic_data[i]);
- mp_ioapic_data[i] = NULL;
- printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
- continue;
- }
- }
-
- return 0;
-}
-
-device_initcall(ioapic_init_sysfs);
-
-/*
- * Dynamic irq allocate and deallocation
- */
-int create_irq(void)
-{
- /* Allocate an unused irq */
- int irq;
- int new;
- unsigned long flags;
-
- irq = -ENOSPC;
- spin_lock_irqsave(&vector_lock, flags);
- for (new = (NR_IRQS - 1); new >= 0; new--) {
- if (platform_legacy_irq(new))
- continue;
- if (irq_cfg[new].vector != 0)
- continue;
- if (__assign_irq_vector(new, TARGET_CPUS) == 0)
- irq = new;
- break;
- }
- spin_unlock_irqrestore(&vector_lock, flags);
-
- if (irq >= 0) {
- dynamic_irq_init(irq);
- }
- return irq;
-}
-
-void destroy_irq(unsigned int irq)
-{
- unsigned long flags;
-
- dynamic_irq_cleanup(irq);
-
- spin_lock_irqsave(&vector_lock, flags);
- __clear_irq_vector(irq);
- spin_unlock_irqrestore(&vector_lock, flags);
-}
-
-/*
- * MSI mesage composition
- */
-#ifdef CONFIG_PCI_MSI
-static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
-{
- struct irq_cfg *cfg = irq_cfg + irq;
- int err;
- unsigned dest;
- cpumask_t tmp;
-
- tmp = TARGET_CPUS;
- err = assign_irq_vector(irq, tmp);
- if (!err) {
- cpus_and(tmp, cfg->domain, tmp);
- dest = cpu_mask_to_apicid(tmp);
-
- msg->address_hi = MSI_ADDR_BASE_HI;
- msg->address_lo =
- MSI_ADDR_BASE_LO |
- ((INT_DEST_MODE == 0) ?
- MSI_ADDR_DEST_MODE_PHYSICAL:
- MSI_ADDR_DEST_MODE_LOGICAL) |
- ((INT_DELIVERY_MODE != dest_LowestPrio) ?
- MSI_ADDR_REDIRECTION_CPU:
- MSI_ADDR_REDIRECTION_LOWPRI) |
- MSI_ADDR_DEST_ID(dest);
-
- msg->data =
- MSI_DATA_TRIGGER_EDGE |
- MSI_DATA_LEVEL_ASSERT |
- ((INT_DELIVERY_MODE != dest_LowestPrio) ?
- MSI_DATA_DELIVERY_FIXED:
- MSI_DATA_DELIVERY_LOWPRI) |
- MSI_DATA_VECTOR(cfg->vector);
- }
- return err;
-}
-
-#ifdef CONFIG_SMP
-static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
-{
- struct irq_cfg *cfg = irq_cfg + irq;
- struct msi_msg msg;
- unsigned int dest;
- cpumask_t tmp;
-
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
- return;
-
- if (assign_irq_vector(irq, mask))
- return;
-
- cpus_and(tmp, cfg->domain, mask);
- dest = cpu_mask_to_apicid(tmp);
-
- read_msi_msg(irq, &msg);
-
- msg.data &= ~MSI_DATA_VECTOR_MASK;
- msg.data |= MSI_DATA_VECTOR(cfg->vector);
- msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
- msg.address_lo |= MSI_ADDR_DEST_ID(dest);
-
- write_msi_msg(irq, &msg);
- irq_desc[irq].affinity = mask;
-}
-#endif /* CONFIG_SMP */
-
-/*
- * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
- * which implement the MSI or MSI-X Capability Structure.
- */
-static struct irq_chip msi_chip = {
- .name = "PCI-MSI",
- .unmask = unmask_msi_irq,
- .mask = mask_msi_irq,
- .ack = ack_apic_edge,
-#ifdef CONFIG_SMP
- .set_affinity = set_msi_irq_affinity,
-#endif
- .retrigger = ioapic_retrigger_irq,
-};
-
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
-{
- struct msi_msg msg;
- int irq, ret;
- irq = create_irq();
- if (irq < 0)
- return irq;
-
- ret = msi_compose_msg(dev, irq, &msg);
- if (ret < 0) {
- destroy_irq(irq);
- return ret;
- }
-
- set_irq_msi(irq, desc);
- write_msi_msg(irq, &msg);
-
- set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
-
- return 0;
-}
-
-void arch_teardown_msi_irq(unsigned int irq)
-{
- destroy_irq(irq);
-}
-
-#endif /* CONFIG_PCI_MSI */
-
-/*
- * Hypertransport interrupt support
- */
-#ifdef CONFIG_HT_IRQ
-
-#ifdef CONFIG_SMP
-
-static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
-{
- struct ht_irq_msg msg;
- fetch_ht_irq_msg(irq, &msg);
-
- msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
- msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
-
- msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
- msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
-
- write_ht_irq_msg(irq, &msg);
-}
-
-static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
-{
- struct irq_cfg *cfg = irq_cfg + irq;
- unsigned int dest;
- cpumask_t tmp;
-
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
- return;
-
- if (assign_irq_vector(irq, mask))
- return;
-
- cpus_and(tmp, cfg->domain, mask);
- dest = cpu_mask_to_apicid(tmp);
-
- target_ht_irq(irq, dest, cfg->vector);
- irq_desc[irq].affinity = mask;
-}
-#endif
-
-static struct irq_chip ht_irq_chip = {
- .name = "PCI-HT",
- .mask = mask_ht_irq,
- .unmask = unmask_ht_irq,
- .ack = ack_apic_edge,
-#ifdef CONFIG_SMP
- .set_affinity = set_ht_irq_affinity,
-#endif
- .retrigger = ioapic_retrigger_irq,
-};
-
-int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
-{
- struct irq_cfg *cfg = irq_cfg + irq;
- int err;
- cpumask_t tmp;
-
- tmp = TARGET_CPUS;
- err = assign_irq_vector(irq, tmp);
- if (!err) {
- struct ht_irq_msg msg;
- unsigned dest;
-
- cpus_and(tmp, cfg->domain, tmp);
- dest = cpu_mask_to_apicid(tmp);
-
- msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
-
- msg.address_lo =
- HT_IRQ_LOW_BASE |
- HT_IRQ_LOW_DEST_ID(dest) |
- HT_IRQ_LOW_VECTOR(cfg->vector) |
- ((INT_DEST_MODE == 0) ?
- HT_IRQ_LOW_DM_PHYSICAL :
- HT_IRQ_LOW_DM_LOGICAL) |
- HT_IRQ_LOW_RQEOI_EDGE |
- ((INT_DELIVERY_MODE != dest_LowestPrio) ?
- HT_IRQ_LOW_MT_FIXED :
- HT_IRQ_LOW_MT_ARBITRATED) |
- HT_IRQ_LOW_IRQ_MASKED;
-
- write_ht_irq_msg(irq, &msg);
-
- set_irq_chip_and_handler_name(irq, &ht_irq_chip,
- handle_edge_irq, "edge");
- }
- return err;
-}
-#endif /* CONFIG_HT_IRQ */
-
-/* --------------------------------------------------------------------------
- ACPI-based IOAPIC Configuration
- -------------------------------------------------------------------------- */
-
-#ifdef CONFIG_ACPI
-
-#define IO_APIC_MAX_ID 0xFE
-
-int __init io_apic_get_redir_entries (int ioapic)
-{
- union IO_APIC_reg_01 reg_01;
- unsigned long flags;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_01.raw = io_apic_read(ioapic, 1);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- return reg_01.bits.entries;
-}
-
-
-int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
-{
- if (!IO_APIC_IRQ(irq)) {
- apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
- ioapic);
- return -EINVAL;
- }
-
- /*
- * IRQs < 16 are already in the irq_2_pin[] map
- */
- if (irq >= 16)
- add_pin_to_irq(irq, ioapic, pin);
-
- setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);
-
- return 0;
-}
-
-#endif /* CONFIG_ACPI */
-
-
-/*
- * This function currently is only a helper for the i386 smp boot process where
- * we need to reprogram the ioredtbls to cater for the cpus which have come online
- * so mask in all cases should simply be TARGET_CPUS
- */
-#ifdef CONFIG_SMP
-void __init setup_ioapic_dest(void)
-{
- int pin, ioapic, irq, irq_entry;
-
- if (skip_ioapic_setup == 1)
- return;
-
- for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
- for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
- irq_entry = find_irq_entry(ioapic, pin, mp_INT);
- if (irq_entry == -1)
- continue;
- irq = pin_2_irq(irq_entry, ioapic, pin);
-
- /* setup_IO_APIC_irqs could fail to get vector for some device
- * when you have too many devices, because at that time only boot
- * cpu is online.
- */
- if (!irq_cfg[irq].vector)
- setup_IO_APIC_irq(ioapic, pin, irq,
- irq_trigger(irq_entry),
- irq_polarity(irq_entry));
- else
- set_ioapic_affinity_irq(irq, TARGET_CPUS);
- }
-
- }
-}
-#endif
diff --git a/arch/x86_64/kernel/ioport.c b/arch/x86_64/kernel/ioport.c
deleted file mode 100644
index 653efa30b0f4..000000000000
--- a/arch/x86_64/kernel/ioport.c
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * linux/arch/x86_64/kernel/ioport.c
- *
- * This contains the io-permission bitmap code - written by obz, with changes
- * by Linus.
- */
-
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/capability.h>
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/ioport.h>
-#include <linux/smp.h>
-#include <linux/stddef.h>
-#include <linux/slab.h>
-#include <linux/thread_info.h>
-#include <linux/syscalls.h>
-
-/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
-static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value)
-{
- int i;
- if (new_value)
- for (i = base; i < base + extent; i++)
- __set_bit(i, bitmap);
- else
- for (i = base; i < base + extent; i++)
- clear_bit(i, bitmap);
-}
-
-/*
- * this changes the io permissions bitmap in the current task.
- */
-asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
-{
- unsigned int i, max_long, bytes, bytes_updated;
- struct thread_struct * t = &current->thread;
- struct tss_struct * tss;
- unsigned long *bitmap;
-
- if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
- return -EINVAL;
- if (turn_on && !capable(CAP_SYS_RAWIO))
- return -EPERM;
-
- /*
- * If it's the first ioperm() call in this thread's lifetime, set the
- * IO bitmap up. ioperm() is much less timing critical than clone(),
- * this is why we delay this operation until now:
- */
- if (!t->io_bitmap_ptr) {
- bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
- if (!bitmap)
- return -ENOMEM;
-
- memset(bitmap, 0xff, IO_BITMAP_BYTES);
- t->io_bitmap_ptr = bitmap;
- set_thread_flag(TIF_IO_BITMAP);
- }
-
- /*
- * do it in the per-thread copy and in the TSS ...
- *
- * Disable preemption via get_cpu() - we must not switch away
- * because the ->io_bitmap_max value must match the bitmap
- * contents:
- */
- tss = &per_cpu(init_tss, get_cpu());
-
- set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);
-
- /*
- * Search for a (possibly new) maximum. This is simple and stupid,
- * to keep it obviously correct:
- */
- max_long = 0;
- for (i = 0; i < IO_BITMAP_LONGS; i++)
- if (t->io_bitmap_ptr[i] != ~0UL)
- max_long = i;
-
- bytes = (max_long + 1) * sizeof(long);
- bytes_updated = max(bytes, t->io_bitmap_max);
-
- t->io_bitmap_max = bytes;
-
- /* Update the TSS: */
- memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated);
-
- put_cpu();
-
- return 0;
-}
-
-/*
- * sys_iopl has to be used when you want to access the IO ports
- * beyond the 0x3ff range: to get the full 65536 ports bitmapped
- * you'd need 8kB of bitmaps/process, which is a bit excessive.
- *
- * Here we just change the eflags value on the stack: we allow
- * only the super-user to do it. This depends on the stack-layout
- * on system-call entry - see also fork() and the signal handling
- * code.
- */
-
-asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs)
-{
- unsigned int old = (regs->eflags >> 12) & 3;
-
- if (level > 3)
- return -EINVAL;
- /* Trying to gain more privileges? */
- if (level > old) {
- if (!capable(CAP_SYS_RAWIO))
- return -EPERM;
- }
- regs->eflags = (regs->eflags &~ X86_EFLAGS_IOPL) | (level << 12);
- return 0;
-}
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c
deleted file mode 100644
index 39cb3fa83ebb..000000000000
--- a/arch/x86_64/kernel/irq.c
+++ /dev/null
@@ -1,213 +0,0 @@
-/*
- * linux/arch/x86_64/kernel/irq.c
- *
- * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
- *
- * This file contains the lowest level x86_64-specific interrupt
- * entry and irq statistics code. All the remaining irq logic is
- * done by the generic kernel/irq/ code and in the
- * x86_64-specific irq controller code. (e.g. i8259.c and
- * io_apic.c.)
- */
-
-#include <linux/kernel_stat.h>
-#include <linux/interrupt.h>
-#include <linux/seq_file.h>
-#include <linux/module.h>
-#include <linux/delay.h>
-#include <asm/uaccess.h>
-#include <asm/io_apic.h>
-#include <asm/idle.h>
-#include <asm/smp.h>
-
-atomic_t irq_err_count;
-
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
-/*
- * Probabilistic stack overflow check:
- *
- * Only check the stack in process context, because everything else
- * runs on the big interrupt stacks. Checking reliably is too expensive,
- * so we just check from interrupts.
- */
-static inline void stack_overflow_check(struct pt_regs *regs)
-{
- u64 curbase = (u64)task_stack_page(current);
- static unsigned long warned = -60*HZ;
-
- if (regs->rsp >= curbase && regs->rsp <= curbase + THREAD_SIZE &&
- regs->rsp < curbase + sizeof(struct thread_info) + 128 &&
- time_after(jiffies, warned + 60*HZ)) {
- printk("do_IRQ: %s near stack overflow (cur:%Lx,rsp:%lx)\n",
- current->comm, curbase, regs->rsp);
- show_stack(NULL,NULL);
- warned = jiffies;
- }
-}
-#endif
-
-/*
- * Generic, controller-independent functions:
- */
-
-int show_interrupts(struct seq_file *p, void *v)
-{
- int i = *(loff_t *) v, j;
- struct irqaction * action;
- unsigned long flags;
-
- if (i == 0) {
- seq_printf(p, " ");
- for_each_online_cpu(j)
- seq_printf(p, "CPU%-8d",j);
- seq_putc(p, '\n');
- }
-
- if (i < NR_IRQS) {
- spin_lock_irqsave(&irq_desc[i].lock, flags);
- action = irq_desc[i].action;
- if (!action)
- goto skip;
- seq_printf(p, "%3d: ",i);
-#ifndef CONFIG_SMP
- seq_printf(p, "%10u ", kstat_irqs(i));
-#else
- for_each_online_cpu(j)
- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
-#endif
- seq_printf(p, " %8s", irq_desc[i].chip->name);
- seq_printf(p, "-%-8s", irq_desc[i].name);
-
- seq_printf(p, " %s", action->name);
- for (action=action->next; action; action = action->next)
- seq_printf(p, ", %s", action->name);
- seq_putc(p, '\n');
-skip:
- spin_unlock_irqrestore(&irq_desc[i].lock, flags);
- } else if (i == NR_IRQS) {
- seq_printf(p, "NMI: ");
- for_each_online_cpu(j)
- seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
- seq_putc(p, '\n');
- seq_printf(p, "LOC: ");
- for_each_online_cpu(j)
- seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs);
- seq_putc(p, '\n');
- seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
- }
- return 0;
-}
-
-/*
- * do_IRQ handles all normal device IRQ's (the special
- * SMP cross-CPU interrupts have their own specific
- * handlers).
- */
-asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
-{
- struct pt_regs *old_regs = set_irq_regs(regs);
-
- /* high bit used in ret_from_ code */
- unsigned vector = ~regs->orig_rax;
- unsigned irq;
-
- exit_idle();
- irq_enter();
- irq = __get_cpu_var(vector_irq)[vector];
-
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
- stack_overflow_check(regs);
-#endif
-
- if (likely(irq < NR_IRQS))
- generic_handle_irq(irq);
- else {
- if (!disable_apic)
- ack_APIC_irq();
-
- if (printk_ratelimit())
- printk(KERN_EMERG "%s: %d.%d No irq handler for vector\n",
- __func__, smp_processor_id(), vector);
- }
-
- irq_exit();
-
- set_irq_regs(old_regs);
- return 1;
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-void fixup_irqs(cpumask_t map)
-{
- unsigned int irq;
- static int warned;
-
- for (irq = 0; irq < NR_IRQS; irq++) {
- cpumask_t mask;
- int break_affinity = 0;
- int set_affinity = 1;
-
- if (irq == 2)
- continue;
-
- /* interrupt's are disabled at this point */
- spin_lock(&irq_desc[irq].lock);
-
- if (!irq_has_action(irq) ||
- cpus_equal(irq_desc[irq].affinity, map)) {
- spin_unlock(&irq_desc[irq].lock);
- continue;
- }
-
- cpus_and(mask, irq_desc[irq].affinity, map);
- if (cpus_empty(mask)) {
- break_affinity = 1;
- mask = map;
- }
-
- if (irq_desc[irq].chip->mask)
- irq_desc[irq].chip->mask(irq);
-
- if (irq_desc[irq].chip->set_affinity)
- irq_desc[irq].chip->set_affinity(irq, mask);
- else if (!(warned++))
- set_affinity = 0;
-
- if (irq_desc[irq].chip->unmask)
- irq_desc[irq].chip->unmask(irq);
-
- spin_unlock(&irq_desc[irq].lock);
-
- if (break_affinity && set_affinity)
- printk("Broke affinity for irq %i\n", irq);
- else if (!set_affinity)
- printk("Cannot set affinity for irq %i\n", irq);
- }
-
- /* That doesn't seem sufficient. Give it 1ms. */
- local_irq_enable();
- mdelay(1);
- local_irq_disable();
-}
-#endif
-
-extern void call_softirq(void);
-
-asmlinkage void do_softirq(void)
-{
- __u32 pending;
- unsigned long flags;
-
- if (in_interrupt())
- return;
-
- local_irq_save(flags);
- pending = local_softirq_pending();
- /* Switch to interrupt stack */
- if (pending) {
- call_softirq();
- WARN_ON_ONCE(softirq_count());
- }
- local_irq_restore(flags);
-}
-EXPORT_SYMBOL(do_softirq);
diff --git a/arch/x86_64/kernel/k8.c b/arch/x86_64/kernel/k8.c
deleted file mode 100644
index 7377ccb21335..000000000000
--- a/arch/x86_64/kernel/k8.c
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Shared support code for AMD K8 northbridges and derivates.
- * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2.
- */
-#include <linux/gfp.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <asm/k8.h>
-
-int num_k8_northbridges;
-EXPORT_SYMBOL(num_k8_northbridges);
-
-static u32 *flush_words;
-
-struct pci_device_id k8_nb_ids[] = {
- { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
- { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
- {}
-};
-EXPORT_SYMBOL(k8_nb_ids);
-
-struct pci_dev **k8_northbridges;
-EXPORT_SYMBOL(k8_northbridges);
-
-static struct pci_dev *next_k8_northbridge(struct pci_dev *dev)
-{
- do {
- dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
- if (!dev)
- break;
- } while (!pci_match_id(&k8_nb_ids[0], dev));
- return dev;
-}
-
-int cache_k8_northbridges(void)
-{
- int i;
- struct pci_dev *dev;
-
- if (num_k8_northbridges)
- return 0;
-
- dev = NULL;
- while ((dev = next_k8_northbridge(dev)) != NULL)
- num_k8_northbridges++;
-
- k8_northbridges = kmalloc((num_k8_northbridges + 1) * sizeof(void *),
- GFP_KERNEL);
- if (!k8_northbridges)
- return -ENOMEM;
-
- if (!num_k8_northbridges) {
- k8_northbridges[0] = NULL;
- return 0;
- }
-
- flush_words = kmalloc(num_k8_northbridges * sizeof(u32), GFP_KERNEL);
- if (!flush_words) {
- kfree(k8_northbridges);
- return -ENOMEM;
- }
-
- dev = NULL;
- i = 0;
- while ((dev = next_k8_northbridge(dev)) != NULL) {
- k8_northbridges[i] = dev;
- pci_read_config_dword(dev, 0x9c, &flush_words[i++]);
- }
- k8_northbridges[i] = NULL;
- return 0;
-}
-EXPORT_SYMBOL_GPL(cache_k8_northbridges);
-
-/* Ignores subdevice/subvendor but as far as I can figure out
- they're useless anyways */
-int __init early_is_k8_nb(u32 device)
-{
- struct pci_device_id *id;
- u32 vendor = device & 0xffff;
- device >>= 16;
- for (id = k8_nb_ids; id->vendor; id++)
- if (vendor == id->vendor && device == id->device)
- return 1;
- return 0;
-}
-
-void k8_flush_garts(void)
-{
- int flushed, i;
- unsigned long flags;
- static DEFINE_SPINLOCK(gart_lock);
-
- /* Avoid races between AGP and IOMMU. In theory it's not needed
- but I'm not sure if the hardware won't lose flush requests
- when another is pending. This whole thing is so expensive anyways
- that it doesn't matter to serialize more. -AK */
- spin_lock_irqsave(&gart_lock, flags);
- flushed = 0;
- for (i = 0; i < num_k8_northbridges; i++) {
- pci_write_config_dword(k8_northbridges[i], 0x9c,
- flush_words[i]|1);
- flushed++;
- }
- for (i = 0; i < num_k8_northbridges; i++) {
- u32 w;
- /* Make sure the hardware actually executed the flush*/
- for (;;) {
- pci_read_config_dword(k8_northbridges[i],
- 0x9c, &w);
- if (!(w & 1))
- break;
- cpu_relax();
- }
- }
- spin_unlock_irqrestore(&gart_lock, flags);
- if (!flushed)
- printk("nothing to flush?\n");
-}
-EXPORT_SYMBOL_GPL(k8_flush_garts);
-
diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c
deleted file mode 100644
index a30e004682e2..000000000000
--- a/arch/x86_64/kernel/kprobes.c
+++ /dev/null
@@ -1,749 +0,0 @@
-/*
- * Kernel Probes (KProbes)
- * arch/x86_64/kernel/kprobes.c
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (C) IBM Corporation, 2002, 2004
- *
- * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
- * Probes initial implementation ( includes contributions from
- * Rusty Russell).
- * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
- * interface to access function arguments.
- * 2004-Oct Jim Keniston <kenistoj@us.ibm.com> and Prasanna S Panchamukhi
- * <prasanna@in.ibm.com> adapted for x86_64
- * 2005-Mar Roland McGrath <roland@redhat.com>
- * Fixed to handle %rip-relative addressing mode correctly.
- * 2005-May Rusty Lynch <rusty.lynch@intel.com>
- * Added function return probes functionality
- */
-
-#include <linux/kprobes.h>
-#include <linux/ptrace.h>
-#include <linux/string.h>
-#include <linux/slab.h>
-#include <linux/preempt.h>
-#include <linux/module.h>
-#include <linux/kdebug.h>
-
-#include <asm/pgtable.h>
-#include <asm/uaccess.h>
-#include <asm/alternative.h>
-
-void jprobe_return_end(void);
-static void __kprobes arch_copy_kprobe(struct kprobe *p);
-
-DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
-DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
-
-/*
- * returns non-zero if opcode modifies the interrupt flag.
- */
-static __always_inline int is_IF_modifier(kprobe_opcode_t *insn)
-{
- switch (*insn) {
- case 0xfa: /* cli */
- case 0xfb: /* sti */
- case 0xcf: /* iret/iretd */
- case 0x9d: /* popf/popfd */
- return 1;
- }
-
- if (*insn >= 0x40 && *insn <= 0x4f && *++insn == 0xcf)
- return 1;
- return 0;
-}
-
-int __kprobes arch_prepare_kprobe(struct kprobe *p)
-{
- /* insn: must be on special executable page on x86_64. */
- p->ainsn.insn = get_insn_slot();
- if (!p->ainsn.insn) {
- return -ENOMEM;
- }
- arch_copy_kprobe(p);
- return 0;
-}
-
-/*
- * Determine if the instruction uses the %rip-relative addressing mode.
- * If it does, return the address of the 32-bit displacement word.
- * If not, return null.
- */
-static s32 __kprobes *is_riprel(u8 *insn)
-{
-#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \
- (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
- (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \
- (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \
- (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \
- << (row % 64))
- static const u64 onebyte_has_modrm[256 / 64] = {
- /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
- /* ------------------------------- */
- W(0x00, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* 00 */
- W(0x10, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* 10 */
- W(0x20, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* 20 */
- W(0x30, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0), /* 30 */
- W(0x40, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 40 */
- W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 50 */
- W(0x60, 0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0)| /* 60 */
- W(0x70, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 70 */
- W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
- W(0x90, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 90 */
- W(0xa0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* a0 */
- W(0xb0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* b0 */
- W(0xc0, 1,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0)| /* c0 */
- W(0xd0, 1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1)| /* d0 */
- W(0xe0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* e0 */
- W(0xf0, 0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1) /* f0 */
- /* ------------------------------- */
- /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
- };
- static const u64 twobyte_has_modrm[256 / 64] = {
- /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
- /* ------------------------------- */
- W(0x00, 1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,1)| /* 0f */
- W(0x10, 1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0)| /* 1f */
- W(0x20, 1,1,1,1,1,0,1,0,1,1,1,1,1,1,1,1)| /* 2f */
- W(0x30, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 3f */
- W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 4f */
- W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 5f */
- W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 6f */
- W(0x70, 1,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1), /* 7f */
- W(0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 8f */
- W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 9f */
- W(0xa0, 0,0,0,1,1,1,1,1,0,0,0,1,1,1,1,1)| /* af */
- W(0xb0, 1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1), /* bf */
- W(0xc0, 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0)| /* cf */
- W(0xd0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* df */
- W(0xe0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* ef */
- W(0xf0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0) /* ff */
- /* ------------------------------- */
- /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
- };
-#undef W
- int need_modrm;
-
- /* Skip legacy instruction prefixes. */
- while (1) {
- switch (*insn) {
- case 0x66:
- case 0x67:
- case 0x2e:
- case 0x3e:
- case 0x26:
- case 0x64:
- case 0x65:
- case 0x36:
- case 0xf0:
- case 0xf3:
- case 0xf2:
- ++insn;
- continue;
- }
- break;
- }
-
- /* Skip REX instruction prefix. */
- if ((*insn & 0xf0) == 0x40)
- ++insn;
-
- if (*insn == 0x0f) { /* Two-byte opcode. */
- ++insn;
- need_modrm = test_bit(*insn, twobyte_has_modrm);
- } else { /* One-byte opcode. */
- need_modrm = test_bit(*insn, onebyte_has_modrm);
- }
-
- if (need_modrm) {
- u8 modrm = *++insn;
- if ((modrm & 0xc7) == 0x05) { /* %rip+disp32 addressing mode */
- /* Displacement follows ModRM byte. */
- return (s32 *) ++insn;
- }
- }
-
- /* No %rip-relative addressing mode here. */
- return NULL;
-}
-
-static void __kprobes arch_copy_kprobe(struct kprobe *p)
-{
- s32 *ripdisp;
- memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE);
- ripdisp = is_riprel(p->ainsn.insn);
- if (ripdisp) {
- /*
- * The copied instruction uses the %rip-relative
- * addressing mode. Adjust the displacement for the
- * difference between the original location of this
- * instruction and the location of the copy that will
- * actually be run. The tricky bit here is making sure
- * that the sign extension happens correctly in this
- * calculation, since we need a signed 32-bit result to
- * be sign-extended to 64 bits when it's added to the
- * %rip value and yield the same 64-bit result that the
- * sign-extension of the original signed 32-bit
- * displacement would have given.
- */
- s64 disp = (u8 *) p->addr + *ripdisp - (u8 *) p->ainsn.insn;
- BUG_ON((s64) (s32) disp != disp); /* Sanity check. */
- *ripdisp = disp;
- }
- p->opcode = *p->addr;
-}
-
-void __kprobes arch_arm_kprobe(struct kprobe *p)
-{
- text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1);
-}
-
-void __kprobes arch_disarm_kprobe(struct kprobe *p)
-{
- text_poke(p->addr, &p->opcode, 1);
-}
-
-void __kprobes arch_remove_kprobe(struct kprobe *p)
-{
- mutex_lock(&kprobe_mutex);
- free_insn_slot(p->ainsn.insn, 0);
- mutex_unlock(&kprobe_mutex);
-}
-
-static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
-{
- kcb->prev_kprobe.kp = kprobe_running();
- kcb->prev_kprobe.status = kcb->kprobe_status;
- kcb->prev_kprobe.old_rflags = kcb->kprobe_old_rflags;
- kcb->prev_kprobe.saved_rflags = kcb->kprobe_saved_rflags;
-}
-
-static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
-{
- __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
- kcb->kprobe_status = kcb->prev_kprobe.status;
- kcb->kprobe_old_rflags = kcb->prev_kprobe.old_rflags;
- kcb->kprobe_saved_rflags = kcb->prev_kprobe.saved_rflags;
-}
-
-static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb)
-{
- __get_cpu_var(current_kprobe) = p;
- kcb->kprobe_saved_rflags = kcb->kprobe_old_rflags
- = (regs->eflags & (TF_MASK | IF_MASK));
- if (is_IF_modifier(p->ainsn.insn))
- kcb->kprobe_saved_rflags &= ~IF_MASK;
-}
-
-static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
-{
- regs->eflags |= TF_MASK;
- regs->eflags &= ~IF_MASK;
- /*single step inline if the instruction is an int3*/
- if (p->opcode == BREAKPOINT_INSTRUCTION)
- regs->rip = (unsigned long)p->addr;
- else
- regs->rip = (unsigned long)p->ainsn.insn;
-}
-
-/* Called with kretprobe_lock held */
-void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
- struct pt_regs *regs)
-{
- unsigned long *sara = (unsigned long *)regs->rsp;
-
- ri->ret_addr = (kprobe_opcode_t *) *sara;
- /* Replace the return addr with trampoline addr */
- *sara = (unsigned long) &kretprobe_trampoline;
-}
-
-int __kprobes kprobe_handler(struct pt_regs *regs)
-{
- struct kprobe *p;
- int ret = 0;
- kprobe_opcode_t *addr = (kprobe_opcode_t *)(regs->rip - sizeof(kprobe_opcode_t));
- struct kprobe_ctlblk *kcb;
-
- /*
- * We don't want to be preempted for the entire
- * duration of kprobe processing
- */
- preempt_disable();
- kcb = get_kprobe_ctlblk();
-
- /* Check we're not actually recursing */
- if (kprobe_running()) {
- p = get_kprobe(addr);
- if (p) {
- if (kcb->kprobe_status == KPROBE_HIT_SS &&
- *p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
- regs->eflags &= ~TF_MASK;
- regs->eflags |= kcb->kprobe_saved_rflags;
- goto no_kprobe;
- } else if (kcb->kprobe_status == KPROBE_HIT_SSDONE) {
- /* TODO: Provide re-entrancy from
- * post_kprobes_handler() and avoid exception
- * stack corruption while single-stepping on
- * the instruction of the new probe.
- */
- arch_disarm_kprobe(p);
- regs->rip = (unsigned long)p->addr;
- reset_current_kprobe();
- ret = 1;
- } else {
- /* We have reentered the kprobe_handler(), since
- * another probe was hit while within the
- * handler. We here save the original kprobe
- * variables and just single step on instruction
- * of the new probe without calling any user
- * handlers.
- */
- save_previous_kprobe(kcb);
- set_current_kprobe(p, regs, kcb);
- kprobes_inc_nmissed_count(p);
- prepare_singlestep(p, regs);
- kcb->kprobe_status = KPROBE_REENTER;
- return 1;
- }
- } else {
- if (*addr != BREAKPOINT_INSTRUCTION) {
- /* The breakpoint instruction was removed by
- * another cpu right after we hit, no further
- * handling of this interrupt is appropriate
- */
- regs->rip = (unsigned long)addr;
- ret = 1;
- goto no_kprobe;
- }
- p = __get_cpu_var(current_kprobe);
- if (p->break_handler && p->break_handler(p, regs)) {
- goto ss_probe;
- }
- }
- goto no_kprobe;
- }
-
- p = get_kprobe(addr);
- if (!p) {
- if (*addr != BREAKPOINT_INSTRUCTION) {
- /*
- * The breakpoint instruction was removed right
- * after we hit it. Another cpu has removed
- * either a probepoint or a debugger breakpoint
- * at this address. In either case, no further
- * handling of this interrupt is appropriate.
- * Back up over the (now missing) int3 and run
- * the original instruction.
- */
- regs->rip = (unsigned long)addr;
- ret = 1;
- }
- /* Not one of ours: let kernel handle it */
- goto no_kprobe;
- }
-
- set_current_kprobe(p, regs, kcb);
- kcb->kprobe_status = KPROBE_HIT_ACTIVE;
-
- if (p->pre_handler && p->pre_handler(p, regs))
- /* handler has already set things up, so skip ss setup */
- return 1;
-
-ss_probe:
- prepare_singlestep(p, regs);
- kcb->kprobe_status = KPROBE_HIT_SS;
- return 1;
-
-no_kprobe:
- preempt_enable_no_resched();
- return ret;
-}
-
-/*
- * For function-return probes, init_kprobes() establishes a probepoint
- * here. When a retprobed function returns, this probe is hit and
- * trampoline_probe_handler() runs, calling the kretprobe's handler.
- */
- void kretprobe_trampoline_holder(void)
- {
- asm volatile ( ".global kretprobe_trampoline\n"
- "kretprobe_trampoline: \n"
- "nop\n");
- }
-
-/*
- * Called when we hit the probe point at kretprobe_trampoline
- */
-int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
-{
- struct kretprobe_instance *ri = NULL;
- struct hlist_head *head, empty_rp;
- struct hlist_node *node, *tmp;
- unsigned long flags, orig_ret_address = 0;
- unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
-
- INIT_HLIST_HEAD(&empty_rp);
- spin_lock_irqsave(&kretprobe_lock, flags);
- head = kretprobe_inst_table_head(current);
-
- /*
- * It is possible to have multiple instances associated with a given
- * task either because an multiple functions in the call path
- * have a return probe installed on them, and/or more then one return
- * return probe was registered for a target function.
- *
- * We can handle this because:
- * - instances are always inserted at the head of the list
- * - when multiple return probes are registered for the same
- * function, the first instance's ret_addr will point to the
- * real return address, and all the rest will point to
- * kretprobe_trampoline
- */
- hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
- if (ri->task != current)
- /* another task is sharing our hash bucket */
- continue;
-
- if (ri->rp && ri->rp->handler)
- ri->rp->handler(ri, regs);
-
- orig_ret_address = (unsigned long)ri->ret_addr;
- recycle_rp_inst(ri, &empty_rp);
-
- if (orig_ret_address != trampoline_address)
- /*
- * This is the real return address. Any other
- * instances associated with this task are for
- * other calls deeper on the call stack
- */
- break;
- }
-
- kretprobe_assert(ri, orig_ret_address, trampoline_address);
- regs->rip = orig_ret_address;
-
- reset_current_kprobe();
- spin_unlock_irqrestore(&kretprobe_lock, flags);
- preempt_enable_no_resched();
-
- hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
- hlist_del(&ri->hlist);
- kfree(ri);
- }
- /*
- * By returning a non-zero value, we are telling
- * kprobe_handler() that we don't want the post_handler
- * to run (and have re-enabled preemption)
- */
- return 1;
-}
-
-/*
- * Called after single-stepping. p->addr is the address of the
- * instruction whose first byte has been replaced by the "int 3"
- * instruction. To avoid the SMP problems that can occur when we
- * temporarily put back the original opcode to single-step, we
- * single-stepped a copy of the instruction. The address of this
- * copy is p->ainsn.insn.
- *
- * This function prepares to return from the post-single-step
- * interrupt. We have to fix up the stack as follows:
- *
- * 0) Except in the case of absolute or indirect jump or call instructions,
- * the new rip is relative to the copied instruction. We need to make
- * it relative to the original instruction.
- *
- * 1) If the single-stepped instruction was pushfl, then the TF and IF
- * flags are set in the just-pushed eflags, and may need to be cleared.
- *
- * 2) If the single-stepped instruction was a call, the return address
- * that is atop the stack is the address following the copied instruction.
- * We need to make it the address following the original instruction.
- */
-static void __kprobes resume_execution(struct kprobe *p,
- struct pt_regs *regs, struct kprobe_ctlblk *kcb)
-{
- unsigned long *tos = (unsigned long *)regs->rsp;
- unsigned long next_rip = 0;
- unsigned long copy_rip = (unsigned long)p->ainsn.insn;
- unsigned long orig_rip = (unsigned long)p->addr;
- kprobe_opcode_t *insn = p->ainsn.insn;
-
- /*skip the REX prefix*/
- if (*insn >= 0x40 && *insn <= 0x4f)
- insn++;
-
- switch (*insn) {
- case 0x9c: /* pushfl */
- *tos &= ~(TF_MASK | IF_MASK);
- *tos |= kcb->kprobe_old_rflags;
- break;
- case 0xc3: /* ret/lret */
- case 0xcb:
- case 0xc2:
- case 0xca:
- regs->eflags &= ~TF_MASK;
- /* rip is already adjusted, no more changes required*/
- return;
- case 0xe8: /* call relative - Fix return addr */
- *tos = orig_rip + (*tos - copy_rip);
- break;
- case 0xff:
- if ((insn[1] & 0x30) == 0x10) {
- /* call absolute, indirect */
- /* Fix return addr; rip is correct. */
- next_rip = regs->rip;
- *tos = orig_rip + (*tos - copy_rip);
- } else if (((insn[1] & 0x31) == 0x20) || /* jmp near, absolute indirect */
- ((insn[1] & 0x31) == 0x21)) { /* jmp far, absolute indirect */
- /* rip is correct. */
- next_rip = regs->rip;
- }
- break;
- case 0xea: /* jmp absolute -- rip is correct */
- next_rip = regs->rip;
- break;
- default:
- break;
- }
-
- regs->eflags &= ~TF_MASK;
- if (next_rip) {
- regs->rip = next_rip;
- } else {
- regs->rip = orig_rip + (regs->rip - copy_rip);
- }
-}
-
-int __kprobes post_kprobe_handler(struct pt_regs *regs)
-{
- struct kprobe *cur = kprobe_running();
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-
- if (!cur)
- return 0;
-
- if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
- kcb->kprobe_status = KPROBE_HIT_SSDONE;
- cur->post_handler(cur, regs, 0);
- }
-
- resume_execution(cur, regs, kcb);
- regs->eflags |= kcb->kprobe_saved_rflags;
-
- /* Restore the original saved kprobes variables and continue. */
- if (kcb->kprobe_status == KPROBE_REENTER) {
- restore_previous_kprobe(kcb);
- goto out;
- }
- reset_current_kprobe();
-out:
- preempt_enable_no_resched();
-
- /*
- * if somebody else is singlestepping across a probe point, eflags
- * will have TF set, in which case, continue the remaining processing
- * of do_debug, as if this is not a probe hit.
- */
- if (regs->eflags & TF_MASK)
- return 0;
-
- return 1;
-}
-
-int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
-{
- struct kprobe *cur = kprobe_running();
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- const struct exception_table_entry *fixup;
-
- switch(kcb->kprobe_status) {
- case KPROBE_HIT_SS:
- case KPROBE_REENTER:
- /*
- * We are here because the instruction being single
- * stepped caused a page fault. We reset the current
- * kprobe and the rip points back to the probe address
- * and allow the page fault handler to continue as a
- * normal page fault.
- */
- regs->rip = (unsigned long)cur->addr;
- regs->eflags |= kcb->kprobe_old_rflags;
- if (kcb->kprobe_status == KPROBE_REENTER)
- restore_previous_kprobe(kcb);
- else
- reset_current_kprobe();
- preempt_enable_no_resched();
- break;
- case KPROBE_HIT_ACTIVE:
- case KPROBE_HIT_SSDONE:
- /*
- * We increment the nmissed count for accounting,
- * we can also use npre/npostfault count for accouting
- * these specific fault cases.
- */
- kprobes_inc_nmissed_count(cur);
-
- /*
- * We come here because instructions in the pre/post
- * handler caused the page_fault, this could happen
- * if handler tries to access user space by
- * copy_from_user(), get_user() etc. Let the
- * user-specified handler try to fix it first.
- */
- if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
- return 1;
-
- /*
- * In case the user-specified fault handler returned
- * zero, try to fix up.
- */
- fixup = search_exception_tables(regs->rip);
- if (fixup) {
- regs->rip = fixup->fixup;
- return 1;
- }
-
- /*
- * fixup() could not handle it,
- * Let do_page_fault() fix it.
- */
- break;
- default:
- break;
- }
- return 0;
-}
-
-/*
- * Wrapper routine for handling exceptions.
- */
-int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
- unsigned long val, void *data)
-{
- struct die_args *args = (struct die_args *)data;
- int ret = NOTIFY_DONE;
-
- if (args->regs && user_mode(args->regs))
- return ret;
-
- switch (val) {
- case DIE_INT3:
- if (kprobe_handler(args->regs))
- ret = NOTIFY_STOP;
- break;
- case DIE_DEBUG:
- if (post_kprobe_handler(args->regs))
- ret = NOTIFY_STOP;
- break;
- case DIE_GPF:
- case DIE_PAGE_FAULT:
- /* kprobe_running() needs smp_processor_id() */
- preempt_disable();
- if (kprobe_running() &&
- kprobe_fault_handler(args->regs, args->trapnr))
- ret = NOTIFY_STOP;
- preempt_enable();
- break;
- default:
- break;
- }
- return ret;
-}
-
-int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
-{
- struct jprobe *jp = container_of(p, struct jprobe, kp);
- unsigned long addr;
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-
- kcb->jprobe_saved_regs = *regs;
- kcb->jprobe_saved_rsp = (long *) regs->rsp;
- addr = (unsigned long)(kcb->jprobe_saved_rsp);
- /*
- * As Linus pointed out, gcc assumes that the callee
- * owns the argument space and could overwrite it, e.g.
- * tailcall optimization. So, to be absolutely safe
- * we also save and restore enough stack bytes to cover
- * the argument area.
- */
- memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr,
- MIN_STACK_SIZE(addr));
- regs->eflags &= ~IF_MASK;
- regs->rip = (unsigned long)(jp->entry);
- return 1;
-}
-
-void __kprobes jprobe_return(void)
-{
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-
- asm volatile (" xchg %%rbx,%%rsp \n"
- " int3 \n"
- " .globl jprobe_return_end \n"
- " jprobe_return_end: \n"
- " nop \n"::"b"
- (kcb->jprobe_saved_rsp):"memory");
-}
-
-int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
-{
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- u8 *addr = (u8 *) (regs->rip - 1);
- unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_rsp);
- struct jprobe *jp = container_of(p, struct jprobe, kp);
-
- if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) {
- if ((long *)regs->rsp != kcb->jprobe_saved_rsp) {
- struct pt_regs *saved_regs =
- container_of(kcb->jprobe_saved_rsp,
- struct pt_regs, rsp);
- printk("current rsp %p does not match saved rsp %p\n",
- (long *)regs->rsp, kcb->jprobe_saved_rsp);
- printk("Saved registers for jprobe %p\n", jp);
- show_registers(saved_regs);
- printk("Current registers\n");
- show_registers(regs);
- BUG();
- }
- *regs = kcb->jprobe_saved_regs;
- memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack,
- MIN_STACK_SIZE(stack_addr));
- preempt_enable_no_resched();
- return 1;
- }
- return 0;
-}
-
-static struct kprobe trampoline_p = {
- .addr = (kprobe_opcode_t *) &kretprobe_trampoline,
- .pre_handler = trampoline_probe_handler
-};
-
-int __init arch_init_kprobes(void)
-{
- return register_kprobe(&trampoline_p);
-}
-
-int __kprobes arch_trampoline_kprobe(struct kprobe *p)
-{
- if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
- return 1;
-
- return 0;
-}
diff --git a/arch/x86_64/kernel/ldt.c b/arch/x86_64/kernel/ldt.c
deleted file mode 100644
index bc9ffd5c19cc..000000000000
--- a/arch/x86_64/kernel/ldt.c
+++ /dev/null
@@ -1,252 +0,0 @@
-/*
- * linux/arch/x86_64/kernel/ldt.c
- *
- * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
- * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
- * Copyright (C) 2002 Andi Kleen
- *
- * This handles calls from both 32bit and 64bit mode.
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <asm/ldt.h>
-#include <asm/desc.h>
-#include <asm/proto.h>
-
-#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
-static void flush_ldt(void *null)
-{
- if (current->active_mm)
- load_LDT(&current->active_mm->context);
-}
-#endif
-
-static int alloc_ldt(mm_context_t *pc, unsigned mincount, int reload)
-{
- void *oldldt;
- void *newldt;
- unsigned oldsize;
-
- if (mincount <= (unsigned)pc->size)
- return 0;
- oldsize = pc->size;
- mincount = (mincount+511)&(~511);
- if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
- newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
- else
- newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
-
- if (!newldt)
- return -ENOMEM;
-
- if (oldsize)
- memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE);
- oldldt = pc->ldt;
- memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE);
- wmb();
- pc->ldt = newldt;
- wmb();
- pc->size = mincount;
- wmb();
- if (reload) {
-#ifdef CONFIG_SMP
- cpumask_t mask;
-
- preempt_disable();
- mask = cpumask_of_cpu(smp_processor_id());
- load_LDT(pc);
- if (!cpus_equal(current->mm->cpu_vm_mask, mask))
- smp_call_function(flush_ldt, NULL, 1, 1);
- preempt_enable();
-#else
- load_LDT(pc);
-#endif
- }
- if (oldsize) {
- if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
- vfree(oldldt);
- else
- kfree(oldldt);
- }
- return 0;
-}
-
-static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
-{
- int err = alloc_ldt(new, old->size, 0);
- if (err < 0)
- return err;
- memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE);
- return 0;
-}
-
-/*
- * we do not have to muck with descriptors here, that is
- * done in switch_mm() as needed.
- */
-int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
-{
- struct mm_struct * old_mm;
- int retval = 0;
-
- init_MUTEX(&mm->context.sem);
- mm->context.size = 0;
- old_mm = current->mm;
- if (old_mm && old_mm->context.size > 0) {
- down(&old_mm->context.sem);
- retval = copy_ldt(&mm->context, &old_mm->context);
- up(&old_mm->context.sem);
- }
- return retval;
-}
-
-/*
- *
- * Don't touch the LDT register - we're already in the next thread.
- */
-void destroy_context(struct mm_struct *mm)
-{
- if (mm->context.size) {
- if ((unsigned)mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE)
- vfree(mm->context.ldt);
- else
- kfree(mm->context.ldt);
- mm->context.size = 0;
- }
-}
-
-static int read_ldt(void __user * ptr, unsigned long bytecount)
-{
- int err;
- unsigned long size;
- struct mm_struct * mm = current->mm;
-
- if (!mm->context.size)
- return 0;
- if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
- bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
-
- down(&mm->context.sem);
- size = mm->context.size*LDT_ENTRY_SIZE;
- if (size > bytecount)
- size = bytecount;
-
- err = 0;
- if (copy_to_user(ptr, mm->context.ldt, size))
- err = -EFAULT;
- up(&mm->context.sem);
- if (err < 0)
- goto error_return;
- if (size != bytecount) {
- /* zero-fill the rest */
- if (clear_user(ptr+size, bytecount-size) != 0) {
- err = -EFAULT;
- goto error_return;
- }
- }
- return bytecount;
-error_return:
- return err;
-}
-
-static int read_default_ldt(void __user * ptr, unsigned long bytecount)
-{
- /* Arbitrary number */
- /* x86-64 default LDT is all zeros */
- if (bytecount > 128)
- bytecount = 128;
- if (clear_user(ptr, bytecount))
- return -EFAULT;
- return bytecount;
-}
-
-static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
-{
- struct task_struct *me = current;
- struct mm_struct * mm = me->mm;
- __u32 entry_1, entry_2, *lp;
- int error;
- struct user_desc ldt_info;
-
- error = -EINVAL;
-
- if (bytecount != sizeof(ldt_info))
- goto out;
- error = -EFAULT;
- if (copy_from_user(&ldt_info, ptr, bytecount))
- goto out;
-
- error = -EINVAL;
- if (ldt_info.entry_number >= LDT_ENTRIES)
- goto out;
- if (ldt_info.contents == 3) {
- if (oldmode)
- goto out;
- if (ldt_info.seg_not_present == 0)
- goto out;
- }
-
- down(&mm->context.sem);
- if (ldt_info.entry_number >= (unsigned)mm->context.size) {
- error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1);
- if (error < 0)
- goto out_unlock;
- }
-
- lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt);
-
- /* Allow LDTs to be cleared by the user. */
- if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
- if (oldmode || LDT_empty(&ldt_info)) {
- entry_1 = 0;
- entry_2 = 0;
- goto install;
- }
- }
-
- entry_1 = LDT_entry_a(&ldt_info);
- entry_2 = LDT_entry_b(&ldt_info);
- if (oldmode)
- entry_2 &= ~(1 << 20);
-
- /* Install the new entry ... */
-install:
- *lp = entry_1;
- *(lp+1) = entry_2;
- error = 0;
-
-out_unlock:
- up(&mm->context.sem);
-out:
- return error;
-}
-
-asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
-{
- int ret = -ENOSYS;
-
- switch (func) {
- case 0:
- ret = read_ldt(ptr, bytecount);
- break;
- case 1:
- ret = write_ldt(ptr, bytecount, 1);
- break;
- case 2:
- ret = read_default_ldt(ptr, bytecount);
- break;
- case 0x11:
- ret = write_ldt(ptr, bytecount, 0);
- break;
- }
- return ret;
-}
diff --git a/arch/x86_64/kernel/machine_kexec.c b/arch/x86_64/kernel/machine_kexec.c
deleted file mode 100644
index c3a554703672..000000000000
--- a/arch/x86_64/kernel/machine_kexec.c
+++ /dev/null
@@ -1,259 +0,0 @@
-/*
- * machine_kexec.c - handle transition of Linux booting another kernel
- * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
- */
-
-#include <linux/mm.h>
-#include <linux/kexec.h>
-#include <linux/string.h>
-#include <linux/reboot.h>
-#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
-#include <asm/mmu_context.h>
-#include <asm/io.h>
-
-#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
-static u64 kexec_pgd[512] PAGE_ALIGNED;
-static u64 kexec_pud0[512] PAGE_ALIGNED;
-static u64 kexec_pmd0[512] PAGE_ALIGNED;
-static u64 kexec_pte0[512] PAGE_ALIGNED;
-static u64 kexec_pud1[512] PAGE_ALIGNED;
-static u64 kexec_pmd1[512] PAGE_ALIGNED;
-static u64 kexec_pte1[512] PAGE_ALIGNED;
-
-static void init_level2_page(pmd_t *level2p, unsigned long addr)
-{
- unsigned long end_addr;
-
- addr &= PAGE_MASK;
- end_addr = addr + PUD_SIZE;
- while (addr < end_addr) {
- set_pmd(level2p++, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
- addr += PMD_SIZE;
- }
-}
-
-static int init_level3_page(struct kimage *image, pud_t *level3p,
- unsigned long addr, unsigned long last_addr)
-{
- unsigned long end_addr;
- int result;
-
- result = 0;
- addr &= PAGE_MASK;
- end_addr = addr + PGDIR_SIZE;
- while ((addr < last_addr) && (addr < end_addr)) {
- struct page *page;
- pmd_t *level2p;
-
- page = kimage_alloc_control_pages(image, 0);
- if (!page) {
- result = -ENOMEM;
- goto out;
- }
- level2p = (pmd_t *)page_address(page);
- init_level2_page(level2p, addr);
- set_pud(level3p++, __pud(__pa(level2p) | _KERNPG_TABLE));
- addr += PUD_SIZE;
- }
- /* clear the unused entries */
- while (addr < end_addr) {
- pud_clear(level3p++);
- addr += PUD_SIZE;
- }
-out:
- return result;
-}
-
-
-static int init_level4_page(struct kimage *image, pgd_t *level4p,
- unsigned long addr, unsigned long last_addr)
-{
- unsigned long end_addr;
- int result;
-
- result = 0;
- addr &= PAGE_MASK;
- end_addr = addr + (PTRS_PER_PGD * PGDIR_SIZE);
- while ((addr < last_addr) && (addr < end_addr)) {
- struct page *page;
- pud_t *level3p;
-
- page = kimage_alloc_control_pages(image, 0);
- if (!page) {
- result = -ENOMEM;
- goto out;
- }
- level3p = (pud_t *)page_address(page);
- result = init_level3_page(image, level3p, addr, last_addr);
- if (result) {
- goto out;
- }
- set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE));
- addr += PGDIR_SIZE;
- }
- /* clear the unused entries */
- while (addr < end_addr) {
- pgd_clear(level4p++);
- addr += PGDIR_SIZE;
- }
-out:
- return result;
-}
-
-
-static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
-{
- pgd_t *level4p;
- level4p = (pgd_t *)__va(start_pgtable);
- return init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT);
-}
-
-static void set_idt(void *newidt, u16 limit)
-{
- struct desc_ptr curidt;
-
- /* x86-64 supports unaliged loads & stores */
- curidt.size = limit;
- curidt.address = (unsigned long)newidt;
-
- __asm__ __volatile__ (
- "lidtq %0\n"
- : : "m" (curidt)
- );
-};
-
-
-static void set_gdt(void *newgdt, u16 limit)
-{
- struct desc_ptr curgdt;
-
- /* x86-64 supports unaligned loads & stores */
- curgdt.size = limit;
- curgdt.address = (unsigned long)newgdt;
-
- __asm__ __volatile__ (
- "lgdtq %0\n"
- : : "m" (curgdt)
- );
-};
-
-static void load_segments(void)
-{
- __asm__ __volatile__ (
- "\tmovl %0,%%ds\n"
- "\tmovl %0,%%es\n"
- "\tmovl %0,%%ss\n"
- "\tmovl %0,%%fs\n"
- "\tmovl %0,%%gs\n"
- : : "a" (__KERNEL_DS) : "memory"
- );
-}
-
-int machine_kexec_prepare(struct kimage *image)
-{
- unsigned long start_pgtable;
- int result;
-
- /* Calculate the offsets */
- start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
-
- /* Setup the identity mapped 64bit page table */
- result = init_pgtable(image, start_pgtable);
- if (result)
- return result;
-
- return 0;
-}
-
-void machine_kexec_cleanup(struct kimage *image)
-{
- return;
-}
-
-/*
- * Do not allocate memory (or fail in any way) in machine_kexec().
- * We are past the point of no return, committed to rebooting now.
- */
-NORET_TYPE void machine_kexec(struct kimage *image)
-{
- unsigned long page_list[PAGES_NR];
- void *control_page;
-
- /* Interrupts aren't acceptable while we reboot */
- local_irq_disable();
-
- control_page = page_address(image->control_code_page) + PAGE_SIZE;
- memcpy(control_page, relocate_kernel, PAGE_SIZE);
-
- page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
- page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
- page_list[PA_PGD] = virt_to_phys(&kexec_pgd);
- page_list[VA_PGD] = (unsigned long)kexec_pgd;
- page_list[PA_PUD_0] = virt_to_phys(&kexec_pud0);
- page_list[VA_PUD_0] = (unsigned long)kexec_pud0;
- page_list[PA_PMD_0] = virt_to_phys(&kexec_pmd0);
- page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
- page_list[PA_PTE_0] = virt_to_phys(&kexec_pte0);
- page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
- page_list[PA_PUD_1] = virt_to_phys(&kexec_pud1);
- page_list[VA_PUD_1] = (unsigned long)kexec_pud1;
- page_list[PA_PMD_1] = virt_to_phys(&kexec_pmd1);
- page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
- page_list[PA_PTE_1] = virt_to_phys(&kexec_pte1);
- page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
-
- page_list[PA_TABLE_PAGE] =
- (unsigned long)__pa(page_address(image->control_code_page));
-
- /* The segment registers are funny things, they have both a
- * visible and an invisible part. Whenever the visible part is
- * set to a specific selector, the invisible part is loaded
- * with from a table in memory. At no other time is the
- * descriptor table in memory accessed.
- *
- * I take advantage of this here by force loading the
- * segments, before I zap the gdt with an invalid value.
- */
- load_segments();
- /* The gdt & idt are now invalid.
- * If you want to load them you must set up your own idt & gdt.
- */
- set_gdt(phys_to_virt(0),0);
- set_idt(phys_to_virt(0),0);
-
- /* now call it */
- relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
- image->start);
-}
-
-/* crashkernel=size@addr specifies the location to reserve for
- * a crash kernel. By reserving this memory we guarantee
- * that linux never set's it up as a DMA target.
- * Useful for holding code to do something appropriate
- * after a kernel panic.
- */
-static int __init setup_crashkernel(char *arg)
-{
- unsigned long size, base;
- char *p;
- if (!arg)
- return -EINVAL;
- size = memparse(arg, &p);
- if (arg == p)
- return -EINVAL;
- if (*p == '@') {
- base = memparse(p+1, &p);
- /* FIXME: Do I want a sanity check to validate the
- * memory range? Yes you do, but it's too early for
- * e820 -AK */
- crashk_res.start = base;
- crashk_res.end = base + size - 1;
- }
- return 0;
-}
-early_param("crashkernel", setup_crashkernel);
-
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
deleted file mode 100644
index a66d607f5b92..000000000000
--- a/arch/x86_64/kernel/mce.c
+++ /dev/null
@@ -1,875 +0,0 @@
-/*
- * Machine check handler.
- * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
- * Rest from unknown author(s).
- * 2004 Andi Kleen. Rewrote most of it.
- */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/string.h>
-#include <linux/rcupdate.h>
-#include <linux/kallsyms.h>
-#include <linux/sysdev.h>
-#include <linux/miscdevice.h>
-#include <linux/fs.h>
-#include <linux/capability.h>
-#include <linux/cpu.h>
-#include <linux/percpu.h>
-#include <linux/poll.h>
-#include <linux/thread_info.h>
-#include <linux/ctype.h>
-#include <linux/kmod.h>
-#include <linux/kdebug.h>
-#include <asm/processor.h>
-#include <asm/msr.h>
-#include <asm/mce.h>
-#include <asm/uaccess.h>
-#include <asm/smp.h>
-#include <asm/idle.h>
-
-#define MISC_MCELOG_MINOR 227
-#define NR_BANKS 6
-
-atomic_t mce_entry;
-
-static int mce_dont_init;
-
-/*
- * Tolerant levels:
- * 0: always panic on uncorrected errors, log corrected errors
- * 1: panic or SIGBUS on uncorrected errors, log corrected errors
- * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors
- * 3: never panic or SIGBUS, log all errors (for testing only)
- */
-static int tolerant = 1;
-static int banks;
-static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
-static unsigned long notify_user;
-static int rip_msr;
-static int mce_bootlog = 1;
-static atomic_t mce_events;
-
-static char trigger[128];
-static char *trigger_argv[2] = { trigger, NULL };
-
-static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
-
-/*
- * Lockless MCE logging infrastructure.
- * This avoids deadlocks on printk locks without having to break locks. Also
- * separate MCEs from kernel messages to avoid bogus bug reports.
- */
-
-struct mce_log mcelog = {
- MCE_LOG_SIGNATURE,
- MCE_LOG_LEN,
-};
-
-void mce_log(struct mce *mce)
-{
- unsigned next, entry;
- atomic_inc(&mce_events);
- mce->finished = 0;
- wmb();
- for (;;) {
- entry = rcu_dereference(mcelog.next);
- /* The rmb forces the compiler to reload next in each
- iteration */
- rmb();
- for (;;) {
- /* When the buffer fills up discard new entries. Assume
- that the earlier errors are the more interesting. */
- if (entry >= MCE_LOG_LEN) {
- set_bit(MCE_OVERFLOW, &mcelog.flags);
- return;
- }
- /* Old left over entry. Skip. */
- if (mcelog.entry[entry].finished) {
- entry++;
- continue;
- }
- break;
- }
- smp_rmb();
- next = entry + 1;
- if (cmpxchg(&mcelog.next, entry, next) == entry)
- break;
- }
- memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
- wmb();
- mcelog.entry[entry].finished = 1;
- wmb();
-
- set_bit(0, &notify_user);
-}
-
-static void print_mce(struct mce *m)
-{
- printk(KERN_EMERG "\n"
- KERN_EMERG "HARDWARE ERROR\n"
- KERN_EMERG
- "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
- m->cpu, m->mcgstatus, m->bank, m->status);
- if (m->rip) {
- printk(KERN_EMERG
- "RIP%s %02x:<%016Lx> ",
- !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
- m->cs, m->rip);
- if (m->cs == __KERNEL_CS)
- print_symbol("{%s}", m->rip);
- printk("\n");
- }
- printk(KERN_EMERG "TSC %Lx ", m->tsc);
- if (m->addr)
- printk("ADDR %Lx ", m->addr);
- if (m->misc)
- printk("MISC %Lx ", m->misc);
- printk("\n");
- printk(KERN_EMERG "This is not a software problem!\n");
- printk(KERN_EMERG
- "Run through mcelog --ascii to decode and contact your hardware vendor\n");
-}
-
-static void mce_panic(char *msg, struct mce *backup, unsigned long start)
-{
- int i;
-
- oops_begin();
- for (i = 0; i < MCE_LOG_LEN; i++) {
- unsigned long tsc = mcelog.entry[i].tsc;
- if (time_before(tsc, start))
- continue;
- print_mce(&mcelog.entry[i]);
- if (backup && mcelog.entry[i].tsc == backup->tsc)
- backup = NULL;
- }
- if (backup)
- print_mce(backup);
- panic(msg);
-}
-
-static int mce_available(struct cpuinfo_x86 *c)
-{
- return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
-}
-
-static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
-{
- if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) {
- m->rip = regs->rip;
- m->cs = regs->cs;
- } else {
- m->rip = 0;
- m->cs = 0;
- }
- if (rip_msr) {
- /* Assume the RIP in the MSR is exact. Is this true? */
- m->mcgstatus |= MCG_STATUS_EIPV;
- rdmsrl(rip_msr, m->rip);
- m->cs = 0;
- }
-}
-
-/*
- * The actual machine check handler
- */
-
-void do_machine_check(struct pt_regs * regs, long error_code)
-{
- struct mce m, panicm;
- u64 mcestart = 0;
- int i;
- int panicm_found = 0;
- /*
- * If no_way_out gets set, there is no safe way to recover from this
- * MCE. If tolerant is cranked up, we'll try anyway.
- */
- int no_way_out = 0;
- /*
- * If kill_it gets set, there might be a way to recover from this
- * error.
- */
- int kill_it = 0;
-
- atomic_inc(&mce_entry);
-
- if (regs)
- notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL);
- if (!banks)
- goto out2;
-
- memset(&m, 0, sizeof(struct mce));
- m.cpu = smp_processor_id();
- rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
- /* if the restart IP is not valid, we're done for */
- if (!(m.mcgstatus & MCG_STATUS_RIPV))
- no_way_out = 1;
-
- rdtscll(mcestart);
- barrier();
-
- for (i = 0; i < banks; i++) {
- if (!bank[i])
- continue;
-
- m.misc = 0;
- m.addr = 0;
- m.bank = i;
- m.tsc = 0;
-
- rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
- if ((m.status & MCI_STATUS_VAL) == 0)
- continue;
-
- if (m.status & MCI_STATUS_EN) {
- /* if PCC was set, there's no way out */
- no_way_out |= !!(m.status & MCI_STATUS_PCC);
- /*
- * If this error was uncorrectable and there was
- * an overflow, we're in trouble. If no overflow,
- * we might get away with just killing a task.
- */
- if (m.status & MCI_STATUS_UC) {
- if (tolerant < 1 || m.status & MCI_STATUS_OVER)
- no_way_out = 1;
- kill_it = 1;
- }
- }
-
- if (m.status & MCI_STATUS_MISCV)
- rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc);
- if (m.status & MCI_STATUS_ADDRV)
- rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);
-
- mce_get_rip(&m, regs);
- if (error_code >= 0)
- rdtscll(m.tsc);
- if (error_code != -2)
- mce_log(&m);
-
- /* Did this bank cause the exception? */
- /* Assume that the bank with uncorrectable errors did it,
- and that there is only a single one. */
- if ((m.status & MCI_STATUS_UC) && (m.status & MCI_STATUS_EN)) {
- panicm = m;
- panicm_found = 1;
- }
-
- add_taint(TAINT_MACHINE_CHECK);
- }
-
- /* Never do anything final in the polling timer */
- if (!regs)
- goto out;
-
- /* If we didn't find an uncorrectable error, pick
- the last one (shouldn't happen, just being safe). */
- if (!panicm_found)
- panicm = m;
-
- /*
- * If we have decided that we just CAN'T continue, and the user
- * has not set tolerant to an insane level, give up and die.
- */
- if (no_way_out && tolerant < 3)
- mce_panic("Machine check", &panicm, mcestart);
-
- /*
- * If the error seems to be unrecoverable, something should be
- * done. Try to kill as little as possible. If we can kill just
- * one task, do that. If the user has set the tolerance very
- * high, don't try to do anything at all.
- */
- if (kill_it && tolerant < 3) {
- int user_space = 0;
-
- /*
- * If the EIPV bit is set, it means the saved IP is the
- * instruction which caused the MCE.
- */
- if (m.mcgstatus & MCG_STATUS_EIPV)
- user_space = panicm.rip && (panicm.cs & 3);
-
- /*
- * If we know that the error was in user space, send a
- * SIGBUS. Otherwise, panic if tolerance is low.
- *
- * do_exit() takes an awful lot of locks and has a slight
- * risk of deadlocking.
- */
- if (user_space) {
- do_exit(SIGBUS);
- } else if (panic_on_oops || tolerant < 2) {
- mce_panic("Uncorrected machine check",
- &panicm, mcestart);
- }
- }
-
- /* notify userspace ASAP */
- set_thread_flag(TIF_MCE_NOTIFY);
-
- out:
- /* the last thing we do is clear state */
- for (i = 0; i < banks; i++)
- wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
- wrmsrl(MSR_IA32_MCG_STATUS, 0);
- out2:
- atomic_dec(&mce_entry);
-}
-
-#ifdef CONFIG_X86_MCE_INTEL
-/***
- * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog
- * @cpu: The CPU on which the event occured.
- * @status: Event status information
- *
- * This function should be called by the thermal interrupt after the
- * event has been processed and the decision was made to log the event
- * further.
- *
- * The status parameter will be saved to the 'status' field of 'struct mce'
- * and historically has been the register value of the
- * MSR_IA32_THERMAL_STATUS (Intel) msr.
- */
-void mce_log_therm_throt_event(unsigned int cpu, __u64 status)
-{
- struct mce m;
-
- memset(&m, 0, sizeof(m));
- m.cpu = cpu;
- m.bank = MCE_THERMAL_BANK;
- m.status = status;
- rdtscll(m.tsc);
- mce_log(&m);
-}
-#endif /* CONFIG_X86_MCE_INTEL */
-
-/*
- * Periodic polling timer for "silent" machine check errors. If the
- * poller finds an MCE, poll 2x faster. When the poller finds no more
- * errors, poll 2x slower (up to check_interval seconds).
- */
-
-static int check_interval = 5 * 60; /* 5 minutes */
-static int next_interval; /* in jiffies */
-static void mcheck_timer(struct work_struct *work);
-static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer);
-
-static void mcheck_check_cpu(void *info)
-{
- if (mce_available(&current_cpu_data))
- do_machine_check(NULL, 0);
-}
-
-static void mcheck_timer(struct work_struct *work)
-{
- on_each_cpu(mcheck_check_cpu, NULL, 1, 1);
-
- /*
- * Alert userspace if needed. If we logged an MCE, reduce the
- * polling interval, otherwise increase the polling interval.
- */
- if (mce_notify_user()) {
- next_interval = max(next_interval/2, HZ/100);
- } else {
- next_interval = min(next_interval*2,
- (int)round_jiffies_relative(check_interval*HZ));
- }
-
- schedule_delayed_work(&mcheck_work, next_interval);
-}
-
-/*
- * This is only called from process context. This is where we do
- * anything we need to alert userspace about new MCEs. This is called
- * directly from the poller and also from entry.S and idle, thanks to
- * TIF_MCE_NOTIFY.
- */
-int mce_notify_user(void)
-{
- clear_thread_flag(TIF_MCE_NOTIFY);
- if (test_and_clear_bit(0, &notify_user)) {
- static unsigned long last_print;
- unsigned long now = jiffies;
-
- wake_up_interruptible(&mce_wait);
- if (trigger[0])
- call_usermodehelper(trigger, trigger_argv, NULL,
- UMH_NO_WAIT);
-
- if (time_after_eq(now, last_print + (check_interval*HZ))) {
- last_print = now;
- printk(KERN_INFO "Machine check events logged\n");
- }
-
- return 1;
- }
- return 0;
-}
-
-/* see if the idle task needs to notify userspace */
-static int
-mce_idle_callback(struct notifier_block *nfb, unsigned long action, void *junk)
-{
- /* IDLE_END should be safe - interrupts are back on */
- if (action == IDLE_END && test_thread_flag(TIF_MCE_NOTIFY))
- mce_notify_user();
-
- return NOTIFY_OK;
-}
-
-static struct notifier_block mce_idle_notifier = {
- .notifier_call = mce_idle_callback,
-};
-
-static __init int periodic_mcheck_init(void)
-{
- next_interval = check_interval * HZ;
- if (next_interval)
- schedule_delayed_work(&mcheck_work,
- round_jiffies_relative(next_interval));
- idle_notifier_register(&mce_idle_notifier);
- return 0;
-}
-__initcall(periodic_mcheck_init);
-
-
-/*
- * Initialize Machine Checks for a CPU.
- */
-static void mce_init(void *dummy)
-{
- u64 cap;
- int i;
-
- rdmsrl(MSR_IA32_MCG_CAP, cap);
- banks = cap & 0xff;
- if (banks > NR_BANKS) {
- printk(KERN_INFO "MCE: warning: using only %d banks\n", banks);
- banks = NR_BANKS;
- }
- /* Use accurate RIP reporting if available. */
- if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
- rip_msr = MSR_IA32_MCG_EIP;
-
- /* Log the machine checks left over from the previous reset.
- This also clears all registers */
- do_machine_check(NULL, mce_bootlog ? -1 : -2);
-
- set_in_cr4(X86_CR4_MCE);
-
- if (cap & MCG_CTL_P)
- wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
-
- for (i = 0; i < banks; i++) {
- wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
- wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
- }
-}
-
-/* Add per CPU specific workarounds here */
-static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
-{
- /* This should be disabled by the BIOS, but isn't always */
- if (c->x86_vendor == X86_VENDOR_AMD && c->x86 == 15) {
- /* disable GART TBL walk error reporting, which trips off
- incorrectly with the IOMMU & 3ware & Cerberus. */
- clear_bit(10, &bank[4]);
- /* Lots of broken BIOS around that don't clear them
- by default and leave crap in there. Don't log. */
- mce_bootlog = 0;
- }
-
-}
-
-static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
-{
- switch (c->x86_vendor) {
- case X86_VENDOR_INTEL:
- mce_intel_feature_init(c);
- break;
- case X86_VENDOR_AMD:
- mce_amd_feature_init(c);
- break;
- default:
- break;
- }
-}
-
-/*
- * Called for each booted CPU to set up machine checks.
- * Must be called with preempt off.
- */
-void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
-{
- static cpumask_t mce_cpus = CPU_MASK_NONE;
-
- mce_cpu_quirks(c);
-
- if (mce_dont_init ||
- cpu_test_and_set(smp_processor_id(), mce_cpus) ||
- !mce_available(c))
- return;
-
- mce_init(NULL);
- mce_cpu_features(c);
-}
-
-/*
- * Character device to read and clear the MCE log.
- */
-
-static DEFINE_SPINLOCK(mce_state_lock);
-static int open_count; /* #times opened */
-static int open_exclu; /* already open exclusive? */
-
-static int mce_open(struct inode *inode, struct file *file)
-{
- spin_lock(&mce_state_lock);
-
- if (open_exclu || (open_count && (file->f_flags & O_EXCL))) {
- spin_unlock(&mce_state_lock);
- return -EBUSY;
- }
-
- if (file->f_flags & O_EXCL)
- open_exclu = 1;
- open_count++;
-
- spin_unlock(&mce_state_lock);
-
- return nonseekable_open(inode, file);
-}
-
-static int mce_release(struct inode *inode, struct file *file)
-{
- spin_lock(&mce_state_lock);
-
- open_count--;
- open_exclu = 0;
-
- spin_unlock(&mce_state_lock);
-
- return 0;
-}
-
-static void collect_tscs(void *data)
-{
- unsigned long *cpu_tsc = (unsigned long *)data;
- rdtscll(cpu_tsc[smp_processor_id()]);
-}
-
-static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff_t *off)
-{
- unsigned long *cpu_tsc;
- static DECLARE_MUTEX(mce_read_sem);
- unsigned next;
- char __user *buf = ubuf;
- int i, err;
-
- cpu_tsc = kmalloc(NR_CPUS * sizeof(long), GFP_KERNEL);
- if (!cpu_tsc)
- return -ENOMEM;
-
- down(&mce_read_sem);
- next = rcu_dereference(mcelog.next);
-
- /* Only supports full reads right now */
- if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
- up(&mce_read_sem);
- kfree(cpu_tsc);
- return -EINVAL;
- }
-
- err = 0;
- for (i = 0; i < next; i++) {
- unsigned long start = jiffies;
- while (!mcelog.entry[i].finished) {
- if (time_after_eq(jiffies, start + 2)) {
- memset(mcelog.entry + i,0, sizeof(struct mce));
- goto timeout;
- }
- cpu_relax();
- }
- smp_rmb();
- err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce));
- buf += sizeof(struct mce);
- timeout:
- ;
- }
-
- memset(mcelog.entry, 0, next * sizeof(struct mce));
- mcelog.next = 0;
-
- synchronize_sched();
-
- /* Collect entries that were still getting written before the synchronize. */
-
- on_each_cpu(collect_tscs, cpu_tsc, 1, 1);
- for (i = next; i < MCE_LOG_LEN; i++) {
- if (mcelog.entry[i].finished &&
- mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
- err |= copy_to_user(buf, mcelog.entry+i, sizeof(struct mce));
- smp_rmb();
- buf += sizeof(struct mce);
- memset(&mcelog.entry[i], 0, sizeof(struct mce));
- }
- }
- up(&mce_read_sem);
- kfree(cpu_tsc);
- return err ? -EFAULT : buf - ubuf;
-}
-
-static unsigned int mce_poll(struct file *file, poll_table *wait)
-{
- poll_wait(file, &mce_wait, wait);
- if (rcu_dereference(mcelog.next))
- return POLLIN | POLLRDNORM;
- return 0;
-}
-
-static int mce_ioctl(struct inode *i, struct file *f,unsigned int cmd, unsigned long arg)
-{
- int __user *p = (int __user *)arg;
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- switch (cmd) {
- case MCE_GET_RECORD_LEN:
- return put_user(sizeof(struct mce), p);
- case MCE_GET_LOG_LEN:
- return put_user(MCE_LOG_LEN, p);
- case MCE_GETCLEAR_FLAGS: {
- unsigned flags;
- do {
- flags = mcelog.flags;
- } while (cmpxchg(&mcelog.flags, flags, 0) != flags);
- return put_user(flags, p);
- }
- default:
- return -ENOTTY;
- }
-}
-
-static const struct file_operations mce_chrdev_ops = {
- .open = mce_open,
- .release = mce_release,
- .read = mce_read,
- .poll = mce_poll,
- .ioctl = mce_ioctl,
-};
-
-static struct miscdevice mce_log_device = {
- MISC_MCELOG_MINOR,
- "mcelog",
- &mce_chrdev_ops,
-};
-
-static unsigned long old_cr4 __initdata;
-
-void __init stop_mce(void)
-{
- old_cr4 = read_cr4();
- clear_in_cr4(X86_CR4_MCE);
-}
-
-void __init restart_mce(void)
-{
- if (old_cr4 & X86_CR4_MCE)
- set_in_cr4(X86_CR4_MCE);
-}
-
-/*
- * Old style boot options parsing. Only for compatibility.
- */
-
-static int __init mcheck_disable(char *str)
-{
- mce_dont_init = 1;
- return 1;
-}
-
-/* mce=off disables machine check. Note you can reenable it later
- using sysfs.
- mce=TOLERANCELEVEL (number, see above)
- mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
- mce=nobootlog Don't log MCEs from before booting. */
-static int __init mcheck_enable(char *str)
-{
- if (*str == '=')
- str++;
- if (!strcmp(str, "off"))
- mce_dont_init = 1;
- else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog"))
- mce_bootlog = str[0] == 'b';
- else if (isdigit(str[0]))
- get_option(&str, &tolerant);
- else
- printk("mce= argument %s ignored. Please use /sys", str);
- return 1;
-}
-
-__setup("nomce", mcheck_disable);
-__setup("mce", mcheck_enable);
-
-/*
- * Sysfs support
- */
-
-/* On resume clear all MCE state. Don't want to see leftovers from the BIOS.
- Only one CPU is active at this time, the others get readded later using
- CPU hotplug. */
-static int mce_resume(struct sys_device *dev)
-{
- mce_init(NULL);
- return 0;
-}
-
-/* Reinit MCEs after user configuration changes */
-static void mce_restart(void)
-{
- if (next_interval)
- cancel_delayed_work(&mcheck_work);
- /* Timer race is harmless here */
- on_each_cpu(mce_init, NULL, 1, 1);
- next_interval = check_interval * HZ;
- if (next_interval)
- schedule_delayed_work(&mcheck_work,
- round_jiffies_relative(next_interval));
-}
-
-static struct sysdev_class mce_sysclass = {
- .resume = mce_resume,
- set_kset_name("machinecheck"),
-};
-
-DEFINE_PER_CPU(struct sys_device, device_mce);
-
-/* Why are there no generic functions for this? */
-#define ACCESSOR(name, var, start) \
- static ssize_t show_ ## name(struct sys_device *s, char *buf) { \
- return sprintf(buf, "%lx\n", (unsigned long)var); \
- } \
- static ssize_t set_ ## name(struct sys_device *s,const char *buf,size_t siz) { \
- char *end; \
- unsigned long new = simple_strtoul(buf, &end, 0); \
- if (end == buf) return -EINVAL; \
- var = new; \
- start; \
- return end-buf; \
- } \
- static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
-
-/* TBD should generate these dynamically based on number of available banks */
-ACCESSOR(bank0ctl,bank[0],mce_restart())
-ACCESSOR(bank1ctl,bank[1],mce_restart())
-ACCESSOR(bank2ctl,bank[2],mce_restart())
-ACCESSOR(bank3ctl,bank[3],mce_restart())
-ACCESSOR(bank4ctl,bank[4],mce_restart())
-ACCESSOR(bank5ctl,bank[5],mce_restart())
-
-static ssize_t show_trigger(struct sys_device *s, char *buf)
-{
- strcpy(buf, trigger);
- strcat(buf, "\n");
- return strlen(trigger) + 1;
-}
-
-static ssize_t set_trigger(struct sys_device *s,const char *buf,size_t siz)
-{
- char *p;
- int len;
- strncpy(trigger, buf, sizeof(trigger));
- trigger[sizeof(trigger)-1] = 0;
- len = strlen(trigger);
- p = strchr(trigger, '\n');
- if (*p) *p = 0;
- return len;
-}
-
-static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
-ACCESSOR(tolerant,tolerant,)
-ACCESSOR(check_interval,check_interval,mce_restart())
-static struct sysdev_attribute *mce_attributes[] = {
- &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl,
- &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl,
- &attr_tolerant, &attr_check_interval, &attr_trigger,
- NULL
-};
-
-/* Per cpu sysdev init. All of the cpus still share the same ctl bank */
-static __cpuinit int mce_create_device(unsigned int cpu)
-{
- int err;
- int i;
- if (!mce_available(&cpu_data[cpu]))
- return -EIO;
-
- per_cpu(device_mce,cpu).id = cpu;
- per_cpu(device_mce,cpu).cls = &mce_sysclass;
-
- err = sysdev_register(&per_cpu(device_mce,cpu));
-
- if (!err) {
- for (i = 0; mce_attributes[i]; i++)
- sysdev_create_file(&per_cpu(device_mce,cpu),
- mce_attributes[i]);
- }
- return err;
-}
-
-static void mce_remove_device(unsigned int cpu)
-{
- int i;
-
- for (i = 0; mce_attributes[i]; i++)
- sysdev_remove_file(&per_cpu(device_mce,cpu),
- mce_attributes[i]);
- sysdev_unregister(&per_cpu(device_mce,cpu));
- memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
-}
-
-/* Get notified when a cpu comes on/off. Be hotplug friendly. */
-static int
-mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
-{
- unsigned int cpu = (unsigned long)hcpu;
-
- switch (action) {
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- mce_create_device(cpu);
- break;
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- mce_remove_device(cpu);
- break;
- }
- return NOTIFY_OK;
-}
-
-static struct notifier_block mce_cpu_notifier = {
- .notifier_call = mce_cpu_callback,
-};
-
-static __init int mce_init_device(void)
-{
- int err;
- int i = 0;
-
- if (!mce_available(&boot_cpu_data))
- return -EIO;
- err = sysdev_class_register(&mce_sysclass);
-
- for_each_online_cpu(i) {
- mce_create_device(i);
- }
-
- register_hotcpu_notifier(&mce_cpu_notifier);
- misc_register(&mce_log_device);
- return err;
-}
-
-device_initcall(mce_init_device);
diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c
deleted file mode 100644
index 2f8a7f18b0fe..000000000000
--- a/arch/x86_64/kernel/mce_amd.c
+++ /dev/null
@@ -1,689 +0,0 @@
-/*
- * (c) 2005, 2006 Advanced Micro Devices, Inc.
- * Your use of this code is subject to the terms and conditions of the
- * GNU general public license version 2. See "COPYING" or
- * http://www.gnu.org/licenses/gpl.html
- *
- * Written by Jacob Shin - AMD, Inc.
- *
- * Support : jacob.shin@amd.com
- *
- * April 2006
- * - added support for AMD Family 0x10 processors
- *
- * All MC4_MISCi registers are shared between multi-cores
- */
-
-#include <linux/cpu.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/kobject.h>
-#include <linux/notifier.h>
-#include <linux/sched.h>
-#include <linux/smp.h>
-#include <linux/sysdev.h>
-#include <linux/sysfs.h>
-#include <asm/apic.h>
-#include <asm/mce.h>
-#include <asm/msr.h>
-#include <asm/percpu.h>
-#include <asm/idle.h>
-
-#define PFX "mce_threshold: "
-#define VERSION "version 1.1.1"
-#define NR_BANKS 6
-#define NR_BLOCKS 9
-#define THRESHOLD_MAX 0xFFF
-#define INT_TYPE_APIC 0x00020000
-#define MASK_VALID_HI 0x80000000
-#define MASK_CNTP_HI 0x40000000
-#define MASK_LOCKED_HI 0x20000000
-#define MASK_LVTOFF_HI 0x00F00000
-#define MASK_COUNT_EN_HI 0x00080000
-#define MASK_INT_TYPE_HI 0x00060000
-#define MASK_OVERFLOW_HI 0x00010000
-#define MASK_ERR_COUNT_HI 0x00000FFF
-#define MASK_BLKPTR_LO 0xFF000000
-#define MCG_XBLK_ADDR 0xC0000400
-
-struct threshold_block {
- unsigned int block;
- unsigned int bank;
- unsigned int cpu;
- u32 address;
- u16 interrupt_enable;
- u16 threshold_limit;
- struct kobject kobj;
- struct list_head miscj;
-};
-
-/* defaults used early on boot */
-static struct threshold_block threshold_defaults = {
- .interrupt_enable = 0,
- .threshold_limit = THRESHOLD_MAX,
-};
-
-struct threshold_bank {
- struct kobject kobj;
- struct threshold_block *blocks;
- cpumask_t cpus;
-};
-static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
-
-#ifdef CONFIG_SMP
-static unsigned char shared_bank[NR_BANKS] = {
- 0, 0, 0, 0, 1
-};
-#endif
-
-static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
-
-/*
- * CPU Initialization
- */
-
-/* must be called with correct cpu affinity */
-static void threshold_restart_bank(struct threshold_block *b,
- int reset, u16 old_limit)
-{
- u32 mci_misc_hi, mci_misc_lo;
-
- rdmsr(b->address, mci_misc_lo, mci_misc_hi);
-
- if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
- reset = 1; /* limit cannot be lower than err count */
-
- if (reset) { /* reset err count and overflow bit */
- mci_misc_hi =
- (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
- (THRESHOLD_MAX - b->threshold_limit);
- } else if (old_limit) { /* change limit w/o reset */
- int new_count = (mci_misc_hi & THRESHOLD_MAX) +
- (old_limit - b->threshold_limit);
- mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
- (new_count & THRESHOLD_MAX);
- }
-
- b->interrupt_enable ?
- (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
- (mci_misc_hi &= ~MASK_INT_TYPE_HI);
-
- mci_misc_hi |= MASK_COUNT_EN_HI;
- wrmsr(b->address, mci_misc_lo, mci_misc_hi);
-}
-
-/* cpu init entry point, called from mce.c with preempt off */
-void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
-{
- unsigned int bank, block;
- unsigned int cpu = smp_processor_id();
- u32 low = 0, high = 0, address = 0;
-
- for (bank = 0; bank < NR_BANKS; ++bank) {
- for (block = 0; block < NR_BLOCKS; ++block) {
- if (block == 0)
- address = MSR_IA32_MC0_MISC + bank * 4;
- else if (block == 1) {
- address = (low & MASK_BLKPTR_LO) >> 21;
- if (!address)
- break;
- address += MCG_XBLK_ADDR;
- }
- else
- ++address;
-
- if (rdmsr_safe(address, &low, &high))
- break;
-
- if (!(high & MASK_VALID_HI)) {
- if (block)
- continue;
- else
- break;
- }
-
- if (!(high & MASK_CNTP_HI) ||
- (high & MASK_LOCKED_HI))
- continue;
-
- if (!block)
- per_cpu(bank_map, cpu) |= (1 << bank);
-#ifdef CONFIG_SMP
- if (shared_bank[bank] && c->cpu_core_id)
- break;
-#endif
- high &= ~MASK_LVTOFF_HI;
- high |= K8_APIC_EXT_LVT_ENTRY_THRESHOLD << 20;
- wrmsr(address, low, high);
-
- setup_APIC_extended_lvt(K8_APIC_EXT_LVT_ENTRY_THRESHOLD,
- THRESHOLD_APIC_VECTOR,
- K8_APIC_EXT_INT_MSG_FIX, 0);
-
- threshold_defaults.address = address;
- threshold_restart_bank(&threshold_defaults, 0, 0);
- }
- }
-}
-
-/*
- * APIC Interrupt Handler
- */
-
-/*
- * threshold interrupt handler will service THRESHOLD_APIC_VECTOR.
- * the interrupt goes off when error_count reaches threshold_limit.
- * the handler will simply log mcelog w/ software defined bank number.
- */
-asmlinkage void mce_threshold_interrupt(void)
-{
- unsigned int bank, block;
- struct mce m;
- u32 low = 0, high = 0, address = 0;
-
- ack_APIC_irq();
- exit_idle();
- irq_enter();
-
- memset(&m, 0, sizeof(m));
- rdtscll(m.tsc);
- m.cpu = smp_processor_id();
-
- /* assume first bank caused it */
- for (bank = 0; bank < NR_BANKS; ++bank) {
- if (!(per_cpu(bank_map, m.cpu) & (1 << bank)))
- continue;
- for (block = 0; block < NR_BLOCKS; ++block) {
- if (block == 0)
- address = MSR_IA32_MC0_MISC + bank * 4;
- else if (block == 1) {
- address = (low & MASK_BLKPTR_LO) >> 21;
- if (!address)
- break;
- address += MCG_XBLK_ADDR;
- }
- else
- ++address;
-
- if (rdmsr_safe(address, &low, &high))
- break;
-
- if (!(high & MASK_VALID_HI)) {
- if (block)
- continue;
- else
- break;
- }
-
- if (!(high & MASK_CNTP_HI) ||
- (high & MASK_LOCKED_HI))
- continue;
-
- /* Log the machine check that caused the threshold
- event. */
- do_machine_check(NULL, 0);
-
- if (high & MASK_OVERFLOW_HI) {
- rdmsrl(address, m.misc);
- rdmsrl(MSR_IA32_MC0_STATUS + bank * 4,
- m.status);
- m.bank = K8_MCE_THRESHOLD_BASE
- + bank * NR_BLOCKS
- + block;
- mce_log(&m);
- goto out;
- }
- }
- }
-out:
- irq_exit();
-}
-
-/*
- * Sysfs Interface
- */
-
-struct threshold_attr {
- struct attribute attr;
- ssize_t(*show) (struct threshold_block *, char *);
- ssize_t(*store) (struct threshold_block *, const char *, size_t count);
-};
-
-static cpumask_t affinity_set(unsigned int cpu)
-{
- cpumask_t oldmask = current->cpus_allowed;
- cpumask_t newmask = CPU_MASK_NONE;
- cpu_set(cpu, newmask);
- set_cpus_allowed(current, newmask);
- return oldmask;
-}
-
-static void affinity_restore(cpumask_t oldmask)
-{
- set_cpus_allowed(current, oldmask);
-}
-
-#define SHOW_FIELDS(name) \
-static ssize_t show_ ## name(struct threshold_block * b, char *buf) \
-{ \
- return sprintf(buf, "%lx\n", (unsigned long) b->name); \
-}
-SHOW_FIELDS(interrupt_enable)
-SHOW_FIELDS(threshold_limit)
-
-static ssize_t store_interrupt_enable(struct threshold_block *b,
- const char *buf, size_t count)
-{
- char *end;
- cpumask_t oldmask;
- unsigned long new = simple_strtoul(buf, &end, 0);
- if (end == buf)
- return -EINVAL;
- b->interrupt_enable = !!new;
-
- oldmask = affinity_set(b->cpu);
- threshold_restart_bank(b, 0, 0);
- affinity_restore(oldmask);
-
- return end - buf;
-}
-
-static ssize_t store_threshold_limit(struct threshold_block *b,
- const char *buf, size_t count)
-{
- char *end;
- cpumask_t oldmask;
- u16 old;
- unsigned long new = simple_strtoul(buf, &end, 0);
- if (end == buf)
- return -EINVAL;
- if (new > THRESHOLD_MAX)
- new = THRESHOLD_MAX;
- if (new < 1)
- new = 1;
- old = b->threshold_limit;
- b->threshold_limit = new;
-
- oldmask = affinity_set(b->cpu);
- threshold_restart_bank(b, 0, old);
- affinity_restore(oldmask);
-
- return end - buf;
-}
-
-static ssize_t show_error_count(struct threshold_block *b, char *buf)
-{
- u32 high, low;
- cpumask_t oldmask;
- oldmask = affinity_set(b->cpu);
- rdmsr(b->address, low, high);
- affinity_restore(oldmask);
- return sprintf(buf, "%x\n",
- (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit));
-}
-
-static ssize_t store_error_count(struct threshold_block *b,
- const char *buf, size_t count)
-{
- cpumask_t oldmask;
- oldmask = affinity_set(b->cpu);
- threshold_restart_bank(b, 1, 0);
- affinity_restore(oldmask);
- return 1;
-}
-
-#define THRESHOLD_ATTR(_name,_mode,_show,_store) { \
- .attr = {.name = __stringify(_name), .mode = _mode }, \
- .show = _show, \
- .store = _store, \
-};
-
-#define RW_ATTR(name) \
-static struct threshold_attr name = \
- THRESHOLD_ATTR(name, 0644, show_## name, store_## name)
-
-RW_ATTR(interrupt_enable);
-RW_ATTR(threshold_limit);
-RW_ATTR(error_count);
-
-static struct attribute *default_attrs[] = {
- &interrupt_enable.attr,
- &threshold_limit.attr,
- &error_count.attr,
- NULL
-};
-
-#define to_block(k) container_of(k, struct threshold_block, kobj)
-#define to_attr(a) container_of(a, struct threshold_attr, attr)
-
-static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
-{
- struct threshold_block *b = to_block(kobj);
- struct threshold_attr *a = to_attr(attr);
- ssize_t ret;
- ret = a->show ? a->show(b, buf) : -EIO;
- return ret;
-}
-
-static ssize_t store(struct kobject *kobj, struct attribute *attr,
- const char *buf, size_t count)
-{
- struct threshold_block *b = to_block(kobj);
- struct threshold_attr *a = to_attr(attr);
- ssize_t ret;
- ret = a->store ? a->store(b, buf, count) : -EIO;
- return ret;
-}
-
-static struct sysfs_ops threshold_ops = {
- .show = show,
- .store = store,
-};
-
-static struct kobj_type threshold_ktype = {
- .sysfs_ops = &threshold_ops,
- .default_attrs = default_attrs,
-};
-
-static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
- unsigned int bank,
- unsigned int block,
- u32 address)
-{
- int err;
- u32 low, high;
- struct threshold_block *b = NULL;
-
- if ((bank >= NR_BANKS) || (block >= NR_BLOCKS))
- return 0;
-
- if (rdmsr_safe(address, &low, &high))
- return 0;
-
- if (!(high & MASK_VALID_HI)) {
- if (block)
- goto recurse;
- else
- return 0;
- }
-
- if (!(high & MASK_CNTP_HI) ||
- (high & MASK_LOCKED_HI))
- goto recurse;
-
- b = kzalloc(sizeof(struct threshold_block), GFP_KERNEL);
- if (!b)
- return -ENOMEM;
-
- b->block = block;
- b->bank = bank;
- b->cpu = cpu;
- b->address = address;
- b->interrupt_enable = 0;
- b->threshold_limit = THRESHOLD_MAX;
-
- INIT_LIST_HEAD(&b->miscj);
-
- if (per_cpu(threshold_banks, cpu)[bank]->blocks)
- list_add(&b->miscj,
- &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
- else
- per_cpu(threshold_banks, cpu)[bank]->blocks = b;
-
- kobject_set_name(&b->kobj, "misc%i", block);
- b->kobj.parent = &per_cpu(threshold_banks, cpu)[bank]->kobj;
- b->kobj.ktype = &threshold_ktype;
- err = kobject_register(&b->kobj);
- if (err)
- goto out_free;
-recurse:
- if (!block) {
- address = (low & MASK_BLKPTR_LO) >> 21;
- if (!address)
- return 0;
- address += MCG_XBLK_ADDR;
- } else
- ++address;
-
- err = allocate_threshold_blocks(cpu, bank, ++block, address);
- if (err)
- goto out_free;
-
- return err;
-
-out_free:
- if (b) {
- kobject_unregister(&b->kobj);
- kfree(b);
- }
- return err;
-}
-
-/* symlinks sibling shared banks to first core. first core owns dir/files. */
-static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
-{
- int i, err = 0;
- struct threshold_bank *b = NULL;
- cpumask_t oldmask = CPU_MASK_NONE;
- char name[32];
-
- sprintf(name, "threshold_bank%i", bank);
-
-#ifdef CONFIG_SMP
- if (cpu_data[cpu].cpu_core_id && shared_bank[bank]) { /* symlink */
- i = first_cpu(cpu_core_map[cpu]);
-
- /* first core not up yet */
- if (cpu_data[i].cpu_core_id)
- goto out;
-
- /* already linked */
- if (per_cpu(threshold_banks, cpu)[bank])
- goto out;
-
- b = per_cpu(threshold_banks, i)[bank];
-
- if (!b)
- goto out;
-
- err = sysfs_create_link(&per_cpu(device_mce, cpu).kobj,
- &b->kobj, name);
- if (err)
- goto out;
-
- b->cpus = cpu_core_map[cpu];
- per_cpu(threshold_banks, cpu)[bank] = b;
- goto out;
- }
-#endif
-
- b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
- if (!b) {
- err = -ENOMEM;
- goto out;
- }
-
- kobject_set_name(&b->kobj, "threshold_bank%i", bank);
- b->kobj.parent = &per_cpu(device_mce, cpu).kobj;
-#ifndef CONFIG_SMP
- b->cpus = CPU_MASK_ALL;
-#else
- b->cpus = cpu_core_map[cpu];
-#endif
- err = kobject_register(&b->kobj);
- if (err)
- goto out_free;
-
- per_cpu(threshold_banks, cpu)[bank] = b;
-
- oldmask = affinity_set(cpu);
- err = allocate_threshold_blocks(cpu, bank, 0,
- MSR_IA32_MC0_MISC + bank * 4);
- affinity_restore(oldmask);
-
- if (err)
- goto out_free;
-
- for_each_cpu_mask(i, b->cpus) {
- if (i == cpu)
- continue;
-
- err = sysfs_create_link(&per_cpu(device_mce, i).kobj,
- &b->kobj, name);
- if (err)
- goto out;
-
- per_cpu(threshold_banks, i)[bank] = b;
- }
-
- goto out;
-
-out_free:
- per_cpu(threshold_banks, cpu)[bank] = NULL;
- kfree(b);
-out:
- return err;
-}
-
-/* create dir/files for all valid threshold banks */
-static __cpuinit int threshold_create_device(unsigned int cpu)
-{
- unsigned int bank;
- int err = 0;
-
- for (bank = 0; bank < NR_BANKS; ++bank) {
- if (!(per_cpu(bank_map, cpu) & 1 << bank))
- continue;
- err = threshold_create_bank(cpu, bank);
- if (err)
- goto out;
- }
-out:
- return err;
-}
-
-/*
- * let's be hotplug friendly.
- * in case of multiple core processors, the first core always takes ownership
- * of shared sysfs dir/files, and rest of the cores will be symlinked to it.
- */
-
-static void deallocate_threshold_block(unsigned int cpu,
- unsigned int bank)
-{
- struct threshold_block *pos = NULL;
- struct threshold_block *tmp = NULL;
- struct threshold_bank *head = per_cpu(threshold_banks, cpu)[bank];
-
- if (!head)
- return;
-
- list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) {
- kobject_unregister(&pos->kobj);
- list_del(&pos->miscj);
- kfree(pos);
- }
-
- kfree(per_cpu(threshold_banks, cpu)[bank]->blocks);
- per_cpu(threshold_banks, cpu)[bank]->blocks = NULL;
-}
-
-static void threshold_remove_bank(unsigned int cpu, int bank)
-{
- int i = 0;
- struct threshold_bank *b;
- char name[32];
-
- b = per_cpu(threshold_banks, cpu)[bank];
-
- if (!b)
- return;
-
- if (!b->blocks)
- goto free_out;
-
- sprintf(name, "threshold_bank%i", bank);
-
-#ifdef CONFIG_SMP
- /* sibling symlink */
- if (shared_bank[bank] && b->blocks->cpu != cpu) {
- sysfs_remove_link(&per_cpu(device_mce, cpu).kobj, name);
- per_cpu(threshold_banks, cpu)[bank] = NULL;
- return;
- }
-#endif
-
- /* remove all sibling symlinks before unregistering */
- for_each_cpu_mask(i, b->cpus) {
- if (i == cpu)
- continue;
-
- sysfs_remove_link(&per_cpu(device_mce, i).kobj, name);
- per_cpu(threshold_banks, i)[bank] = NULL;
- }
-
- deallocate_threshold_block(cpu, bank);
-
-free_out:
- kobject_unregister(&b->kobj);
- kfree(b);
- per_cpu(threshold_banks, cpu)[bank] = NULL;
-}
-
-static void threshold_remove_device(unsigned int cpu)
-{
- unsigned int bank;
-
- for (bank = 0; bank < NR_BANKS; ++bank) {
- if (!(per_cpu(bank_map, cpu) & 1 << bank))
- continue;
- threshold_remove_bank(cpu, bank);
- }
-}
-
-/* get notified when a cpu comes on/off */
-static int threshold_cpu_callback(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
-{
- /* cpu was unsigned int to begin with */
- unsigned int cpu = (unsigned long)hcpu;
-
- if (cpu >= NR_CPUS)
- goto out;
-
- switch (action) {
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- threshold_create_device(cpu);
- break;
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- threshold_remove_device(cpu);
- break;
- default:
- break;
- }
- out:
- return NOTIFY_OK;
-}
-
-static struct notifier_block threshold_cpu_notifier = {
- .notifier_call = threshold_cpu_callback,
-};
-
-static __init int threshold_init_device(void)
-{
- unsigned lcpu = 0;
-
- /* to hit CPUs online before the notifier is up */
- for_each_online_cpu(lcpu) {
- int err = threshold_create_device(lcpu);
- if (err)
- return err;
- }
- register_hotcpu_notifier(&threshold_cpu_notifier);
- return 0;
-}
-
-device_initcall(threshold_init_device);
diff --git a/arch/x86_64/kernel/mce_intel.c b/arch/x86_64/kernel/mce_intel.c
deleted file mode 100644
index 6551505d8a2c..000000000000
--- a/arch/x86_64/kernel/mce_intel.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Intel specific MCE features.
- * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
- */
-
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/percpu.h>
-#include <asm/processor.h>
-#include <asm/msr.h>
-#include <asm/mce.h>
-#include <asm/hw_irq.h>
-#include <asm/idle.h>
-#include <asm/therm_throt.h>
-
-asmlinkage void smp_thermal_interrupt(void)
-{
- __u64 msr_val;
-
- ack_APIC_irq();
-
- exit_idle();
- irq_enter();
-
- rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
- if (therm_throt_process(msr_val & 1))
- mce_log_therm_throt_event(smp_processor_id(), msr_val);
-
- irq_exit();
-}
-
-static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c)
-{
- u32 l, h;
- int tm2 = 0;
- unsigned int cpu = smp_processor_id();
-
- if (!cpu_has(c, X86_FEATURE_ACPI))
- return;
-
- if (!cpu_has(c, X86_FEATURE_ACC))
- return;
-
- /* first check if TM1 is already enabled by the BIOS, in which
- * case there might be some SMM goo which handles it, so we can't even
- * put a handler since it might be delivered via SMI already.
- */
- rdmsr(MSR_IA32_MISC_ENABLE, l, h);
- h = apic_read(APIC_LVTTHMR);
- if ((l & (1 << 3)) && (h & APIC_DM_SMI)) {
- printk(KERN_DEBUG
- "CPU%d: Thermal monitoring handled by SMI\n", cpu);
- return;
- }
-
- if (cpu_has(c, X86_FEATURE_TM2) && (l & (1 << 13)))
- tm2 = 1;
-
- if (h & APIC_VECTOR_MASK) {
- printk(KERN_DEBUG
- "CPU%d: Thermal LVT vector (%#x) already "
- "installed\n", cpu, (h & APIC_VECTOR_MASK));
- return;
- }
-
- h = THERMAL_APIC_VECTOR;
- h |= (APIC_DM_FIXED | APIC_LVT_MASKED);
- apic_write(APIC_LVTTHMR, h);
-
- rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
- wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h);
-
- rdmsr(MSR_IA32_MISC_ENABLE, l, h);
- wrmsr(MSR_IA32_MISC_ENABLE, l | (1 << 3), h);
-
- l = apic_read(APIC_LVTTHMR);
- apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
- printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
- cpu, tm2 ? "TM2" : "TM1");
-
- /* enable thermal throttle processing */
- atomic_set(&therm_throt_en, 1);
- return;
-}
-
-void __cpuinit mce_intel_feature_init(struct cpuinfo_x86 *c)
-{
- intel_init_thermal(c);
-}
diff --git a/arch/x86_64/kernel/module.c b/arch/x86_64/kernel/module.c
deleted file mode 100644
index a888e67f5874..000000000000
--- a/arch/x86_64/kernel/module.c
+++ /dev/null
@@ -1,185 +0,0 @@
-/* Kernel module help for x86-64
- Copyright (C) 2001 Rusty Russell.
- Copyright (C) 2002,2003 Andi Kleen, SuSE Labs.
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-*/
-#include <linux/moduleloader.h>
-#include <linux/elf.h>
-#include <linux/vmalloc.h>
-#include <linux/fs.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/bug.h>
-
-#include <asm/system.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-
-#define DEBUGP(fmt...)
-
-#ifndef CONFIG_UML
-void module_free(struct module *mod, void *module_region)
-{
- vfree(module_region);
- /* FIXME: If module_region == mod->init_region, trim exception
- table entries. */
-}
-
-void *module_alloc(unsigned long size)
-{
- struct vm_struct *area;
-
- if (!size)
- return NULL;
- size = PAGE_ALIGN(size);
- if (size > MODULES_LEN)
- return NULL;
-
- area = __get_vm_area(size, VM_ALLOC, MODULES_VADDR, MODULES_END);
- if (!area)
- return NULL;
-
- return __vmalloc_area(area, GFP_KERNEL, PAGE_KERNEL_EXEC);
-}
-#endif
-
-/* We don't need anything special. */
-int module_frob_arch_sections(Elf_Ehdr *hdr,
- Elf_Shdr *sechdrs,
- char *secstrings,
- struct module *mod)
-{
- return 0;
-}
-
-int apply_relocate_add(Elf64_Shdr *sechdrs,
- const char *strtab,
- unsigned int symindex,
- unsigned int relsec,
- struct module *me)
-{
- unsigned int i;
- Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr;
- Elf64_Sym *sym;
- void *loc;
- u64 val;
-
- DEBUGP("Applying relocate section %u to %u\n", relsec,
- sechdrs[relsec].sh_info);
- for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
- /* This is where to make the change */
- loc = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
- + rel[i].r_offset;
-
- /* This is the symbol it is referring to. Note that all
- undefined symbols have been resolved. */
- sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
- + ELF64_R_SYM(rel[i].r_info);
-
- DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n",
- (int)ELF64_R_TYPE(rel[i].r_info),
- sym->st_value, rel[i].r_addend, (u64)loc);
-
- val = sym->st_value + rel[i].r_addend;
-
- switch (ELF64_R_TYPE(rel[i].r_info)) {
- case R_X86_64_NONE:
- break;
- case R_X86_64_64:
- *(u64 *)loc = val;
- break;
- case R_X86_64_32:
- *(u32 *)loc = val;
- if (val != *(u32 *)loc)
- goto overflow;
- break;
- case R_X86_64_32S:
- *(s32 *)loc = val;
- if ((s64)val != *(s32 *)loc)
- goto overflow;
- break;
- case R_X86_64_PC32:
- val -= (u64)loc;
- *(u32 *)loc = val;
-#if 0
- if ((s64)val != *(s32 *)loc)
- goto overflow;
-#endif
- break;
- default:
- printk(KERN_ERR "module %s: Unknown rela relocation: %Lu\n",
- me->name, ELF64_R_TYPE(rel[i].r_info));
- return -ENOEXEC;
- }
- }
- return 0;
-
-overflow:
- printk(KERN_ERR "overflow in relocation type %d val %Lx\n",
- (int)ELF64_R_TYPE(rel[i].r_info), val);
- printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n",
- me->name);
- return -ENOEXEC;
-}
-
-int apply_relocate(Elf_Shdr *sechdrs,
- const char *strtab,
- unsigned int symindex,
- unsigned int relsec,
- struct module *me)
-{
- printk("non add relocation not supported\n");
- return -ENOSYS;
-}
-
-int module_finalize(const Elf_Ehdr *hdr,
- const Elf_Shdr *sechdrs,
- struct module *me)
-{
- const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL;
- char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
-
- for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
- if (!strcmp(".text", secstrings + s->sh_name))
- text = s;
- if (!strcmp(".altinstructions", secstrings + s->sh_name))
- alt = s;
- if (!strcmp(".smp_locks", secstrings + s->sh_name))
- locks= s;
- }
-
- if (alt) {
- /* patch .altinstructions */
- void *aseg = (void *)alt->sh_addr;
- apply_alternatives(aseg, aseg + alt->sh_size);
- }
- if (locks && text) {
- void *lseg = (void *)locks->sh_addr;
- void *tseg = (void *)text->sh_addr;
- alternatives_smp_module_add(me, me->name,
- lseg, lseg + locks->sh_size,
- tseg, tseg + text->sh_size);
- }
-
- return module_bug_finalize(hdr, sechdrs, me);
-}
-
-void module_arch_cleanup(struct module *mod)
-{
- alternatives_smp_module_del(mod);
- module_bug_cleanup(mod);
-}
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
deleted file mode 100644
index 8bf0ca03ac8e..000000000000
--- a/arch/x86_64/kernel/mpparse.c
+++ /dev/null
@@ -1,852 +0,0 @@
-/*
- * Intel Multiprocessor Specification 1.1 and 1.4
- * compliant MP-table parsing routines.
- *
- * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
- * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
- *
- * Fixes
- * Erich Boleyn : MP v1.4 and additional changes.
- * Alan Cox : Added EBDA scanning
- * Ingo Molnar : various cleanups and rewrites
- * Maciej W. Rozycki: Bits for default MP configurations
- * Paul Diefenbaugh: Added full ACPI support
- */
-
-#include <linux/mm.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/bootmem.h>
-#include <linux/kernel_stat.h>
-#include <linux/mc146818rtc.h>
-#include <linux/acpi.h>
-#include <linux/module.h>
-
-#include <asm/smp.h>
-#include <asm/mtrr.h>
-#include <asm/mpspec.h>
-#include <asm/pgalloc.h>
-#include <asm/io_apic.h>
-#include <asm/proto.h>
-#include <asm/acpi.h>
-
-/* Have we found an MP table */
-int smp_found_config;
-
-/*
- * Various Linux-internal data structures created from the
- * MP-table.
- */
-DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
-int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
-
-static int mp_current_pci_id = 0;
-/* I/O APIC entries */
-struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
-
-/* # of MP IRQ source entries */
-struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
-
-/* MP IRQ source entries */
-int mp_irq_entries;
-
-int nr_ioapics;
-unsigned long mp_lapic_addr = 0;
-
-
-
-/* Processor that is doing the boot up */
-unsigned int boot_cpu_id = -1U;
-/* Internal processor count */
-unsigned int num_processors __cpuinitdata = 0;
-
-unsigned disabled_cpus __cpuinitdata;
-
-/* Bitmask of physically existing CPUs */
-physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
-
-u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
-
-
-/*
- * Intel MP BIOS table parsing routines:
- */
-
-/*
- * Checksum an MP configuration block.
- */
-
-static int __init mpf_checksum(unsigned char *mp, int len)
-{
- int sum = 0;
-
- while (len--)
- sum += *mp++;
-
- return sum & 0xFF;
-}
-
-static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
-{
- int cpu;
- cpumask_t tmp_map;
- char *bootup_cpu = "";
-
- if (!(m->mpc_cpuflag & CPU_ENABLED)) {
- disabled_cpus++;
- return;
- }
- if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
- bootup_cpu = " (Bootup-CPU)";
- boot_cpu_id = m->mpc_apicid;
- }
-
- printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu);
-
- if (num_processors >= NR_CPUS) {
- printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
- " Processor ignored.\n", NR_CPUS);
- return;
- }
-
- num_processors++;
- cpus_complement(tmp_map, cpu_present_map);
- cpu = first_cpu(tmp_map);
-
- physid_set(m->mpc_apicid, phys_cpu_present_map);
- if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
- /*
- * bios_cpu_apicid is required to have processors listed
- * in same order as logical cpu numbers. Hence the first
- * entry is BSP, and so on.
- */
- cpu = 0;
- }
- bios_cpu_apicid[cpu] = m->mpc_apicid;
- x86_cpu_to_apicid[cpu] = m->mpc_apicid;
-
- cpu_set(cpu, cpu_possible_map);
- cpu_set(cpu, cpu_present_map);
-}
-
-static void __init MP_bus_info (struct mpc_config_bus *m)
-{
- char str[7];
-
- memcpy(str, m->mpc_bustype, 6);
- str[6] = 0;
- Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
-
- if (strncmp(str, "ISA", 3) == 0) {
- set_bit(m->mpc_busid, mp_bus_not_pci);
- } else if (strncmp(str, "PCI", 3) == 0) {
- clear_bit(m->mpc_busid, mp_bus_not_pci);
- mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
- mp_current_pci_id++;
- } else {
- printk(KERN_ERR "Unknown bustype %s\n", str);
- }
-}
-
-static int bad_ioapic(unsigned long address)
-{
- if (nr_ioapics >= MAX_IO_APICS) {
- printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
- "(found %d)\n", MAX_IO_APICS, nr_ioapics);
- panic("Recompile kernel with bigger MAX_IO_APICS!\n");
- }
- if (!address) {
- printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
- " found in table, skipping!\n");
- return 1;
- }
- return 0;
-}
-
-static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
-{
- if (!(m->mpc_flags & MPC_APIC_USABLE))
- return;
-
- printk("I/O APIC #%d at 0x%X.\n",
- m->mpc_apicid, m->mpc_apicaddr);
-
- if (bad_ioapic(m->mpc_apicaddr))
- return;
-
- mp_ioapics[nr_ioapics] = *m;
- nr_ioapics++;
-}
-
-static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
-{
- mp_irqs [mp_irq_entries] = *m;
- Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
- " IRQ %02x, APIC ID %x, APIC INT %02x\n",
- m->mpc_irqtype, m->mpc_irqflag & 3,
- (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
- m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
- if (++mp_irq_entries >= MAX_IRQ_SOURCES)
- panic("Max # of irq sources exceeded!!\n");
-}
-
-static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
-{
- Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
- " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
- m->mpc_irqtype, m->mpc_irqflag & 3,
- (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
- m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
-}
-
-/*
- * Read/parse the MPC
- */
-
-static int __init smp_read_mpc(struct mp_config_table *mpc)
-{
- char str[16];
- int count=sizeof(*mpc);
- unsigned char *mpt=((unsigned char *)mpc)+count;
-
- if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
- printk("MPTABLE: bad signature [%c%c%c%c]!\n",
- mpc->mpc_signature[0],
- mpc->mpc_signature[1],
- mpc->mpc_signature[2],
- mpc->mpc_signature[3]);
- return 0;
- }
- if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
- printk("MPTABLE: checksum error!\n");
- return 0;
- }
- if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
- printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n",
- mpc->mpc_spec);
- return 0;
- }
- if (!mpc->mpc_lapic) {
- printk(KERN_ERR "MPTABLE: null local APIC address!\n");
- return 0;
- }
- memcpy(str,mpc->mpc_oem,8);
- str[8] = 0;
- printk(KERN_INFO "MPTABLE: OEM ID: %s ",str);
-
- memcpy(str,mpc->mpc_productid,12);
- str[12] = 0;
- printk("MPTABLE: Product ID: %s ",str);
-
- printk("MPTABLE: APIC at: 0x%X\n",mpc->mpc_lapic);
-
- /* save the local APIC address, it might be non-default */
- if (!acpi_lapic)
- mp_lapic_addr = mpc->mpc_lapic;
-
- /*
- * Now process the configuration blocks.
- */
- while (count < mpc->mpc_length) {
- switch(*mpt) {
- case MP_PROCESSOR:
- {
- struct mpc_config_processor *m=
- (struct mpc_config_processor *)mpt;
- if (!acpi_lapic)
- MP_processor_info(m);
- mpt += sizeof(*m);
- count += sizeof(*m);
- break;
- }
- case MP_BUS:
- {
- struct mpc_config_bus *m=
- (struct mpc_config_bus *)mpt;
- MP_bus_info(m);
- mpt += sizeof(*m);
- count += sizeof(*m);
- break;
- }
- case MP_IOAPIC:
- {
- struct mpc_config_ioapic *m=
- (struct mpc_config_ioapic *)mpt;
- MP_ioapic_info(m);
- mpt += sizeof(*m);
- count += sizeof(*m);
- break;
- }
- case MP_INTSRC:
- {
- struct mpc_config_intsrc *m=
- (struct mpc_config_intsrc *)mpt;
-
- MP_intsrc_info(m);
- mpt += sizeof(*m);
- count += sizeof(*m);
- break;
- }
- case MP_LINTSRC:
- {
- struct mpc_config_lintsrc *m=
- (struct mpc_config_lintsrc *)mpt;
- MP_lintsrc_info(m);
- mpt += sizeof(*m);
- count += sizeof(*m);
- break;
- }
- }
- }
- setup_apic_routing();
- if (!num_processors)
- printk(KERN_ERR "MPTABLE: no processors registered!\n");
- return num_processors;
-}
-
-static int __init ELCR_trigger(unsigned int irq)
-{
- unsigned int port;
-
- port = 0x4d0 + (irq >> 3);
- return (inb(port) >> (irq & 7)) & 1;
-}
-
-static void __init construct_default_ioirq_mptable(int mpc_default_type)
-{
- struct mpc_config_intsrc intsrc;
- int i;
- int ELCR_fallback = 0;
-
- intsrc.mpc_type = MP_INTSRC;
- intsrc.mpc_irqflag = 0; /* conforming */
- intsrc.mpc_srcbus = 0;
- intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
-
- intsrc.mpc_irqtype = mp_INT;
-
- /*
- * If true, we have an ISA/PCI system with no IRQ entries
- * in the MP table. To prevent the PCI interrupts from being set up
- * incorrectly, we try to use the ELCR. The sanity check to see if
- * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
- * never be level sensitive, so we simply see if the ELCR agrees.
- * If it does, we assume it's valid.
- */
- if (mpc_default_type == 5) {
- printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
-
- if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
- printk(KERN_ERR "ELCR contains invalid data... not using ELCR\n");
- else {
- printk(KERN_INFO "Using ELCR to identify PCI interrupts\n");
- ELCR_fallback = 1;
- }
- }
-
- for (i = 0; i < 16; i++) {
- switch (mpc_default_type) {
- case 2:
- if (i == 0 || i == 13)
- continue; /* IRQ0 & IRQ13 not connected */
- /* fall through */
- default:
- if (i == 2)
- continue; /* IRQ2 is never connected */
- }
-
- if (ELCR_fallback) {
- /*
- * If the ELCR indicates a level-sensitive interrupt, we
- * copy that information over to the MP table in the
- * irqflag field (level sensitive, active high polarity).
- */
- if (ELCR_trigger(i))
- intsrc.mpc_irqflag = 13;
- else
- intsrc.mpc_irqflag = 0;
- }
-
- intsrc.mpc_srcbusirq = i;
- intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
- MP_intsrc_info(&intsrc);
- }
-
- intsrc.mpc_irqtype = mp_ExtINT;
- intsrc.mpc_srcbusirq = 0;
- intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */
- MP_intsrc_info(&intsrc);
-}
-
-static inline void __init construct_default_ISA_mptable(int mpc_default_type)
-{
- struct mpc_config_processor processor;
- struct mpc_config_bus bus;
- struct mpc_config_ioapic ioapic;
- struct mpc_config_lintsrc lintsrc;
- int linttypes[2] = { mp_ExtINT, mp_NMI };
- int i;
-
- /*
- * local APIC has default address
- */
- mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-
- /*
- * 2 CPUs, numbered 0 & 1.
- */
- processor.mpc_type = MP_PROCESSOR;
- processor.mpc_apicver = 0;
- processor.mpc_cpuflag = CPU_ENABLED;
- processor.mpc_cpufeature = 0;
- processor.mpc_featureflag = 0;
- processor.mpc_reserved[0] = 0;
- processor.mpc_reserved[1] = 0;
- for (i = 0; i < 2; i++) {
- processor.mpc_apicid = i;
- MP_processor_info(&processor);
- }
-
- bus.mpc_type = MP_BUS;
- bus.mpc_busid = 0;
- switch (mpc_default_type) {
- default:
- printk(KERN_ERR "???\nUnknown standard configuration %d\n",
- mpc_default_type);
- /* fall through */
- case 1:
- case 5:
- memcpy(bus.mpc_bustype, "ISA ", 6);
- break;
- }
- MP_bus_info(&bus);
- if (mpc_default_type > 4) {
- bus.mpc_busid = 1;
- memcpy(bus.mpc_bustype, "PCI ", 6);
- MP_bus_info(&bus);
- }
-
- ioapic.mpc_type = MP_IOAPIC;
- ioapic.mpc_apicid = 2;
- ioapic.mpc_apicver = 0;
- ioapic.mpc_flags = MPC_APIC_USABLE;
- ioapic.mpc_apicaddr = 0xFEC00000;
- MP_ioapic_info(&ioapic);
-
- /*
- * We set up most of the low 16 IO-APIC pins according to MPS rules.
- */
- construct_default_ioirq_mptable(mpc_default_type);
-
- lintsrc.mpc_type = MP_LINTSRC;
- lintsrc.mpc_irqflag = 0; /* conforming */
- lintsrc.mpc_srcbusid = 0;
- lintsrc.mpc_srcbusirq = 0;
- lintsrc.mpc_destapic = MP_APIC_ALL;
- for (i = 0; i < 2; i++) {
- lintsrc.mpc_irqtype = linttypes[i];
- lintsrc.mpc_destapiclint = i;
- MP_lintsrc_info(&lintsrc);
- }
-}
-
-static struct intel_mp_floating *mpf_found;
-
-/*
- * Scan the memory blocks for an SMP configuration block.
- */
-void __init get_smp_config (void)
-{
- struct intel_mp_floating *mpf = mpf_found;
-
- /*
- * ACPI supports both logical (e.g. Hyper-Threading) and physical
- * processors, where MPS only supports physical.
- */
- if (acpi_lapic && acpi_ioapic) {
- printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n");
- return;
- }
- else if (acpi_lapic)
- printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
-
- printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
-
- /*
- * Now see if we need to read further.
- */
- if (mpf->mpf_feature1 != 0) {
-
- printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1);
- construct_default_ISA_mptable(mpf->mpf_feature1);
-
- } else if (mpf->mpf_physptr) {
-
- /*
- * Read the physical hardware table. Anything here will
- * override the defaults.
- */
- if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr))) {
- smp_found_config = 0;
- printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
- printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
- return;
- }
- /*
- * If there are no explicit MP IRQ entries, then we are
- * broken. We set up most of the low 16 IO-APIC pins to
- * ISA defaults and hope it will work.
- */
- if (!mp_irq_entries) {
- struct mpc_config_bus bus;
-
- printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
-
- bus.mpc_type = MP_BUS;
- bus.mpc_busid = 0;
- memcpy(bus.mpc_bustype, "ISA ", 6);
- MP_bus_info(&bus);
-
- construct_default_ioirq_mptable(0);
- }
-
- } else
- BUG();
-
- printk(KERN_INFO "Processors: %d\n", num_processors);
- /*
- * Only use the first configuration found.
- */
-}
-
-static int __init smp_scan_config (unsigned long base, unsigned long length)
-{
- extern void __bad_mpf_size(void);
- unsigned int *bp = phys_to_virt(base);
- struct intel_mp_floating *mpf;
-
- Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length);
- if (sizeof(*mpf) != 16)
- __bad_mpf_size();
-
- while (length > 0) {
- mpf = (struct intel_mp_floating *)bp;
- if ((*bp == SMP_MAGIC_IDENT) &&
- (mpf->mpf_length == 1) &&
- !mpf_checksum((unsigned char *)bp, 16) &&
- ((mpf->mpf_specification == 1)
- || (mpf->mpf_specification == 4)) ) {
-
- smp_found_config = 1;
- reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE);
- if (mpf->mpf_physptr)
- reserve_bootmem_generic(mpf->mpf_physptr, PAGE_SIZE);
- mpf_found = mpf;
- return 1;
- }
- bp += 4;
- length -= 16;
- }
- return 0;
-}
-
-void __init find_smp_config(void)
-{
- unsigned int address;
-
- /*
- * FIXME: Linux assumes you have 640K of base ram..
- * this continues the error...
- *
- * 1) Scan the bottom 1K for a signature
- * 2) Scan the top 1K of base RAM
- * 3) Scan the 64K of bios
- */
- if (smp_scan_config(0x0,0x400) ||
- smp_scan_config(639*0x400,0x400) ||
- smp_scan_config(0xF0000,0x10000))
- return;
- /*
- * If it is an SMP machine we should know now.
- *
- * there is a real-mode segmented pointer pointing to the
- * 4K EBDA area at 0x40E, calculate and scan it here.
- *
- * NOTE! There are Linux loaders that will corrupt the EBDA
- * area, and as such this kind of SMP config may be less
- * trustworthy, simply because the SMP table may have been
- * stomped on during early boot. These loaders are buggy and
- * should be fixed.
- */
-
- address = *(unsigned short *)phys_to_virt(0x40E);
- address <<= 4;
- if (smp_scan_config(address, 0x1000))
- return;
-
- /* If we have come this far, we did not find an MP table */
- printk(KERN_INFO "No mptable found.\n");
-}
-
-/* --------------------------------------------------------------------------
- ACPI-based MP Configuration
- -------------------------------------------------------------------------- */
-
-#ifdef CONFIG_ACPI
-
-void __init mp_register_lapic_address(u64 address)
-{
- mp_lapic_addr = (unsigned long) address;
- set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
- if (boot_cpu_id == -1U)
- boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
-}
-
-void __cpuinit mp_register_lapic (u8 id, u8 enabled)
-{
- struct mpc_config_processor processor;
- int boot_cpu = 0;
-
- if (id == boot_cpu_id)
- boot_cpu = 1;
-
- processor.mpc_type = MP_PROCESSOR;
- processor.mpc_apicid = id;
- processor.mpc_apicver = 0;
- processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
- processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
- processor.mpc_cpufeature = 0;
- processor.mpc_featureflag = 0;
- processor.mpc_reserved[0] = 0;
- processor.mpc_reserved[1] = 0;
-
- MP_processor_info(&processor);
-}
-
-#define MP_ISA_BUS 0
-#define MP_MAX_IOAPIC_PIN 127
-
-static struct mp_ioapic_routing {
- int apic_id;
- int gsi_start;
- int gsi_end;
- u32 pin_programmed[4];
-} mp_ioapic_routing[MAX_IO_APICS];
-
-static int mp_find_ioapic(int gsi)
-{
- int i = 0;
-
- /* Find the IOAPIC that manages this GSI. */
- for (i = 0; i < nr_ioapics; i++) {
- if ((gsi >= mp_ioapic_routing[i].gsi_start)
- && (gsi <= mp_ioapic_routing[i].gsi_end))
- return i;
- }
-
- printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
- return -1;
-}
-
-static u8 uniq_ioapic_id(u8 id)
-{
- int i;
- DECLARE_BITMAP(used, 256);
- bitmap_zero(used, 256);
- for (i = 0; i < nr_ioapics; i++) {
- struct mpc_config_ioapic *ia = &mp_ioapics[i];
- __set_bit(ia->mpc_apicid, used);
- }
- if (!test_bit(id, used))
- return id;
- return find_first_zero_bit(used, 256);
-}
-
-void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
-{
- int idx = 0;
-
- if (bad_ioapic(address))
- return;
-
- idx = nr_ioapics;
-
- mp_ioapics[idx].mpc_type = MP_IOAPIC;
- mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
- mp_ioapics[idx].mpc_apicaddr = address;
-
- set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
- mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id);
- mp_ioapics[idx].mpc_apicver = 0;
-
- /*
- * Build basic IRQ lookup table to facilitate gsi->io_apic lookups
- * and to prevent reprogramming of IOAPIC pins (PCI IRQs).
- */
- mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
- mp_ioapic_routing[idx].gsi_start = gsi_base;
- mp_ioapic_routing[idx].gsi_end = gsi_base +
- io_apic_get_redir_entries(idx);
-
- printk(KERN_INFO "IOAPIC[%d]: apic_id %d, address 0x%x, "
- "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
- mp_ioapics[idx].mpc_apicaddr,
- mp_ioapic_routing[idx].gsi_start,
- mp_ioapic_routing[idx].gsi_end);
-
- nr_ioapics++;
-}
-
-void __init
-mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
-{
- struct mpc_config_intsrc intsrc;
- int ioapic = -1;
- int pin = -1;
-
- /*
- * Convert 'gsi' to 'ioapic.pin'.
- */
- ioapic = mp_find_ioapic(gsi);
- if (ioapic < 0)
- return;
- pin = gsi - mp_ioapic_routing[ioapic].gsi_start;
-
- /*
- * TBD: This check is for faulty timer entries, where the override
- * erroneously sets the trigger to level, resulting in a HUGE
- * increase of timer interrupts!
- */
- if ((bus_irq == 0) && (trigger == 3))
- trigger = 1;
-
- intsrc.mpc_type = MP_INTSRC;
- intsrc.mpc_irqtype = mp_INT;
- intsrc.mpc_irqflag = (trigger << 2) | polarity;
- intsrc.mpc_srcbus = MP_ISA_BUS;
- intsrc.mpc_srcbusirq = bus_irq; /* IRQ */
- intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */
- intsrc.mpc_dstirq = pin; /* INTIN# */
-
- Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n",
- intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
- (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
- intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
-
- mp_irqs[mp_irq_entries] = intsrc;
- if (++mp_irq_entries == MAX_IRQ_SOURCES)
- panic("Max # of irq sources exceeded!\n");
-}
-
-void __init mp_config_acpi_legacy_irqs(void)
-{
- struct mpc_config_intsrc intsrc;
- int i = 0;
- int ioapic = -1;
-
- /*
- * Fabricate the legacy ISA bus (bus #31).
- */
- set_bit(MP_ISA_BUS, mp_bus_not_pci);
-
- /*
- * Locate the IOAPIC that manages the ISA IRQs (0-15).
- */
- ioapic = mp_find_ioapic(0);
- if (ioapic < 0)
- return;
-
- intsrc.mpc_type = MP_INTSRC;
- intsrc.mpc_irqflag = 0; /* Conforming */
- intsrc.mpc_srcbus = MP_ISA_BUS;
- intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
-
- /*
- * Use the default configuration for the IRQs 0-15. Unless
- * overridden by (MADT) interrupt source override entries.
- */
- for (i = 0; i < 16; i++) {
- int idx;
-
- for (idx = 0; idx < mp_irq_entries; idx++) {
- struct mpc_config_intsrc *irq = mp_irqs + idx;
-
- /* Do we already have a mapping for this ISA IRQ? */
- if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i)
- break;
-
- /* Do we already have a mapping for this IOAPIC pin */
- if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
- (irq->mpc_dstirq == i))
- break;
- }
-
- if (idx != mp_irq_entries) {
- printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
- continue; /* IRQ already used */
- }
-
- intsrc.mpc_irqtype = mp_INT;
- intsrc.mpc_srcbusirq = i; /* Identity mapped */
- intsrc.mpc_dstirq = i;
-
- Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
- "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
- (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
- intsrc.mpc_srcbusirq, intsrc.mpc_dstapic,
- intsrc.mpc_dstirq);
-
- mp_irqs[mp_irq_entries] = intsrc;
- if (++mp_irq_entries == MAX_IRQ_SOURCES)
- panic("Max # of irq sources exceeded!\n");
- }
-}
-
-int mp_register_gsi(u32 gsi, int triggering, int polarity)
-{
- int ioapic = -1;
- int ioapic_pin = 0;
- int idx, bit = 0;
-
- if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
- return gsi;
-
- /* Don't set up the ACPI SCI because it's already set up */
- if (acpi_gbl_FADT.sci_interrupt == gsi)
- return gsi;
-
- ioapic = mp_find_ioapic(gsi);
- if (ioapic < 0) {
- printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
- return gsi;
- }
-
- ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_start;
-
- /*
- * Avoid pin reprogramming. PRTs typically include entries
- * with redundant pin->gsi mappings (but unique PCI devices);
- * we only program the IOAPIC on the first.
- */
- bit = ioapic_pin % 32;
- idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
- if (idx > 3) {
- printk(KERN_ERR "Invalid reference to IOAPIC pin "
- "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
- ioapic_pin);
- return gsi;
- }
- if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
- Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
- mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
- return gsi;
- }
-
- mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
-
- io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
- triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
- polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
- return gsi;
-}
-#endif /*CONFIG_ACPI*/
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
deleted file mode 100644
index 0ec6d2ddb931..000000000000
--- a/arch/x86_64/kernel/nmi.c
+++ /dev/null
@@ -1,483 +0,0 @@
-/*
- * linux/arch/x86_64/nmi.c
- *
- * NMI watchdog support on APIC systems
- *
- * Started by Ingo Molnar <mingo@redhat.com>
- *
- * Fixes:
- * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
- * Mikael Pettersson : Power Management for local APIC NMI watchdog.
- * Pavel Machek and
- * Mikael Pettersson : PM converted to driver model. Disable/enable API.
- */
-
-#include <linux/nmi.h>
-#include <linux/mm.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/sysdev.h>
-#include <linux/sysctl.h>
-#include <linux/kprobes.h>
-#include <linux/cpumask.h>
-#include <linux/kdebug.h>
-
-#include <asm/smp.h>
-#include <asm/nmi.h>
-#include <asm/proto.h>
-#include <asm/mce.h>
-
-int unknown_nmi_panic;
-int nmi_watchdog_enabled;
-int panic_on_unrecovered_nmi;
-
-static cpumask_t backtrace_mask = CPU_MASK_NONE;
-
-/* nmi_active:
- * >0: the lapic NMI watchdog is active, but can be disabled
- * <0: the lapic NMI watchdog has not been set up, and cannot
- * be enabled
- * 0: the lapic NMI watchdog is disabled, but can be enabled
- */
-atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
-int panic_on_timeout;
-
-unsigned int nmi_watchdog = NMI_DEFAULT;
-static unsigned int nmi_hz = HZ;
-
-static DEFINE_PER_CPU(short, wd_enabled);
-
-/* local prototypes */
-static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
-
-/* Run after command line and cpu_init init, but before all other checks */
-void nmi_watchdog_default(void)
-{
- if (nmi_watchdog != NMI_DEFAULT)
- return;
- nmi_watchdog = NMI_NONE;
-}
-
-static int endflag __initdata = 0;
-
-#ifdef CONFIG_SMP
-/* The performance counters used by NMI_LOCAL_APIC don't trigger when
- * the CPU is idle. To make sure the NMI watchdog really ticks on all
- * CPUs during the test make them busy.
- */
-static __init void nmi_cpu_busy(void *data)
-{
- local_irq_enable_in_hardirq();
- /* Intentionally don't use cpu_relax here. This is
- to make sure that the performance counter really ticks,
- even if there is a simulator or similar that catches the
- pause instruction. On a real HT machine this is fine because
- all other CPUs are busy with "useless" delay loops and don't
- care if they get somewhat less cycles. */
- while (endflag == 0)
- mb();
-}
-#endif
-
-int __init check_nmi_watchdog (void)
-{
- int *counts;
- int cpu;
-
- if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED))
- return 0;
-
- if (!atomic_read(&nmi_active))
- return 0;
-
- counts = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
- if (!counts)
- return -1;
-
- printk(KERN_INFO "testing NMI watchdog ... ");
-
-#ifdef CONFIG_SMP
- if (nmi_watchdog == NMI_LOCAL_APIC)
- smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
-#endif
-
- for (cpu = 0; cpu < NR_CPUS; cpu++)
- counts[cpu] = cpu_pda(cpu)->__nmi_count;
- local_irq_enable();
- mdelay((20*1000)/nmi_hz); // wait 20 ticks
-
- for_each_online_cpu(cpu) {
- if (!per_cpu(wd_enabled, cpu))
- continue;
- if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) {
- printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
- cpu,
- counts[cpu],
- cpu_pda(cpu)->__nmi_count);
- per_cpu(wd_enabled, cpu) = 0;
- atomic_dec(&nmi_active);
- }
- }
- if (!atomic_read(&nmi_active)) {
- kfree(counts);
- atomic_set(&nmi_active, -1);
- endflag = 1;
- return -1;
- }
- endflag = 1;
- printk("OK.\n");
-
- /* now that we know it works we can reduce NMI frequency to
- something more reasonable; makes a difference in some configs */
- if (nmi_watchdog == NMI_LOCAL_APIC)
- nmi_hz = lapic_adjust_nmi_hz(1);
-
- kfree(counts);
- return 0;
-}
-
-int __init setup_nmi_watchdog(char *str)
-{
- int nmi;
-
- if (!strncmp(str,"panic",5)) {
- panic_on_timeout = 1;
- str = strchr(str, ',');
- if (!str)
- return 1;
- ++str;
- }
-
- get_option(&str, &nmi);
-
- if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
- return 0;
-
- nmi_watchdog = nmi;
- return 1;
-}
-
-__setup("nmi_watchdog=", setup_nmi_watchdog);
-
-
-static void __acpi_nmi_disable(void *__unused)
-{
- apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
-}
-
-/*
- * Disable timer based NMIs on all CPUs:
- */
-void acpi_nmi_disable(void)
-{
- if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
- on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
-}
-
-static void __acpi_nmi_enable(void *__unused)
-{
- apic_write(APIC_LVT0, APIC_DM_NMI);
-}
-
-/*
- * Enable timer based NMIs on all CPUs:
- */
-void acpi_nmi_enable(void)
-{
- if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
- on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
-}
-#ifdef CONFIG_PM
-
-static int nmi_pm_active; /* nmi_active before suspend */
-
-static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
-{
- /* only CPU0 goes here, other CPUs should be offline */
- nmi_pm_active = atomic_read(&nmi_active);
- stop_apic_nmi_watchdog(NULL);
- BUG_ON(atomic_read(&nmi_active) != 0);
- return 0;
-}
-
-static int lapic_nmi_resume(struct sys_device *dev)
-{
- /* only CPU0 goes here, other CPUs should be offline */
- if (nmi_pm_active > 0) {
- setup_apic_nmi_watchdog(NULL);
- touch_nmi_watchdog();
- }
- return 0;
-}
-
-static struct sysdev_class nmi_sysclass = {
- set_kset_name("lapic_nmi"),
- .resume = lapic_nmi_resume,
- .suspend = lapic_nmi_suspend,
-};
-
-static struct sys_device device_lapic_nmi = {
- .id = 0,
- .cls = &nmi_sysclass,
-};
-
-static int __init init_lapic_nmi_sysfs(void)
-{
- int error;
-
- /* should really be a BUG_ON but b/c this is an
- * init call, it just doesn't work. -dcz
- */
- if (nmi_watchdog != NMI_LOCAL_APIC)
- return 0;
-
- if ( atomic_read(&nmi_active) < 0 )
- return 0;
-
- error = sysdev_class_register(&nmi_sysclass);
- if (!error)
- error = sysdev_register(&device_lapic_nmi);
- return error;
-}
-/* must come after the local APIC's device_initcall() */
-late_initcall(init_lapic_nmi_sysfs);
-
-#endif /* CONFIG_PM */
-
-void setup_apic_nmi_watchdog(void *unused)
-{
- if (__get_cpu_var(wd_enabled) == 1)
- return;
-
- /* cheap hack to support suspend/resume */
- /* if cpu0 is not active neither should the other cpus */
- if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
- return;
-
- switch (nmi_watchdog) {
- case NMI_LOCAL_APIC:
- __get_cpu_var(wd_enabled) = 1;
- if (lapic_watchdog_init(nmi_hz) < 0) {
- __get_cpu_var(wd_enabled) = 0;
- return;
- }
- /* FALL THROUGH */
- case NMI_IO_APIC:
- __get_cpu_var(wd_enabled) = 1;
- atomic_inc(&nmi_active);
- }
-}
-
-void stop_apic_nmi_watchdog(void *unused)
-{
- /* only support LOCAL and IO APICs for now */
- if ((nmi_watchdog != NMI_LOCAL_APIC) &&
- (nmi_watchdog != NMI_IO_APIC))
- return;
- if (__get_cpu_var(wd_enabled) == 0)
- return;
- if (nmi_watchdog == NMI_LOCAL_APIC)
- lapic_watchdog_stop();
- __get_cpu_var(wd_enabled) = 0;
- atomic_dec(&nmi_active);
-}
-
-/*
- * the best way to detect whether a CPU has a 'hard lockup' problem
- * is to check it's local APIC timer IRQ counts. If they are not
- * changing then that CPU has some problem.
- *
- * as these watchdog NMI IRQs are generated on every CPU, we only
- * have to check the current processor.
- */
-
-static DEFINE_PER_CPU(unsigned, last_irq_sum);
-static DEFINE_PER_CPU(local_t, alert_counter);
-static DEFINE_PER_CPU(int, nmi_touch);
-
-void touch_nmi_watchdog(void)
-{
- if (nmi_watchdog > 0) {
- unsigned cpu;
-
- /*
- * Tell other CPUs to reset their alert counters. We cannot
- * do it ourselves because the alert count increase is not
- * atomic.
- */
- for_each_present_cpu(cpu) {
- if (per_cpu(nmi_touch, cpu) != 1)
- per_cpu(nmi_touch, cpu) = 1;
- }
- }
-
- touch_softlockup_watchdog();
-}
-
-int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
-{
- int sum;
- int touched = 0;
- int cpu = smp_processor_id();
- int rc = 0;
-
- /* check for other users first */
- if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
- == NOTIFY_STOP) {
- rc = 1;
- touched = 1;
- }
-
- sum = read_pda(apic_timer_irqs);
- if (__get_cpu_var(nmi_touch)) {
- __get_cpu_var(nmi_touch) = 0;
- touched = 1;
- }
-
- if (cpu_isset(cpu, backtrace_mask)) {
- static DEFINE_SPINLOCK(lock); /* Serialise the printks */
-
- spin_lock(&lock);
- printk("NMI backtrace for cpu %d\n", cpu);
- dump_stack();
- spin_unlock(&lock);
- cpu_clear(cpu, backtrace_mask);
- }
-
-#ifdef CONFIG_X86_MCE
- /* Could check oops_in_progress here too, but it's safer
- not too */
- if (atomic_read(&mce_entry) > 0)
- touched = 1;
-#endif
- /* if the apic timer isn't firing, this cpu isn't doing much */
- if (!touched && __get_cpu_var(last_irq_sum) == sum) {
- /*
- * Ayiee, looks like this CPU is stuck ...
- * wait a few IRQs (5 seconds) before doing the oops ...
- */
- local_inc(&__get_cpu_var(alert_counter));
- if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz)
- die_nmi("NMI Watchdog detected LOCKUP on CPU %d\n", regs,
- panic_on_timeout);
- } else {
- __get_cpu_var(last_irq_sum) = sum;
- local_set(&__get_cpu_var(alert_counter), 0);
- }
-
- /* see if the nmi watchdog went off */
- if (!__get_cpu_var(wd_enabled))
- return rc;
- switch (nmi_watchdog) {
- case NMI_LOCAL_APIC:
- rc |= lapic_wd_event(nmi_hz);
- break;
- case NMI_IO_APIC:
- /* don't know how to accurately check for this.
- * just assume it was a watchdog timer interrupt
- * This matches the old behaviour.
- */
- rc = 1;
- break;
- }
- return rc;
-}
-
-static unsigned ignore_nmis;
-
-asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code)
-{
- nmi_enter();
- add_pda(__nmi_count,1);
- if (!ignore_nmis)
- default_do_nmi(regs);
- nmi_exit();
-}
-
-int do_nmi_callback(struct pt_regs * regs, int cpu)
-{
-#ifdef CONFIG_SYSCTL
- if (unknown_nmi_panic)
- return unknown_nmi_panic_callback(regs, cpu);
-#endif
- return 0;
-}
-
-void stop_nmi(void)
-{
- acpi_nmi_disable();
- ignore_nmis++;
-}
-
-void restart_nmi(void)
-{
- ignore_nmis--;
- acpi_nmi_enable();
-}
-
-#ifdef CONFIG_SYSCTL
-
-static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
-{
- unsigned char reason = get_nmi_reason();
- char buf[64];
-
- sprintf(buf, "NMI received for unknown reason %02x\n", reason);
- die_nmi(buf, regs, 1); /* Always panic here */
- return 0;
-}
-
-/*
- * proc handler for /proc/sys/kernel/nmi
- */
-int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
- void __user *buffer, size_t *length, loff_t *ppos)
-{
- int old_state;
-
- nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
- old_state = nmi_watchdog_enabled;
- proc_dointvec(table, write, file, buffer, length, ppos);
- if (!!old_state == !!nmi_watchdog_enabled)
- return 0;
-
- if (atomic_read(&nmi_active) < 0 || nmi_watchdog == NMI_DISABLED) {
- printk( KERN_WARNING "NMI watchdog is permanently disabled\n");
- return -EIO;
- }
-
- /* if nmi_watchdog is not set yet, then set it */
- nmi_watchdog_default();
-
- if (nmi_watchdog == NMI_LOCAL_APIC) {
- if (nmi_watchdog_enabled)
- enable_lapic_nmi_watchdog();
- else
- disable_lapic_nmi_watchdog();
- } else {
- printk( KERN_WARNING
- "NMI watchdog doesn't know what hardware to touch\n");
- return -EIO;
- }
- return 0;
-}
-
-#endif
-
-void __trigger_all_cpu_backtrace(void)
-{
- int i;
-
- backtrace_mask = cpu_online_map;
- /* Wait for up to 10 seconds for all CPUs to do the backtrace */
- for (i = 0; i < 10 * 1000; i++) {
- if (cpus_empty(backtrace_mask))
- break;
- mdelay(1);
- }
-}
-
-EXPORT_SYMBOL(nmi_active);
-EXPORT_SYMBOL(nmi_watchdog);
-EXPORT_SYMBOL(touch_nmi_watchdog);
diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c
deleted file mode 100644
index 71da01e73f03..000000000000
--- a/arch/x86_64/kernel/pci-calgary.c
+++ /dev/null
@@ -1,1578 +0,0 @@
-/*
- * Derived from arch/powerpc/kernel/iommu.c
- *
- * Copyright IBM Corporation, 2006-2007
- * Copyright (C) 2006 Jon Mason <jdmason@kudzu.us>
- *
- * Author: Jon Mason <jdmason@kudzu.us>
- * Author: Muli Ben-Yehuda <muli@il.ibm.com>
-
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-#include <linux/spinlock.h>
-#include <linux/string.h>
-#include <linux/dma-mapping.h>
-#include <linux/init.h>
-#include <linux/bitops.h>
-#include <linux/pci_ids.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <asm/iommu.h>
-#include <asm/calgary.h>
-#include <asm/tce.h>
-#include <asm/pci-direct.h>
-#include <asm/system.h>
-#include <asm/dma.h>
-#include <asm/rio.h>
-
-#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT
-int use_calgary __read_mostly = 1;
-#else
-int use_calgary __read_mostly = 0;
-#endif /* CONFIG_CALGARY_DEFAULT_ENABLED */
-
-#define PCI_DEVICE_ID_IBM_CALGARY 0x02a1
-#define PCI_DEVICE_ID_IBM_CALIOC2 0x0308
-
-/* register offsets inside the host bridge space */
-#define CALGARY_CONFIG_REG 0x0108
-#define PHB_CSR_OFFSET 0x0110 /* Channel Status */
-#define PHB_PLSSR_OFFSET 0x0120
-#define PHB_CONFIG_RW_OFFSET 0x0160
-#define PHB_IOBASE_BAR_LOW 0x0170
-#define PHB_IOBASE_BAR_HIGH 0x0180
-#define PHB_MEM_1_LOW 0x0190
-#define PHB_MEM_1_HIGH 0x01A0
-#define PHB_IO_ADDR_SIZE 0x01B0
-#define PHB_MEM_1_SIZE 0x01C0
-#define PHB_MEM_ST_OFFSET 0x01D0
-#define PHB_AER_OFFSET 0x0200
-#define PHB_CONFIG_0_HIGH 0x0220
-#define PHB_CONFIG_0_LOW 0x0230
-#define PHB_CONFIG_0_END 0x0240
-#define PHB_MEM_2_LOW 0x02B0
-#define PHB_MEM_2_HIGH 0x02C0
-#define PHB_MEM_2_SIZE_HIGH 0x02D0
-#define PHB_MEM_2_SIZE_LOW 0x02E0
-#define PHB_DOSHOLE_OFFSET 0x08E0
-
-/* CalIOC2 specific */
-#define PHB_SAVIOR_L2 0x0DB0
-#define PHB_PAGE_MIG_CTRL 0x0DA8
-#define PHB_PAGE_MIG_DEBUG 0x0DA0
-#define PHB_ROOT_COMPLEX_STATUS 0x0CB0
-
-/* PHB_CONFIG_RW */
-#define PHB_TCE_ENABLE 0x20000000
-#define PHB_SLOT_DISABLE 0x1C000000
-#define PHB_DAC_DISABLE 0x01000000
-#define PHB_MEM2_ENABLE 0x00400000
-#define PHB_MCSR_ENABLE 0x00100000
-/* TAR (Table Address Register) */
-#define TAR_SW_BITS 0x0000ffffffff800fUL
-#define TAR_VALID 0x0000000000000008UL
-/* CSR (Channel/DMA Status Register) */
-#define CSR_AGENT_MASK 0xffe0ffff
-/* CCR (Calgary Configuration Register) */
-#define CCR_2SEC_TIMEOUT 0x000000000000000EUL
-/* PMCR/PMDR (Page Migration Control/Debug Registers */
-#define PMR_SOFTSTOP 0x80000000
-#define PMR_SOFTSTOPFAULT 0x40000000
-#define PMR_HARDSTOP 0x20000000
-
-#define MAX_NUM_OF_PHBS 8 /* how many PHBs in total? */
-#define MAX_NUM_CHASSIS 8 /* max number of chassis */
-/* MAX_PHB_BUS_NUM is the maximal possible dev->bus->number */
-#define MAX_PHB_BUS_NUM (MAX_NUM_OF_PHBS * MAX_NUM_CHASSIS * 2)
-#define PHBS_PER_CALGARY 4
-
-/* register offsets in Calgary's internal register space */
-static const unsigned long tar_offsets[] = {
- 0x0580 /* TAR0 */,
- 0x0588 /* TAR1 */,
- 0x0590 /* TAR2 */,
- 0x0598 /* TAR3 */
-};
-
-static const unsigned long split_queue_offsets[] = {
- 0x4870 /* SPLIT QUEUE 0 */,
- 0x5870 /* SPLIT QUEUE 1 */,
- 0x6870 /* SPLIT QUEUE 2 */,
- 0x7870 /* SPLIT QUEUE 3 */
-};
-
-static const unsigned long phb_offsets[] = {
- 0x8000 /* PHB0 */,
- 0x9000 /* PHB1 */,
- 0xA000 /* PHB2 */,
- 0xB000 /* PHB3 */
-};
-
-/* PHB debug registers */
-
-static const unsigned long phb_debug_offsets[] = {
- 0x4000 /* PHB 0 DEBUG */,
- 0x5000 /* PHB 1 DEBUG */,
- 0x6000 /* PHB 2 DEBUG */,
- 0x7000 /* PHB 3 DEBUG */
-};
-
-/*
- * STUFF register for each debug PHB,
- * byte 1 = start bus number, byte 2 = end bus number
- */
-
-#define PHB_DEBUG_STUFF_OFFSET 0x0020
-
-#define EMERGENCY_PAGES 32 /* = 128KB */
-
-unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED;
-static int translate_empty_slots __read_mostly = 0;
-static int calgary_detected __read_mostly = 0;
-
-static struct rio_table_hdr *rio_table_hdr __initdata;
-static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata;
-static struct rio_detail *rio_devs[MAX_NUMNODES * 4] __initdata;
-
-struct calgary_bus_info {
- void *tce_space;
- unsigned char translation_disabled;
- signed char phbid;
- void __iomem *bbar;
-};
-
-static void calgary_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev);
-static void calgary_tce_cache_blast(struct iommu_table *tbl);
-static void calgary_dump_error_regs(struct iommu_table *tbl);
-static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev);
-static void calioc2_tce_cache_blast(struct iommu_table *tbl);
-static void calioc2_dump_error_regs(struct iommu_table *tbl);
-
-static struct cal_chipset_ops calgary_chip_ops = {
- .handle_quirks = calgary_handle_quirks,
- .tce_cache_blast = calgary_tce_cache_blast,
- .dump_error_regs = calgary_dump_error_regs
-};
-
-static struct cal_chipset_ops calioc2_chip_ops = {
- .handle_quirks = calioc2_handle_quirks,
- .tce_cache_blast = calioc2_tce_cache_blast,
- .dump_error_regs = calioc2_dump_error_regs
-};
-
-static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, };
-
-/* enable this to stress test the chip's TCE cache */
-#ifdef CONFIG_IOMMU_DEBUG
-int debugging __read_mostly = 1;
-
-static inline unsigned long verify_bit_range(unsigned long* bitmap,
- int expected, unsigned long start, unsigned long end)
-{
- unsigned long idx = start;
-
- BUG_ON(start >= end);
-
- while (idx < end) {
- if (!!test_bit(idx, bitmap) != expected)
- return idx;
- ++idx;
- }
-
- /* all bits have the expected value */
- return ~0UL;
-}
-#else /* debugging is disabled */
-int debugging __read_mostly = 0;
-
-static inline unsigned long verify_bit_range(unsigned long* bitmap,
- int expected, unsigned long start, unsigned long end)
-{
- return ~0UL;
-}
-
-#endif /* CONFIG_IOMMU_DEBUG */
-
-static inline unsigned int num_dma_pages(unsigned long dma, unsigned int dmalen)
-{
- unsigned int npages;
-
- npages = PAGE_ALIGN(dma + dmalen) - (dma & PAGE_MASK);
- npages >>= PAGE_SHIFT;
-
- return npages;
-}
-
-static inline int translate_phb(struct pci_dev* dev)
-{
- int disabled = bus_info[dev->bus->number].translation_disabled;
- return !disabled;
-}
-
-static void iommu_range_reserve(struct iommu_table *tbl,
- unsigned long start_addr, unsigned int npages)
-{
- unsigned long index;
- unsigned long end;
- unsigned long badbit;
- unsigned long flags;
-
- index = start_addr >> PAGE_SHIFT;
-
- /* bail out if we're asked to reserve a region we don't cover */
- if (index >= tbl->it_size)
- return;
-
- end = index + npages;
- if (end > tbl->it_size) /* don't go off the table */
- end = tbl->it_size;
-
- spin_lock_irqsave(&tbl->it_lock, flags);
-
- badbit = verify_bit_range(tbl->it_map, 0, index, end);
- if (badbit != ~0UL) {
- if (printk_ratelimit())
- printk(KERN_ERR "Calgary: entry already allocated at "
- "0x%lx tbl %p dma 0x%lx npages %u\n",
- badbit, tbl, start_addr, npages);
- }
-
- set_bit_string(tbl->it_map, index, npages);
-
- spin_unlock_irqrestore(&tbl->it_lock, flags);
-}
-
-static unsigned long iommu_range_alloc(struct iommu_table *tbl,
- unsigned int npages)
-{
- unsigned long flags;
- unsigned long offset;
-
- BUG_ON(npages == 0);
-
- spin_lock_irqsave(&tbl->it_lock, flags);
-
- offset = find_next_zero_string(tbl->it_map, tbl->it_hint,
- tbl->it_size, npages);
- if (offset == ~0UL) {
- tbl->chip_ops->tce_cache_blast(tbl);
- offset = find_next_zero_string(tbl->it_map, 0,
- tbl->it_size, npages);
- if (offset == ~0UL) {
- printk(KERN_WARNING "Calgary: IOMMU full.\n");
- spin_unlock_irqrestore(&tbl->it_lock, flags);
- if (panic_on_overflow)
- panic("Calgary: fix the allocator.\n");
- else
- return bad_dma_address;
- }
- }
-
- set_bit_string(tbl->it_map, offset, npages);
- tbl->it_hint = offset + npages;
- BUG_ON(tbl->it_hint > tbl->it_size);
-
- spin_unlock_irqrestore(&tbl->it_lock, flags);
-
- return offset;
-}
-
-static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *vaddr,
- unsigned int npages, int direction)
-{
- unsigned long entry;
- dma_addr_t ret = bad_dma_address;
-
- entry = iommu_range_alloc(tbl, npages);
-
- if (unlikely(entry == bad_dma_address))
- goto error;
-
- /* set the return dma address */
- ret = (entry << PAGE_SHIFT) | ((unsigned long)vaddr & ~PAGE_MASK);
-
- /* put the TCEs in the HW table */
- tce_build(tbl, entry, npages, (unsigned long)vaddr & PAGE_MASK,
- direction);
-
- return ret;
-
-error:
- printk(KERN_WARNING "Calgary: failed to allocate %u pages in "
- "iommu %p\n", npages, tbl);
- return bad_dma_address;
-}
-
-static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
- unsigned int npages)
-{
- unsigned long entry;
- unsigned long badbit;
- unsigned long badend;
- unsigned long flags;
-
- /* were we called with bad_dma_address? */
- badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE);
- if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) {
- printk(KERN_ERR "Calgary: driver tried unmapping bad DMA "
- "address 0x%Lx\n", dma_addr);
- WARN_ON(1);
- return;
- }
-
- entry = dma_addr >> PAGE_SHIFT;
-
- BUG_ON(entry + npages > tbl->it_size);
-
- tce_free(tbl, entry, npages);
-
- spin_lock_irqsave(&tbl->it_lock, flags);
-
- badbit = verify_bit_range(tbl->it_map, 1, entry, entry + npages);
- if (badbit != ~0UL) {
- if (printk_ratelimit())
- printk(KERN_ERR "Calgary: bit is off at 0x%lx "
- "tbl %p dma 0x%Lx entry 0x%lx npages %u\n",
- badbit, tbl, dma_addr, entry, npages);
- }
-
- __clear_bit_string(tbl->it_map, entry, npages);
-
- spin_unlock_irqrestore(&tbl->it_lock, flags);
-}
-
-static inline struct iommu_table *find_iommu_table(struct device *dev)
-{
- struct pci_dev *pdev;
- struct pci_bus *pbus;
- struct iommu_table *tbl;
-
- pdev = to_pci_dev(dev);
-
- pbus = pdev->bus;
-
- /* is the device behind a bridge? Look for the root bus */
- while (pbus->parent)
- pbus = pbus->parent;
-
- tbl = pci_iommu(pbus);
-
- BUG_ON(tbl && (tbl->it_busno != pbus->number));
-
- return tbl;
-}
-
-static void calgary_unmap_sg(struct device *dev,
- struct scatterlist *sglist, int nelems, int direction)
-{
- struct iommu_table *tbl = find_iommu_table(dev);
-
- if (!translate_phb(to_pci_dev(dev)))
- return;
-
- while (nelems--) {
- unsigned int npages;
- dma_addr_t dma = sglist->dma_address;
- unsigned int dmalen = sglist->dma_length;
-
- if (dmalen == 0)
- break;
-
- npages = num_dma_pages(dma, dmalen);
- iommu_free(tbl, dma, npages);
- sglist++;
- }
-}
-
-static int calgary_nontranslate_map_sg(struct device* dev,
- struct scatterlist *sg, int nelems, int direction)
-{
- int i;
-
- for (i = 0; i < nelems; i++ ) {
- struct scatterlist *s = &sg[i];
- BUG_ON(!s->page);
- s->dma_address = virt_to_bus(page_address(s->page) +s->offset);
- s->dma_length = s->length;
- }
- return nelems;
-}
-
-static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
- int nelems, int direction)
-{
- struct iommu_table *tbl = find_iommu_table(dev);
- unsigned long vaddr;
- unsigned int npages;
- unsigned long entry;
- int i;
-
- if (!translate_phb(to_pci_dev(dev)))
- return calgary_nontranslate_map_sg(dev, sg, nelems, direction);
-
- for (i = 0; i < nelems; i++ ) {
- struct scatterlist *s = &sg[i];
- BUG_ON(!s->page);
-
- vaddr = (unsigned long)page_address(s->page) + s->offset;
- npages = num_dma_pages(vaddr, s->length);
-
- entry = iommu_range_alloc(tbl, npages);
- if (entry == bad_dma_address) {
- /* makes sure unmap knows to stop */
- s->dma_length = 0;
- goto error;
- }
-
- s->dma_address = (entry << PAGE_SHIFT) | s->offset;
-
- /* insert into HW table */
- tce_build(tbl, entry, npages, vaddr & PAGE_MASK,
- direction);
-
- s->dma_length = s->length;
- }
-
- return nelems;
-error:
- calgary_unmap_sg(dev, sg, nelems, direction);
- for (i = 0; i < nelems; i++) {
- sg[i].dma_address = bad_dma_address;
- sg[i].dma_length = 0;
- }
- return 0;
-}
-
-static dma_addr_t calgary_map_single(struct device *dev, void *vaddr,
- size_t size, int direction)
-{
- dma_addr_t dma_handle = bad_dma_address;
- unsigned long uaddr;
- unsigned int npages;
- struct iommu_table *tbl = find_iommu_table(dev);
-
- uaddr = (unsigned long)vaddr;
- npages = num_dma_pages(uaddr, size);
-
- if (translate_phb(to_pci_dev(dev)))
- dma_handle = iommu_alloc(tbl, vaddr, npages, direction);
- else
- dma_handle = virt_to_bus(vaddr);
-
- return dma_handle;
-}
-
-static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle,
- size_t size, int direction)
-{
- struct iommu_table *tbl = find_iommu_table(dev);
- unsigned int npages;
-
- if (!translate_phb(to_pci_dev(dev)))
- return;
-
- npages = num_dma_pages(dma_handle, size);
- iommu_free(tbl, dma_handle, npages);
-}
-
-static void* calgary_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag)
-{
- void *ret = NULL;
- dma_addr_t mapping;
- unsigned int npages, order;
- struct iommu_table *tbl = find_iommu_table(dev);
-
- size = PAGE_ALIGN(size); /* size rounded up to full pages */
- npages = size >> PAGE_SHIFT;
- order = get_order(size);
-
- /* alloc enough pages (and possibly more) */
- ret = (void *)__get_free_pages(flag, order);
- if (!ret)
- goto error;
- memset(ret, 0, size);
-
- if (translate_phb(to_pci_dev(dev))) {
- /* set up tces to cover the allocated range */
- mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL);
- if (mapping == bad_dma_address)
- goto free;
-
- *dma_handle = mapping;
- } else /* non translated slot */
- *dma_handle = virt_to_bus(ret);
-
- return ret;
-
-free:
- free_pages((unsigned long)ret, get_order(size));
- ret = NULL;
-error:
- return ret;
-}
-
-static const struct dma_mapping_ops calgary_dma_ops = {
- .alloc_coherent = calgary_alloc_coherent,
- .map_single = calgary_map_single,
- .unmap_single = calgary_unmap_single,
- .map_sg = calgary_map_sg,
- .unmap_sg = calgary_unmap_sg,
-};
-
-static inline void __iomem * busno_to_bbar(unsigned char num)
-{
- return bus_info[num].bbar;
-}
-
-static inline int busno_to_phbid(unsigned char num)
-{
- return bus_info[num].phbid;
-}
-
-static inline unsigned long split_queue_offset(unsigned char num)
-{
- size_t idx = busno_to_phbid(num);
-
- return split_queue_offsets[idx];
-}
-
-static inline unsigned long tar_offset(unsigned char num)
-{
- size_t idx = busno_to_phbid(num);
-
- return tar_offsets[idx];
-}
-
-static inline unsigned long phb_offset(unsigned char num)
-{
- size_t idx = busno_to_phbid(num);
-
- return phb_offsets[idx];
-}
-
-static inline void __iomem* calgary_reg(void __iomem *bar, unsigned long offset)
-{
- unsigned long target = ((unsigned long)bar) | offset;
- return (void __iomem*)target;
-}
-
-static inline int is_calioc2(unsigned short device)
-{
- return (device == PCI_DEVICE_ID_IBM_CALIOC2);
-}
-
-static inline int is_calgary(unsigned short device)
-{
- return (device == PCI_DEVICE_ID_IBM_CALGARY);
-}
-
-static inline int is_cal_pci_dev(unsigned short device)
-{
- return (is_calgary(device) || is_calioc2(device));
-}
-
-static void calgary_tce_cache_blast(struct iommu_table *tbl)
-{
- u64 val;
- u32 aer;
- int i = 0;
- void __iomem *bbar = tbl->bbar;
- void __iomem *target;
-
- /* disable arbitration on the bus */
- target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_AER_OFFSET);
- aer = readl(target);
- writel(0, target);
-
- /* read plssr to ensure it got there */
- target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_PLSSR_OFFSET);
- val = readl(target);
-
- /* poll split queues until all DMA activity is done */
- target = calgary_reg(bbar, split_queue_offset(tbl->it_busno));
- do {
- val = readq(target);
- i++;
- } while ((val & 0xff) != 0xff && i < 100);
- if (i == 100)
- printk(KERN_WARNING "Calgary: PCI bus not quiesced, "
- "continuing anyway\n");
-
- /* invalidate TCE cache */
- target = calgary_reg(bbar, tar_offset(tbl->it_busno));
- writeq(tbl->tar_val, target);
-
- /* enable arbitration */
- target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_AER_OFFSET);
- writel(aer, target);
- (void)readl(target); /* flush */
-}
-
-static void calioc2_tce_cache_blast(struct iommu_table *tbl)
-{
- void __iomem *bbar = tbl->bbar;
- void __iomem *target;
- u64 val64;
- u32 val;
- int i = 0;
- int count = 1;
- unsigned char bus = tbl->it_busno;
-
-begin:
- printk(KERN_DEBUG "Calgary: CalIOC2 bus 0x%x entering tce cache blast "
- "sequence - count %d\n", bus, count);
-
- /* 1. using the Page Migration Control reg set SoftStop */
- target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_CTRL);
- val = be32_to_cpu(readl(target));
- printk(KERN_DEBUG "1a. read 0x%x [LE] from %p\n", val, target);
- val |= PMR_SOFTSTOP;
- printk(KERN_DEBUG "1b. writing 0x%x [LE] to %p\n", val, target);
- writel(cpu_to_be32(val), target);
-
- /* 2. poll split queues until all DMA activity is done */
- printk(KERN_DEBUG "2a. starting to poll split queues\n");
- target = calgary_reg(bbar, split_queue_offset(bus));
- do {
- val64 = readq(target);
- i++;
- } while ((val64 & 0xff) != 0xff && i < 100);
- if (i == 100)
- printk(KERN_WARNING "CalIOC2: PCI bus not quiesced, "
- "continuing anyway\n");
-
- /* 3. poll Page Migration DEBUG for SoftStopFault */
- target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_DEBUG);
- val = be32_to_cpu(readl(target));
- printk(KERN_DEBUG "3. read 0x%x [LE] from %p\n", val, target);
-
- /* 4. if SoftStopFault - goto (1) */
- if (val & PMR_SOFTSTOPFAULT) {
- if (++count < 100)
- goto begin;
- else {
- printk(KERN_WARNING "CalIOC2: too many SoftStopFaults, "
- "aborting TCE cache flush sequence!\n");
- return; /* pray for the best */
- }
- }
-
- /* 5. Slam into HardStop by reading PHB_PAGE_MIG_CTRL */
- target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_CTRL);
- printk(KERN_DEBUG "5a. slamming into HardStop by reading %p\n", target);
- val = be32_to_cpu(readl(target));
- printk(KERN_DEBUG "5b. read 0x%x [LE] from %p\n", val, target);
- target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_DEBUG);
- val = be32_to_cpu(readl(target));
- printk(KERN_DEBUG "5c. read 0x%x [LE] from %p (debug)\n", val, target);
-
- /* 6. invalidate TCE cache */
- printk(KERN_DEBUG "6. invalidating TCE cache\n");
- target = calgary_reg(bbar, tar_offset(bus));
- writeq(tbl->tar_val, target);
-
- /* 7. Re-read PMCR */
- printk(KERN_DEBUG "7a. Re-reading PMCR\n");
- target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_CTRL);
- val = be32_to_cpu(readl(target));
- printk(KERN_DEBUG "7b. read 0x%x [LE] from %p\n", val, target);
-
- /* 8. Remove HardStop */
- printk(KERN_DEBUG "8a. removing HardStop from PMCR\n");
- target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_CTRL);
- val = 0;
- printk(KERN_DEBUG "8b. writing 0x%x [LE] to %p\n", val, target);
- writel(cpu_to_be32(val), target);
- val = be32_to_cpu(readl(target));
- printk(KERN_DEBUG "8c. read 0x%x [LE] from %p\n", val, target);
-}
-
-static void __init calgary_reserve_mem_region(struct pci_dev *dev, u64 start,
- u64 limit)
-{
- unsigned int numpages;
-
- limit = limit | 0xfffff;
- limit++;
-
- numpages = ((limit - start) >> PAGE_SHIFT);
- iommu_range_reserve(pci_iommu(dev->bus), start, numpages);
-}
-
-static void __init calgary_reserve_peripheral_mem_1(struct pci_dev *dev)
-{
- void __iomem *target;
- u64 low, high, sizelow;
- u64 start, limit;
- struct iommu_table *tbl = pci_iommu(dev->bus);
- unsigned char busnum = dev->bus->number;
- void __iomem *bbar = tbl->bbar;
-
- /* peripheral MEM_1 region */
- target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_LOW);
- low = be32_to_cpu(readl(target));
- target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_HIGH);
- high = be32_to_cpu(readl(target));
- target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_SIZE);
- sizelow = be32_to_cpu(readl(target));
-
- start = (high << 32) | low;
- limit = sizelow;
-
- calgary_reserve_mem_region(dev, start, limit);
-}
-
-static void __init calgary_reserve_peripheral_mem_2(struct pci_dev *dev)
-{
- void __iomem *target;
- u32 val32;
- u64 low, high, sizelow, sizehigh;
- u64 start, limit;
- struct iommu_table *tbl = pci_iommu(dev->bus);
- unsigned char busnum = dev->bus->number;
- void __iomem *bbar = tbl->bbar;
-
- /* is it enabled? */
- target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET);
- val32 = be32_to_cpu(readl(target));
- if (!(val32 & PHB_MEM2_ENABLE))
- return;
-
- target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_LOW);
- low = be32_to_cpu(readl(target));
- target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_HIGH);
- high = be32_to_cpu(readl(target));
- target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_SIZE_LOW);
- sizelow = be32_to_cpu(readl(target));
- target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_SIZE_HIGH);
- sizehigh = be32_to_cpu(readl(target));
-
- start = (high << 32) | low;
- limit = (sizehigh << 32) | sizelow;
-
- calgary_reserve_mem_region(dev, start, limit);
-}
-
-/*
- * some regions of the IO address space do not get translated, so we
- * must not give devices IO addresses in those regions. The regions
- * are the 640KB-1MB region and the two PCI peripheral memory holes.
- * Reserve all of them in the IOMMU bitmap to avoid giving them out
- * later.
- */
-static void __init calgary_reserve_regions(struct pci_dev *dev)
-{
- unsigned int npages;
- u64 start;
- struct iommu_table *tbl = pci_iommu(dev->bus);
-
- /* reserve EMERGENCY_PAGES from bad_dma_address and up */
- iommu_range_reserve(tbl, bad_dma_address, EMERGENCY_PAGES);
-
- /* avoid the BIOS/VGA first 640KB-1MB region */
- /* for CalIOC2 - avoid the entire first MB */
- if (is_calgary(dev->device)) {
- start = (640 * 1024);
- npages = ((1024 - 640) * 1024) >> PAGE_SHIFT;
- } else { /* calioc2 */
- start = 0;
- npages = (1 * 1024 * 1024) >> PAGE_SHIFT;
- }
- iommu_range_reserve(tbl, start, npages);
-
- /* reserve the two PCI peripheral memory regions in IO space */
- calgary_reserve_peripheral_mem_1(dev);
- calgary_reserve_peripheral_mem_2(dev);
-}
-
-static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar)
-{
- u64 val64;
- u64 table_phys;
- void __iomem *target;
- int ret;
- struct iommu_table *tbl;
-
- /* build TCE tables for each PHB */
- ret = build_tce_table(dev, bbar);
- if (ret)
- return ret;
-
- tbl = pci_iommu(dev->bus);
- tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space;
- tce_free(tbl, 0, tbl->it_size);
-
- if (is_calgary(dev->device))
- tbl->chip_ops = &calgary_chip_ops;
- else if (is_calioc2(dev->device))
- tbl->chip_ops = &calioc2_chip_ops;
- else
- BUG();
-
- calgary_reserve_regions(dev);
-
- /* set TARs for each PHB */
- target = calgary_reg(bbar, tar_offset(dev->bus->number));
- val64 = be64_to_cpu(readq(target));
-
- /* zero out all TAR bits under sw control */
- val64 &= ~TAR_SW_BITS;
- table_phys = (u64)__pa(tbl->it_base);
-
- val64 |= table_phys;
-
- BUG_ON(specified_table_size > TCE_TABLE_SIZE_8M);
- val64 |= (u64) specified_table_size;
-
- tbl->tar_val = cpu_to_be64(val64);
-
- writeq(tbl->tar_val, target);
- readq(target); /* flush */
-
- return 0;
-}
-
-static void __init calgary_free_bus(struct pci_dev *dev)
-{
- u64 val64;
- struct iommu_table *tbl = pci_iommu(dev->bus);
- void __iomem *target;
- unsigned int bitmapsz;
-
- target = calgary_reg(tbl->bbar, tar_offset(dev->bus->number));
- val64 = be64_to_cpu(readq(target));
- val64 &= ~TAR_SW_BITS;
- writeq(cpu_to_be64(val64), target);
- readq(target); /* flush */
-
- bitmapsz = tbl->it_size / BITS_PER_BYTE;
- free_pages((unsigned long)tbl->it_map, get_order(bitmapsz));
- tbl->it_map = NULL;
-
- kfree(tbl);
-
- set_pci_iommu(dev->bus, NULL);
-
- /* Can't free bootmem allocated memory after system is up :-( */
- bus_info[dev->bus->number].tce_space = NULL;
-}
-
-static void calgary_dump_error_regs(struct iommu_table *tbl)
-{
- void __iomem *bbar = tbl->bbar;
- void __iomem *target;
- u32 csr, plssr;
-
- target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_CSR_OFFSET);
- csr = be32_to_cpu(readl(target));
-
- target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_PLSSR_OFFSET);
- plssr = be32_to_cpu(readl(target));
-
- /* If no error, the agent ID in the CSR is not valid */
- printk(KERN_EMERG "Calgary: DMA error on Calgary PHB 0x%x, "
- "0x%08x@CSR 0x%08x@PLSSR\n", tbl->it_busno, csr, plssr);
-}
-
-static void calioc2_dump_error_regs(struct iommu_table *tbl)
-{
- void __iomem *bbar = tbl->bbar;
- u32 csr, csmr, plssr, mck, rcstat;
- void __iomem *target;
- unsigned long phboff = phb_offset(tbl->it_busno);
- unsigned long erroff;
- u32 errregs[7];
- int i;
-
- /* dump CSR */
- target = calgary_reg(bbar, phboff | PHB_CSR_OFFSET);
- csr = be32_to_cpu(readl(target));
- /* dump PLSSR */
- target = calgary_reg(bbar, phboff | PHB_PLSSR_OFFSET);
- plssr = be32_to_cpu(readl(target));
- /* dump CSMR */
- target = calgary_reg(bbar, phboff | 0x290);
- csmr = be32_to_cpu(readl(target));
- /* dump mck */
- target = calgary_reg(bbar, phboff | 0x800);
- mck = be32_to_cpu(readl(target));
-
- printk(KERN_EMERG "Calgary: DMA error on CalIOC2 PHB 0x%x\n",
- tbl->it_busno);
-
- printk(KERN_EMERG "Calgary: 0x%08x@CSR 0x%08x@PLSSR 0x%08x@CSMR 0x%08x@MCK\n",
- csr, plssr, csmr, mck);
-
- /* dump rest of error regs */
- printk(KERN_EMERG "Calgary: ");
- for (i = 0; i < ARRAY_SIZE(errregs); i++) {
- /* err regs are at 0x810 - 0x870 */
- erroff = (0x810 + (i * 0x10));
- target = calgary_reg(bbar, phboff | erroff);
- errregs[i] = be32_to_cpu(readl(target));
- printk("0x%08x@0x%lx ", errregs[i], erroff);
- }
- printk("\n");
-
- /* root complex status */
- target = calgary_reg(bbar, phboff | PHB_ROOT_COMPLEX_STATUS);
- rcstat = be32_to_cpu(readl(target));
- printk(KERN_EMERG "Calgary: 0x%08x@0x%x\n", rcstat,
- PHB_ROOT_COMPLEX_STATUS);
-}
-
-static void calgary_watchdog(unsigned long data)
-{
- struct pci_dev *dev = (struct pci_dev *)data;
- struct iommu_table *tbl = pci_iommu(dev->bus);
- void __iomem *bbar = tbl->bbar;
- u32 val32;
- void __iomem *target;
-
- target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_CSR_OFFSET);
- val32 = be32_to_cpu(readl(target));
-
- /* If no error, the agent ID in the CSR is not valid */
- if (val32 & CSR_AGENT_MASK) {
- tbl->chip_ops->dump_error_regs(tbl);
-
- /* reset error */
- writel(0, target);
-
- /* Disable bus that caused the error */
- target = calgary_reg(bbar, phb_offset(tbl->it_busno) |
- PHB_CONFIG_RW_OFFSET);
- val32 = be32_to_cpu(readl(target));
- val32 |= PHB_SLOT_DISABLE;
- writel(cpu_to_be32(val32), target);
- readl(target); /* flush */
- } else {
- /* Reset the timer */
- mod_timer(&tbl->watchdog_timer, jiffies + 2 * HZ);
- }
-}
-
-static void __init calgary_set_split_completion_timeout(void __iomem *bbar,
- unsigned char busnum, unsigned long timeout)
-{
- u64 val64;
- void __iomem *target;
- unsigned int phb_shift = ~0; /* silence gcc */
- u64 mask;
-
- switch (busno_to_phbid(busnum)) {
- case 0: phb_shift = (63 - 19);
- break;
- case 1: phb_shift = (63 - 23);
- break;
- case 2: phb_shift = (63 - 27);
- break;
- case 3: phb_shift = (63 - 35);
- break;
- default:
- BUG_ON(busno_to_phbid(busnum));
- }
-
- target = calgary_reg(bbar, CALGARY_CONFIG_REG);
- val64 = be64_to_cpu(readq(target));
-
- /* zero out this PHB's timer bits */
- mask = ~(0xFUL << phb_shift);
- val64 &= mask;
- val64 |= (timeout << phb_shift);
- writeq(cpu_to_be64(val64), target);
- readq(target); /* flush */
-}
-
-static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev)
-{
- unsigned char busnum = dev->bus->number;
- void __iomem *bbar = tbl->bbar;
- void __iomem *target;
- u32 val;
-
- /*
- * CalIOC2 designers recommend setting bit 8 in 0xnDB0 to 1
- */
- target = calgary_reg(bbar, phb_offset(busnum) | PHB_SAVIOR_L2);
- val = cpu_to_be32(readl(target));
- val |= 0x00800000;
- writel(cpu_to_be32(val), target);
-}
-
-static void calgary_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev)
-{
- unsigned char busnum = dev->bus->number;
-
- /*
- * Give split completion a longer timeout on bus 1 for aic94xx
- * http://bugzilla.kernel.org/show_bug.cgi?id=7180
- */
- if (is_calgary(dev->device) && (busnum == 1))
- calgary_set_split_completion_timeout(tbl->bbar, busnum,
- CCR_2SEC_TIMEOUT);
-}
-
-static void __init calgary_enable_translation(struct pci_dev *dev)
-{
- u32 val32;
- unsigned char busnum;
- void __iomem *target;
- void __iomem *bbar;
- struct iommu_table *tbl;
-
- busnum = dev->bus->number;
- tbl = pci_iommu(dev->bus);
- bbar = tbl->bbar;
-
- /* enable TCE in PHB Config Register */
- target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET);
- val32 = be32_to_cpu(readl(target));
- val32 |= PHB_TCE_ENABLE | PHB_DAC_DISABLE | PHB_MCSR_ENABLE;
-
- printk(KERN_INFO "Calgary: enabling translation on %s PHB %#x\n",
- (dev->device == PCI_DEVICE_ID_IBM_CALGARY) ?
- "Calgary" : "CalIOC2", busnum);
- printk(KERN_INFO "Calgary: errant DMAs will now be prevented on this "
- "bus.\n");
-
- writel(cpu_to_be32(val32), target);
- readl(target); /* flush */
-
- init_timer(&tbl->watchdog_timer);
- tbl->watchdog_timer.function = &calgary_watchdog;
- tbl->watchdog_timer.data = (unsigned long)dev;
- mod_timer(&tbl->watchdog_timer, jiffies);
-}
-
-static void __init calgary_disable_translation(struct pci_dev *dev)
-{
- u32 val32;
- unsigned char busnum;
- void __iomem *target;
- void __iomem *bbar;
- struct iommu_table *tbl;
-
- busnum = dev->bus->number;
- tbl = pci_iommu(dev->bus);
- bbar = tbl->bbar;
-
- /* disable TCE in PHB Config Register */
- target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET);
- val32 = be32_to_cpu(readl(target));
- val32 &= ~(PHB_TCE_ENABLE | PHB_DAC_DISABLE | PHB_MCSR_ENABLE);
-
- printk(KERN_INFO "Calgary: disabling translation on PHB %#x!\n", busnum);
- writel(cpu_to_be32(val32), target);
- readl(target); /* flush */
-
- del_timer_sync(&tbl->watchdog_timer);
-}
-
-static void __init calgary_init_one_nontraslated(struct pci_dev *dev)
-{
- pci_dev_get(dev);
- set_pci_iommu(dev->bus, NULL);
-
- /* is the device behind a bridge? */
- if (dev->bus->parent)
- dev->bus->parent->self = dev;
- else
- dev->bus->self = dev;
-}
-
-static int __init calgary_init_one(struct pci_dev *dev)
-{
- void __iomem *bbar;
- struct iommu_table *tbl;
- int ret;
-
- BUG_ON(dev->bus->number >= MAX_PHB_BUS_NUM);
-
- bbar = busno_to_bbar(dev->bus->number);
- ret = calgary_setup_tar(dev, bbar);
- if (ret)
- goto done;
-
- pci_dev_get(dev);
-
- if (dev->bus->parent) {
- if (dev->bus->parent->self)
- printk(KERN_WARNING "Calgary: IEEEE, dev %p has "
- "bus->parent->self!\n", dev);
- dev->bus->parent->self = dev;
- } else
- dev->bus->self = dev;
-
- tbl = pci_iommu(dev->bus);
- tbl->chip_ops->handle_quirks(tbl, dev);
-
- calgary_enable_translation(dev);
-
- return 0;
-
-done:
- return ret;
-}
-
-static int __init calgary_locate_bbars(void)
-{
- int ret;
- int rioidx, phb, bus;
- void __iomem *bbar;
- void __iomem *target;
- unsigned long offset;
- u8 start_bus, end_bus;
- u32 val;
-
- ret = -ENODATA;
- for (rioidx = 0; rioidx < rio_table_hdr->num_rio_dev; rioidx++) {
- struct rio_detail *rio = rio_devs[rioidx];
-
- if ((rio->type != COMPAT_CALGARY) && (rio->type != ALT_CALGARY))
- continue;
-
- /* map entire 1MB of Calgary config space */
- bbar = ioremap_nocache(rio->BBAR, 1024 * 1024);
- if (!bbar)
- goto error;
-
- for (phb = 0; phb < PHBS_PER_CALGARY; phb++) {
- offset = phb_debug_offsets[phb] | PHB_DEBUG_STUFF_OFFSET;
- target = calgary_reg(bbar, offset);
-
- val = be32_to_cpu(readl(target));
-
- start_bus = (u8)((val & 0x00FF0000) >> 16);
- end_bus = (u8)((val & 0x0000FF00) >> 8);
-
- if (end_bus) {
- for (bus = start_bus; bus <= end_bus; bus++) {
- bus_info[bus].bbar = bbar;
- bus_info[bus].phbid = phb;
- }
- } else {
- bus_info[start_bus].bbar = bbar;
- bus_info[start_bus].phbid = phb;
- }
- }
- }
-
- return 0;
-
-error:
- /* scan bus_info and iounmap any bbars we previously ioremap'd */
- for (bus = 0; bus < ARRAY_SIZE(bus_info); bus++)
- if (bus_info[bus].bbar)
- iounmap(bus_info[bus].bbar);
-
- return ret;
-}
-
-static int __init calgary_init(void)
-{
- int ret;
- struct pci_dev *dev = NULL;
- void *tce_space;
-
- ret = calgary_locate_bbars();
- if (ret)
- return ret;
-
- do {
- dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev);
- if (!dev)
- break;
- if (!is_cal_pci_dev(dev->device))
- continue;
- if (!translate_phb(dev)) {
- calgary_init_one_nontraslated(dev);
- continue;
- }
- tce_space = bus_info[dev->bus->number].tce_space;
- if (!tce_space && !translate_empty_slots)
- continue;
-
- ret = calgary_init_one(dev);
- if (ret)
- goto error;
- } while (1);
-
- return ret;
-
-error:
- do {
- dev = pci_get_device_reverse(PCI_VENDOR_ID_IBM,
- PCI_ANY_ID, dev);
- if (!dev)
- break;
- if (!is_cal_pci_dev(dev->device))
- continue;
- if (!translate_phb(dev)) {
- pci_dev_put(dev);
- continue;
- }
- if (!bus_info[dev->bus->number].tce_space && !translate_empty_slots)
- continue;
-
- calgary_disable_translation(dev);
- calgary_free_bus(dev);
- pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */
- } while (1);
-
- return ret;
-}
-
-static inline int __init determine_tce_table_size(u64 ram)
-{
- int ret;
-
- if (specified_table_size != TCE_TABLE_SIZE_UNSPECIFIED)
- return specified_table_size;
-
- /*
- * Table sizes are from 0 to 7 (TCE_TABLE_SIZE_64K to
- * TCE_TABLE_SIZE_8M). Table size 0 has 8K entries and each
- * larger table size has twice as many entries, so shift the
- * max ram address by 13 to divide by 8K and then look at the
- * order of the result to choose between 0-7.
- */
- ret = get_order(ram >> 13);
- if (ret > TCE_TABLE_SIZE_8M)
- ret = TCE_TABLE_SIZE_8M;
-
- return ret;
-}
-
-static int __init build_detail_arrays(void)
-{
- unsigned long ptr;
- int i, scal_detail_size, rio_detail_size;
-
- if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){
- printk(KERN_WARNING
- "Calgary: MAX_NUMNODES too low! Defined as %d, "
- "but system has %d nodes.\n",
- MAX_NUMNODES, rio_table_hdr->num_scal_dev);
- return -ENODEV;
- }
-
- switch (rio_table_hdr->version){
- case 2:
- scal_detail_size = 11;
- rio_detail_size = 13;
- break;
- case 3:
- scal_detail_size = 12;
- rio_detail_size = 15;
- break;
- default:
- printk(KERN_WARNING
- "Calgary: Invalid Rio Grande Table Version: %d\n",
- rio_table_hdr->version);
- return -EPROTO;
- }
-
- ptr = ((unsigned long)rio_table_hdr) + 3;
- for (i = 0; i < rio_table_hdr->num_scal_dev;
- i++, ptr += scal_detail_size)
- scal_devs[i] = (struct scal_detail *)ptr;
-
- for (i = 0; i < rio_table_hdr->num_rio_dev;
- i++, ptr += rio_detail_size)
- rio_devs[i] = (struct rio_detail *)ptr;
-
- return 0;
-}
-
-static int __init calgary_bus_has_devices(int bus, unsigned short pci_dev)
-{
- int dev;
- u32 val;
-
- if (pci_dev == PCI_DEVICE_ID_IBM_CALIOC2) {
- /*
- * FIXME: properly scan for devices accross the
- * PCI-to-PCI bridge on every CalIOC2 port.
- */
- return 1;
- }
-
- for (dev = 1; dev < 8; dev++) {
- val = read_pci_config(bus, dev, 0, 0);
- if (val != 0xffffffff)
- break;
- }
- return (val != 0xffffffff);
-}
-
-void __init detect_calgary(void)
-{
- int bus;
- void *tbl;
- int calgary_found = 0;
- unsigned long ptr;
- unsigned int offset, prev_offset;
- int ret;
-
- /*
- * if the user specified iommu=off or iommu=soft or we found
- * another HW IOMMU already, bail out.
- */
- if (swiotlb || no_iommu || iommu_detected)
- return;
-
- if (!use_calgary)
- return;
-
- if (!early_pci_allowed())
- return;
-
- printk(KERN_DEBUG "Calgary: detecting Calgary via BIOS EBDA area\n");
-
- ptr = (unsigned long)phys_to_virt(get_bios_ebda());
-
- rio_table_hdr = NULL;
- prev_offset = 0;
- offset = 0x180;
- /*
- * The next offset is stored in the 1st word.
- * Only parse up until the offset increases:
- */
- while (offset > prev_offset) {
- /* The block id is stored in the 2nd word */
- if (*((unsigned short *)(ptr + offset + 2)) == 0x4752){
- /* set the pointer past the offset & block id */
- rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4);
- break;
- }
- prev_offset = offset;
- offset = *((unsigned short *)(ptr + offset));
- }
- if (!rio_table_hdr) {
- printk(KERN_DEBUG "Calgary: Unable to locate Rio Grande table "
- "in EBDA - bailing!\n");
- return;
- }
-
- ret = build_detail_arrays();
- if (ret) {
- printk(KERN_DEBUG "Calgary: build_detail_arrays ret %d\n", ret);
- return;
- }
-
- specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE);
-
- for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
- struct calgary_bus_info *info = &bus_info[bus];
- unsigned short pci_device;
- u32 val;
-
- val = read_pci_config(bus, 0, 0, 0);
- pci_device = (val & 0xFFFF0000) >> 16;
-
- if (!is_cal_pci_dev(pci_device))
- continue;
-
- if (info->translation_disabled)
- continue;
-
- if (calgary_bus_has_devices(bus, pci_device) ||
- translate_empty_slots) {
- tbl = alloc_tce_table();
- if (!tbl)
- goto cleanup;
- info->tce_space = tbl;
- calgary_found = 1;
- }
- }
-
- printk(KERN_DEBUG "Calgary: finished detection, Calgary %s\n",
- calgary_found ? "found" : "not found");
-
- if (calgary_found) {
- iommu_detected = 1;
- calgary_detected = 1;
- printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected.\n");
- printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, "
- "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size,
- debugging ? "enabled" : "disabled");
- }
- return;
-
-cleanup:
- for (--bus; bus >= 0; --bus) {
- struct calgary_bus_info *info = &bus_info[bus];
-
- if (info->tce_space)
- free_tce_table(info->tce_space);
- }
-}
-
-int __init calgary_iommu_init(void)
-{
- int ret;
-
- if (no_iommu || swiotlb)
- return -ENODEV;
-
- if (!calgary_detected)
- return -ENODEV;
-
- /* ok, we're trying to use Calgary - let's roll */
- printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n");
-
- ret = calgary_init();
- if (ret) {
- printk(KERN_ERR "PCI-DMA: Calgary init failed %d, "
- "falling back to no_iommu\n", ret);
- if (end_pfn > MAX_DMA32_PFN)
- printk(KERN_ERR "WARNING more than 4GB of memory, "
- "32bit PCI may malfunction.\n");
- return ret;
- }
-
- force_iommu = 1;
- bad_dma_address = 0x0;
- dma_ops = &calgary_dma_ops;
-
- return 0;
-}
-
-static int __init calgary_parse_options(char *p)
-{
- unsigned int bridge;
- size_t len;
- char* endp;
-
- while (*p) {
- if (!strncmp(p, "64k", 3))
- specified_table_size = TCE_TABLE_SIZE_64K;
- else if (!strncmp(p, "128k", 4))
- specified_table_size = TCE_TABLE_SIZE_128K;
- else if (!strncmp(p, "256k", 4))
- specified_table_size = TCE_TABLE_SIZE_256K;
- else if (!strncmp(p, "512k", 4))
- specified_table_size = TCE_TABLE_SIZE_512K;
- else if (!strncmp(p, "1M", 2))
- specified_table_size = TCE_TABLE_SIZE_1M;
- else if (!strncmp(p, "2M", 2))
- specified_table_size = TCE_TABLE_SIZE_2M;
- else if (!strncmp(p, "4M", 2))
- specified_table_size = TCE_TABLE_SIZE_4M;
- else if (!strncmp(p, "8M", 2))
- specified_table_size = TCE_TABLE_SIZE_8M;
-
- len = strlen("translate_empty_slots");
- if (!strncmp(p, "translate_empty_slots", len))
- translate_empty_slots = 1;
-
- len = strlen("disable");
- if (!strncmp(p, "disable", len)) {
- p += len;
- if (*p == '=')
- ++p;
- if (*p == '\0')
- break;
- bridge = simple_strtol(p, &endp, 0);
- if (p == endp)
- break;
-
- if (bridge < MAX_PHB_BUS_NUM) {
- printk(KERN_INFO "Calgary: disabling "
- "translation for PHB %#x\n", bridge);
- bus_info[bridge].translation_disabled = 1;
- }
- }
-
- p = strpbrk(p, ",");
- if (!p)
- break;
-
- p++; /* skip ',' */
- }
- return 1;
-}
-__setup("calgary=", calgary_parse_options);
-
-static void __init calgary_fixup_one_tce_space(struct pci_dev *dev)
-{
- struct iommu_table *tbl;
- unsigned int npages;
- int i;
-
- tbl = pci_iommu(dev->bus);
-
- for (i = 0; i < 4; i++) {
- struct resource *r = &dev->resource[PCI_BRIDGE_RESOURCES + i];
-
- /* Don't give out TCEs that map MEM resources */
- if (!(r->flags & IORESOURCE_MEM))
- continue;
-
- /* 0-based? we reserve the whole 1st MB anyway */
- if (!r->start)
- continue;
-
- /* cover the whole region */
- npages = (r->end - r->start) >> PAGE_SHIFT;
- npages++;
-
- iommu_range_reserve(tbl, r->start, npages);
- }
-}
-
-static int __init calgary_fixup_tce_spaces(void)
-{
- struct pci_dev *dev = NULL;
- void *tce_space;
-
- if (no_iommu || swiotlb || !calgary_detected)
- return -ENODEV;
-
- printk(KERN_DEBUG "Calgary: fixing up tce spaces\n");
-
- do {
- dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev);
- if (!dev)
- break;
- if (!is_cal_pci_dev(dev->device))
- continue;
- if (!translate_phb(dev))
- continue;
-
- tce_space = bus_info[dev->bus->number].tce_space;
- if (!tce_space)
- continue;
-
- calgary_fixup_one_tce_space(dev);
-
- } while (1);
-
- return 0;
-}
-
-/*
- * We need to be call after pcibios_assign_resources (fs_initcall level)
- * and before device_initcall.
- */
-rootfs_initcall(calgary_fixup_tce_spaces);
diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c
deleted file mode 100644
index 29711445c818..000000000000
--- a/arch/x86_64/kernel/pci-dma.c
+++ /dev/null
@@ -1,346 +0,0 @@
-/*
- * Dynamic DMA mapping support.
- */
-
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/pci.h>
-#include <linux/module.h>
-#include <asm/io.h>
-#include <asm/iommu.h>
-#include <asm/calgary.h>
-
-int iommu_merge __read_mostly = 0;
-EXPORT_SYMBOL(iommu_merge);
-
-dma_addr_t bad_dma_address __read_mostly;
-EXPORT_SYMBOL(bad_dma_address);
-
-/* This tells the BIO block layer to assume merging. Default to off
- because we cannot guarantee merging later. */
-int iommu_bio_merge __read_mostly = 0;
-EXPORT_SYMBOL(iommu_bio_merge);
-
-static int iommu_sac_force __read_mostly = 0;
-
-int no_iommu __read_mostly;
-#ifdef CONFIG_IOMMU_DEBUG
-int panic_on_overflow __read_mostly = 1;
-int force_iommu __read_mostly = 1;
-#else
-int panic_on_overflow __read_mostly = 0;
-int force_iommu __read_mostly= 0;
-#endif
-
-/* Set this to 1 if there is a HW IOMMU in the system */
-int iommu_detected __read_mostly = 0;
-
-/* Dummy device used for NULL arguments (normally ISA). Better would
- be probably a smaller DMA mask, but this is bug-to-bug compatible
- to i386. */
-struct device fallback_dev = {
- .bus_id = "fallback device",
- .coherent_dma_mask = DMA_32BIT_MASK,
- .dma_mask = &fallback_dev.coherent_dma_mask,
-};
-
-/* Allocate DMA memory on node near device */
-noinline static void *
-dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
-{
- struct page *page;
- int node;
-#ifdef CONFIG_PCI
- if (dev->bus == &pci_bus_type)
- node = pcibus_to_node(to_pci_dev(dev)->bus);
- else
-#endif
- node = numa_node_id();
-
- if (node < first_node(node_online_map))
- node = first_node(node_online_map);
-
- page = alloc_pages_node(node, gfp, order);
- return page ? page_address(page) : NULL;
-}
-
-/*
- * Allocate memory for a coherent mapping.
- */
-void *
-dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
- gfp_t gfp)
-{
- void *memory;
- unsigned long dma_mask = 0;
- u64 bus;
-
- if (!dev)
- dev = &fallback_dev;
- dma_mask = dev->coherent_dma_mask;
- if (dma_mask == 0)
- dma_mask = DMA_32BIT_MASK;
-
- /* Device not DMA able */
- if (dev->dma_mask == NULL)
- return NULL;
-
- /* Don't invoke OOM killer */
- gfp |= __GFP_NORETRY;
-
- /* Kludge to make it bug-to-bug compatible with i386. i386
- uses the normal dma_mask for alloc_coherent. */
- dma_mask &= *dev->dma_mask;
-
- /* Why <=? Even when the mask is smaller than 4GB it is often
- larger than 16MB and in this case we have a chance of
- finding fitting memory in the next higher zone first. If
- not retry with true GFP_DMA. -AK */
- if (dma_mask <= DMA_32BIT_MASK)
- gfp |= GFP_DMA32;
-
- again:
- memory = dma_alloc_pages(dev, gfp, get_order(size));
- if (memory == NULL)
- return NULL;
-
- {
- int high, mmu;
- bus = virt_to_bus(memory);
- high = (bus + size) >= dma_mask;
- mmu = high;
- if (force_iommu && !(gfp & GFP_DMA))
- mmu = 1;
- else if (high) {
- free_pages((unsigned long)memory,
- get_order(size));
-
- /* Don't use the 16MB ZONE_DMA unless absolutely
- needed. It's better to use remapping first. */
- if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) {
- gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
- goto again;
- }
-
- /* Let low level make its own zone decisions */
- gfp &= ~(GFP_DMA32|GFP_DMA);
-
- if (dma_ops->alloc_coherent)
- return dma_ops->alloc_coherent(dev, size,
- dma_handle, gfp);
- return NULL;
- }
-
- memset(memory, 0, size);
- if (!mmu) {
- *dma_handle = virt_to_bus(memory);
- return memory;
- }
- }
-
- if (dma_ops->alloc_coherent) {
- free_pages((unsigned long)memory, get_order(size));
- gfp &= ~(GFP_DMA|GFP_DMA32);
- return dma_ops->alloc_coherent(dev, size, dma_handle, gfp);
- }
-
- if (dma_ops->map_simple) {
- *dma_handle = dma_ops->map_simple(dev, memory,
- size,
- PCI_DMA_BIDIRECTIONAL);
- if (*dma_handle != bad_dma_address)
- return memory;
- }
-
- if (panic_on_overflow)
- panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n",size);
- free_pages((unsigned long)memory, get_order(size));
- return NULL;
-}
-EXPORT_SYMBOL(dma_alloc_coherent);
-
-/*
- * Unmap coherent memory.
- * The caller must ensure that the device has finished accessing the mapping.
- */
-void dma_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t bus)
-{
- if (dma_ops->unmap_single)
- dma_ops->unmap_single(dev, bus, size, 0);
- free_pages((unsigned long)vaddr, get_order(size));
-}
-EXPORT_SYMBOL(dma_free_coherent);
-
-static int forbid_dac __read_mostly;
-
-int dma_supported(struct device *dev, u64 mask)
-{
-#ifdef CONFIG_PCI
- if (mask > 0xffffffff && forbid_dac > 0) {
-
-
-
- printk(KERN_INFO "PCI: Disallowing DAC for device %s\n", dev->bus_id);
- return 0;
- }
-#endif
-
- if (dma_ops->dma_supported)
- return dma_ops->dma_supported(dev, mask);
-
- /* Copied from i386. Doesn't make much sense, because it will
- only work for pci_alloc_coherent.
- The caller just has to use GFP_DMA in this case. */
- if (mask < DMA_24BIT_MASK)
- return 0;
-
- /* Tell the device to use SAC when IOMMU force is on. This
- allows the driver to use cheaper accesses in some cases.
-
- Problem with this is that if we overflow the IOMMU area and
- return DAC as fallback address the device may not handle it
- correctly.
-
- As a special case some controllers have a 39bit address
- mode that is as efficient as 32bit (aic79xx). Don't force
- SAC for these. Assume all masks <= 40 bits are of this
- type. Normally this doesn't make any difference, but gives
- more gentle handling of IOMMU overflow. */
- if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) {
- printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id,mask);
- return 0;
- }
-
- return 1;
-}
-EXPORT_SYMBOL(dma_supported);
-
-int dma_set_mask(struct device *dev, u64 mask)
-{
- if (!dev->dma_mask || !dma_supported(dev, mask))
- return -EIO;
- *dev->dma_mask = mask;
- return 0;
-}
-EXPORT_SYMBOL(dma_set_mask);
-
-/*
- * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter
- * documentation.
- */
-__init int iommu_setup(char *p)
-{
- iommu_merge = 1;
-
- if (!p)
- return -EINVAL;
-
- while (*p) {
- if (!strncmp(p,"off",3))
- no_iommu = 1;
- /* gart_parse_options has more force support */
- if (!strncmp(p,"force",5))
- force_iommu = 1;
- if (!strncmp(p,"noforce",7)) {
- iommu_merge = 0;
- force_iommu = 0;
- }
-
- if (!strncmp(p, "biomerge",8)) {
- iommu_bio_merge = 4096;
- iommu_merge = 1;
- force_iommu = 1;
- }
- if (!strncmp(p, "panic",5))
- panic_on_overflow = 1;
- if (!strncmp(p, "nopanic",7))
- panic_on_overflow = 0;
- if (!strncmp(p, "merge",5)) {
- iommu_merge = 1;
- force_iommu = 1;
- }
- if (!strncmp(p, "nomerge",7))
- iommu_merge = 0;
- if (!strncmp(p, "forcesac",8))
- iommu_sac_force = 1;
- if (!strncmp(p, "allowdac", 8))
- forbid_dac = 0;
- if (!strncmp(p, "nodac", 5))
- forbid_dac = -1;
-
-#ifdef CONFIG_SWIOTLB
- if (!strncmp(p, "soft",4))
- swiotlb = 1;
-#endif
-
-#ifdef CONFIG_IOMMU
- gart_parse_options(p);
-#endif
-
-#ifdef CONFIG_CALGARY_IOMMU
- if (!strncmp(p, "calgary", 7))
- use_calgary = 1;
-#endif /* CONFIG_CALGARY_IOMMU */
-
- p += strcspn(p, ",");
- if (*p == ',')
- ++p;
- }
- return 0;
-}
-early_param("iommu", iommu_setup);
-
-void __init pci_iommu_alloc(void)
-{
- /*
- * The order of these functions is important for
- * fall-back/fail-over reasons
- */
-#ifdef CONFIG_IOMMU
- iommu_hole_init();
-#endif
-
-#ifdef CONFIG_CALGARY_IOMMU
- detect_calgary();
-#endif
-
-#ifdef CONFIG_SWIOTLB
- pci_swiotlb_init();
-#endif
-}
-
-static int __init pci_iommu_init(void)
-{
-#ifdef CONFIG_CALGARY_IOMMU
- calgary_iommu_init();
-#endif
-
-#ifdef CONFIG_IOMMU
- gart_iommu_init();
-#endif
-
- no_iommu_init();
- return 0;
-}
-
-void pci_iommu_shutdown(void)
-{
- gart_iommu_shutdown();
-}
-
-#ifdef CONFIG_PCI
-/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
-
-static __devinit void via_no_dac(struct pci_dev *dev)
-{
- if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
- printk(KERN_INFO "PCI: VIA PCI bridge detected. Disabling DAC.\n");
- forbid_dac = 1;
- }
-}
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac);
-#endif
-/* Must execute after PCI subsystem */
-fs_initcall(pci_iommu_init);
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
deleted file mode 100644
index 4918c575d582..000000000000
--- a/arch/x86_64/kernel/pci-gart.c
+++ /dev/null
@@ -1,740 +0,0 @@
-/*
- * Dynamic DMA mapping support for AMD Hammer.
- *
- * Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI.
- * This allows to use PCI devices that only support 32bit addresses on systems
- * with more than 4GB.
- *
- * See Documentation/DMA-mapping.txt for the interface specification.
- *
- * Copyright 2002 Andi Kleen, SuSE Labs.
- */
-
-#include <linux/types.h>
-#include <linux/ctype.h>
-#include <linux/agp_backend.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/spinlock.h>
-#include <linux/pci.h>
-#include <linux/module.h>
-#include <linux/topology.h>
-#include <linux/interrupt.h>
-#include <linux/bitops.h>
-#include <linux/kdebug.h>
-#include <asm/atomic.h>
-#include <asm/io.h>
-#include <asm/mtrr.h>
-#include <asm/pgtable.h>
-#include <asm/proto.h>
-#include <asm/iommu.h>
-#include <asm/cacheflush.h>
-#include <asm/swiotlb.h>
-#include <asm/dma.h>
-#include <asm/k8.h>
-
-unsigned long iommu_bus_base; /* GART remapping area (physical) */
-static unsigned long iommu_size; /* size of remapping area bytes */
-static unsigned long iommu_pages; /* .. and in pages */
-
-u32 *iommu_gatt_base; /* Remapping table */
-
-/* If this is disabled the IOMMU will use an optimized flushing strategy
- of only flushing when an mapping is reused. With it true the GART is flushed
- for every mapping. Problem is that doing the lazy flush seems to trigger
- bugs with some popular PCI cards, in particular 3ware (but has been also
- also seen with Qlogic at least). */
-int iommu_fullflush = 1;
-
-/* Allocation bitmap for the remapping area */
-static DEFINE_SPINLOCK(iommu_bitmap_lock);
-static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */
-
-static u32 gart_unmapped_entry;
-
-#define GPTE_VALID 1
-#define GPTE_COHERENT 2
-#define GPTE_ENCODE(x) \
- (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT)
-#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))
-
-#define to_pages(addr,size) \
- (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
-
-#define EMERGENCY_PAGES 32 /* = 128KB */
-
-#ifdef CONFIG_AGP
-#define AGPEXTERN extern
-#else
-#define AGPEXTERN
-#endif
-
-/* backdoor interface to AGP driver */
-AGPEXTERN int agp_memory_reserved;
-AGPEXTERN __u32 *agp_gatt_table;
-
-static unsigned long next_bit; /* protected by iommu_bitmap_lock */
-static int need_flush; /* global flush state. set for each gart wrap */
-
-static unsigned long alloc_iommu(int size)
-{
- unsigned long offset, flags;
-
- spin_lock_irqsave(&iommu_bitmap_lock, flags);
- offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size);
- if (offset == -1) {
- need_flush = 1;
- offset = find_next_zero_string(iommu_gart_bitmap,0,iommu_pages,size);
- }
- if (offset != -1) {
- set_bit_string(iommu_gart_bitmap, offset, size);
- next_bit = offset+size;
- if (next_bit >= iommu_pages) {
- next_bit = 0;
- need_flush = 1;
- }
- }
- if (iommu_fullflush)
- need_flush = 1;
- spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
- return offset;
-}
-
-static void free_iommu(unsigned long offset, int size)
-{
- unsigned long flags;
- spin_lock_irqsave(&iommu_bitmap_lock, flags);
- __clear_bit_string(iommu_gart_bitmap, offset, size);
- spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
-}
-
-/*
- * Use global flush state to avoid races with multiple flushers.
- */
-static void flush_gart(void)
-{
- unsigned long flags;
- spin_lock_irqsave(&iommu_bitmap_lock, flags);
- if (need_flush) {
- k8_flush_garts();
- need_flush = 0;
- }
- spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
-}
-
-#ifdef CONFIG_IOMMU_LEAK
-
-#define SET_LEAK(x) if (iommu_leak_tab) \
- iommu_leak_tab[x] = __builtin_return_address(0);
-#define CLEAR_LEAK(x) if (iommu_leak_tab) \
- iommu_leak_tab[x] = NULL;
-
-/* Debugging aid for drivers that don't free their IOMMU tables */
-static void **iommu_leak_tab;
-static int leak_trace;
-int iommu_leak_pages = 20;
-void dump_leak(void)
-{
- int i;
- static int dump;
- if (dump || !iommu_leak_tab) return;
- dump = 1;
- show_stack(NULL,NULL);
- /* Very crude. dump some from the end of the table too */
- printk("Dumping %d pages from end of IOMMU:\n", iommu_leak_pages);
- for (i = 0; i < iommu_leak_pages; i+=2) {
- printk("%lu: ", iommu_pages-i);
- printk_address((unsigned long) iommu_leak_tab[iommu_pages-i]);
- printk("%c", (i+1)%2 == 0 ? '\n' : ' ');
- }
- printk("\n");
-}
-#else
-#define SET_LEAK(x)
-#define CLEAR_LEAK(x)
-#endif
-
-static void iommu_full(struct device *dev, size_t size, int dir)
-{
- /*
- * Ran out of IOMMU space for this operation. This is very bad.
- * Unfortunately the drivers cannot handle this operation properly.
- * Return some non mapped prereserved space in the aperture and
- * let the Northbridge deal with it. This will result in garbage
- * in the IO operation. When the size exceeds the prereserved space
- * memory corruption will occur or random memory will be DMAed
- * out. Hopefully no network devices use single mappings that big.
- */
-
- printk(KERN_ERR
- "PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n",
- size, dev->bus_id);
-
- if (size > PAGE_SIZE*EMERGENCY_PAGES) {
- if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
- panic("PCI-DMA: Memory would be corrupted\n");
- if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
- panic(KERN_ERR "PCI-DMA: Random memory would be DMAed\n");
- }
-
-#ifdef CONFIG_IOMMU_LEAK
- dump_leak();
-#endif
-}
-
-static inline int need_iommu(struct device *dev, unsigned long addr, size_t size)
-{
- u64 mask = *dev->dma_mask;
- int high = addr + size > mask;
- int mmu = high;
- if (force_iommu)
- mmu = 1;
- return mmu;
-}
-
-static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
-{
- u64 mask = *dev->dma_mask;
- int high = addr + size > mask;
- int mmu = high;
- return mmu;
-}
-
-/* Map a single continuous physical area into the IOMMU.
- * Caller needs to check if the iommu is needed and flush.
- */
-static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
- size_t size, int dir)
-{
- unsigned long npages = to_pages(phys_mem, size);
- unsigned long iommu_page = alloc_iommu(npages);
- int i;
- if (iommu_page == -1) {
- if (!nonforced_iommu(dev, phys_mem, size))
- return phys_mem;
- if (panic_on_overflow)
- panic("dma_map_area overflow %lu bytes\n", size);
- iommu_full(dev, size, dir);
- return bad_dma_address;
- }
-
- for (i = 0; i < npages; i++) {
- iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
- SET_LEAK(iommu_page + i);
- phys_mem += PAGE_SIZE;
- }
- return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
-}
-
-static dma_addr_t gart_map_simple(struct device *dev, char *buf,
- size_t size, int dir)
-{
- dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir);
- flush_gart();
- return map;
-}
-
-/* Map a single area into the IOMMU */
-static dma_addr_t gart_map_single(struct device *dev, void *addr, size_t size, int dir)
-{
- unsigned long phys_mem, bus;
-
- if (!dev)
- dev = &fallback_dev;
-
- phys_mem = virt_to_phys(addr);
- if (!need_iommu(dev, phys_mem, size))
- return phys_mem;
-
- bus = gart_map_simple(dev, addr, size, dir);
- return bus;
-}
-
-/*
- * Free a DMA mapping.
- */
-static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
- size_t size, int direction)
-{
- unsigned long iommu_page;
- int npages;
- int i;
-
- if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE ||
- dma_addr >= iommu_bus_base + iommu_size)
- return;
- iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
- npages = to_pages(dma_addr, size);
- for (i = 0; i < npages; i++) {
- iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
- CLEAR_LEAK(iommu_page + i);
- }
- free_iommu(iommu_page, npages);
-}
-
-/*
- * Wrapper for pci_unmap_single working with scatterlists.
- */
-static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
-{
- int i;
-
- for (i = 0; i < nents; i++) {
- struct scatterlist *s = &sg[i];
- if (!s->dma_length || !s->length)
- break;
- gart_unmap_single(dev, s->dma_address, s->dma_length, dir);
- }
-}
-
-/* Fallback for dma_map_sg in case of overflow */
-static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
- int nents, int dir)
-{
- int i;
-
-#ifdef CONFIG_IOMMU_DEBUG
- printk(KERN_DEBUG "dma_map_sg overflow\n");
-#endif
-
- for (i = 0; i < nents; i++ ) {
- struct scatterlist *s = &sg[i];
- unsigned long addr = page_to_phys(s->page) + s->offset;
- if (nonforced_iommu(dev, addr, s->length)) {
- addr = dma_map_area(dev, addr, s->length, dir);
- if (addr == bad_dma_address) {
- if (i > 0)
- gart_unmap_sg(dev, sg, i, dir);
- nents = 0;
- sg[0].dma_length = 0;
- break;
- }
- }
- s->dma_address = addr;
- s->dma_length = s->length;
- }
- flush_gart();
- return nents;
-}
-
-/* Map multiple scatterlist entries continuous into the first. */
-static int __dma_map_cont(struct scatterlist *sg, int start, int stopat,
- struct scatterlist *sout, unsigned long pages)
-{
- unsigned long iommu_start = alloc_iommu(pages);
- unsigned long iommu_page = iommu_start;
- int i;
-
- if (iommu_start == -1)
- return -1;
-
- for (i = start; i < stopat; i++) {
- struct scatterlist *s = &sg[i];
- unsigned long pages, addr;
- unsigned long phys_addr = s->dma_address;
-
- BUG_ON(i > start && s->offset);
- if (i == start) {
- *sout = *s;
- sout->dma_address = iommu_bus_base;
- sout->dma_address += iommu_page*PAGE_SIZE + s->offset;
- sout->dma_length = s->length;
- } else {
- sout->dma_length += s->length;
- }
-
- addr = phys_addr;
- pages = to_pages(s->offset, s->length);
- while (pages--) {
- iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
- SET_LEAK(iommu_page);
- addr += PAGE_SIZE;
- iommu_page++;
- }
- }
- BUG_ON(iommu_page - iommu_start != pages);
- return 0;
-}
-
-static inline int dma_map_cont(struct scatterlist *sg, int start, int stopat,
- struct scatterlist *sout,
- unsigned long pages, int need)
-{
- if (!need) {
- BUG_ON(stopat - start != 1);
- *sout = sg[start];
- sout->dma_length = sg[start].length;
- return 0;
- }
- return __dma_map_cont(sg, start, stopat, sout, pages);
-}
-
-/*
- * DMA map all entries in a scatterlist.
- * Merge chunks that have page aligned sizes into a continuous mapping.
- */
-int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
-{
- int i;
- int out;
- int start;
- unsigned long pages = 0;
- int need = 0, nextneed;
-
- if (nents == 0)
- return 0;
-
- if (!dev)
- dev = &fallback_dev;
-
- out = 0;
- start = 0;
- for (i = 0; i < nents; i++) {
- struct scatterlist *s = &sg[i];
- dma_addr_t addr = page_to_phys(s->page) + s->offset;
- s->dma_address = addr;
- BUG_ON(s->length == 0);
-
- nextneed = need_iommu(dev, addr, s->length);
-
- /* Handle the previous not yet processed entries */
- if (i > start) {
- struct scatterlist *ps = &sg[i-1];
- /* Can only merge when the last chunk ends on a page
- boundary and the new one doesn't have an offset. */
- if (!iommu_merge || !nextneed || !need || s->offset ||
- (ps->offset + ps->length) % PAGE_SIZE) {
- if (dma_map_cont(sg, start, i, sg+out, pages,
- need) < 0)
- goto error;
- out++;
- pages = 0;
- start = i;
- }
- }
-
- need = nextneed;
- pages += to_pages(s->offset, s->length);
- }
- if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0)
- goto error;
- out++;
- flush_gart();
- if (out < nents)
- sg[out].dma_length = 0;
- return out;
-
-error:
- flush_gart();
- gart_unmap_sg(dev, sg, nents, dir);
- /* When it was forced or merged try again in a dumb way */
- if (force_iommu || iommu_merge) {
- out = dma_map_sg_nonforce(dev, sg, nents, dir);
- if (out > 0)
- return out;
- }
- if (panic_on_overflow)
- panic("dma_map_sg: overflow on %lu pages\n", pages);
- iommu_full(dev, pages << PAGE_SHIFT, dir);
- for (i = 0; i < nents; i++)
- sg[i].dma_address = bad_dma_address;
- return 0;
-}
-
-static int no_agp;
-
-static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
-{
- unsigned long a;
- if (!iommu_size) {
- iommu_size = aper_size;
- if (!no_agp)
- iommu_size /= 2;
- }
-
- a = aper + iommu_size;
- iommu_size -= round_up(a, LARGE_PAGE_SIZE) - a;
-
- if (iommu_size < 64*1024*1024)
- printk(KERN_WARNING
- "PCI-DMA: Warning: Small IOMMU %luMB. Consider increasing the AGP aperture in BIOS\n",iommu_size>>20);
-
- return iommu_size;
-}
-
-static __init unsigned read_aperture(struct pci_dev *dev, u32 *size)
-{
- unsigned aper_size = 0, aper_base_32;
- u64 aper_base;
- unsigned aper_order;
-
- pci_read_config_dword(dev, 0x94, &aper_base_32);
- pci_read_config_dword(dev, 0x90, &aper_order);
- aper_order = (aper_order >> 1) & 7;
-
- aper_base = aper_base_32 & 0x7fff;
- aper_base <<= 25;
-
- aper_size = (32 * 1024 * 1024) << aper_order;
- if (aper_base + aper_size > 0x100000000UL || !aper_size)
- aper_base = 0;
-
- *size = aper_size;
- return aper_base;
-}
-
-/*
- * Private Northbridge GATT initialization in case we cannot use the
- * AGP driver for some reason.
- */
-static __init int init_k8_gatt(struct agp_kern_info *info)
-{
- struct pci_dev *dev;
- void *gatt;
- unsigned aper_base, new_aper_base;
- unsigned aper_size, gatt_size, new_aper_size;
- int i;
-
- printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
- aper_size = aper_base = info->aper_size = 0;
- dev = NULL;
- for (i = 0; i < num_k8_northbridges; i++) {
- dev = k8_northbridges[i];
- new_aper_base = read_aperture(dev, &new_aper_size);
- if (!new_aper_base)
- goto nommu;
-
- if (!aper_base) {
- aper_size = new_aper_size;
- aper_base = new_aper_base;
- }
- if (aper_size != new_aper_size || aper_base != new_aper_base)
- goto nommu;
- }
- if (!aper_base)
- goto nommu;
- info->aper_base = aper_base;
- info->aper_size = aper_size>>20;
-
- gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32);
- gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size));
- if (!gatt)
- panic("Cannot allocate GATT table");
- if (change_page_attr_addr((unsigned long)gatt, gatt_size >> PAGE_SHIFT, PAGE_KERNEL_NOCACHE))
- panic("Could not set GART PTEs to uncacheable pages");
- global_flush_tlb();
-
- memset(gatt, 0, gatt_size);
- agp_gatt_table = gatt;
-
- for (i = 0; i < num_k8_northbridges; i++) {
- u32 ctl;
- u32 gatt_reg;
-
- dev = k8_northbridges[i];
- gatt_reg = __pa(gatt) >> 12;
- gatt_reg <<= 4;
- pci_write_config_dword(dev, 0x98, gatt_reg);
- pci_read_config_dword(dev, 0x90, &ctl);
-
- ctl |= 1;
- ctl &= ~((1<<4) | (1<<5));
-
- pci_write_config_dword(dev, 0x90, ctl);
- }
- flush_gart();
-
- printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10);
- return 0;
-
- nommu:
- /* Should not happen anymore */
- printk(KERN_ERR "PCI-DMA: More than 4GB of RAM and no IOMMU\n"
- KERN_ERR "PCI-DMA: 32bit PCI IO may malfunction.\n");
- return -1;
-}
-
-extern int agp_amd64_init(void);
-
-static const struct dma_mapping_ops gart_dma_ops = {
- .mapping_error = NULL,
- .map_single = gart_map_single,
- .map_simple = gart_map_simple,
- .unmap_single = gart_unmap_single,
- .sync_single_for_cpu = NULL,
- .sync_single_for_device = NULL,
- .sync_single_range_for_cpu = NULL,
- .sync_single_range_for_device = NULL,
- .sync_sg_for_cpu = NULL,
- .sync_sg_for_device = NULL,
- .map_sg = gart_map_sg,
- .unmap_sg = gart_unmap_sg,
-};
-
-void gart_iommu_shutdown(void)
-{
- struct pci_dev *dev;
- int i;
-
- if (no_agp && (dma_ops != &gart_dma_ops))
- return;
-
- for (i = 0; i < num_k8_northbridges; i++) {
- u32 ctl;
-
- dev = k8_northbridges[i];
- pci_read_config_dword(dev, 0x90, &ctl);
-
- ctl &= ~1;
-
- pci_write_config_dword(dev, 0x90, ctl);
- }
-}
-
-void __init gart_iommu_init(void)
-{
- struct agp_kern_info info;
- unsigned long aper_size;
- unsigned long iommu_start;
- unsigned long scratch;
- long i;
-
- if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) {
- printk(KERN_INFO "PCI-GART: No AMD northbridge found.\n");
- return;
- }
-
-#ifndef CONFIG_AGP_AMD64
- no_agp = 1;
-#else
- /* Makefile puts PCI initialization via subsys_initcall first. */
- /* Add other K8 AGP bridge drivers here */
- no_agp = no_agp ||
- (agp_amd64_init() < 0) ||
- (agp_copy_info(agp_bridge, &info) < 0);
-#endif
-
- if (swiotlb)
- return;
-
- /* Did we detect a different HW IOMMU? */
- if (iommu_detected && !iommu_aperture)
- return;
-
- if (no_iommu ||
- (!force_iommu && end_pfn <= MAX_DMA32_PFN) ||
- !iommu_aperture ||
- (no_agp && init_k8_gatt(&info) < 0)) {
- if (end_pfn > MAX_DMA32_PFN) {
- printk(KERN_ERR "WARNING more than 4GB of memory "
- "but GART IOMMU not available.\n"
- KERN_ERR "WARNING 32bit PCI may malfunction.\n");
- }
- return;
- }
-
- printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n");
- aper_size = info.aper_size * 1024 * 1024;
- iommu_size = check_iommu_size(info.aper_base, aper_size);
- iommu_pages = iommu_size >> PAGE_SHIFT;
-
- iommu_gart_bitmap = (void*)__get_free_pages(GFP_KERNEL,
- get_order(iommu_pages/8));
- if (!iommu_gart_bitmap)
- panic("Cannot allocate iommu bitmap\n");
- memset(iommu_gart_bitmap, 0, iommu_pages/8);
-
-#ifdef CONFIG_IOMMU_LEAK
- if (leak_trace) {
- iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL,
- get_order(iommu_pages*sizeof(void *)));
- if (iommu_leak_tab)
- memset(iommu_leak_tab, 0, iommu_pages * 8);
- else
- printk("PCI-DMA: Cannot allocate leak trace area\n");
- }
-#endif
-
- /*
- * Out of IOMMU space handling.
- * Reserve some invalid pages at the beginning of the GART.
- */
- set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES);
-
- agp_memory_reserved = iommu_size;
- printk(KERN_INFO
- "PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n",
- iommu_size>>20);
-
- iommu_start = aper_size - iommu_size;
- iommu_bus_base = info.aper_base + iommu_start;
- bad_dma_address = iommu_bus_base;
- iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT);
-
- /*
- * Unmap the IOMMU part of the GART. The alias of the page is
- * always mapped with cache enabled and there is no full cache
- * coherency across the GART remapping. The unmapping avoids
- * automatic prefetches from the CPU allocating cache lines in
- * there. All CPU accesses are done via the direct mapping to
- * the backing memory. The GART address is only used by PCI
- * devices.
- */
- clear_kernel_mapping((unsigned long)__va(iommu_bus_base), iommu_size);
-
- /*
- * Try to workaround a bug (thanks to BenH)
- * Set unmapped entries to a scratch page instead of 0.
- * Any prefetches that hit unmapped entries won't get an bus abort
- * then.
- */
- scratch = get_zeroed_page(GFP_KERNEL);
- if (!scratch)
- panic("Cannot allocate iommu scratch page");
- gart_unmapped_entry = GPTE_ENCODE(__pa(scratch));
- for (i = EMERGENCY_PAGES; i < iommu_pages; i++)
- iommu_gatt_base[i] = gart_unmapped_entry;
-
- flush_gart();
- dma_ops = &gart_dma_ops;
-}
-
-void __init gart_parse_options(char *p)
-{
- int arg;
-
-#ifdef CONFIG_IOMMU_LEAK
- if (!strncmp(p,"leak",4)) {
- leak_trace = 1;
- p += 4;
- if (*p == '=') ++p;
- if (isdigit(*p) && get_option(&p, &arg))
- iommu_leak_pages = arg;
- }
-#endif
- if (isdigit(*p) && get_option(&p, &arg))
- iommu_size = arg;
- if (!strncmp(p, "fullflush",8))
- iommu_fullflush = 1;
- if (!strncmp(p, "nofullflush",11))
- iommu_fullflush = 0;
- if (!strncmp(p,"noagp",5))
- no_agp = 1;
- if (!strncmp(p, "noaperture",10))
- fix_aperture = 0;
- /* duplicated from pci-dma.c */
- if (!strncmp(p,"force",5))
- iommu_aperture_allowed = 1;
- if (!strncmp(p,"allowed",7))
- iommu_aperture_allowed = 1;
- if (!strncmp(p, "memaper", 7)) {
- fallback_aper_force = 1;
- p += 7;
- if (*p == '=') {
- ++p;
- if (get_option(&p, &arg))
- fallback_aper_order = arg;
- }
- }
-}
diff --git a/arch/x86_64/kernel/pci-nommu.c b/arch/x86_64/kernel/pci-nommu.c
deleted file mode 100644
index 2a34c6c025a9..000000000000
--- a/arch/x86_64/kernel/pci-nommu.c
+++ /dev/null
@@ -1,97 +0,0 @@
-/* Fallback functions when the main IOMMU code is not compiled in. This
- code is roughly equivalent to i386. */
-#include <linux/mm.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/string.h>
-#include <linux/dma-mapping.h>
-
-#include <asm/iommu.h>
-#include <asm/processor.h>
-#include <asm/dma.h>
-
-static int
-check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
-{
- if (hwdev && bus + size > *hwdev->dma_mask) {
- if (*hwdev->dma_mask >= DMA_32BIT_MASK)
- printk(KERN_ERR
- "nommu_%s: overflow %Lx+%zu of device mask %Lx\n",
- name, (long long)bus, size,
- (long long)*hwdev->dma_mask);
- return 0;
- }
- return 1;
-}
-
-static dma_addr_t
-nommu_map_single(struct device *hwdev, void *ptr, size_t size,
- int direction)
-{
- dma_addr_t bus = virt_to_bus(ptr);
- if (!check_addr("map_single", hwdev, bus, size))
- return bad_dma_address;
- return bus;
-}
-
-static void nommu_unmap_single(struct device *dev, dma_addr_t addr,size_t size,
- int direction)
-{
-}
-
-/* Map a set of buffers described by scatterlist in streaming
- * mode for DMA. This is the scatter-gather version of the
- * above pci_map_single interface. Here the scatter gather list
- * elements are each tagged with the appropriate dma address
- * and length. They are obtained via sg_dma_{address,length}(SG).
- *
- * NOTE: An implementation may be able to use a smaller number of
- * DMA address/length pairs than there are SG table elements.
- * (for example via virtual mapping capabilities)
- * The routine returns the number of addr/length pairs actually
- * used, at most nents.
- *
- * Device ownership issues as mentioned above for pci_map_single are
- * the same here.
- */
-static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
- int nents, int direction)
-{
- int i;
-
- for (i = 0; i < nents; i++ ) {
- struct scatterlist *s = &sg[i];
- BUG_ON(!s->page);
- s->dma_address = virt_to_bus(page_address(s->page) +s->offset);
- if (!check_addr("map_sg", hwdev, s->dma_address, s->length))
- return 0;
- s->dma_length = s->length;
- }
- return nents;
-}
-
-/* Unmap a set of streaming mode DMA translations.
- * Again, cpu read rules concerning calls here are the same as for
- * pci_unmap_single() above.
- */
-static void nommu_unmap_sg(struct device *dev, struct scatterlist *sg,
- int nents, int dir)
-{
-}
-
-const struct dma_mapping_ops nommu_dma_ops = {
- .map_single = nommu_map_single,
- .unmap_single = nommu_unmap_single,
- .map_sg = nommu_map_sg,
- .unmap_sg = nommu_unmap_sg,
- .is_phys = 1,
-};
-
-void __init no_iommu_init(void)
-{
- if (dma_ops)
- return;
-
- force_iommu = 0; /* no HW IOMMU */
- dma_ops = &nommu_dma_ops;
-}
diff --git a/arch/x86_64/kernel/pci-swiotlb.c b/arch/x86_64/kernel/pci-swiotlb.c
deleted file mode 100644
index b2f405ea7c85..000000000000
--- a/arch/x86_64/kernel/pci-swiotlb.c
+++ /dev/null
@@ -1,44 +0,0 @@
-/* Glue code to lib/swiotlb.c */
-
-#include <linux/pci.h>
-#include <linux/cache.h>
-#include <linux/module.h>
-#include <linux/dma-mapping.h>
-
-#include <asm/iommu.h>
-#include <asm/swiotlb.h>
-#include <asm/dma.h>
-
-int swiotlb __read_mostly;
-EXPORT_SYMBOL(swiotlb);
-
-const struct dma_mapping_ops swiotlb_dma_ops = {
- .mapping_error = swiotlb_dma_mapping_error,
- .alloc_coherent = swiotlb_alloc_coherent,
- .free_coherent = swiotlb_free_coherent,
- .map_single = swiotlb_map_single,
- .unmap_single = swiotlb_unmap_single,
- .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
- .sync_single_for_device = swiotlb_sync_single_for_device,
- .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu,
- .sync_single_range_for_device = swiotlb_sync_single_range_for_device,
- .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
- .sync_sg_for_device = swiotlb_sync_sg_for_device,
- .map_sg = swiotlb_map_sg,
- .unmap_sg = swiotlb_unmap_sg,
- .dma_supported = NULL,
-};
-
-void __init pci_swiotlb_init(void)
-{
- /* don't initialize swiotlb if iommu=off (no_iommu=1) */
- if (!iommu_detected && !no_iommu && end_pfn > MAX_DMA32_PFN)
- swiotlb = 1;
- if (swiotlb_force)
- swiotlb = 1;
- if (swiotlb) {
- printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n");
- swiotlb_init();
- dma_ops = &swiotlb_dma_ops;
- }
-}
diff --git a/arch/x86_64/kernel/pmtimer.c b/arch/x86_64/kernel/pmtimer.c
deleted file mode 100644
index ae8f91214f15..000000000000
--- a/arch/x86_64/kernel/pmtimer.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/* Ported over from i386 by AK, original copyright was:
- *
- * (C) Dominik Brodowski <linux@brodo.de> 2003
- *
- * Driver to use the Power Management Timer (PMTMR) available in some
- * southbridges as primary timing source for the Linux kernel.
- *
- * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c,
- * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4.
- *
- * This file is licensed under the GPL v2.
- *
- * Dropped all the hardware bug workarounds for now. Hopefully they
- * are not needed on 64bit chipsets.
- */
-
-#include <linux/jiffies.h>
-#include <linux/kernel.h>
-#include <linux/time.h>
-#include <linux/init.h>
-#include <linux/cpumask.h>
-#include <asm/io.h>
-#include <asm/proto.h>
-#include <asm/msr.h>
-#include <asm/vsyscall.h>
-
-#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */
-
-static inline u32 cyc2us(u32 cycles)
-{
- /* The Power Management Timer ticks at 3.579545 ticks per microsecond.
- * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%]
- *
- * Even with HZ = 100, delta is at maximum 35796 ticks, so it can
- * easily be multiplied with 286 (=0x11E) without having to fear
- * u32 overflows.
- */
- cycles *= 286;
- return (cycles >> 10);
-}
-
-static unsigned pmtimer_wait_tick(void)
-{
- u32 a, b;
- for (a = b = inl(pmtmr_ioport) & ACPI_PM_MASK;
- a == b;
- b = inl(pmtmr_ioport) & ACPI_PM_MASK)
- cpu_relax();
- return b;
-}
-
-/* note: wait time is rounded up to one tick */
-void pmtimer_wait(unsigned us)
-{
- u32 a, b;
- a = pmtimer_wait_tick();
- do {
- b = inl(pmtmr_ioport);
- cpu_relax();
- } while (cyc2us(b - a) < us);
-}
-
-static int __init nopmtimer_setup(char *s)
-{
- pmtmr_ioport = 0;
- return 1;
-}
-
-__setup("nopmtimer", nopmtimer_setup);
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
deleted file mode 100644
index 98956555450b..000000000000
--- a/arch/x86_64/kernel/process.c
+++ /dev/null
@@ -1,903 +0,0 @@
-/*
- * linux/arch/x86-64/kernel/process.c
- *
- * Copyright (C) 1995 Linus Torvalds
- *
- * Pentium III FXSR, SSE support
- * Gareth Hughes <gareth@valinux.com>, May 2000
- *
- * X86-64 port
- * Andi Kleen.
- *
- * CPU hotplug support - ashok.raj@intel.com
- */
-
-/*
- * This file handles the architecture-dependent parts of process handling..
- */
-
-#include <stdarg.h>
-
-#include <linux/cpu.h>
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/elfcore.h>
-#include <linux/smp.h>
-#include <linux/slab.h>
-#include <linux/user.h>
-#include <linux/module.h>
-#include <linux/a.out.h>
-#include <linux/interrupt.h>
-#include <linux/delay.h>
-#include <linux/ptrace.h>
-#include <linux/utsname.h>
-#include <linux/random.h>
-#include <linux/notifier.h>
-#include <linux/kprobes.h>
-#include <linux/kdebug.h>
-
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/system.h>
-#include <asm/io.h>
-#include <asm/processor.h>
-#include <asm/i387.h>
-#include <asm/mmu_context.h>
-#include <asm/pda.h>
-#include <asm/prctl.h>
-#include <asm/desc.h>
-#include <asm/proto.h>
-#include <asm/ia32.h>
-#include <asm/idle.h>
-
-asmlinkage extern void ret_from_fork(void);
-
-unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
-
-unsigned long boot_option_idle_override = 0;
-EXPORT_SYMBOL(boot_option_idle_override);
-
-/*
- * Powermanagement idle function, if any..
- */
-void (*pm_idle)(void);
-EXPORT_SYMBOL(pm_idle);
-static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
-
-static ATOMIC_NOTIFIER_HEAD(idle_notifier);
-
-void idle_notifier_register(struct notifier_block *n)
-{
- atomic_notifier_chain_register(&idle_notifier, n);
-}
-EXPORT_SYMBOL_GPL(idle_notifier_register);
-
-void idle_notifier_unregister(struct notifier_block *n)
-{
- atomic_notifier_chain_unregister(&idle_notifier, n);
-}
-EXPORT_SYMBOL(idle_notifier_unregister);
-
-void enter_idle(void)
-{
- write_pda(isidle, 1);
- atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
-}
-
-static void __exit_idle(void)
-{
- if (test_and_clear_bit_pda(0, isidle) == 0)
- return;
- atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
-}
-
-/* Called from interrupts to signify idle end */
-void exit_idle(void)
-{
- /* idle loop has pid 0 */
- if (current->pid)
- return;
- __exit_idle();
-}
-
-/*
- * We use this if we don't have any better
- * idle routine..
- */
-static void default_idle(void)
-{
- current_thread_info()->status &= ~TS_POLLING;
- /*
- * TS_POLLING-cleared state must be visible before we
- * test NEED_RESCHED:
- */
- smp_mb();
- local_irq_disable();
- if (!need_resched()) {
- /* Enables interrupts one instruction before HLT.
- x86 special cases this so there is no race. */
- safe_halt();
- } else
- local_irq_enable();
- current_thread_info()->status |= TS_POLLING;
-}
-
-/*
- * On SMP it's slightly faster (but much more power-consuming!)
- * to poll the ->need_resched flag instead of waiting for the
- * cross-CPU IPI to arrive. Use this option with caution.
- */
-static void poll_idle (void)
-{
- local_irq_enable();
- cpu_relax();
-}
-
-void cpu_idle_wait(void)
-{
- unsigned int cpu, this_cpu = get_cpu();
- cpumask_t map, tmp = current->cpus_allowed;
-
- set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
- put_cpu();
-
- cpus_clear(map);
- for_each_online_cpu(cpu) {
- per_cpu(cpu_idle_state, cpu) = 1;
- cpu_set(cpu, map);
- }
-
- __get_cpu_var(cpu_idle_state) = 0;
-
- wmb();
- do {
- ssleep(1);
- for_each_online_cpu(cpu) {
- if (cpu_isset(cpu, map) &&
- !per_cpu(cpu_idle_state, cpu))
- cpu_clear(cpu, map);
- }
- cpus_and(map, map, cpu_online_map);
- } while (!cpus_empty(map));
-
- set_cpus_allowed(current, tmp);
-}
-EXPORT_SYMBOL_GPL(cpu_idle_wait);
-
-#ifdef CONFIG_HOTPLUG_CPU
-DECLARE_PER_CPU(int, cpu_state);
-
-#include <asm/nmi.h>
-/* We halt the CPU with physical CPU hotplug */
-static inline void play_dead(void)
-{
- idle_task_exit();
- wbinvd();
- mb();
- /* Ack it */
- __get_cpu_var(cpu_state) = CPU_DEAD;
-
- local_irq_disable();
- while (1)
- halt();
-}
-#else
-static inline void play_dead(void)
-{
- BUG();
-}
-#endif /* CONFIG_HOTPLUG_CPU */
-
-/*
- * The idle thread. There's no useful work to be
- * done, so just try to conserve power and have a
- * low exit latency (ie sit in a loop waiting for
- * somebody to say that they'd like to reschedule)
- */
-void cpu_idle (void)
-{
- current_thread_info()->status |= TS_POLLING;
- /* endless idle loop with no priority at all */
- while (1) {
- while (!need_resched()) {
- void (*idle)(void);
-
- if (__get_cpu_var(cpu_idle_state))
- __get_cpu_var(cpu_idle_state) = 0;
-
- rmb();
- idle = pm_idle;
- if (!idle)
- idle = default_idle;
- if (cpu_is_offline(smp_processor_id()))
- play_dead();
- /*
- * Idle routines should keep interrupts disabled
- * from here on, until they go to idle.
- * Otherwise, idle callbacks can misfire.
- */
- local_irq_disable();
- enter_idle();
- idle();
- /* In many cases the interrupt that ended idle
- has already called exit_idle. But some idle
- loops can be woken up without interrupt. */
- __exit_idle();
- }
-
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
- }
-}
-
-/*
- * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
- * which can obviate IPI to trigger checking of need_resched.
- * We execute MONITOR against need_resched and enter optimized wait state
- * through MWAIT. Whenever someone changes need_resched, we would be woken
- * up from MWAIT (without an IPI).
- *
- * New with Core Duo processors, MWAIT can take some hints based on CPU
- * capability.
- */
-void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
-{
- if (!need_resched()) {
- __monitor((void *)&current_thread_info()->flags, 0, 0);
- smp_mb();
- if (!need_resched())
- __mwait(eax, ecx);
- }
-}
-
-/* Default MONITOR/MWAIT with no hints, used for default C1 state */
-static void mwait_idle(void)
-{
- if (!need_resched()) {
- __monitor((void *)&current_thread_info()->flags, 0, 0);
- smp_mb();
- if (!need_resched())
- __sti_mwait(0, 0);
- else
- local_irq_enable();
- } else {
- local_irq_enable();
- }
-}
-
-void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
-{
- static int printed;
- if (cpu_has(c, X86_FEATURE_MWAIT)) {
- /*
- * Skip, if setup has overridden idle.
- * One CPU supports mwait => All CPUs supports mwait
- */
- if (!pm_idle) {
- if (!printed) {
- printk(KERN_INFO "using mwait in idle threads.\n");
- printed = 1;
- }
- pm_idle = mwait_idle;
- }
- }
-}
-
-static int __init idle_setup (char *str)
-{
- if (!strcmp(str, "poll")) {
- printk("using polling idle threads.\n");
- pm_idle = poll_idle;
- } else if (!strcmp(str, "mwait"))
- force_mwait = 1;
- else
- return -1;
-
- boot_option_idle_override = 1;
- return 0;
-}
-early_param("idle", idle_setup);
-
-/* Prints also some state that isn't saved in the pt_regs */
-void __show_regs(struct pt_regs * regs)
-{
- unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
- unsigned long d0, d1, d2, d3, d6, d7;
- unsigned int fsindex,gsindex;
- unsigned int ds,cs,es;
-
- printk("\n");
- print_modules();
- printk("Pid: %d, comm: %.20s %s %s %.*s\n",
- current->pid, current->comm, print_tainted(),
- init_utsname()->release,
- (int)strcspn(init_utsname()->version, " "),
- init_utsname()->version);
- printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
- printk_address(regs->rip);
- printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
- regs->eflags);
- printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
- regs->rax, regs->rbx, regs->rcx);
- printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
- regs->rdx, regs->rsi, regs->rdi);
- printk("RBP: %016lx R08: %016lx R09: %016lx\n",
- regs->rbp, regs->r8, regs->r9);
- printk("R10: %016lx R11: %016lx R12: %016lx\n",
- regs->r10, regs->r11, regs->r12);
- printk("R13: %016lx R14: %016lx R15: %016lx\n",
- regs->r13, regs->r14, regs->r15);
-
- asm("movl %%ds,%0" : "=r" (ds));
- asm("movl %%cs,%0" : "=r" (cs));
- asm("movl %%es,%0" : "=r" (es));
- asm("movl %%fs,%0" : "=r" (fsindex));
- asm("movl %%gs,%0" : "=r" (gsindex));
-
- rdmsrl(MSR_FS_BASE, fs);
- rdmsrl(MSR_GS_BASE, gs);
- rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
-
- cr0 = read_cr0();
- cr2 = read_cr2();
- cr3 = read_cr3();
- cr4 = read_cr4();
-
- printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
- fs,fsindex,gs,gsindex,shadowgs);
- printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
- printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
-
- get_debugreg(d0, 0);
- get_debugreg(d1, 1);
- get_debugreg(d2, 2);
- printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
- get_debugreg(d3, 3);
- get_debugreg(d6, 6);
- get_debugreg(d7, 7);
- printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
-}
-
-void show_regs(struct pt_regs *regs)
-{
- printk("CPU %d:", smp_processor_id());
- __show_regs(regs);
- show_trace(NULL, regs, (void *)(regs + 1));
-}
-
-/*
- * Free current thread data structures etc..
- */
-void exit_thread(void)
-{
- struct task_struct *me = current;
- struct thread_struct *t = &me->thread;
-
- if (me->thread.io_bitmap_ptr) {
- struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
-
- kfree(t->io_bitmap_ptr);
- t->io_bitmap_ptr = NULL;
- clear_thread_flag(TIF_IO_BITMAP);
- /*
- * Careful, clear this in the TSS too:
- */
- memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
- t->io_bitmap_max = 0;
- put_cpu();
- }
-}
-
-void flush_thread(void)
-{
- struct task_struct *tsk = current;
-
- if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
- clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
- if (test_tsk_thread_flag(tsk, TIF_IA32)) {
- clear_tsk_thread_flag(tsk, TIF_IA32);
- } else {
- set_tsk_thread_flag(tsk, TIF_IA32);
- current_thread_info()->status |= TS_COMPAT;
- }
- }
- clear_tsk_thread_flag(tsk, TIF_DEBUG);
-
- tsk->thread.debugreg0 = 0;
- tsk->thread.debugreg1 = 0;
- tsk->thread.debugreg2 = 0;
- tsk->thread.debugreg3 = 0;
- tsk->thread.debugreg6 = 0;
- tsk->thread.debugreg7 = 0;
- memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
- /*
- * Forget coprocessor state..
- */
- clear_fpu(tsk);
- clear_used_math();
-}
-
-void release_thread(struct task_struct *dead_task)
-{
- if (dead_task->mm) {
- if (dead_task->mm->context.size) {
- printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
- dead_task->comm,
- dead_task->mm->context.ldt,
- dead_task->mm->context.size);
- BUG();
- }
- }
-}
-
-static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
-{
- struct user_desc ud = {
- .base_addr = addr,
- .limit = 0xfffff,
- .seg_32bit = 1,
- .limit_in_pages = 1,
- .useable = 1,
- };
- struct n_desc_struct *desc = (void *)t->thread.tls_array;
- desc += tls;
- desc->a = LDT_entry_a(&ud);
- desc->b = LDT_entry_b(&ud);
-}
-
-static inline u32 read_32bit_tls(struct task_struct *t, int tls)
-{
- struct desc_struct *desc = (void *)t->thread.tls_array;
- desc += tls;
- return desc->base0 |
- (((u32)desc->base1) << 16) |
- (((u32)desc->base2) << 24);
-}
-
-/*
- * This gets called before we allocate a new thread and copy
- * the current task into it.
- */
-void prepare_to_copy(struct task_struct *tsk)
-{
- unlazy_fpu(tsk);
-}
-
-int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
- unsigned long unused,
- struct task_struct * p, struct pt_regs * regs)
-{
- int err;
- struct pt_regs * childregs;
- struct task_struct *me = current;
-
- childregs = ((struct pt_regs *)
- (THREAD_SIZE + task_stack_page(p))) - 1;
- *childregs = *regs;
-
- childregs->rax = 0;
- childregs->rsp = rsp;
- if (rsp == ~0UL)
- childregs->rsp = (unsigned long)childregs;
-
- p->thread.rsp = (unsigned long) childregs;
- p->thread.rsp0 = (unsigned long) (childregs+1);
- p->thread.userrsp = me->thread.userrsp;
-
- set_tsk_thread_flag(p, TIF_FORK);
-
- p->thread.fs = me->thread.fs;
- p->thread.gs = me->thread.gs;
-
- asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
- asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
- asm("mov %%es,%0" : "=m" (p->thread.es));
- asm("mov %%ds,%0" : "=m" (p->thread.ds));
-
- if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
- p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
- if (!p->thread.io_bitmap_ptr) {
- p->thread.io_bitmap_max = 0;
- return -ENOMEM;
- }
- memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
- IO_BITMAP_BYTES);
- set_tsk_thread_flag(p, TIF_IO_BITMAP);
- }
-
- /*
- * Set a new TLS for the child thread?
- */
- if (clone_flags & CLONE_SETTLS) {
-#ifdef CONFIG_IA32_EMULATION
- if (test_thread_flag(TIF_IA32))
- err = ia32_child_tls(p, childregs);
- else
-#endif
- err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
- if (err)
- goto out;
- }
- err = 0;
-out:
- if (err && p->thread.io_bitmap_ptr) {
- kfree(p->thread.io_bitmap_ptr);
- p->thread.io_bitmap_max = 0;
- }
- return err;
-}
-
-/*
- * This special macro can be used to load a debugging register
- */
-#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r)
-
-static inline void __switch_to_xtra(struct task_struct *prev_p,
- struct task_struct *next_p,
- struct tss_struct *tss)
-{
- struct thread_struct *prev, *next;
-
- prev = &prev_p->thread,
- next = &next_p->thread;
-
- if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
- loaddebug(next, 0);
- loaddebug(next, 1);
- loaddebug(next, 2);
- loaddebug(next, 3);
- /* no 4 and 5 */
- loaddebug(next, 6);
- loaddebug(next, 7);
- }
-
- if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
- /*
- * Copy the relevant range of the IO bitmap.
- * Normally this is 128 bytes or less:
- */
- memcpy(tss->io_bitmap, next->io_bitmap_ptr,
- max(prev->io_bitmap_max, next->io_bitmap_max));
- } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
- /*
- * Clear any possible leftover bits:
- */
- memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
- }
-}
-
-/*
- * switch_to(x,y) should switch tasks from x to y.
- *
- * This could still be optimized:
- * - fold all the options into a flag word and test it with a single test.
- * - could test fs/gs bitsliced
- *
- * Kprobes not supported here. Set the probe on schedule instead.
- */
-__kprobes struct task_struct *
-__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
-{
- struct thread_struct *prev = &prev_p->thread,
- *next = &next_p->thread;
- int cpu = smp_processor_id();
- struct tss_struct *tss = &per_cpu(init_tss, cpu);
-
- /* we're going to use this soon, after a few expensive things */
- if (next_p->fpu_counter>5)
- prefetch(&next->i387.fxsave);
-
- /*
- * Reload esp0, LDT and the page table pointer:
- */
- tss->rsp0 = next->rsp0;
-
- /*
- * Switch DS and ES.
- * This won't pick up thread selector changes, but I guess that is ok.
- */
- asm volatile("mov %%es,%0" : "=m" (prev->es));
- if (unlikely(next->es | prev->es))
- loadsegment(es, next->es);
-
- asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
- if (unlikely(next->ds | prev->ds))
- loadsegment(ds, next->ds);
-
- load_TLS(next, cpu);
-
- /*
- * Switch FS and GS.
- */
- {
- unsigned fsindex;
- asm volatile("movl %%fs,%0" : "=r" (fsindex));
- /* segment register != 0 always requires a reload.
- also reload when it has changed.
- when prev process used 64bit base always reload
- to avoid an information leak. */
- if (unlikely(fsindex | next->fsindex | prev->fs)) {
- loadsegment(fs, next->fsindex);
- /* check if the user used a selector != 0
- * if yes clear 64bit base, since overloaded base
- * is always mapped to the Null selector
- */
- if (fsindex)
- prev->fs = 0;
- }
- /* when next process has a 64bit base use it */
- if (next->fs)
- wrmsrl(MSR_FS_BASE, next->fs);
- prev->fsindex = fsindex;
- }
- {
- unsigned gsindex;
- asm volatile("movl %%gs,%0" : "=r" (gsindex));
- if (unlikely(gsindex | next->gsindex | prev->gs)) {
- load_gs_index(next->gsindex);
- if (gsindex)
- prev->gs = 0;
- }
- if (next->gs)
- wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
- prev->gsindex = gsindex;
- }
-
- /* Must be after DS reload */
- unlazy_fpu(prev_p);
-
- /*
- * Switch the PDA and FPU contexts.
- */
- prev->userrsp = read_pda(oldrsp);
- write_pda(oldrsp, next->userrsp);
- write_pda(pcurrent, next_p);
-
- write_pda(kernelstack,
- (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
-#ifdef CONFIG_CC_STACKPROTECTOR
- write_pda(stack_canary, next_p->stack_canary);
- /*
- * Build time only check to make sure the stack_canary is at
- * offset 40 in the pda; this is a gcc ABI requirement
- */
- BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
-#endif
-
- /*
- * Now maybe reload the debug registers and handle I/O bitmaps
- */
- if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
- || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP))
- __switch_to_xtra(prev_p, next_p, tss);
-
- /* If the task has used fpu the last 5 timeslices, just do a full
- * restore of the math state immediately to avoid the trap; the
- * chances of needing FPU soon are obviously high now
- */
- if (next_p->fpu_counter>5)
- math_state_restore();
- return prev_p;
-}
-
-/*
- * sys_execve() executes a new program.
- */
-asmlinkage
-long sys_execve(char __user *name, char __user * __user *argv,
- char __user * __user *envp, struct pt_regs regs)
-{
- long error;
- char * filename;
-
- filename = getname(name);
- error = PTR_ERR(filename);
- if (IS_ERR(filename))
- return error;
- error = do_execve(filename, argv, envp, &regs);
- if (error == 0) {
- task_lock(current);
- current->ptrace &= ~PT_DTRACE;
- task_unlock(current);
- }
- putname(filename);
- return error;
-}
-
-void set_personality_64bit(void)
-{
- /* inherit personality from parent */
-
- /* Make sure to be in 64bit mode */
- clear_thread_flag(TIF_IA32);
-
- /* TBD: overwrites user setup. Should have two bits.
- But 64bit processes have always behaved this way,
- so it's not too bad. The main problem is just that
- 32bit childs are affected again. */
- current->personality &= ~READ_IMPLIES_EXEC;
-}
-
-asmlinkage long sys_fork(struct pt_regs *regs)
-{
- return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
-}
-
-asmlinkage long
-sys_clone(unsigned long clone_flags, unsigned long newsp,
- void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
-{
- if (!newsp)
- newsp = regs->rsp;
- return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
-}
-
-/*
- * This is trivial, and on the face of it looks like it
- * could equally well be done in user mode.
- *
- * Not so, for quite unobvious reasons - register pressure.
- * In user mode vfork() cannot have a stack frame, and if
- * done by calling the "clone()" system call directly, you
- * do not have enough call-clobbered registers to hold all
- * the information you need.
- */
-asmlinkage long sys_vfork(struct pt_regs *regs)
-{
- return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0,
- NULL, NULL);
-}
-
-unsigned long get_wchan(struct task_struct *p)
-{
- unsigned long stack;
- u64 fp,rip;
- int count = 0;
-
- if (!p || p == current || p->state==TASK_RUNNING)
- return 0;
- stack = (unsigned long)task_stack_page(p);
- if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE)
- return 0;
- fp = *(u64 *)(p->thread.rsp);
- do {
- if (fp < (unsigned long)stack ||
- fp > (unsigned long)stack+THREAD_SIZE)
- return 0;
- rip = *(u64 *)(fp+8);
- if (!in_sched_functions(rip))
- return rip;
- fp = *(u64 *)fp;
- } while (count++ < 16);
- return 0;
-}
-
-long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
-{
- int ret = 0;
- int doit = task == current;
- int cpu;
-
- switch (code) {
- case ARCH_SET_GS:
- if (addr >= TASK_SIZE_OF(task))
- return -EPERM;
- cpu = get_cpu();
- /* handle small bases via the GDT because that's faster to
- switch. */
- if (addr <= 0xffffffff) {
- set_32bit_tls(task, GS_TLS, addr);
- if (doit) {
- load_TLS(&task->thread, cpu);
- load_gs_index(GS_TLS_SEL);
- }
- task->thread.gsindex = GS_TLS_SEL;
- task->thread.gs = 0;
- } else {
- task->thread.gsindex = 0;
- task->thread.gs = addr;
- if (doit) {
- load_gs_index(0);
- ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
- }
- }
- put_cpu();
- break;
- case ARCH_SET_FS:
- /* Not strictly needed for fs, but do it for symmetry
- with gs */
- if (addr >= TASK_SIZE_OF(task))
- return -EPERM;
- cpu = get_cpu();
- /* handle small bases via the GDT because that's faster to
- switch. */
- if (addr <= 0xffffffff) {
- set_32bit_tls(task, FS_TLS, addr);
- if (doit) {
- load_TLS(&task->thread, cpu);
- asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
- }
- task->thread.fsindex = FS_TLS_SEL;
- task->thread.fs = 0;
- } else {
- task->thread.fsindex = 0;
- task->thread.fs = addr;
- if (doit) {
- /* set the selector to 0 to not confuse
- __switch_to */
- asm volatile("movl %0,%%fs" :: "r" (0));
- ret = checking_wrmsrl(MSR_FS_BASE, addr);
- }
- }
- put_cpu();
- break;
- case ARCH_GET_FS: {
- unsigned long base;
- if (task->thread.fsindex == FS_TLS_SEL)
- base = read_32bit_tls(task, FS_TLS);
- else if (doit)
- rdmsrl(MSR_FS_BASE, base);
- else
- base = task->thread.fs;
- ret = put_user(base, (unsigned long __user *)addr);
- break;
- }
- case ARCH_GET_GS: {
- unsigned long base;
- unsigned gsindex;
- if (task->thread.gsindex == GS_TLS_SEL)
- base = read_32bit_tls(task, GS_TLS);
- else if (doit) {
- asm("movl %%gs,%0" : "=r" (gsindex));
- if (gsindex)
- rdmsrl(MSR_KERNEL_GS_BASE, base);
- else
- base = task->thread.gs;
- }
- else
- base = task->thread.gs;
- ret = put_user(base, (unsigned long __user *)addr);
- break;
- }
-
- default:
- ret = -EINVAL;
- break;
- }
-
- return ret;
-}
-
-long sys_arch_prctl(int code, unsigned long addr)
-{
- return do_arch_prctl(current, code, addr);
-}
-
-/*
- * Capture the user space registers if the task is not running (in user space)
- */
-int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
-{
- struct pt_regs *pp, ptregs;
-
- pp = task_pt_regs(tsk);
-
- ptregs = *pp;
- ptregs.cs &= 0xffff;
- ptregs.ss &= 0xffff;
-
- elf_core_copy_regs(regs, &ptregs);
-
- return 1;
-}
-
-unsigned long arch_align_stack(unsigned long sp)
-{
- if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
- sp -= get_random_int() % 8192;
- return sp & ~0xf;
-}
diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c
deleted file mode 100644
index eea3702427b4..000000000000
--- a/arch/x86_64/kernel/ptrace.c
+++ /dev/null
@@ -1,627 +0,0 @@
-/* ptrace.c */
-/* By Ross Biro 1/23/92 */
-/*
- * Pentium III FXSR, SSE support
- * Gareth Hughes <gareth@valinux.com>, May 2000
- *
- * x86-64 port 2000-2002 Andi Kleen
- */
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/errno.h>
-#include <linux/ptrace.h>
-#include <linux/user.h>
-#include <linux/security.h>
-#include <linux/audit.h>
-#include <linux/seccomp.h>
-#include <linux/signal.h>
-
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/system.h>
-#include <asm/processor.h>
-#include <asm/i387.h>
-#include <asm/debugreg.h>
-#include <asm/ldt.h>
-#include <asm/desc.h>
-#include <asm/proto.h>
-#include <asm/ia32.h>
-
-/*
- * does not yet catch signals sent when the child dies.
- * in exit.c or in signal.c.
- */
-
-/*
- * Determines which flags the user has access to [1 = access, 0 = no access].
- * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), IOPL(12-13), IF(9).
- * Also masks reserved bits (63-22, 15, 5, 3, 1).
- */
-#define FLAG_MASK 0x54dd5UL
-
-/* set's the trap flag. */
-#define TRAP_FLAG 0x100UL
-
-/*
- * eflags and offset of eflags on child stack..
- */
-#define EFLAGS offsetof(struct pt_regs, eflags)
-#define EFL_OFFSET ((int)(EFLAGS-sizeof(struct pt_regs)))
-
-/*
- * this routine will get a word off of the processes privileged stack.
- * the offset is how far from the base addr as stored in the TSS.
- * this routine assumes that all the privileged stacks are in our
- * data space.
- */
-static inline unsigned long get_stack_long(struct task_struct *task, int offset)
-{
- unsigned char *stack;
-
- stack = (unsigned char *)task->thread.rsp0;
- stack += offset;
- return (*((unsigned long *)stack));
-}
-
-/*
- * this routine will put a word on the processes privileged stack.
- * the offset is how far from the base addr as stored in the TSS.
- * this routine assumes that all the privileged stacks are in our
- * data space.
- */
-static inline long put_stack_long(struct task_struct *task, int offset,
- unsigned long data)
-{
- unsigned char * stack;
-
- stack = (unsigned char *) task->thread.rsp0;
- stack += offset;
- *(unsigned long *) stack = data;
- return 0;
-}
-
-#define LDT_SEGMENT 4
-
-unsigned long convert_rip_to_linear(struct task_struct *child, struct pt_regs *regs)
-{
- unsigned long addr, seg;
-
- addr = regs->rip;
- seg = regs->cs & 0xffff;
-
- /*
- * We'll assume that the code segments in the GDT
- * are all zero-based. That is largely true: the
- * TLS segments are used for data, and the PNPBIOS
- * and APM bios ones we just ignore here.
- */
- if (seg & LDT_SEGMENT) {
- u32 *desc;
- unsigned long base;
-
- seg &= ~7UL;
-
- down(&child->mm->context.sem);
- if (unlikely((seg >> 3) >= child->mm->context.size))
- addr = -1L; /* bogus selector, access would fault */
- else {
- desc = child->mm->context.ldt + seg;
- base = ((desc[0] >> 16) |
- ((desc[1] & 0xff) << 16) |
- (desc[1] & 0xff000000));
-
- /* 16-bit code segment? */
- if (!((desc[1] >> 22) & 1))
- addr &= 0xffff;
- addr += base;
- }
- up(&child->mm->context.sem);
- }
-
- return addr;
-}
-
-static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
-{
- int i, copied;
- unsigned char opcode[15];
- unsigned long addr = convert_rip_to_linear(child, regs);
-
- copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0);
- for (i = 0; i < copied; i++) {
- switch (opcode[i]) {
- /* popf and iret */
- case 0x9d: case 0xcf:
- return 1;
-
- /* CHECKME: 64 65 */
-
- /* opcode and address size prefixes */
- case 0x66: case 0x67:
- continue;
- /* irrelevant prefixes (segment overrides and repeats) */
- case 0x26: case 0x2e:
- case 0x36: case 0x3e:
- case 0x64: case 0x65:
- case 0xf2: case 0xf3:
- continue;
-
- case 0x40 ... 0x4f:
- if (regs->cs != __USER_CS)
- /* 32-bit mode: register increment */
- return 0;
- /* 64-bit mode: REX prefix */
- continue;
-
- /* CHECKME: f2, f3 */
-
- /*
- * pushf: NOTE! We should probably not let
- * the user see the TF bit being set. But
- * it's more pain than it's worth to avoid
- * it, and a debugger could emulate this
- * all in user space if it _really_ cares.
- */
- case 0x9c:
- default:
- return 0;
- }
- }
- return 0;
-}
-
-static void set_singlestep(struct task_struct *child)
-{
- struct pt_regs *regs = task_pt_regs(child);
-
- /*
- * Always set TIF_SINGLESTEP - this guarantees that
- * we single-step system calls etc.. This will also
- * cause us to set TF when returning to user mode.
- */
- set_tsk_thread_flag(child, TIF_SINGLESTEP);
-
- /*
- * If TF was already set, don't do anything else
- */
- if (regs->eflags & TRAP_FLAG)
- return;
-
- /* Set TF on the kernel stack.. */
- regs->eflags |= TRAP_FLAG;
-
- /*
- * ..but if TF is changed by the instruction we will trace,
- * don't mark it as being "us" that set it, so that we
- * won't clear it by hand later.
- */
- if (is_setting_trap_flag(child, regs))
- return;
-
- child->ptrace |= PT_DTRACE;
-}
-
-static void clear_singlestep(struct task_struct *child)
-{
- /* Always clear TIF_SINGLESTEP... */
- clear_tsk_thread_flag(child, TIF_SINGLESTEP);
-
- /* But touch TF only if it was set by us.. */
- if (child->ptrace & PT_DTRACE) {
- struct pt_regs *regs = task_pt_regs(child);
- regs->eflags &= ~TRAP_FLAG;
- child->ptrace &= ~PT_DTRACE;
- }
-}
-
-/*
- * Called by kernel/ptrace.c when detaching..
- *
- * Make sure the single step bit is not set.
- */
-void ptrace_disable(struct task_struct *child)
-{
- clear_singlestep(child);
-}
-
-static int putreg(struct task_struct *child,
- unsigned long regno, unsigned long value)
-{
- unsigned long tmp;
-
- switch (regno) {
- case offsetof(struct user_regs_struct,fs):
- if (value && (value & 3) != 3)
- return -EIO;
- child->thread.fsindex = value & 0xffff;
- return 0;
- case offsetof(struct user_regs_struct,gs):
- if (value && (value & 3) != 3)
- return -EIO;
- child->thread.gsindex = value & 0xffff;
- return 0;
- case offsetof(struct user_regs_struct,ds):
- if (value && (value & 3) != 3)
- return -EIO;
- child->thread.ds = value & 0xffff;
- return 0;
- case offsetof(struct user_regs_struct,es):
- if (value && (value & 3) != 3)
- return -EIO;
- child->thread.es = value & 0xffff;
- return 0;
- case offsetof(struct user_regs_struct,ss):
- if ((value & 3) != 3)
- return -EIO;
- value &= 0xffff;
- return 0;
- case offsetof(struct user_regs_struct,fs_base):
- if (value >= TASK_SIZE_OF(child))
- return -EIO;
- child->thread.fs = value;
- return 0;
- case offsetof(struct user_regs_struct,gs_base):
- if (value >= TASK_SIZE_OF(child))
- return -EIO;
- child->thread.gs = value;
- return 0;
- case offsetof(struct user_regs_struct, eflags):
- value &= FLAG_MASK;
- tmp = get_stack_long(child, EFL_OFFSET);
- tmp &= ~FLAG_MASK;
- value |= tmp;
- break;
- case offsetof(struct user_regs_struct,cs):
- if ((value & 3) != 3)
- return -EIO;
- value &= 0xffff;
- break;
- }
- put_stack_long(child, regno - sizeof(struct pt_regs), value);
- return 0;
-}
-
-static unsigned long getreg(struct task_struct *child, unsigned long regno)
-{
- unsigned long val;
- switch (regno) {
- case offsetof(struct user_regs_struct, fs):
- return child->thread.fsindex;
- case offsetof(struct user_regs_struct, gs):
- return child->thread.gsindex;
- case offsetof(struct user_regs_struct, ds):
- return child->thread.ds;
- case offsetof(struct user_regs_struct, es):
- return child->thread.es;
- case offsetof(struct user_regs_struct, fs_base):
- return child->thread.fs;
- case offsetof(struct user_regs_struct, gs_base):
- return child->thread.gs;
- default:
- regno = regno - sizeof(struct pt_regs);
- val = get_stack_long(child, regno);
- if (test_tsk_thread_flag(child, TIF_IA32))
- val &= 0xffffffff;
- return val;
- }
-
-}
-
-long arch_ptrace(struct task_struct *child, long request, long addr, long data)
-{
- long i, ret;
- unsigned ui;
-
- switch (request) {
- /* when I and D space are separate, these will need to be fixed. */
- case PTRACE_PEEKTEXT: /* read word at location addr. */
- case PTRACE_PEEKDATA:
- ret = generic_ptrace_peekdata(child, addr, data);
- break;
-
- /* read the word at location addr in the USER area. */
- case PTRACE_PEEKUSR: {
- unsigned long tmp;
-
- ret = -EIO;
- if ((addr & 7) ||
- addr > sizeof(struct user) - 7)
- break;
-
- switch (addr) {
- case 0 ... sizeof(struct user_regs_struct) - sizeof(long):
- tmp = getreg(child, addr);
- break;
- case offsetof(struct user, u_debugreg[0]):
- tmp = child->thread.debugreg0;
- break;
- case offsetof(struct user, u_debugreg[1]):
- tmp = child->thread.debugreg1;
- break;
- case offsetof(struct user, u_debugreg[2]):
- tmp = child->thread.debugreg2;
- break;
- case offsetof(struct user, u_debugreg[3]):
- tmp = child->thread.debugreg3;
- break;
- case offsetof(struct user, u_debugreg[6]):
- tmp = child->thread.debugreg6;
- break;
- case offsetof(struct user, u_debugreg[7]):
- tmp = child->thread.debugreg7;
- break;
- default:
- tmp = 0;
- break;
- }
- ret = put_user(tmp,(unsigned long __user *) data);
- break;
- }
-
- /* when I and D space are separate, this will have to be fixed. */
- case PTRACE_POKETEXT: /* write the word at location addr. */
- case PTRACE_POKEDATA:
- ret = generic_ptrace_pokedata(child, addr, data);
- break;
-
- case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
- {
- int dsize = test_tsk_thread_flag(child, TIF_IA32) ? 3 : 7;
- ret = -EIO;
- if ((addr & 7) ||
- addr > sizeof(struct user) - 7)
- break;
-
- switch (addr) {
- case 0 ... sizeof(struct user_regs_struct) - sizeof(long):
- ret = putreg(child, addr, data);
- break;
- /* Disallows to set a breakpoint into the vsyscall */
- case offsetof(struct user, u_debugreg[0]):
- if (data >= TASK_SIZE_OF(child) - dsize) break;
- child->thread.debugreg0 = data;
- ret = 0;
- break;
- case offsetof(struct user, u_debugreg[1]):
- if (data >= TASK_SIZE_OF(child) - dsize) break;
- child->thread.debugreg1 = data;
- ret = 0;
- break;
- case offsetof(struct user, u_debugreg[2]):
- if (data >= TASK_SIZE_OF(child) - dsize) break;
- child->thread.debugreg2 = data;
- ret = 0;
- break;
- case offsetof(struct user, u_debugreg[3]):
- if (data >= TASK_SIZE_OF(child) - dsize) break;
- child->thread.debugreg3 = data;
- ret = 0;
- break;
- case offsetof(struct user, u_debugreg[6]):
- if (data >> 32)
- break;
- child->thread.debugreg6 = data;
- ret = 0;
- break;
- case offsetof(struct user, u_debugreg[7]):
- /* See arch/i386/kernel/ptrace.c for an explanation of
- * this awkward check.*/
- data &= ~DR_CONTROL_RESERVED;
- for(i=0; i<4; i++)
- if ((0x5554 >> ((data >> (16 + 4*i)) & 0xf)) & 1)
- break;
- if (i == 4) {
- child->thread.debugreg7 = data;
- if (data)
- set_tsk_thread_flag(child, TIF_DEBUG);
- else
- clear_tsk_thread_flag(child, TIF_DEBUG);
- ret = 0;
- }
- break;
- }
- break;
- }
- case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
- case PTRACE_CONT: /* restart after signal. */
-
- ret = -EIO;
- if (!valid_signal(data))
- break;
- if (request == PTRACE_SYSCALL)
- set_tsk_thread_flag(child,TIF_SYSCALL_TRACE);
- else
- clear_tsk_thread_flag(child,TIF_SYSCALL_TRACE);
- clear_tsk_thread_flag(child, TIF_SINGLESTEP);
- child->exit_code = data;
- /* make sure the single step bit is not set. */
- clear_singlestep(child);
- wake_up_process(child);
- ret = 0;
- break;
-
-#ifdef CONFIG_IA32_EMULATION
- /* This makes only sense with 32bit programs. Allow a
- 64bit debugger to fully examine them too. Better
- don't use it against 64bit processes, use
- PTRACE_ARCH_PRCTL instead. */
- case PTRACE_SET_THREAD_AREA: {
- struct user_desc __user *p;
- int old;
- p = (struct user_desc __user *)data;
- get_user(old, &p->entry_number);
- put_user(addr, &p->entry_number);
- ret = do_set_thread_area(&child->thread, p);
- put_user(old, &p->entry_number);
- break;
- case PTRACE_GET_THREAD_AREA:
- p = (struct user_desc __user *)data;
- get_user(old, &p->entry_number);
- put_user(addr, &p->entry_number);
- ret = do_get_thread_area(&child->thread, p);
- put_user(old, &p->entry_number);
- break;
- }
-#endif
- /* normal 64bit interface to access TLS data.
- Works just like arch_prctl, except that the arguments
- are reversed. */
- case PTRACE_ARCH_PRCTL:
- ret = do_arch_prctl(child, data, addr);
- break;
-
-/*
- * make the child exit. Best I can do is send it a sigkill.
- * perhaps it should be put in the status that it wants to
- * exit.
- */
- case PTRACE_KILL:
- ret = 0;
- if (child->exit_state == EXIT_ZOMBIE) /* already dead */
- break;
- clear_tsk_thread_flag(child, TIF_SINGLESTEP);
- child->exit_code = SIGKILL;
- /* make sure the single step bit is not set. */
- clear_singlestep(child);
- wake_up_process(child);
- break;
-
- case PTRACE_SINGLESTEP: /* set the trap flag. */
- ret = -EIO;
- if (!valid_signal(data))
- break;
- clear_tsk_thread_flag(child,TIF_SYSCALL_TRACE);
- set_singlestep(child);
- child->exit_code = data;
- /* give it a chance to run. */
- wake_up_process(child);
- ret = 0;
- break;
-
- case PTRACE_DETACH:
- /* detach a process that was attached. */
- ret = ptrace_detach(child, data);
- break;
-
- case PTRACE_GETREGS: { /* Get all gp regs from the child. */
- if (!access_ok(VERIFY_WRITE, (unsigned __user *)data,
- sizeof(struct user_regs_struct))) {
- ret = -EIO;
- break;
- }
- ret = 0;
- for (ui = 0; ui < sizeof(struct user_regs_struct); ui += sizeof(long)) {
- ret |= __put_user(getreg(child, ui),(unsigned long __user *) data);
- data += sizeof(long);
- }
- break;
- }
-
- case PTRACE_SETREGS: { /* Set all gp regs in the child. */
- unsigned long tmp;
- if (!access_ok(VERIFY_READ, (unsigned __user *)data,
- sizeof(struct user_regs_struct))) {
- ret = -EIO;
- break;
- }
- ret = 0;
- for (ui = 0; ui < sizeof(struct user_regs_struct); ui += sizeof(long)) {
- ret = __get_user(tmp, (unsigned long __user *) data);
- if (ret)
- break;
- ret = putreg(child, ui, tmp);
- if (ret)
- break;
- data += sizeof(long);
- }
- break;
- }
-
- case PTRACE_GETFPREGS: { /* Get the child extended FPU state. */
- if (!access_ok(VERIFY_WRITE, (unsigned __user *)data,
- sizeof(struct user_i387_struct))) {
- ret = -EIO;
- break;
- }
- ret = get_fpregs((struct user_i387_struct __user *)data, child);
- break;
- }
-
- case PTRACE_SETFPREGS: { /* Set the child extended FPU state. */
- if (!access_ok(VERIFY_READ, (unsigned __user *)data,
- sizeof(struct user_i387_struct))) {
- ret = -EIO;
- break;
- }
- set_stopped_child_used_math(child);
- ret = set_fpregs(child, (struct user_i387_struct __user *)data);
- break;
- }
-
- default:
- ret = ptrace_request(child, request, addr, data);
- break;
- }
- return ret;
-}
-
-static void syscall_trace(struct pt_regs *regs)
-{
-
-#if 0
- printk("trace %s rip %lx rsp %lx rax %d origrax %d caller %lx tiflags %x ptrace %x\n",
- current->comm,
- regs->rip, regs->rsp, regs->rax, regs->orig_rax, __builtin_return_address(0),
- current_thread_info()->flags, current->ptrace);
-#endif
-
- ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
- ? 0x80 : 0));
- /*
- * this isn't the same as continuing with a signal, but it will do
- * for normal use. strace only continues with a signal if the
- * stopping signal is not SIGTRAP. -brl
- */
- if (current->exit_code) {
- send_sig(current->exit_code, current, 1);
- current->exit_code = 0;
- }
-}
-
-asmlinkage void syscall_trace_enter(struct pt_regs *regs)
-{
- /* do the secure computing check first */
- secure_computing(regs->orig_rax);
-
- if (test_thread_flag(TIF_SYSCALL_TRACE)
- && (current->ptrace & PT_PTRACED))
- syscall_trace(regs);
-
- if (unlikely(current->audit_context)) {
- if (test_thread_flag(TIF_IA32)) {
- audit_syscall_entry(AUDIT_ARCH_I386,
- regs->orig_rax,
- regs->rbx, regs->rcx,
- regs->rdx, regs->rsi);
- } else {
- audit_syscall_entry(AUDIT_ARCH_X86_64,
- regs->orig_rax,
- regs->rdi, regs->rsi,
- regs->rdx, regs->r10);
- }
- }
-}
-
-asmlinkage void syscall_trace_leave(struct pt_regs *regs)
-{
- if (unlikely(current->audit_context))
- audit_syscall_exit(AUDITSC_RESULT(regs->rax), regs->rax);
-
- if ((test_thread_flag(TIF_SYSCALL_TRACE)
- || test_thread_flag(TIF_SINGLESTEP))
- && (current->ptrace & PT_PTRACED))
- syscall_trace(regs);
-}
diff --git a/arch/x86_64/kernel/reboot.c b/arch/x86_64/kernel/reboot.c
deleted file mode 100644
index 368db2b9c5ac..000000000000
--- a/arch/x86_64/kernel/reboot.c
+++ /dev/null
@@ -1,171 +0,0 @@
-/* Various gunk just to reboot the machine. */
-#include <linux/module.h>
-#include <linux/reboot.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/kernel.h>
-#include <linux/ctype.h>
-#include <linux/string.h>
-#include <linux/pm.h>
-#include <linux/kdebug.h>
-#include <linux/sched.h>
-#include <asm/io.h>
-#include <asm/delay.h>
-#include <asm/hw_irq.h>
-#include <asm/system.h>
-#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
-#include <asm/apic.h>
-#include <asm/iommu.h>
-
-/*
- * Power off function, if any
- */
-void (*pm_power_off)(void);
-EXPORT_SYMBOL(pm_power_off);
-
-static long no_idt[3];
-static enum {
- BOOT_TRIPLE = 't',
- BOOT_KBD = 'k'
-} reboot_type = BOOT_KBD;
-static int reboot_mode = 0;
-int reboot_force;
-
-/* reboot=t[riple] | k[bd] [, [w]arm | [c]old]
- warm Don't set the cold reboot flag
- cold Set the cold reboot flag
- triple Force a triple fault (init)
- kbd Use the keyboard controller. cold reset (default)
- force Avoid anything that could hang.
- */
-static int __init reboot_setup(char *str)
-{
- for (;;) {
- switch (*str) {
- case 'w':
- reboot_mode = 0x1234;
- break;
-
- case 'c':
- reboot_mode = 0;
- break;
-
- case 't':
- case 'b':
- case 'k':
- reboot_type = *str;
- break;
- case 'f':
- reboot_force = 1;
- break;
- }
- if((str = strchr(str,',')) != NULL)
- str++;
- else
- break;
- }
- return 1;
-}
-
-__setup("reboot=", reboot_setup);
-
-static inline void kb_wait(void)
-{
- int i;
-
- for (i=0; i<0x10000; i++)
- if ((inb_p(0x64) & 0x02) == 0)
- break;
-}
-
-void machine_shutdown(void)
-{
- unsigned long flags;
-
- /* Stop the cpus and apics */
-#ifdef CONFIG_SMP
- int reboot_cpu_id;
-
- /* The boot cpu is always logical cpu 0 */
- reboot_cpu_id = 0;
-
- /* Make certain the cpu I'm about to reboot on is online */
- if (!cpu_isset(reboot_cpu_id, cpu_online_map)) {
- reboot_cpu_id = smp_processor_id();
- }
-
- /* Make certain I only run on the appropriate processor */
- set_cpus_allowed(current, cpumask_of_cpu(reboot_cpu_id));
-
- /* O.K Now that I'm on the appropriate processor,
- * stop all of the others.
- */
- smp_send_stop();
-#endif
-
- local_irq_save(flags);
-
-#ifndef CONFIG_SMP
- disable_local_APIC();
-#endif
-
- disable_IO_APIC();
-
- local_irq_restore(flags);
-
- pci_iommu_shutdown();
-}
-
-void machine_emergency_restart(void)
-{
- int i;
-
- /* Tell the BIOS if we want cold or warm reboot */
- *((unsigned short *)__va(0x472)) = reboot_mode;
-
- for (;;) {
- /* Could also try the reset bit in the Hammer NB */
- switch (reboot_type) {
- case BOOT_KBD:
- for (i=0; i<10; i++) {
- kb_wait();
- udelay(50);
- outb(0xfe,0x64); /* pulse reset low */
- udelay(50);
- }
-
- case BOOT_TRIPLE:
- __asm__ __volatile__("lidt (%0)": :"r" (&no_idt));
- __asm__ __volatile__("int3");
-
- reboot_type = BOOT_KBD;
- break;
- }
- }
-}
-
-void machine_restart(char * __unused)
-{
- printk("machine restart\n");
-
- if (!reboot_force) {
- machine_shutdown();
- }
- machine_emergency_restart();
-}
-
-void machine_halt(void)
-{
-}
-
-void machine_power_off(void)
-{
- if (pm_power_off) {
- if (!reboot_force) {
- machine_shutdown();
- }
- pm_power_off();
- }
-}
-
diff --git a/arch/x86_64/kernel/relocate_kernel.S b/arch/x86_64/kernel/relocate_kernel.S
deleted file mode 100644
index 14e95872c6a3..000000000000
--- a/arch/x86_64/kernel/relocate_kernel.S
+++ /dev/null
@@ -1,276 +0,0 @@
-/*
- * relocate_kernel.S - put the kernel image in place to boot
- * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
- */
-
-#include <linux/linkage.h>
-#include <asm/page.h>
-#include <asm/kexec.h>
-
-/*
- * Must be relocatable PIC code callable as a C function
- */
-
-#define PTR(x) (x << 3)
-#define PAGE_ALIGNED (1 << PAGE_SHIFT)
-#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */
-
- .text
- .align PAGE_ALIGNED
- .code64
- .globl relocate_kernel
-relocate_kernel:
- /* %rdi indirection_page
- * %rsi page_list
- * %rdx start address
- */
-
- /* map the control page at its virtual address */
-
- movq $0x0000ff8000000000, %r10 /* mask */
- mov $(39 - 3), %cl /* bits to shift */
- movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 /* address to map */
-
- movq %r11, %r9
- andq %r10, %r9
- shrq %cl, %r9
-
- movq PTR(VA_PGD)(%rsi), %r8
- addq %r8, %r9
- movq PTR(PA_PUD_0)(%rsi), %r8
- orq $PAGE_ATTR, %r8
- movq %r8, (%r9)
-
- shrq $9, %r10
- sub $9, %cl
-
- movq %r11, %r9
- andq %r10, %r9
- shrq %cl, %r9
-
- movq PTR(VA_PUD_0)(%rsi), %r8
- addq %r8, %r9
- movq PTR(PA_PMD_0)(%rsi), %r8
- orq $PAGE_ATTR, %r8
- movq %r8, (%r9)
-
- shrq $9, %r10
- sub $9, %cl
-
- movq %r11, %r9
- andq %r10, %r9
- shrq %cl, %r9
-
- movq PTR(VA_PMD_0)(%rsi), %r8
- addq %r8, %r9
- movq PTR(PA_PTE_0)(%rsi), %r8
- orq $PAGE_ATTR, %r8
- movq %r8, (%r9)
-
- shrq $9, %r10
- sub $9, %cl
-
- movq %r11, %r9
- andq %r10, %r9
- shrq %cl, %r9
-
- movq PTR(VA_PTE_0)(%rsi), %r8
- addq %r8, %r9
- movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
- orq $PAGE_ATTR, %r8
- movq %r8, (%r9)
-
- /* identity map the control page at its physical address */
-
- movq $0x0000ff8000000000, %r10 /* mask */
- mov $(39 - 3), %cl /* bits to shift */
- movq PTR(PA_CONTROL_PAGE)(%rsi), %r11 /* address to map */
-
- movq %r11, %r9
- andq %r10, %r9
- shrq %cl, %r9
-
- movq PTR(VA_PGD)(%rsi), %r8
- addq %r8, %r9
- movq PTR(PA_PUD_1)(%rsi), %r8
- orq $PAGE_ATTR, %r8
- movq %r8, (%r9)
-
- shrq $9, %r10
- sub $9, %cl
-
- movq %r11, %r9
- andq %r10, %r9
- shrq %cl, %r9
-
- movq PTR(VA_PUD_1)(%rsi), %r8
- addq %r8, %r9
- movq PTR(PA_PMD_1)(%rsi), %r8
- orq $PAGE_ATTR, %r8
- movq %r8, (%r9)
-
- shrq $9, %r10
- sub $9, %cl
-
- movq %r11, %r9
- andq %r10, %r9
- shrq %cl, %r9
-
- movq PTR(VA_PMD_1)(%rsi), %r8
- addq %r8, %r9
- movq PTR(PA_PTE_1)(%rsi), %r8
- orq $PAGE_ATTR, %r8
- movq %r8, (%r9)
-
- shrq $9, %r10
- sub $9, %cl
-
- movq %r11, %r9
- andq %r10, %r9
- shrq %cl, %r9
-
- movq PTR(VA_PTE_1)(%rsi), %r8
- addq %r8, %r9
- movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
- orq $PAGE_ATTR, %r8
- movq %r8, (%r9)
-
-relocate_new_kernel:
- /* %rdi indirection_page
- * %rsi page_list
- * %rdx start address
- */
-
- /* zero out flags, and disable interrupts */
- pushq $0
- popfq
-
- /* get physical address of control page now */
- /* this is impossible after page table switch */
- movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
-
- /* get physical address of page table now too */
- movq PTR(PA_TABLE_PAGE)(%rsi), %rcx
-
- /* switch to new set of page tables */
- movq PTR(PA_PGD)(%rsi), %r9
- movq %r9, %cr3
-
- /* setup a new stack at the end of the physical control page */
- lea 4096(%r8), %rsp
-
- /* jump to identity mapped page */
- addq $(identity_mapped - relocate_kernel), %r8
- pushq %r8
- ret
-
-identity_mapped:
- /* store the start address on the stack */
- pushq %rdx
-
- /* Set cr0 to a known state:
- * 31 1 == Paging enabled
- * 18 0 == Alignment check disabled
- * 16 0 == Write protect disabled
- * 3 0 == No task switch
- * 2 0 == Don't do FP software emulation.
- * 0 1 == Proctected mode enabled
- */
- movq %cr0, %rax
- andq $~((1<<18)|(1<<16)|(1<<3)|(1<<2)), %rax
- orl $((1<<31)|(1<<0)), %eax
- movq %rax, %cr0
-
- /* Set cr4 to a known state:
- * 10 0 == xmm exceptions disabled
- * 9 0 == xmm registers instructions disabled
- * 8 0 == performance monitoring counter disabled
- * 7 0 == page global disabled
- * 6 0 == machine check exceptions disabled
- * 5 1 == physical address extension enabled
- * 4 0 == page size extensions disabled
- * 3 0 == Debug extensions disabled
- * 2 0 == Time stamp disable (disabled)
- * 1 0 == Protected mode virtual interrupts disabled
- * 0 0 == VME disabled
- */
-
- movq $((1<<5)), %rax
- movq %rax, %cr4
-
- jmp 1f
-1:
-
- /* Switch to the identity mapped page tables,
- * and flush the TLB.
- */
- movq %rcx, %cr3
-
- /* Do the copies */
- movq %rdi, %rcx /* Put the page_list in %rcx */
- xorq %rdi, %rdi
- xorq %rsi, %rsi
- jmp 1f
-
-0: /* top, read another word for the indirection page */
-
- movq (%rbx), %rcx
- addq $8, %rbx
-1:
- testq $0x1, %rcx /* is it a destination page? */
- jz 2f
- movq %rcx, %rdi
- andq $0xfffffffffffff000, %rdi
- jmp 0b
-2:
- testq $0x2, %rcx /* is it an indirection page? */
- jz 2f
- movq %rcx, %rbx
- andq $0xfffffffffffff000, %rbx
- jmp 0b
-2:
- testq $0x4, %rcx /* is it the done indicator? */
- jz 2f
- jmp 3f
-2:
- testq $0x8, %rcx /* is it the source indicator? */
- jz 0b /* Ignore it otherwise */
- movq %rcx, %rsi /* For ever source page do a copy */
- andq $0xfffffffffffff000, %rsi
-
- movq $512, %rcx
- rep ; movsq
- jmp 0b
-3:
-
- /* To be certain of avoiding problems with self-modifying code
- * I need to execute a serializing instruction here.
- * So I flush the TLB by reloading %cr3 here, it's handy,
- * and not processor dependent.
- */
- movq %cr3, %rax
- movq %rax, %cr3
-
- /* set all of the registers to known values */
- /* leave %rsp alone */
-
- xorq %rax, %rax
- xorq %rbx, %rbx
- xorq %rcx, %rcx
- xorq %rdx, %rdx
- xorq %rsi, %rsi
- xorq %rdi, %rdi
- xorq %rbp, %rbp
- xorq %r8, %r8
- xorq %r9, %r9
- xorq %r10, %r9
- xorq %r11, %r11
- xorq %r12, %r12
- xorq %r13, %r13
- xorq %r14, %r14
- xorq %r15, %r15
-
- ret
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
deleted file mode 100644
index af838f6b0b7f..000000000000
--- a/arch/x86_64/kernel/setup.c
+++ /dev/null
@@ -1,1117 +0,0 @@
-/*
- * linux/arch/x86-64/kernel/setup.c
- *
- * Copyright (C) 1995 Linus Torvalds
- *
- * Nov 2001 Dave Jones <davej@suse.de>
- * Forked from i386 setup code.
- */
-
-/*
- * This file handles the architecture-dependent parts of initialization
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/stddef.h>
-#include <linux/unistd.h>
-#include <linux/ptrace.h>
-#include <linux/slab.h>
-#include <linux/user.h>
-#include <linux/a.out.h>
-#include <linux/screen_info.h>
-#include <linux/ioport.h>
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/initrd.h>
-#include <linux/highmem.h>
-#include <linux/bootmem.h>
-#include <linux/module.h>
-#include <asm/processor.h>
-#include <linux/console.h>
-#include <linux/seq_file.h>
-#include <linux/crash_dump.h>
-#include <linux/root_dev.h>
-#include <linux/pci.h>
-#include <linux/acpi.h>
-#include <linux/kallsyms.h>
-#include <linux/edd.h>
-#include <linux/mmzone.h>
-#include <linux/kexec.h>
-#include <linux/cpufreq.h>
-#include <linux/dmi.h>
-#include <linux/dma-mapping.h>
-#include <linux/ctype.h>
-
-#include <asm/mtrr.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <asm/io.h>
-#include <asm/smp.h>
-#include <asm/msr.h>
-#include <asm/desc.h>
-#include <video/edid.h>
-#include <asm/e820.h>
-#include <asm/dma.h>
-#include <asm/mpspec.h>
-#include <asm/mmu_context.h>
-#include <asm/bootsetup.h>
-#include <asm/proto.h>
-#include <asm/setup.h>
-#include <asm/mach_apic.h>
-#include <asm/numa.h>
-#include <asm/sections.h>
-#include <asm/dmi.h>
-
-/*
- * Machine setup..
- */
-
-struct cpuinfo_x86 boot_cpu_data __read_mostly;
-EXPORT_SYMBOL(boot_cpu_data);
-
-unsigned long mmu_cr4_features;
-
-/* Boot loader ID as an integer, for the benefit of proc_dointvec */
-int bootloader_type;
-
-unsigned long saved_video_mode;
-
-int force_mwait __cpuinitdata;
-
-/*
- * Early DMI memory
- */
-int dmi_alloc_index;
-char dmi_alloc_data[DMI_MAX_DATA];
-
-/*
- * Setup options
- */
-struct screen_info screen_info;
-EXPORT_SYMBOL(screen_info);
-struct sys_desc_table_struct {
- unsigned short length;
- unsigned char table[0];
-};
-
-struct edid_info edid_info;
-EXPORT_SYMBOL_GPL(edid_info);
-
-extern int root_mountflags;
-
-char __initdata command_line[COMMAND_LINE_SIZE];
-
-struct resource standard_io_resources[] = {
- { .name = "dma1", .start = 0x00, .end = 0x1f,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
- { .name = "pic1", .start = 0x20, .end = 0x21,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
- { .name = "timer0", .start = 0x40, .end = 0x43,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
- { .name = "timer1", .start = 0x50, .end = 0x53,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
- { .name = "keyboard", .start = 0x60, .end = 0x6f,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
- { .name = "dma page reg", .start = 0x80, .end = 0x8f,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
- { .name = "pic2", .start = 0xa0, .end = 0xa1,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
- { .name = "dma2", .start = 0xc0, .end = 0xdf,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
- { .name = "fpu", .start = 0xf0, .end = 0xff,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO }
-};
-
-#define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM)
-
-struct resource data_resource = {
- .name = "Kernel data",
- .start = 0,
- .end = 0,
- .flags = IORESOURCE_RAM,
-};
-struct resource code_resource = {
- .name = "Kernel code",
- .start = 0,
- .end = 0,
- .flags = IORESOURCE_RAM,
-};
-
-#ifdef CONFIG_PROC_VMCORE
-/* elfcorehdr= specifies the location of elf core header
- * stored by the crashed kernel. This option will be passed
- * by kexec loader to the capture kernel.
- */
-static int __init setup_elfcorehdr(char *arg)
-{
- char *end;
- if (!arg)
- return -EINVAL;
- elfcorehdr_addr = memparse(arg, &end);
- return end > arg ? 0 : -EINVAL;
-}
-early_param("elfcorehdr", setup_elfcorehdr);
-#endif
-
-#ifndef CONFIG_NUMA
-static void __init
-contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
-{
- unsigned long bootmap_size, bootmap;
-
- bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
- bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size);
- if (bootmap == -1L)
- panic("Cannot find bootmem map of size %ld\n",bootmap_size);
- bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
- e820_register_active_regions(0, start_pfn, end_pfn);
- free_bootmem_with_active_regions(0, end_pfn);
- reserve_bootmem(bootmap, bootmap_size);
-}
-#endif
-
-#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
-struct edd edd;
-#ifdef CONFIG_EDD_MODULE
-EXPORT_SYMBOL(edd);
-#endif
-/**
- * copy_edd() - Copy the BIOS EDD information
- * from boot_params into a safe place.
- *
- */
-static inline void copy_edd(void)
-{
- memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
- memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
- edd.mbr_signature_nr = EDD_MBR_SIG_NR;
- edd.edd_info_nr = EDD_NR;
-}
-#else
-static inline void copy_edd(void)
-{
-}
-#endif
-
-#define EBDA_ADDR_POINTER 0x40E
-
-unsigned __initdata ebda_addr;
-unsigned __initdata ebda_size;
-
-static void discover_ebda(void)
-{
- /*
- * there is a real-mode segmented pointer pointing to the
- * 4K EBDA area at 0x40E
- */
- ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER);
- ebda_addr <<= 4;
-
- ebda_size = *(unsigned short *)__va(ebda_addr);
-
- /* Round EBDA up to pages */
- if (ebda_size == 0)
- ebda_size = 1;
- ebda_size <<= 10;
- ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE);
- if (ebda_size > 64*1024)
- ebda_size = 64*1024;
-}
-
-void __init setup_arch(char **cmdline_p)
-{
- printk(KERN_INFO "Command line: %s\n", boot_command_line);
-
- ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
- screen_info = SCREEN_INFO;
- edid_info = EDID_INFO;
- saved_video_mode = SAVED_VIDEO_MODE;
- bootloader_type = LOADER_TYPE;
-
-#ifdef CONFIG_BLK_DEV_RAM
- rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
- rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
- rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
-#endif
- setup_memory_region();
- copy_edd();
-
- if (!MOUNT_ROOT_RDONLY)
- root_mountflags &= ~MS_RDONLY;
- init_mm.start_code = (unsigned long) &_text;
- init_mm.end_code = (unsigned long) &_etext;
- init_mm.end_data = (unsigned long) &_edata;
- init_mm.brk = (unsigned long) &_end;
-
- code_resource.start = virt_to_phys(&_text);
- code_resource.end = virt_to_phys(&_etext)-1;
- data_resource.start = virt_to_phys(&_etext);
- data_resource.end = virt_to_phys(&_edata)-1;
-
- early_identify_cpu(&boot_cpu_data);
-
- strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
- *cmdline_p = command_line;
-
- parse_early_param();
-
- finish_e820_parsing();
-
- e820_register_active_regions(0, 0, -1UL);
- /*
- * partially used pages are not usable - thus
- * we are rounding upwards:
- */
- end_pfn = e820_end_of_ram();
- num_physpages = end_pfn;
-
- check_efer();
-
- discover_ebda();
-
- init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
-
- dmi_scan_machine();
-
-#ifdef CONFIG_ACPI
- /*
- * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
- * Call this early for SRAT node setup.
- */
- acpi_boot_table_init();
-#endif
-
- /* How many end-of-memory variables you have, grandma! */
- max_low_pfn = end_pfn;
- max_pfn = end_pfn;
- high_memory = (void *)__va(end_pfn * PAGE_SIZE - 1) + 1;
-
- /* Remove active ranges so rediscovery with NUMA-awareness happens */
- remove_all_active_ranges();
-
-#ifdef CONFIG_ACPI_NUMA
- /*
- * Parse SRAT to discover nodes.
- */
- acpi_numa_init();
-#endif
-
-#ifdef CONFIG_NUMA
- numa_initmem_init(0, end_pfn);
-#else
- contig_initmem_init(0, end_pfn);
-#endif
-
- /* Reserve direct mapping */
- reserve_bootmem_generic(table_start << PAGE_SHIFT,
- (table_end - table_start) << PAGE_SHIFT);
-
- /* reserve kernel */
- reserve_bootmem_generic(__pa_symbol(&_text),
- __pa_symbol(&_end) - __pa_symbol(&_text));
-
- /*
- * reserve physical page 0 - it's a special BIOS page on many boxes,
- * enabling clean reboots, SMP operation, laptop functions.
- */
- reserve_bootmem_generic(0, PAGE_SIZE);
-
- /* reserve ebda region */
- if (ebda_addr)
- reserve_bootmem_generic(ebda_addr, ebda_size);
-#ifdef CONFIG_NUMA
- /* reserve nodemap region */
- if (nodemap_addr)
- reserve_bootmem_generic(nodemap_addr, nodemap_size);
-#endif
-
-#ifdef CONFIG_SMP
- /* Reserve SMP trampoline */
- reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, 2*PAGE_SIZE);
-#endif
-
-#ifdef CONFIG_ACPI_SLEEP
- /*
- * Reserve low memory region for sleep support.
- */
- acpi_reserve_bootmem();
-#endif
- /*
- * Find and reserve possible boot-time SMP configuration:
- */
- find_smp_config();
-#ifdef CONFIG_BLK_DEV_INITRD
- if (LOADER_TYPE && INITRD_START) {
- if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
- reserve_bootmem_generic(INITRD_START, INITRD_SIZE);
- initrd_start = INITRD_START + PAGE_OFFSET;
- initrd_end = initrd_start+INITRD_SIZE;
- }
- else {
- printk(KERN_ERR "initrd extends beyond end of memory "
- "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
- (unsigned long)(INITRD_START + INITRD_SIZE),
- (unsigned long)(end_pfn << PAGE_SHIFT));
- initrd_start = 0;
- }
- }
-#endif
-#ifdef CONFIG_KEXEC
- if (crashk_res.start != crashk_res.end) {
- reserve_bootmem_generic(crashk_res.start,
- crashk_res.end - crashk_res.start + 1);
- }
-#endif
-
- paging_init();
-
-#ifdef CONFIG_PCI
- early_quirks();
-#endif
-
- /*
- * set this early, so we dont allocate cpu0
- * if MADT list doesnt list BSP first
- * mpparse.c/MP_processor_info() allocates logical cpu numbers.
- */
- cpu_set(0, cpu_present_map);
-#ifdef CONFIG_ACPI
- /*
- * Read APIC and some other early information from ACPI tables.
- */
- acpi_boot_init();
-#endif
-
- init_cpu_to_node();
-
- /*
- * get boot-time SMP configuration:
- */
- if (smp_found_config)
- get_smp_config();
- init_apic_mappings();
-
- /*
- * We trust e820 completely. No explicit ROM probing in memory.
- */
- e820_reserve_resources();
- e820_mark_nosave_regions();
-
- {
- unsigned i;
- /* request I/O space for devices used on all i[345]86 PCs */
- for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
- request_resource(&ioport_resource, &standard_io_resources[i]);
- }
-
- e820_setup_gap();
-
-#ifdef CONFIG_VT
-#if defined(CONFIG_VGA_CONSOLE)
- conswitchp = &vga_con;
-#elif defined(CONFIG_DUMMY_CONSOLE)
- conswitchp = &dummy_con;
-#endif
-#endif
-}
-
-static int __cpuinit get_model_name(struct cpuinfo_x86 *c)
-{
- unsigned int *v;
-
- if (c->extended_cpuid_level < 0x80000004)
- return 0;
-
- v = (unsigned int *) c->x86_model_id;
- cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
- cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
- cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
- c->x86_model_id[48] = 0;
- return 1;
-}
-
-
-static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
-{
- unsigned int n, dummy, eax, ebx, ecx, edx;
-
- n = c->extended_cpuid_level;
-
- if (n >= 0x80000005) {
- cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
- printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
- edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
- c->x86_cache_size=(ecx>>24)+(edx>>24);
- /* On K8 L1 TLB is inclusive, so don't count it */
- c->x86_tlbsize = 0;
- }
-
- if (n >= 0x80000006) {
- cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
- ecx = cpuid_ecx(0x80000006);
- c->x86_cache_size = ecx >> 16;
- c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
-
- printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
- c->x86_cache_size, ecx & 0xFF);
- }
-
- if (n >= 0x80000007)
- cpuid(0x80000007, &dummy, &dummy, &dummy, &c->x86_power);
- if (n >= 0x80000008) {
- cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
- c->x86_virt_bits = (eax >> 8) & 0xff;
- c->x86_phys_bits = eax & 0xff;
- }
-}
-
-#ifdef CONFIG_NUMA
-static int nearby_node(int apicid)
-{
- int i;
- for (i = apicid - 1; i >= 0; i--) {
- int node = apicid_to_node[i];
- if (node != NUMA_NO_NODE && node_online(node))
- return node;
- }
- for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
- int node = apicid_to_node[i];
- if (node != NUMA_NO_NODE && node_online(node))
- return node;
- }
- return first_node(node_online_map); /* Shouldn't happen */
-}
-#endif
-
-/*
- * On a AMD dual core setup the lower bits of the APIC id distingush the cores.
- * Assumes number of cores is a power of two.
- */
-static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
-{
-#ifdef CONFIG_SMP
- unsigned bits;
-#ifdef CONFIG_NUMA
- int cpu = smp_processor_id();
- int node = 0;
- unsigned apicid = hard_smp_processor_id();
-#endif
- unsigned ecx = cpuid_ecx(0x80000008);
-
- c->x86_max_cores = (ecx & 0xff) + 1;
-
- /* CPU telling us the core id bits shift? */
- bits = (ecx >> 12) & 0xF;
-
- /* Otherwise recompute */
- if (bits == 0) {
- while ((1 << bits) < c->x86_max_cores)
- bits++;
- }
-
- /* Low order bits define the core id (index of core in socket) */
- c->cpu_core_id = c->phys_proc_id & ((1 << bits)-1);
- /* Convert the APIC ID into the socket ID */
- c->phys_proc_id = phys_pkg_id(bits);
-
-#ifdef CONFIG_NUMA
- node = c->phys_proc_id;
- if (apicid_to_node[apicid] != NUMA_NO_NODE)
- node = apicid_to_node[apicid];
- if (!node_online(node)) {
- /* Two possibilities here:
- - The CPU is missing memory and no node was created.
- In that case try picking one from a nearby CPU
- - The APIC IDs differ from the HyperTransport node IDs
- which the K8 northbridge parsing fills in.
- Assume they are all increased by a constant offset,
- but in the same order as the HT nodeids.
- If that doesn't result in a usable node fall back to the
- path for the previous case. */
- int ht_nodeid = apicid - (cpu_data[0].phys_proc_id << bits);
- if (ht_nodeid >= 0 &&
- apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
- node = apicid_to_node[ht_nodeid];
- /* Pick a nearby node */
- if (!node_online(node))
- node = nearby_node(apicid);
- }
- numa_set_node(cpu, node);
-
- printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
-#endif
-#endif
-}
-
-static void __cpuinit init_amd(struct cpuinfo_x86 *c)
-{
- unsigned level;
-
-#ifdef CONFIG_SMP
- unsigned long value;
-
- /*
- * Disable TLB flush filter by setting HWCR.FFDIS on K8
- * bit 6 of msr C001_0015
- *
- * Errata 63 for SH-B3 steppings
- * Errata 122 for all steppings (F+ have it disabled by default)
- */
- if (c->x86 == 15) {
- rdmsrl(MSR_K8_HWCR, value);
- value |= 1 << 6;
- wrmsrl(MSR_K8_HWCR, value);
- }
-#endif
-
- /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
- 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
- clear_bit(0*32+31, &c->x86_capability);
-
- /* On C+ stepping K8 rep microcode works well for copy/memset */
- level = cpuid_eax(1);
- if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58))
- set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
- if (c->x86 == 0x10)
- set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
-
- /* Enable workaround for FXSAVE leak */
- if (c->x86 >= 6)
- set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability);
-
- level = get_model_name(c);
- if (!level) {
- switch (c->x86) {
- case 15:
- /* Should distinguish Models here, but this is only
- a fallback anyways. */
- strcpy(c->x86_model_id, "Hammer");
- break;
- }
- }
- display_cacheinfo(c);
-
- /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
- if (c->x86_power & (1<<8))
- set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
-
- /* Multi core CPU? */
- if (c->extended_cpuid_level >= 0x80000008)
- amd_detect_cmp(c);
-
- if (c->extended_cpuid_level >= 0x80000006 &&
- (cpuid_edx(0x80000006) & 0xf000))
- num_cache_leaves = 4;
- else
- num_cache_leaves = 3;
-
- if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x11)
- set_bit(X86_FEATURE_K8, &c->x86_capability);
-
- /* RDTSC can be speculated around */
- clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
-
- /* Family 10 doesn't support C states in MWAIT so don't use it */
- if (c->x86 == 0x10 && !force_mwait)
- clear_bit(X86_FEATURE_MWAIT, &c->x86_capability);
-}
-
-static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
-{
-#ifdef CONFIG_SMP
- u32 eax, ebx, ecx, edx;
- int index_msb, core_bits;
-
- cpuid(1, &eax, &ebx, &ecx, &edx);
-
-
- if (!cpu_has(c, X86_FEATURE_HT))
- return;
- if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
- goto out;
-
- smp_num_siblings = (ebx & 0xff0000) >> 16;
-
- if (smp_num_siblings == 1) {
- printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
- } else if (smp_num_siblings > 1 ) {
-
- if (smp_num_siblings > NR_CPUS) {
- printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
- smp_num_siblings = 1;
- return;
- }
-
- index_msb = get_count_order(smp_num_siblings);
- c->phys_proc_id = phys_pkg_id(index_msb);
-
- smp_num_siblings = smp_num_siblings / c->x86_max_cores;
-
- index_msb = get_count_order(smp_num_siblings) ;
-
- core_bits = get_count_order(c->x86_max_cores);
-
- c->cpu_core_id = phys_pkg_id(index_msb) &
- ((1 << core_bits) - 1);
- }
-out:
- if ((c->x86_max_cores * smp_num_siblings) > 1) {
- printk(KERN_INFO "CPU: Physical Processor ID: %d\n", c->phys_proc_id);
- printk(KERN_INFO "CPU: Processor Core ID: %d\n", c->cpu_core_id);
- }
-
-#endif
-}
-
-/*
- * find out the number of processor cores on the die
- */
-static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
-{
- unsigned int eax, t;
-
- if (c->cpuid_level < 4)
- return 1;
-
- cpuid_count(4, 0, &eax, &t, &t, &t);
-
- if (eax & 0x1f)
- return ((eax >> 26) + 1);
- else
- return 1;
-}
-
-static void srat_detect_node(void)
-{
-#ifdef CONFIG_NUMA
- unsigned node;
- int cpu = smp_processor_id();
- int apicid = hard_smp_processor_id();
-
- /* Don't do the funky fallback heuristics the AMD version employs
- for now. */
- node = apicid_to_node[apicid];
- if (node == NUMA_NO_NODE)
- node = first_node(node_online_map);
- numa_set_node(cpu, node);
-
- printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
-#endif
-}
-
-static void __cpuinit init_intel(struct cpuinfo_x86 *c)
-{
- /* Cache sizes */
- unsigned n;
-
- init_intel_cacheinfo(c);
- if (c->cpuid_level > 9 ) {
- unsigned eax = cpuid_eax(10);
- /* Check for version and the number of counters */
- if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
- set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability);
- }
-
- if (cpu_has_ds) {
- unsigned int l1, l2;
- rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
- if (!(l1 & (1<<11)))
- set_bit(X86_FEATURE_BTS, c->x86_capability);
- if (!(l1 & (1<<12)))
- set_bit(X86_FEATURE_PEBS, c->x86_capability);
- }
-
- n = c->extended_cpuid_level;
- if (n >= 0x80000008) {
- unsigned eax = cpuid_eax(0x80000008);
- c->x86_virt_bits = (eax >> 8) & 0xff;
- c->x86_phys_bits = eax & 0xff;
- /* CPUID workaround for Intel 0F34 CPU */
- if (c->x86_vendor == X86_VENDOR_INTEL &&
- c->x86 == 0xF && c->x86_model == 0x3 &&
- c->x86_mask == 0x4)
- c->x86_phys_bits = 36;
- }
-
- if (c->x86 == 15)
- c->x86_cache_alignment = c->x86_clflush_size * 2;
- if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
- (c->x86 == 0x6 && c->x86_model >= 0x0e))
- set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
- if (c->x86 == 6)
- set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
- if (c->x86 == 15)
- set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
- else
- clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
- c->x86_max_cores = intel_num_cpu_cores(c);
-
- srat_detect_node();
-}
-
-static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
-{
- char *v = c->x86_vendor_id;
-
- if (!strcmp(v, "AuthenticAMD"))
- c->x86_vendor = X86_VENDOR_AMD;
- else if (!strcmp(v, "GenuineIntel"))
- c->x86_vendor = X86_VENDOR_INTEL;
- else
- c->x86_vendor = X86_VENDOR_UNKNOWN;
-}
-
-struct cpu_model_info {
- int vendor;
- int family;
- char *model_names[16];
-};
-
-/* Do some early cpuid on the boot CPU to get some parameter that are
- needed before check_bugs. Everything advanced is in identify_cpu
- below. */
-void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
-{
- u32 tfms;
-
- c->loops_per_jiffy = loops_per_jiffy;
- c->x86_cache_size = -1;
- c->x86_vendor = X86_VENDOR_UNKNOWN;
- c->x86_model = c->x86_mask = 0; /* So far unknown... */
- c->x86_vendor_id[0] = '\0'; /* Unset */
- c->x86_model_id[0] = '\0'; /* Unset */
- c->x86_clflush_size = 64;
- c->x86_cache_alignment = c->x86_clflush_size;
- c->x86_max_cores = 1;
- c->extended_cpuid_level = 0;
- memset(&c->x86_capability, 0, sizeof c->x86_capability);
-
- /* Get vendor name */
- cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
- (unsigned int *)&c->x86_vendor_id[0],
- (unsigned int *)&c->x86_vendor_id[8],
- (unsigned int *)&c->x86_vendor_id[4]);
-
- get_cpu_vendor(c);
-
- /* Initialize the standard set of capabilities */
- /* Note that the vendor-specific code below might override */
-
- /* Intel-defined flags: level 0x00000001 */
- if (c->cpuid_level >= 0x00000001) {
- __u32 misc;
- cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4],
- &c->x86_capability[0]);
- c->x86 = (tfms >> 8) & 0xf;
- c->x86_model = (tfms >> 4) & 0xf;
- c->x86_mask = tfms & 0xf;
- if (c->x86 == 0xf)
- c->x86 += (tfms >> 20) & 0xff;
- if (c->x86 >= 0x6)
- c->x86_model += ((tfms >> 16) & 0xF) << 4;
- if (c->x86_capability[0] & (1<<19))
- c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
- } else {
- /* Have CPUID level 0 only - unheard of */
- c->x86 = 4;
- }
-
-#ifdef CONFIG_SMP
- c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
-#endif
-}
-
-/*
- * This does the hard work of actually picking apart the CPU stuff...
- */
-void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
-{
- int i;
- u32 xlvl;
-
- early_identify_cpu(c);
-
- /* AMD-defined flags: level 0x80000001 */
- xlvl = cpuid_eax(0x80000000);
- c->extended_cpuid_level = xlvl;
- if ((xlvl & 0xffff0000) == 0x80000000) {
- if (xlvl >= 0x80000001) {
- c->x86_capability[1] = cpuid_edx(0x80000001);
- c->x86_capability[6] = cpuid_ecx(0x80000001);
- }
- if (xlvl >= 0x80000004)
- get_model_name(c); /* Default name */
- }
-
- /* Transmeta-defined flags: level 0x80860001 */
- xlvl = cpuid_eax(0x80860000);
- if ((xlvl & 0xffff0000) == 0x80860000) {
- /* Don't set x86_cpuid_level here for now to not confuse. */
- if (xlvl >= 0x80860001)
- c->x86_capability[2] = cpuid_edx(0x80860001);
- }
-
- init_scattered_cpuid_features(c);
-
- c->apicid = phys_pkg_id(0);
-
- /*
- * Vendor-specific initialization. In this section we
- * canonicalize the feature flags, meaning if there are
- * features a certain CPU supports which CPUID doesn't
- * tell us, CPUID claiming incorrect flags, or other bugs,
- * we handle them here.
- *
- * At the end of this section, c->x86_capability better
- * indicate the features this CPU genuinely supports!
- */
- switch (c->x86_vendor) {
- case X86_VENDOR_AMD:
- init_amd(c);
- break;
-
- case X86_VENDOR_INTEL:
- init_intel(c);
- break;
-
- case X86_VENDOR_UNKNOWN:
- default:
- display_cacheinfo(c);
- break;
- }
-
- select_idle_routine(c);
- detect_ht(c);
-
- /*
- * On SMP, boot_cpu_data holds the common feature set between
- * all CPUs; so make sure that we indicate which features are
- * common between the CPUs. The first time this routine gets
- * executed, c == &boot_cpu_data.
- */
- if (c != &boot_cpu_data) {
- /* AND the already accumulated flags with these */
- for (i = 0 ; i < NCAPINTS ; i++)
- boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
- }
-
-#ifdef CONFIG_X86_MCE
- mcheck_init(c);
-#endif
- if (c != &boot_cpu_data)
- mtrr_ap_init();
-#ifdef CONFIG_NUMA
- numa_add_cpu(smp_processor_id());
-#endif
-}
-
-
-void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
-{
- if (c->x86_model_id[0])
- printk("%s", c->x86_model_id);
-
- if (c->x86_mask || c->cpuid_level >= 0)
- printk(" stepping %02x\n", c->x86_mask);
- else
- printk("\n");
-}
-
-/*
- * Get CPU information for use by the procfs.
- */
-
-static int show_cpuinfo(struct seq_file *m, void *v)
-{
- struct cpuinfo_x86 *c = v;
-
- /*
- * These flag bits must match the definitions in <asm/cpufeature.h>.
- * NULL means this bit is undefined or reserved; either way it doesn't
- * have meaning as far as Linux is concerned. Note that it's important
- * to realize there is a difference between this table and CPUID -- if
- * applications want to get the raw CPUID data, they should access
- * /dev/cpu/<cpu_nr>/cpuid instead.
- */
- static char *x86_cap_flags[] = {
- /* Intel-defined */
- "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
- "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
- "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
- "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
-
- /* AMD-defined */
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL,
- NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
- "3dnowext", "3dnow",
-
- /* Transmeta-defined */
- "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* Other (Linux-defined) */
- "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
- NULL, NULL, NULL, NULL,
- "constant_tsc", "up", NULL, "arch_perfmon",
- "pebs", "bts", NULL, "sync_rdtsc",
- "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* Intel-defined (#2) */
- "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
- "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
- NULL, NULL, "dca", NULL, NULL, NULL, NULL, "popcnt",
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* VIA/Cyrix/Centaur-defined */
- NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
- "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* AMD-defined (#2) */
- "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8_legacy",
- "altmovcr8", "abm", "sse4a",
- "misalignsse", "3dnowprefetch",
- "osvw", "ibs", NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* Auxiliary (Linux-defined) */
- "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- };
- static char *x86_power_flags[] = {
- "ts", /* temperature sensor */
- "fid", /* frequency id control */
- "vid", /* voltage id control */
- "ttp", /* thermal trip */
- "tm",
- "stc",
- "100mhzsteps",
- "hwpstate",
- "", /* tsc invariant mapped to constant_tsc */
- /* nothing */
- };
-
-
-#ifdef CONFIG_SMP
- if (!cpu_online(c-cpu_data))
- return 0;
-#endif
-
- seq_printf(m,"processor\t: %u\n"
- "vendor_id\t: %s\n"
- "cpu family\t: %d\n"
- "model\t\t: %d\n"
- "model name\t: %s\n",
- (unsigned)(c-cpu_data),
- c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
- c->x86,
- (int)c->x86_model,
- c->x86_model_id[0] ? c->x86_model_id : "unknown");
-
- if (c->x86_mask || c->cpuid_level >= 0)
- seq_printf(m, "stepping\t: %d\n", c->x86_mask);
- else
- seq_printf(m, "stepping\t: unknown\n");
-
- if (cpu_has(c,X86_FEATURE_TSC)) {
- unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data));
- if (!freq)
- freq = cpu_khz;
- seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
- freq / 1000, (freq % 1000));
- }
-
- /* Cache size */
- if (c->x86_cache_size >= 0)
- seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
-
-#ifdef CONFIG_SMP
- if (smp_num_siblings * c->x86_max_cores > 1) {
- int cpu = c - cpu_data;
- seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
- seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu]));
- seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
- seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
- }
-#endif
-
- seq_printf(m,
- "fpu\t\t: yes\n"
- "fpu_exception\t: yes\n"
- "cpuid level\t: %d\n"
- "wp\t\t: yes\n"
- "flags\t\t:",
- c->cpuid_level);
-
- {
- int i;
- for ( i = 0 ; i < 32*NCAPINTS ; i++ )
- if (cpu_has(c, i) && x86_cap_flags[i] != NULL)
- seq_printf(m, " %s", x86_cap_flags[i]);
- }
-
- seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
- c->loops_per_jiffy/(500000/HZ),
- (c->loops_per_jiffy/(5000/HZ)) % 100);
-
- if (c->x86_tlbsize > 0)
- seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize);
- seq_printf(m, "clflush size\t: %d\n", c->x86_clflush_size);
- seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment);
-
- seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n",
- c->x86_phys_bits, c->x86_virt_bits);
-
- seq_printf(m, "power management:");
- {
- unsigned i;
- for (i = 0; i < 32; i++)
- if (c->x86_power & (1 << i)) {
- if (i < ARRAY_SIZE(x86_power_flags) &&
- x86_power_flags[i])
- seq_printf(m, "%s%s",
- x86_power_flags[i][0]?" ":"",
- x86_power_flags[i]);
- else
- seq_printf(m, " [%d]", i);
- }
- }
-
- seq_printf(m, "\n\n");
-
- return 0;
-}
-
-static void *c_start(struct seq_file *m, loff_t *pos)
-{
- return *pos < NR_CPUS ? cpu_data + *pos : NULL;
-}
-
-static void *c_next(struct seq_file *m, void *v, loff_t *pos)
-{
- ++*pos;
- return c_start(m, pos);
-}
-
-static void c_stop(struct seq_file *m, void *v)
-{
-}
-
-struct seq_operations cpuinfo_op = {
- .start =c_start,
- .next = c_next,
- .stop = c_stop,
- .show = show_cpuinfo,
-};
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
deleted file mode 100644
index 1200aaac403e..000000000000
--- a/arch/x86_64/kernel/setup64.c
+++ /dev/null
@@ -1,289 +0,0 @@
-/*
- * X86-64 specific CPU setup.
- * Copyright (C) 1995 Linus Torvalds
- * Copyright 2001, 2002, 2003 SuSE Labs / Andi Kleen.
- * See setup.c for older changelog.
- */
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/string.h>
-#include <linux/bootmem.h>
-#include <linux/bitops.h>
-#include <linux/module.h>
-#include <asm/bootsetup.h>
-#include <asm/pda.h>
-#include <asm/pgtable.h>
-#include <asm/processor.h>
-#include <asm/desc.h>
-#include <asm/atomic.h>
-#include <asm/mmu_context.h>
-#include <asm/smp.h>
-#include <asm/i387.h>
-#include <asm/percpu.h>
-#include <asm/proto.h>
-#include <asm/sections.h>
-
-char x86_boot_params[BOOT_PARAM_SIZE] __initdata;
-
-cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
-
-struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly;
-EXPORT_SYMBOL(_cpu_pda);
-struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned;
-
-struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
-
-char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned")));
-
-unsigned long __supported_pte_mask __read_mostly = ~0UL;
-static int do_not_nx __cpuinitdata = 0;
-
-/* noexec=on|off
-Control non executable mappings for 64bit processes.
-
-on Enable(default)
-off Disable
-*/
-static int __init nonx_setup(char *str)
-{
- if (!str)
- return -EINVAL;
- if (!strncmp(str, "on", 2)) {
- __supported_pte_mask |= _PAGE_NX;
- do_not_nx = 0;
- } else if (!strncmp(str, "off", 3)) {
- do_not_nx = 1;
- __supported_pte_mask &= ~_PAGE_NX;
- }
- return 0;
-}
-early_param("noexec", nonx_setup);
-
-int force_personality32 = 0;
-
-/* noexec32=on|off
-Control non executable heap for 32bit processes.
-To control the stack too use noexec=off
-
-on PROT_READ does not imply PROT_EXEC for 32bit processes
-off PROT_READ implies PROT_EXEC (default)
-*/
-static int __init nonx32_setup(char *str)
-{
- if (!strcmp(str, "on"))
- force_personality32 &= ~READ_IMPLIES_EXEC;
- else if (!strcmp(str, "off"))
- force_personality32 |= READ_IMPLIES_EXEC;
- return 1;
-}
-__setup("noexec32=", nonx32_setup);
-
-/*
- * Great future plan:
- * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
- * Always point %gs to its beginning
- */
-void __init setup_per_cpu_areas(void)
-{
- int i;
- unsigned long size;
-
-#ifdef CONFIG_HOTPLUG_CPU
- prefill_possible_map();
-#endif
-
- /* Copy section for each CPU (we discard the original) */
- size = PERCPU_ENOUGH_ROOM;
-
- printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size);
- for_each_cpu_mask (i, cpu_possible_map) {
- char *ptr;
-
- if (!NODE_DATA(cpu_to_node(i))) {
- printk("cpu with no node %d, num_online_nodes %d\n",
- i, num_online_nodes());
- ptr = alloc_bootmem_pages(size);
- } else {
- ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size);
- }
- if (!ptr)
- panic("Cannot allocate cpu data for CPU %d\n", i);
- cpu_pda(i)->data_offset = ptr - __per_cpu_start;
- memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
- }
-}
-
-void pda_init(int cpu)
-{
- struct x8664_pda *pda = cpu_pda(cpu);
-
- /* Setup up data that may be needed in __get_free_pages early */
- asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
- /* Memory clobbers used to order PDA accessed */
- mb();
- wrmsrl(MSR_GS_BASE, pda);
- mb();
-
- pda->cpunumber = cpu;
- pda->irqcount = -1;
- pda->kernelstack =
- (unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE;
- pda->active_mm = &init_mm;
- pda->mmu_state = 0;
-
- if (cpu == 0) {
- /* others are initialized in smpboot.c */
- pda->pcurrent = &init_task;
- pda->irqstackptr = boot_cpu_stack;
- } else {
- pda->irqstackptr = (char *)
- __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
- if (!pda->irqstackptr)
- panic("cannot allocate irqstack for cpu %d", cpu);
- }
-
-
- pda->irqstackptr += IRQSTACKSIZE-64;
-}
-
-char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]
-__attribute__((section(".bss.page_aligned")));
-
-extern asmlinkage void ignore_sysret(void);
-
-/* May not be marked __init: used by software suspend */
-void syscall_init(void)
-{
- /*
- * LSTAR and STAR live in a bit strange symbiosis.
- * They both write to the same internal register. STAR allows to set CS/DS
- * but only a 32bit target. LSTAR sets the 64bit rip.
- */
- wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
- wrmsrl(MSR_LSTAR, system_call);
- wrmsrl(MSR_CSTAR, ignore_sysret);
-
-#ifdef CONFIG_IA32_EMULATION
- syscall32_cpu_init ();
-#endif
-
- /* Flags to clear on syscall */
- wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000);
-}
-
-void __cpuinit check_efer(void)
-{
- unsigned long efer;
-
- rdmsrl(MSR_EFER, efer);
- if (!(efer & EFER_NX) || do_not_nx) {
- __supported_pte_mask &= ~_PAGE_NX;
- }
-}
-
-unsigned long kernel_eflags;
-
-/*
- * cpu_init() initializes state that is per-CPU. Some data is already
- * initialized (naturally) in the bootstrap process, such as the GDT
- * and IDT. We reload them nevertheless, this function acts as a
- * 'CPU state barrier', nothing should get across.
- * A lot of state is already set up in PDA init.
- */
-void __cpuinit cpu_init (void)
-{
- int cpu = stack_smp_processor_id();
- struct tss_struct *t = &per_cpu(init_tss, cpu);
- struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
- unsigned long v;
- char *estacks = NULL;
- struct task_struct *me;
- int i;
-
- /* CPU 0 is initialised in head64.c */
- if (cpu != 0) {
- pda_init(cpu);
- } else
- estacks = boot_exception_stacks;
-
- me = current;
-
- if (cpu_test_and_set(cpu, cpu_initialized))
- panic("CPU#%d already initialized!\n", cpu);
-
- printk("Initializing CPU#%d\n", cpu);
-
- clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
-
- /*
- * Initialize the per-CPU GDT with the boot GDT,
- * and set up the GDT descriptor:
- */
- if (cpu)
- memcpy(cpu_gdt(cpu), cpu_gdt_table, GDT_SIZE);
-
- cpu_gdt_descr[cpu].size = GDT_SIZE;
- asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu]));
- asm volatile("lidt %0" :: "m" (idt_descr));
-
- memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
- syscall_init();
-
- wrmsrl(MSR_FS_BASE, 0);
- wrmsrl(MSR_KERNEL_GS_BASE, 0);
- barrier();
-
- check_efer();
-
- /*
- * set up and load the per-CPU TSS
- */
- for (v = 0; v < N_EXCEPTION_STACKS; v++) {
- static const unsigned int order[N_EXCEPTION_STACKS] = {
- [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
- [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
- };
- if (cpu) {
- estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
- if (!estacks)
- panic("Cannot allocate exception stack %ld %d\n",
- v, cpu);
- }
- estacks += PAGE_SIZE << order[v];
- orig_ist->ist[v] = t->ist[v] = (unsigned long)estacks;
- }
-
- t->io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
- /*
- * <= is required because the CPU will access up to
- * 8 bits beyond the end of the IO permission bitmap.
- */
- for (i = 0; i <= IO_BITMAP_LONGS; i++)
- t->io_bitmap[i] = ~0UL;
-
- atomic_inc(&init_mm.mm_count);
- me->active_mm = &init_mm;
- if (me->mm)
- BUG();
- enter_lazy_tlb(&init_mm, me);
-
- set_tss_desc(cpu, t);
- load_TR_desc();
- load_LDT(&init_mm.context);
-
- /*
- * Clear all 6 debug registers:
- */
-
- set_debugreg(0UL, 0);
- set_debugreg(0UL, 1);
- set_debugreg(0UL, 2);
- set_debugreg(0UL, 3);
- set_debugreg(0UL, 6);
- set_debugreg(0UL, 7);
-
- fpu_init();
-
- raw_local_save_flags(kernel_eflags);
-}
diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c
deleted file mode 100644
index 739175b01e06..000000000000
--- a/arch/x86_64/kernel/signal.c
+++ /dev/null
@@ -1,495 +0,0 @@
-/*
- * linux/arch/x86_64/kernel/signal.c
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
- *
- * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
- * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
- * 2000-2002 x86-64 support by Andi Kleen
- */
-
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/kernel.h>
-#include <linux/signal.h>
-#include <linux/errno.h>
-#include <linux/wait.h>
-#include <linux/ptrace.h>
-#include <linux/unistd.h>
-#include <linux/stddef.h>
-#include <linux/personality.h>
-#include <linux/compiler.h>
-#include <asm/ucontext.h>
-#include <asm/uaccess.h>
-#include <asm/i387.h>
-#include <asm/proto.h>
-#include <asm/ia32_unistd.h>
-#include <asm/mce.h>
-
-/* #define DEBUG_SIG 1 */
-
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
-int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
- sigset_t *set, struct pt_regs * regs);
-int ia32_setup_frame(int sig, struct k_sigaction *ka,
- sigset_t *set, struct pt_regs * regs);
-
-asmlinkage long
-sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
- struct pt_regs *regs)
-{
- return do_sigaltstack(uss, uoss, regs->rsp);
-}
-
-
-/*
- * Do a signal return; undo the signal stack.
- */
-
-struct rt_sigframe
-{
- char __user *pretcode;
- struct ucontext uc;
- struct siginfo info;
-};
-
-static int
-restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned long *prax)
-{
- unsigned int err = 0;
-
- /* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
-
-#define COPY(x) err |= __get_user(regs->x, &sc->x)
-
- COPY(rdi); COPY(rsi); COPY(rbp); COPY(rsp); COPY(rbx);
- COPY(rdx); COPY(rcx); COPY(rip);
- COPY(r8);
- COPY(r9);
- COPY(r10);
- COPY(r11);
- COPY(r12);
- COPY(r13);
- COPY(r14);
- COPY(r15);
-
- /* Kernel saves and restores only the CS segment register on signals,
- * which is the bare minimum needed to allow mixed 32/64-bit code.
- * App's signal handler can save/restore other segments if needed. */
- {
- unsigned cs;
- err |= __get_user(cs, &sc->cs);
- regs->cs = cs | 3; /* Force into user mode */
- }
-
- {
- unsigned int tmpflags;
- err |= __get_user(tmpflags, &sc->eflags);
- regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5);
- regs->orig_rax = -1; /* disable syscall checks */
- }
-
- {
- struct _fpstate __user * buf;
- err |= __get_user(buf, &sc->fpstate);
-
- if (buf) {
- if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
- goto badframe;
- err |= restore_i387(buf);
- } else {
- struct task_struct *me = current;
- if (used_math()) {
- clear_fpu(me);
- clear_used_math();
- }
- }
- }
-
- err |= __get_user(*prax, &sc->rax);
- return err;
-
-badframe:
- return 1;
-}
-
-asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
-{
- struct rt_sigframe __user *frame;
- sigset_t set;
- unsigned long eax;
-
- frame = (struct rt_sigframe __user *)(regs->rsp - 8);
- if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) {
- goto badframe;
- }
- if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) {
- goto badframe;
- }
-
- sigdelsetmask(&set, ~_BLOCKABLE);
- spin_lock_irq(&current->sighand->siglock);
- current->blocked = set;
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
-
- if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax))
- goto badframe;
-
-#ifdef DEBUG_SIG
- printk("%d sigreturn rip:%lx rsp:%lx frame:%p rax:%lx\n",current->pid,regs->rip,regs->rsp,frame,eax);
-#endif
-
- if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->rsp) == -EFAULT)
- goto badframe;
-
- return eax;
-
-badframe:
- signal_fault(regs,frame,"sigreturn");
- return 0;
-}
-
-/*
- * Set up a signal frame.
- */
-
-static inline int
-setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned long mask, struct task_struct *me)
-{
- int err = 0;
-
- err |= __put_user(regs->cs, &sc->cs);
- err |= __put_user(0, &sc->gs);
- err |= __put_user(0, &sc->fs);
-
- err |= __put_user(regs->rdi, &sc->rdi);
- err |= __put_user(regs->rsi, &sc->rsi);
- err |= __put_user(regs->rbp, &sc->rbp);
- err |= __put_user(regs->rsp, &sc->rsp);
- err |= __put_user(regs->rbx, &sc->rbx);
- err |= __put_user(regs->rdx, &sc->rdx);
- err |= __put_user(regs->rcx, &sc->rcx);
- err |= __put_user(regs->rax, &sc->rax);
- err |= __put_user(regs->r8, &sc->r8);
- err |= __put_user(regs->r9, &sc->r9);
- err |= __put_user(regs->r10, &sc->r10);
- err |= __put_user(regs->r11, &sc->r11);
- err |= __put_user(regs->r12, &sc->r12);
- err |= __put_user(regs->r13, &sc->r13);
- err |= __put_user(regs->r14, &sc->r14);
- err |= __put_user(regs->r15, &sc->r15);
- err |= __put_user(me->thread.trap_no, &sc->trapno);
- err |= __put_user(me->thread.error_code, &sc->err);
- err |= __put_user(regs->rip, &sc->rip);
- err |= __put_user(regs->eflags, &sc->eflags);
- err |= __put_user(mask, &sc->oldmask);
- err |= __put_user(me->thread.cr2, &sc->cr2);
-
- return err;
-}
-
-/*
- * Determine which stack to use..
- */
-
-static void __user *
-get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
-{
- unsigned long rsp;
-
- /* Default to using normal stack - redzone*/
- rsp = regs->rsp - 128;
-
- /* This is the X/Open sanctioned signal stack switching. */
- if (ka->sa.sa_flags & SA_ONSTACK) {
- if (sas_ss_flags(rsp) == 0)
- rsp = current->sas_ss_sp + current->sas_ss_size;
- }
-
- return (void __user *)round_down(rsp - size, 16);
-}
-
-static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
- sigset_t *set, struct pt_regs * regs)
-{
- struct rt_sigframe __user *frame;
- struct _fpstate __user *fp = NULL;
- int err = 0;
- struct task_struct *me = current;
-
- if (used_math()) {
- fp = get_stack(ka, regs, sizeof(struct _fpstate));
- frame = (void __user *)round_down(
- (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
-
- if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate)))
- goto give_sigsegv;
-
- if (save_i387(fp) < 0)
- err |= -1;
- } else
- frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
-
- if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
- goto give_sigsegv;
-
- if (ka->sa.sa_flags & SA_SIGINFO) {
- err |= copy_siginfo_to_user(&frame->info, info);
- if (err)
- goto give_sigsegv;
- }
-
- /* Create the ucontext. */
- err |= __put_user(0, &frame->uc.uc_flags);
- err |= __put_user(0, &frame->uc.uc_link);
- err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
- err |= __put_user(sas_ss_flags(regs->rsp),
- &frame->uc.uc_stack.ss_flags);
- err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
- err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me);
- err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate);
- if (sizeof(*set) == 16) {
- __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
- __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
- } else
- err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
-
- /* Set up to return from userspace. If provided, use a stub
- already in userspace. */
- /* x86-64 should always use SA_RESTORER. */
- if (ka->sa.sa_flags & SA_RESTORER) {
- err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
- } else {
- /* could use a vstub here */
- goto give_sigsegv;
- }
-
- if (err)
- goto give_sigsegv;
-
-#ifdef DEBUG_SIG
- printk("%d old rip %lx old rsp %lx old rax %lx\n", current->pid,regs->rip,regs->rsp,regs->rax);
-#endif
-
- /* Set up registers for signal handler */
- regs->rdi = sig;
- /* In case the signal handler was declared without prototypes */
- regs->rax = 0;
-
- /* This also works for non SA_SIGINFO handlers because they expect the
- next argument after the signal number on the stack. */
- regs->rsi = (unsigned long)&frame->info;
- regs->rdx = (unsigned long)&frame->uc;
- regs->rip = (unsigned long) ka->sa.sa_handler;
-
- regs->rsp = (unsigned long)frame;
-
- /* Set up the CS register to run signal handlers in 64-bit mode,
- even if the handler happens to be interrupting 32-bit code. */
- regs->cs = __USER_CS;
-
- /* This, by contrast, has nothing to do with segment registers -
- see include/asm-x86_64/uaccess.h for details. */
- set_fs(USER_DS);
-
- regs->eflags &= ~TF_MASK;
- if (test_thread_flag(TIF_SINGLESTEP))
- ptrace_notify(SIGTRAP);
-#ifdef DEBUG_SIG
- printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%p\n",
- current->comm, current->pid, frame, regs->rip, frame->pretcode);
-#endif
-
- return 0;
-
-give_sigsegv:
- force_sigsegv(sig, current);
- return -EFAULT;
-}
-
-/*
- * OK, we're invoking a handler
- */
-
-static int
-handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
- sigset_t *oldset, struct pt_regs *regs)
-{
- int ret;
-
-#ifdef DEBUG_SIG
- printk("handle_signal pid:%d sig:%lu rip:%lx rsp:%lx regs=%p\n",
- current->pid, sig,
- regs->rip, regs->rsp, regs);
-#endif
-
- /* Are we from a system call? */
- if ((long)regs->orig_rax >= 0) {
- /* If so, check system call restarting.. */
- switch (regs->rax) {
- case -ERESTART_RESTARTBLOCK:
- case -ERESTARTNOHAND:
- regs->rax = -EINTR;
- break;
-
- case -ERESTARTSYS:
- if (!(ka->sa.sa_flags & SA_RESTART)) {
- regs->rax = -EINTR;
- break;
- }
- /* fallthrough */
- case -ERESTARTNOINTR:
- regs->rax = regs->orig_rax;
- regs->rip -= 2;
- break;
- }
- }
-
- /*
- * If TF is set due to a debugger (PT_DTRACE), clear the TF
- * flag so that register information in the sigcontext is
- * correct.
- */
- if (unlikely(regs->eflags & TF_MASK)) {
- if (likely(current->ptrace & PT_DTRACE)) {
- current->ptrace &= ~PT_DTRACE;
- regs->eflags &= ~TF_MASK;
- }
- }
-
-#ifdef CONFIG_IA32_EMULATION
- if (test_thread_flag(TIF_IA32)) {
- if (ka->sa.sa_flags & SA_SIGINFO)
- ret = ia32_setup_rt_frame(sig, ka, info, oldset, regs);
- else
- ret = ia32_setup_frame(sig, ka, oldset, regs);
- } else
-#endif
- ret = setup_rt_frame(sig, ka, info, oldset, regs);
-
- if (ret == 0) {
- spin_lock_irq(&current->sighand->siglock);
- sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
- if (!(ka->sa.sa_flags & SA_NODEFER))
- sigaddset(&current->blocked,sig);
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
- }
-
- return ret;
-}
-
-/*
- * Note that 'init' is a special process: it doesn't get signals it doesn't
- * want to handle. Thus you cannot kill init even with a SIGKILL even by
- * mistake.
- */
-static void do_signal(struct pt_regs *regs)
-{
- struct k_sigaction ka;
- siginfo_t info;
- int signr;
- sigset_t *oldset;
-
- /*
- * We want the common case to go fast, which
- * is why we may in certain cases get here from
- * kernel mode. Just return without doing anything
- * if so.
- */
- if (!user_mode(regs))
- return;
-
- if (test_thread_flag(TIF_RESTORE_SIGMASK))
- oldset = &current->saved_sigmask;
- else
- oldset = &current->blocked;
-
- signr = get_signal_to_deliver(&info, &ka, regs, NULL);
- if (signr > 0) {
- /* Reenable any watchpoints before delivering the
- * signal to user space. The processor register will
- * have been cleared if the watchpoint triggered
- * inside the kernel.
- */
- if (current->thread.debugreg7)
- set_debugreg(current->thread.debugreg7, 7);
-
- /* Whee! Actually deliver the signal. */
- if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
- /* a signal was successfully delivered; the saved
- * sigmask will have been stored in the signal frame,
- * and will be restored by sigreturn, so we can simply
- * clear the TIF_RESTORE_SIGMASK flag */
- clear_thread_flag(TIF_RESTORE_SIGMASK);
- }
- return;
- }
-
- /* Did we come from a system call? */
- if ((long)regs->orig_rax >= 0) {
- /* Restart the system call - no handlers present */
- long res = regs->rax;
- switch (res) {
- case -ERESTARTNOHAND:
- case -ERESTARTSYS:
- case -ERESTARTNOINTR:
- regs->rax = regs->orig_rax;
- regs->rip -= 2;
- break;
- case -ERESTART_RESTARTBLOCK:
- regs->rax = test_thread_flag(TIF_IA32) ?
- __NR_ia32_restart_syscall :
- __NR_restart_syscall;
- regs->rip -= 2;
- break;
- }
- }
-
- /* if there's no signal to deliver, we just put the saved sigmask
- back. */
- if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
- clear_thread_flag(TIF_RESTORE_SIGMASK);
- sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
- }
-}
-
-void
-do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
-{
-#ifdef DEBUG_SIG
- printk("do_notify_resume flags:%x rip:%lx rsp:%lx caller:%p pending:%x\n",
- thread_info_flags, regs->rip, regs->rsp, __builtin_return_address(0),signal_pending(current));
-#endif
-
- /* Pending single-step? */
- if (thread_info_flags & _TIF_SINGLESTEP) {
- regs->eflags |= TF_MASK;
- clear_thread_flag(TIF_SINGLESTEP);
- }
-
-#ifdef CONFIG_X86_MCE
- /* notify userspace of pending MCEs */
- if (thread_info_flags & _TIF_MCE_NOTIFY)
- mce_notify_user();
-#endif /* CONFIG_X86_MCE */
-
- /* deal with pending signal delivery */
- if (thread_info_flags & (_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK))
- do_signal(regs);
-}
-
-void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
-{
- struct task_struct *me = current;
- if (show_unhandled_signals && printk_ratelimit())
- printk("%s[%d] bad frame in %s frame:%p rip:%lx rsp:%lx orax:%lx\n",
- me->comm,me->pid,where,frame,regs->rip,regs->rsp,regs->orig_rax);
-
- force_sig(SIGSEGV, me);
-}
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
deleted file mode 100644
index df4a82812adb..000000000000
--- a/arch/x86_64/kernel/smp.c
+++ /dev/null
@@ -1,523 +0,0 @@
-/*
- * Intel SMP support routines.
- *
- * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
- * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
- * (c) 2002,2003 Andi Kleen, SuSE Labs.
- *
- * This code is released under the GNU General Public License version 2 or
- * later.
- */
-
-#include <linux/init.h>
-
-#include <linux/mm.h>
-#include <linux/delay.h>
-#include <linux/spinlock.h>
-#include <linux/smp.h>
-#include <linux/kernel_stat.h>
-#include <linux/mc146818rtc.h>
-#include <linux/interrupt.h>
-
-#include <asm/mtrr.h>
-#include <asm/pgalloc.h>
-#include <asm/tlbflush.h>
-#include <asm/mach_apic.h>
-#include <asm/mmu_context.h>
-#include <asm/proto.h>
-#include <asm/apicdef.h>
-#include <asm/idle.h>
-
-/*
- * Smarter SMP flushing macros.
- * c/o Linus Torvalds.
- *
- * These mean you can really definitely utterly forget about
- * writing to user space from interrupts. (Its not allowed anyway).
- *
- * Optimizations Manfred Spraul <manfred@colorfullife.com>
- *
- * More scalable flush, from Andi Kleen
- *
- * To avoid global state use 8 different call vectors.
- * Each CPU uses a specific vector to trigger flushes on other
- * CPUs. Depending on the received vector the target CPUs look into
- * the right per cpu variable for the flush data.
- *
- * With more than 8 CPUs they are hashed to the 8 available
- * vectors. The limited global vector space forces us to this right now.
- * In future when interrupts are split into per CPU domains this could be
- * fixed, at the cost of triggering multiple IPIs in some cases.
- */
-
-union smp_flush_state {
- struct {
- cpumask_t flush_cpumask;
- struct mm_struct *flush_mm;
- unsigned long flush_va;
-#define FLUSH_ALL -1ULL
- spinlock_t tlbstate_lock;
- };
- char pad[SMP_CACHE_BYTES];
-} ____cacheline_aligned;
-
-/* State is put into the per CPU data section, but padded
- to a full cache line because other CPUs can access it and we don't
- want false sharing in the per cpu data segment. */
-static DEFINE_PER_CPU(union smp_flush_state, flush_state);
-
-/*
- * We cannot call mmdrop() because we are in interrupt context,
- * instead update mm->cpu_vm_mask.
- */
-static inline void leave_mm(int cpu)
-{
- if (read_pda(mmu_state) == TLBSTATE_OK)
- BUG();
- cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
- load_cr3(swapper_pg_dir);
-}
-
-/*
- *
- * The flush IPI assumes that a thread switch happens in this order:
- * [cpu0: the cpu that switches]
- * 1) switch_mm() either 1a) or 1b)
- * 1a) thread switch to a different mm
- * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
- * Stop ipi delivery for the old mm. This is not synchronized with
- * the other cpus, but smp_invalidate_interrupt ignore flush ipis
- * for the wrong mm, and in the worst case we perform a superfluous
- * tlb flush.
- * 1a2) set cpu mmu_state to TLBSTATE_OK
- * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
- * was in lazy tlb mode.
- * 1a3) update cpu active_mm
- * Now cpu0 accepts tlb flushes for the new mm.
- * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
- * Now the other cpus will send tlb flush ipis.
- * 1a4) change cr3.
- * 1b) thread switch without mm change
- * cpu active_mm is correct, cpu0 already handles
- * flush ipis.
- * 1b1) set cpu mmu_state to TLBSTATE_OK
- * 1b2) test_and_set the cpu bit in cpu_vm_mask.
- * Atomically set the bit [other cpus will start sending flush ipis],
- * and test the bit.
- * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
- * 2) switch %%esp, ie current
- *
- * The interrupt must handle 2 special cases:
- * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
- * - the cpu performs speculative tlb reads, i.e. even if the cpu only
- * runs in kernel space, the cpu could load tlb entries for user space
- * pages.
- *
- * The good news is that cpu mmu_state is local to each cpu, no
- * write/read ordering problems.
- */
-
-/*
- * TLB flush IPI:
- *
- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
- * 2) Leave the mm if we are in the lazy tlb mode.
- *
- * Interrupts are disabled.
- */
-
-asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
-{
- int cpu;
- int sender;
- union smp_flush_state *f;
-
- cpu = smp_processor_id();
- /*
- * orig_rax contains the negated interrupt vector.
- * Use that to determine where the sender put the data.
- */
- sender = ~regs->orig_rax - INVALIDATE_TLB_VECTOR_START;
- f = &per_cpu(flush_state, sender);
-
- if (!cpu_isset(cpu, f->flush_cpumask))
- goto out;
- /*
- * This was a BUG() but until someone can quote me the
- * line from the intel manual that guarantees an IPI to
- * multiple CPUs is retried _only_ on the erroring CPUs
- * its staying as a return
- *
- * BUG();
- */
-
- if (f->flush_mm == read_pda(active_mm)) {
- if (read_pda(mmu_state) == TLBSTATE_OK) {
- if (f->flush_va == FLUSH_ALL)
- local_flush_tlb();
- else
- __flush_tlb_one(f->flush_va);
- } else
- leave_mm(cpu);
- }
-out:
- ack_APIC_irq();
- cpu_clear(cpu, f->flush_cpumask);
-}
-
-static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
- unsigned long va)
-{
- int sender;
- union smp_flush_state *f;
-
- /* Caller has disabled preemption */
- sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
- f = &per_cpu(flush_state, sender);
-
- /* Could avoid this lock when
- num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
- probably not worth checking this for a cache-hot lock. */
- spin_lock(&f->tlbstate_lock);
-
- f->flush_mm = mm;
- f->flush_va = va;
- cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask);
-
- /*
- * We have to send the IPI only to
- * CPUs affected.
- */
- send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender);
-
- while (!cpus_empty(f->flush_cpumask))
- cpu_relax();
-
- f->flush_mm = NULL;
- f->flush_va = 0;
- spin_unlock(&f->tlbstate_lock);
-}
-
-int __cpuinit init_smp_flush(void)
-{
- int i;
- for_each_cpu_mask(i, cpu_possible_map) {
- spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock);
- }
- return 0;
-}
-
-core_initcall(init_smp_flush);
-
-void flush_tlb_current_task(void)
-{
- struct mm_struct *mm = current->mm;
- cpumask_t cpu_mask;
-
- preempt_disable();
- cpu_mask = mm->cpu_vm_mask;
- cpu_clear(smp_processor_id(), cpu_mask);
-
- local_flush_tlb();
- if (!cpus_empty(cpu_mask))
- flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
- preempt_enable();
-}
-EXPORT_SYMBOL(flush_tlb_current_task);
-
-void flush_tlb_mm (struct mm_struct * mm)
-{
- cpumask_t cpu_mask;
-
- preempt_disable();
- cpu_mask = mm->cpu_vm_mask;
- cpu_clear(smp_processor_id(), cpu_mask);
-
- if (current->active_mm == mm) {
- if (current->mm)
- local_flush_tlb();
- else
- leave_mm(smp_processor_id());
- }
- if (!cpus_empty(cpu_mask))
- flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
-
- preempt_enable();
-}
-EXPORT_SYMBOL(flush_tlb_mm);
-
-void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
-{
- struct mm_struct *mm = vma->vm_mm;
- cpumask_t cpu_mask;
-
- preempt_disable();
- cpu_mask = mm->cpu_vm_mask;
- cpu_clear(smp_processor_id(), cpu_mask);
-
- if (current->active_mm == mm) {
- if(current->mm)
- __flush_tlb_one(va);
- else
- leave_mm(smp_processor_id());
- }
-
- if (!cpus_empty(cpu_mask))
- flush_tlb_others(cpu_mask, mm, va);
-
- preempt_enable();
-}
-EXPORT_SYMBOL(flush_tlb_page);
-
-static void do_flush_tlb_all(void* info)
-{
- unsigned long cpu = smp_processor_id();
-
- __flush_tlb_all();
- if (read_pda(mmu_state) == TLBSTATE_LAZY)
- leave_mm(cpu);
-}
-
-void flush_tlb_all(void)
-{
- on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
-}
-
-/*
- * this function sends a 'reschedule' IPI to another CPU.
- * it goes straight through and wastes no time serializing
- * anything. Worst case is that we lose a reschedule ...
- */
-
-void smp_send_reschedule(int cpu)
-{
- send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
-}
-
-/*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
- */
-static DEFINE_SPINLOCK(call_lock);
-
-struct call_data_struct {
- void (*func) (void *info);
- void *info;
- atomic_t started;
- atomic_t finished;
- int wait;
-};
-
-static struct call_data_struct * call_data;
-
-void lock_ipi_call_lock(void)
-{
- spin_lock_irq(&call_lock);
-}
-
-void unlock_ipi_call_lock(void)
-{
- spin_unlock_irq(&call_lock);
-}
-
-/*
- * this function sends a 'generic call function' IPI to one other CPU
- * in the system.
- *
- * cpu is a standard Linux logical CPU number.
- */
-static void
-__smp_call_function_single(int cpu, void (*func) (void *info), void *info,
- int nonatomic, int wait)
-{
- struct call_data_struct data;
- int cpus = 1;
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- call_data = &data;
- wmb();
- /* Send a message to all other CPUs and wait for them to respond */
- send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus)
- cpu_relax();
-
- if (!wait)
- return;
-
- while (atomic_read(&data.finished) != cpus)
- cpu_relax();
-}
-
-/*
- * smp_call_function_single - Run a function on a specific CPU
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @nonatomic: Currently unused.
- * @wait: If true, wait until function has completed on other CPUs.
- *
- * Retrurns 0 on success, else a negative status code.
- *
- * Does not return until the remote CPU is nearly ready to execute <func>
- * or is or has executed.
- */
-
-int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
- int nonatomic, int wait)
-{
- /* prevent preemption and reschedule on another processor */
- int me = get_cpu();
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- if (cpu == me) {
- local_irq_disable();
- func(info);
- local_irq_enable();
- put_cpu();
- return 0;
- }
-
- spin_lock(&call_lock);
- __smp_call_function_single(cpu, func, info, nonatomic, wait);
- spin_unlock(&call_lock);
- put_cpu();
- return 0;
-}
-EXPORT_SYMBOL(smp_call_function_single);
-
-/*
- * this function sends a 'generic call function' IPI to all other CPUs
- * in the system.
- */
-static void __smp_call_function (void (*func) (void *info), void *info,
- int nonatomic, int wait)
-{
- struct call_data_struct data;
- int cpus = num_online_cpus()-1;
-
- if (!cpus)
- return;
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- call_data = &data;
- wmb();
- /* Send a message to all other CPUs and wait for them to respond */
- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus)
- cpu_relax();
-
- if (!wait)
- return;
-
- while (atomic_read(&data.finished) != cpus)
- cpu_relax();
-}
-
-/*
- * smp_call_function - run a function on all other CPUs.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @nonatomic: currently unused.
- * @wait: If true, wait (atomically) until function has completed on other
- * CPUs.
- *
- * Returns 0 on success, else a negative status code. Does not return until
- * remote CPUs are nearly ready to execute func or are or have executed.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- * Actually there are a few legal cases, like panic.
- */
-int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
- int wait)
-{
- spin_lock(&call_lock);
- __smp_call_function(func,info,nonatomic,wait);
- spin_unlock(&call_lock);
- return 0;
-}
-EXPORT_SYMBOL(smp_call_function);
-
-static void stop_this_cpu(void *dummy)
-{
- local_irq_disable();
- /*
- * Remove this CPU:
- */
- cpu_clear(smp_processor_id(), cpu_online_map);
- disable_local_APIC();
- for (;;)
- halt();
-}
-
-void smp_send_stop(void)
-{
- int nolock;
- unsigned long flags;
-
- if (reboot_force)
- return;
-
- /* Don't deadlock on the call lock in panic */
- nolock = !spin_trylock(&call_lock);
- local_irq_save(flags);
- __smp_call_function(stop_this_cpu, NULL, 0, 0);
- if (!nolock)
- spin_unlock(&call_lock);
- disable_local_APIC();
- local_irq_restore(flags);
-}
-
-/*
- * Reschedule call back. Nothing to do,
- * all the work is done automatically when
- * we return from the interrupt.
- */
-asmlinkage void smp_reschedule_interrupt(void)
-{
- ack_APIC_irq();
-}
-
-asmlinkage void smp_call_function_interrupt(void)
-{
- void (*func) (void *info) = call_data->func;
- void *info = call_data->info;
- int wait = call_data->wait;
-
- ack_APIC_irq();
- /*
- * Notify initiating CPU that I've grabbed the data and am
- * about to execute the function
- */
- mb();
- atomic_inc(&call_data->started);
- /*
- * At this point the info structure may be out of scope unless wait==1
- */
- exit_idle();
- irq_enter();
- (*func)(info);
- irq_exit();
- if (wait) {
- mb();
- atomic_inc(&call_data->finished);
- }
-}
-
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
deleted file mode 100644
index 32f50783edc8..000000000000
--- a/arch/x86_64/kernel/smpboot.c
+++ /dev/null
@@ -1,1085 +0,0 @@
-/*
- * x86 SMP booting functions
- *
- * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
- * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
- * Copyright 2001 Andi Kleen, SuSE Labs.
- *
- * Much of the core SMP work is based on previous work by Thomas Radke, to
- * whom a great many thanks are extended.
- *
- * Thanks to Intel for making available several different Pentium,
- * Pentium Pro and Pentium-II/Xeon MP machines.
- * Original development of Linux SMP code supported by Caldera.
- *
- * This code is released under the GNU General Public License version 2
- *
- * Fixes
- * Felix Koop : NR_CPUS used properly
- * Jose Renau : Handle single CPU case.
- * Alan Cox : By repeated request 8) - Total BogoMIP report.
- * Greg Wright : Fix for kernel stacks panic.
- * Erich Boleyn : MP v1.4 and additional changes.
- * Matthias Sattler : Changes for 2.1 kernel map.
- * Michel Lespinasse : Changes for 2.1 kernel map.
- * Michael Chastain : Change trampoline.S to gnu as.
- * Alan Cox : Dumb bug: 'B' step PPro's are fine
- * Ingo Molnar : Added APIC timers, based on code
- * from Jose Renau
- * Ingo Molnar : various cleanups and rewrites
- * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
- * Maciej W. Rozycki : Bits for genuine 82489DX APICs
- * Andi Kleen : Changed for SMP boot into long mode.
- * Rusty Russell : Hacked into shape for new "hotplug" boot process.
- * Andi Kleen : Converted to new state machine.
- * Various cleanups.
- * Probably mostly hotplug CPU ready now.
- * Ashok Raj : CPU hotplug support
- */
-
-
-#include <linux/init.h>
-
-#include <linux/mm.h>
-#include <linux/kernel_stat.h>
-#include <linux/bootmem.h>
-#include <linux/thread_info.h>
-#include <linux/module.h>
-#include <linux/delay.h>
-#include <linux/mc146818rtc.h>
-#include <linux/smp.h>
-#include <linux/kdebug.h>
-
-#include <asm/mtrr.h>
-#include <asm/pgalloc.h>
-#include <asm/desc.h>
-#include <asm/tlbflush.h>
-#include <asm/proto.h>
-#include <asm/nmi.h>
-#include <asm/irq.h>
-#include <asm/hw_irq.h>
-#include <asm/numa.h>
-
-/* Number of siblings per CPU package */
-int smp_num_siblings = 1;
-EXPORT_SYMBOL(smp_num_siblings);
-
-/* Last level cache ID of each logical CPU */
-u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};
-
-/* Bitmask of currently online CPUs */
-cpumask_t cpu_online_map __read_mostly;
-
-EXPORT_SYMBOL(cpu_online_map);
-
-/*
- * Private maps to synchronize booting between AP and BP.
- * Probably not needed anymore, but it makes for easier debugging. -AK
- */
-cpumask_t cpu_callin_map;
-cpumask_t cpu_callout_map;
-EXPORT_SYMBOL(cpu_callout_map);
-
-cpumask_t cpu_possible_map;
-EXPORT_SYMBOL(cpu_possible_map);
-
-/* Per CPU bogomips and other parameters */
-struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
-EXPORT_SYMBOL(cpu_data);
-
-/* Set when the idlers are all forked */
-int smp_threads_ready;
-
-/* representing HT siblings of each logical CPU */
-cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
-EXPORT_SYMBOL(cpu_sibling_map);
-
-/* representing HT and core siblings of each logical CPU */
-cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
-EXPORT_SYMBOL(cpu_core_map);
-
-/*
- * Trampoline 80x86 program as an array.
- */
-
-extern unsigned char trampoline_data[];
-extern unsigned char trampoline_end[];
-
-/* State of each CPU */
-DEFINE_PER_CPU(int, cpu_state) = { 0 };
-
-/*
- * Store all idle threads, this can be reused instead of creating
- * a new thread. Also avoids complicated thread destroy functionality
- * for idle threads.
- */
-struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
-
-#define get_idle_for_cpu(x) (idle_thread_array[(x)])
-#define set_idle_for_cpu(x,p) (idle_thread_array[(x)] = (p))
-
-/*
- * Currently trivial. Write the real->protected mode
- * bootstrap into the page concerned. The caller
- * has made sure it's suitably aligned.
- */
-
-static unsigned long __cpuinit setup_trampoline(void)
-{
- void *tramp = __va(SMP_TRAMPOLINE_BASE);
- memcpy(tramp, trampoline_data, trampoline_end - trampoline_data);
- return virt_to_phys(tramp);
-}
-
-/*
- * The bootstrap kernel entry code has set these up. Save them for
- * a given CPU
- */
-
-static void __cpuinit smp_store_cpu_info(int id)
-{
- struct cpuinfo_x86 *c = cpu_data + id;
-
- *c = boot_cpu_data;
- identify_cpu(c);
- print_cpu_info(c);
-}
-
-static atomic_t init_deasserted __cpuinitdata;
-
-/*
- * Report back to the Boot Processor.
- * Running on AP.
- */
-void __cpuinit smp_callin(void)
-{
- int cpuid, phys_id;
- unsigned long timeout;
-
- /*
- * If waken up by an INIT in an 82489DX configuration
- * we may get here before an INIT-deassert IPI reaches
- * our local APIC. We have to wait for the IPI or we'll
- * lock up on an APIC access.
- */
- while (!atomic_read(&init_deasserted))
- cpu_relax();
-
- /*
- * (This works even if the APIC is not enabled.)
- */
- phys_id = GET_APIC_ID(apic_read(APIC_ID));
- cpuid = smp_processor_id();
- if (cpu_isset(cpuid, cpu_callin_map)) {
- panic("smp_callin: phys CPU#%d, CPU#%d already present??\n",
- phys_id, cpuid);
- }
- Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
-
- /*
- * STARTUP IPIs are fragile beasts as they might sometimes
- * trigger some glue motherboard logic. Complete APIC bus
- * silence for 1 second, this overestimates the time the
- * boot CPU is spending to send the up to 2 STARTUP IPIs
- * by a factor of two. This should be enough.
- */
-
- /*
- * Waiting 2s total for startup (udelay is not yet working)
- */
- timeout = jiffies + 2*HZ;
- while (time_before(jiffies, timeout)) {
- /*
- * Has the boot CPU finished it's STARTUP sequence?
- */
- if (cpu_isset(cpuid, cpu_callout_map))
- break;
- cpu_relax();
- }
-
- if (!time_before(jiffies, timeout)) {
- panic("smp_callin: CPU%d started up but did not get a callout!\n",
- cpuid);
- }
-
- /*
- * the boot CPU has finished the init stage and is spinning
- * on callin_map until we finish. We are free to set up this
- * CPU, first the APIC. (this is probably redundant on most
- * boards)
- */
-
- Dprintk("CALLIN, before setup_local_APIC().\n");
- setup_local_APIC();
-
- /*
- * Get our bogomips.
- *
- * Need to enable IRQs because it can take longer and then
- * the NMI watchdog might kill us.
- */
- local_irq_enable();
- calibrate_delay();
- local_irq_disable();
- Dprintk("Stack at about %p\n",&cpuid);
-
- disable_APIC_timer();
-
- /*
- * Save our processor parameters
- */
- smp_store_cpu_info(cpuid);
-
- /*
- * Allow the master to continue.
- */
- cpu_set(cpuid, cpu_callin_map);
-}
-
-/* maps the cpu to the sched domain representing multi-core */
-cpumask_t cpu_coregroup_map(int cpu)
-{
- struct cpuinfo_x86 *c = cpu_data + cpu;
- /*
- * For perf, we return last level cache shared map.
- * And for power savings, we return cpu_core_map
- */
- if (sched_mc_power_savings || sched_smt_power_savings)
- return cpu_core_map[cpu];
- else
- return c->llc_shared_map;
-}
-
-/* representing cpus for which sibling maps can be computed */
-static cpumask_t cpu_sibling_setup_map;
-
-static inline void set_cpu_sibling_map(int cpu)
-{
- int i;
- struct cpuinfo_x86 *c = cpu_data;
-
- cpu_set(cpu, cpu_sibling_setup_map);
-
- if (smp_num_siblings > 1) {
- for_each_cpu_mask(i, cpu_sibling_setup_map) {
- if (c[cpu].phys_proc_id == c[i].phys_proc_id &&
- c[cpu].cpu_core_id == c[i].cpu_core_id) {
- cpu_set(i, cpu_sibling_map[cpu]);
- cpu_set(cpu, cpu_sibling_map[i]);
- cpu_set(i, cpu_core_map[cpu]);
- cpu_set(cpu, cpu_core_map[i]);
- cpu_set(i, c[cpu].llc_shared_map);
- cpu_set(cpu, c[i].llc_shared_map);
- }
- }
- } else {
- cpu_set(cpu, cpu_sibling_map[cpu]);
- }
-
- cpu_set(cpu, c[cpu].llc_shared_map);
-
- if (current_cpu_data.x86_max_cores == 1) {
- cpu_core_map[cpu] = cpu_sibling_map[cpu];
- c[cpu].booted_cores = 1;
- return;
- }
-
- for_each_cpu_mask(i, cpu_sibling_setup_map) {
- if (cpu_llc_id[cpu] != BAD_APICID &&
- cpu_llc_id[cpu] == cpu_llc_id[i]) {
- cpu_set(i, c[cpu].llc_shared_map);
- cpu_set(cpu, c[i].llc_shared_map);
- }
- if (c[cpu].phys_proc_id == c[i].phys_proc_id) {
- cpu_set(i, cpu_core_map[cpu]);
- cpu_set(cpu, cpu_core_map[i]);
- /*
- * Does this new cpu bringup a new core?
- */
- if (cpus_weight(cpu_sibling_map[cpu]) == 1) {
- /*
- * for each core in package, increment
- * the booted_cores for this new cpu
- */
- if (first_cpu(cpu_sibling_map[i]) == i)
- c[cpu].booted_cores++;
- /*
- * increment the core count for all
- * the other cpus in this package
- */
- if (i != cpu)
- c[i].booted_cores++;
- } else if (i != cpu && !c[cpu].booted_cores)
- c[cpu].booted_cores = c[i].booted_cores;
- }
- }
-}
-
-/*
- * Setup code on secondary processor (after comming out of the trampoline)
- */
-void __cpuinit start_secondary(void)
-{
- /*
- * Dont put anything before smp_callin(), SMP
- * booting is too fragile that we want to limit the
- * things done here to the most necessary things.
- */
- cpu_init();
- preempt_disable();
- smp_callin();
-
- /* otherwise gcc will move up the smp_processor_id before the cpu_init */
- barrier();
-
- /*
- * Check TSC sync first:
- */
- check_tsc_sync_target();
-
- Dprintk("cpu %d: setting up apic clock\n", smp_processor_id());
- setup_secondary_APIC_clock();
-
- Dprintk("cpu %d: enabling apic timer\n", smp_processor_id());
-
- if (nmi_watchdog == NMI_IO_APIC) {
- disable_8259A_irq(0);
- enable_NMI_through_LVT0(NULL);
- enable_8259A_irq(0);
- }
-
- enable_APIC_timer();
-
- /*
- * The sibling maps must be set before turing the online map on for
- * this cpu
- */
- set_cpu_sibling_map(smp_processor_id());
-
- /*
- * We need to hold call_lock, so there is no inconsistency
- * between the time smp_call_function() determines number of
- * IPI receipients, and the time when the determination is made
- * for which cpus receive the IPI in genapic_flat.c. Holding this
- * lock helps us to not include this cpu in a currently in progress
- * smp_call_function().
- */
- lock_ipi_call_lock();
- spin_lock(&vector_lock);
-
- /* Setup the per cpu irq handling data structures */
- __setup_vector_irq(smp_processor_id());
- /*
- * Allow the master to continue.
- */
- cpu_set(smp_processor_id(), cpu_online_map);
- per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
- spin_unlock(&vector_lock);
-
- unlock_ipi_call_lock();
-
- cpu_idle();
-}
-
-extern volatile unsigned long init_rsp;
-extern void (*initial_code)(void);
-
-#ifdef APIC_DEBUG
-static void inquire_remote_apic(int apicid)
-{
- unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
- char *names[] = { "ID", "VERSION", "SPIV" };
- int timeout;
- unsigned int status;
-
- printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid);
-
- for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
- printk("... APIC #%d %s: ", apicid, names[i]);
-
- /*
- * Wait for idle.
- */
- status = safe_apic_wait_icr_idle();
- if (status)
- printk("a previous APIC delivery may have failed\n");
-
- apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
- apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);
-
- timeout = 0;
- do {
- udelay(100);
- status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
- } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
-
- switch (status) {
- case APIC_ICR_RR_VALID:
- status = apic_read(APIC_RRR);
- printk("%08x\n", status);
- break;
- default:
- printk("failed\n");
- }
- }
-}
-#endif
-
-/*
- * Kick the secondary to wake up.
- */
-static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip)
-{
- unsigned long send_status, accept_status = 0;
- int maxlvt, num_starts, j;
-
- Dprintk("Asserting INIT.\n");
-
- /*
- * Turn INIT on target chip
- */
- apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
- /*
- * Send IPI
- */
- apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
- | APIC_DM_INIT);
-
- Dprintk("Waiting for send to finish...\n");
- send_status = safe_apic_wait_icr_idle();
-
- mdelay(10);
-
- Dprintk("Deasserting INIT.\n");
-
- /* Target chip */
- apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
- /* Send IPI */
- apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
-
- Dprintk("Waiting for send to finish...\n");
- send_status = safe_apic_wait_icr_idle();
-
- mb();
- atomic_set(&init_deasserted, 1);
-
- num_starts = 2;
-
- /*
- * Run STARTUP IPI loop.
- */
- Dprintk("#startup loops: %d.\n", num_starts);
-
- maxlvt = get_maxlvt();
-
- for (j = 1; j <= num_starts; j++) {
- Dprintk("Sending STARTUP #%d.\n",j);
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
- Dprintk("After apic_write.\n");
-
- /*
- * STARTUP IPI
- */
-
- /* Target chip */
- apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
- /* Boot on the stack */
- /* Kick the second */
- apic_write(APIC_ICR, APIC_DM_STARTUP | (start_rip >> 12));
-
- /*
- * Give the other CPU some time to accept the IPI.
- */
- udelay(300);
-
- Dprintk("Startup point 1.\n");
-
- Dprintk("Waiting for send to finish...\n");
- send_status = safe_apic_wait_icr_idle();
-
- /*
- * Give the other CPU some time to accept the IPI.
- */
- udelay(200);
- /*
- * Due to the Pentium erratum 3AP.
- */
- if (maxlvt > 3) {
- apic_write(APIC_ESR, 0);
- }
- accept_status = (apic_read(APIC_ESR) & 0xEF);
- if (send_status || accept_status)
- break;
- }
- Dprintk("After Startup.\n");
-
- if (send_status)
- printk(KERN_ERR "APIC never delivered???\n");
- if (accept_status)
- printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status);
-
- return (send_status | accept_status);
-}
-
-struct create_idle {
- struct work_struct work;
- struct task_struct *idle;
- struct completion done;
- int cpu;
-};
-
-void do_fork_idle(struct work_struct *work)
-{
- struct create_idle *c_idle =
- container_of(work, struct create_idle, work);
-
- c_idle->idle = fork_idle(c_idle->cpu);
- complete(&c_idle->done);
-}
-
-/*
- * Boot one CPU.
- */
-static int __cpuinit do_boot_cpu(int cpu, int apicid)
-{
- unsigned long boot_error;
- int timeout;
- unsigned long start_rip;
- struct create_idle c_idle = {
- .work = __WORK_INITIALIZER(c_idle.work, do_fork_idle),
- .cpu = cpu,
- .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
- };
-
- /* allocate memory for gdts of secondary cpus. Hotplug is considered */
- if (!cpu_gdt_descr[cpu].address &&
- !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) {
- printk(KERN_ERR "Failed to allocate GDT for CPU %d\n", cpu);
- return -1;
- }
-
- /* Allocate node local memory for AP pdas */
- if (cpu_pda(cpu) == &boot_cpu_pda[cpu]) {
- struct x8664_pda *newpda, *pda;
- int node = cpu_to_node(cpu);
- pda = cpu_pda(cpu);
- newpda = kmalloc_node(sizeof (struct x8664_pda), GFP_ATOMIC,
- node);
- if (newpda) {
- memcpy(newpda, pda, sizeof (struct x8664_pda));
- cpu_pda(cpu) = newpda;
- } else
- printk(KERN_ERR
- "Could not allocate node local PDA for CPU %d on node %d\n",
- cpu, node);
- }
-
- alternatives_smp_switch(1);
-
- c_idle.idle = get_idle_for_cpu(cpu);
-
- if (c_idle.idle) {
- c_idle.idle->thread.rsp = (unsigned long) (((struct pt_regs *)
- (THREAD_SIZE + task_stack_page(c_idle.idle))) - 1);
- init_idle(c_idle.idle, cpu);
- goto do_rest;
- }
-
- /*
- * During cold boot process, keventd thread is not spun up yet.
- * When we do cpu hot-add, we create idle threads on the fly, we should
- * not acquire any attributes from the calling context. Hence the clean
- * way to create kernel_threads() is to do that from keventd().
- * We do the current_is_keventd() due to the fact that ACPI notifier
- * was also queuing to keventd() and when the caller is already running
- * in context of keventd(), we would end up with locking up the keventd
- * thread.
- */
- if (!keventd_up() || current_is_keventd())
- c_idle.work.func(&c_idle.work);
- else {
- schedule_work(&c_idle.work);
- wait_for_completion(&c_idle.done);
- }
-
- if (IS_ERR(c_idle.idle)) {
- printk("failed fork for CPU %d\n", cpu);
- return PTR_ERR(c_idle.idle);
- }
-
- set_idle_for_cpu(cpu, c_idle.idle);
-
-do_rest:
-
- cpu_pda(cpu)->pcurrent = c_idle.idle;
-
- start_rip = setup_trampoline();
-
- init_rsp = c_idle.idle->thread.rsp;
- per_cpu(init_tss,cpu).rsp0 = init_rsp;
- initial_code = start_secondary;
- clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
-
- printk(KERN_INFO "Booting processor %d/%d APIC 0x%x\n", cpu,
- cpus_weight(cpu_present_map),
- apicid);
-
- /*
- * This grunge runs the startup process for
- * the targeted processor.
- */
-
- atomic_set(&init_deasserted, 0);
-
- Dprintk("Setting warm reset code and vector.\n");
-
- CMOS_WRITE(0xa, 0xf);
- local_flush_tlb();
- Dprintk("1.\n");
- *((volatile unsigned short *) phys_to_virt(0x469)) = start_rip >> 4;
- Dprintk("2.\n");
- *((volatile unsigned short *) phys_to_virt(0x467)) = start_rip & 0xf;
- Dprintk("3.\n");
-
- /*
- * Be paranoid about clearing APIC errors.
- */
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
-
- /*
- * Status is now clean
- */
- boot_error = 0;
-
- /*
- * Starting actual IPI sequence...
- */
- boot_error = wakeup_secondary_via_INIT(apicid, start_rip);
-
- if (!boot_error) {
- /*
- * allow APs to start initializing.
- */
- Dprintk("Before Callout %d.\n", cpu);
- cpu_set(cpu, cpu_callout_map);
- Dprintk("After Callout %d.\n", cpu);
-
- /*
- * Wait 5s total for a response
- */
- for (timeout = 0; timeout < 50000; timeout++) {
- if (cpu_isset(cpu, cpu_callin_map))
- break; /* It has booted */
- udelay(100);
- }
-
- if (cpu_isset(cpu, cpu_callin_map)) {
- /* number CPUs logically, starting from 1 (BSP is 0) */
- Dprintk("CPU has booted.\n");
- } else {
- boot_error = 1;
- if (*((volatile unsigned char *)phys_to_virt(SMP_TRAMPOLINE_BASE))
- == 0xA5)
- /* trampoline started but...? */
- printk("Stuck ??\n");
- else
- /* trampoline code not run */
- printk("Not responding.\n");
-#ifdef APIC_DEBUG
- inquire_remote_apic(apicid);
-#endif
- }
- }
- if (boot_error) {
- cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
- clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
- clear_node_cpumask(cpu); /* was set by numa_add_cpu */
- cpu_clear(cpu, cpu_present_map);
- cpu_clear(cpu, cpu_possible_map);
- x86_cpu_to_apicid[cpu] = BAD_APICID;
- x86_cpu_to_log_apicid[cpu] = BAD_APICID;
- return -EIO;
- }
-
- return 0;
-}
-
-cycles_t cacheflush_time;
-unsigned long cache_decay_ticks;
-
-/*
- * Cleanup possible dangling ends...
- */
-static __cpuinit void smp_cleanup_boot(void)
-{
- /*
- * Paranoid: Set warm reset code and vector here back
- * to default values.
- */
- CMOS_WRITE(0, 0xf);
-
- /*
- * Reset trampoline flag
- */
- *((volatile int *) phys_to_virt(0x467)) = 0;
-}
-
-/*
- * Fall back to non SMP mode after errors.
- *
- * RED-PEN audit/test this more. I bet there is more state messed up here.
- */
-static __init void disable_smp(void)
-{
- cpu_present_map = cpumask_of_cpu(0);
- cpu_possible_map = cpumask_of_cpu(0);
- if (smp_found_config)
- phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
- else
- phys_cpu_present_map = physid_mask_of_physid(0);
- cpu_set(0, cpu_sibling_map[0]);
- cpu_set(0, cpu_core_map[0]);
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-
-int additional_cpus __initdata = -1;
-
-/*
- * cpu_possible_map should be static, it cannot change as cpu's
- * are onlined, or offlined. The reason is per-cpu data-structures
- * are allocated by some modules at init time, and dont expect to
- * do this dynamically on cpu arrival/departure.
- * cpu_present_map on the other hand can change dynamically.
- * In case when cpu_hotplug is not compiled, then we resort to current
- * behaviour, which is cpu_possible == cpu_present.
- * - Ashok Raj
- *
- * Three ways to find out the number of additional hotplug CPUs:
- * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
- * - The user can overwrite it with additional_cpus=NUM
- * - Otherwise don't reserve additional CPUs.
- * We do this because additional CPUs waste a lot of memory.
- * -AK
- */
-__init void prefill_possible_map(void)
-{
- int i;
- int possible;
-
- if (additional_cpus == -1) {
- if (disabled_cpus > 0)
- additional_cpus = disabled_cpus;
- else
- additional_cpus = 0;
- }
- possible = num_processors + additional_cpus;
- if (possible > NR_CPUS)
- possible = NR_CPUS;
-
- printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
- possible,
- max_t(int, possible - num_processors, 0));
-
- for (i = 0; i < possible; i++)
- cpu_set(i, cpu_possible_map);
-}
-#endif
-
-/*
- * Various sanity checks.
- */
-static int __init smp_sanity_check(unsigned max_cpus)
-{
- if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
- printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
- hard_smp_processor_id());
- physid_set(hard_smp_processor_id(), phys_cpu_present_map);
- }
-
- /*
- * If we couldn't find an SMP configuration at boot time,
- * get out of here now!
- */
- if (!smp_found_config) {
- printk(KERN_NOTICE "SMP motherboard not detected.\n");
- disable_smp();
- if (APIC_init_uniprocessor())
- printk(KERN_NOTICE "Local APIC not detected."
- " Using dummy APIC emulation.\n");
- return -1;
- }
-
- /*
- * Should not be necessary because the MP table should list the boot
- * CPU too, but we do it for the sake of robustness anyway.
- */
- if (!physid_isset(boot_cpu_id, phys_cpu_present_map)) {
- printk(KERN_NOTICE "weird, boot CPU (#%d) not listed by the BIOS.\n",
- boot_cpu_id);
- physid_set(hard_smp_processor_id(), phys_cpu_present_map);
- }
-
- /*
- * If we couldn't find a local APIC, then get out of here now!
- */
- if (!cpu_has_apic) {
- printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
- boot_cpu_id);
- printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
- nr_ioapics = 0;
- return -1;
- }
-
- /*
- * If SMP should be disabled, then really disable it!
- */
- if (!max_cpus) {
- printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
- nr_ioapics = 0;
- return -1;
- }
-
- return 0;
-}
-
-/*
- * Prepare for SMP bootup. The MP table or ACPI has been read
- * earlier. Just do some sanity checking here and enable APIC mode.
- */
-void __init smp_prepare_cpus(unsigned int max_cpus)
-{
- nmi_watchdog_default();
- current_cpu_data = boot_cpu_data;
- current_thread_info()->cpu = 0; /* needed? */
- set_cpu_sibling_map(0);
-
- if (smp_sanity_check(max_cpus) < 0) {
- printk(KERN_INFO "SMP disabled\n");
- disable_smp();
- return;
- }
-
-
- /*
- * Switch from PIC to APIC mode.
- */
- setup_local_APIC();
-
- if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) {
- panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
- GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id);
- /* Or can we switch back to PIC here? */
- }
-
- /*
- * Now start the IO-APICs
- */
- if (!skip_ioapic_setup && nr_ioapics)
- setup_IO_APIC();
- else
- nr_ioapics = 0;
-
- /*
- * Set up local APIC timer on boot CPU.
- */
-
- setup_boot_APIC_clock();
-}
-
-/*
- * Early setup to make printk work.
- */
-void __init smp_prepare_boot_cpu(void)
-{
- int me = smp_processor_id();
- cpu_set(me, cpu_online_map);
- cpu_set(me, cpu_callout_map);
- per_cpu(cpu_state, me) = CPU_ONLINE;
-}
-
-/*
- * Entry point to boot a CPU.
- */
-int __cpuinit __cpu_up(unsigned int cpu)
-{
- int apicid = cpu_present_to_apicid(cpu);
- unsigned long flags;
- int err;
-
- WARN_ON(irqs_disabled());
-
- Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu);
-
- if (apicid == BAD_APICID || apicid == boot_cpu_id ||
- !physid_isset(apicid, phys_cpu_present_map)) {
- printk("__cpu_up: bad cpu %d\n", cpu);
- return -EINVAL;
- }
-
- /*
- * Already booted CPU?
- */
- if (cpu_isset(cpu, cpu_callin_map)) {
- Dprintk("do_boot_cpu %d Already started\n", cpu);
- return -ENOSYS;
- }
-
- /*
- * Save current MTRR state in case it was changed since early boot
- * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync:
- */
- mtrr_save_state();
-
- per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
- /* Boot it! */
- err = do_boot_cpu(cpu, apicid);
- if (err < 0) {
- Dprintk("do_boot_cpu failed %d\n", err);
- return err;
- }
-
- /* Unleash the CPU! */
- Dprintk("waiting for cpu %d\n", cpu);
-
- /*
- * Make sure and check TSC sync:
- */
- local_irq_save(flags);
- check_tsc_sync_source(cpu);
- local_irq_restore(flags);
-
- while (!cpu_isset(cpu, cpu_online_map))
- cpu_relax();
- err = 0;
-
- return err;
-}
-
-/*
- * Finish the SMP boot.
- */
-void __init smp_cpus_done(unsigned int max_cpus)
-{
- smp_cleanup_boot();
- setup_ioapic_dest();
- check_nmi_watchdog();
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-
-static void remove_siblinginfo(int cpu)
-{
- int sibling;
- struct cpuinfo_x86 *c = cpu_data;
-
- for_each_cpu_mask(sibling, cpu_core_map[cpu]) {
- cpu_clear(cpu, cpu_core_map[sibling]);
- /*
- * last thread sibling in this cpu core going down
- */
- if (cpus_weight(cpu_sibling_map[cpu]) == 1)
- c[sibling].booted_cores--;
- }
-
- for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
- cpu_clear(cpu, cpu_sibling_map[sibling]);
- cpus_clear(cpu_sibling_map[cpu]);
- cpus_clear(cpu_core_map[cpu]);
- c[cpu].phys_proc_id = 0;
- c[cpu].cpu_core_id = 0;
- cpu_clear(cpu, cpu_sibling_setup_map);
-}
-
-void remove_cpu_from_maps(void)
-{
- int cpu = smp_processor_id();
-
- cpu_clear(cpu, cpu_callout_map);
- cpu_clear(cpu, cpu_callin_map);
- clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
- clear_node_cpumask(cpu);
-}
-
-int __cpu_disable(void)
-{
- int cpu = smp_processor_id();
-
- /*
- * Perhaps use cpufreq to drop frequency, but that could go
- * into generic code.
- *
- * We won't take down the boot processor on i386 due to some
- * interrupts only being able to be serviced by the BSP.
- * Especially so if we're not using an IOAPIC -zwane
- */
- if (cpu == 0)
- return -EBUSY;
-
- if (nmi_watchdog == NMI_LOCAL_APIC)
- stop_apic_nmi_watchdog(NULL);
- clear_local_APIC();
-
- /*
- * HACK:
- * Allow any queued timer interrupts to get serviced
- * This is only a temporary solution until we cleanup
- * fixup_irqs as we do for IA64.
- */
- local_irq_enable();
- mdelay(1);
-
- local_irq_disable();
- remove_siblinginfo(cpu);
-
- spin_lock(&vector_lock);
- /* It's now safe to remove this processor from the online map */
- cpu_clear(cpu, cpu_online_map);
- spin_unlock(&vector_lock);
- remove_cpu_from_maps();
- fixup_irqs(cpu_online_map);
- return 0;
-}
-
-void __cpu_die(unsigned int cpu)
-{
- /* We don't do anything here: idle task is faking death itself. */
- unsigned int i;
-
- for (i = 0; i < 10; i++) {
- /* They ack this in play_dead by setting CPU_DEAD */
- if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
- printk ("CPU %d is now offline\n", cpu);
- if (1 == num_online_cpus())
- alternatives_smp_switch(0);
- return;
- }
- msleep(100);
- }
- printk(KERN_ERR "CPU %u didn't die...\n", cpu);
-}
-
-static __init int setup_additional_cpus(char *s)
-{
- return s && get_option(&s, &additional_cpus) ? 0 : -EINVAL;
-}
-early_param("additional_cpus", setup_additional_cpus);
-
-#else /* ... !CONFIG_HOTPLUG_CPU */
-
-int __cpu_disable(void)
-{
- return -ENOSYS;
-}
-
-void __cpu_die(unsigned int cpu)
-{
- /* We said "no" in __cpu_disable */
- BUG();
-}
-#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/arch/x86_64/kernel/stacktrace.c b/arch/x86_64/kernel/stacktrace.c
deleted file mode 100644
index cb9109113584..000000000000
--- a/arch/x86_64/kernel/stacktrace.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * arch/x86_64/kernel/stacktrace.c
- *
- * Stack trace management functions
- *
- * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
- */
-#include <linux/sched.h>
-#include <linux/stacktrace.h>
-#include <linux/module.h>
-#include <asm/stacktrace.h>
-
-static void save_stack_warning(void *data, char *msg)
-{
-}
-
-static void
-save_stack_warning_symbol(void *data, char *msg, unsigned long symbol)
-{
-}
-
-static int save_stack_stack(void *data, char *name)
-{
- return -1;
-}
-
-static void save_stack_address(void *data, unsigned long addr)
-{
- struct stack_trace *trace = (struct stack_trace *)data;
- if (trace->skip > 0) {
- trace->skip--;
- return;
- }
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = addr;
-}
-
-static struct stacktrace_ops save_stack_ops = {
- .warning = save_stack_warning,
- .warning_symbol = save_stack_warning_symbol,
- .stack = save_stack_stack,
- .address = save_stack_address,
-};
-
-/*
- * Save stack-backtrace addresses into a stack_trace buffer.
- */
-void save_stack_trace(struct stack_trace *trace)
-{
- dump_trace(current, NULL, NULL, &save_stack_ops, trace);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
-}
-EXPORT_SYMBOL(save_stack_trace);
diff --git a/arch/x86_64/kernel/suspend.c b/arch/x86_64/kernel/suspend.c
deleted file mode 100644
index 573c0a6e0ac6..000000000000
--- a/arch/x86_64/kernel/suspend.c
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- * Suspend support specific for i386.
- *
- * Distribute under GPLv2
- *
- * Copyright (c) 2002 Pavel Machek <pavel@suse.cz>
- * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
- */
-
-#include <linux/smp.h>
-#include <linux/suspend.h>
-#include <asm/proto.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/mtrr.h>
-
-/* References to section boundaries */
-extern const void __nosave_begin, __nosave_end;
-
-struct saved_context saved_context;
-
-unsigned long saved_context_eax, saved_context_ebx, saved_context_ecx, saved_context_edx;
-unsigned long saved_context_esp, saved_context_ebp, saved_context_esi, saved_context_edi;
-unsigned long saved_context_r08, saved_context_r09, saved_context_r10, saved_context_r11;
-unsigned long saved_context_r12, saved_context_r13, saved_context_r14, saved_context_r15;
-unsigned long saved_context_eflags;
-
-void __save_processor_state(struct saved_context *ctxt)
-{
- kernel_fpu_begin();
-
- /*
- * descriptor tables
- */
- asm volatile ("sgdt %0" : "=m" (ctxt->gdt_limit));
- asm volatile ("sidt %0" : "=m" (ctxt->idt_limit));
- asm volatile ("str %0" : "=m" (ctxt->tr));
-
- /* XMM0..XMM15 should be handled by kernel_fpu_begin(). */
- /*
- * segment registers
- */
- asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds));
- asm volatile ("movw %%es, %0" : "=m" (ctxt->es));
- asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs));
- asm volatile ("movw %%gs, %0" : "=m" (ctxt->gs));
- asm volatile ("movw %%ss, %0" : "=m" (ctxt->ss));
-
- rdmsrl(MSR_FS_BASE, ctxt->fs_base);
- rdmsrl(MSR_GS_BASE, ctxt->gs_base);
- rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
- mtrr_save_fixed_ranges(NULL);
-
- /*
- * control registers
- */
- rdmsrl(MSR_EFER, ctxt->efer);
- ctxt->cr0 = read_cr0();
- ctxt->cr2 = read_cr2();
- ctxt->cr3 = read_cr3();
- ctxt->cr4 = read_cr4();
- ctxt->cr8 = read_cr8();
-}
-
-void save_processor_state(void)
-{
- __save_processor_state(&saved_context);
-}
-
-static void do_fpu_end(void)
-{
- /*
- * Restore FPU regs if necessary
- */
- kernel_fpu_end();
-}
-
-void __restore_processor_state(struct saved_context *ctxt)
-{
- /*
- * control registers
- */
- wrmsrl(MSR_EFER, ctxt->efer);
- write_cr8(ctxt->cr8);
- write_cr4(ctxt->cr4);
- write_cr3(ctxt->cr3);
- write_cr2(ctxt->cr2);
- write_cr0(ctxt->cr0);
-
- /*
- * now restore the descriptor tables to their proper values
- * ltr is done i fix_processor_context().
- */
- asm volatile ("lgdt %0" :: "m" (ctxt->gdt_limit));
- asm volatile ("lidt %0" :: "m" (ctxt->idt_limit));
-
- /*
- * segment registers
- */
- asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds));
- asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
- asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs));
- load_gs_index(ctxt->gs);
- asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss));
-
- wrmsrl(MSR_FS_BASE, ctxt->fs_base);
- wrmsrl(MSR_GS_BASE, ctxt->gs_base);
- wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
-
- fix_processor_context();
-
- do_fpu_end();
- mtrr_ap_init();
-}
-
-void restore_processor_state(void)
-{
- __restore_processor_state(&saved_context);
-}
-
-void fix_processor_context(void)
-{
- int cpu = smp_processor_id();
- struct tss_struct *t = &per_cpu(init_tss, cpu);
-
- set_tss_desc(cpu,t); /* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy TSS or some similar stupidity. */
-
- cpu_gdt(cpu)[GDT_ENTRY_TSS].type = 9;
-
- syscall_init(); /* This sets MSR_*STAR and related */
- load_TR_desc(); /* This does ltr */
- load_LDT(&current->active_mm->context); /* This does lldt */
-
- /*
- * Now maybe reload the debug registers
- */
- if (current->thread.debugreg7){
- loaddebug(&current->thread, 0);
- loaddebug(&current->thread, 1);
- loaddebug(&current->thread, 2);
- loaddebug(&current->thread, 3);
- /* no 4 and 5 */
- loaddebug(&current->thread, 6);
- loaddebug(&current->thread, 7);
- }
-
-}
-
-#ifdef CONFIG_HIBERNATION
-/* Defined in arch/x86_64/kernel/suspend_asm.S */
-extern int restore_image(void);
-
-pgd_t *temp_level4_pgt;
-
-static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
-{
- long i, j;
-
- i = pud_index(address);
- pud = pud + i;
- for (; i < PTRS_PER_PUD; pud++, i++) {
- unsigned long paddr;
- pmd_t *pmd;
-
- paddr = address + i*PUD_SIZE;
- if (paddr >= end)
- break;
-
- pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
- if (!pmd)
- return -ENOMEM;
- set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
- for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
- unsigned long pe;
-
- if (paddr >= end)
- break;
- pe = _PAGE_NX | _PAGE_PSE | _KERNPG_TABLE | paddr;
- pe &= __supported_pte_mask;
- set_pmd(pmd, __pmd(pe));
- }
- }
- return 0;
-}
-
-static int set_up_temporary_mappings(void)
-{
- unsigned long start, end, next;
- int error;
-
- temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC);
- if (!temp_level4_pgt)
- return -ENOMEM;
-
- /* It is safe to reuse the original kernel mapping */
- set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
- init_level4_pgt[pgd_index(__START_KERNEL_map)]);
-
- /* Set up the direct mapping from scratch */
- start = (unsigned long)pfn_to_kaddr(0);
- end = (unsigned long)pfn_to_kaddr(end_pfn);
-
- for (; start < end; start = next) {
- pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC);
- if (!pud)
- return -ENOMEM;
- next = start + PGDIR_SIZE;
- if (next > end)
- next = end;
- if ((error = res_phys_pud_init(pud, __pa(start), __pa(next))))
- return error;
- set_pgd(temp_level4_pgt + pgd_index(start),
- mk_kernel_pgd(__pa(pud)));
- }
- return 0;
-}
-
-int swsusp_arch_resume(void)
-{
- int error;
-
- /* We have got enough memory and from now on we cannot recover */
- if ((error = set_up_temporary_mappings()))
- return error;
- restore_image();
- return 0;
-}
-
-/*
- * pfn_is_nosave - check if given pfn is in the 'nosave' section
- */
-
-int pfn_is_nosave(unsigned long pfn)
-{
- unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
- unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
- return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
-}
-#endif /* CONFIG_HIBERNATION */
diff --git a/arch/x86_64/kernel/suspend_asm.S b/arch/x86_64/kernel/suspend_asm.S
deleted file mode 100644
index 16d183f67bc1..000000000000
--- a/arch/x86_64/kernel/suspend_asm.S
+++ /dev/null
@@ -1,110 +0,0 @@
-/* Copyright 2004,2005 Pavel Machek <pavel@suse.cz>, Andi Kleen <ak@suse.de>, Rafael J. Wysocki <rjw@sisk.pl>
- *
- * Distribute under GPLv2.
- *
- * swsusp_arch_resume may not use any stack, nor any variable that is
- * not "NoSave" during copying pages:
- *
- * Its rewriting one kernel image with another. What is stack in "old"
- * image could very well be data page in "new" image, and overwriting
- * your own stack under you is bad idea.
- */
-
- .text
-#include <linux/linkage.h>
-#include <asm/segment.h>
-#include <asm/page.h>
-#include <asm/asm-offsets.h>
-
-ENTRY(swsusp_arch_suspend)
-
- movq %rsp, saved_context_esp(%rip)
- movq %rax, saved_context_eax(%rip)
- movq %rbx, saved_context_ebx(%rip)
- movq %rcx, saved_context_ecx(%rip)
- movq %rdx, saved_context_edx(%rip)
- movq %rbp, saved_context_ebp(%rip)
- movq %rsi, saved_context_esi(%rip)
- movq %rdi, saved_context_edi(%rip)
- movq %r8, saved_context_r08(%rip)
- movq %r9, saved_context_r09(%rip)
- movq %r10, saved_context_r10(%rip)
- movq %r11, saved_context_r11(%rip)
- movq %r12, saved_context_r12(%rip)
- movq %r13, saved_context_r13(%rip)
- movq %r14, saved_context_r14(%rip)
- movq %r15, saved_context_r15(%rip)
- pushfq ; popq saved_context_eflags(%rip)
-
- call swsusp_save
- ret
-
-ENTRY(restore_image)
- /* switch to temporary page tables */
- movq $__PAGE_OFFSET, %rdx
- movq temp_level4_pgt(%rip), %rax
- subq %rdx, %rax
- movq %rax, %cr3
- /* Flush TLB */
- movq mmu_cr4_features(%rip), %rax
- movq %rax, %rdx
- andq $~(1<<7), %rdx # PGE
- movq %rdx, %cr4; # turn off PGE
- movq %cr3, %rcx; # flush TLB
- movq %rcx, %cr3;
- movq %rax, %cr4; # turn PGE back on
-
- movq restore_pblist(%rip), %rdx
-loop:
- testq %rdx, %rdx
- jz done
-
- /* get addresses from the pbe and copy the page */
- movq pbe_address(%rdx), %rsi
- movq pbe_orig_address(%rdx), %rdi
- movq $512, %rcx
- rep
- movsq
-
- /* progress to the next pbe */
- movq pbe_next(%rdx), %rdx
- jmp loop
-done:
- /* go back to the original page tables */
- movq $(init_level4_pgt - __START_KERNEL_map), %rax
- addq phys_base(%rip), %rax
- movq %rax, %cr3
-
- /* Flush TLB, including "global" things (vmalloc) */
- movq mmu_cr4_features(%rip), %rax
- movq %rax, %rdx
- andq $~(1<<7), %rdx; # PGE
- movq %rdx, %cr4; # turn off PGE
- movq %cr3, %rcx; # flush TLB
- movq %rcx, %cr3
- movq %rax, %cr4; # turn PGE back on
-
- movl $24, %eax
- movl %eax, %ds
-
- movq saved_context_esp(%rip), %rsp
- movq saved_context_ebp(%rip), %rbp
- /* Don't restore %rax, it must be 0 anyway */
- movq saved_context_ebx(%rip), %rbx
- movq saved_context_ecx(%rip), %rcx
- movq saved_context_edx(%rip), %rdx
- movq saved_context_esi(%rip), %rsi
- movq saved_context_edi(%rip), %rdi
- movq saved_context_r08(%rip), %r8
- movq saved_context_r09(%rip), %r9
- movq saved_context_r10(%rip), %r10
- movq saved_context_r11(%rip), %r11
- movq saved_context_r12(%rip), %r12
- movq saved_context_r13(%rip), %r13
- movq saved_context_r14(%rip), %r14
- movq saved_context_r15(%rip), %r15
- pushq saved_context_eflags(%rip) ; popfq
-
- xorq %rax, %rax
-
- ret
diff --git a/arch/x86_64/kernel/sys_x86_64.c b/arch/x86_64/kernel/sys_x86_64.c
deleted file mode 100644
index 4770b7a2052c..000000000000
--- a/arch/x86_64/kernel/sys_x86_64.c
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * linux/arch/x86_64/kernel/sys_x86_64.c
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/syscalls.h>
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/smp.h>
-#include <linux/sem.h>
-#include <linux/msg.h>
-#include <linux/shm.h>
-#include <linux/stat.h>
-#include <linux/mman.h>
-#include <linux/file.h>
-#include <linux/utsname.h>
-#include <linux/personality.h>
-
-#include <asm/uaccess.h>
-#include <asm/ia32.h>
-
-/*
- * sys_pipe() is the normal C calling standard for creating
- * a pipe. It's not the way Unix traditionally does this, though.
- */
-asmlinkage long sys_pipe(int __user *fildes)
-{
- int fd[2];
- int error;
-
- error = do_pipe(fd);
- if (!error) {
- if (copy_to_user(fildes, fd, 2*sizeof(int)))
- error = -EFAULT;
- }
- return error;
-}
-
-asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long off)
-{
- long error;
- struct file * file;
-
- error = -EINVAL;
- if (off & ~PAGE_MASK)
- goto out;
-
- error = -EBADF;
- file = NULL;
- flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
- if (!(flags & MAP_ANONYMOUS)) {
- file = fget(fd);
- if (!file)
- goto out;
- }
- down_write(&current->mm->mmap_sem);
- error = do_mmap_pgoff(file, addr, len, prot, flags, off >> PAGE_SHIFT);
- up_write(&current->mm->mmap_sem);
-
- if (file)
- fput(file);
-out:
- return error;
-}
-
-static void find_start_end(unsigned long flags, unsigned long *begin,
- unsigned long *end)
-{
- if (!test_thread_flag(TIF_IA32) && (flags & MAP_32BIT)) {
- /* This is usually used needed to map code in small
- model, so it needs to be in the first 31bit. Limit
- it to that. This means we need to move the
- unmapped base down for this case. This can give
- conflicts with the heap, but we assume that glibc
- malloc knows how to fall back to mmap. Give it 1GB
- of playground for now. -AK */
- *begin = 0x40000000;
- *end = 0x80000000;
- } else {
- *begin = TASK_UNMAPPED_BASE;
- *end = TASK_SIZE;
- }
-}
-
-unsigned long
-arch_get_unmapped_area(struct file *filp, unsigned long addr,
- unsigned long len, unsigned long pgoff, unsigned long flags)
-{
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- unsigned long start_addr;
- unsigned long begin, end;
-
- if (flags & MAP_FIXED)
- return addr;
-
- find_start_end(flags, &begin, &end);
-
- if (len > end)
- return -ENOMEM;
-
- if (addr) {
- addr = PAGE_ALIGN(addr);
- vma = find_vma(mm, addr);
- if (end - len >= addr &&
- (!vma || addr + len <= vma->vm_start))
- return addr;
- }
- if (((flags & MAP_32BIT) || test_thread_flag(TIF_IA32))
- && len <= mm->cached_hole_size) {
- mm->cached_hole_size = 0;
- mm->free_area_cache = begin;
- }
- addr = mm->free_area_cache;
- if (addr < begin)
- addr = begin;
- start_addr = addr;
-
-full_search:
- for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
- /* At this point: (!vma || addr < vma->vm_end). */
- if (end - len < addr) {
- /*
- * Start a new search - just in case we missed
- * some holes.
- */
- if (start_addr != begin) {
- start_addr = addr = begin;
- mm->cached_hole_size = 0;
- goto full_search;
- }
- return -ENOMEM;
- }
- if (!vma || addr + len <= vma->vm_start) {
- /*
- * Remember the place where we stopped the search:
- */
- mm->free_area_cache = addr + len;
- return addr;
- }
- if (addr + mm->cached_hole_size < vma->vm_start)
- mm->cached_hole_size = vma->vm_start - addr;
-
- addr = vma->vm_end;
- }
-}
-
-asmlinkage long sys_uname(struct new_utsname __user * name)
-{
- int err;
- down_read(&uts_sem);
- err = copy_to_user(name, utsname(), sizeof (*name));
- up_read(&uts_sem);
- if (personality(current->personality) == PER_LINUX32)
- err |= copy_to_user(&name->machine, "i686", 5);
- return err ? -EFAULT : 0;
-}
diff --git a/arch/x86_64/kernel/syscall.c b/arch/x86_64/kernel/syscall.c
deleted file mode 100644
index 63d592c276cc..000000000000
--- a/arch/x86_64/kernel/syscall.c
+++ /dev/null
@@ -1,26 +0,0 @@
-/* System call table for x86-64. */
-
-#include <linux/linkage.h>
-#include <linux/sys.h>
-#include <linux/cache.h>
-#include <asm/asm-offsets.h>
-
-#define __NO_STUBS
-
-#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ;
-#undef _ASM_X86_64_UNISTD_H_
-#include <asm-x86_64/unistd.h>
-
-#undef __SYSCALL
-#define __SYSCALL(nr, sym) [ nr ] = sym,
-#undef _ASM_X86_64_UNISTD_H_
-
-typedef void (*sys_call_ptr_t)(void);
-
-extern void sys_ni_syscall(void);
-
-const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
- /* Smells like a like a compiler bug -- it doesn't work when the & below is removed. */
- [0 ... __NR_syscall_max] = &sys_ni_syscall,
-#include <asm-x86_64/unistd.h>
-};
diff --git a/arch/x86_64/kernel/tce.c b/arch/x86_64/kernel/tce.c
deleted file mode 100644
index e3f2569b2c44..000000000000
--- a/arch/x86_64/kernel/tce.c
+++ /dev/null
@@ -1,189 +0,0 @@
-/*
- * This file manages the translation entries for the IBM Calgary IOMMU.
- *
- * Derived from arch/powerpc/platforms/pseries/iommu.c
- *
- * Copyright (C) IBM Corporation, 2006
- *
- * Author: Jon Mason <jdmason@us.ibm.com>
- * Author: Muli Ben-Yehuda <muli@il.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-#include <linux/spinlock.h>
-#include <linux/string.h>
-#include <linux/pci.h>
-#include <linux/dma-mapping.h>
-#include <linux/bootmem.h>
-#include <asm/tce.h>
-#include <asm/calgary.h>
-#include <asm/proto.h>
-
-/* flush a tce at 'tceaddr' to main memory */
-static inline void flush_tce(void* tceaddr)
-{
- /* a single tce can't cross a cache line */
- if (cpu_has_clflush)
- asm volatile("clflush (%0)" :: "r" (tceaddr));
- else
- asm volatile("wbinvd":::"memory");
-}
-
-void tce_build(struct iommu_table *tbl, unsigned long index,
- unsigned int npages, unsigned long uaddr, int direction)
-{
- u64* tp;
- u64 t;
- u64 rpn;
-
- t = (1 << TCE_READ_SHIFT);
- if (direction != DMA_TO_DEVICE)
- t |= (1 << TCE_WRITE_SHIFT);
-
- tp = ((u64*)tbl->it_base) + index;
-
- while (npages--) {
- rpn = (virt_to_bus((void*)uaddr)) >> PAGE_SHIFT;
- t &= ~TCE_RPN_MASK;
- t |= (rpn << TCE_RPN_SHIFT);
-
- *tp = cpu_to_be64(t);
- flush_tce(tp);
-
- uaddr += PAGE_SIZE;
- tp++;
- }
-}
-
-void tce_free(struct iommu_table *tbl, long index, unsigned int npages)
-{
- u64* tp;
-
- tp = ((u64*)tbl->it_base) + index;
-
- while (npages--) {
- *tp = cpu_to_be64(0);
- flush_tce(tp);
- tp++;
- }
-}
-
-static inline unsigned int table_size_to_number_of_entries(unsigned char size)
-{
- /*
- * size is the order of the table, 0-7
- * smallest table is 8K entries, so shift result by 13 to
- * multiply by 8K
- */
- return (1 << size) << 13;
-}
-
-static int tce_table_setparms(struct pci_dev *dev, struct iommu_table *tbl)
-{
- unsigned int bitmapsz;
- unsigned long bmppages;
- int ret;
-
- tbl->it_busno = dev->bus->number;
-
- /* set the tce table size - measured in entries */
- tbl->it_size = table_size_to_number_of_entries(specified_table_size);
-
- /*
- * number of bytes needed for the bitmap size in number of
- * entries; we need one bit per entry
- */
- bitmapsz = tbl->it_size / BITS_PER_BYTE;
- bmppages = __get_free_pages(GFP_KERNEL, get_order(bitmapsz));
- if (!bmppages) {
- printk(KERN_ERR "Calgary: cannot allocate bitmap\n");
- ret = -ENOMEM;
- goto done;
- }
-
- tbl->it_map = (unsigned long*)bmppages;
-
- memset(tbl->it_map, 0, bitmapsz);
-
- tbl->it_hint = 0;
-
- spin_lock_init(&tbl->it_lock);
-
- return 0;
-
-done:
- return ret;
-}
-
-int __init build_tce_table(struct pci_dev *dev, void __iomem *bbar)
-{
- struct iommu_table *tbl;
- int ret;
-
- if (pci_iommu(dev->bus)) {
- printk(KERN_ERR "Calgary: dev %p has sysdata->iommu %p\n",
- dev, pci_iommu(dev->bus));
- BUG();
- }
-
- tbl = kzalloc(sizeof(struct iommu_table), GFP_KERNEL);
- if (!tbl) {
- printk(KERN_ERR "Calgary: error allocating iommu_table\n");
- ret = -ENOMEM;
- goto done;
- }
-
- ret = tce_table_setparms(dev, tbl);
- if (ret)
- goto free_tbl;
-
- tbl->bbar = bbar;
-
- set_pci_iommu(dev->bus, tbl);
-
- return 0;
-
-free_tbl:
- kfree(tbl);
-done:
- return ret;
-}
-
-void * __init alloc_tce_table(void)
-{
- unsigned int size;
-
- size = table_size_to_number_of_entries(specified_table_size);
- size *= TCE_ENTRY_SIZE;
-
- return __alloc_bootmem_low(size, size, 0);
-}
-
-void __init free_tce_table(void *tbl)
-{
- unsigned int size;
-
- if (!tbl)
- return;
-
- size = table_size_to_number_of_entries(specified_table_size);
- size *= TCE_ENTRY_SIZE;
-
- free_bootmem(__pa(tbl), size);
-}
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
deleted file mode 100644
index 6d48a4e826d9..000000000000
--- a/arch/x86_64/kernel/time.c
+++ /dev/null
@@ -1,447 +0,0 @@
-/*
- * linux/arch/x86-64/kernel/time.c
- *
- * "High Precision Event Timer" based timekeeping.
- *
- * Copyright (c) 1991,1992,1995 Linus Torvalds
- * Copyright (c) 1994 Alan Modra
- * Copyright (c) 1995 Markus Kuhn
- * Copyright (c) 1996 Ingo Molnar
- * Copyright (c) 1998 Andrea Arcangeli
- * Copyright (c) 2002,2006 Vojtech Pavlik
- * Copyright (c) 2003 Andi Kleen
- * RTC support code taken from arch/i386/kernel/timers/time_hpet.c
- */
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/mc146818rtc.h>
-#include <linux/time.h>
-#include <linux/ioport.h>
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/sysdev.h>
-#include <linux/bcd.h>
-#include <linux/notifier.h>
-#include <linux/cpu.h>
-#include <linux/kallsyms.h>
-#include <linux/acpi.h>
-#ifdef CONFIG_ACPI
-#include <acpi/achware.h> /* for PM timer frequency */
-#include <acpi/acpi_bus.h>
-#endif
-#include <asm/8253pit.h>
-#include <asm/i8253.h>
-#include <asm/pgtable.h>
-#include <asm/vsyscall.h>
-#include <asm/timex.h>
-#include <asm/proto.h>
-#include <asm/hpet.h>
-#include <asm/sections.h>
-#include <linux/hpet.h>
-#include <asm/apic.h>
-#include <asm/hpet.h>
-#include <asm/mpspec.h>
-#include <asm/nmi.h>
-#include <asm/vgtod.h>
-
-static char *timename = NULL;
-
-DEFINE_SPINLOCK(rtc_lock);
-EXPORT_SYMBOL(rtc_lock);
-DEFINE_SPINLOCK(i8253_lock);
-EXPORT_SYMBOL(i8253_lock);
-
-volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
-
-unsigned long profile_pc(struct pt_regs *regs)
-{
- unsigned long pc = instruction_pointer(regs);
-
- /* Assume the lock function has either no stack frame or a copy
- of eflags from PUSHF
- Eflags always has bits 22 and up cleared unlike kernel addresses. */
- if (!user_mode(regs) && in_lock_functions(pc)) {
- unsigned long *sp = (unsigned long *)regs->rsp;
- if (sp[0] >> 22)
- return sp[0];
- if (sp[1] >> 22)
- return sp[1];
- }
- return pc;
-}
-EXPORT_SYMBOL(profile_pc);
-
-/*
- * In order to set the CMOS clock precisely, set_rtc_mmss has to be called 500
- * ms after the second nowtime has started, because when nowtime is written
- * into the registers of the CMOS clock, it will jump to the next second
- * precisely 500 ms later. Check the Motorola MC146818A or Dallas DS12887 data
- * sheet for details.
- */
-
-static int set_rtc_mmss(unsigned long nowtime)
-{
- int retval = 0;
- int real_seconds, real_minutes, cmos_minutes;
- unsigned char control, freq_select;
-
-/*
- * IRQs are disabled when we're called from the timer interrupt,
- * no need for spin_lock_irqsave()
- */
-
- spin_lock(&rtc_lock);
-
-/*
- * Tell the clock it's being set and stop it.
- */
-
- control = CMOS_READ(RTC_CONTROL);
- CMOS_WRITE(control | RTC_SET, RTC_CONTROL);
-
- freq_select = CMOS_READ(RTC_FREQ_SELECT);
- CMOS_WRITE(freq_select | RTC_DIV_RESET2, RTC_FREQ_SELECT);
-
- cmos_minutes = CMOS_READ(RTC_MINUTES);
- BCD_TO_BIN(cmos_minutes);
-
-/*
- * since we're only adjusting minutes and seconds, don't interfere with hour
- * overflow. This avoids messing with unknown time zones but requires your RTC
- * not to be off by more than 15 minutes. Since we're calling it only when
- * our clock is externally synchronized using NTP, this shouldn't be a problem.
- */
-
- real_seconds = nowtime % 60;
- real_minutes = nowtime / 60;
- if (((abs(real_minutes - cmos_minutes) + 15) / 30) & 1)
- real_minutes += 30; /* correct for half hour time zone */
- real_minutes %= 60;
-
- if (abs(real_minutes - cmos_minutes) >= 30) {
- printk(KERN_WARNING "time.c: can't update CMOS clock "
- "from %d to %d\n", cmos_minutes, real_minutes);
- retval = -1;
- } else {
- BIN_TO_BCD(real_seconds);
- BIN_TO_BCD(real_minutes);
- CMOS_WRITE(real_seconds, RTC_SECONDS);
- CMOS_WRITE(real_minutes, RTC_MINUTES);
- }
-
-/*
- * The following flags have to be released exactly in this order, otherwise the
- * DS12887 (popular MC146818A clone with integrated battery and quartz) will
- * not reset the oscillator and will not update precisely 500 ms later. You
- * won't find this mentioned in the Dallas Semiconductor data sheets, but who
- * believes data sheets anyway ... -- Markus Kuhn
- */
-
- CMOS_WRITE(control, RTC_CONTROL);
- CMOS_WRITE(freq_select, RTC_FREQ_SELECT);
-
- spin_unlock(&rtc_lock);
-
- return retval;
-}
-
-int update_persistent_clock(struct timespec now)
-{
- return set_rtc_mmss(now.tv_sec);
-}
-
-void main_timer_handler(void)
-{
-/*
- * Here we are in the timer irq handler. We have irqs locally disabled (so we
- * don't need spin_lock_irqsave()) but we don't know if the timer_bh is running
- * on the other CPU, so we need a lock. We also need to lock the vsyscall
- * variables, because both do_timer() and us change them -arca+vojtech
- */
-
- write_seqlock(&xtime_lock);
-
-/*
- * Do the timer stuff.
- */
-
- do_timer(1);
-#ifndef CONFIG_SMP
- update_process_times(user_mode(get_irq_regs()));
-#endif
-
-/*
- * In the SMP case we use the local APIC timer interrupt to do the profiling,
- * except when we simulate SMP mode on a uniprocessor system, in that case we
- * have to call the local interrupt handler.
- */
-
- if (!using_apic_timer)
- smp_local_timer_interrupt();
-
- write_sequnlock(&xtime_lock);
-}
-
-static irqreturn_t timer_interrupt(int irq, void *dev_id)
-{
- if (apic_runs_main_timer > 1)
- return IRQ_HANDLED;
- main_timer_handler();
- if (using_apic_timer)
- smp_send_timer_broadcast_ipi();
- return IRQ_HANDLED;
-}
-
-unsigned long read_persistent_clock(void)
-{
- unsigned int year, mon, day, hour, min, sec;
- unsigned long flags;
- unsigned century = 0;
-
- spin_lock_irqsave(&rtc_lock, flags);
-
- do {
- sec = CMOS_READ(RTC_SECONDS);
- min = CMOS_READ(RTC_MINUTES);
- hour = CMOS_READ(RTC_HOURS);
- day = CMOS_READ(RTC_DAY_OF_MONTH);
- mon = CMOS_READ(RTC_MONTH);
- year = CMOS_READ(RTC_YEAR);
-#ifdef CONFIG_ACPI
- if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
- acpi_gbl_FADT.century)
- century = CMOS_READ(acpi_gbl_FADT.century);
-#endif
- } while (sec != CMOS_READ(RTC_SECONDS));
-
- spin_unlock_irqrestore(&rtc_lock, flags);
-
- /*
- * We know that x86-64 always uses BCD format, no need to check the
- * config register.
- */
-
- BCD_TO_BIN(sec);
- BCD_TO_BIN(min);
- BCD_TO_BIN(hour);
- BCD_TO_BIN(day);
- BCD_TO_BIN(mon);
- BCD_TO_BIN(year);
-
- if (century) {
- BCD_TO_BIN(century);
- year += century * 100;
- printk(KERN_INFO "Extended CMOS year: %d\n", century * 100);
- } else {
- /*
- * x86-64 systems only exists since 2002.
- * This will work up to Dec 31, 2100
- */
- year += 2000;
- }
-
- return mktime(year, mon, day, hour, min, sec);
-}
-
-/* calibrate_cpu is used on systems with fixed rate TSCs to determine
- * processor frequency */
-#define TICK_COUNT 100000000
-static unsigned int __init tsc_calibrate_cpu_khz(void)
-{
- int tsc_start, tsc_now;
- int i, no_ctr_free;
- unsigned long evntsel3 = 0, pmc3 = 0, pmc_now = 0;
- unsigned long flags;
-
- for (i = 0; i < 4; i++)
- if (avail_to_resrv_perfctr_nmi_bit(i))
- break;
- no_ctr_free = (i == 4);
- if (no_ctr_free) {
- i = 3;
- rdmsrl(MSR_K7_EVNTSEL3, evntsel3);
- wrmsrl(MSR_K7_EVNTSEL3, 0);
- rdmsrl(MSR_K7_PERFCTR3, pmc3);
- } else {
- reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i);
- reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
- }
- local_irq_save(flags);
- /* start meauring cycles, incrementing from 0 */
- wrmsrl(MSR_K7_PERFCTR0 + i, 0);
- wrmsrl(MSR_K7_EVNTSEL0 + i, 1 << 22 | 3 << 16 | 0x76);
- rdtscl(tsc_start);
- do {
- rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now);
- tsc_now = get_cycles_sync();
- } while ((tsc_now - tsc_start) < TICK_COUNT);
-
- local_irq_restore(flags);
- if (no_ctr_free) {
- wrmsrl(MSR_K7_EVNTSEL3, 0);
- wrmsrl(MSR_K7_PERFCTR3, pmc3);
- wrmsrl(MSR_K7_EVNTSEL3, evntsel3);
- } else {
- release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
- release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
- }
-
- return pmc_now * tsc_khz / (tsc_now - tsc_start);
-}
-
-/*
- * pit_calibrate_tsc() uses the speaker output (channel 2) of
- * the PIT. This is better than using the timer interrupt output,
- * because we can read the value of the speaker with just one inb(),
- * where we need three i/o operations for the interrupt channel.
- * We count how many ticks the TSC does in 50 ms.
- */
-
-static unsigned int __init pit_calibrate_tsc(void)
-{
- unsigned long start, end;
- unsigned long flags;
-
- spin_lock_irqsave(&i8253_lock, flags);
-
- outb((inb(0x61) & ~0x02) | 0x01, 0x61);
-
- outb(0xb0, 0x43);
- outb((PIT_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
- outb((PIT_TICK_RATE / (1000 / 50)) >> 8, 0x42);
- start = get_cycles_sync();
- while ((inb(0x61) & 0x20) == 0);
- end = get_cycles_sync();
-
- spin_unlock_irqrestore(&i8253_lock, flags);
-
- return (end - start) / 50;
-}
-
-#define PIT_MODE 0x43
-#define PIT_CH0 0x40
-
-static void __pit_init(int val, u8 mode)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&i8253_lock, flags);
- outb_p(mode, PIT_MODE);
- outb_p(val & 0xff, PIT_CH0); /* LSB */
- outb_p(val >> 8, PIT_CH0); /* MSB */
- spin_unlock_irqrestore(&i8253_lock, flags);
-}
-
-void __init pit_init(void)
-{
- __pit_init(LATCH, 0x34); /* binary, mode 2, LSB/MSB, ch 0 */
-}
-
-void pit_stop_interrupt(void)
-{
- __pit_init(0, 0x30); /* mode 0 */
-}
-
-void stop_timer_interrupt(void)
-{
- char *name;
- if (hpet_address) {
- name = "HPET";
- hpet_timer_stop_set_go(0);
- } else {
- name = "PIT";
- pit_stop_interrupt();
- }
- printk(KERN_INFO "timer: %s interrupt stopped.\n", name);
-}
-
-static struct irqaction irq0 = {
- .handler = timer_interrupt,
- .flags = IRQF_DISABLED | IRQF_IRQPOLL,
- .mask = CPU_MASK_NONE,
- .name = "timer"
-};
-
-void __init time_init(void)
-{
- if (nohpet)
- hpet_address = 0;
-
- if (hpet_arch_init())
- hpet_address = 0;
-
- if (hpet_use_timer) {
- /* set tick_nsec to use the proper rate for HPET */
- tick_nsec = TICK_NSEC_HPET;
- tsc_khz = hpet_calibrate_tsc();
- timename = "HPET";
- } else {
- pit_init();
- tsc_khz = pit_calibrate_tsc();
- timename = "PIT";
- }
-
- cpu_khz = tsc_khz;
- if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) &&
- boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
- boot_cpu_data.x86 == 16)
- cpu_khz = tsc_calibrate_cpu_khz();
-
- if (unsynchronized_tsc())
- mark_tsc_unstable("TSCs unsynchronized");
-
- if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
- vgetcpu_mode = VGETCPU_RDTSCP;
- else
- vgetcpu_mode = VGETCPU_LSL;
-
- set_cyc2ns_scale(tsc_khz);
- printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
- cpu_khz / 1000, cpu_khz % 1000);
- init_tsc_clocksource();
-
- setup_irq(0, &irq0);
-}
-
-/*
- * sysfs support for the timer.
- */
-
-static int timer_suspend(struct sys_device *dev, pm_message_t state)
-{
- return 0;
-}
-
-static int timer_resume(struct sys_device *dev)
-{
- if (hpet_address)
- hpet_reenable();
- else
- i8254_timer_resume();
- return 0;
-}
-
-static struct sysdev_class timer_sysclass = {
- .resume = timer_resume,
- .suspend = timer_suspend,
- set_kset_name("timer"),
-};
-
-/* XXX this sysfs stuff should probably go elsewhere later -john */
-static struct sys_device device_timer = {
- .id = 0,
- .cls = &timer_sysclass,
-};
-
-static int time_init_device(void)
-{
- int error = sysdev_class_register(&timer_sysclass);
- if (!error)
- error = sysdev_register(&device_timer);
- return error;
-}
-
-device_initcall(time_init_device);
diff --git a/arch/x86_64/kernel/trampoline.S b/arch/x86_64/kernel/trampoline.S
deleted file mode 100644
index e7e2764c461b..000000000000
--- a/arch/x86_64/kernel/trampoline.S
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- *
- * Trampoline.S Derived from Setup.S by Linus Torvalds
- *
- * 4 Jan 1997 Michael Chastain: changed to gnu as.
- * 15 Sept 2005 Eric Biederman: 64bit PIC support
- *
- * Entry: CS:IP point to the start of our code, we are
- * in real mode with no stack, but the rest of the
- * trampoline page to make our stack and everything else
- * is a mystery.
- *
- * In fact we don't actually need a stack so we don't
- * set one up.
- *
- * On entry to trampoline_data, the processor is in real mode
- * with 16-bit addressing and 16-bit data. CS has some value
- * and IP is zero. Thus, data addresses need to be absolute
- * (no relocation) and are taken with regard to r_base.
- *
- * With the addition of trampoline_level4_pgt this code can
- * now enter a 64bit kernel that lives at arbitrary 64bit
- * physical addresses.
- *
- * If you work on this file, check the object module with objdump
- * --full-contents --reloc to make sure there are no relocation
- * entries.
- */
-
-#include <linux/linkage.h>
-#include <asm/pgtable.h>
-#include <asm/page.h>
-#include <asm/msr.h>
-#include <asm/segment.h>
-
-.data
-
-.code16
-
-ENTRY(trampoline_data)
-r_base = .
- cli # We should be safe anyway
- wbinvd
- mov %cs, %ax # Code and data in the same place
- mov %ax, %ds
- mov %ax, %es
- mov %ax, %ss
-
-
- movl $0xA5A5A5A5, trampoline_data - r_base
- # write marker for master knows we're running
-
- # Setup stack
- movw $(trampoline_stack_end - r_base), %sp
-
- call verify_cpu # Verify the cpu supports long mode
- testl %eax, %eax # Check for return code
- jnz no_longmode
-
- mov %cs, %ax
- movzx %ax, %esi # Find the 32bit trampoline location
- shll $4, %esi
-
- # Fixup the vectors
- addl %esi, startup_32_vector - r_base
- addl %esi, startup_64_vector - r_base
- addl %esi, tgdt + 2 - r_base # Fixup the gdt pointer
-
- /*
- * GDT tables in non default location kernel can be beyond 16MB and
- * lgdt will not be able to load the address as in real mode default
- * operand size is 16bit. Use lgdtl instead to force operand size
- * to 32 bit.
- */
-
- lidtl tidt - r_base # load idt with 0, 0
- lgdtl tgdt - r_base # load gdt with whatever is appropriate
-
- xor %ax, %ax
- inc %ax # protected mode (PE) bit
- lmsw %ax # into protected mode
-
- # flush prefetch and jump to startup_32
- ljmpl *(startup_32_vector - r_base)
-
- .code32
- .balign 4
-startup_32:
- movl $__KERNEL_DS, %eax # Initialize the %ds segment register
- movl %eax, %ds
-
- xorl %eax, %eax
- btsl $5, %eax # Enable PAE mode
- movl %eax, %cr4
-
- # Setup trampoline 4 level pagetables
- leal (trampoline_level4_pgt - r_base)(%esi), %eax
- movl %eax, %cr3
-
- movl $MSR_EFER, %ecx
- movl $(1 << _EFER_LME), %eax # Enable Long Mode
- xorl %edx, %edx
- wrmsr
-
- xorl %eax, %eax
- btsl $31, %eax # Enable paging and in turn activate Long Mode
- btsl $0, %eax # Enable protected mode
- movl %eax, %cr0
-
- /*
- * At this point we're in long mode but in 32bit compatibility mode
- * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn
- * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we use
- * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
- */
- ljmp *(startup_64_vector - r_base)(%esi)
-
- .code64
- .balign 4
-startup_64:
- # Now jump into the kernel using virtual addresses
- movq $secondary_startup_64, %rax
- jmp *%rax
-
- .code16
-no_longmode:
- hlt
- jmp no_longmode
-#include "verify_cpu.S"
-
- # Careful these need to be in the same 64K segment as the above;
-tidt:
- .word 0 # idt limit = 0
- .word 0, 0 # idt base = 0L
-
- # Duplicate the global descriptor table
- # so the kernel can live anywhere
- .balign 4
-tgdt:
- .short tgdt_end - tgdt # gdt limit
- .long tgdt - r_base
- .short 0
- .quad 0x00cf9b000000ffff # __KERNEL32_CS
- .quad 0x00af9b000000ffff # __KERNEL_CS
- .quad 0x00cf93000000ffff # __KERNEL_DS
-tgdt_end:
-
- .balign 4
-startup_32_vector:
- .long startup_32 - r_base
- .word __KERNEL32_CS, 0
-
- .balign 4
-startup_64_vector:
- .long startup_64 - r_base
- .word __KERNEL_CS, 0
-
-trampoline_stack:
- .org 0x1000
-trampoline_stack_end:
-ENTRY(trampoline_level4_pgt)
- .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
- .fill 510,8,0
- .quad level3_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
-
-ENTRY(trampoline_end)
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
deleted file mode 100644
index 03888420775d..000000000000
--- a/arch/x86_64/kernel/traps.c
+++ /dev/null
@@ -1,1138 +0,0 @@
-/*
- * linux/arch/x86-64/traps.c
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
- *
- * Pentium III FXSR, SSE support
- * Gareth Hughes <gareth@valinux.com>, May 2000
- */
-
-/*
- * 'Traps.c' handles hardware traps and faults after we have saved some
- * state in 'entry.S'.
- */
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/ptrace.h>
-#include <linux/timer.h>
-#include <linux/mm.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/spinlock.h>
-#include <linux/interrupt.h>
-#include <linux/kallsyms.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/nmi.h>
-#include <linux/kprobes.h>
-#include <linux/kexec.h>
-#include <linux/unwind.h>
-#include <linux/uaccess.h>
-#include <linux/bug.h>
-#include <linux/kdebug.h>
-
-#if defined(CONFIG_EDAC)
-#include <linux/edac.h>
-#endif
-
-#include <asm/system.h>
-#include <asm/io.h>
-#include <asm/atomic.h>
-#include <asm/debugreg.h>
-#include <asm/desc.h>
-#include <asm/i387.h>
-#include <asm/processor.h>
-#include <asm/unwind.h>
-#include <asm/smp.h>
-#include <asm/pgalloc.h>
-#include <asm/pda.h>
-#include <asm/proto.h>
-#include <asm/nmi.h>
-#include <asm/stacktrace.h>
-
-asmlinkage void divide_error(void);
-asmlinkage void debug(void);
-asmlinkage void nmi(void);
-asmlinkage void int3(void);
-asmlinkage void overflow(void);
-asmlinkage void bounds(void);
-asmlinkage void invalid_op(void);
-asmlinkage void device_not_available(void);
-asmlinkage void double_fault(void);
-asmlinkage void coprocessor_segment_overrun(void);
-asmlinkage void invalid_TSS(void);
-asmlinkage void segment_not_present(void);
-asmlinkage void stack_segment(void);
-asmlinkage void general_protection(void);
-asmlinkage void page_fault(void);
-asmlinkage void coprocessor_error(void);
-asmlinkage void simd_coprocessor_error(void);
-asmlinkage void reserved(void);
-asmlinkage void alignment_check(void);
-asmlinkage void machine_check(void);
-asmlinkage void spurious_interrupt_bug(void);
-
-static inline void conditional_sti(struct pt_regs *regs)
-{
- if (regs->eflags & X86_EFLAGS_IF)
- local_irq_enable();
-}
-
-static inline void preempt_conditional_sti(struct pt_regs *regs)
-{
- preempt_disable();
- if (regs->eflags & X86_EFLAGS_IF)
- local_irq_enable();
-}
-
-static inline void preempt_conditional_cli(struct pt_regs *regs)
-{
- if (regs->eflags & X86_EFLAGS_IF)
- local_irq_disable();
- /* Make sure to not schedule here because we could be running
- on an exception stack. */
- preempt_enable_no_resched();
-}
-
-int kstack_depth_to_print = 12;
-
-#ifdef CONFIG_KALLSYMS
-void printk_address(unsigned long address)
-{
- unsigned long offset = 0, symsize;
- const char *symname;
- char *modname;
- char *delim = ":";
- char namebuf[128];
-
- symname = kallsyms_lookup(address, &symsize, &offset,
- &modname, namebuf);
- if (!symname) {
- printk(" [<%016lx>]\n", address);
- return;
- }
- if (!modname)
- modname = delim = "";
- printk(" [<%016lx>] %s%s%s%s+0x%lx/0x%lx\n",
- address, delim, modname, delim, symname, offset, symsize);
-}
-#else
-void printk_address(unsigned long address)
-{
- printk(" [<%016lx>]\n", address);
-}
-#endif
-
-static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
- unsigned *usedp, char **idp)
-{
- static char ids[][8] = {
- [DEBUG_STACK - 1] = "#DB",
- [NMI_STACK - 1] = "NMI",
- [DOUBLEFAULT_STACK - 1] = "#DF",
- [STACKFAULT_STACK - 1] = "#SS",
- [MCE_STACK - 1] = "#MC",
-#if DEBUG_STKSZ > EXCEPTION_STKSZ
- [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
-#endif
- };
- unsigned k;
-
- /*
- * Iterate over all exception stacks, and figure out whether
- * 'stack' is in one of them:
- */
- for (k = 0; k < N_EXCEPTION_STACKS; k++) {
- unsigned long end = per_cpu(orig_ist, cpu).ist[k];
- /*
- * Is 'stack' above this exception frame's end?
- * If yes then skip to the next frame.
- */
- if (stack >= end)
- continue;
- /*
- * Is 'stack' above this exception frame's start address?
- * If yes then we found the right frame.
- */
- if (stack >= end - EXCEPTION_STKSZ) {
- /*
- * Make sure we only iterate through an exception
- * stack once. If it comes up for the second time
- * then there's something wrong going on - just
- * break out and return NULL:
- */
- if (*usedp & (1U << k))
- break;
- *usedp |= 1U << k;
- *idp = ids[k];
- return (unsigned long *)end;
- }
- /*
- * If this is a debug stack, and if it has a larger size than
- * the usual exception stacks, then 'stack' might still
- * be within the lower portion of the debug stack:
- */
-#if DEBUG_STKSZ > EXCEPTION_STKSZ
- if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) {
- unsigned j = N_EXCEPTION_STACKS - 1;
-
- /*
- * Black magic. A large debug stack is composed of
- * multiple exception stack entries, which we
- * iterate through now. Dont look:
- */
- do {
- ++j;
- end -= EXCEPTION_STKSZ;
- ids[j][4] = '1' + (j - N_EXCEPTION_STACKS);
- } while (stack < end - EXCEPTION_STKSZ);
- if (*usedp & (1U << j))
- break;
- *usedp |= 1U << j;
- *idp = ids[j];
- return (unsigned long *)end;
- }
-#endif
- }
- return NULL;
-}
-
-#define MSG(txt) ops->warning(data, txt)
-
-/*
- * x86-64 can have upto three kernel stacks:
- * process stack
- * interrupt stack
- * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
- */
-
-static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
-{
- void *t = (void *)tinfo;
- return p > t && p < t + THREAD_SIZE - 3;
-}
-
-void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
- unsigned long *stack,
- struct stacktrace_ops *ops, void *data)
-{
- const unsigned cpu = get_cpu();
- unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr;
- unsigned used = 0;
- struct thread_info *tinfo;
-
- if (!tsk)
- tsk = current;
-
- if (!stack) {
- unsigned long dummy;
- stack = &dummy;
- if (tsk && tsk != current)
- stack = (unsigned long *)tsk->thread.rsp;
- }
-
- /*
- * Print function call entries within a stack. 'cond' is the
- * "end of stackframe" condition, that the 'stack++'
- * iteration will eventually trigger.
- */
-#define HANDLE_STACK(cond) \
- do while (cond) { \
- unsigned long addr = *stack++; \
- /* Use unlocked access here because except for NMIs \
- we should be already protected against module unloads */ \
- if (__kernel_text_address(addr)) { \
- /* \
- * If the address is either in the text segment of the \
- * kernel, or in the region which contains vmalloc'ed \
- * memory, it *may* be the address of a calling \
- * routine; if so, print it so that someone tracing \
- * down the cause of the crash will be able to figure \
- * out the call path that was taken. \
- */ \
- ops->address(data, addr); \
- } \
- } while (0)
-
- /*
- * Print function call entries in all stacks, starting at the
- * current stack address. If the stacks consist of nested
- * exceptions
- */
- for (;;) {
- char *id;
- unsigned long *estack_end;
- estack_end = in_exception_stack(cpu, (unsigned long)stack,
- &used, &id);
-
- if (estack_end) {
- if (ops->stack(data, id) < 0)
- break;
- HANDLE_STACK (stack < estack_end);
- ops->stack(data, "<EOE>");
- /*
- * We link to the next stack via the
- * second-to-last pointer (index -2 to end) in the
- * exception stack:
- */
- stack = (unsigned long *) estack_end[-2];
- continue;
- }
- if (irqstack_end) {
- unsigned long *irqstack;
- irqstack = irqstack_end -
- (IRQSTACKSIZE - 64) / sizeof(*irqstack);
-
- if (stack >= irqstack && stack < irqstack_end) {
- if (ops->stack(data, "IRQ") < 0)
- break;
- HANDLE_STACK (stack < irqstack_end);
- /*
- * We link to the next stack (which would be
- * the process stack normally) the last
- * pointer (index -1 to end) in the IRQ stack:
- */
- stack = (unsigned long *) (irqstack_end[-1]);
- irqstack_end = NULL;
- ops->stack(data, "EOI");
- continue;
- }
- }
- break;
- }
-
- /*
- * This handles the process stack:
- */
- tinfo = task_thread_info(tsk);
- HANDLE_STACK (valid_stack_ptr(tinfo, stack));
-#undef HANDLE_STACK
- put_cpu();
-}
-EXPORT_SYMBOL(dump_trace);
-
-static void
-print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
-{
- print_symbol(msg, symbol);
- printk("\n");
-}
-
-static void print_trace_warning(void *data, char *msg)
-{
- printk("%s\n", msg);
-}
-
-static int print_trace_stack(void *data, char *name)
-{
- printk(" <%s> ", name);
- return 0;
-}
-
-static void print_trace_address(void *data, unsigned long addr)
-{
- touch_nmi_watchdog();
- printk_address(addr);
-}
-
-static struct stacktrace_ops print_trace_ops = {
- .warning = print_trace_warning,
- .warning_symbol = print_trace_warning_symbol,
- .stack = print_trace_stack,
- .address = print_trace_address,
-};
-
-void
-show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack)
-{
- printk("\nCall Trace:\n");
- dump_trace(tsk, regs, stack, &print_trace_ops, NULL);
- printk("\n");
-}
-
-static void
-_show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *rsp)
-{
- unsigned long *stack;
- int i;
- const int cpu = smp_processor_id();
- unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr);
- unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
-
- // debugging aid: "show_stack(NULL, NULL);" prints the
- // back trace for this cpu.
-
- if (rsp == NULL) {
- if (tsk)
- rsp = (unsigned long *)tsk->thread.rsp;
- else
- rsp = (unsigned long *)&rsp;
- }
-
- stack = rsp;
- for(i=0; i < kstack_depth_to_print; i++) {
- if (stack >= irqstack && stack <= irqstack_end) {
- if (stack == irqstack_end) {
- stack = (unsigned long *) (irqstack_end[-1]);
- printk(" <EOI> ");
- }
- } else {
- if (((long) stack & (THREAD_SIZE-1)) == 0)
- break;
- }
- if (i && ((i % 4) == 0))
- printk("\n");
- printk(" %016lx", *stack++);
- touch_nmi_watchdog();
- }
- show_trace(tsk, regs, rsp);
-}
-
-void show_stack(struct task_struct *tsk, unsigned long * rsp)
-{
- _show_stack(tsk, NULL, rsp);
-}
-
-/*
- * The architecture-independent dump_stack generator
- */
-void dump_stack(void)
-{
- unsigned long dummy;
- show_trace(NULL, NULL, &dummy);
-}
-
-EXPORT_SYMBOL(dump_stack);
-
-void show_registers(struct pt_regs *regs)
-{
- int i;
- int in_kernel = !user_mode(regs);
- unsigned long rsp;
- const int cpu = smp_processor_id();
- struct task_struct *cur = cpu_pda(cpu)->pcurrent;
-
- rsp = regs->rsp;
- printk("CPU %d ", cpu);
- __show_regs(regs);
- printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
- cur->comm, cur->pid, task_thread_info(cur), cur);
-
- /*
- * When in-kernel, we also print out the stack and code at the
- * time of the fault..
- */
- if (in_kernel) {
- printk("Stack: ");
- _show_stack(NULL, regs, (unsigned long*)rsp);
-
- printk("\nCode: ");
- if (regs->rip < PAGE_OFFSET)
- goto bad;
-
- for (i=0; i<20; i++) {
- unsigned char c;
- if (__get_user(c, &((unsigned char*)regs->rip)[i])) {
-bad:
- printk(" Bad RIP value.");
- break;
- }
- printk("%02x ", c);
- }
- }
- printk("\n");
-}
-
-int is_valid_bugaddr(unsigned long rip)
-{
- unsigned short ud2;
-
- if (__copy_from_user(&ud2, (const void __user *) rip, sizeof(ud2)))
- return 0;
-
- return ud2 == 0x0b0f;
-}
-
-#ifdef CONFIG_BUG
-void out_of_line_bug(void)
-{
- BUG();
-}
-EXPORT_SYMBOL(out_of_line_bug);
-#endif
-
-static DEFINE_SPINLOCK(die_lock);
-static int die_owner = -1;
-static unsigned int die_nest_count;
-
-unsigned __kprobes long oops_begin(void)
-{
- int cpu;
- unsigned long flags;
-
- oops_enter();
-
- /* racy, but better than risking deadlock. */
- local_irq_save(flags);
- cpu = smp_processor_id();
- if (!spin_trylock(&die_lock)) {
- if (cpu == die_owner)
- /* nested oops. should stop eventually */;
- else
- spin_lock(&die_lock);
- }
- die_nest_count++;
- die_owner = cpu;
- console_verbose();
- bust_spinlocks(1);
- return flags;
-}
-
-void __kprobes oops_end(unsigned long flags)
-{
- die_owner = -1;
- bust_spinlocks(0);
- die_nest_count--;
- if (die_nest_count)
- /* We still own the lock */
- local_irq_restore(flags);
- else
- /* Nest count reaches zero, release the lock. */
- spin_unlock_irqrestore(&die_lock, flags);
- if (panic_on_oops)
- panic("Fatal exception");
- oops_exit();
-}
-
-void __kprobes __die(const char * str, struct pt_regs * regs, long err)
-{
- static int die_counter;
- printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff,++die_counter);
-#ifdef CONFIG_PREEMPT
- printk("PREEMPT ");
-#endif
-#ifdef CONFIG_SMP
- printk("SMP ");
-#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
- printk("DEBUG_PAGEALLOC");
-#endif
- printk("\n");
- notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV);
- show_registers(regs);
- add_taint(TAINT_DIE);
- /* Executive summary in case the oops scrolled away */
- printk(KERN_ALERT "RIP ");
- printk_address(regs->rip);
- printk(" RSP <%016lx>\n", regs->rsp);
- if (kexec_should_crash(current))
- crash_kexec(regs);
-}
-
-void die(const char * str, struct pt_regs * regs, long err)
-{
- unsigned long flags = oops_begin();
-
- if (!user_mode(regs))
- report_bug(regs->rip, regs);
-
- __die(str, regs, err);
- oops_end(flags);
- do_exit(SIGSEGV);
-}
-
-void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
-{
- unsigned long flags = oops_begin();
-
- /*
- * We are in trouble anyway, lets at least try
- * to get a message out.
- */
- printk(str, smp_processor_id());
- show_registers(regs);
- if (kexec_should_crash(current))
- crash_kexec(regs);
- if (do_panic || panic_on_oops)
- panic("Non maskable interrupt");
- oops_end(flags);
- nmi_exit();
- local_irq_enable();
- do_exit(SIGSEGV);
-}
-
-static void __kprobes do_trap(int trapnr, int signr, char *str,
- struct pt_regs * regs, long error_code,
- siginfo_t *info)
-{
- struct task_struct *tsk = current;
-
- if (user_mode(regs)) {
- /*
- * We want error_code and trap_no set for userspace
- * faults and kernelspace faults which result in
- * die(), but not kernelspace faults which are fixed
- * up. die() gives the process no chance to handle
- * the signal and notice the kernel fault information,
- * so that won't result in polluting the information
- * about previously queued, but not yet delivered,
- * faults. See also do_general_protection below.
- */
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = trapnr;
-
- if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
- printk_ratelimit())
- printk(KERN_INFO
- "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n",
- tsk->comm, tsk->pid, str,
- regs->rip, regs->rsp, error_code);
-
- if (info)
- force_sig_info(signr, info, tsk);
- else
- force_sig(signr, tsk);
- return;
- }
-
-
- /* kernel trap */
- {
- const struct exception_table_entry *fixup;
- fixup = search_exception_tables(regs->rip);
- if (fixup)
- regs->rip = fixup->fixup;
- else {
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = trapnr;
- die(str, regs, error_code);
- }
- return;
- }
-}
-
-#define DO_ERROR(trapnr, signr, str, name) \
-asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
-{ \
- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
- == NOTIFY_STOP) \
- return; \
- conditional_sti(regs); \
- do_trap(trapnr, signr, str, regs, error_code, NULL); \
-}
-
-#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
-asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
-{ \
- siginfo_t info; \
- info.si_signo = signr; \
- info.si_errno = 0; \
- info.si_code = sicode; \
- info.si_addr = (void __user *)siaddr; \
- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
- == NOTIFY_STOP) \
- return; \
- conditional_sti(regs); \
- do_trap(trapnr, signr, str, regs, error_code, &info); \
-}
-
-DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->rip)
-DO_ERROR( 4, SIGSEGV, "overflow", overflow)
-DO_ERROR( 5, SIGSEGV, "bounds", bounds)
-DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->rip)
-DO_ERROR( 7, SIGSEGV, "device not available", device_not_available)
-DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
-DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
-DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
-DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
-DO_ERROR(18, SIGSEGV, "reserved", reserved)
-
-/* Runs on IST stack */
-asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code)
-{
- if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
- 12, SIGBUS) == NOTIFY_STOP)
- return;
- preempt_conditional_sti(regs);
- do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL);
- preempt_conditional_cli(regs);
-}
-
-asmlinkage void do_double_fault(struct pt_regs * regs, long error_code)
-{
- static const char str[] = "double fault";
- struct task_struct *tsk = current;
-
- /* Return not checked because double check cannot be ignored */
- notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV);
-
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = 8;
-
- /* This is always a kernel trap and never fixable (and thus must
- never return). */
- for (;;)
- die(str, regs, error_code);
-}
-
-asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
- long error_code)
-{
- struct task_struct *tsk = current;
-
- conditional_sti(regs);
-
- if (user_mode(regs)) {
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = 13;
-
- if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
- printk_ratelimit())
- printk(KERN_INFO
- "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n",
- tsk->comm, tsk->pid,
- regs->rip, regs->rsp, error_code);
-
- force_sig(SIGSEGV, tsk);
- return;
- }
-
- /* kernel gp */
- {
- const struct exception_table_entry *fixup;
- fixup = search_exception_tables(regs->rip);
- if (fixup) {
- regs->rip = fixup->fixup;
- return;
- }
-
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = 13;
- if (notify_die(DIE_GPF, "general protection fault", regs,
- error_code, 13, SIGSEGV) == NOTIFY_STOP)
- return;
- die("general protection fault", regs, error_code);
- }
-}
-
-static __kprobes void
-mem_parity_error(unsigned char reason, struct pt_regs * regs)
-{
- printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
- reason);
- printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
-
-#if defined(CONFIG_EDAC)
- if(edac_handler_set()) {
- edac_atomic_assert_error();
- return;
- }
-#endif
-
- if (panic_on_unrecovered_nmi)
- panic("NMI: Not continuing");
-
- printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
-
- /* Clear and disable the memory parity error line. */
- reason = (reason & 0xf) | 4;
- outb(reason, 0x61);
-}
-
-static __kprobes void
-io_check_error(unsigned char reason, struct pt_regs * regs)
-{
- printk("NMI: IOCK error (debug interrupt?)\n");
- show_registers(regs);
-
- /* Re-enable the IOCK line, wait for a few seconds */
- reason = (reason & 0xf) | 8;
- outb(reason, 0x61);
- mdelay(2000);
- reason &= ~8;
- outb(reason, 0x61);
-}
-
-static __kprobes void
-unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
-{
- printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
- reason);
- printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
-
- if (panic_on_unrecovered_nmi)
- panic("NMI: Not continuing");
-
- printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
-}
-
-/* Runs on IST stack. This code must keep interrupts off all the time.
- Nested NMIs are prevented by the CPU. */
-asmlinkage __kprobes void default_do_nmi(struct pt_regs *regs)
-{
- unsigned char reason = 0;
- int cpu;
-
- cpu = smp_processor_id();
-
- /* Only the BSP gets external NMIs from the system. */
- if (!cpu)
- reason = get_nmi_reason();
-
- if (!(reason & 0xc0)) {
- if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
- == NOTIFY_STOP)
- return;
- /*
- * Ok, so this is none of the documented NMI sources,
- * so it must be the NMI watchdog.
- */
- if (nmi_watchdog_tick(regs,reason))
- return;
- if (!do_nmi_callback(regs,cpu))
- unknown_nmi_error(reason, regs);
-
- return;
- }
- if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
- return;
-
- /* AK: following checks seem to be broken on modern chipsets. FIXME */
-
- if (reason & 0x80)
- mem_parity_error(reason, regs);
- if (reason & 0x40)
- io_check_error(reason, regs);
-}
-
-/* runs on IST stack. */
-asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code)
-{
- if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) {
- return;
- }
- preempt_conditional_sti(regs);
- do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
- preempt_conditional_cli(regs);
-}
-
-/* Help handler running on IST stack to switch back to user stack
- for scheduling or signal handling. The actual stack switch is done in
- entry.S */
-asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
-{
- struct pt_regs *regs = eregs;
- /* Did already sync */
- if (eregs == (struct pt_regs *)eregs->rsp)
- ;
- /* Exception from user space */
- else if (user_mode(eregs))
- regs = task_pt_regs(current);
- /* Exception from kernel and interrupts are enabled. Move to
- kernel process stack. */
- else if (eregs->eflags & X86_EFLAGS_IF)
- regs = (struct pt_regs *)(eregs->rsp -= sizeof(struct pt_regs));
- if (eregs != regs)
- *regs = *eregs;
- return regs;
-}
-
-/* runs on IST stack. */
-asmlinkage void __kprobes do_debug(struct pt_regs * regs,
- unsigned long error_code)
-{
- unsigned long condition;
- struct task_struct *tsk = current;
- siginfo_t info;
-
- get_debugreg(condition, 6);
-
- if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
- SIGTRAP) == NOTIFY_STOP)
- return;
-
- preempt_conditional_sti(regs);
-
- /* Mask out spurious debug traps due to lazy DR7 setting */
- if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
- if (!tsk->thread.debugreg7) {
- goto clear_dr7;
- }
- }
-
- tsk->thread.debugreg6 = condition;
-
- /* Mask out spurious TF errors due to lazy TF clearing */
- if (condition & DR_STEP) {
- /*
- * The TF error should be masked out only if the current
- * process is not traced and if the TRAP flag has been set
- * previously by a tracing process (condition detected by
- * the PT_DTRACE flag); remember that the i386 TRAP flag
- * can be modified by the process itself in user mode,
- * allowing programs to debug themselves without the ptrace()
- * interface.
- */
- if (!user_mode(regs))
- goto clear_TF_reenable;
- /*
- * Was the TF flag set by a debugger? If so, clear it now,
- * so that register information is correct.
- */
- if (tsk->ptrace & PT_DTRACE) {
- regs->eflags &= ~TF_MASK;
- tsk->ptrace &= ~PT_DTRACE;
- }
- }
-
- /* Ok, finally something we can handle */
- tsk->thread.trap_no = 1;
- tsk->thread.error_code = error_code;
- info.si_signo = SIGTRAP;
- info.si_errno = 0;
- info.si_code = TRAP_BRKPT;
- info.si_addr = user_mode(regs) ? (void __user *)regs->rip : NULL;
- force_sig_info(SIGTRAP, &info, tsk);
-
-clear_dr7:
- set_debugreg(0UL, 7);
- preempt_conditional_cli(regs);
- return;
-
-clear_TF_reenable:
- set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
- regs->eflags &= ~TF_MASK;
- preempt_conditional_cli(regs);
-}
-
-static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
-{
- const struct exception_table_entry *fixup;
- fixup = search_exception_tables(regs->rip);
- if (fixup) {
- regs->rip = fixup->fixup;
- return 1;
- }
- notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE);
- /* Illegal floating point operation in the kernel */
- current->thread.trap_no = trapnr;
- die(str, regs, 0);
- return 0;
-}
-
-/*
- * Note that we play around with the 'TS' bit in an attempt to get
- * the correct behaviour even in the presence of the asynchronous
- * IRQ13 behaviour
- */
-asmlinkage void do_coprocessor_error(struct pt_regs *regs)
-{
- void __user *rip = (void __user *)(regs->rip);
- struct task_struct * task;
- siginfo_t info;
- unsigned short cwd, swd;
-
- conditional_sti(regs);
- if (!user_mode(regs) &&
- kernel_math_error(regs, "kernel x87 math error", 16))
- return;
-
- /*
- * Save the info for the exception handler and clear the error.
- */
- task = current;
- save_init_fpu(task);
- task->thread.trap_no = 16;
- task->thread.error_code = 0;
- info.si_signo = SIGFPE;
- info.si_errno = 0;
- info.si_code = __SI_FAULT;
- info.si_addr = rip;
- /*
- * (~cwd & swd) will mask out exceptions that are not set to unmasked
- * status. 0x3f is the exception bits in these regs, 0x200 is the
- * C1 reg you need in case of a stack fault, 0x040 is the stack
- * fault bit. We should only be taking one exception at a time,
- * so if this combination doesn't produce any single exception,
- * then we have a bad program that isn't synchronizing its FPU usage
- * and it will suffer the consequences since we won't be able to
- * fully reproduce the context of the exception
- */
- cwd = get_fpu_cwd(task);
- swd = get_fpu_swd(task);
- switch (swd & ~cwd & 0x3f) {
- case 0x000:
- default:
- break;
- case 0x001: /* Invalid Op */
- /*
- * swd & 0x240 == 0x040: Stack Underflow
- * swd & 0x240 == 0x240: Stack Overflow
- * User must clear the SF bit (0x40) if set
- */
- info.si_code = FPE_FLTINV;
- break;
- case 0x002: /* Denormalize */
- case 0x010: /* Underflow */
- info.si_code = FPE_FLTUND;
- break;
- case 0x004: /* Zero Divide */
- info.si_code = FPE_FLTDIV;
- break;
- case 0x008: /* Overflow */
- info.si_code = FPE_FLTOVF;
- break;
- case 0x020: /* Precision */
- info.si_code = FPE_FLTRES;
- break;
- }
- force_sig_info(SIGFPE, &info, task);
-}
-
-asmlinkage void bad_intr(void)
-{
- printk("bad interrupt");
-}
-
-asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
-{
- void __user *rip = (void __user *)(regs->rip);
- struct task_struct * task;
- siginfo_t info;
- unsigned short mxcsr;
-
- conditional_sti(regs);
- if (!user_mode(regs) &&
- kernel_math_error(regs, "kernel simd math error", 19))
- return;
-
- /*
- * Save the info for the exception handler and clear the error.
- */
- task = current;
- save_init_fpu(task);
- task->thread.trap_no = 19;
- task->thread.error_code = 0;
- info.si_signo = SIGFPE;
- info.si_errno = 0;
- info.si_code = __SI_FAULT;
- info.si_addr = rip;
- /*
- * The SIMD FPU exceptions are handled a little differently, as there
- * is only a single status/control register. Thus, to determine which
- * unmasked exception was caught we must mask the exception mask bits
- * at 0x1f80, and then use these to mask the exception bits at 0x3f.
- */
- mxcsr = get_fpu_mxcsr(task);
- switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
- case 0x000:
- default:
- break;
- case 0x001: /* Invalid Op */
- info.si_code = FPE_FLTINV;
- break;
- case 0x002: /* Denormalize */
- case 0x010: /* Underflow */
- info.si_code = FPE_FLTUND;
- break;
- case 0x004: /* Zero Divide */
- info.si_code = FPE_FLTDIV;
- break;
- case 0x008: /* Overflow */
- info.si_code = FPE_FLTOVF;
- break;
- case 0x020: /* Precision */
- info.si_code = FPE_FLTRES;
- break;
- }
- force_sig_info(SIGFPE, &info, task);
-}
-
-asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs)
-{
-}
-
-asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
-{
-}
-
-asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
-{
-}
-
-/*
- * 'math_state_restore()' saves the current math information in the
- * old math state array, and gets the new ones from the current task
- *
- * Careful.. There are problems with IBM-designed IRQ13 behaviour.
- * Don't touch unless you *really* know how it works.
- */
-asmlinkage void math_state_restore(void)
-{
- struct task_struct *me = current;
- clts(); /* Allow maths ops (or we recurse) */
-
- if (!used_math())
- init_fpu(me);
- restore_fpu_checking(&me->thread.i387.fxsave);
- task_thread_info(me)->status |= TS_USEDFPU;
- me->fpu_counter++;
-}
-
-void __init trap_init(void)
-{
- set_intr_gate(0,&divide_error);
- set_intr_gate_ist(1,&debug,DEBUG_STACK);
- set_intr_gate_ist(2,&nmi,NMI_STACK);
- set_system_gate_ist(3,&int3,DEBUG_STACK); /* int3 can be called from all */
- set_system_gate(4,&overflow); /* int4 can be called from all */
- set_intr_gate(5,&bounds);
- set_intr_gate(6,&invalid_op);
- set_intr_gate(7,&device_not_available);
- set_intr_gate_ist(8,&double_fault, DOUBLEFAULT_STACK);
- set_intr_gate(9,&coprocessor_segment_overrun);
- set_intr_gate(10,&invalid_TSS);
- set_intr_gate(11,&segment_not_present);
- set_intr_gate_ist(12,&stack_segment,STACKFAULT_STACK);
- set_intr_gate(13,&general_protection);
- set_intr_gate(14,&page_fault);
- set_intr_gate(15,&spurious_interrupt_bug);
- set_intr_gate(16,&coprocessor_error);
- set_intr_gate(17,&alignment_check);
-#ifdef CONFIG_X86_MCE
- set_intr_gate_ist(18,&machine_check, MCE_STACK);
-#endif
- set_intr_gate(19,&simd_coprocessor_error);
-
-#ifdef CONFIG_IA32_EMULATION
- set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
-#endif
-
- /*
- * Should be a barrier for any external CPU state.
- */
- cpu_init();
-}
-
-
-static int __init oops_setup(char *s)
-{
- if (!s)
- return -EINVAL;
- if (!strcmp(s, "panic"))
- panic_on_oops = 1;
- return 0;
-}
-early_param("oops", oops_setup);
-
-static int __init kstack_setup(char *s)
-{
- if (!s)
- return -EINVAL;
- kstack_depth_to_print = simple_strtoul(s,NULL,0);
- return 0;
-}
-early_param("kstack", kstack_setup);
diff --git a/arch/x86_64/kernel/tsc.c b/arch/x86_64/kernel/tsc.c
deleted file mode 100644
index 2a59bde663f2..000000000000
--- a/arch/x86_64/kernel/tsc.c
+++ /dev/null
@@ -1,207 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/clocksource.h>
-#include <linux/time.h>
-#include <linux/acpi.h>
-#include <linux/cpufreq.h>
-
-#include <asm/timex.h>
-
-static int notsc __initdata = 0;
-
-unsigned int cpu_khz; /* TSC clocks / usec, not used here */
-EXPORT_SYMBOL(cpu_khz);
-unsigned int tsc_khz;
-EXPORT_SYMBOL(tsc_khz);
-
-static unsigned int cyc2ns_scale __read_mostly;
-
-void set_cyc2ns_scale(unsigned long khz)
-{
- cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / khz;
-}
-
-static unsigned long long cycles_2_ns(unsigned long long cyc)
-{
- return (cyc * cyc2ns_scale) >> NS_SCALE;
-}
-
-unsigned long long sched_clock(void)
-{
- unsigned long a = 0;
-
- /* Could do CPU core sync here. Opteron can execute rdtsc speculatively,
- * which means it is not completely exact and may not be monotonous
- * between CPUs. But the errors should be too small to matter for
- * scheduling purposes.
- */
-
- rdtscll(a);
- return cycles_2_ns(a);
-}
-
-static int tsc_unstable;
-
-inline int check_tsc_unstable(void)
-{
- return tsc_unstable;
-}
-#ifdef CONFIG_CPU_FREQ
-
-/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
- * changes.
- *
- * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's
- * not that important because current Opteron setups do not support
- * scaling on SMP anyroads.
- *
- * Should fix up last_tsc too. Currently gettimeofday in the
- * first tick after the change will be slightly wrong.
- */
-
-static unsigned int ref_freq;
-static unsigned long loops_per_jiffy_ref;
-static unsigned long tsc_khz_ref;
-
-static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
- void *data)
-{
- struct cpufreq_freqs *freq = data;
- unsigned long *lpj, dummy;
-
- if (cpu_has(&cpu_data[freq->cpu], X86_FEATURE_CONSTANT_TSC))
- return 0;
-
- lpj = &dummy;
- if (!(freq->flags & CPUFREQ_CONST_LOOPS))
-#ifdef CONFIG_SMP
- lpj = &cpu_data[freq->cpu].loops_per_jiffy;
-#else
- lpj = &boot_cpu_data.loops_per_jiffy;
-#endif
-
- if (!ref_freq) {
- ref_freq = freq->old;
- loops_per_jiffy_ref = *lpj;
- tsc_khz_ref = tsc_khz;
- }
- if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
- (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
- (val == CPUFREQ_RESUMECHANGE)) {
- *lpj =
- cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
-
- tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
- if (!(freq->flags & CPUFREQ_CONST_LOOPS))
- mark_tsc_unstable("cpufreq changes");
- }
-
- set_cyc2ns_scale(tsc_khz_ref);
-
- return 0;
-}
-
-static struct notifier_block time_cpufreq_notifier_block = {
- .notifier_call = time_cpufreq_notifier
-};
-
-static int __init cpufreq_tsc(void)
-{
- cpufreq_register_notifier(&time_cpufreq_notifier_block,
- CPUFREQ_TRANSITION_NOTIFIER);
- return 0;
-}
-
-core_initcall(cpufreq_tsc);
-
-#endif
-
-/*
- * Make an educated guess if the TSC is trustworthy and synchronized
- * over all CPUs.
- */
-__cpuinit int unsynchronized_tsc(void)
-{
- if (tsc_unstable)
- return 1;
-
-#ifdef CONFIG_SMP
- if (apic_is_clustered_box())
- return 1;
-#endif
- /* Most intel systems have synchronized TSCs except for
- multi node systems */
- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
-#ifdef CONFIG_ACPI
- /* But TSC doesn't tick in C3 so don't use it there */
- if (acpi_gbl_FADT.header.length > 0 &&
- acpi_gbl_FADT.C3latency < 1000)
- return 1;
-#endif
- return 0;
- }
-
- /* Assume multi socket systems are not synchronized */
- return num_present_cpus() > 1;
-}
-
-int __init notsc_setup(char *s)
-{
- notsc = 1;
- return 1;
-}
-
-__setup("notsc", notsc_setup);
-
-
-/* clock source code: */
-static cycle_t read_tsc(void)
-{
- cycle_t ret = (cycle_t)get_cycles_sync();
- return ret;
-}
-
-static cycle_t __vsyscall_fn vread_tsc(void)
-{
- cycle_t ret = (cycle_t)get_cycles_sync();
- return ret;
-}
-
-static struct clocksource clocksource_tsc = {
- .name = "tsc",
- .rating = 300,
- .read = read_tsc,
- .mask = CLOCKSOURCE_MASK(64),
- .shift = 22,
- .flags = CLOCK_SOURCE_IS_CONTINUOUS |
- CLOCK_SOURCE_MUST_VERIFY,
- .vread = vread_tsc,
-};
-
-void mark_tsc_unstable(char *reason)
-{
- if (!tsc_unstable) {
- tsc_unstable = 1;
- printk("Marking TSC unstable due to %s\n", reason);
- /* Change only the rating, when not registered */
- if (clocksource_tsc.mult)
- clocksource_change_rating(&clocksource_tsc, 0);
- else
- clocksource_tsc.rating = 0;
- }
-}
-EXPORT_SYMBOL_GPL(mark_tsc_unstable);
-
-void __init init_tsc_clocksource(void)
-{
- if (!notsc) {
- clocksource_tsc.mult = clocksource_khz2mult(tsc_khz,
- clocksource_tsc.shift);
- if (check_tsc_unstable())
- clocksource_tsc.rating = 0;
-
- clocksource_register(&clocksource_tsc);
- }
-}
diff --git a/arch/x86_64/kernel/tsc_sync.c b/arch/x86_64/kernel/tsc_sync.c
deleted file mode 100644
index 355f5f506c81..000000000000
--- a/arch/x86_64/kernel/tsc_sync.c
+++ /dev/null
@@ -1,187 +0,0 @@
-/*
- * arch/x86_64/kernel/tsc_sync.c: check TSC synchronization.
- *
- * Copyright (C) 2006, Red Hat, Inc., Ingo Molnar
- *
- * We check whether all boot CPUs have their TSC's synchronized,
- * print a warning if not and turn off the TSC clock-source.
- *
- * The warp-check is point-to-point between two CPUs, the CPU
- * initiating the bootup is the 'source CPU', the freshly booting
- * CPU is the 'target CPU'.
- *
- * Only two CPUs may participate - they can enter in any order.
- * ( The serial nature of the boot logic and the CPU hotplug lock
- * protects against more than 2 CPUs entering this code. )
- */
-#include <linux/spinlock.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/nmi.h>
-#include <asm/tsc.h>
-
-/*
- * Entry/exit counters that make sure that both CPUs
- * run the measurement code at once:
- */
-static __cpuinitdata atomic_t start_count;
-static __cpuinitdata atomic_t stop_count;
-
-/*
- * We use a raw spinlock in this exceptional case, because
- * we want to have the fastest, inlined, non-debug version
- * of a critical section, to be able to prove TSC time-warps:
- */
-static __cpuinitdata raw_spinlock_t sync_lock = __RAW_SPIN_LOCK_UNLOCKED;
-static __cpuinitdata cycles_t last_tsc;
-static __cpuinitdata cycles_t max_warp;
-static __cpuinitdata int nr_warps;
-
-/*
- * TSC-warp measurement loop running on both CPUs:
- */
-static __cpuinit void check_tsc_warp(void)
-{
- cycles_t start, now, prev, end;
- int i;
-
- start = get_cycles_sync();
- /*
- * The measurement runs for 20 msecs:
- */
- end = start + tsc_khz * 20ULL;
- now = start;
-
- for (i = 0; ; i++) {
- /*
- * We take the global lock, measure TSC, save the
- * previous TSC that was measured (possibly on
- * another CPU) and update the previous TSC timestamp.
- */
- __raw_spin_lock(&sync_lock);
- prev = last_tsc;
- now = get_cycles_sync();
- last_tsc = now;
- __raw_spin_unlock(&sync_lock);
-
- /*
- * Be nice every now and then (and also check whether
- * measurement is done [we also insert a 100 million
- * loops safety exit, so we dont lock up in case the
- * TSC readout is totally broken]):
- */
- if (unlikely(!(i & 7))) {
- if (now > end || i > 100000000)
- break;
- cpu_relax();
- touch_nmi_watchdog();
- }
- /*
- * Outside the critical section we can now see whether
- * we saw a time-warp of the TSC going backwards:
- */
- if (unlikely(prev > now)) {
- __raw_spin_lock(&sync_lock);
- max_warp = max(max_warp, prev - now);
- nr_warps++;
- __raw_spin_unlock(&sync_lock);
- }
-
- }
-}
-
-/*
- * Source CPU calls into this - it waits for the freshly booted
- * target CPU to arrive and then starts the measurement:
- */
-void __cpuinit check_tsc_sync_source(int cpu)
-{
- int cpus = 2;
-
- /*
- * No need to check if we already know that the TSC is not
- * synchronized:
- */
- if (unsynchronized_tsc())
- return;
-
- printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:",
- smp_processor_id(), cpu);
-
- /*
- * Reset it - in case this is a second bootup:
- */
- atomic_set(&stop_count, 0);
-
- /*
- * Wait for the target to arrive:
- */
- while (atomic_read(&start_count) != cpus-1)
- cpu_relax();
- /*
- * Trigger the target to continue into the measurement too:
- */
- atomic_inc(&start_count);
-
- check_tsc_warp();
-
- while (atomic_read(&stop_count) != cpus-1)
- cpu_relax();
-
- /*
- * Reset it - just in case we boot another CPU later:
- */
- atomic_set(&start_count, 0);
-
- if (nr_warps) {
- printk("\n");
- printk(KERN_WARNING "Measured %Ld cycles TSC warp between CPUs,"
- " turning off TSC clock.\n", max_warp);
- mark_tsc_unstable("check_tsc_sync_source failed");
- nr_warps = 0;
- max_warp = 0;
- last_tsc = 0;
- } else {
- printk(" passed.\n");
- }
-
- /*
- * Let the target continue with the bootup:
- */
- atomic_inc(&stop_count);
-}
-
-/*
- * Freshly booted CPUs call into this:
- */
-void __cpuinit check_tsc_sync_target(void)
-{
- int cpus = 2;
-
- if (unsynchronized_tsc())
- return;
-
- /*
- * Register this CPU's participation and wait for the
- * source CPU to start the measurement:
- */
- atomic_inc(&start_count);
- while (atomic_read(&start_count) != cpus)
- cpu_relax();
-
- check_tsc_warp();
-
- /*
- * Ok, we are done:
- */
- atomic_inc(&stop_count);
-
- /*
- * Wait for the source CPU to print stuff:
- */
- while (atomic_read(&stop_count) != cpus)
- cpu_relax();
-}
-#undef NR_LOOPS
-
diff --git a/arch/x86_64/kernel/verify_cpu.S b/arch/x86_64/kernel/verify_cpu.S
deleted file mode 100644
index 45b6f8a975a1..000000000000
--- a/arch/x86_64/kernel/verify_cpu.S
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- *
- * verify_cpu.S - Code for cpu long mode and SSE verification. This
- * code has been borrowed from boot/setup.S and was introduced by
- * Andi Kleen.
- *
- * Copyright (c) 2007 Andi Kleen (ak@suse.de)
- * Copyright (c) 2007 Eric Biederman (ebiederm@xmission.com)
- * Copyright (c) 2007 Vivek Goyal (vgoyal@in.ibm.com)
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
- *
- * This is a common code for verification whether CPU supports
- * long mode and SSE or not. It is not called directly instead this
- * file is included at various places and compiled in that context.
- * Following are the current usage.
- *
- * This file is included by both 16bit and 32bit code.
- *
- * arch/x86_64/boot/setup.S : Boot cpu verification (16bit)
- * arch/x86_64/boot/compressed/head.S: Boot cpu verification (32bit)
- * arch/x86_64/kernel/trampoline.S: secondary processor verfication (16bit)
- * arch/x86_64/kernel/acpi/wakeup.S:Verfication at resume (16bit)
- *
- * verify_cpu, returns the status of cpu check in register %eax.
- * 0: Success 1: Failure
- *
- * The caller needs to check for the error code and take the action
- * appropriately. Either display a message or halt.
- */
-
-#include <asm/cpufeature.h>
-
-verify_cpu:
- pushfl # Save caller passed flags
- pushl $0 # Kill any dangerous flags
- popfl
-
- pushfl # standard way to check for cpuid
- popl %eax
- movl %eax,%ebx
- xorl $0x200000,%eax
- pushl %eax
- popfl
- pushfl
- popl %eax
- cmpl %eax,%ebx
- jz verify_cpu_no_longmode # cpu has no cpuid
-
- movl $0x0,%eax # See if cpuid 1 is implemented
- cpuid
- cmpl $0x1,%eax
- jb verify_cpu_no_longmode # no cpuid 1
-
- xor %di,%di
- cmpl $0x68747541,%ebx # AuthenticAMD
- jnz verify_cpu_noamd
- cmpl $0x69746e65,%edx
- jnz verify_cpu_noamd
- cmpl $0x444d4163,%ecx
- jnz verify_cpu_noamd
- mov $1,%di # cpu is from AMD
-
-verify_cpu_noamd:
- movl $0x1,%eax # Does the cpu have what it takes
- cpuid
- andl $REQUIRED_MASK0,%edx
- xorl $REQUIRED_MASK0,%edx
- jnz verify_cpu_no_longmode
-
- movl $0x80000000,%eax # See if extended cpuid is implemented
- cpuid
- cmpl $0x80000001,%eax
- jb verify_cpu_no_longmode # no extended cpuid
-
- movl $0x80000001,%eax # Does the cpu have what it takes
- cpuid
- andl $REQUIRED_MASK1,%edx
- xorl $REQUIRED_MASK1,%edx
- jnz verify_cpu_no_longmode
-
-verify_cpu_sse_test:
- movl $1,%eax
- cpuid
- andl $SSE_MASK,%edx
- cmpl $SSE_MASK,%edx
- je verify_cpu_sse_ok
- test %di,%di
- jz verify_cpu_no_longmode # only try to force SSE on AMD
- movl $0xc0010015,%ecx # HWCR
- rdmsr
- btr $15,%eax # enable SSE
- wrmsr
- xor %di,%di # don't loop
- jmp verify_cpu_sse_test # try again
-
-verify_cpu_no_longmode:
- popfl # Restore caller passed flags
- movl $1,%eax
- ret
-verify_cpu_sse_ok:
- popfl # Restore caller passed flags
- xorl %eax, %eax
- ret
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
deleted file mode 100644
index ba8ea97abd21..000000000000
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ /dev/null
@@ -1,235 +0,0 @@
-/* ld script to make x86-64 Linux kernel
- * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
- */
-
-#define LOAD_OFFSET __START_KERNEL_map
-
-#include <asm-generic/vmlinux.lds.h>
-#include <asm/page.h>
-
-#undef i386 /* in case the preprocessor is a 32bit one */
-
-OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
-OUTPUT_ARCH(i386:x86-64)
-ENTRY(phys_startup_64)
-jiffies_64 = jiffies;
-_proxy_pda = 1;
-PHDRS {
- text PT_LOAD FLAGS(5); /* R_E */
- data PT_LOAD FLAGS(7); /* RWE */
- user PT_LOAD FLAGS(7); /* RWE */
- data.init PT_LOAD FLAGS(7); /* RWE */
- note PT_NOTE FLAGS(4); /* R__ */
-}
-SECTIONS
-{
- . = __START_KERNEL;
- phys_startup_64 = startup_64 - LOAD_OFFSET;
- _text = .; /* Text and read-only data */
- .text : AT(ADDR(.text) - LOAD_OFFSET) {
- /* First the code that has to be first for bootstrapping */
- *(.text.head)
- _stext = .;
- /* Then the rest */
- TEXT_TEXT
- SCHED_TEXT
- LOCK_TEXT
- KPROBES_TEXT
- *(.fixup)
- *(.gnu.warning)
- } :text = 0x9090
- /* out-of-line lock text */
- .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET) { *(.text.lock) }
-
- _etext = .; /* End of text section */
-
- . = ALIGN(16); /* Exception table */
- __start___ex_table = .;
- __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) }
- __stop___ex_table = .;
-
- NOTES :text :note
-
- BUG_TABLE :text
-
- RODATA
-
- . = ALIGN(4);
- .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) {
- __tracedata_start = .;
- *(.tracedata)
- __tracedata_end = .;
- }
-
- . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */
- /* Data */
- .data : AT(ADDR(.data) - LOAD_OFFSET) {
- DATA_DATA
- CONSTRUCTORS
- } :data
-
- _edata = .; /* End of data section */
-
- . = ALIGN(PAGE_SIZE);
- . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
- .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
- *(.data.cacheline_aligned)
- }
- . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES);
- .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) {
- *(.data.read_mostly)
- }
-
-#define VSYSCALL_ADDR (-10*1024*1024)
-#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + SIZEOF(.data.read_mostly) + 4095) & ~(4095))
-#define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + SIZEOF(.data.read_mostly) + 4095) & ~(4095))
-
-#define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR)
-#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
-
-#define VVIRT_OFFSET (VSYSCALL_ADDR - VSYSCALL_VIRT_ADDR)
-#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
-
- . = VSYSCALL_ADDR;
- .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) } :user
- __vsyscall_0 = VSYSCALL_VIRT_ADDR;
-
- . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
- .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { *(.vsyscall_fn) }
- . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
- .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data))
- { *(.vsyscall_gtod_data) }
- vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data);
- .vsyscall_clock : AT(VLOAD(.vsyscall_clock))
- { *(.vsyscall_clock) }
- vsyscall_clock = VVIRT(.vsyscall_clock);
-
-
- .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1))
- { *(.vsyscall_1) }
- .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2))
- { *(.vsyscall_2) }
-
- .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) }
- vgetcpu_mode = VVIRT(.vgetcpu_mode);
-
- . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
- .jiffies : AT(VLOAD(.jiffies)) { *(.jiffies) }
- jiffies = VVIRT(.jiffies);
-
- .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3))
- { *(.vsyscall_3) }
-
- . = VSYSCALL_VIRT_ADDR + 4096;
-
-#undef VSYSCALL_ADDR
-#undef VSYSCALL_PHYS_ADDR
-#undef VSYSCALL_VIRT_ADDR
-#undef VLOAD_OFFSET
-#undef VLOAD
-#undef VVIRT_OFFSET
-#undef VVIRT
-
- . = ALIGN(8192); /* init_task */
- .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
- *(.data.init_task)
- }:data.init
-
- . = ALIGN(4096);
- .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
- *(.data.page_aligned)
- }
-
- /* might get freed after init */
- . = ALIGN(4096);
- __smp_alt_begin = .;
- __smp_locks = .;
- .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
- *(.smp_locks)
- }
- __smp_locks_end = .;
- . = ALIGN(4096);
- __smp_alt_end = .;
-
- . = ALIGN(4096); /* Init code and data */
- __init_begin = .;
- .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
- _sinittext = .;
- *(.init.text)
- _einittext = .;
- }
- __initdata_begin = .;
- .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { *(.init.data) }
- __initdata_end = .;
- . = ALIGN(16);
- __setup_start = .;
- .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { *(.init.setup) }
- __setup_end = .;
- __initcall_start = .;
- .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
- INITCALLS
- }
- __initcall_end = .;
- __con_initcall_start = .;
- .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
- *(.con_initcall.init)
- }
- __con_initcall_end = .;
- SECURITY_INIT
- . = ALIGN(8);
- __alt_instructions = .;
- .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
- *(.altinstructions)
- }
- __alt_instructions_end = .;
- .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
- *(.altinstr_replacement)
- }
- /* .exit.text is discard at runtime, not link time, to deal with references
- from .altinstructions and .eh_frame */
- .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) }
- .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) }
-
-/* vdso blob that is mapped into user space */
- vdso_start = . ;
- .vdso : AT(ADDR(.vdso) - LOAD_OFFSET) { *(.vdso) }
- . = ALIGN(4096);
- vdso_end = .;
-
-#ifdef CONFIG_BLK_DEV_INITRD
- . = ALIGN(4096);
- __initramfs_start = .;
- .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) }
- __initramfs_end = .;
-#endif
-
- PERCPU(4096)
-
- . = ALIGN(4096);
- __init_end = .;
-
- . = ALIGN(4096);
- __nosave_begin = .;
- .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) }
- . = ALIGN(4096);
- __nosave_end = .;
-
- __bss_start = .; /* BSS */
- .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
- *(.bss.page_aligned)
- *(.bss)
- }
- __bss_stop = .;
-
- _end = . ;
-
- /* Sections to be discarded */
- /DISCARD/ : {
- *(.exitcall.exit)
- *(.eh_frame)
- }
-
- STABS_DEBUG
-
- DWARF_DEBUG
-}
diff --git a/arch/x86_64/kernel/vsmp.c b/arch/x86_64/kernel/vsmp.c
deleted file mode 100644
index 414caf0c5f9a..000000000000
--- a/arch/x86_64/kernel/vsmp.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * vSMPowered(tm) systems specific initialization
- * Copyright (C) 2005 ScaleMP Inc.
- *
- * Use of this code is subject to the terms and conditions of the
- * GNU general public license version 2. See "COPYING" or
- * http://www.gnu.org/licenses/gpl.html
- *
- * Ravikiran Thirumalai <kiran@scalemp.com>,
- * Shai Fultheim <shai@scalemp.com>
- */
-
-#include <linux/init.h>
-#include <linux/pci_ids.h>
-#include <linux/pci_regs.h>
-#include <asm/pci-direct.h>
-#include <asm/io.h>
-
-static int __init vsmp_init(void)
-{
- void *address;
- unsigned int cap, ctl;
-
- if (!early_pci_allowed())
- return 0;
-
- /* Check if we are running on a ScaleMP vSMP box */
- if ((read_pci_config_16(0, 0x1f, 0, PCI_VENDOR_ID) != PCI_VENDOR_ID_SCALEMP) ||
- (read_pci_config_16(0, 0x1f, 0, PCI_DEVICE_ID) != PCI_DEVICE_ID_SCALEMP_VSMP_CTL))
- return 0;
-
- /* set vSMP magic bits to indicate vSMP capable kernel */
- address = ioremap(read_pci_config(0, 0x1f, 0, PCI_BASE_ADDRESS_0), 8);
- cap = readl(address);
- ctl = readl(address + 4);
- printk("vSMP CTL: capabilities:0x%08x control:0x%08x\n", cap, ctl);
- if (cap & ctl & (1 << 4)) {
- /* Turn on vSMP IRQ fastpath handling (see system.h) */
- ctl &= ~(1 << 4);
- writel(ctl, address + 4);
- ctl = readl(address + 4);
- printk("vSMP CTL: control set to:0x%08x\n", ctl);
- }
-
- iounmap(address);
- return 0;
-}
-
-core_initcall(vsmp_init);
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
deleted file mode 100644
index 06c34949bfdc..000000000000
--- a/arch/x86_64/kernel/vsyscall.c
+++ /dev/null
@@ -1,349 +0,0 @@
-/*
- * linux/arch/x86_64/kernel/vsyscall.c
- *
- * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
- * Copyright 2003 Andi Kleen, SuSE Labs.
- *
- * Thanks to hpa@transmeta.com for some useful hint.
- * Special thanks to Ingo Molnar for his early experience with
- * a different vsyscall implementation for Linux/IA32 and for the name.
- *
- * vsyscall 1 is located at -10Mbyte, vsyscall 2 is located
- * at virtual address -10Mbyte+1024bytes etc... There are at max 4
- * vsyscalls. One vsyscall can reserve more than 1 slot to avoid
- * jumping out of line if necessary. We cannot add more with this
- * mechanism because older kernels won't return -ENOSYS.
- * If we want more than four we need a vDSO.
- *
- * Note: the concept clashes with user mode linux. If you use UML and
- * want per guest time just set the kernel.vsyscall64 sysctl to 0.
- */
-
-#include <linux/time.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/seqlock.h>
-#include <linux/jiffies.h>
-#include <linux/sysctl.h>
-#include <linux/clocksource.h>
-#include <linux/getcpu.h>
-#include <linux/cpu.h>
-#include <linux/smp.h>
-#include <linux/notifier.h>
-
-#include <asm/vsyscall.h>
-#include <asm/pgtable.h>
-#include <asm/page.h>
-#include <asm/unistd.h>
-#include <asm/fixmap.h>
-#include <asm/errno.h>
-#include <asm/io.h>
-#include <asm/segment.h>
-#include <asm/desc.h>
-#include <asm/topology.h>
-#include <asm/vgtod.h>
-
-#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
-#define __syscall_clobber "r11","rcx","memory"
-#define __pa_vsymbol(x) \
- ({unsigned long v; \
- extern char __vsyscall_0; \
- asm("" : "=r" (v) : "0" (x)); \
- ((v - VSYSCALL_FIRST_PAGE) + __pa_symbol(&__vsyscall_0)); })
-
-/*
- * vsyscall_gtod_data contains data that is :
- * - readonly from vsyscalls
- * - writen by timer interrupt or systcl (/proc/sys/kernel/vsyscall64)
- * Try to keep this structure as small as possible to avoid cache line ping pongs
- */
-int __vgetcpu_mode __section_vgetcpu_mode;
-
-struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data =
-{
- .lock = SEQLOCK_UNLOCKED,
- .sysctl_enabled = 1,
-};
-
-void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
-{
- unsigned long flags;
-
- write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
- /* copy vsyscall data */
- vsyscall_gtod_data.clock.vread = clock->vread;
- vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
- vsyscall_gtod_data.clock.mask = clock->mask;
- vsyscall_gtod_data.clock.mult = clock->mult;
- vsyscall_gtod_data.clock.shift = clock->shift;
- vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
- vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
- vsyscall_gtod_data.sys_tz = sys_tz;
- vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
- vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic;
- write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
-}
-
-/* RED-PEN may want to readd seq locking, but then the variable should be
- * write-once.
- */
-static __always_inline void do_get_tz(struct timezone * tz)
-{
- *tz = __vsyscall_gtod_data.sys_tz;
-}
-
-static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
-{
- int ret;
- asm volatile("vsysc2: syscall"
- : "=a" (ret)
- : "0" (__NR_gettimeofday),"D" (tv),"S" (tz)
- : __syscall_clobber );
- return ret;
-}
-
-static __always_inline long time_syscall(long *t)
-{
- long secs;
- asm volatile("vsysc1: syscall"
- : "=a" (secs)
- : "0" (__NR_time),"D" (t) : __syscall_clobber);
- return secs;
-}
-
-static __always_inline void do_vgettimeofday(struct timeval * tv)
-{
- cycle_t now, base, mask, cycle_delta;
- unsigned seq;
- unsigned long mult, shift, nsec;
- cycle_t (*vread)(void);
- do {
- seq = read_seqbegin(&__vsyscall_gtod_data.lock);
-
- vread = __vsyscall_gtod_data.clock.vread;
- if (unlikely(!__vsyscall_gtod_data.sysctl_enabled || !vread)) {
- gettimeofday(tv,NULL);
- return;
- }
- now = vread();
- base = __vsyscall_gtod_data.clock.cycle_last;
- mask = __vsyscall_gtod_data.clock.mask;
- mult = __vsyscall_gtod_data.clock.mult;
- shift = __vsyscall_gtod_data.clock.shift;
-
- tv->tv_sec = __vsyscall_gtod_data.wall_time_sec;
- nsec = __vsyscall_gtod_data.wall_time_nsec;
- } while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
-
- /* calculate interval: */
- cycle_delta = (now - base) & mask;
- /* convert to nsecs: */
- nsec += (cycle_delta * mult) >> shift;
-
- while (nsec >= NSEC_PER_SEC) {
- tv->tv_sec += 1;
- nsec -= NSEC_PER_SEC;
- }
- tv->tv_usec = nsec / NSEC_PER_USEC;
-}
-
-int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
-{
- if (tv)
- do_vgettimeofday(tv);
- if (tz)
- do_get_tz(tz);
- return 0;
-}
-
-/* This will break when the xtime seconds get inaccurate, but that is
- * unlikely */
-time_t __vsyscall(1) vtime(time_t *t)
-{
- struct timeval tv;
- time_t result;
- if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
- return time_syscall(t);
-
- vgettimeofday(&tv, 0);
- result = tv.tv_sec;
- if (t)
- *t = result;
- return result;
-}
-
-/* Fast way to get current CPU and node.
- This helps to do per node and per CPU caches in user space.
- The result is not guaranteed without CPU affinity, but usually
- works out because the scheduler tries to keep a thread on the same
- CPU.
-
- tcache must point to a two element sized long array.
- All arguments can be NULL. */
-long __vsyscall(2)
-vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
-{
- unsigned int dummy, p;
- unsigned long j = 0;
-
- /* Fast cache - only recompute value once per jiffies and avoid
- relatively costly rdtscp/cpuid otherwise.
- This works because the scheduler usually keeps the process
- on the same CPU and this syscall doesn't guarantee its
- results anyways.
- We do this here because otherwise user space would do it on
- its own in a likely inferior way (no access to jiffies).
- If you don't like it pass NULL. */
- if (tcache && tcache->blob[0] == (j = __jiffies)) {
- p = tcache->blob[1];
- } else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
- /* Load per CPU data from RDTSCP */
- rdtscp(dummy, dummy, p);
- } else {
- /* Load per CPU data from GDT */
- asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
- }
- if (tcache) {
- tcache->blob[0] = j;
- tcache->blob[1] = p;
- }
- if (cpu)
- *cpu = p & 0xfff;
- if (node)
- *node = p >> 12;
- return 0;
-}
-
-long __vsyscall(3) venosys_1(void)
-{
- return -ENOSYS;
-}
-
-#ifdef CONFIG_SYSCTL
-
-#define SYSCALL 0x050f
-#define NOP2 0x9090
-
-/*
- * NOP out syscall in vsyscall page when not needed.
- */
-static int vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- extern u16 vsysc1, vsysc2;
- u16 __iomem *map1;
- u16 __iomem *map2;
- int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
- if (!write)
- return ret;
- /* gcc has some trouble with __va(__pa()), so just do it this
- way. */
- map1 = ioremap(__pa_vsymbol(&vsysc1), 2);
- if (!map1)
- return -ENOMEM;
- map2 = ioremap(__pa_vsymbol(&vsysc2), 2);
- if (!map2) {
- ret = -ENOMEM;
- goto out;
- }
- if (!vsyscall_gtod_data.sysctl_enabled) {
- writew(SYSCALL, map1);
- writew(SYSCALL, map2);
- } else {
- writew(NOP2, map1);
- writew(NOP2, map2);
- }
- iounmap(map2);
-out:
- iounmap(map1);
- return ret;
-}
-
-static int vsyscall_sysctl_nostrat(ctl_table *t, int __user *name, int nlen,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen)
-{
- return -ENOSYS;
-}
-
-static ctl_table kernel_table2[] = {
- { .ctl_name = 99, .procname = "vsyscall64",
- .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int),
- .mode = 0644,
- .strategy = vsyscall_sysctl_nostrat,
- .proc_handler = vsyscall_sysctl_change },
- {}
-};
-
-static ctl_table kernel_root_table2[] = {
- { .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555,
- .child = kernel_table2 },
- {}
-};
-
-#endif
-
-/* Assume __initcall executes before all user space. Hopefully kmod
- doesn't violate that. We'll find out if it does. */
-static void __cpuinit vsyscall_set_cpu(int cpu)
-{
- unsigned long *d;
- unsigned long node = 0;
-#ifdef CONFIG_NUMA
- node = cpu_to_node[cpu];
-#endif
- if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP))
- write_rdtscp_aux((node << 12) | cpu);
-
- /* Store cpu number in limit so that it can be loaded quickly
- in user space in vgetcpu.
- 12 bits for the CPU and 8 bits for the node. */
- d = (unsigned long *)(cpu_gdt(cpu) + GDT_ENTRY_PER_CPU);
- *d = 0x0f40000000000ULL;
- *d |= cpu;
- *d |= (node & 0xf) << 12;
- *d |= (node >> 4) << 48;
-}
-
-static void __cpuinit cpu_vsyscall_init(void *arg)
-{
- /* preemption should be already off */
- vsyscall_set_cpu(raw_smp_processor_id());
-}
-
-static int __cpuinit
-cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
-{
- long cpu = (long)arg;
- if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
- smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
- return NOTIFY_DONE;
-}
-
-static void __init map_vsyscall(void)
-{
- extern char __vsyscall_0;
- unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
-
- /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */
- __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
-}
-
-static int __init vsyscall_init(void)
-{
- BUG_ON(((unsigned long) &vgettimeofday !=
- VSYSCALL_ADDR(__NR_vgettimeofday)));
- BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
- BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
- BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu));
- map_vsyscall();
-#ifdef CONFIG_SYSCTL
- register_sysctl_table(kernel_root_table2);
-#endif
- on_each_cpu(cpu_vsyscall_init, NULL, 0, 1);
- hotcpu_notifier(cpu_vsyscall_notifier, 0);
- return 0;
-}
-
-__initcall(vsyscall_init);
diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c
deleted file mode 100644
index 77c25b307635..000000000000
--- a/arch/x86_64/kernel/x8664_ksyms.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/* Exports for assembly files.
- All C exports should go in the respective C files. */
-
-#include <linux/module.h>
-#include <linux/smp.h>
-
-#include <asm/semaphore.h>
-#include <asm/processor.h>
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-
-EXPORT_SYMBOL(kernel_thread);
-
-EXPORT_SYMBOL(__down_failed);
-EXPORT_SYMBOL(__down_failed_interruptible);
-EXPORT_SYMBOL(__down_failed_trylock);
-EXPORT_SYMBOL(__up_wakeup);
-
-EXPORT_SYMBOL(__get_user_1);
-EXPORT_SYMBOL(__get_user_2);
-EXPORT_SYMBOL(__get_user_4);
-EXPORT_SYMBOL(__get_user_8);
-EXPORT_SYMBOL(__put_user_1);
-EXPORT_SYMBOL(__put_user_2);
-EXPORT_SYMBOL(__put_user_4);
-EXPORT_SYMBOL(__put_user_8);
-
-EXPORT_SYMBOL(copy_user_generic);
-EXPORT_SYMBOL(__copy_user_nocache);
-EXPORT_SYMBOL(copy_from_user);
-EXPORT_SYMBOL(copy_to_user);
-EXPORT_SYMBOL(__copy_from_user_inatomic);
-
-EXPORT_SYMBOL(copy_page);
-EXPORT_SYMBOL(clear_page);
-
-#ifdef CONFIG_SMP
-extern void __write_lock_failed(rwlock_t *rw);
-extern void __read_lock_failed(rwlock_t *rw);
-EXPORT_SYMBOL(__write_lock_failed);
-EXPORT_SYMBOL(__read_lock_failed);
-#endif
-
-/* Export string functions. We normally rely on gcc builtin for most of these,
- but gcc sometimes decides not to inline them. */
-#undef memcpy
-#undef memset
-#undef memmove
-
-extern void * memset(void *,int,__kernel_size_t);
-extern void * memcpy(void *,const void *,__kernel_size_t);
-extern void * __memcpy(void *,const void *,__kernel_size_t);
-
-EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(memcpy);
-EXPORT_SYMBOL(__memcpy);
-
-EXPORT_SYMBOL(empty_zero_page);
-EXPORT_SYMBOL(init_level4_pgt);
-EXPORT_SYMBOL(load_gs_index);
-
-EXPORT_SYMBOL(_proxy_pda);
diff --git a/arch/x86_64/lib/Makefile b/arch/x86_64/lib/Makefile
deleted file mode 100644
index c94327178398..000000000000
--- a/arch/x86_64/lib/Makefile
+++ /dev/null
@@ -1,13 +0,0 @@
-#
-# Makefile for x86_64-specific library files.
-#
-
-CFLAGS_csum-partial.o := -funroll-loops
-
-obj-y := io.o iomap_copy.o
-obj-$(CONFIG_SMP) += msr-on-cpu.o
-
-lib-y := csum-partial.o csum-copy.o csum-wrappers.o delay.o \
- usercopy.o getuser.o putuser.o \
- thunk.o clear_page.o copy_page.o bitstr.o bitops.o
-lib-y += memcpy.o memmove.o memset.o copy_user.o rwlock.o copy_user_nocache.o
diff --git a/arch/x86_64/lib/bitops.c b/arch/x86_64/lib/bitops.c
deleted file mode 100644
index 95b6d9639fba..000000000000
--- a/arch/x86_64/lib/bitops.c
+++ /dev/null
@@ -1,175 +0,0 @@
-#include <linux/bitops.h>
-
-#undef find_first_zero_bit
-#undef find_next_zero_bit
-#undef find_first_bit
-#undef find_next_bit
-
-static inline long
-__find_first_zero_bit(const unsigned long * addr, unsigned long size)
-{
- long d0, d1, d2;
- long res;
-
- /*
- * We must test the size in words, not in bits, because
- * otherwise incoming sizes in the range -63..-1 will not run
- * any scasq instructions, and then the flags used by the je
- * instruction will have whatever random value was in place
- * before. Nobody should call us like that, but
- * find_next_zero_bit() does when offset and size are at the
- * same word and it fails to find a zero itself.
- */
- size += 63;
- size >>= 6;
- if (!size)
- return 0;
- asm volatile(
- " repe; scasq\n"
- " je 1f\n"
- " xorq -8(%%rdi),%%rax\n"
- " subq $8,%%rdi\n"
- " bsfq %%rax,%%rdx\n"
- "1: subq %[addr],%%rdi\n"
- " shlq $3,%%rdi\n"
- " addq %%rdi,%%rdx"
- :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2)
- :"0" (0ULL), "1" (size), "2" (addr), "3" (-1ULL),
- [addr] "S" (addr) : "memory");
- /*
- * Any register would do for [addr] above, but GCC tends to
- * prefer rbx over rsi, even though rsi is readily available
- * and doesn't have to be saved.
- */
- return res;
-}
-
-/**
- * find_first_zero_bit - find the first zero bit in a memory region
- * @addr: The address to start the search at
- * @size: The maximum size to search
- *
- * Returns the bit-number of the first zero bit, not the number of the byte
- * containing a bit.
- */
-long find_first_zero_bit(const unsigned long * addr, unsigned long size)
-{
- return __find_first_zero_bit (addr, size);
-}
-
-/**
- * find_next_zero_bit - find the first zero bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The maximum size to search
- */
-long find_next_zero_bit (const unsigned long * addr, long size, long offset)
-{
- const unsigned long * p = addr + (offset >> 6);
- unsigned long set = 0;
- unsigned long res, bit = offset&63;
-
- if (bit) {
- /*
- * Look for zero in first word
- */
- asm("bsfq %1,%0\n\t"
- "cmoveq %2,%0"
- : "=r" (set)
- : "r" (~(*p >> bit)), "r"(64L));
- if (set < (64 - bit))
- return set + offset;
- set = 64 - bit;
- p++;
- }
- /*
- * No zero yet, search remaining full words for a zero
- */
- res = __find_first_zero_bit (p, size - 64 * (p - addr));
-
- return (offset + set + res);
-}
-
-static inline long
-__find_first_bit(const unsigned long * addr, unsigned long size)
-{
- long d0, d1;
- long res;
-
- /*
- * We must test the size in words, not in bits, because
- * otherwise incoming sizes in the range -63..-1 will not run
- * any scasq instructions, and then the flags used by the jz
- * instruction will have whatever random value was in place
- * before. Nobody should call us like that, but
- * find_next_bit() does when offset and size are at the same
- * word and it fails to find a one itself.
- */
- size += 63;
- size >>= 6;
- if (!size)
- return 0;
- asm volatile(
- " repe; scasq\n"
- " jz 1f\n"
- " subq $8,%%rdi\n"
- " bsfq (%%rdi),%%rax\n"
- "1: subq %[addr],%%rdi\n"
- " shlq $3,%%rdi\n"
- " addq %%rdi,%%rax"
- :"=a" (res), "=&c" (d0), "=&D" (d1)
- :"0" (0ULL), "1" (size), "2" (addr),
- [addr] "r" (addr) : "memory");
- return res;
-}
-
-/**
- * find_first_bit - find the first set bit in a memory region
- * @addr: The address to start the search at
- * @size: The maximum size to search
- *
- * Returns the bit-number of the first set bit, not the number of the byte
- * containing a bit.
- */
-long find_first_bit(const unsigned long * addr, unsigned long size)
-{
- return __find_first_bit(addr,size);
-}
-
-/**
- * find_next_bit - find the first set bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The maximum size to search
- */
-long find_next_bit(const unsigned long * addr, long size, long offset)
-{
- const unsigned long * p = addr + (offset >> 6);
- unsigned long set = 0, bit = offset & 63, res;
-
- if (bit) {
- /*
- * Look for nonzero in the first 64 bits:
- */
- asm("bsfq %1,%0\n\t"
- "cmoveq %2,%0\n\t"
- : "=r" (set)
- : "r" (*p >> bit), "r" (64L));
- if (set < (64 - bit))
- return set + offset;
- set = 64 - bit;
- p++;
- }
- /*
- * No set bit yet, search remaining full words for a bit
- */
- res = __find_first_bit (p, size - 64 * (p - addr));
- return (offset + set + res);
-}
-
-#include <linux/module.h>
-
-EXPORT_SYMBOL(find_next_bit);
-EXPORT_SYMBOL(find_first_bit);
-EXPORT_SYMBOL(find_first_zero_bit);
-EXPORT_SYMBOL(find_next_zero_bit);
diff --git a/arch/x86_64/lib/bitstr.c b/arch/x86_64/lib/bitstr.c
deleted file mode 100644
index 24676609a6ac..000000000000
--- a/arch/x86_64/lib/bitstr.c
+++ /dev/null
@@ -1,28 +0,0 @@
-#include <linux/module.h>
-#include <linux/bitops.h>
-
-/* Find string of zero bits in a bitmap */
-unsigned long
-find_next_zero_string(unsigned long *bitmap, long start, long nbits, int len)
-{
- unsigned long n, end, i;
-
- again:
- n = find_next_zero_bit(bitmap, nbits, start);
- if (n == -1)
- return -1;
-
- /* could test bitsliced, but it's hardly worth it */
- end = n+len;
- if (end >= nbits)
- return -1;
- for (i = n+1; i < end; i++) {
- if (test_bit(i, bitmap)) {
- start = i+1;
- goto again;
- }
- }
- return n;
-}
-
-EXPORT_SYMBOL(find_next_zero_string);
diff --git a/arch/x86_64/lib/clear_page.S b/arch/x86_64/lib/clear_page.S
deleted file mode 100644
index 9a10a78bb4a4..000000000000
--- a/arch/x86_64/lib/clear_page.S
+++ /dev/null
@@ -1,59 +0,0 @@
-#include <linux/linkage.h>
-#include <asm/dwarf2.h>
-
-/*
- * Zero a page.
- * rdi page
- */
- ALIGN
-clear_page_c:
- CFI_STARTPROC
- movl $4096/8,%ecx
- xorl %eax,%eax
- rep stosq
- ret
- CFI_ENDPROC
-ENDPROC(clear_page)
-
-ENTRY(clear_page)
- CFI_STARTPROC
- xorl %eax,%eax
- movl $4096/64,%ecx
- .p2align 4
-.Lloop:
- decl %ecx
-#define PUT(x) movq %rax,x*8(%rdi)
- movq %rax,(%rdi)
- PUT(1)
- PUT(2)
- PUT(3)
- PUT(4)
- PUT(5)
- PUT(6)
- PUT(7)
- leaq 64(%rdi),%rdi
- jnz .Lloop
- nop
- ret
- CFI_ENDPROC
-.Lclear_page_end:
-ENDPROC(clear_page)
-
- /* Some CPUs run faster using the string instructions.
- It is also a lot simpler. Use this when possible */
-
-#include <asm/cpufeature.h>
-
- .section .altinstr_replacement,"ax"
-1: .byte 0xeb /* jmp <disp8> */
- .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */
-2:
- .previous
- .section .altinstructions,"a"
- .align 8
- .quad clear_page
- .quad 1b
- .byte X86_FEATURE_REP_GOOD
- .byte .Lclear_page_end - clear_page
- .byte 2b - 1b
- .previous
diff --git a/arch/x86_64/lib/copy_page.S b/arch/x86_64/lib/copy_page.S
deleted file mode 100644
index 727a5d46d2fc..000000000000
--- a/arch/x86_64/lib/copy_page.S
+++ /dev/null
@@ -1,119 +0,0 @@
-/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
-
-#include <linux/linkage.h>
-#include <asm/dwarf2.h>
-
- ALIGN
-copy_page_c:
- CFI_STARTPROC
- movl $4096/8,%ecx
- rep movsq
- ret
- CFI_ENDPROC
-ENDPROC(copy_page_c)
-
-/* Don't use streaming store because it's better when the target
- ends up in cache. */
-
-/* Could vary the prefetch distance based on SMP/UP */
-
-ENTRY(copy_page)
- CFI_STARTPROC
- subq $3*8,%rsp
- CFI_ADJUST_CFA_OFFSET 3*8
- movq %rbx,(%rsp)
- CFI_REL_OFFSET rbx, 0
- movq %r12,1*8(%rsp)
- CFI_REL_OFFSET r12, 1*8
- movq %r13,2*8(%rsp)
- CFI_REL_OFFSET r13, 2*8
-
- movl $(4096/64)-5,%ecx
- .p2align 4
-.Loop64:
- dec %rcx
-
- movq (%rsi), %rax
- movq 8 (%rsi), %rbx
- movq 16 (%rsi), %rdx
- movq 24 (%rsi), %r8
- movq 32 (%rsi), %r9
- movq 40 (%rsi), %r10
- movq 48 (%rsi), %r11
- movq 56 (%rsi), %r12
-
- prefetcht0 5*64(%rsi)
-
- movq %rax, (%rdi)
- movq %rbx, 8 (%rdi)
- movq %rdx, 16 (%rdi)
- movq %r8, 24 (%rdi)
- movq %r9, 32 (%rdi)
- movq %r10, 40 (%rdi)
- movq %r11, 48 (%rdi)
- movq %r12, 56 (%rdi)
-
- leaq 64 (%rsi), %rsi
- leaq 64 (%rdi), %rdi
-
- jnz .Loop64
-
- movl $5,%ecx
- .p2align 4
-.Loop2:
- decl %ecx
-
- movq (%rsi), %rax
- movq 8 (%rsi), %rbx
- movq 16 (%rsi), %rdx
- movq 24 (%rsi), %r8
- movq 32 (%rsi), %r9
- movq 40 (%rsi), %r10
- movq 48 (%rsi), %r11
- movq 56 (%rsi), %r12
-
- movq %rax, (%rdi)
- movq %rbx, 8 (%rdi)
- movq %rdx, 16 (%rdi)
- movq %r8, 24 (%rdi)
- movq %r9, 32 (%rdi)
- movq %r10, 40 (%rdi)
- movq %r11, 48 (%rdi)
- movq %r12, 56 (%rdi)
-
- leaq 64(%rdi),%rdi
- leaq 64(%rsi),%rsi
-
- jnz .Loop2
-
- movq (%rsp),%rbx
- CFI_RESTORE rbx
- movq 1*8(%rsp),%r12
- CFI_RESTORE r12
- movq 2*8(%rsp),%r13
- CFI_RESTORE r13
- addq $3*8,%rsp
- CFI_ADJUST_CFA_OFFSET -3*8
- ret
-.Lcopy_page_end:
- CFI_ENDPROC
-ENDPROC(copy_page)
-
- /* Some CPUs run faster using the string copy instructions.
- It is also a lot simpler. Use this when possible */
-
-#include <asm/cpufeature.h>
-
- .section .altinstr_replacement,"ax"
-1: .byte 0xeb /* jmp <disp8> */
- .byte (copy_page_c - copy_page) - (2f - 1b) /* offset */
-2:
- .previous
- .section .altinstructions,"a"
- .align 8
- .quad copy_page
- .quad 1b
- .byte X86_FEATURE_REP_GOOD
- .byte .Lcopy_page_end - copy_page
- .byte 2b - 1b
- .previous
diff --git a/arch/x86_64/lib/copy_user.S b/arch/x86_64/lib/copy_user.S
deleted file mode 100644
index 70bebd310408..000000000000
--- a/arch/x86_64/lib/copy_user.S
+++ /dev/null
@@ -1,354 +0,0 @@
-/* Copyright 2002 Andi Kleen, SuSE Labs.
- * Subject to the GNU Public License v2.
- *
- * Functions to copy from and to user space.
- */
-
-#include <linux/linkage.h>
-#include <asm/dwarf2.h>
-
-#define FIX_ALIGNMENT 1
-
-#include <asm/current.h>
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-#include <asm/cpufeature.h>
-
- .macro ALTERNATIVE_JUMP feature,orig,alt
-0:
- .byte 0xe9 /* 32bit jump */
- .long \orig-1f /* by default jump to orig */
-1:
- .section .altinstr_replacement,"ax"
-2: .byte 0xe9 /* near jump with 32bit immediate */
- .long \alt-1b /* offset */ /* or alternatively to alt */
- .previous
- .section .altinstructions,"a"
- .align 8
- .quad 0b
- .quad 2b
- .byte \feature /* when feature is set */
- .byte 5
- .byte 5
- .previous
- .endm
-
-/* Standard copy_to_user with segment limit checking */
-ENTRY(copy_to_user)
- CFI_STARTPROC
- GET_THREAD_INFO(%rax)
- movq %rdi,%rcx
- addq %rdx,%rcx
- jc bad_to_user
- cmpq threadinfo_addr_limit(%rax),%rcx
- jae bad_to_user
- xorl %eax,%eax /* clear zero flag */
- ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
- CFI_ENDPROC
-
-ENTRY(copy_user_generic)
- CFI_STARTPROC
- movl $1,%ecx /* set zero flag */
- ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
- CFI_ENDPROC
-
-ENTRY(__copy_from_user_inatomic)
- CFI_STARTPROC
- xorl %ecx,%ecx /* clear zero flag */
- ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
- CFI_ENDPROC
-
-/* Standard copy_from_user with segment limit checking */
-ENTRY(copy_from_user)
- CFI_STARTPROC
- GET_THREAD_INFO(%rax)
- movq %rsi,%rcx
- addq %rdx,%rcx
- jc bad_from_user
- cmpq threadinfo_addr_limit(%rax),%rcx
- jae bad_from_user
- movl $1,%ecx /* set zero flag */
- ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
- CFI_ENDPROC
-ENDPROC(copy_from_user)
-
- .section .fixup,"ax"
- /* must zero dest */
-bad_from_user:
- CFI_STARTPROC
- movl %edx,%ecx
- xorl %eax,%eax
- rep
- stosb
-bad_to_user:
- movl %edx,%eax
- ret
- CFI_ENDPROC
-END(bad_from_user)
- .previous
-
-
-/*
- * copy_user_generic_unrolled - memory copy with exception handling.
- * This version is for CPUs like P4 that don't have efficient micro code for rep movsq
- *
- * Input:
- * rdi destination
- * rsi source
- * rdx count
- * ecx zero flag -- if true zero destination on error
- *
- * Output:
- * eax uncopied bytes or 0 if successful.
- */
-ENTRY(copy_user_generic_unrolled)
- CFI_STARTPROC
- pushq %rbx
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rbx, 0
- pushq %rcx
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rcx, 0
- xorl %eax,%eax /*zero for the exception handler */
-
-#ifdef FIX_ALIGNMENT
- /* check for bad alignment of destination */
- movl %edi,%ecx
- andl $7,%ecx
- jnz .Lbad_alignment
-.Lafter_bad_alignment:
-#endif
-
- movq %rdx,%rcx
-
- movl $64,%ebx
- shrq $6,%rdx
- decq %rdx
- js .Lhandle_tail
-
- .p2align 4
-.Lloop:
-.Ls1: movq (%rsi),%r11
-.Ls2: movq 1*8(%rsi),%r8
-.Ls3: movq 2*8(%rsi),%r9
-.Ls4: movq 3*8(%rsi),%r10
-.Ld1: movq %r11,(%rdi)
-.Ld2: movq %r8,1*8(%rdi)
-.Ld3: movq %r9,2*8(%rdi)
-.Ld4: movq %r10,3*8(%rdi)
-
-.Ls5: movq 4*8(%rsi),%r11
-.Ls6: movq 5*8(%rsi),%r8
-.Ls7: movq 6*8(%rsi),%r9
-.Ls8: movq 7*8(%rsi),%r10
-.Ld5: movq %r11,4*8(%rdi)
-.Ld6: movq %r8,5*8(%rdi)
-.Ld7: movq %r9,6*8(%rdi)
-.Ld8: movq %r10,7*8(%rdi)
-
- decq %rdx
-
- leaq 64(%rsi),%rsi
- leaq 64(%rdi),%rdi
-
- jns .Lloop
-
- .p2align 4
-.Lhandle_tail:
- movl %ecx,%edx
- andl $63,%ecx
- shrl $3,%ecx
- jz .Lhandle_7
- movl $8,%ebx
- .p2align 4
-.Lloop_8:
-.Ls9: movq (%rsi),%r8
-.Ld9: movq %r8,(%rdi)
- decl %ecx
- leaq 8(%rdi),%rdi
- leaq 8(%rsi),%rsi
- jnz .Lloop_8
-
-.Lhandle_7:
- movl %edx,%ecx
- andl $7,%ecx
- jz .Lende
- .p2align 4
-.Lloop_1:
-.Ls10: movb (%rsi),%bl
-.Ld10: movb %bl,(%rdi)
- incq %rdi
- incq %rsi
- decl %ecx
- jnz .Lloop_1
-
- CFI_REMEMBER_STATE
-.Lende:
- popq %rcx
- CFI_ADJUST_CFA_OFFSET -8
- CFI_RESTORE rcx
- popq %rbx
- CFI_ADJUST_CFA_OFFSET -8
- CFI_RESTORE rbx
- ret
- CFI_RESTORE_STATE
-
-#ifdef FIX_ALIGNMENT
- /* align destination */
- .p2align 4
-.Lbad_alignment:
- movl $8,%r9d
- subl %ecx,%r9d
- movl %r9d,%ecx
- cmpq %r9,%rdx
- jz .Lhandle_7
- js .Lhandle_7
-.Lalign_1:
-.Ls11: movb (%rsi),%bl
-.Ld11: movb %bl,(%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz .Lalign_1
- subq %r9,%rdx
- jmp .Lafter_bad_alignment
-#endif
-
- /* table sorted by exception address */
- .section __ex_table,"a"
- .align 8
- .quad .Ls1,.Ls1e
- .quad .Ls2,.Ls2e
- .quad .Ls3,.Ls3e
- .quad .Ls4,.Ls4e
- .quad .Ld1,.Ls1e
- .quad .Ld2,.Ls2e
- .quad .Ld3,.Ls3e
- .quad .Ld4,.Ls4e
- .quad .Ls5,.Ls5e
- .quad .Ls6,.Ls6e
- .quad .Ls7,.Ls7e
- .quad .Ls8,.Ls8e
- .quad .Ld5,.Ls5e
- .quad .Ld6,.Ls6e
- .quad .Ld7,.Ls7e
- .quad .Ld8,.Ls8e
- .quad .Ls9,.Le_quad
- .quad .Ld9,.Le_quad
- .quad .Ls10,.Le_byte
- .quad .Ld10,.Le_byte
-#ifdef FIX_ALIGNMENT
- .quad .Ls11,.Lzero_rest
- .quad .Ld11,.Lzero_rest
-#endif
- .quad .Le5,.Le_zero
- .previous
-
- /* compute 64-offset for main loop. 8 bytes accuracy with error on the
- pessimistic side. this is gross. it would be better to fix the
- interface. */
- /* eax: zero, ebx: 64 */
-.Ls1e: addl $8,%eax
-.Ls2e: addl $8,%eax
-.Ls3e: addl $8,%eax
-.Ls4e: addl $8,%eax
-.Ls5e: addl $8,%eax
-.Ls6e: addl $8,%eax
-.Ls7e: addl $8,%eax
-.Ls8e: addl $8,%eax
- addq %rbx,%rdi /* +64 */
- subq %rax,%rdi /* correct destination with computed offset */
-
- shlq $6,%rdx /* loop counter * 64 (stride length) */
- addq %rax,%rdx /* add offset to loopcnt */
- andl $63,%ecx /* remaining bytes */
- addq %rcx,%rdx /* add them */
- jmp .Lzero_rest
-
- /* exception on quad word loop in tail handling */
- /* ecx: loopcnt/8, %edx: length, rdi: correct */
-.Le_quad:
- shll $3,%ecx
- andl $7,%edx
- addl %ecx,%edx
- /* edx: bytes to zero, rdi: dest, eax:zero */
-.Lzero_rest:
- cmpl $0,(%rsp)
- jz .Le_zero
- movq %rdx,%rcx
-.Le_byte:
- xorl %eax,%eax
-.Le5: rep
- stosb
- /* when there is another exception while zeroing the rest just return */
-.Le_zero:
- movq %rdx,%rax
- jmp .Lende
- CFI_ENDPROC
-ENDPROC(copy_user_generic)
-
-
- /* Some CPUs run faster using the string copy instructions.
- This is also a lot simpler. Use them when possible.
- Patch in jmps to this code instead of copying it fully
- to avoid unwanted aliasing in the exception tables. */
-
- /* rdi destination
- * rsi source
- * rdx count
- * ecx zero flag
- *
- * Output:
- * eax uncopied bytes or 0 if successfull.
- *
- * Only 4GB of copy is supported. This shouldn't be a problem
- * because the kernel normally only writes from/to page sized chunks
- * even if user space passed a longer buffer.
- * And more would be dangerous because both Intel and AMD have
- * errata with rep movsq > 4GB. If someone feels the need to fix
- * this please consider this.
- */
-ENTRY(copy_user_generic_string)
- CFI_STARTPROC
- movl %ecx,%r8d /* save zero flag */
- movl %edx,%ecx
- shrl $3,%ecx
- andl $7,%edx
- jz 10f
-1: rep
- movsq
- movl %edx,%ecx
-2: rep
- movsb
-9: movl %ecx,%eax
- ret
-
- /* multiple of 8 byte */
-10: rep
- movsq
- xor %eax,%eax
- ret
-
- /* exception handling */
-3: lea (%rdx,%rcx,8),%rax /* exception on quad loop */
- jmp 6f
-5: movl %ecx,%eax /* exception on byte loop */
- /* eax: left over bytes */
-6: testl %r8d,%r8d /* zero flag set? */
- jz 7f
- movl %eax,%ecx /* initialize x86 loop counter */
- push %rax
- xorl %eax,%eax
-8: rep
- stosb /* zero the rest */
-11: pop %rax
-7: ret
- CFI_ENDPROC
-END(copy_user_generic_c)
-
- .section __ex_table,"a"
- .quad 1b,3b
- .quad 2b,5b
- .quad 8b,11b
- .quad 10b,3b
- .previous
diff --git a/arch/x86_64/lib/copy_user_nocache.S b/arch/x86_64/lib/copy_user_nocache.S
deleted file mode 100644
index 4620efb12f13..000000000000
--- a/arch/x86_64/lib/copy_user_nocache.S
+++ /dev/null
@@ -1,217 +0,0 @@
-/* Copyright 2002 Andi Kleen, SuSE Labs.
- * Subject to the GNU Public License v2.
- *
- * Functions to copy from and to user space.
- */
-
-#include <linux/linkage.h>
-#include <asm/dwarf2.h>
-
-#define FIX_ALIGNMENT 1
-
-#include <asm/current.h>
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-#include <asm/cpufeature.h>
-
-/*
- * copy_user_nocache - Uncached memory copy with exception handling
- * This will force destination/source out of cache for more performance.
- *
- * Input:
- * rdi destination
- * rsi source
- * rdx count
- * rcx zero flag when 1 zero on exception
- *
- * Output:
- * eax uncopied bytes or 0 if successful.
- */
-ENTRY(__copy_user_nocache)
- CFI_STARTPROC
- pushq %rbx
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rbx, 0
- pushq %rcx /* save zero flag */
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rcx, 0
-
- xorl %eax,%eax /* zero for the exception handler */
-
-#ifdef FIX_ALIGNMENT
- /* check for bad alignment of destination */
- movl %edi,%ecx
- andl $7,%ecx
- jnz .Lbad_alignment
-.Lafter_bad_alignment:
-#endif
-
- movq %rdx,%rcx
-
- movl $64,%ebx
- shrq $6,%rdx
- decq %rdx
- js .Lhandle_tail
-
- .p2align 4
-.Lloop:
-.Ls1: movq (%rsi),%r11
-.Ls2: movq 1*8(%rsi),%r8
-.Ls3: movq 2*8(%rsi),%r9
-.Ls4: movq 3*8(%rsi),%r10
-.Ld1: movnti %r11,(%rdi)
-.Ld2: movnti %r8,1*8(%rdi)
-.Ld3: movnti %r9,2*8(%rdi)
-.Ld4: movnti %r10,3*8(%rdi)
-
-.Ls5: movq 4*8(%rsi),%r11
-.Ls6: movq 5*8(%rsi),%r8
-.Ls7: movq 6*8(%rsi),%r9
-.Ls8: movq 7*8(%rsi),%r10
-.Ld5: movnti %r11,4*8(%rdi)
-.Ld6: movnti %r8,5*8(%rdi)
-.Ld7: movnti %r9,6*8(%rdi)
-.Ld8: movnti %r10,7*8(%rdi)
-
- dec %rdx
-
- leaq 64(%rsi),%rsi
- leaq 64(%rdi),%rdi
-
- jns .Lloop
-
- .p2align 4
-.Lhandle_tail:
- movl %ecx,%edx
- andl $63,%ecx
- shrl $3,%ecx
- jz .Lhandle_7
- movl $8,%ebx
- .p2align 4
-.Lloop_8:
-.Ls9: movq (%rsi),%r8
-.Ld9: movnti %r8,(%rdi)
- decl %ecx
- leaq 8(%rdi),%rdi
- leaq 8(%rsi),%rsi
- jnz .Lloop_8
-
-.Lhandle_7:
- movl %edx,%ecx
- andl $7,%ecx
- jz .Lende
- .p2align 4
-.Lloop_1:
-.Ls10: movb (%rsi),%bl
-.Ld10: movb %bl,(%rdi)
- incq %rdi
- incq %rsi
- decl %ecx
- jnz .Lloop_1
-
- CFI_REMEMBER_STATE
-.Lende:
- popq %rcx
- CFI_ADJUST_CFA_OFFSET -8
- CFI_RESTORE %rcx
- popq %rbx
- CFI_ADJUST_CFA_OFFSET -8
- CFI_RESTORE rbx
- ret
- CFI_RESTORE_STATE
-
-#ifdef FIX_ALIGNMENT
- /* align destination */
- .p2align 4
-.Lbad_alignment:
- movl $8,%r9d
- subl %ecx,%r9d
- movl %r9d,%ecx
- cmpq %r9,%rdx
- jz .Lhandle_7
- js .Lhandle_7
-.Lalign_1:
-.Ls11: movb (%rsi),%bl
-.Ld11: movb %bl,(%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz .Lalign_1
- subq %r9,%rdx
- jmp .Lafter_bad_alignment
-#endif
-
- /* table sorted by exception address */
- .section __ex_table,"a"
- .align 8
- .quad .Ls1,.Ls1e
- .quad .Ls2,.Ls2e
- .quad .Ls3,.Ls3e
- .quad .Ls4,.Ls4e
- .quad .Ld1,.Ls1e
- .quad .Ld2,.Ls2e
- .quad .Ld3,.Ls3e
- .quad .Ld4,.Ls4e
- .quad .Ls5,.Ls5e
- .quad .Ls6,.Ls6e
- .quad .Ls7,.Ls7e
- .quad .Ls8,.Ls8e
- .quad .Ld5,.Ls5e
- .quad .Ld6,.Ls6e
- .quad .Ld7,.Ls7e
- .quad .Ld8,.Ls8e
- .quad .Ls9,.Le_quad
- .quad .Ld9,.Le_quad
- .quad .Ls10,.Le_byte
- .quad .Ld10,.Le_byte
-#ifdef FIX_ALIGNMENT
- .quad .Ls11,.Lzero_rest
- .quad .Ld11,.Lzero_rest
-#endif
- .quad .Le5,.Le_zero
- .previous
-
- /* compute 64-offset for main loop. 8 bytes accuracy with error on the
- pessimistic side. this is gross. it would be better to fix the
- interface. */
- /* eax: zero, ebx: 64 */
-.Ls1e: addl $8,%eax
-.Ls2e: addl $8,%eax
-.Ls3e: addl $8,%eax
-.Ls4e: addl $8,%eax
-.Ls5e: addl $8,%eax
-.Ls6e: addl $8,%eax
-.Ls7e: addl $8,%eax
-.Ls8e: addl $8,%eax
- addq %rbx,%rdi /* +64 */
- subq %rax,%rdi /* correct destination with computed offset */
-
- shlq $6,%rdx /* loop counter * 64 (stride length) */
- addq %rax,%rdx /* add offset to loopcnt */
- andl $63,%ecx /* remaining bytes */
- addq %rcx,%rdx /* add them */
- jmp .Lzero_rest
-
- /* exception on quad word loop in tail handling */
- /* ecx: loopcnt/8, %edx: length, rdi: correct */
-.Le_quad:
- shll $3,%ecx
- andl $7,%edx
- addl %ecx,%edx
- /* edx: bytes to zero, rdi: dest, eax:zero */
-.Lzero_rest:
- cmpl $0,(%rsp) /* zero flag set? */
- jz .Le_zero
- movq %rdx,%rcx
-.Le_byte:
- xorl %eax,%eax
-.Le5: rep
- stosb
- /* when there is another exception while zeroing the rest just return */
-.Le_zero:
- movq %rdx,%rax
- jmp .Lende
- CFI_ENDPROC
-ENDPROC(__copy_user_nocache)
-
-
diff --git a/arch/x86_64/lib/csum-copy.S b/arch/x86_64/lib/csum-copy.S
deleted file mode 100644
index f0dba36578ea..000000000000
--- a/arch/x86_64/lib/csum-copy.S
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- * Copyright 2002,2003 Andi Kleen, SuSE Labs.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file COPYING in the main directory of this archive
- * for more details. No warranty for anything given at all.
- */
-#include <linux/linkage.h>
-#include <asm/dwarf2.h>
-#include <asm/errno.h>
-
-/*
- * Checksum copy with exception handling.
- * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the
- * destination is zeroed.
- *
- * Input
- * rdi source
- * rsi destination
- * edx len (32bit)
- * ecx sum (32bit)
- * r8 src_err_ptr (int)
- * r9 dst_err_ptr (int)
- *
- * Output
- * eax 64bit sum. undefined in case of exception.
- *
- * Wrappers need to take care of valid exception sum and zeroing.
- * They also should align source or destination to 8 bytes.
- */
-
- .macro source
-10:
- .section __ex_table,"a"
- .align 8
- .quad 10b,.Lbad_source
- .previous
- .endm
-
- .macro dest
-20:
- .section __ex_table,"a"
- .align 8
- .quad 20b,.Lbad_dest
- .previous
- .endm
-
- .macro ignore L=.Lignore
-30:
- .section __ex_table,"a"
- .align 8
- .quad 30b,\L
- .previous
- .endm
-
-
-ENTRY(csum_partial_copy_generic)
- CFI_STARTPROC
- cmpl $3*64,%edx
- jle .Lignore
-
-.Lignore:
- subq $7*8,%rsp
- CFI_ADJUST_CFA_OFFSET 7*8
- movq %rbx,2*8(%rsp)
- CFI_REL_OFFSET rbx, 2*8
- movq %r12,3*8(%rsp)
- CFI_REL_OFFSET r12, 3*8
- movq %r14,4*8(%rsp)
- CFI_REL_OFFSET r14, 4*8
- movq %r13,5*8(%rsp)
- CFI_REL_OFFSET r13, 5*8
- movq %rbp,6*8(%rsp)
- CFI_REL_OFFSET rbp, 6*8
-
- movq %r8,(%rsp)
- movq %r9,1*8(%rsp)
-
- movl %ecx,%eax
- movl %edx,%ecx
-
- xorl %r9d,%r9d
- movq %rcx,%r12
-
- shrq $6,%r12
- jz .Lhandle_tail /* < 64 */
-
- clc
-
- /* main loop. clear in 64 byte blocks */
- /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
- /* r11: temp3, rdx: temp4, r12 loopcnt */
- /* r10: temp5, rbp: temp6, r14 temp7, r13 temp8 */
- .p2align 4
-.Lloop:
- source
- movq (%rdi),%rbx
- source
- movq 8(%rdi),%r8
- source
- movq 16(%rdi),%r11
- source
- movq 24(%rdi),%rdx
-
- source
- movq 32(%rdi),%r10
- source
- movq 40(%rdi),%rbp
- source
- movq 48(%rdi),%r14
- source
- movq 56(%rdi),%r13
-
- ignore 2f
- prefetcht0 5*64(%rdi)
-2:
- adcq %rbx,%rax
- adcq %r8,%rax
- adcq %r11,%rax
- adcq %rdx,%rax
- adcq %r10,%rax
- adcq %rbp,%rax
- adcq %r14,%rax
- adcq %r13,%rax
-
- decl %r12d
-
- dest
- movq %rbx,(%rsi)
- dest
- movq %r8,8(%rsi)
- dest
- movq %r11,16(%rsi)
- dest
- movq %rdx,24(%rsi)
-
- dest
- movq %r10,32(%rsi)
- dest
- movq %rbp,40(%rsi)
- dest
- movq %r14,48(%rsi)
- dest
- movq %r13,56(%rsi)
-
-3:
-
- leaq 64(%rdi),%rdi
- leaq 64(%rsi),%rsi
-
- jnz .Lloop
-
- adcq %r9,%rax
-
- /* do last upto 56 bytes */
-.Lhandle_tail:
- /* ecx: count */
- movl %ecx,%r10d
- andl $63,%ecx
- shrl $3,%ecx
- jz .Lfold
- clc
- .p2align 4
-.Lloop_8:
- source
- movq (%rdi),%rbx
- adcq %rbx,%rax
- decl %ecx
- dest
- movq %rbx,(%rsi)
- leaq 8(%rsi),%rsi /* preserve carry */
- leaq 8(%rdi),%rdi
- jnz .Lloop_8
- adcq %r9,%rax /* add in carry */
-
-.Lfold:
- /* reduce checksum to 32bits */
- movl %eax,%ebx
- shrq $32,%rax
- addl %ebx,%eax
- adcl %r9d,%eax
-
- /* do last upto 6 bytes */
-.Lhandle_7:
- movl %r10d,%ecx
- andl $7,%ecx
- shrl $1,%ecx
- jz .Lhandle_1
- movl $2,%edx
- xorl %ebx,%ebx
- clc
- .p2align 4
-.Lloop_1:
- source
- movw (%rdi),%bx
- adcl %ebx,%eax
- decl %ecx
- dest
- movw %bx,(%rsi)
- leaq 2(%rdi),%rdi
- leaq 2(%rsi),%rsi
- jnz .Lloop_1
- adcl %r9d,%eax /* add in carry */
-
- /* handle last odd byte */
-.Lhandle_1:
- testl $1,%r10d
- jz .Lende
- xorl %ebx,%ebx
- source
- movb (%rdi),%bl
- dest
- movb %bl,(%rsi)
- addl %ebx,%eax
- adcl %r9d,%eax /* carry */
-
- CFI_REMEMBER_STATE
-.Lende:
- movq 2*8(%rsp),%rbx
- CFI_RESTORE rbx
- movq 3*8(%rsp),%r12
- CFI_RESTORE r12
- movq 4*8(%rsp),%r14
- CFI_RESTORE r14
- movq 5*8(%rsp),%r13
- CFI_RESTORE r13
- movq 6*8(%rsp),%rbp
- CFI_RESTORE rbp
- addq $7*8,%rsp
- CFI_ADJUST_CFA_OFFSET -7*8
- ret
- CFI_RESTORE_STATE
-
- /* Exception handlers. Very simple, zeroing is done in the wrappers */
-.Lbad_source:
- movq (%rsp),%rax
- testq %rax,%rax
- jz .Lende
- movl $-EFAULT,(%rax)
- jmp .Lende
-
-.Lbad_dest:
- movq 8(%rsp),%rax
- testq %rax,%rax
- jz .Lende
- movl $-EFAULT,(%rax)
- jmp .Lende
- CFI_ENDPROC
-ENDPROC(csum_partial_copy_generic)
diff --git a/arch/x86_64/lib/csum-partial.c b/arch/x86_64/lib/csum-partial.c
deleted file mode 100644
index bc503f506903..000000000000
--- a/arch/x86_64/lib/csum-partial.c
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * arch/x86_64/lib/csum-partial.c
- *
- * This file contains network checksum routines that are better done
- * in an architecture-specific manner due to speed.
- */
-
-#include <linux/compiler.h>
-#include <linux/module.h>
-#include <asm/checksum.h>
-
-static inline unsigned short from32to16(unsigned a)
-{
- unsigned short b = a >> 16;
- asm("addw %w2,%w0\n\t"
- "adcw $0,%w0\n"
- : "=r" (b)
- : "0" (b), "r" (a));
- return b;
-}
-
-/*
- * Do a 64-bit checksum on an arbitrary memory area.
- * Returns a 32bit checksum.
- *
- * This isn't as time critical as it used to be because many NICs
- * do hardware checksumming these days.
- *
- * Things tried and found to not make it faster:
- * Manual Prefetching
- * Unrolling to an 128 bytes inner loop.
- * Using interleaving with more registers to break the carry chains.
- */
-static unsigned do_csum(const unsigned char *buff, unsigned len)
-{
- unsigned odd, count;
- unsigned long result = 0;
-
- if (unlikely(len == 0))
- return result;
- odd = 1 & (unsigned long) buff;
- if (unlikely(odd)) {
- result = *buff << 8;
- len--;
- buff++;
- }
- count = len >> 1; /* nr of 16-bit words.. */
- if (count) {
- if (2 & (unsigned long) buff) {
- result += *(unsigned short *)buff;
- count--;
- len -= 2;
- buff += 2;
- }
- count >>= 1; /* nr of 32-bit words.. */
- if (count) {
- unsigned long zero;
- unsigned count64;
- if (4 & (unsigned long) buff) {
- result += *(unsigned int *) buff;
- count--;
- len -= 4;
- buff += 4;
- }
- count >>= 1; /* nr of 64-bit words.. */
-
- /* main loop using 64byte blocks */
- zero = 0;
- count64 = count >> 3;
- while (count64) {
- asm("addq 0*8(%[src]),%[res]\n\t"
- "adcq 1*8(%[src]),%[res]\n\t"
- "adcq 2*8(%[src]),%[res]\n\t"
- "adcq 3*8(%[src]),%[res]\n\t"
- "adcq 4*8(%[src]),%[res]\n\t"
- "adcq 5*8(%[src]),%[res]\n\t"
- "adcq 6*8(%[src]),%[res]\n\t"
- "adcq 7*8(%[src]),%[res]\n\t"
- "adcq %[zero],%[res]"
- : [res] "=r" (result)
- : [src] "r" (buff), [zero] "r" (zero),
- "[res]" (result));
- buff += 64;
- count64--;
- }
-
- /* last upto 7 8byte blocks */
- count %= 8;
- while (count) {
- asm("addq %1,%0\n\t"
- "adcq %2,%0\n"
- : "=r" (result)
- : "m" (*(unsigned long *)buff),
- "r" (zero), "0" (result));
- --count;
- buff += 8;
- }
- result = add32_with_carry(result>>32,
- result&0xffffffff);
-
- if (len & 4) {
- result += *(unsigned int *) buff;
- buff += 4;
- }
- }
- if (len & 2) {
- result += *(unsigned short *) buff;
- buff += 2;
- }
- }
- if (len & 1)
- result += *buff;
- result = add32_with_carry(result>>32, result & 0xffffffff);
- if (unlikely(odd)) {
- result = from32to16(result);
- result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
- }
- return result;
-}
-
-/*
- * computes the checksum of a memory block at buff, length len,
- * and adds in "sum" (32-bit)
- *
- * returns a 32-bit number suitable for feeding into itself
- * or csum_tcpudp_magic
- *
- * this function must be called with even lengths, except
- * for the last fragment, which may be odd
- *
- * it's best to have buff aligned on a 64-bit boundary
- */
-__wsum csum_partial(const void *buff, int len, __wsum sum)
-{
- return (__force __wsum)add32_with_carry(do_csum(buff, len),
- (__force u32)sum);
-}
-
-EXPORT_SYMBOL(csum_partial);
-
-/*
- * this routine is used for miscellaneous IP-like checksums, mainly
- * in icmp.c
- */
-__sum16 ip_compute_csum(const void *buff, int len)
-{
- return csum_fold(csum_partial(buff,len,0));
-}
-EXPORT_SYMBOL(ip_compute_csum);
-
diff --git a/arch/x86_64/lib/csum-wrappers.c b/arch/x86_64/lib/csum-wrappers.c
deleted file mode 100644
index fd42a4a095fc..000000000000
--- a/arch/x86_64/lib/csum-wrappers.c
+++ /dev/null
@@ -1,135 +0,0 @@
-/* Copyright 2002,2003 Andi Kleen, SuSE Labs.
- * Subject to the GNU Public License v.2
- *
- * Wrappers of assembly checksum functions for x86-64.
- */
-
-#include <asm/checksum.h>
-#include <linux/module.h>
-
-/**
- * csum_partial_copy_from_user - Copy and checksum from user space.
- * @src: source address (user space)
- * @dst: destination address
- * @len: number of bytes to be copied.
- * @isum: initial sum that is added into the result (32bit unfolded)
- * @errp: set to -EFAULT for an bad source address.
- *
- * Returns an 32bit unfolded checksum of the buffer.
- * src and dst are best aligned to 64bits.
- */
-__wsum
-csum_partial_copy_from_user(const void __user *src, void *dst,
- int len, __wsum isum, int *errp)
-{
- might_sleep();
- *errp = 0;
- if (likely(access_ok(VERIFY_READ,src, len))) {
- /* Why 6, not 7? To handle odd addresses aligned we
- would need to do considerable complications to fix the
- checksum which is defined as an 16bit accumulator. The
- fix alignment code is primarily for performance
- compatibility with 32bit and that will handle odd
- addresses slowly too. */
- if (unlikely((unsigned long)src & 6)) {
- while (((unsigned long)src & 6) && len >= 2) {
- __u16 val16;
- *errp = __get_user(val16, (const __u16 __user *)src);
- if (*errp)
- return isum;
- *(__u16 *)dst = val16;
- isum = (__force __wsum)add32_with_carry(
- (__force unsigned)isum, val16);
- src += 2;
- dst += 2;
- len -= 2;
- }
- }
- isum = csum_partial_copy_generic((__force const void *)src,
- dst, len, isum, errp, NULL);
- if (likely(*errp == 0))
- return isum;
- }
- *errp = -EFAULT;
- memset(dst,0,len);
- return isum;
-}
-
-EXPORT_SYMBOL(csum_partial_copy_from_user);
-
-/**
- * csum_partial_copy_to_user - Copy and checksum to user space.
- * @src: source address
- * @dst: destination address (user space)
- * @len: number of bytes to be copied.
- * @isum: initial sum that is added into the result (32bit unfolded)
- * @errp: set to -EFAULT for an bad destination address.
- *
- * Returns an 32bit unfolded checksum of the buffer.
- * src and dst are best aligned to 64bits.
- */
-__wsum
-csum_partial_copy_to_user(const void *src, void __user *dst,
- int len, __wsum isum, int *errp)
-{
- might_sleep();
- if (unlikely(!access_ok(VERIFY_WRITE, dst, len))) {
- *errp = -EFAULT;
- return 0;
- }
-
- if (unlikely((unsigned long)dst & 6)) {
- while (((unsigned long)dst & 6) && len >= 2) {
- __u16 val16 = *(__u16 *)src;
- isum = (__force __wsum)add32_with_carry(
- (__force unsigned)isum, val16);
- *errp = __put_user(val16, (__u16 __user *)dst);
- if (*errp)
- return isum;
- src += 2;
- dst += 2;
- len -= 2;
- }
- }
-
- *errp = 0;
- return csum_partial_copy_generic(src, (void __force *)dst,len,isum,NULL,errp);
-}
-
-EXPORT_SYMBOL(csum_partial_copy_to_user);
-
-/**
- * csum_partial_copy_nocheck - Copy and checksum.
- * @src: source address
- * @dst: destination address
- * @len: number of bytes to be copied.
- * @isum: initial sum that is added into the result (32bit unfolded)
- *
- * Returns an 32bit unfolded checksum of the buffer.
- */
-__wsum
-csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
-{
- return csum_partial_copy_generic(src,dst,len,sum,NULL,NULL);
-}
-EXPORT_SYMBOL(csum_partial_copy_nocheck);
-
-__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
- const struct in6_addr *daddr,
- __u32 len, unsigned short proto, __wsum sum)
-{
- __u64 rest, sum64;
-
- rest = (__force __u64)htonl(len) + (__force __u64)htons(proto) +
- (__force __u64)sum;
- asm(" addq (%[saddr]),%[sum]\n"
- " adcq 8(%[saddr]),%[sum]\n"
- " adcq (%[daddr]),%[sum]\n"
- " adcq 8(%[daddr]),%[sum]\n"
- " adcq $0,%[sum]\n"
- : [sum] "=r" (sum64)
- : "[sum]" (rest),[saddr] "r" (saddr), [daddr] "r" (daddr));
- return csum_fold((__force __wsum)add32_with_carry(sum64 & 0xffffffff, sum64>>32));
-}
-
-EXPORT_SYMBOL(csum_ipv6_magic);
diff --git a/arch/x86_64/lib/delay.c b/arch/x86_64/lib/delay.c
deleted file mode 100644
index 2dbebd308347..000000000000
--- a/arch/x86_64/lib/delay.c
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Precise Delay Loops for x86-64
- *
- * Copyright (C) 1993 Linus Torvalds
- * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
- *
- * The __delay function must _NOT_ be inlined as its execution time
- * depends wildly on alignment on many x86 processors.
- */
-
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/delay.h>
-#include <asm/delay.h>
-#include <asm/msr.h>
-
-#ifdef CONFIG_SMP
-#include <asm/smp.h>
-#endif
-
-int read_current_timer(unsigned long *timer_value)
-{
- rdtscll(*timer_value);
- return 0;
-}
-
-void __delay(unsigned long loops)
-{
- unsigned bclock, now;
-
- rdtscl(bclock);
- do
- {
- rep_nop();
- rdtscl(now);
- }
- while((now-bclock) < loops);
-}
-EXPORT_SYMBOL(__delay);
-
-inline void __const_udelay(unsigned long xloops)
-{
- __delay(((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32) + 1);
-}
-EXPORT_SYMBOL(__const_udelay);
-
-void __udelay(unsigned long usecs)
-{
- __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
-}
-EXPORT_SYMBOL(__udelay);
-
-void __ndelay(unsigned long nsecs)
-{
- __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
-}
-EXPORT_SYMBOL(__ndelay);
diff --git a/arch/x86_64/lib/getuser.S b/arch/x86_64/lib/getuser.S
deleted file mode 100644
index 5448876261f8..000000000000
--- a/arch/x86_64/lib/getuser.S
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * __get_user functions.
- *
- * (C) Copyright 1998 Linus Torvalds
- * (C) Copyright 2005 Andi Kleen
- *
- * These functions have a non-standard call interface
- * to make them more efficient, especially as they
- * return an error value in addition to the "real"
- * return value.
- */
-
-/*
- * __get_user_X
- *
- * Inputs: %rcx contains the address.
- * The register is modified, but all changes are undone
- * before returning because the C code doesn't know about it.
- *
- * Outputs: %rax is error code (0 or -EFAULT)
- * %rdx contains zero-extended value
- *
- * %r8 is destroyed.
- *
- * These functions should not modify any other registers,
- * as they get called from within inline assembly.
- */
-
-#include <linux/linkage.h>
-#include <asm/dwarf2.h>
-#include <asm/page.h>
-#include <asm/errno.h>
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-
- .text
-ENTRY(__get_user_1)
- CFI_STARTPROC
- GET_THREAD_INFO(%r8)
- cmpq threadinfo_addr_limit(%r8),%rcx
- jae bad_get_user
-1: movzb (%rcx),%edx
- xorl %eax,%eax
- ret
- CFI_ENDPROC
-ENDPROC(__get_user_1)
-
-ENTRY(__get_user_2)
- CFI_STARTPROC
- GET_THREAD_INFO(%r8)
- addq $1,%rcx
- jc 20f
- cmpq threadinfo_addr_limit(%r8),%rcx
- jae 20f
- decq %rcx
-2: movzwl (%rcx),%edx
- xorl %eax,%eax
- ret
-20: decq %rcx
- jmp bad_get_user
- CFI_ENDPROC
-ENDPROC(__get_user_2)
-
-ENTRY(__get_user_4)
- CFI_STARTPROC
- GET_THREAD_INFO(%r8)
- addq $3,%rcx
- jc 30f
- cmpq threadinfo_addr_limit(%r8),%rcx
- jae 30f
- subq $3,%rcx
-3: movl (%rcx),%edx
- xorl %eax,%eax
- ret
-30: subq $3,%rcx
- jmp bad_get_user
- CFI_ENDPROC
-ENDPROC(__get_user_4)
-
-ENTRY(__get_user_8)
- CFI_STARTPROC
- GET_THREAD_INFO(%r8)
- addq $7,%rcx
- jc 40f
- cmpq threadinfo_addr_limit(%r8),%rcx
- jae 40f
- subq $7,%rcx
-4: movq (%rcx),%rdx
- xorl %eax,%eax
- ret
-40: subq $7,%rcx
- jmp bad_get_user
- CFI_ENDPROC
-ENDPROC(__get_user_8)
-
-bad_get_user:
- CFI_STARTPROC
- xorl %edx,%edx
- movq $(-EFAULT),%rax
- ret
- CFI_ENDPROC
-END(bad_get_user)
-
-.section __ex_table,"a"
- .quad 1b,bad_get_user
- .quad 2b,bad_get_user
- .quad 3b,bad_get_user
- .quad 4b,bad_get_user
-.previous
diff --git a/arch/x86_64/lib/io.c b/arch/x86_64/lib/io.c
deleted file mode 100644
index 87b4a4e18039..000000000000
--- a/arch/x86_64/lib/io.c
+++ /dev/null
@@ -1,23 +0,0 @@
-#include <linux/string.h>
-#include <asm/io.h>
-#include <linux/module.h>
-
-void __memcpy_toio(unsigned long dst,const void*src,unsigned len)
-{
- __inline_memcpy((void *) dst,src,len);
-}
-EXPORT_SYMBOL(__memcpy_toio);
-
-void __memcpy_fromio(void *dst,unsigned long src,unsigned len)
-{
- __inline_memcpy(dst,(const void *) src,len);
-}
-EXPORT_SYMBOL(__memcpy_fromio);
-
-void memset_io(volatile void __iomem *a, int b, size_t c)
-{
- /* XXX: memset can mangle the IO patterns quite a bit.
- perhaps it would be better to use a dumb one */
- memset((void *)a,b,c);
-}
-EXPORT_SYMBOL(memset_io);
diff --git a/arch/x86_64/lib/iomap_copy.S b/arch/x86_64/lib/iomap_copy.S
deleted file mode 100644
index 05a95e713da8..000000000000
--- a/arch/x86_64/lib/iomap_copy.S
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright 2006 PathScale, Inc. All Rights Reserved.
- *
- * This file is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include <linux/linkage.h>
-#include <asm/dwarf2.h>
-
-/*
- * override generic version in lib/iomap_copy.c
- */
-ENTRY(__iowrite32_copy)
- CFI_STARTPROC
- movl %edx,%ecx
- rep movsd
- ret
- CFI_ENDPROC
-ENDPROC(__iowrite32_copy)
diff --git a/arch/x86_64/lib/memcpy.S b/arch/x86_64/lib/memcpy.S
deleted file mode 100644
index c22981fa2f3a..000000000000
--- a/arch/x86_64/lib/memcpy.S
+++ /dev/null
@@ -1,131 +0,0 @@
-/* Copyright 2002 Andi Kleen */
-
-#include <linux/linkage.h>
-#include <asm/dwarf2.h>
-#include <asm/cpufeature.h>
-
-/*
- * memcpy - Copy a memory block.
- *
- * Input:
- * rdi destination
- * rsi source
- * rdx count
- *
- * Output:
- * rax original destination
- */
-
- ALIGN
-memcpy_c:
- CFI_STARTPROC
- movq %rdi,%rax
- movl %edx,%ecx
- shrl $3,%ecx
- andl $7,%edx
- rep movsq
- movl %edx,%ecx
- rep movsb
- ret
- CFI_ENDPROC
-ENDPROC(memcpy_c)
-
-ENTRY(__memcpy)
-ENTRY(memcpy)
- CFI_STARTPROC
- pushq %rbx
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rbx, 0
- movq %rdi,%rax
-
- movl %edx,%ecx
- shrl $6,%ecx
- jz .Lhandle_tail
-
- .p2align 4
-.Lloop_64:
- decl %ecx
-
- movq (%rsi),%r11
- movq 8(%rsi),%r8
-
- movq %r11,(%rdi)
- movq %r8,1*8(%rdi)
-
- movq 2*8(%rsi),%r9
- movq 3*8(%rsi),%r10
-
- movq %r9,2*8(%rdi)
- movq %r10,3*8(%rdi)
-
- movq 4*8(%rsi),%r11
- movq 5*8(%rsi),%r8
-
- movq %r11,4*8(%rdi)
- movq %r8,5*8(%rdi)
-
- movq 6*8(%rsi),%r9
- movq 7*8(%rsi),%r10
-
- movq %r9,6*8(%rdi)
- movq %r10,7*8(%rdi)
-
- leaq 64(%rsi),%rsi
- leaq 64(%rdi),%rdi
- jnz .Lloop_64
-
-.Lhandle_tail:
- movl %edx,%ecx
- andl $63,%ecx
- shrl $3,%ecx
- jz .Lhandle_7
- .p2align 4
-.Lloop_8:
- decl %ecx
- movq (%rsi),%r8
- movq %r8,(%rdi)
- leaq 8(%rdi),%rdi
- leaq 8(%rsi),%rsi
- jnz .Lloop_8
-
-.Lhandle_7:
- movl %edx,%ecx
- andl $7,%ecx
- jz .Lende
- .p2align 4
-.Lloop_1:
- movb (%rsi),%r8b
- movb %r8b,(%rdi)
- incq %rdi
- incq %rsi
- decl %ecx
- jnz .Lloop_1
-
-.Lende:
- popq %rbx
- CFI_ADJUST_CFA_OFFSET -8
- CFI_RESTORE rbx
- ret
-.Lfinal:
- CFI_ENDPROC
-ENDPROC(memcpy)
-ENDPROC(__memcpy)
-
- /* Some CPUs run faster using the string copy instructions.
- It is also a lot simpler. Use this when possible */
-
- .section .altinstr_replacement,"ax"
-1: .byte 0xeb /* jmp <disp8> */
- .byte (memcpy_c - memcpy) - (2f - 1b) /* offset */
-2:
- .previous
- .section .altinstructions,"a"
- .align 8
- .quad memcpy
- .quad 1b
- .byte X86_FEATURE_REP_GOOD
- /* Replace only beginning, memcpy is used to apply alternatives, so it
- * is silly to overwrite itself with nops - reboot is only outcome... */
- .byte 2b - 1b
- .byte 2b - 1b
- .previous
diff --git a/arch/x86_64/lib/memmove.c b/arch/x86_64/lib/memmove.c
deleted file mode 100644
index 751ebae8ec42..000000000000
--- a/arch/x86_64/lib/memmove.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Normally compiler builtins are used, but sometimes the compiler calls out
- of line code. Based on asm-i386/string.h.
- */
-#define _STRING_C
-#include <linux/string.h>
-#include <linux/module.h>
-
-#undef memmove
-void *memmove(void * dest,const void *src,size_t count)
-{
- if (dest < src) {
- return memcpy(dest,src,count);
- } else {
- char *p = (char *) dest + count;
- char *s = (char *) src + count;
- while (count--)
- *--p = *--s;
- }
- return dest;
-}
-EXPORT_SYMBOL(memmove);
diff --git a/arch/x86_64/lib/memset.S b/arch/x86_64/lib/memset.S
deleted file mode 100644
index 2c5948116bd2..000000000000
--- a/arch/x86_64/lib/memset.S
+++ /dev/null
@@ -1,133 +0,0 @@
-/* Copyright 2002 Andi Kleen, SuSE Labs */
-
-#include <linux/linkage.h>
-#include <asm/dwarf2.h>
-
-/*
- * ISO C memset - set a memory block to a byte value.
- *
- * rdi destination
- * rsi value (char)
- * rdx count (bytes)
- *
- * rax original destination
- */
- ALIGN
-memset_c:
- CFI_STARTPROC
- movq %rdi,%r9
- movl %edx,%r8d
- andl $7,%r8d
- movl %edx,%ecx
- shrl $3,%ecx
- /* expand byte value */
- movzbl %sil,%esi
- movabs $0x0101010101010101,%rax
- mulq %rsi /* with rax, clobbers rdx */
- rep stosq
- movl %r8d,%ecx
- rep stosb
- movq %r9,%rax
- ret
- CFI_ENDPROC
-ENDPROC(memset_c)
-
-ENTRY(memset)
-ENTRY(__memset)
- CFI_STARTPROC
- movq %rdi,%r10
- movq %rdx,%r11
-
- /* expand byte value */
- movzbl %sil,%ecx
- movabs $0x0101010101010101,%rax
- mul %rcx /* with rax, clobbers rdx */
-
- /* align dst */
- movl %edi,%r9d
- andl $7,%r9d
- jnz .Lbad_alignment
- CFI_REMEMBER_STATE
-.Lafter_bad_alignment:
-
- movl %r11d,%ecx
- shrl $6,%ecx
- jz .Lhandle_tail
-
- .p2align 4
-.Lloop_64:
- decl %ecx
- movq %rax,(%rdi)
- movq %rax,8(%rdi)
- movq %rax,16(%rdi)
- movq %rax,24(%rdi)
- movq %rax,32(%rdi)
- movq %rax,40(%rdi)
- movq %rax,48(%rdi)
- movq %rax,56(%rdi)
- leaq 64(%rdi),%rdi
- jnz .Lloop_64
-
- /* Handle tail in loops. The loops should be faster than hard
- to predict jump tables. */
- .p2align 4
-.Lhandle_tail:
- movl %r11d,%ecx
- andl $63&(~7),%ecx
- jz .Lhandle_7
- shrl $3,%ecx
- .p2align 4
-.Lloop_8:
- decl %ecx
- movq %rax,(%rdi)
- leaq 8(%rdi),%rdi
- jnz .Lloop_8
-
-.Lhandle_7:
- movl %r11d,%ecx
- andl $7,%ecx
- jz .Lende
- .p2align 4
-.Lloop_1:
- decl %ecx
- movb %al,(%rdi)
- leaq 1(%rdi),%rdi
- jnz .Lloop_1
-
-.Lende:
- movq %r10,%rax
- ret
-
- CFI_RESTORE_STATE
-.Lbad_alignment:
- cmpq $7,%r11
- jbe .Lhandle_7
- movq %rax,(%rdi) /* unaligned store */
- movq $8,%r8
- subq %r9,%r8
- addq %r8,%rdi
- subq %r8,%r11
- jmp .Lafter_bad_alignment
-.Lfinal:
- CFI_ENDPROC
-ENDPROC(memset)
-ENDPROC(__memset)
-
- /* Some CPUs run faster using the string instructions.
- It is also a lot simpler. Use this when possible */
-
-#include <asm/cpufeature.h>
-
- .section .altinstr_replacement,"ax"
-1: .byte 0xeb /* jmp <disp8> */
- .byte (memset_c - memset) - (2f - 1b) /* offset */
-2:
- .previous
- .section .altinstructions,"a"
- .align 8
- .quad memset
- .quad 1b
- .byte X86_FEATURE_REP_GOOD
- .byte .Lfinal - memset
- .byte 2b - 1b
- .previous
diff --git a/arch/x86_64/lib/msr-on-cpu.c b/arch/x86_64/lib/msr-on-cpu.c
deleted file mode 100644
index 47e0ec47c376..000000000000
--- a/arch/x86_64/lib/msr-on-cpu.c
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../i386/lib/msr-on-cpu.c"
diff --git a/arch/x86_64/lib/putuser.S b/arch/x86_64/lib/putuser.S
deleted file mode 100644
index 4989f5a8fa9b..000000000000
--- a/arch/x86_64/lib/putuser.S
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * __put_user functions.
- *
- * (C) Copyright 1998 Linus Torvalds
- * (C) Copyright 2005 Andi Kleen
- *
- * These functions have a non-standard call interface
- * to make them more efficient, especially as they
- * return an error value in addition to the "real"
- * return value.
- */
-
-/*
- * __put_user_X
- *
- * Inputs: %rcx contains the address
- * %rdx contains new value
- *
- * Outputs: %rax is error code (0 or -EFAULT)
- *
- * %r8 is destroyed.
- *
- * These functions should not modify any other registers,
- * as they get called from within inline assembly.
- */
-
-#include <linux/linkage.h>
-#include <asm/dwarf2.h>
-#include <asm/page.h>
-#include <asm/errno.h>
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-
- .text
-ENTRY(__put_user_1)
- CFI_STARTPROC
- GET_THREAD_INFO(%r8)
- cmpq threadinfo_addr_limit(%r8),%rcx
- jae bad_put_user
-1: movb %dl,(%rcx)
- xorl %eax,%eax
- ret
- CFI_ENDPROC
-ENDPROC(__put_user_1)
-
-ENTRY(__put_user_2)
- CFI_STARTPROC
- GET_THREAD_INFO(%r8)
- addq $1,%rcx
- jc 20f
- cmpq threadinfo_addr_limit(%r8),%rcx
- jae 20f
- decq %rcx
-2: movw %dx,(%rcx)
- xorl %eax,%eax
- ret
-20: decq %rcx
- jmp bad_put_user
- CFI_ENDPROC
-ENDPROC(__put_user_2)
-
-ENTRY(__put_user_4)
- CFI_STARTPROC
- GET_THREAD_INFO(%r8)
- addq $3,%rcx
- jc 30f
- cmpq threadinfo_addr_limit(%r8),%rcx
- jae 30f
- subq $3,%rcx
-3: movl %edx,(%rcx)
- xorl %eax,%eax
- ret
-30: subq $3,%rcx
- jmp bad_put_user
- CFI_ENDPROC
-ENDPROC(__put_user_4)
-
-ENTRY(__put_user_8)
- CFI_STARTPROC
- GET_THREAD_INFO(%r8)
- addq $7,%rcx
- jc 40f
- cmpq threadinfo_addr_limit(%r8),%rcx
- jae 40f
- subq $7,%rcx
-4: movq %rdx,(%rcx)
- xorl %eax,%eax
- ret
-40: subq $7,%rcx
- jmp bad_put_user
- CFI_ENDPROC
-ENDPROC(__put_user_8)
-
-bad_put_user:
- CFI_STARTPROC
- movq $(-EFAULT),%rax
- ret
- CFI_ENDPROC
-END(bad_put_user)
-
-.section __ex_table,"a"
- .quad 1b,bad_put_user
- .quad 2b,bad_put_user
- .quad 3b,bad_put_user
- .quad 4b,bad_put_user
-.previous
diff --git a/arch/x86_64/lib/rwlock.S b/arch/x86_64/lib/rwlock.S
deleted file mode 100644
index 0cde1f807314..000000000000
--- a/arch/x86_64/lib/rwlock.S
+++ /dev/null
@@ -1,38 +0,0 @@
-/* Slow paths of read/write spinlocks. */
-
-#include <linux/linkage.h>
-#include <asm/rwlock.h>
-#include <asm/alternative-asm.i>
-#include <asm/dwarf2.h>
-
-/* rdi: pointer to rwlock_t */
-ENTRY(__write_lock_failed)
- CFI_STARTPROC
- LOCK_PREFIX
- addl $RW_LOCK_BIAS,(%rdi)
-1: rep
- nop
- cmpl $RW_LOCK_BIAS,(%rdi)
- jne 1b
- LOCK_PREFIX
- subl $RW_LOCK_BIAS,(%rdi)
- jnz __write_lock_failed
- ret
- CFI_ENDPROC
-END(__write_lock_failed)
-
-/* rdi: pointer to rwlock_t */
-ENTRY(__read_lock_failed)
- CFI_STARTPROC
- LOCK_PREFIX
- incl (%rdi)
-1: rep
- nop
- cmpl $1,(%rdi)
- js 1b
- LOCK_PREFIX
- decl (%rdi)
- js __read_lock_failed
- ret
- CFI_ENDPROC
-END(__read_lock_failed)
diff --git a/arch/x86_64/lib/thunk.S b/arch/x86_64/lib/thunk.S
deleted file mode 100644
index 55e586d352d3..000000000000
--- a/arch/x86_64/lib/thunk.S
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Save registers before calling assembly functions. This avoids
- * disturbance of register allocation in some inline assembly constructs.
- * Copyright 2001,2002 by Andi Kleen, SuSE Labs.
- * Subject to the GNU public license, v.2. No warranty of any kind.
- */
-
- #include <linux/linkage.h>
- #include <asm/dwarf2.h>
- #include <asm/calling.h>
- #include <asm/rwlock.h>
-
- /* rdi: arg1 ... normal C conventions. rax is saved/restored. */
- .macro thunk name,func
- .globl \name
-\name:
- CFI_STARTPROC
- SAVE_ARGS
- call \func
- jmp restore
- CFI_ENDPROC
- .endm
-
- /* rdi: arg1 ... normal C conventions. rax is passed from C. */
- .macro thunk_retrax name,func
- .globl \name
-\name:
- CFI_STARTPROC
- SAVE_ARGS
- call \func
- jmp restore_norax
- CFI_ENDPROC
- .endm
-
-
- .section .sched.text
-#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM
- thunk rwsem_down_read_failed_thunk,rwsem_down_read_failed
- thunk rwsem_down_write_failed_thunk,rwsem_down_write_failed
- thunk rwsem_wake_thunk,rwsem_wake
- thunk rwsem_downgrade_thunk,rwsem_downgrade_wake
-#endif
-
- thunk __down_failed,__down
- thunk_retrax __down_failed_interruptible,__down_interruptible
- thunk_retrax __down_failed_trylock,__down_trylock
- thunk __up_wakeup,__up
-
-#ifdef CONFIG_TRACE_IRQFLAGS
- thunk trace_hardirqs_on_thunk,trace_hardirqs_on
- thunk trace_hardirqs_off_thunk,trace_hardirqs_off
-#endif
-
- /* SAVE_ARGS below is used only for the .cfi directives it contains. */
- CFI_STARTPROC
- SAVE_ARGS
-restore:
- RESTORE_ARGS
- ret
- CFI_ENDPROC
-
- CFI_STARTPROC
- SAVE_ARGS
-restore_norax:
- RESTORE_ARGS 1
- ret
- CFI_ENDPROC
diff --git a/arch/x86_64/lib/usercopy.c b/arch/x86_64/lib/usercopy.c
deleted file mode 100644
index 893d43f838cc..000000000000
--- a/arch/x86_64/lib/usercopy.c
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * User address space access functions.
- *
- * Copyright 1997 Andi Kleen <ak@muc.de>
- * Copyright 1997 Linus Torvalds
- * Copyright 2002 Andi Kleen <ak@suse.de>
- */
-#include <linux/module.h>
-#include <asm/uaccess.h>
-
-/*
- * Copy a null terminated string from userspace.
- */
-
-#define __do_strncpy_from_user(dst,src,count,res) \
-do { \
- long __d0, __d1, __d2; \
- might_sleep(); \
- __asm__ __volatile__( \
- " testq %1,%1\n" \
- " jz 2f\n" \
- "0: lodsb\n" \
- " stosb\n" \
- " testb %%al,%%al\n" \
- " jz 1f\n" \
- " decq %1\n" \
- " jnz 0b\n" \
- "1: subq %1,%0\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: movq %5,%0\n" \
- " jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 8\n" \
- " .quad 0b,3b\n" \
- ".previous" \
- : "=r"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \
- "=&D" (__d2) \
- : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
- : "memory"); \
-} while (0)
-
-long
-__strncpy_from_user(char *dst, const char __user *src, long count)
-{
- long res;
- __do_strncpy_from_user(dst, src, count, res);
- return res;
-}
-EXPORT_SYMBOL(__strncpy_from_user);
-
-long
-strncpy_from_user(char *dst, const char __user *src, long count)
-{
- long res = -EFAULT;
- if (access_ok(VERIFY_READ, src, 1))
- return __strncpy_from_user(dst, src, count);
- return res;
-}
-EXPORT_SYMBOL(strncpy_from_user);
-
-/*
- * Zero Userspace
- */
-
-unsigned long __clear_user(void __user *addr, unsigned long size)
-{
- long __d0;
- might_sleep();
- /* no memory constraint because it doesn't change any memory gcc knows
- about */
- asm volatile(
- " testq %[size8],%[size8]\n"
- " jz 4f\n"
- "0: movq %[zero],(%[dst])\n"
- " addq %[eight],%[dst]\n"
- " decl %%ecx ; jnz 0b\n"
- "4: movq %[size1],%%rcx\n"
- " testl %%ecx,%%ecx\n"
- " jz 2f\n"
- "1: movb %b[zero],(%[dst])\n"
- " incq %[dst]\n"
- " decl %%ecx ; jnz 1b\n"
- "2:\n"
- ".section .fixup,\"ax\"\n"
- "3: lea 0(%[size1],%[size8],8),%[size8]\n"
- " jmp 2b\n"
- ".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 8\n"
- " .quad 0b,3b\n"
- " .quad 1b,2b\n"
- ".previous"
- : [size8] "=c"(size), [dst] "=&D" (__d0)
- : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr),
- [zero] "r" (0UL), [eight] "r" (8UL));
- return size;
-}
-EXPORT_SYMBOL(__clear_user);
-
-unsigned long clear_user(void __user *to, unsigned long n)
-{
- if (access_ok(VERIFY_WRITE, to, n))
- return __clear_user(to, n);
- return n;
-}
-EXPORT_SYMBOL(clear_user);
-
-/*
- * Return the size of a string (including the ending 0)
- *
- * Return 0 on exception, a value greater than N if too long
- */
-
-long __strnlen_user(const char __user *s, long n)
-{
- long res = 0;
- char c;
-
- while (1) {
- if (res>n)
- return n+1;
- if (__get_user(c, s))
- return 0;
- if (!c)
- return res+1;
- res++;
- s++;
- }
-}
-EXPORT_SYMBOL(__strnlen_user);
-
-long strnlen_user(const char __user *s, long n)
-{
- if (!access_ok(VERIFY_READ, s, n))
- return 0;
- return __strnlen_user(s, n);
-}
-EXPORT_SYMBOL(strnlen_user);
-
-long strlen_user(const char __user *s)
-{
- long res = 0;
- char c;
-
- for (;;) {
- if (get_user(c, s))
- return 0;
- if (!c)
- return res+1;
- res++;
- s++;
- }
-}
-EXPORT_SYMBOL(strlen_user);
-
-unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len)
-{
- if (access_ok(VERIFY_WRITE, to, len) && access_ok(VERIFY_READ, from, len)) {
- return copy_user_generic((__force void *)to, (__force void *)from, len);
- }
- return len;
-}
-EXPORT_SYMBOL(copy_in_user);
-
diff --git a/arch/x86_64/mm/Makefile b/arch/x86_64/mm/Makefile
deleted file mode 100644
index d25ac86fe27a..000000000000
--- a/arch/x86_64/mm/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
-#
-# Makefile for the linux x86_64-specific parts of the memory manager.
-#
-
-obj-y := init.o fault.o ioremap.o extable.o pageattr.o mmap.o
-obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
-obj-$(CONFIG_NUMA) += numa.o
-obj-$(CONFIG_K8_NUMA) += k8topology.o
-obj-$(CONFIG_ACPI_NUMA) += srat.o
-
-hugetlbpage-y = ../../i386/mm/hugetlbpage.o
diff --git a/arch/x86_64/mm/extable.c b/arch/x86_64/mm/extable.c
deleted file mode 100644
index 79ac6e7100af..000000000000
--- a/arch/x86_64/mm/extable.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * linux/arch/x86_64/mm/extable.c
- */
-
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/init.h>
-#include <asm/uaccess.h>
-
-/* Simple binary search */
-const struct exception_table_entry *
-search_extable(const struct exception_table_entry *first,
- const struct exception_table_entry *last,
- unsigned long value)
-{
- /* Work around a B stepping K8 bug */
- if ((value >> 32) == 0)
- value |= 0xffffffffUL << 32;
-
- while (first <= last) {
- const struct exception_table_entry *mid;
- long diff;
-
- mid = (last - first) / 2 + first;
- diff = mid->insn - value;
- if (diff == 0)
- return mid;
- else if (diff < 0)
- first = mid+1;
- else
- last = mid-1;
- }
- return NULL;
-}
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
deleted file mode 100644
index 54816adb8e93..000000000000
--- a/arch/x86_64/mm/fault.c
+++ /dev/null
@@ -1,636 +0,0 @@
-/*
- * linux/arch/x86-64/mm/fault.c
- *
- * Copyright (C) 1995 Linus Torvalds
- * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs.
- */
-
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/tty.h>
-#include <linux/vt_kern.h> /* For unblank_screen() */
-#include <linux/compiler.h>
-#include <linux/vmalloc.h>
-#include <linux/module.h>
-#include <linux/kprobes.h>
-#include <linux/uaccess.h>
-#include <linux/kdebug.h>
-
-#include <asm/system.h>
-#include <asm/pgalloc.h>
-#include <asm/smp.h>
-#include <asm/tlbflush.h>
-#include <asm/proto.h>
-#include <asm-generic/sections.h>
-
-/* Page fault error code bits */
-#define PF_PROT (1<<0) /* or no page found */
-#define PF_WRITE (1<<1)
-#define PF_USER (1<<2)
-#define PF_RSVD (1<<3)
-#define PF_INSTR (1<<4)
-
-static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
-
-/* Hook to register for page fault notifications */
-int register_page_fault_notifier(struct notifier_block *nb)
-{
- vmalloc_sync_all();
- return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
-}
-EXPORT_SYMBOL_GPL(register_page_fault_notifier);
-
-int unregister_page_fault_notifier(struct notifier_block *nb)
-{
- return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
-}
-EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
-
-static inline int notify_page_fault(struct pt_regs *regs, long err)
-{
- struct die_args args = {
- .regs = regs,
- .str = "page fault",
- .err = err,
- .trapnr = 14,
- .signr = SIGSEGV
- };
- return atomic_notifier_call_chain(&notify_page_fault_chain,
- DIE_PAGE_FAULT, &args);
-}
-
-/* Sometimes the CPU reports invalid exceptions on prefetch.
- Check that here and ignore.
- Opcode checker based on code by Richard Brunner */
-static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
- unsigned long error_code)
-{
- unsigned char *instr;
- int scan_more = 1;
- int prefetch = 0;
- unsigned char *max_instr;
-
- /* If it was a exec fault ignore */
- if (error_code & PF_INSTR)
- return 0;
-
- instr = (unsigned char __user *)convert_rip_to_linear(current, regs);
- max_instr = instr + 15;
-
- if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
- return 0;
-
- while (scan_more && instr < max_instr) {
- unsigned char opcode;
- unsigned char instr_hi;
- unsigned char instr_lo;
-
- if (probe_kernel_address(instr, opcode))
- break;
-
- instr_hi = opcode & 0xf0;
- instr_lo = opcode & 0x0f;
- instr++;
-
- switch (instr_hi) {
- case 0x20:
- case 0x30:
- /* Values 0x26,0x2E,0x36,0x3E are valid x86
- prefixes. In long mode, the CPU will signal
- invalid opcode if some of these prefixes are
- present so we will never get here anyway */
- scan_more = ((instr_lo & 7) == 0x6);
- break;
-
- case 0x40:
- /* In AMD64 long mode, 0x40 to 0x4F are valid REX prefixes
- Need to figure out under what instruction mode the
- instruction was issued ... */
- /* Could check the LDT for lm, but for now it's good
- enough to assume that long mode only uses well known
- segments or kernel. */
- scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS);
- break;
-
- case 0x60:
- /* 0x64 thru 0x67 are valid prefixes in all modes. */
- scan_more = (instr_lo & 0xC) == 0x4;
- break;
- case 0xF0:
- /* 0xF0, 0xF2, and 0xF3 are valid prefixes in all modes. */
- scan_more = !instr_lo || (instr_lo>>1) == 1;
- break;
- case 0x00:
- /* Prefetch instruction is 0x0F0D or 0x0F18 */
- scan_more = 0;
- if (probe_kernel_address(instr, opcode))
- break;
- prefetch = (instr_lo == 0xF) &&
- (opcode == 0x0D || opcode == 0x18);
- break;
- default:
- scan_more = 0;
- break;
- }
- }
- return prefetch;
-}
-
-static int bad_address(void *p)
-{
- unsigned long dummy;
- return probe_kernel_address((unsigned long *)p, dummy);
-}
-
-void dump_pagetable(unsigned long address)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
-
- pgd = (pgd_t *)read_cr3();
-
- pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
- pgd += pgd_index(address);
- if (bad_address(pgd)) goto bad;
- printk("PGD %lx ", pgd_val(*pgd));
- if (!pgd_present(*pgd)) goto ret;
-
- pud = pud_offset(pgd, address);
- if (bad_address(pud)) goto bad;
- printk("PUD %lx ", pud_val(*pud));
- if (!pud_present(*pud)) goto ret;
-
- pmd = pmd_offset(pud, address);
- if (bad_address(pmd)) goto bad;
- printk("PMD %lx ", pmd_val(*pmd));
- if (!pmd_present(*pmd)) goto ret;
-
- pte = pte_offset_kernel(pmd, address);
- if (bad_address(pte)) goto bad;
- printk("PTE %lx", pte_val(*pte));
-ret:
- printk("\n");
- return;
-bad:
- printk("BAD\n");
-}
-
-static const char errata93_warning[] =
-KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
-KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
-KERN_ERR "******* Please consider a BIOS update.\n"
-KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
-
-/* Workaround for K8 erratum #93 & buggy BIOS.
- BIOS SMM functions are required to use a specific workaround
- to avoid corruption of the 64bit RIP register on C stepping K8.
- A lot of BIOS that didn't get tested properly miss this.
- The OS sees this as a page fault with the upper 32bits of RIP cleared.
- Try to work around it here.
- Note we only handle faults in kernel here. */
-
-static int is_errata93(struct pt_regs *regs, unsigned long address)
-{
- static int warned;
- if (address != regs->rip)
- return 0;
- if ((address >> 32) != 0)
- return 0;
- address |= 0xffffffffUL << 32;
- if ((address >= (u64)_stext && address <= (u64)_etext) ||
- (address >= MODULES_VADDR && address <= MODULES_END)) {
- if (!warned) {
- printk(errata93_warning);
- warned = 1;
- }
- regs->rip = address;
- return 1;
- }
- return 0;
-}
-
-static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
- unsigned long error_code)
-{
- unsigned long flags = oops_begin();
- struct task_struct *tsk;
-
- printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
- current->comm, address);
- dump_pagetable(address);
- tsk = current;
- tsk->thread.cr2 = address;
- tsk->thread.trap_no = 14;
- tsk->thread.error_code = error_code;
- __die("Bad pagetable", regs, error_code);
- oops_end(flags);
- do_exit(SIGKILL);
-}
-
-/*
- * Handle a fault on the vmalloc area
- *
- * This assumes no large pages in there.
- */
-static int vmalloc_fault(unsigned long address)
-{
- pgd_t *pgd, *pgd_ref;
- pud_t *pud, *pud_ref;
- pmd_t *pmd, *pmd_ref;
- pte_t *pte, *pte_ref;
-
- /* Copy kernel mappings over when needed. This can also
- happen within a race in page table update. In the later
- case just flush. */
-
- pgd = pgd_offset(current->mm ?: &init_mm, address);
- pgd_ref = pgd_offset_k(address);
- if (pgd_none(*pgd_ref))
- return -1;
- if (pgd_none(*pgd))
- set_pgd(pgd, *pgd_ref);
- else
- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
-
- /* Below here mismatches are bugs because these lower tables
- are shared */
-
- pud = pud_offset(pgd, address);
- pud_ref = pud_offset(pgd_ref, address);
- if (pud_none(*pud_ref))
- return -1;
- if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
- BUG();
- pmd = pmd_offset(pud, address);
- pmd_ref = pmd_offset(pud_ref, address);
- if (pmd_none(*pmd_ref))
- return -1;
- if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
- BUG();
- pte_ref = pte_offset_kernel(pmd_ref, address);
- if (!pte_present(*pte_ref))
- return -1;
- pte = pte_offset_kernel(pmd, address);
- /* Don't use pte_page here, because the mappings can point
- outside mem_map, and the NUMA hash lookup cannot handle
- that. */
- if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
- BUG();
- return 0;
-}
-
-static int page_fault_trace;
-int show_unhandled_signals = 1;
-
-/*
- * This routine handles page faults. It determines the address,
- * and the problem, and then passes it off to one of the appropriate
- * routines.
- */
-asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
- unsigned long error_code)
-{
- struct task_struct *tsk;
- struct mm_struct *mm;
- struct vm_area_struct * vma;
- unsigned long address;
- const struct exception_table_entry *fixup;
- int write, fault;
- unsigned long flags;
- siginfo_t info;
-
- tsk = current;
- mm = tsk->mm;
- prefetchw(&mm->mmap_sem);
-
- /* get the address */
- address = read_cr2();
-
- info.si_code = SEGV_MAPERR;
-
-
- /*
- * We fault-in kernel-space virtual memory on-demand. The
- * 'reference' page table is init_mm.pgd.
- *
- * NOTE! We MUST NOT take any locks for this case. We may
- * be in an interrupt or a critical region, and should
- * only copy the information from the master page table,
- * nothing more.
- *
- * This verifies that the fault happens in kernel space
- * (error_code & 4) == 0, and that the fault was not a
- * protection error (error_code & 9) == 0.
- */
- if (unlikely(address >= TASK_SIZE64)) {
- /*
- * Don't check for the module range here: its PML4
- * is always initialized because it's shared with the main
- * kernel text. Only vmalloc may need PML4 syncups.
- */
- if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
- ((address >= VMALLOC_START && address < VMALLOC_END))) {
- if (vmalloc_fault(address) >= 0)
- return;
- }
- if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
- return;
- /*
- * Don't take the mm semaphore here. If we fixup a prefetch
- * fault we could otherwise deadlock.
- */
- goto bad_area_nosemaphore;
- }
-
- if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
- return;
-
- if (likely(regs->eflags & X86_EFLAGS_IF))
- local_irq_enable();
-
- if (unlikely(page_fault_trace))
- printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
- regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code);
-
- if (unlikely(error_code & PF_RSVD))
- pgtable_bad(address, regs, error_code);
-
- /*
- * If we're in an interrupt or have no user
- * context, we must not take the fault..
- */
- if (unlikely(in_atomic() || !mm))
- goto bad_area_nosemaphore;
-
- /*
- * User-mode registers count as a user access even for any
- * potential system fault or CPU buglet.
- */
- if (user_mode_vm(regs))
- error_code |= PF_USER;
-
- again:
- /* When running in the kernel we expect faults to occur only to
- * addresses in user space. All other faults represent errors in the
- * kernel and should generate an OOPS. Unfortunatly, in the case of an
- * erroneous fault occurring in a code path which already holds mmap_sem
- * we will deadlock attempting to validate the fault against the
- * address space. Luckily the kernel only validly references user
- * space from well defined areas of code, which are listed in the
- * exceptions table.
- *
- * As the vast majority of faults will be valid we will only perform
- * the source reference check when there is a possibilty of a deadlock.
- * Attempt to lock the address space, if we cannot we then validate the
- * source. If this is invalid we can skip the address space check,
- * thus avoiding the deadlock.
- */
- if (!down_read_trylock(&mm->mmap_sem)) {
- if ((error_code & PF_USER) == 0 &&
- !search_exception_tables(regs->rip))
- goto bad_area_nosemaphore;
- down_read(&mm->mmap_sem);
- }
-
- vma = find_vma(mm, address);
- if (!vma)
- goto bad_area;
- if (likely(vma->vm_start <= address))
- goto good_area;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto bad_area;
- if (error_code & 4) {
- /* Allow userspace just enough access below the stack pointer
- * to let the 'enter' instruction work.
- */
- if (address + 65536 + 32 * sizeof(unsigned long) < regs->rsp)
- goto bad_area;
- }
- if (expand_stack(vma, address))
- goto bad_area;
-/*
- * Ok, we have a good vm_area for this memory access, so
- * we can handle it..
- */
-good_area:
- info.si_code = SEGV_ACCERR;
- write = 0;
- switch (error_code & (PF_PROT|PF_WRITE)) {
- default: /* 3: write, present */
- /* fall through */
- case PF_WRITE: /* write, not present */
- if (!(vma->vm_flags & VM_WRITE))
- goto bad_area;
- write++;
- break;
- case PF_PROT: /* read, present */
- goto bad_area;
- case 0: /* read, not present */
- if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
- goto bad_area;
- }
-
- /*
- * If for any reason at all we couldn't handle the fault,
- * make sure we exit gracefully rather than endlessly redo
- * the fault.
- */
- fault = handle_mm_fault(mm, vma, address, write);
- if (unlikely(fault & VM_FAULT_ERROR)) {
- if (fault & VM_FAULT_OOM)
- goto out_of_memory;
- else if (fault & VM_FAULT_SIGBUS)
- goto do_sigbus;
- BUG();
- }
- if (fault & VM_FAULT_MAJOR)
- tsk->maj_flt++;
- else
- tsk->min_flt++;
- up_read(&mm->mmap_sem);
- return;
-
-/*
- * Something tried to access memory that isn't in our memory map..
- * Fix it, but check if it's kernel or user first..
- */
-bad_area:
- up_read(&mm->mmap_sem);
-
-bad_area_nosemaphore:
- /* User mode accesses just cause a SIGSEGV */
- if (error_code & PF_USER) {
-
- /*
- * It's possible to have interrupts off here.
- */
- local_irq_enable();
-
- if (is_prefetch(regs, address, error_code))
- return;
-
- /* Work around K8 erratum #100 K8 in compat mode
- occasionally jumps to illegal addresses >4GB. We
- catch this here in the page fault handler because
- these addresses are not reachable. Just detect this
- case and return. Any code segment in LDT is
- compatibility mode. */
- if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) &&
- (address >> 32))
- return;
-
- if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
- printk_ratelimit()) {
- printk(
- "%s%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n",
- tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
- tsk->comm, tsk->pid, address, regs->rip,
- regs->rsp, error_code);
- }
-
- tsk->thread.cr2 = address;
- /* Kernel addresses are always protection faults */
- tsk->thread.error_code = error_code | (address >= TASK_SIZE);
- tsk->thread.trap_no = 14;
- info.si_signo = SIGSEGV;
- info.si_errno = 0;
- /* info.si_code has been set above */
- info.si_addr = (void __user *)address;
- force_sig_info(SIGSEGV, &info, tsk);
- return;
- }
-
-no_context:
-
- /* Are we prepared to handle this kernel fault? */
- fixup = search_exception_tables(regs->rip);
- if (fixup) {
- regs->rip = fixup->fixup;
- return;
- }
-
- /*
- * Hall of shame of CPU/BIOS bugs.
- */
-
- if (is_prefetch(regs, address, error_code))
- return;
-
- if (is_errata93(regs, address))
- return;
-
-/*
- * Oops. The kernel tried to access some bad page. We'll have to
- * terminate things with extreme prejudice.
- */
-
- flags = oops_begin();
-
- if (address < PAGE_SIZE)
- printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
- else
- printk(KERN_ALERT "Unable to handle kernel paging request");
- printk(" at %016lx RIP: \n" KERN_ALERT,address);
- printk_address(regs->rip);
- dump_pagetable(address);
- tsk->thread.cr2 = address;
- tsk->thread.trap_no = 14;
- tsk->thread.error_code = error_code;
- __die("Oops", regs, error_code);
- /* Executive summary in case the body of the oops scrolled away */
- printk(KERN_EMERG "CR2: %016lx\n", address);
- oops_end(flags);
- do_exit(SIGKILL);
-
-/*
- * We ran out of memory, or some other thing happened to us that made
- * us unable to handle the page fault gracefully.
- */
-out_of_memory:
- up_read(&mm->mmap_sem);
- if (is_init(current)) {
- yield();
- goto again;
- }
- printk("VM: killing process %s\n", tsk->comm);
- if (error_code & 4)
- do_group_exit(SIGKILL);
- goto no_context;
-
-do_sigbus:
- up_read(&mm->mmap_sem);
-
- /* Kernel mode? Handle exceptions or die */
- if (!(error_code & PF_USER))
- goto no_context;
-
- tsk->thread.cr2 = address;
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = 14;
- info.si_signo = SIGBUS;
- info.si_errno = 0;
- info.si_code = BUS_ADRERR;
- info.si_addr = (void __user *)address;
- force_sig_info(SIGBUS, &info, tsk);
- return;
-}
-
-DEFINE_SPINLOCK(pgd_lock);
-LIST_HEAD(pgd_list);
-
-void vmalloc_sync_all(void)
-{
- /* Note that races in the updates of insync and start aren't
- problematic:
- insync can only get set bits added, and updates to start are only
- improving performance (without affecting correctness if undone). */
- static DECLARE_BITMAP(insync, PTRS_PER_PGD);
- static unsigned long start = VMALLOC_START & PGDIR_MASK;
- unsigned long address;
-
- for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
- if (!test_bit(pgd_index(address), insync)) {
- const pgd_t *pgd_ref = pgd_offset_k(address);
- struct page *page;
-
- if (pgd_none(*pgd_ref))
- continue;
- spin_lock(&pgd_lock);
- list_for_each_entry(page, &pgd_list, lru) {
- pgd_t *pgd;
- pgd = (pgd_t *)page_address(page) + pgd_index(address);
- if (pgd_none(*pgd))
- set_pgd(pgd, *pgd_ref);
- else
- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
- }
- spin_unlock(&pgd_lock);
- set_bit(pgd_index(address), insync);
- }
- if (address == start)
- start = address + PGDIR_SIZE;
- }
- /* Check that there is no need to do the same for the modules area. */
- BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
- BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
- (__START_KERNEL & PGDIR_MASK)));
-}
-
-static int __init enable_pagefaulttrace(char *str)
-{
- page_fault_trace = 1;
- return 1;
-}
-__setup("pagefaulttrace", enable_pagefaulttrace);
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
deleted file mode 100644
index 458893b376f8..000000000000
--- a/arch/x86_64/mm/init.c
+++ /dev/null
@@ -1,750 +0,0 @@
-/*
- * linux/arch/x86_64/mm/init.c
- *
- * Copyright (C) 1995 Linus Torvalds
- * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
- * Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
- */
-
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/smp.h>
-#include <linux/init.h>
-#include <linux/pagemap.h>
-#include <linux/bootmem.h>
-#include <linux/proc_fs.h>
-#include <linux/pci.h>
-#include <linux/pfn.h>
-#include <linux/poison.h>
-#include <linux/dma-mapping.h>
-#include <linux/module.h>
-#include <linux/memory_hotplug.h>
-#include <linux/nmi.h>
-
-#include <asm/processor.h>
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
-#include <asm/dma.h>
-#include <asm/fixmap.h>
-#include <asm/e820.h>
-#include <asm/apic.h>
-#include <asm/tlb.h>
-#include <asm/mmu_context.h>
-#include <asm/proto.h>
-#include <asm/smp.h>
-#include <asm/sections.h>
-
-#ifndef Dprintk
-#define Dprintk(x...)
-#endif
-
-const struct dma_mapping_ops* dma_ops;
-EXPORT_SYMBOL(dma_ops);
-
-static unsigned long dma_reserve __initdata;
-
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
-/*
- * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
- * physical space so we can cache the place of the first one and move
- * around without checking the pgd every time.
- */
-
-void show_mem(void)
-{
- long i, total = 0, reserved = 0;
- long shared = 0, cached = 0;
- pg_data_t *pgdat;
- struct page *page;
-
- printk(KERN_INFO "Mem-info:\n");
- show_free_areas();
- printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
-
- for_each_online_pgdat(pgdat) {
- for (i = 0; i < pgdat->node_spanned_pages; ++i) {
- /* this loop can take a while with 256 GB and 4k pages
- so update the NMI watchdog */
- if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) {
- touch_nmi_watchdog();
- }
- if (!pfn_valid(pgdat->node_start_pfn + i))
- continue;
- page = pfn_to_page(pgdat->node_start_pfn + i);
- total++;
- if (PageReserved(page))
- reserved++;
- else if (PageSwapCache(page))
- cached++;
- else if (page_count(page))
- shared += page_count(page) - 1;
- }
- }
- printk(KERN_INFO "%lu pages of RAM\n", total);
- printk(KERN_INFO "%lu reserved pages\n",reserved);
- printk(KERN_INFO "%lu pages shared\n",shared);
- printk(KERN_INFO "%lu pages swap cached\n",cached);
-}
-
-int after_bootmem;
-
-static __init void *spp_getpage(void)
-{
- void *ptr;
- if (after_bootmem)
- ptr = (void *) get_zeroed_page(GFP_ATOMIC);
- else
- ptr = alloc_bootmem_pages(PAGE_SIZE);
- if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
- panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");
-
- Dprintk("spp_getpage %p\n", ptr);
- return ptr;
-}
-
-static __init void set_pte_phys(unsigned long vaddr,
- unsigned long phys, pgprot_t prot)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte, new_pte;
-
- Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);
-
- pgd = pgd_offset_k(vaddr);
- if (pgd_none(*pgd)) {
- printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
- return;
- }
- pud = pud_offset(pgd, vaddr);
- if (pud_none(*pud)) {
- pmd = (pmd_t *) spp_getpage();
- set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
- if (pmd != pmd_offset(pud, 0)) {
- printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0));
- return;
- }
- }
- pmd = pmd_offset(pud, vaddr);
- if (pmd_none(*pmd)) {
- pte = (pte_t *) spp_getpage();
- set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
- if (pte != pte_offset_kernel(pmd, 0)) {
- printk("PAGETABLE BUG #02!\n");
- return;
- }
- }
- new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);
-
- pte = pte_offset_kernel(pmd, vaddr);
- if (!pte_none(*pte) &&
- pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
- pte_ERROR(*pte);
- set_pte(pte, new_pte);
-
- /*
- * It's enough to flush this one mapping.
- * (PGE mappings get flushed as well)
- */
- __flush_tlb_one(vaddr);
-}
-
-/* NOTE: this is meant to be run only at boot */
-void __init
-__set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
-{
- unsigned long address = __fix_to_virt(idx);
-
- if (idx >= __end_of_fixed_addresses) {
- printk("Invalid __set_fixmap\n");
- return;
- }
- set_pte_phys(address, phys, prot);
-}
-
-unsigned long __meminitdata table_start, table_end;
-
-static __meminit void *alloc_low_page(unsigned long *phys)
-{
- unsigned long pfn = table_end++;
- void *adr;
-
- if (after_bootmem) {
- adr = (void *)get_zeroed_page(GFP_ATOMIC);
- *phys = __pa(adr);
- return adr;
- }
-
- if (pfn >= end_pfn)
- panic("alloc_low_page: ran out of memory");
-
- adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE);
- memset(adr, 0, PAGE_SIZE);
- *phys = pfn * PAGE_SIZE;
- return adr;
-}
-
-static __meminit void unmap_low_page(void *adr)
-{
-
- if (after_bootmem)
- return;
-
- early_iounmap(adr, PAGE_SIZE);
-}
-
-/* Must run before zap_low_mappings */
-__meminit void *early_ioremap(unsigned long addr, unsigned long size)
-{
- unsigned long vaddr;
- pmd_t *pmd, *last_pmd;
- int i, pmds;
-
- pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
- vaddr = __START_KERNEL_map;
- pmd = level2_kernel_pgt;
- last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
- for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
- for (i = 0; i < pmds; i++) {
- if (pmd_present(pmd[i]))
- goto next;
- }
- vaddr += addr & ~PMD_MASK;
- addr &= PMD_MASK;
- for (i = 0; i < pmds; i++, addr += PMD_SIZE)
- set_pmd(pmd + i,__pmd(addr | _KERNPG_TABLE | _PAGE_PSE));
- __flush_tlb();
- return (void *)vaddr;
- next:
- ;
- }
- printk("early_ioremap(0x%lx, %lu) failed\n", addr, size);
- return NULL;
-}
-
-/* To avoid virtual aliases later */
-__meminit void early_iounmap(void *addr, unsigned long size)
-{
- unsigned long vaddr;
- pmd_t *pmd;
- int i, pmds;
-
- vaddr = (unsigned long)addr;
- pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
- pmd = level2_kernel_pgt + pmd_index(vaddr);
- for (i = 0; i < pmds; i++)
- pmd_clear(pmd + i);
- __flush_tlb();
-}
-
-static void __meminit
-phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
-{
- int i = pmd_index(address);
-
- for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
- unsigned long entry;
- pmd_t *pmd = pmd_page + pmd_index(address);
-
- if (address >= end) {
- if (!after_bootmem)
- for (; i < PTRS_PER_PMD; i++, pmd++)
- set_pmd(pmd, __pmd(0));
- break;
- }
-
- if (pmd_val(*pmd))
- continue;
-
- entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
- entry &= __supported_pte_mask;
- set_pmd(pmd, __pmd(entry));
- }
-}
-
-static void __meminit
-phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
-{
- pmd_t *pmd = pmd_offset(pud,0);
- spin_lock(&init_mm.page_table_lock);
- phys_pmd_init(pmd, address, end);
- spin_unlock(&init_mm.page_table_lock);
- __flush_tlb_all();
-}
-
-static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
-{
- int i = pud_index(addr);
-
-
- for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) {
- unsigned long pmd_phys;
- pud_t *pud = pud_page + pud_index(addr);
- pmd_t *pmd;
-
- if (addr >= end)
- break;
-
- if (!after_bootmem && !e820_any_mapped(addr,addr+PUD_SIZE,0)) {
- set_pud(pud, __pud(0));
- continue;
- }
-
- if (pud_val(*pud)) {
- phys_pmd_update(pud, addr, end);
- continue;
- }
-
- pmd = alloc_low_page(&pmd_phys);
- spin_lock(&init_mm.page_table_lock);
- set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
- phys_pmd_init(pmd, addr, end);
- spin_unlock(&init_mm.page_table_lock);
- unmap_low_page(pmd);
- }
- __flush_tlb();
-}
-
-static void __init find_early_table_space(unsigned long end)
-{
- unsigned long puds, pmds, tables, start;
-
- puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
- pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
- tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
- round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
-
- /* RED-PEN putting page tables only on node 0 could
- cause a hotspot and fill up ZONE_DMA. The page tables
- need roughly 0.5KB per GB. */
- start = 0x8000;
- table_start = find_e820_area(start, end, tables);
- if (table_start == -1UL)
- panic("Cannot find space for the kernel page tables");
-
- table_start >>= PAGE_SHIFT;
- table_end = table_start;
-
- early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
- end, table_start << PAGE_SHIFT,
- (table_start << PAGE_SHIFT) + tables);
-}
-
-/* Setup the direct mapping of the physical memory at PAGE_OFFSET.
- This runs before bootmem is initialized and gets pages directly from the
- physical memory. To access them they are temporarily mapped. */
-void __meminit init_memory_mapping(unsigned long start, unsigned long end)
-{
- unsigned long next;
-
- Dprintk("init_memory_mapping\n");
-
- /*
- * Find space for the kernel direct mapping tables.
- * Later we should allocate these tables in the local node of the memory
- * mapped. Unfortunately this is done currently before the nodes are
- * discovered.
- */
- if (!after_bootmem)
- find_early_table_space(end);
-
- start = (unsigned long)__va(start);
- end = (unsigned long)__va(end);
-
- for (; start < end; start = next) {
- unsigned long pud_phys;
- pgd_t *pgd = pgd_offset_k(start);
- pud_t *pud;
-
- if (after_bootmem)
- pud = pud_offset(pgd, start & PGDIR_MASK);
- else
- pud = alloc_low_page(&pud_phys);
-
- next = start + PGDIR_SIZE;
- if (next > end)
- next = end;
- phys_pud_init(pud, __pa(start), __pa(next));
- if (!after_bootmem)
- set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
- unmap_low_page(pud);
- }
-
- if (!after_bootmem)
- mmu_cr4_features = read_cr4();
- __flush_tlb_all();
-}
-
-#ifndef CONFIG_NUMA
-void __init paging_init(void)
-{
- unsigned long max_zone_pfns[MAX_NR_ZONES];
- memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
- max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
- max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
- max_zone_pfns[ZONE_NORMAL] = end_pfn;
-
- memory_present(0, 0, end_pfn);
- sparse_init();
- free_area_init_nodes(max_zone_pfns);
-}
-#endif
-
-/* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
- from the CPU leading to inconsistent cache lines. address and size
- must be aligned to 2MB boundaries.
- Does nothing when the mapping doesn't exist. */
-void __init clear_kernel_mapping(unsigned long address, unsigned long size)
-{
- unsigned long end = address + size;
-
- BUG_ON(address & ~LARGE_PAGE_MASK);
- BUG_ON(size & ~LARGE_PAGE_MASK);
-
- for (; address < end; address += LARGE_PAGE_SIZE) {
- pgd_t *pgd = pgd_offset_k(address);
- pud_t *pud;
- pmd_t *pmd;
- if (pgd_none(*pgd))
- continue;
- pud = pud_offset(pgd, address);
- if (pud_none(*pud))
- continue;
- pmd = pmd_offset(pud, address);
- if (!pmd || pmd_none(*pmd))
- continue;
- if (0 == (pmd_val(*pmd) & _PAGE_PSE)) {
- /* Could handle this, but it should not happen currently. */
- printk(KERN_ERR
- "clear_kernel_mapping: mapping has been split. will leak memory\n");
- pmd_ERROR(*pmd);
- }
- set_pmd(pmd, __pmd(0));
- }
- __flush_tlb_all();
-}
-
-/*
- * Memory hotplug specific functions
- */
-void online_page(struct page *page)
-{
- ClearPageReserved(page);
- init_page_count(page);
- __free_page(page);
- totalram_pages++;
- num_physpages++;
-}
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-/*
- * Memory is added always to NORMAL zone. This means you will never get
- * additional DMA/DMA32 memory.
- */
-int arch_add_memory(int nid, u64 start, u64 size)
-{
- struct pglist_data *pgdat = NODE_DATA(nid);
- struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
- unsigned long start_pfn = start >> PAGE_SHIFT;
- unsigned long nr_pages = size >> PAGE_SHIFT;
- int ret;
-
- init_memory_mapping(start, (start + size -1));
-
- ret = __add_pages(zone, start_pfn, nr_pages);
- if (ret)
- goto error;
-
- return ret;
-error:
- printk("%s: Problem encountered in __add_pages!\n", __func__);
- return ret;
-}
-EXPORT_SYMBOL_GPL(arch_add_memory);
-
-int remove_memory(u64 start, u64 size)
-{
- return -EINVAL;
-}
-EXPORT_SYMBOL_GPL(remove_memory);
-
-#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
-int memory_add_physaddr_to_nid(u64 start)
-{
- return 0;
-}
-EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
-#endif
-
-#endif /* CONFIG_MEMORY_HOTPLUG */
-
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-/*
- * Memory Hotadd without sparsemem. The mem_maps have been allocated in advance,
- * just online the pages.
- */
-int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages)
-{
- int err = -EIO;
- unsigned long pfn;
- unsigned long total = 0, mem = 0;
- for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
- if (pfn_valid(pfn)) {
- online_page(pfn_to_page(pfn));
- err = 0;
- mem++;
- }
- total++;
- }
- if (!err) {
- z->spanned_pages += total;
- z->present_pages += mem;
- z->zone_pgdat->node_spanned_pages += total;
- z->zone_pgdat->node_present_pages += mem;
- }
- return err;
-}
-#endif
-
-static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
- kcore_vsyscall;
-
-void __init mem_init(void)
-{
- long codesize, reservedpages, datasize, initsize;
-
- pci_iommu_alloc();
-
- /* clear the zero-page */
- memset(empty_zero_page, 0, PAGE_SIZE);
-
- reservedpages = 0;
-
- /* this will put all low memory onto the freelists */
-#ifdef CONFIG_NUMA
- totalram_pages = numa_free_all_bootmem();
-#else
- totalram_pages = free_all_bootmem();
-#endif
- reservedpages = end_pfn - totalram_pages -
- absent_pages_in_range(0, end_pfn);
-
- after_bootmem = 1;
-
- codesize = (unsigned long) &_etext - (unsigned long) &_text;
- datasize = (unsigned long) &_edata - (unsigned long) &_etext;
- initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
-
- /* Register memory areas for /proc/kcore */
- kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
- kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
- VMALLOC_END-VMALLOC_START);
- kclist_add(&kcore_kernel, &_stext, _end - _stext);
- kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
- kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
- VSYSCALL_END - VSYSCALL_START);
-
- printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
- (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
- end_pfn << (PAGE_SHIFT-10),
- codesize >> 10,
- reservedpages << (PAGE_SHIFT-10),
- datasize >> 10,
- initsize >> 10);
-}
-
-void free_init_pages(char *what, unsigned long begin, unsigned long end)
-{
- unsigned long addr;
-
- if (begin >= end)
- return;
-
- printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
- for (addr = begin; addr < end; addr += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(addr));
- init_page_count(virt_to_page(addr));
- memset((void *)(addr & ~(PAGE_SIZE-1)),
- POISON_FREE_INITMEM, PAGE_SIZE);
- if (addr >= __START_KERNEL_map)
- change_page_attr_addr(addr, 1, __pgprot(0));
- free_page(addr);
- totalram_pages++;
- }
- if (addr > __START_KERNEL_map)
- global_flush_tlb();
-}
-
-void free_initmem(void)
-{
- free_init_pages("unused kernel memory",
- (unsigned long)(&__init_begin),
- (unsigned long)(&__init_end));
-}
-
-#ifdef CONFIG_DEBUG_RODATA
-
-void mark_rodata_ro(void)
-{
- unsigned long start = (unsigned long)_stext, end;
-
-#ifdef CONFIG_HOTPLUG_CPU
- /* It must still be possible to apply SMP alternatives. */
- if (num_possible_cpus() > 1)
- start = (unsigned long)_etext;
-#endif
-
-#ifdef CONFIG_KPROBES
- start = (unsigned long)__start_rodata;
-#endif
-
- end = (unsigned long)__end_rodata;
- start = (start + PAGE_SIZE - 1) & PAGE_MASK;
- end &= PAGE_MASK;
- if (end <= start)
- return;
-
- change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL_RO);
-
- printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
- (end - start) >> 10);
-
- /*
- * change_page_attr_addr() requires a global_flush_tlb() call after it.
- * We do this after the printk so that if something went wrong in the
- * change, the printk gets out at least to give a better debug hint
- * of who is the culprit.
- */
- global_flush_tlb();
-}
-#endif
-
-#ifdef CONFIG_BLK_DEV_INITRD
-void free_initrd_mem(unsigned long start, unsigned long end)
-{
- free_init_pages("initrd memory", start, end);
-}
-#endif
-
-void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
-{
-#ifdef CONFIG_NUMA
- int nid = phys_to_nid(phys);
-#endif
- unsigned long pfn = phys >> PAGE_SHIFT;
- if (pfn >= end_pfn) {
- /* This can happen with kdump kernels when accessing firmware
- tables. */
- if (pfn < end_pfn_map)
- return;
- printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
- phys, len);
- return;
- }
-
- /* Should check here against the e820 map to avoid double free */
-#ifdef CONFIG_NUMA
- reserve_bootmem_node(NODE_DATA(nid), phys, len);
-#else
- reserve_bootmem(phys, len);
-#endif
- if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
- dma_reserve += len / PAGE_SIZE;
- set_dma_reserve(dma_reserve);
- }
-}
-
-int kern_addr_valid(unsigned long addr)
-{
- unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
-
- if (above != 0 && above != -1UL)
- return 0;
-
- pgd = pgd_offset_k(addr);
- if (pgd_none(*pgd))
- return 0;
-
- pud = pud_offset(pgd, addr);
- if (pud_none(*pud))
- return 0;
-
- pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd))
- return 0;
- if (pmd_large(*pmd))
- return pfn_valid(pmd_pfn(*pmd));
-
- pte = pte_offset_kernel(pmd, addr);
- if (pte_none(*pte))
- return 0;
- return pfn_valid(pte_pfn(*pte));
-}
-
-/* A pseudo VMA to allow ptrace access for the vsyscall page. This only
- covers the 64bit vsyscall page now. 32bit has a real VMA now and does
- not need special handling anymore. */
-
-static struct vm_area_struct gate_vma = {
- .vm_start = VSYSCALL_START,
- .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT),
- .vm_page_prot = PAGE_READONLY_EXEC,
- .vm_flags = VM_READ | VM_EXEC
-};
-
-struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
-{
-#ifdef CONFIG_IA32_EMULATION
- if (test_tsk_thread_flag(tsk, TIF_IA32))
- return NULL;
-#endif
- return &gate_vma;
-}
-
-int in_gate_area(struct task_struct *task, unsigned long addr)
-{
- struct vm_area_struct *vma = get_gate_vma(task);
- if (!vma)
- return 0;
- return (addr >= vma->vm_start) && (addr < vma->vm_end);
-}
-
-/* Use this when you have no reliable task/vma, typically from interrupt
- * context. It is less reliable than using the task's vma and may give
- * false positives.
- */
-int in_gate_area_no_task(unsigned long addr)
-{
- return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
-}
-
-void * __init alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size)
-{
- return __alloc_bootmem_core(pgdat->bdata, size,
- SMP_CACHE_BYTES, (4UL*1024*1024*1024), 0);
-}
-
-const char *arch_vma_name(struct vm_area_struct *vma)
-{
- if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
- return "[vdso]";
- if (vma == &gate_vma)
- return "[vsyscall]";
- return NULL;
-}
diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c
deleted file mode 100644
index 6cac90aa5032..000000000000
--- a/arch/x86_64/mm/ioremap.c
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- * arch/x86_64/mm/ioremap.c
- *
- * Re-map IO memory to kernel address space so that we can access it.
- * This is needed for high PCI addresses that aren't mapped in the
- * 640k-1MB IO memory area on PC's
- *
- * (C) Copyright 1995 1996 Linus Torvalds
- */
-
-#include <linux/vmalloc.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/io.h>
-
-#include <asm/pgalloc.h>
-#include <asm/fixmap.h>
-#include <asm/tlbflush.h>
-#include <asm/cacheflush.h>
-#include <asm/proto.h>
-
-unsigned long __phys_addr(unsigned long x)
-{
- if (x >= __START_KERNEL_map)
- return x - __START_KERNEL_map + phys_base;
- return x - PAGE_OFFSET;
-}
-EXPORT_SYMBOL(__phys_addr);
-
-#define ISA_START_ADDRESS 0xa0000
-#define ISA_END_ADDRESS 0x100000
-
-/*
- * Fix up the linear direct mapping of the kernel to avoid cache attribute
- * conflicts.
- */
-static int
-ioremap_change_attr(unsigned long phys_addr, unsigned long size,
- unsigned long flags)
-{
- int err = 0;
- if (phys_addr + size - 1 < (end_pfn_map << PAGE_SHIFT)) {
- unsigned long npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
- unsigned long vaddr = (unsigned long) __va(phys_addr);
-
- /*
- * Must use a address here and not struct page because the phys addr
- * can be a in hole between nodes and not have an memmap entry.
- */
- err = change_page_attr_addr(vaddr,npages,__pgprot(__PAGE_KERNEL|flags));
- if (!err)
- global_flush_tlb();
- }
- return err;
-}
-
-/*
- * Generic mapping function
- */
-
-/*
- * Remap an arbitrary physical address space into the kernel virtual
- * address space. Needed when the kernel wants to access high addresses
- * directly.
- *
- * NOTE! We need to allow non-page-aligned mappings too: we will obviously
- * have to convert them into an offset in a page-aligned mapping, but the
- * caller shouldn't need to know that small detail.
- */
-void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags)
-{
- void * addr;
- struct vm_struct * area;
- unsigned long offset, last_addr;
- pgprot_t pgprot;
-
- /* Don't allow wraparound or zero size */
- last_addr = phys_addr + size - 1;
- if (!size || last_addr < phys_addr)
- return NULL;
-
- /*
- * Don't remap the low PCI/ISA area, it's always mapped..
- */
- if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
- return (__force void __iomem *)phys_to_virt(phys_addr);
-
-#ifdef CONFIG_FLATMEM
- /*
- * Don't allow anybody to remap normal RAM that we're using..
- */
- if (last_addr < virt_to_phys(high_memory)) {
- char *t_addr, *t_end;
- struct page *page;
-
- t_addr = __va(phys_addr);
- t_end = t_addr + (size - 1);
-
- for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++)
- if(!PageReserved(page))
- return NULL;
- }
-#endif
-
- pgprot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_GLOBAL
- | _PAGE_DIRTY | _PAGE_ACCESSED | flags);
- /*
- * Mappings have to be page-aligned
- */
- offset = phys_addr & ~PAGE_MASK;
- phys_addr &= PAGE_MASK;
- size = PAGE_ALIGN(last_addr+1) - phys_addr;
-
- /*
- * Ok, go for it..
- */
- area = get_vm_area(size, VM_IOREMAP | (flags << 20));
- if (!area)
- return NULL;
- area->phys_addr = phys_addr;
- addr = area->addr;
- if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
- phys_addr, pgprot)) {
- remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
- return NULL;
- }
- if (flags && ioremap_change_attr(phys_addr, size, flags) < 0) {
- area->flags &= 0xffffff;
- vunmap(addr);
- return NULL;
- }
- return (__force void __iomem *) (offset + (char *)addr);
-}
-EXPORT_SYMBOL(__ioremap);
-
-/**
- * ioremap_nocache - map bus memory into CPU space
- * @offset: bus address of the memory
- * @size: size of the resource to map
- *
- * ioremap_nocache performs a platform specific sequence of operations to
- * make bus memory CPU accessible via the readb/readw/readl/writeb/
- * writew/writel functions and the other mmio helpers. The returned
- * address is not guaranteed to be usable directly as a virtual
- * address.
- *
- * This version of ioremap ensures that the memory is marked uncachable
- * on the CPU as well as honouring existing caching rules from things like
- * the PCI bus. Note that there are other caches and buffers on many
- * busses. In particular driver authors should read up on PCI writes
- *
- * It's useful if some control registers are in such an area and
- * write combining or read caching is not desirable:
- *
- * Must be freed with iounmap.
- */
-
-void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
-{
- return __ioremap(phys_addr, size, _PAGE_PCD);
-}
-EXPORT_SYMBOL(ioremap_nocache);
-
-/**
- * iounmap - Free a IO remapping
- * @addr: virtual address from ioremap_*
- *
- * Caller must ensure there is only one unmapping for the same pointer.
- */
-void iounmap(volatile void __iomem *addr)
-{
- struct vm_struct *p, *o;
-
- if (addr <= high_memory)
- return;
- if (addr >= phys_to_virt(ISA_START_ADDRESS) &&
- addr < phys_to_virt(ISA_END_ADDRESS))
- return;
-
- addr = (volatile void __iomem *)(PAGE_MASK & (unsigned long __force)addr);
- /* Use the vm area unlocked, assuming the caller
- ensures there isn't another iounmap for the same address
- in parallel. Reuse of the virtual address is prevented by
- leaving it in the global lists until we're done with it.
- cpa takes care of the direct mappings. */
- read_lock(&vmlist_lock);
- for (p = vmlist; p; p = p->next) {
- if (p->addr == addr)
- break;
- }
- read_unlock(&vmlist_lock);
-
- if (!p) {
- printk("iounmap: bad address %p\n", addr);
- dump_stack();
- return;
- }
-
- /* Reset the direct mapping. Can block */
- if (p->flags >> 20)
- ioremap_change_attr(p->phys_addr, p->size, 0);
-
- /* Finally remove it */
- o = remove_vm_area((void *)addr);
- BUG_ON(p != o || o == NULL);
- kfree(p);
-}
-EXPORT_SYMBOL(iounmap);
-
diff --git a/arch/x86_64/mm/k8topology.c b/arch/x86_64/mm/k8topology.c
deleted file mode 100644
index a96006f7ae0c..000000000000
--- a/arch/x86_64/mm/k8topology.c
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * AMD K8 NUMA support.
- * Discover the memory map and associated nodes.
- *
- * This version reads it directly from the K8 northbridge.
- *
- * Copyright 2002,2003 Andi Kleen, SuSE Labs.
- */
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/module.h>
-#include <linux/nodemask.h>
-#include <asm/io.h>
-#include <linux/pci_ids.h>
-#include <asm/types.h>
-#include <asm/mmzone.h>
-#include <asm/proto.h>
-#include <asm/e820.h>
-#include <asm/pci-direct.h>
-#include <asm/numa.h>
-
-static __init int find_northbridge(void)
-{
- int num;
-
- for (num = 0; num < 32; num++) {
- u32 header;
-
- header = read_pci_config(0, num, 0, 0x00);
- if (header != (PCI_VENDOR_ID_AMD | (0x1100<<16)))
- continue;
-
- header = read_pci_config(0, num, 1, 0x00);
- if (header != (PCI_VENDOR_ID_AMD | (0x1101<<16)))
- continue;
- return num;
- }
-
- return -1;
-}
-
-int __init k8_scan_nodes(unsigned long start, unsigned long end)
-{
- unsigned long prevbase;
- struct bootnode nodes[8];
- int nodeid, i, j, nb;
- unsigned char nodeids[8];
- int found = 0;
- u32 reg;
- unsigned numnodes;
- unsigned num_cores;
-
- if (!early_pci_allowed())
- return -1;
-
- nb = find_northbridge();
- if (nb < 0)
- return nb;
-
- printk(KERN_INFO "Scanning NUMA topology in Northbridge %d\n", nb);
-
- num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
- printk(KERN_INFO "CPU has %d num_cores\n", num_cores);
-
- reg = read_pci_config(0, nb, 0, 0x60);
- numnodes = ((reg >> 4) & 0xF) + 1;
- if (numnodes <= 1)
- return -1;
-
- printk(KERN_INFO "Number of nodes %d\n", numnodes);
-
- memset(&nodes,0,sizeof(nodes));
- prevbase = 0;
- for (i = 0; i < 8; i++) {
- unsigned long base,limit;
- u32 nodeid;
-
- base = read_pci_config(0, nb, 1, 0x40 + i*8);
- limit = read_pci_config(0, nb, 1, 0x44 + i*8);
-
- nodeid = limit & 7;
- nodeids[i] = nodeid;
- if ((base & 3) == 0) {
- if (i < numnodes)
- printk("Skipping disabled node %d\n", i);
- continue;
- }
- if (nodeid >= numnodes) {
- printk("Ignoring excess node %d (%lx:%lx)\n", nodeid,
- base, limit);
- continue;
- }
-
- if (!limit) {
- printk(KERN_INFO "Skipping node entry %d (base %lx)\n", i,
- base);
- continue;
- }
- if ((base >> 8) & 3 || (limit >> 8) & 3) {
- printk(KERN_ERR "Node %d using interleaving mode %lx/%lx\n",
- nodeid, (base>>8)&3, (limit>>8) & 3);
- return -1;
- }
- if (node_isset(nodeid, node_possible_map)) {
- printk(KERN_INFO "Node %d already present. Skipping\n",
- nodeid);
- continue;
- }
-
- limit >>= 16;
- limit <<= 24;
- limit |= (1<<24)-1;
- limit++;
-
- if (limit > end_pfn << PAGE_SHIFT)
- limit = end_pfn << PAGE_SHIFT;
- if (limit <= base)
- continue;
-
- base >>= 16;
- base <<= 24;
-
- if (base < start)
- base = start;
- if (limit > end)
- limit = end;
- if (limit == base) {
- printk(KERN_ERR "Empty node %d\n", nodeid);
- continue;
- }
- if (limit < base) {
- printk(KERN_ERR "Node %d bogus settings %lx-%lx.\n",
- nodeid, base, limit);
- continue;
- }
-
- /* Could sort here, but pun for now. Should not happen anyroads. */
- if (prevbase > base) {
- printk(KERN_ERR "Node map not sorted %lx,%lx\n",
- prevbase,base);
- return -1;
- }
-
- printk(KERN_INFO "Node %d MemBase %016lx Limit %016lx\n",
- nodeid, base, limit);
-
- found++;
-
- nodes[nodeid].start = base;
- nodes[nodeid].end = limit;
- e820_register_active_regions(nodeid,
- nodes[nodeid].start >> PAGE_SHIFT,
- nodes[nodeid].end >> PAGE_SHIFT);
-
- prevbase = base;
-
- node_set(nodeid, node_possible_map);
- }
-
- if (!found)
- return -1;
-
- memnode_shift = compute_hash_shift(nodes, 8);
- if (memnode_shift < 0) {
- printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n");
- return -1;
- }
- printk(KERN_INFO "Using node hash shift of %d\n", memnode_shift);
-
- for (i = 0; i < 8; i++) {
- if (nodes[i].start != nodes[i].end) {
- nodeid = nodeids[i];
- for (j = 0; j < num_cores; j++)
- apicid_to_node[(nodeid * num_cores) + j] = i;
- setup_node_bootmem(i, nodes[i].start, nodes[i].end);
- }
- }
-
- numa_init_array();
- return 0;
-}
diff --git a/arch/x86_64/mm/mmap.c b/arch/x86_64/mm/mmap.c
deleted file mode 100644
index 80bba0dc000e..000000000000
--- a/arch/x86_64/mm/mmap.c
+++ /dev/null
@@ -1,29 +0,0 @@
-/* Copyright 2005 Andi Kleen, SuSE Labs.
- * Licensed under GPL, v.2
- */
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/random.h>
-#include <asm/ia32.h>
-
-/* Notebook: move the mmap code from sys_x86_64.c over here. */
-
-void arch_pick_mmap_layout(struct mm_struct *mm)
-{
-#ifdef CONFIG_IA32_EMULATION
- if (current_thread_info()->flags & _TIF_IA32)
- return ia32_pick_mmap_layout(mm);
-#endif
- mm->mmap_base = TASK_UNMAPPED_BASE;
- if (current->flags & PF_RANDOMIZE) {
- /* Add 28bit randomness which is about 40bits of address space
- because mmap base has to be page aligned.
- or ~1/128 of the total user VM
- (total user address space is 47bits) */
- unsigned rnd = get_random_int() & 0xfffffff;
- mm->mmap_base += ((unsigned long)rnd) << PAGE_SHIFT;
- }
- mm->get_unmapped_area = arch_get_unmapped_area;
- mm->unmap_area = arch_unmap_area;
-}
-
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
deleted file mode 100644
index 6da235522269..000000000000
--- a/arch/x86_64/mm/numa.c
+++ /dev/null
@@ -1,648 +0,0 @@
-/*
- * Generic VM initialization for x86-64 NUMA setups.
- * Copyright 2002,2003 Andi Kleen, SuSE Labs.
- */
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/mmzone.h>
-#include <linux/ctype.h>
-#include <linux/module.h>
-#include <linux/nodemask.h>
-
-#include <asm/e820.h>
-#include <asm/proto.h>
-#include <asm/dma.h>
-#include <asm/numa.h>
-#include <asm/acpi.h>
-
-#ifndef Dprintk
-#define Dprintk(x...)
-#endif
-
-struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
-bootmem_data_t plat_node_bdata[MAX_NUMNODES];
-
-struct memnode memnode;
-
-unsigned char cpu_to_node[NR_CPUS] __read_mostly = {
- [0 ... NR_CPUS-1] = NUMA_NO_NODE
-};
-unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
- [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
-};
-cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
-
-int numa_off __initdata;
-unsigned long __initdata nodemap_addr;
-unsigned long __initdata nodemap_size;
-
-
-/*
- * Given a shift value, try to populate memnodemap[]
- * Returns :
- * 1 if OK
- * 0 if memnodmap[] too small (of shift too small)
- * -1 if node overlap or lost ram (shift too big)
- */
-static int __init
-populate_memnodemap(const struct bootnode *nodes, int numnodes, int shift)
-{
- int i;
- int res = -1;
- unsigned long addr, end;
-
- memset(memnodemap, 0xff, memnodemapsize);
- for (i = 0; i < numnodes; i++) {
- addr = nodes[i].start;
- end = nodes[i].end;
- if (addr >= end)
- continue;
- if ((end >> shift) >= memnodemapsize)
- return 0;
- do {
- if (memnodemap[addr >> shift] != 0xff)
- return -1;
- memnodemap[addr >> shift] = i;
- addr += (1UL << shift);
- } while (addr < end);
- res = 1;
- }
- return res;
-}
-
-static int __init allocate_cachealigned_memnodemap(void)
-{
- unsigned long pad, pad_addr;
-
- memnodemap = memnode.embedded_map;
- if (memnodemapsize <= 48)
- return 0;
-
- pad = L1_CACHE_BYTES - 1;
- pad_addr = 0x8000;
- nodemap_size = pad + memnodemapsize;
- nodemap_addr = find_e820_area(pad_addr, end_pfn<<PAGE_SHIFT,
- nodemap_size);
- if (nodemap_addr == -1UL) {
- printk(KERN_ERR
- "NUMA: Unable to allocate Memory to Node hash map\n");
- nodemap_addr = nodemap_size = 0;
- return -1;
- }
- pad_addr = (nodemap_addr + pad) & ~pad;
- memnodemap = phys_to_virt(pad_addr);
-
- printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n",
- nodemap_addr, nodemap_addr + nodemap_size);
- return 0;
-}
-
-/*
- * The LSB of all start and end addresses in the node map is the value of the
- * maximum possible shift.
- */
-static int __init
-extract_lsb_from_nodes (const struct bootnode *nodes, int numnodes)
-{
- int i, nodes_used = 0;
- unsigned long start, end;
- unsigned long bitfield = 0, memtop = 0;
-
- for (i = 0; i < numnodes; i++) {
- start = nodes[i].start;
- end = nodes[i].end;
- if (start >= end)
- continue;
- bitfield |= start;
- nodes_used++;
- if (end > memtop)
- memtop = end;
- }
- if (nodes_used <= 1)
- i = 63;
- else
- i = find_first_bit(&bitfield, sizeof(unsigned long)*8);
- memnodemapsize = (memtop >> i)+1;
- return i;
-}
-
-int __init compute_hash_shift(struct bootnode *nodes, int numnodes)
-{
- int shift;
-
- shift = extract_lsb_from_nodes(nodes, numnodes);
- if (allocate_cachealigned_memnodemap())
- return -1;
- printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n",
- shift);
-
- if (populate_memnodemap(nodes, numnodes, shift) != 1) {
- printk(KERN_INFO
- "Your memory is not aligned you need to rebuild your kernel "
- "with a bigger NODEMAPSIZE shift=%d\n",
- shift);
- return -1;
- }
- return shift;
-}
-
-#ifdef CONFIG_SPARSEMEM
-int early_pfn_to_nid(unsigned long pfn)
-{
- return phys_to_nid(pfn << PAGE_SHIFT);
-}
-#endif
-
-static void * __init
-early_node_mem(int nodeid, unsigned long start, unsigned long end,
- unsigned long size)
-{
- unsigned long mem = find_e820_area(start, end, size);
- void *ptr;
- if (mem != -1L)
- return __va(mem);
- ptr = __alloc_bootmem_nopanic(size,
- SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS));
- if (ptr == 0) {
- printk(KERN_ERR "Cannot find %lu bytes in node %d\n",
- size, nodeid);
- return NULL;
- }
- return ptr;
-}
-
-/* Initialize bootmem allocator for a node */
-void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
-{
- unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size, bootmap_start;
- unsigned long nodedata_phys;
- void *bootmap;
- const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);
-
- start = round_up(start, ZONE_ALIGN);
-
- printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, start, end);
-
- start_pfn = start >> PAGE_SHIFT;
- end_pfn = end >> PAGE_SHIFT;
-
- node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size);
- if (node_data[nodeid] == NULL)
- return;
- nodedata_phys = __pa(node_data[nodeid]);
-
- memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
- NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid];
- NODE_DATA(nodeid)->node_start_pfn = start_pfn;
- NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
-
- /* Find a place for the bootmem map */
- bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
- bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
- bootmap = early_node_mem(nodeid, bootmap_start, end,
- bootmap_pages<<PAGE_SHIFT);
- if (bootmap == NULL) {
- if (nodedata_phys < start || nodedata_phys >= end)
- free_bootmem((unsigned long)node_data[nodeid],pgdat_size);
- node_data[nodeid] = NULL;
- return;
- }
- bootmap_start = __pa(bootmap);
- Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages);
-
- bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
- bootmap_start >> PAGE_SHIFT,
- start_pfn, end_pfn);
-
- free_bootmem_with_active_regions(nodeid, end);
-
- reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size);
- reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<<PAGE_SHIFT);
-#ifdef CONFIG_ACPI_NUMA
- srat_reserve_add_area(nodeid);
-#endif
- node_set_online(nodeid);
-}
-
-/* Initialize final allocator for a zone */
-void __init setup_node_zones(int nodeid)
-{
- unsigned long start_pfn, end_pfn, memmapsize, limit;
-
- start_pfn = node_start_pfn(nodeid);
- end_pfn = node_end_pfn(nodeid);
-
- Dprintk(KERN_INFO "Setting up memmap for node %d %lx-%lx\n",
- nodeid, start_pfn, end_pfn);
-
- /* Try to allocate mem_map at end to not fill up precious <4GB
- memory. */
- memmapsize = sizeof(struct page) * (end_pfn-start_pfn);
- limit = end_pfn << PAGE_SHIFT;
-#ifdef CONFIG_FLAT_NODE_MEM_MAP
- NODE_DATA(nodeid)->node_mem_map =
- __alloc_bootmem_core(NODE_DATA(nodeid)->bdata,
- memmapsize, SMP_CACHE_BYTES,
- round_down(limit - memmapsize, PAGE_SIZE),
- limit);
-#endif
-}
-
-void __init numa_init_array(void)
-{
- int rr, i;
- /* There are unfortunately some poorly designed mainboards around
- that only connect memory to a single CPU. This breaks the 1:1 cpu->node
- mapping. To avoid this fill in the mapping for all possible
- CPUs, as the number of CPUs is not known yet.
- We round robin the existing nodes. */
- rr = first_node(node_online_map);
- for (i = 0; i < NR_CPUS; i++) {
- if (cpu_to_node[i] != NUMA_NO_NODE)
- continue;
- numa_set_node(i, rr);
- rr = next_node(rr, node_online_map);
- if (rr == MAX_NUMNODES)
- rr = first_node(node_online_map);
- }
-
-}
-
-#ifdef CONFIG_NUMA_EMU
-/* Numa emulation */
-char *cmdline __initdata;
-
-/*
- * Setups up nid to range from addr to addr + size. If the end boundary is
- * greater than max_addr, then max_addr is used instead. The return value is 0
- * if there is additional memory left for allocation past addr and -1 otherwise.
- * addr is adjusted to be at the end of the node.
- */
-static int __init setup_node_range(int nid, struct bootnode *nodes, u64 *addr,
- u64 size, u64 max_addr)
-{
- int ret = 0;
- nodes[nid].start = *addr;
- *addr += size;
- if (*addr >= max_addr) {
- *addr = max_addr;
- ret = -1;
- }
- nodes[nid].end = *addr;
- node_set(nid, node_possible_map);
- printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid,
- nodes[nid].start, nodes[nid].end,
- (nodes[nid].end - nodes[nid].start) >> 20);
- return ret;
-}
-
-/*
- * Splits num_nodes nodes up equally starting at node_start. The return value
- * is the number of nodes split up and addr is adjusted to be at the end of the
- * last node allocated.
- */
-static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr,
- u64 max_addr, int node_start,
- int num_nodes)
-{
- unsigned int big;
- u64 size;
- int i;
-
- if (num_nodes <= 0)
- return -1;
- if (num_nodes > MAX_NUMNODES)
- num_nodes = MAX_NUMNODES;
- size = (max_addr - *addr - e820_hole_size(*addr, max_addr)) /
- num_nodes;
- /*
- * Calculate the number of big nodes that can be allocated as a result
- * of consolidating the leftovers.
- */
- big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * num_nodes) /
- FAKE_NODE_MIN_SIZE;
-
- /* Round down to nearest FAKE_NODE_MIN_SIZE. */
- size &= FAKE_NODE_MIN_HASH_MASK;
- if (!size) {
- printk(KERN_ERR "Not enough memory for each node. "
- "NUMA emulation disabled.\n");
- return -1;
- }
-
- for (i = node_start; i < num_nodes + node_start; i++) {
- u64 end = *addr + size;
- if (i < big)
- end += FAKE_NODE_MIN_SIZE;
- /*
- * The final node can have the remaining system RAM. Other
- * nodes receive roughly the same amount of available pages.
- */
- if (i == num_nodes + node_start - 1)
- end = max_addr;
- else
- while (end - *addr - e820_hole_size(*addr, end) <
- size) {
- end += FAKE_NODE_MIN_SIZE;
- if (end > max_addr) {
- end = max_addr;
- break;
- }
- }
- if (setup_node_range(i, nodes, addr, end - *addr, max_addr) < 0)
- break;
- }
- return i - node_start + 1;
-}
-
-/*
- * Splits the remaining system RAM into chunks of size. The remaining memory is
- * always assigned to a final node and can be asymmetric. Returns the number of
- * nodes split.
- */
-static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr,
- u64 max_addr, int node_start, u64 size)
-{
- int i = node_start;
- size = (size << 20) & FAKE_NODE_MIN_HASH_MASK;
- while (!setup_node_range(i++, nodes, addr, size, max_addr))
- ;
- return i - node_start;
-}
-
-/*
- * Sets up the system RAM area from start_pfn to end_pfn according to the
- * numa=fake command-line option.
- */
-static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
-{
- struct bootnode nodes[MAX_NUMNODES];
- u64 addr = start_pfn << PAGE_SHIFT;
- u64 max_addr = end_pfn << PAGE_SHIFT;
- int num_nodes = 0;
- int coeff_flag;
- int coeff = -1;
- int num = 0;
- u64 size;
- int i;
-
- memset(&nodes, 0, sizeof(nodes));
- /*
- * If the numa=fake command-line is just a single number N, split the
- * system RAM into N fake nodes.
- */
- if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) {
- num_nodes = split_nodes_equally(nodes, &addr, max_addr, 0,
- simple_strtol(cmdline, NULL, 0));
- if (num_nodes < 0)
- return num_nodes;
- goto out;
- }
-
- /* Parse the command line. */
- for (coeff_flag = 0; ; cmdline++) {
- if (*cmdline && isdigit(*cmdline)) {
- num = num * 10 + *cmdline - '0';
- continue;
- }
- if (*cmdline == '*') {
- if (num > 0)
- coeff = num;
- coeff_flag = 1;
- }
- if (!*cmdline || *cmdline == ',') {
- if (!coeff_flag)
- coeff = 1;
- /*
- * Round down to the nearest FAKE_NODE_MIN_SIZE.
- * Command-line coefficients are in megabytes.
- */
- size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK;
- if (size)
- for (i = 0; i < coeff; i++, num_nodes++)
- if (setup_node_range(num_nodes, nodes,
- &addr, size, max_addr) < 0)
- goto done;
- if (!*cmdline)
- break;
- coeff_flag = 0;
- coeff = -1;
- }
- num = 0;
- }
-done:
- if (!num_nodes)
- return -1;
- /* Fill remainder of system RAM, if appropriate. */
- if (addr < max_addr) {
- if (coeff_flag && coeff < 0) {
- /* Split remaining nodes into num-sized chunks */
- num_nodes += split_nodes_by_size(nodes, &addr, max_addr,
- num_nodes, num);
- goto out;
- }
- switch (*(cmdline - 1)) {
- case '*':
- /* Split remaining nodes into coeff chunks */
- if (coeff <= 0)
- break;
- num_nodes += split_nodes_equally(nodes, &addr, max_addr,
- num_nodes, coeff);
- break;
- case ',':
- /* Do not allocate remaining system RAM */
- break;
- default:
- /* Give one final node */
- setup_node_range(num_nodes, nodes, &addr,
- max_addr - addr, max_addr);
- num_nodes++;
- }
- }
-out:
- memnode_shift = compute_hash_shift(nodes, num_nodes);
- if (memnode_shift < 0) {
- memnode_shift = 0;
- printk(KERN_ERR "No NUMA hash function found. NUMA emulation "
- "disabled.\n");
- return -1;
- }
-
- /*
- * We need to vacate all active ranges that may have been registered by
- * SRAT and set acpi_numa to -1 so that srat_disabled() always returns
- * true. NUMA emulation has succeeded so we will not scan ACPI nodes.
- */
- remove_all_active_ranges();
-#ifdef CONFIG_ACPI_NUMA
- acpi_numa = -1;
-#endif
- for_each_node_mask(i, node_possible_map) {
- e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
- nodes[i].end >> PAGE_SHIFT);
- setup_node_bootmem(i, nodes[i].start, nodes[i].end);
- }
- acpi_fake_nodes(nodes, num_nodes);
- numa_init_array();
- return 0;
-}
-#endif /* CONFIG_NUMA_EMU */
-
-void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
-{
- int i;
-
- nodes_clear(node_possible_map);
-
-#ifdef CONFIG_NUMA_EMU
- if (cmdline && !numa_emulation(start_pfn, end_pfn))
- return;
- nodes_clear(node_possible_map);
-#endif
-
-#ifdef CONFIG_ACPI_NUMA
- if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT,
- end_pfn << PAGE_SHIFT))
- return;
- nodes_clear(node_possible_map);
-#endif
-
-#ifdef CONFIG_K8_NUMA
- if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT))
- return;
- nodes_clear(node_possible_map);
-#endif
- printk(KERN_INFO "%s\n",
- numa_off ? "NUMA turned off" : "No NUMA configuration found");
-
- printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
- start_pfn << PAGE_SHIFT,
- end_pfn << PAGE_SHIFT);
- /* setup dummy node covering all memory */
- memnode_shift = 63;
- memnodemap = memnode.embedded_map;
- memnodemap[0] = 0;
- nodes_clear(node_online_map);
- node_set_online(0);
- node_set(0, node_possible_map);
- for (i = 0; i < NR_CPUS; i++)
- numa_set_node(i, 0);
- node_to_cpumask[0] = cpumask_of_cpu(0);
- e820_register_active_regions(0, start_pfn, end_pfn);
- setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
-}
-
-__cpuinit void numa_add_cpu(int cpu)
-{
- set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
-}
-
-void __cpuinit numa_set_node(int cpu, int node)
-{
- cpu_pda(cpu)->nodenumber = node;
- cpu_to_node[cpu] = node;
-}
-
-unsigned long __init numa_free_all_bootmem(void)
-{
- int i;
- unsigned long pages = 0;
- for_each_online_node(i) {
- pages += free_all_bootmem_node(NODE_DATA(i));
- }
- return pages;
-}
-
-void __init paging_init(void)
-{
- int i;
- unsigned long max_zone_pfns[MAX_NR_ZONES];
- memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
- max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
- max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
- max_zone_pfns[ZONE_NORMAL] = end_pfn;
-
- sparse_memory_present_with_active_regions(MAX_NUMNODES);
- sparse_init();
-
- for_each_online_node(i) {
- setup_node_zones(i);
- }
-
- free_area_init_nodes(max_zone_pfns);
-}
-
-static __init int numa_setup(char *opt)
-{
- if (!opt)
- return -EINVAL;
- if (!strncmp(opt,"off",3))
- numa_off = 1;
-#ifdef CONFIG_NUMA_EMU
- if (!strncmp(opt, "fake=", 5))
- cmdline = opt + 5;
-#endif
-#ifdef CONFIG_ACPI_NUMA
- if (!strncmp(opt,"noacpi",6))
- acpi_numa = -1;
- if (!strncmp(opt,"hotadd=", 7))
- hotadd_percent = simple_strtoul(opt+7, NULL, 10);
-#endif
- return 0;
-}
-
-early_param("numa", numa_setup);
-
-/*
- * Setup early cpu_to_node.
- *
- * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
- * and apicid_to_node[] tables have valid entries for a CPU.
- * This means we skip cpu_to_node[] initialisation for NUMA
- * emulation and faking node case (when running a kernel compiled
- * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
- * is already initialized in a round robin manner at numa_init_array,
- * prior to this call, and this initialization is good enough
- * for the fake NUMA cases.
- */
-void __init init_cpu_to_node(void)
-{
- int i;
- for (i = 0; i < NR_CPUS; i++) {
- u8 apicid = x86_cpu_to_apicid[i];
- if (apicid == BAD_APICID)
- continue;
- if (apicid_to_node[apicid] == NUMA_NO_NODE)
- continue;
- numa_set_node(i,apicid_to_node[apicid]);
- }
-}
-
-EXPORT_SYMBOL(cpu_to_node);
-EXPORT_SYMBOL(node_to_cpumask);
-EXPORT_SYMBOL(memnode);
-EXPORT_SYMBOL(node_data);
-
-#ifdef CONFIG_DISCONTIGMEM
-/*
- * Functions to convert PFNs from/to per node page addresses.
- * These are out of line because they are quite big.
- * They could be all tuned by pre caching more state.
- * Should do that.
- */
-
-int pfn_valid(unsigned long pfn)
-{
- unsigned nid;
- if (pfn >= num_physpages)
- return 0;
- nid = pfn_to_nid(pfn);
- if (nid == 0xff)
- return 0;
- return pfn >= node_start_pfn(nid) && (pfn) < node_end_pfn(nid);
-}
-EXPORT_SYMBOL(pfn_valid);
-#endif
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c
deleted file mode 100644
index 10b9809ce821..000000000000
--- a/arch/x86_64/mm/pageattr.c
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- * Copyright 2002 Andi Kleen, SuSE Labs.
- * Thanks to Ben LaHaise for precious feedback.
- */
-
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/highmem.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <asm/uaccess.h>
-#include <asm/processor.h>
-#include <asm/tlbflush.h>
-#include <asm/io.h>
-
-pte_t *lookup_address(unsigned long address)
-{
- pgd_t *pgd = pgd_offset_k(address);
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
- if (pgd_none(*pgd))
- return NULL;
- pud = pud_offset(pgd, address);
- if (!pud_present(*pud))
- return NULL;
- pmd = pmd_offset(pud, address);
- if (!pmd_present(*pmd))
- return NULL;
- if (pmd_large(*pmd))
- return (pte_t *)pmd;
- pte = pte_offset_kernel(pmd, address);
- if (pte && !pte_present(*pte))
- pte = NULL;
- return pte;
-}
-
-static struct page *split_large_page(unsigned long address, pgprot_t prot,
- pgprot_t ref_prot)
-{
- int i;
- unsigned long addr;
- struct page *base = alloc_pages(GFP_KERNEL, 0);
- pte_t *pbase;
- if (!base)
- return NULL;
- /*
- * page_private is used to track the number of entries in
- * the page table page have non standard attributes.
- */
- SetPagePrivate(base);
- page_private(base) = 0;
-
- address = __pa(address);
- addr = address & LARGE_PAGE_MASK;
- pbase = (pte_t *)page_address(base);
- for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
- pbase[i] = pfn_pte(addr >> PAGE_SHIFT,
- addr == address ? prot : ref_prot);
- }
- return base;
-}
-
-static void cache_flush_page(void *adr)
-{
- int i;
- for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
- asm volatile("clflush (%0)" :: "r" (adr + i));
-}
-
-static void flush_kernel_map(void *arg)
-{
- struct list_head *l = (struct list_head *)arg;
- struct page *pg;
-
- /* When clflush is available always use it because it is
- much cheaper than WBINVD. */
- /* clflush is still broken. Disable for now. */
- if (1 || !cpu_has_clflush)
- asm volatile("wbinvd" ::: "memory");
- else list_for_each_entry(pg, l, lru) {
- void *adr = page_address(pg);
- cache_flush_page(adr);
- }
- __flush_tlb_all();
-}
-
-static inline void flush_map(struct list_head *l)
-{
- on_each_cpu(flush_kernel_map, l, 1, 1);
-}
-
-static LIST_HEAD(deferred_pages); /* protected by init_mm.mmap_sem */
-
-static inline void save_page(struct page *fpage)
-{
- if (!test_and_set_bit(PG_arch_1, &fpage->flags))
- list_add(&fpage->lru, &deferred_pages);
-}
-
-/*
- * No more special protections in this 2/4MB area - revert to a
- * large page again.
- */
-static void revert_page(unsigned long address, pgprot_t ref_prot)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t large_pte;
- unsigned long pfn;
-
- pgd = pgd_offset_k(address);
- BUG_ON(pgd_none(*pgd));
- pud = pud_offset(pgd,address);
- BUG_ON(pud_none(*pud));
- pmd = pmd_offset(pud, address);
- BUG_ON(pmd_val(*pmd) & _PAGE_PSE);
- pfn = (__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT;
- large_pte = pfn_pte(pfn, ref_prot);
- large_pte = pte_mkhuge(large_pte);
- set_pte((pte_t *)pmd, large_pte);
-}
-
-static int
-__change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
- pgprot_t ref_prot)
-{
- pte_t *kpte;
- struct page *kpte_page;
- pgprot_t ref_prot2;
-
- kpte = lookup_address(address);
- if (!kpte) return 0;
- kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK);
- BUG_ON(PageLRU(kpte_page));
- BUG_ON(PageCompound(kpte_page));
- if (pgprot_val(prot) != pgprot_val(ref_prot)) {
- if (!pte_huge(*kpte)) {
- set_pte(kpte, pfn_pte(pfn, prot));
- } else {
- /*
- * split_large_page will take the reference for this
- * change_page_attr on the split page.
- */
- struct page *split;
- ref_prot2 = pte_pgprot(pte_clrhuge(*kpte));
- split = split_large_page(address, prot, ref_prot2);
- if (!split)
- return -ENOMEM;
- set_pte(kpte, mk_pte(split, ref_prot2));
- kpte_page = split;
- }
- page_private(kpte_page)++;
- } else if (!pte_huge(*kpte)) {
- set_pte(kpte, pfn_pte(pfn, ref_prot));
- BUG_ON(page_private(kpte_page) == 0);
- page_private(kpte_page)--;
- } else
- BUG();
-
- /* on x86-64 the direct mapping set at boot is not using 4k pages */
- BUG_ON(PageReserved(kpte_page));
-
- save_page(kpte_page);
- if (page_private(kpte_page) == 0)
- revert_page(address, ref_prot);
- return 0;
-}
-
-/*
- * Change the page attributes of an page in the linear mapping.
- *
- * This should be used when a page is mapped with a different caching policy
- * than write-back somewhere - some CPUs do not like it when mappings with
- * different caching policies exist. This changes the page attributes of the
- * in kernel linear mapping too.
- *
- * The caller needs to ensure that there are no conflicting mappings elsewhere.
- * This function only deals with the kernel linear map.
- *
- * Caller must call global_flush_tlb() after this.
- */
-int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
-{
- int err = 0, kernel_map = 0;
- int i;
-
- if (address >= __START_KERNEL_map
- && address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {
- address = (unsigned long)__va(__pa(address));
- kernel_map = 1;
- }
-
- down_write(&init_mm.mmap_sem);
- for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
- unsigned long pfn = __pa(address) >> PAGE_SHIFT;
-
- if (!kernel_map || pte_present(pfn_pte(0, prot))) {
- err = __change_page_attr(address, pfn, prot, PAGE_KERNEL);
- if (err)
- break;
- }
- /* Handle kernel mapping too which aliases part of the
- * lowmem */
- if (__pa(address) < KERNEL_TEXT_SIZE) {
- unsigned long addr2;
- pgprot_t prot2;
- addr2 = __START_KERNEL_map + __pa(address);
- /* Make sure the kernel mappings stay executable */
- prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
- err = __change_page_attr(addr2, pfn, prot2,
- PAGE_KERNEL_EXEC);
- }
- }
- up_write(&init_mm.mmap_sem);
- return err;
-}
-
-/* Don't call this for MMIO areas that may not have a mem_map entry */
-int change_page_attr(struct page *page, int numpages, pgprot_t prot)
-{
- unsigned long addr = (unsigned long)page_address(page);
- return change_page_attr_addr(addr, numpages, prot);
-}
-
-void global_flush_tlb(void)
-{
- struct page *pg, *next;
- struct list_head l;
-
- down_read(&init_mm.mmap_sem);
- list_replace_init(&deferred_pages, &l);
- up_read(&init_mm.mmap_sem);
-
- flush_map(&l);
-
- list_for_each_entry_safe(pg, next, &l, lru) {
- list_del(&pg->lru);
- clear_bit(PG_arch_1, &pg->flags);
- if (page_private(pg) != 0)
- continue;
- ClearPagePrivate(pg);
- __free_page(pg);
- }
-}
-
-EXPORT_SYMBOL(change_page_attr);
-EXPORT_SYMBOL(global_flush_tlb);
diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c
deleted file mode 100644
index acdf03e19146..000000000000
--- a/arch/x86_64/mm/srat.c
+++ /dev/null
@@ -1,566 +0,0 @@
-/*
- * ACPI 3.0 based NUMA setup
- * Copyright 2004 Andi Kleen, SuSE Labs.
- *
- * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
- *
- * Called from acpi_numa_init while reading the SRAT and SLIT tables.
- * Assumes all memory regions belonging to a single proximity domain
- * are in one chunk. Holes between them will be included in the node.
- */
-
-#include <linux/kernel.h>
-#include <linux/acpi.h>
-#include <linux/mmzone.h>
-#include <linux/bitmap.h>
-#include <linux/module.h>
-#include <linux/topology.h>
-#include <linux/bootmem.h>
-#include <linux/mm.h>
-#include <asm/proto.h>
-#include <asm/numa.h>
-#include <asm/e820.h>
-
-int acpi_numa __initdata;
-
-static struct acpi_table_slit *acpi_slit;
-
-static nodemask_t nodes_parsed __initdata;
-static struct bootnode nodes[MAX_NUMNODES] __initdata;
-static struct bootnode nodes_add[MAX_NUMNODES];
-static int found_add_area __initdata;
-int hotadd_percent __initdata = 0;
-
-/* Too small nodes confuse the VM badly. Usually they result
- from BIOS bugs. */
-#define NODE_MIN_SIZE (4*1024*1024)
-
-static __init int setup_node(int pxm)
-{
- return acpi_map_pxm_to_node(pxm);
-}
-
-static __init int conflicting_nodes(unsigned long start, unsigned long end)
-{
- int i;
- for_each_node_mask(i, nodes_parsed) {
- struct bootnode *nd = &nodes[i];
- if (nd->start == nd->end)
- continue;
- if (nd->end > start && nd->start < end)
- return i;
- if (nd->end == end && nd->start == start)
- return i;
- }
- return -1;
-}
-
-static __init void cutoff_node(int i, unsigned long start, unsigned long end)
-{
- struct bootnode *nd = &nodes[i];
-
- if (found_add_area)
- return;
-
- if (nd->start < start) {
- nd->start = start;
- if (nd->end < nd->start)
- nd->start = nd->end;
- }
- if (nd->end > end) {
- nd->end = end;
- if (nd->start > nd->end)
- nd->start = nd->end;
- }
-}
-
-static __init void bad_srat(void)
-{
- int i;
- printk(KERN_ERR "SRAT: SRAT not used.\n");
- acpi_numa = -1;
- found_add_area = 0;
- for (i = 0; i < MAX_LOCAL_APIC; i++)
- apicid_to_node[i] = NUMA_NO_NODE;
- for (i = 0; i < MAX_NUMNODES; i++)
- nodes_add[i].start = nodes[i].end = 0;
- remove_all_active_ranges();
-}
-
-static __init inline int srat_disabled(void)
-{
- return numa_off || acpi_numa < 0;
-}
-
-/*
- * A lot of BIOS fill in 10 (= no distance) everywhere. This messes
- * up the NUMA heuristics which wants the local node to have a smaller
- * distance than the others.
- * Do some quick checks here and only use the SLIT if it passes.
- */
-static __init int slit_valid(struct acpi_table_slit *slit)
-{
- int i, j;
- int d = slit->locality_count;
- for (i = 0; i < d; i++) {
- for (j = 0; j < d; j++) {
- u8 val = slit->entry[d*i + j];
- if (i == j) {
- if (val != LOCAL_DISTANCE)
- return 0;
- } else if (val <= LOCAL_DISTANCE)
- return 0;
- }
- }
- return 1;
-}
-
-/* Callback for SLIT parsing */
-void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
-{
- if (!slit_valid(slit)) {
- printk(KERN_INFO "ACPI: SLIT table looks invalid. Not used.\n");
- return;
- }
- acpi_slit = slit;
-}
-
-/* Callback for Proximity Domain -> LAPIC mapping */
-void __init
-acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
-{
- int pxm, node;
- if (srat_disabled())
- return;
- if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) {
- bad_srat();
- return;
- }
- if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
- return;
- pxm = pa->proximity_domain_lo;
- node = setup_node(pxm);
- if (node < 0) {
- printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
- bad_srat();
- return;
- }
- apicid_to_node[pa->apic_id] = node;
- acpi_numa = 1;
- printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
- pxm, pa->apic_id, node);
-}
-
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-/*
- * Protect against too large hotadd areas that would fill up memory.
- */
-static int hotadd_enough_memory(struct bootnode *nd)
-{
- static unsigned long allocated;
- static unsigned long last_area_end;
- unsigned long pages = (nd->end - nd->start) >> PAGE_SHIFT;
- long mem = pages * sizeof(struct page);
- unsigned long addr;
- unsigned long allowed;
- unsigned long oldpages = pages;
-
- if (mem < 0)
- return 0;
- allowed = (end_pfn - absent_pages_in_range(0, end_pfn)) * PAGE_SIZE;
- allowed = (allowed / 100) * hotadd_percent;
- if (allocated + mem > allowed) {
- unsigned long range;
- /* Give them at least part of their hotadd memory upto hotadd_percent
- It would be better to spread the limit out
- over multiple hotplug areas, but that is too complicated
- right now */
- if (allocated >= allowed)
- return 0;
- range = allowed - allocated;
- pages = (range / PAGE_SIZE);
- mem = pages * sizeof(struct page);
- nd->end = nd->start + range;
- }
- /* Not completely fool proof, but a good sanity check */
- addr = find_e820_area(last_area_end, end_pfn<<PAGE_SHIFT, mem);
- if (addr == -1UL)
- return 0;
- if (pages != oldpages)
- printk(KERN_NOTICE "SRAT: Hotadd area limited to %lu bytes\n",
- pages << PAGE_SHIFT);
- last_area_end = addr + mem;
- allocated += mem;
- return 1;
-}
-
-static int update_end_of_memory(unsigned long end)
-{
- found_add_area = 1;
- if ((end >> PAGE_SHIFT) > end_pfn)
- end_pfn = end >> PAGE_SHIFT;
- return 1;
-}
-
-static inline int save_add_info(void)
-{
- return hotadd_percent > 0;
-}
-#else
-int update_end_of_memory(unsigned long end) {return -1;}
-static int hotadd_enough_memory(struct bootnode *nd) {return 1;}
-#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
-static inline int save_add_info(void) {return 1;}
-#else
-static inline int save_add_info(void) {return 0;}
-#endif
-#endif
-/*
- * Update nodes_add and decide if to include add are in the zone.
- * Both SPARSE and RESERVE need nodes_add infomation.
- * This code supports one contigious hot add area per node.
- */
-static int reserve_hotadd(int node, unsigned long start, unsigned long end)
-{
- unsigned long s_pfn = start >> PAGE_SHIFT;
- unsigned long e_pfn = end >> PAGE_SHIFT;
- int ret = 0, changed = 0;
- struct bootnode *nd = &nodes_add[node];
-
- /* I had some trouble with strange memory hotadd regions breaking
- the boot. Be very strict here and reject anything unexpected.
- If you want working memory hotadd write correct SRATs.
-
- The node size check is a basic sanity check to guard against
- mistakes */
- if ((signed long)(end - start) < NODE_MIN_SIZE) {
- printk(KERN_ERR "SRAT: Hotplug area too small\n");
- return -1;
- }
-
- /* This check might be a bit too strict, but I'm keeping it for now. */
- if (absent_pages_in_range(s_pfn, e_pfn) != e_pfn - s_pfn) {
- printk(KERN_ERR
- "SRAT: Hotplug area %lu -> %lu has existing memory\n",
- s_pfn, e_pfn);
- return -1;
- }
-
- if (!hotadd_enough_memory(&nodes_add[node])) {
- printk(KERN_ERR "SRAT: Hotplug area too large\n");
- return -1;
- }
-
- /* Looks good */
-
- if (nd->start == nd->end) {
- nd->start = start;
- nd->end = end;
- changed = 1;
- } else {
- if (nd->start == end) {
- nd->start = start;
- changed = 1;
- }
- if (nd->end == start) {
- nd->end = end;
- changed = 1;
- }
- if (!changed)
- printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
- }
-
- ret = update_end_of_memory(nd->end);
-
- if (changed)
- printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end);
- return ret;
-}
-
-/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
-void __init
-acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
-{
- struct bootnode *nd, oldnode;
- unsigned long start, end;
- int node, pxm;
- int i;
-
- if (srat_disabled())
- return;
- if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
- bad_srat();
- return;
- }
- if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
- return;
-
- if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info())
- return;
- start = ma->base_address;
- end = start + ma->length;
- pxm = ma->proximity_domain;
- node = setup_node(pxm);
- if (node < 0) {
- printk(KERN_ERR "SRAT: Too many proximity domains.\n");
- bad_srat();
- return;
- }
- i = conflicting_nodes(start, end);
- if (i == node) {
- printk(KERN_WARNING
- "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n",
- pxm, start, end, nodes[i].start, nodes[i].end);
- } else if (i >= 0) {
- printk(KERN_ERR
- "SRAT: PXM %d (%lx-%lx) overlaps with PXM %d (%Lx-%Lx)\n",
- pxm, start, end, node_to_pxm(i),
- nodes[i].start, nodes[i].end);
- bad_srat();
- return;
- }
- nd = &nodes[node];
- oldnode = *nd;
- if (!node_test_and_set(node, nodes_parsed)) {
- nd->start = start;
- nd->end = end;
- } else {
- if (start < nd->start)
- nd->start = start;
- if (nd->end < end)
- nd->end = end;
- }
-
- printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm,
- nd->start, nd->end);
- e820_register_active_regions(node, nd->start >> PAGE_SHIFT,
- nd->end >> PAGE_SHIFT);
- push_node_boundaries(node, nd->start >> PAGE_SHIFT,
- nd->end >> PAGE_SHIFT);
-
- if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) &&
- (reserve_hotadd(node, start, end) < 0)) {
- /* Ignore hotadd region. Undo damage */
- printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
- *nd = oldnode;
- if ((nd->start | nd->end) == 0)
- node_clear(node, nodes_parsed);
- }
-}
-
-/* Sanity check to catch more bad SRATs (they are amazingly common).
- Make sure the PXMs cover all memory. */
-static int __init nodes_cover_memory(const struct bootnode *nodes)
-{
- int i;
- unsigned long pxmram, e820ram;
-
- pxmram = 0;
- for_each_node_mask(i, nodes_parsed) {
- unsigned long s = nodes[i].start >> PAGE_SHIFT;
- unsigned long e = nodes[i].end >> PAGE_SHIFT;
- pxmram += e - s;
- pxmram -= absent_pages_in_range(s, e);
- if ((long)pxmram < 0)
- pxmram = 0;
- }
-
- e820ram = end_pfn - absent_pages_in_range(0, end_pfn);
- /* We seem to lose 3 pages somewhere. Allow a bit of slack. */
- if ((long)(e820ram - pxmram) >= 1*1024*1024) {
- printk(KERN_ERR
- "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n",
- (pxmram << PAGE_SHIFT) >> 20,
- (e820ram << PAGE_SHIFT) >> 20);
- return 0;
- }
- return 1;
-}
-
-static void unparse_node(int node)
-{
- int i;
- node_clear(node, nodes_parsed);
- for (i = 0; i < MAX_LOCAL_APIC; i++) {
- if (apicid_to_node[i] == node)
- apicid_to_node[i] = NUMA_NO_NODE;
- }
-}
-
-void __init acpi_numa_arch_fixup(void) {}
-
-/* Use the information discovered above to actually set up the nodes. */
-int __init acpi_scan_nodes(unsigned long start, unsigned long end)
-{
- int i;
-
- if (acpi_numa <= 0)
- return -1;
-
- /* First clean up the node list */
- for (i = 0; i < MAX_NUMNODES; i++) {
- cutoff_node(i, start, end);
- if ((nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) {
- unparse_node(i);
- node_set_offline(i);
- }
- }
-
- if (!nodes_cover_memory(nodes)) {
- bad_srat();
- return -1;
- }
-
- memnode_shift = compute_hash_shift(nodes, MAX_NUMNODES);
- if (memnode_shift < 0) {
- printk(KERN_ERR
- "SRAT: No NUMA node hash function found. Contact maintainer\n");
- bad_srat();
- return -1;
- }
-
- node_possible_map = nodes_parsed;
-
- /* Finally register nodes */
- for_each_node_mask(i, node_possible_map)
- setup_node_bootmem(i, nodes[i].start, nodes[i].end);
- /* Try again in case setup_node_bootmem missed one due
- to missing bootmem */
- for_each_node_mask(i, node_possible_map)
- if (!node_online(i))
- setup_node_bootmem(i, nodes[i].start, nodes[i].end);
-
- for (i = 0; i < NR_CPUS; i++) {
- if (cpu_to_node[i] == NUMA_NO_NODE)
- continue;
- if (!node_isset(cpu_to_node[i], node_possible_map))
- numa_set_node(i, NUMA_NO_NODE);
- }
- numa_init_array();
- return 0;
-}
-
-#ifdef CONFIG_NUMA_EMU
-static int __init find_node_by_addr(unsigned long addr)
-{
- int ret = NUMA_NO_NODE;
- int i;
-
- for_each_node_mask(i, nodes_parsed) {
- /*
- * Find the real node that this emulated node appears on. For
- * the sake of simplicity, we only use a real node's starting
- * address to determine which emulated node it appears on.
- */
- if (addr >= nodes[i].start && addr < nodes[i].end) {
- ret = i;
- break;
- }
- }
- return i;
-}
-
-/*
- * In NUMA emulation, we need to setup proximity domain (_PXM) to node ID
- * mappings that respect the real ACPI topology but reflect our emulated
- * environment. For each emulated node, we find which real node it appears on
- * and create PXM to NID mappings for those fake nodes which mirror that
- * locality. SLIT will now represent the correct distances between emulated
- * nodes as a result of the real topology.
- */
-void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
-{
- int i, j;
- int fake_node_to_pxm_map[MAX_NUMNODES] = {
- [0 ... MAX_NUMNODES-1] = PXM_INVAL
- };
- unsigned char fake_apicid_to_node[MAX_LOCAL_APIC] = {
- [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
- };
-
- printk(KERN_INFO "Faking PXM affinity for fake nodes on real "
- "topology.\n");
- for (i = 0; i < num_nodes; i++) {
- int nid, pxm;
-
- nid = find_node_by_addr(fake_nodes[i].start);
- if (nid == NUMA_NO_NODE)
- continue;
- pxm = node_to_pxm(nid);
- if (pxm == PXM_INVAL)
- continue;
- fake_node_to_pxm_map[i] = pxm;
- /*
- * For each apicid_to_node mapping that exists for this real
- * node, it must now point to the fake node ID.
- */
- for (j = 0; j < MAX_LOCAL_APIC; j++)
- if (apicid_to_node[j] == nid)
- fake_apicid_to_node[j] = i;
- }
- for (i = 0; i < num_nodes; i++)
- __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
- memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
-
- nodes_clear(nodes_parsed);
- for (i = 0; i < num_nodes; i++)
- if (fake_nodes[i].start != fake_nodes[i].end)
- node_set(i, nodes_parsed);
- WARN_ON(!nodes_cover_memory(fake_nodes));
-}
-
-static int null_slit_node_compare(int a, int b)
-{
- return node_to_pxm(a) == node_to_pxm(b);
-}
-#else
-static int null_slit_node_compare(int a, int b)
-{
- return a == b;
-}
-#endif /* CONFIG_NUMA_EMU */
-
-void __init srat_reserve_add_area(int nodeid)
-{
- if (found_add_area && nodes_add[nodeid].end) {
- u64 total_mb;
-
- printk(KERN_INFO "SRAT: Reserving hot-add memory space "
- "for node %d at %Lx-%Lx\n",
- nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end);
- total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start)
- >> PAGE_SHIFT;
- total_mb *= sizeof(struct page);
- total_mb >>= 20;
- printk(KERN_INFO "SRAT: This will cost you %Lu MB of "
- "pre-allocated memory.\n", (unsigned long long)total_mb);
- reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start,
- nodes_add[nodeid].end - nodes_add[nodeid].start);
- }
-}
-
-int __node_distance(int a, int b)
-{
- int index;
-
- if (!acpi_slit)
- return null_slit_node_compare(a, b) ? LOCAL_DISTANCE :
- REMOTE_DISTANCE;
- index = acpi_slit->locality_count * node_to_pxm(a);
- return acpi_slit->entry[index + node_to_pxm(b)];
-}
-
-EXPORT_SYMBOL(__node_distance);
-
-int memory_add_physaddr_to_nid(u64 start)
-{
- int i, ret = 0;
-
- for_each_node(i)
- if (nodes_add[i].start <= start && nodes_add[i].end > start)
- ret = i;
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
-
diff --git a/arch/x86_64/oprofile/Kconfig b/arch/x86_64/oprofile/Kconfig
deleted file mode 100644
index d8a84088471a..000000000000
--- a/arch/x86_64/oprofile/Kconfig
+++ /dev/null
@@ -1,17 +0,0 @@
-config PROFILING
- bool "Profiling support (EXPERIMENTAL)"
- help
- Say Y here to enable the extended profiling support mechanisms used
- by profilers such as OProfile.
-
-
-config OPROFILE
- tristate "OProfile system profiling (EXPERIMENTAL)"
- depends on PROFILING
- help
- OProfile is a profiling system capable of profiling the
- whole system, include the kernel, kernel modules, libraries,
- and applications.
-
- If unsure, say N.
-
diff --git a/arch/x86_64/oprofile/Makefile b/arch/x86_64/oprofile/Makefile
deleted file mode 100644
index 6be32683e1bc..000000000000
--- a/arch/x86_64/oprofile/Makefile
+++ /dev/null
@@ -1,19 +0,0 @@
-#
-# oprofile for x86-64.
-# Just reuse the one from i386.
-#
-
-obj-$(CONFIG_OPROFILE) += oprofile.o
-
-DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
- oprof.o cpu_buffer.o buffer_sync.o \
- event_buffer.o oprofile_files.o \
- oprofilefs.o oprofile_stats.o \
- timer_int.o )
-
-OPROFILE-y := init.o backtrace.o
-OPROFILE-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_athlon.o op_model_p4.o \
- op_model_ppro.o
-OPROFILE-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o
-
-oprofile-y = $(DRIVER_OBJS) $(addprefix ../../i386/oprofile/, $(OPROFILE-y))
diff --git a/arch/x86_64/pci/Makefile b/arch/x86_64/pci/Makefile
deleted file mode 100644
index c9eddc8859c0..000000000000
--- a/arch/x86_64/pci/Makefile
+++ /dev/null
@@ -1,27 +0,0 @@
-#
-# Makefile for X86_64 specific PCI routines
-#
-# Reuse the i386 PCI subsystem
-#
-EXTRA_CFLAGS += -Iarch/i386/pci
-
-obj-y := i386.o
-obj-$(CONFIG_PCI_DIRECT)+= direct.o
-obj-y += fixup.o init.o
-obj-$(CONFIG_ACPI) += acpi.o
-obj-y += legacy.o irq.o common.o early.o
-# mmconfig has a 64bit special
-obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o direct.o mmconfig-shared.o
-
-obj-$(CONFIG_NUMA) += k8-bus.o
-
-direct-y += ../../i386/pci/direct.o
-acpi-y += ../../i386/pci/acpi.o
-legacy-y += ../../i386/pci/legacy.o
-irq-y += ../../i386/pci/irq.o
-common-y += ../../i386/pci/common.o
-fixup-y += ../../i386/pci/fixup.o
-i386-y += ../../i386/pci/i386.o
-init-y += ../../i386/pci/init.o
-early-y += ../../i386/pci/early.o
-mmconfig-shared-y += ../../i386/pci/mmconfig-shared.o
diff --git a/arch/x86_64/pci/k8-bus.c b/arch/x86_64/pci/k8-bus.c
deleted file mode 100644
index 9cc813e29706..000000000000
--- a/arch/x86_64/pci/k8-bus.c
+++ /dev/null
@@ -1,83 +0,0 @@
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <asm/mpspec.h>
-#include <linux/cpumask.h>
-
-/*
- * This discovers the pcibus <-> node mapping on AMD K8.
- *
- * RED-PEN need to call this again on PCI hotplug
- * RED-PEN empty cpus get reported wrong
- */
-
-#define NODE_ID_REGISTER 0x60
-#define NODE_ID(dword) (dword & 0x07)
-#define LDT_BUS_NUMBER_REGISTER_0 0x94
-#define LDT_BUS_NUMBER_REGISTER_1 0xB4
-#define LDT_BUS_NUMBER_REGISTER_2 0xD4
-#define NR_LDT_BUS_NUMBER_REGISTERS 3
-#define SECONDARY_LDT_BUS_NUMBER(dword) ((dword >> 8) & 0xFF)
-#define SUBORDINATE_LDT_BUS_NUMBER(dword) ((dword >> 16) & 0xFF)
-#define PCI_DEVICE_ID_K8HTCONFIG 0x1100
-
-/**
- * fill_mp_bus_to_cpumask()
- * fills the mp_bus_to_cpumask array based according to the LDT Bus Number
- * Registers found in the K8 northbridge
- */
-__init static int
-fill_mp_bus_to_cpumask(void)
-{
- struct pci_dev *nb_dev = NULL;
- int i, j;
- u32 ldtbus, nid;
- static int lbnr[3] = {
- LDT_BUS_NUMBER_REGISTER_0,
- LDT_BUS_NUMBER_REGISTER_1,
- LDT_BUS_NUMBER_REGISTER_2
- };
-
- while ((nb_dev = pci_get_device(PCI_VENDOR_ID_AMD,
- PCI_DEVICE_ID_K8HTCONFIG, nb_dev))) {
- pci_read_config_dword(nb_dev, NODE_ID_REGISTER, &nid);
-
- for (i = 0; i < NR_LDT_BUS_NUMBER_REGISTERS; i++) {
- pci_read_config_dword(nb_dev, lbnr[i], &ldtbus);
- /*
- * if there are no busses hanging off of the current
- * ldt link then both the secondary and subordinate
- * bus number fields are set to 0.
- *
- * RED-PEN
- * This is slightly broken because it assumes
- * HT node IDs == Linux node ids, which is not always
- * true. However it is probably mostly true.
- */
- if (!(SECONDARY_LDT_BUS_NUMBER(ldtbus) == 0
- && SUBORDINATE_LDT_BUS_NUMBER(ldtbus) == 0)) {
- for (j = SECONDARY_LDT_BUS_NUMBER(ldtbus);
- j <= SUBORDINATE_LDT_BUS_NUMBER(ldtbus);
- j++) {
- struct pci_bus *bus;
- struct pci_sysdata *sd;
-
- long node = NODE_ID(nid);
- /* Algorithm a bit dumb, but
- it shouldn't matter here */
- bus = pci_find_bus(0, j);
- if (!bus)
- continue;
- if (!node_online(node))
- node = 0;
-
- sd = bus->sysdata;
- sd->node = node;
- }
- }
- }
- }
-
- return 0;
-}
-
-fs_initcall(fill_mp_bus_to_cpumask);
diff --git a/arch/x86_64/pci/mmconfig.c b/arch/x86_64/pci/mmconfig.c
deleted file mode 100644
index 4095e4d66a1d..000000000000
--- a/arch/x86_64/pci/mmconfig.c
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * mmconfig.c - Low-level direct PCI config space access via MMCONFIG
- *
- * This is an 64bit optimized version that always keeps the full mmconfig
- * space mapped. This allows lockless config space operation.
- */
-
-#include <linux/pci.h>
-#include <linux/init.h>
-#include <linux/acpi.h>
-#include <linux/bitmap.h>
-#include <asm/e820.h>
-
-#include "pci.h"
-
-/* Static virtual mapping of the MMCONFIG aperture */
-struct mmcfg_virt {
- struct acpi_mcfg_allocation *cfg;
- char __iomem *virt;
-};
-static struct mmcfg_virt *pci_mmcfg_virt;
-
-static char __iomem *get_virt(unsigned int seg, unsigned bus)
-{
- struct acpi_mcfg_allocation *cfg;
- int cfg_num;
-
- for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) {
- cfg = pci_mmcfg_virt[cfg_num].cfg;
- if (cfg->pci_segment == seg &&
- (cfg->start_bus_number <= bus) &&
- (cfg->end_bus_number >= bus))
- return pci_mmcfg_virt[cfg_num].virt;
- }
-
- /* Fall back to type 0 */
- return NULL;
-}
-
-static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn)
-{
- char __iomem *addr;
- if (seg == 0 && bus < PCI_MMCFG_MAX_CHECK_BUS &&
- test_bit(32*bus + PCI_SLOT(devfn), pci_mmcfg_fallback_slots))
- return NULL;
- addr = get_virt(seg, bus);
- if (!addr)
- return NULL;
- return addr + ((bus << 20) | (devfn << 12));
-}
-
-static int pci_mmcfg_read(unsigned int seg, unsigned int bus,
- unsigned int devfn, int reg, int len, u32 *value)
-{
- char __iomem *addr;
-
- /* Why do we have this when nobody checks it. How about a BUG()!? -AK */
- if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) {
- *value = -1;
- return -EINVAL;
- }
-
- addr = pci_dev_base(seg, bus, devfn);
- if (!addr)
- return pci_conf1_read(seg,bus,devfn,reg,len,value);
-
- switch (len) {
- case 1:
- *value = mmio_config_readb(addr + reg);
- break;
- case 2:
- *value = mmio_config_readw(addr + reg);
- break;
- case 4:
- *value = mmio_config_readl(addr + reg);
- break;
- }
-
- return 0;
-}
-
-static int pci_mmcfg_write(unsigned int seg, unsigned int bus,
- unsigned int devfn, int reg, int len, u32 value)
-{
- char __iomem *addr;
-
- /* Why do we have this when nobody checks it. How about a BUG()!? -AK */
- if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095)))
- return -EINVAL;
-
- addr = pci_dev_base(seg, bus, devfn);
- if (!addr)
- return pci_conf1_write(seg,bus,devfn,reg,len,value);
-
- switch (len) {
- case 1:
- mmio_config_writeb(addr + reg, value);
- break;
- case 2:
- mmio_config_writew(addr + reg, value);
- break;
- case 4:
- mmio_config_writel(addr + reg, value);
- break;
- }
-
- return 0;
-}
-
-static struct pci_raw_ops pci_mmcfg = {
- .read = pci_mmcfg_read,
- .write = pci_mmcfg_write,
-};
-
-static void __iomem * __init mcfg_ioremap(struct acpi_mcfg_allocation *cfg)
-{
- void __iomem *addr;
- u32 size;
-
- size = (cfg->end_bus_number + 1) << 20;
- addr = ioremap_nocache(cfg->address, size);
- if (addr) {
- printk(KERN_INFO "PCI: Using MMCONFIG at %Lx - %Lx\n",
- cfg->address, cfg->address + size - 1);
- }
- return addr;
-}
-
-int __init pci_mmcfg_arch_reachable(unsigned int seg, unsigned int bus,
- unsigned int devfn)
-{
- return pci_dev_base(seg, bus, devfn) != NULL;
-}
-
-int __init pci_mmcfg_arch_init(void)
-{
- int i;
- pci_mmcfg_virt = kmalloc(sizeof(*pci_mmcfg_virt) *
- pci_mmcfg_config_num, GFP_KERNEL);
- if (pci_mmcfg_virt == NULL) {
- printk(KERN_ERR "PCI: Can not allocate memory for mmconfig structures\n");
- return 0;
- }
-
- for (i = 0; i < pci_mmcfg_config_num; ++i) {
- pci_mmcfg_virt[i].cfg = &pci_mmcfg_config[i];
- pci_mmcfg_virt[i].virt = mcfg_ioremap(&pci_mmcfg_config[i]);
- if (!pci_mmcfg_virt[i].virt) {
- printk(KERN_ERR "PCI: Cannot map mmconfig aperture for "
- "segment %d\n",
- pci_mmcfg_config[i].pci_segment);
- return 0;
- }
- }
- raw_pci_ops = &pci_mmcfg;
- return 1;
-}
diff --git a/arch/x86_64/vdso/.gitignore b/arch/x86_64/vdso/.gitignore
deleted file mode 100644
index f8b69d84238e..000000000000
--- a/arch/x86_64/vdso/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-vdso.lds
diff --git a/arch/x86_64/vdso/Makefile b/arch/x86_64/vdso/Makefile
deleted file mode 100644
index 8d03de029d9b..000000000000
--- a/arch/x86_64/vdso/Makefile
+++ /dev/null
@@ -1,49 +0,0 @@
-#
-# x86-64 vDSO.
-#
-
-# files to link into the vdso
-# vdso-start.o has to be first
-vobjs-y := vdso-start.o vdso-note.o vclock_gettime.o vgetcpu.o vvar.o
-
-# files to link into kernel
-obj-y := vma.o vdso.o vdso-syms.o
-
-vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
-
-$(obj)/vdso.o: $(obj)/vdso.so
-
-targets += vdso.so vdso.lds $(vobjs-y) vdso-syms.o
-
-# The DSO images are built using a special linker script.
-quiet_cmd_syscall = SYSCALL $@
- cmd_syscall = $(CC) -m elf_x86_64 -nostdlib $(SYSCFLAGS_$(@F)) \
- -Wl,-T,$(filter-out FORCE,$^) -o $@
-
-export CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
-
-vdso-flags = -fPIC -shared -Wl,-soname=linux-vdso.so.1 \
- $(call ld-option, -Wl$(comma)--hash-style=sysv) \
- -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
-SYSCFLAGS_vdso.so = $(vdso-flags)
-
-$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so
-
-$(obj)/vdso.so: $(src)/vdso.lds $(vobjs) FORCE
- $(call if_changed,syscall)
-
-CFL := $(PROFILING) -mcmodel=small -fPIC -g0 -O2 -fasynchronous-unwind-tables -m64
-
-$(obj)/vclock_gettime.o: CFLAGS = $(CFL)
-$(obj)/vgetcpu.o: CFLAGS = $(CFL)
-
-# We also create a special relocatable object that should mirror the symbol
-# table and layout of the linked DSO. With ld -R we can then refer to
-# these symbols in the kernel code rather than hand-coded addresses.
-extra-y += vdso-syms.o
-$(obj)/built-in.o: $(obj)/vdso-syms.o
-$(obj)/built-in.o: ld_flags += -R $(obj)/vdso-syms.o
-
-SYSCFLAGS_vdso-syms.o = -r -d
-$(obj)/vdso-syms.o: $(src)/vdso.lds $(vobjs) FORCE
- $(call if_changed,syscall)
diff --git a/arch/x86_64/vdso/vclock_gettime.c b/arch/x86_64/vdso/vclock_gettime.c
deleted file mode 100644
index 5b54cdfb2b07..000000000000
--- a/arch/x86_64/vdso/vclock_gettime.c
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright 2006 Andi Kleen, SUSE Labs.
- * Subject to the GNU Public License, v.2
- *
- * Fast user context implementation of clock_gettime and gettimeofday.
- *
- * The code should have no internal unresolved relocations.
- * Check with readelf after changing.
- * Also alternative() doesn't work.
- */
-
-#include <linux/kernel.h>
-#include <linux/posix-timers.h>
-#include <linux/time.h>
-#include <linux/string.h>
-#include <asm/vsyscall.h>
-#include <asm/vgtod.h>
-#include <asm/timex.h>
-#include <asm/hpet.h>
-#include <asm/unistd.h>
-#include <asm/io.h>
-#include <asm/vgtod.h>
-#include "vextern.h"
-
-#define gtod vdso_vsyscall_gtod_data
-
-static long vdso_fallback_gettime(long clock, struct timespec *ts)
-{
- long ret;
- asm("syscall" : "=a" (ret) :
- "0" (__NR_clock_gettime),"D" (clock), "S" (ts) : "memory");
- return ret;
-}
-
-static inline long vgetns(void)
-{
- long v;
- cycles_t (*vread)(void);
- vread = gtod->clock.vread;
- v = (vread() - gtod->clock.cycle_last) & gtod->clock.mask;
- return (v * gtod->clock.mult) >> gtod->clock.shift;
-}
-
-static noinline int do_realtime(struct timespec *ts)
-{
- unsigned long seq, ns;
- do {
- seq = read_seqbegin(&gtod->lock);
- ts->tv_sec = gtod->wall_time_sec;
- ts->tv_nsec = gtod->wall_time_nsec;
- ns = vgetns();
- } while (unlikely(read_seqretry(&gtod->lock, seq)));
- timespec_add_ns(ts, ns);
- return 0;
-}
-
-/* Copy of the version in kernel/time.c which we cannot directly access */
-static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
-{
- while (nsec >= NSEC_PER_SEC) {
- nsec -= NSEC_PER_SEC;
- ++sec;
- }
- while (nsec < 0) {
- nsec += NSEC_PER_SEC;
- --sec;
- }
- ts->tv_sec = sec;
- ts->tv_nsec = nsec;
-}
-
-static noinline int do_monotonic(struct timespec *ts)
-{
- unsigned long seq, ns, secs;
- do {
- seq = read_seqbegin(&gtod->lock);
- secs = gtod->wall_time_sec;
- ns = gtod->wall_time_nsec + vgetns();
- secs += gtod->wall_to_monotonic.tv_sec;
- ns += gtod->wall_to_monotonic.tv_nsec;
- } while (unlikely(read_seqretry(&gtod->lock, seq)));
- vset_normalized_timespec(ts, secs, ns);
- return 0;
-}
-
-int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
-{
- if (likely(gtod->sysctl_enabled && gtod->clock.vread))
- switch (clock) {
- case CLOCK_REALTIME:
- return do_realtime(ts);
- case CLOCK_MONOTONIC:
- return do_monotonic(ts);
- }
- return vdso_fallback_gettime(clock, ts);
-}
-int clock_gettime(clockid_t, struct timespec *)
- __attribute__((weak, alias("__vdso_clock_gettime")));
-
-int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
-{
- long ret;
- if (likely(gtod->sysctl_enabled && gtod->clock.vread)) {
- BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
- offsetof(struct timespec, tv_nsec) ||
- sizeof(*tv) != sizeof(struct timespec));
- do_realtime((struct timespec *)tv);
- tv->tv_usec /= 1000;
- if (unlikely(tz != NULL)) {
- /* This relies on gcc inlining the memcpy. We'll notice
- if it ever fails to do so. */
- memcpy(tz, &gtod->sys_tz, sizeof(struct timezone));
- }
- return 0;
- }
- asm("syscall" : "=a" (ret) :
- "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
- return ret;
-}
-int gettimeofday(struct timeval *, struct timezone *)
- __attribute__((weak, alias("__vdso_gettimeofday")));
diff --git a/arch/x86_64/vdso/vdso-note.S b/arch/x86_64/vdso/vdso-note.S
deleted file mode 100644
index 79a071e4357e..000000000000
--- a/arch/x86_64/vdso/vdso-note.S
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
- * Here we can supply some information useful to userland.
- */
-
-#include <linux/uts.h>
-#include <linux/version.h>
-#include <linux/elfnote.h>
-
-ELFNOTE_START(Linux, 0, "a")
- .long LINUX_VERSION_CODE
-ELFNOTE_END
diff --git a/arch/x86_64/vdso/vdso-start.S b/arch/x86_64/vdso/vdso-start.S
deleted file mode 100644
index 2dc2cdb84d67..000000000000
--- a/arch/x86_64/vdso/vdso-start.S
+++ /dev/null
@@ -1,2 +0,0 @@
- .globl vdso_kernel_start
-vdso_kernel_start:
diff --git a/arch/x86_64/vdso/vdso.S b/arch/x86_64/vdso/vdso.S
deleted file mode 100644
index 92e80c1972a7..000000000000
--- a/arch/x86_64/vdso/vdso.S
+++ /dev/null
@@ -1,2 +0,0 @@
- .section ".vdso","a"
- .incbin "arch/x86_64/vdso/vdso.so"
diff --git a/arch/x86_64/vdso/vdso.lds.S b/arch/x86_64/vdso/vdso.lds.S
deleted file mode 100644
index b9a60e665d08..000000000000
--- a/arch/x86_64/vdso/vdso.lds.S
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Linker script for vsyscall DSO. The vsyscall page is an ELF shared
- * object prelinked to its virtual address, and with only one read-only
- * segment (that fits in one page). This script controls its layout.
- */
-#include <asm/asm-offsets.h>
-#include "voffset.h"
-
-#define VDSO_PRELINK 0xffffffffff700000
-
-SECTIONS
-{
- . = VDSO_PRELINK + SIZEOF_HEADERS;
-
- .hash : { *(.hash) } :text
- .gnu.hash : { *(.gnu.hash) }
- .dynsym : { *(.dynsym) }
- .dynstr : { *(.dynstr) }
- .gnu.version : { *(.gnu.version) }
- .gnu.version_d : { *(.gnu.version_d) }
- .gnu.version_r : { *(.gnu.version_r) }
-
- /* This linker script is used both with -r and with -shared.
- For the layouts to match, we need to skip more than enough
- space for the dynamic symbol table et al. If this amount
- is insufficient, ld -shared will barf. Just increase it here. */
- . = VDSO_PRELINK + VDSO_TEXT_OFFSET;
-
- .text : { *(.text) } :text
- .text.ptr : { *(.text.ptr) } :text
- . = VDSO_PRELINK + 0x900;
- .data : { *(.data) } :text
- .bss : { *(.bss) } :text
-
- .altinstructions : { *(.altinstructions) } :text
- .altinstr_replacement : { *(.altinstr_replacement) } :text
-
- .note : { *(.note.*) } :text :note
- .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
- .eh_frame : { KEEP (*(.eh_frame)) } :text
- .dynamic : { *(.dynamic) } :text :dynamic
- .useless : {
- *(.got.plt) *(.got)
- *(.gnu.linkonce.d.*)
- *(.dynbss)
- *(.gnu.linkonce.b.*)
- } :text
-}
-
-/*
- * We must supply the ELF program headers explicitly to get just one
- * PT_LOAD segment, and set the flags explicitly to make segments read-only.
- */
-PHDRS
-{
- text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
- dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
- note PT_NOTE FLAGS(4); /* PF_R */
- eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
-}
-
-/*
- * This controls what symbols we export from the DSO.
- */
-VERSION
-{
- LINUX_2.6 {
- global:
- clock_gettime;
- __vdso_clock_gettime;
- gettimeofday;
- __vdso_gettimeofday;
- getcpu;
- __vdso_getcpu;
- local: *;
- };
-}
diff --git a/arch/x86_64/vdso/vextern.h b/arch/x86_64/vdso/vextern.h
deleted file mode 100644
index 1683ba2ae3e8..000000000000
--- a/arch/x86_64/vdso/vextern.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef VEXTERN
-#include <asm/vsyscall.h>
-#define VEXTERN(x) \
- extern typeof(x) *vdso_ ## x __attribute__((visibility("hidden")));
-#endif
-
-#define VMAGIC 0xfeedbabeabcdefabUL
-
-/* Any kernel variables used in the vDSO must be exported in the main
- kernel's vmlinux.lds.S/vsyscall.h/proper __section and
- put into vextern.h and be referenced as a pointer with vdso prefix.
- The main kernel later fills in the values. */
-
-VEXTERN(jiffies)
-VEXTERN(vgetcpu_mode)
-VEXTERN(vsyscall_gtod_data)
diff --git a/arch/x86_64/vdso/vgetcpu.c b/arch/x86_64/vdso/vgetcpu.c
deleted file mode 100644
index 91f6e85d0fc2..000000000000
--- a/arch/x86_64/vdso/vgetcpu.c
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright 2006 Andi Kleen, SUSE Labs.
- * Subject to the GNU Public License, v.2
- *
- * Fast user context implementation of getcpu()
- */
-
-#include <linux/kernel.h>
-#include <linux/getcpu.h>
-#include <linux/jiffies.h>
-#include <linux/time.h>
-#include <asm/vsyscall.h>
-#include <asm/vgtod.h>
-#include "vextern.h"
-
-long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
-{
- unsigned int dummy, p;
- unsigned long j = 0;
-
- /* Fast cache - only recompute value once per jiffies and avoid
- relatively costly rdtscp/cpuid otherwise.
- This works because the scheduler usually keeps the process
- on the same CPU and this syscall doesn't guarantee its
- results anyways.
- We do this here because otherwise user space would do it on
- its own in a likely inferior way (no access to jiffies).
- If you don't like it pass NULL. */
- if (tcache && tcache->blob[0] == (j = *vdso_jiffies)) {
- p = tcache->blob[1];
- } else if (*vdso_vgetcpu_mode == VGETCPU_RDTSCP) {
- /* Load per CPU data from RDTSCP */
- rdtscp(dummy, dummy, p);
- } else {
- /* Load per CPU data from GDT */
- asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
- }
- if (tcache) {
- tcache->blob[0] = j;
- tcache->blob[1] = p;
- }
- if (cpu)
- *cpu = p & 0xfff;
- if (node)
- *node = p >> 12;
- return 0;
-}
-
-long getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
- __attribute__((weak, alias("__vdso_getcpu")));
diff --git a/arch/x86_64/vdso/vma.c b/arch/x86_64/vdso/vma.c
deleted file mode 100644
index ff9333e5fb08..000000000000
--- a/arch/x86_64/vdso/vma.c
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Set up the VMAs to tell the VM about the vDSO.
- * Copyright 2007 Andi Kleen, SUSE Labs.
- * Subject to the GPL, v.2
- */
-#include <linux/mm.h>
-#include <linux/err.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/random.h>
-#include <asm/vsyscall.h>
-#include <asm/vgtod.h>
-#include <asm/proto.h>
-#include "voffset.h"
-
-int vdso_enabled = 1;
-
-#define VEXTERN(x) extern typeof(__ ## x) *vdso_ ## x;
-#include "vextern.h"
-#undef VEXTERN
-
-extern char vdso_kernel_start[], vdso_start[], vdso_end[];
-extern unsigned short vdso_sync_cpuid;
-
-struct page **vdso_pages;
-
-static inline void *var_ref(void *vbase, char *var, char *name)
-{
- unsigned offset = var - &vdso_kernel_start[0] + VDSO_TEXT_OFFSET;
- void *p = vbase + offset;
- if (*(void **)p != (void *)VMAGIC) {
- printk("VDSO: variable %s broken\n", name);
- vdso_enabled = 0;
- }
- return p;
-}
-
-static int __init init_vdso_vars(void)
-{
- int npages = (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE;
- int i;
- char *vbase;
-
- vdso_pages = kmalloc(sizeof(struct page *) * npages, GFP_KERNEL);
- if (!vdso_pages)
- goto oom;
- for (i = 0; i < npages; i++) {
- struct page *p;
- p = alloc_page(GFP_KERNEL);
- if (!p)
- goto oom;
- vdso_pages[i] = p;
- copy_page(page_address(p), vdso_start + i*PAGE_SIZE);
- }
-
- vbase = vmap(vdso_pages, npages, 0, PAGE_KERNEL);
- if (!vbase)
- goto oom;
-
- if (memcmp(vbase, "\177ELF", 4)) {
- printk("VDSO: I'm broken; not ELF\n");
- vdso_enabled = 0;
- }
-
-#define V(x) *(typeof(x) *) var_ref(vbase, (char *)RELOC_HIDE(&x, 0), #x)
-#define VEXTERN(x) \
- V(vdso_ ## x) = &__ ## x;
-#include "vextern.h"
-#undef VEXTERN
- return 0;
-
- oom:
- printk("Cannot allocate vdso\n");
- vdso_enabled = 0;
- return -ENOMEM;
-}
-__initcall(init_vdso_vars);
-
-struct linux_binprm;
-
-/* Put the vdso above the (randomized) stack with another randomized offset.
- This way there is no hole in the middle of address space.
- To save memory make sure it is still in the same PTE as the stack top.
- This doesn't give that many random bits */
-static unsigned long vdso_addr(unsigned long start, unsigned len)
-{
- unsigned long addr, end;
- unsigned offset;
- end = (start + PMD_SIZE - 1) & PMD_MASK;
- if (end >= TASK_SIZE64)
- end = TASK_SIZE64;
- end -= len;
- /* This loses some more bits than a modulo, but is cheaper */
- offset = get_random_int() & (PTRS_PER_PTE - 1);
- addr = start + (offset << PAGE_SHIFT);
- if (addr >= end)
- addr = end;
- return addr;
-}
-
-/* Setup a VMA at program startup for the vsyscall page.
- Not called for compat tasks */
-int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
-{
- struct mm_struct *mm = current->mm;
- unsigned long addr;
- int ret;
- unsigned len = round_up(vdso_end - vdso_start, PAGE_SIZE);
-
- if (!vdso_enabled)
- return 0;
-
- down_write(&mm->mmap_sem);
- addr = vdso_addr(mm->start_stack, len);
- addr = get_unmapped_area(NULL, addr, len, 0, 0);
- if (IS_ERR_VALUE(addr)) {
- ret = addr;
- goto up_fail;
- }
-
- ret = install_special_mapping(mm, addr, len,
- VM_READ|VM_EXEC|
- VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
- VM_ALWAYSDUMP,
- vdso_pages);
- if (ret)
- goto up_fail;
-
- current->mm->context.vdso = (void *)addr;
-up_fail:
- up_write(&mm->mmap_sem);
- return ret;
-}
-
-static __init int vdso_setup(char *s)
-{
- vdso_enabled = simple_strtoul(s, NULL, 0);
- return 0;
-}
-__setup("vdso=", vdso_setup);
diff --git a/arch/x86_64/vdso/voffset.h b/arch/x86_64/vdso/voffset.h
deleted file mode 100644
index 4af67c79085f..000000000000
--- a/arch/x86_64/vdso/voffset.h
+++ /dev/null
@@ -1 +0,0 @@
-#define VDSO_TEXT_OFFSET 0x600
diff --git a/arch/x86_64/vdso/vvar.c b/arch/x86_64/vdso/vvar.c
deleted file mode 100644
index 6fc22219a472..000000000000
--- a/arch/x86_64/vdso/vvar.c
+++ /dev/null
@@ -1,12 +0,0 @@
-/* Define pointer to external vDSO variables.
- These are part of the vDSO. The kernel fills in the real addresses
- at boot time. This is done because when the vdso is linked the
- kernel isn't yet and we don't know the final addresses. */
-#include <linux/kernel.h>
-#include <linux/time.h>
-#include <asm/vsyscall.h>
-#include <asm/timex.h>
-#include <asm/vgtod.h>
-
-#define VEXTERN(x) typeof (__ ## x) *vdso_ ## x = (void *)VMAGIC;
-#include "vextern.h"