summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig10
-rw-r--r--arch/x86/boot/compressed/Makefile21
-rw-r--r--arch/x86/boot/compressed/misc.c118
-rw-r--r--arch/x86/include/asm/apic.h28
-rw-r--r--arch/x86/include/asm/boot.h16
-rw-r--r--arch/x86/include/asm/cacheflush.h53
-rw-r--r--arch/x86/include/asm/efi.h2
-rw-r--r--arch/x86/include/asm/entry_arch.h2
-rw-r--r--arch/x86/include/asm/fixmap.h139
-rw-r--r--arch/x86/include/asm/fixmap_32.h115
-rw-r--r--arch/x86/include/asm/fixmap_64.h79
-rw-r--r--arch/x86/include/asm/hardirq.h1
-rw-r--r--arch/x86/include/asm/hw_irq.h1
-rw-r--r--arch/x86/include/asm/i387.h8
-rw-r--r--arch/x86/include/asm/init.h18
-rw-r--r--arch/x86/include/asm/io.h3
-rw-r--r--arch/x86/include/asm/irq.h1
-rw-r--r--arch/x86/include/asm/irq_vectors.h5
-rw-r--r--arch/x86/include/asm/kexec.h13
-rw-r--r--arch/x86/include/asm/linkage.h16
-rw-r--r--arch/x86/include/asm/mmzone_32.h43
-rw-r--r--arch/x86/include/asm/numa_32.h6
-rw-r--r--arch/x86/include/asm/page_types.h6
-rw-r--r--arch/x86/include/asm/pat.h5
-rw-r--r--arch/x86/include/asm/percpu.h8
-rw-r--r--arch/x86/include/asm/pgtable.h2
-rw-r--r--arch/x86/include/asm/pgtable_32_types.h5
-rw-r--r--arch/x86/include/asm/pgtable_types.h1
-rw-r--r--arch/x86/include/asm/processor.h6
-rw-r--r--arch/x86/include/asm/seccomp_32.h6
-rw-r--r--arch/x86/include/asm/seccomp_64.h8
-rw-r--r--arch/x86/include/asm/setup.h7
-rw-r--r--arch/x86/include/asm/system.h3
-rw-r--r--arch/x86/include/asm/uaccess_32.h4
-rw-r--r--arch/x86/include/asm/uaccess_64.h25
-rw-r--r--arch/x86/include/asm/uv/uv.h3
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h4
-rw-r--r--arch/x86/include/asm/xen/page.h1
-rw-r--r--arch/x86/kernel/Makefile4
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c2
-rw-r--r--arch/x86/kernel/apic/bigsmp_32.c41
-rw-r--r--arch/x86/kernel/apic/es7000_32.c221
-rw-r--r--arch/x86/kernel/apic/numaq_32.c12
-rw-r--r--arch/x86/kernel/apic/probe_32.c15
-rw-r--r--arch/x86/kernel/apic/probe_64.c3
-rw-r--r--arch/x86/kernel/apic/summit_32.c49
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c1
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c1
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c44
-rw-r--r--arch/x86/kernel/cpu/amd.c52
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/p4-clockmod.c1
-rw-r--r--arch/x86/kernel/cpu/intel.c25
-rw-r--r--arch/x86/kernel/cpu/proc.c20
-rw-r--r--arch/x86/kernel/ds.c3
-rw-r--r--arch/x86/kernel/efi.c7
-rw-r--r--arch/x86/kernel/efi_64.c21
-rw-r--r--arch/x86/kernel/entry_64.S2
-rw-r--r--arch/x86/kernel/i387.c2
-rw-r--r--arch/x86/kernel/ioport.c11
-rw-r--r--arch/x86/kernel/irq.c34
-rw-r--r--arch/x86/kernel/irq_32.c29
-rw-r--r--arch/x86/kernel/irqinit_32.c3
-rw-r--r--arch/x86/kernel/irqinit_64.c3
-rw-r--r--arch/x86/kernel/machine_kexec_32.c17
-rw-r--r--arch/x86/kernel/machine_kexec_64.c99
-rw-r--r--arch/x86/kernel/mpparse.c25
-rw-r--r--arch/x86/kernel/process.c191
-rw-r--r--arch/x86/kernel/process_32.c190
-rw-r--r--arch/x86/kernel/process_64.c188
-rw-r--r--arch/x86/kernel/ptrace.c2
-rw-r--r--arch/x86/kernel/reboot.c8
-rw-r--r--arch/x86/kernel/relocate_kernel_32.S24
-rw-r--r--arch/x86/kernel/relocate_kernel_64.S189
-rw-r--r--arch/x86/kernel/setup.c25
-rw-r--r--arch/x86/kernel/setup_percpu.c398
-rw-r--r--arch/x86/kernel/signal.c117
-rw-r--r--arch/x86/kernel/smpboot.c97
-rw-r--r--arch/x86/kernel/tlb_uv.c2
-rw-r--r--arch/x86/kernel/traps.c46
-rw-r--r--arch/x86/kernel/uv_time.c393
-rw-r--r--arch/x86/kernel/visws_quirks.c2
-rw-r--r--arch/x86/kernel/vmlinux_64.lds.S7
-rw-r--r--arch/x86/kernel/vsmp_64.c12
-rw-r--r--arch/x86/lguest/boot.c21
-rw-r--r--arch/x86/math-emu/fpu_aux.c31
-rw-r--r--arch/x86/mm/Makefile2
-rw-r--r--arch/x86/mm/highmem_32.c25
-rw-r--r--arch/x86/mm/init.c393
-rw-r--r--arch/x86/mm/init_32.c326
-rw-r--r--arch/x86/mm/init_64.c387
-rw-r--r--arch/x86/mm/ioremap.c35
-rw-r--r--arch/x86/mm/kmmio.c164
-rw-r--r--arch/x86/mm/memtest.c3
-rw-r--r--arch/x86/mm/numa_32.c31
-rw-r--r--arch/x86/mm/pgtable.c18
-rw-r--r--arch/x86/mm/pgtable_32.c18
-rw-r--r--arch/x86/mm/testmmiotrace.c70
-rw-r--r--arch/x86/oprofile/op_model_ppro.c14
-rw-r--r--arch/x86/xen/enlighten.c10
-rw-r--r--arch/x86/xen/mmu.c7
-rw-r--r--arch/x86/xen/smp.c6
102 files changed, 2852 insertions, 2141 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 3afc94abe357..87717f3687d2 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -40,6 +40,9 @@ config X86
select HAVE_GENERIC_DMA_COHERENT if X86_32
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select USER_STACKTRACE_SUPPORT
+ select HAVE_KERNEL_GZIP
+ select HAVE_KERNEL_BZIP2
+ select HAVE_KERNEL_LZMA
config ARCH_DEFCONFIG
string
@@ -135,6 +138,9 @@ config ARCH_HAS_CACHE_LINE_SIZE
config HAVE_SETUP_PER_CPU_AREA
def_bool y
+config HAVE_DYNAMIC_PER_CPU_AREA
+ def_bool y
+
config HAVE_CPUMASK_OF_CPU_MAP
def_bool X86_64_SMP
@@ -1127,7 +1133,7 @@ config NODES_SHIFT
Specify the maximum number of NUMA Nodes available on the target
system. Increases memory reserved to accomodate various tables.
-config HAVE_ARCH_BOOTMEM_NODE
+config HAVE_ARCH_BOOTMEM
def_bool y
depends on X86_32 && NUMA
@@ -1425,7 +1431,7 @@ config CRASH_DUMP
config KEXEC_JUMP
bool "kexec jump (EXPERIMENTAL)"
depends on EXPERIMENTAL
- depends on KEXEC && HIBERNATION && X86_32
+ depends on KEXEC && HIBERNATION
---help---
Jump between original kernel and kexeced kernel and invoke
code in physical address mode via KEXEC
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 1771c804e02f..3ca4c194b8e5 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -4,7 +4,7 @@
# create a compressed vmlinux image from the original vmlinux
#
-targets := vmlinux vmlinux.bin vmlinux.bin.gz head_$(BITS).o misc.o piggy.o
+targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma head_$(BITS).o misc.o piggy.o
KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
KBUILD_CFLAGS += -fno-strict-aliasing -fPIC
@@ -47,18 +47,35 @@ ifeq ($(CONFIG_X86_32),y)
ifdef CONFIG_RELOCATABLE
$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin.all FORCE
$(call if_changed,gzip)
+$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin.all FORCE
+ $(call if_changed,bzip2)
+$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin.all FORCE
+ $(call if_changed,lzma)
else
$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
$(call if_changed,gzip)
+$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,bzip2)
+$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,lzma)
endif
LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T
else
+
$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
$(call if_changed,gzip)
+$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,bzip2)
+$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,lzma)
LDFLAGS_piggy.o := -r --format binary --oformat elf64-x86-64 -T
endif
-$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE
+suffix_$(CONFIG_KERNEL_GZIP) = gz
+suffix_$(CONFIG_KERNEL_BZIP2) = bz2
+suffix_$(CONFIG_KERNEL_LZMA) = lzma
+
+$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix_y) FORCE
$(call if_changed,ld)
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index da062216948a..e45be73684ff 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -116,71 +116,13 @@
/*
* gzip declarations
*/
-
-#define OF(args) args
#define STATIC static
#undef memset
#undef memcpy
#define memzero(s, n) memset((s), 0, (n))
-typedef unsigned char uch;
-typedef unsigned short ush;
-typedef unsigned long ulg;
-
-/*
- * Window size must be at least 32k, and a power of two.
- * We don't actually have a window just a huge output buffer,
- * so we report a 2G window size, as that should always be
- * larger than our output buffer:
- */
-#define WSIZE 0x80000000
-
-/* Input buffer: */
-static unsigned char *inbuf;
-
-/* Sliding window buffer (and final output buffer): */
-static unsigned char *window;
-
-/* Valid bytes in inbuf: */
-static unsigned insize;
-
-/* Index of next byte to be processed in inbuf: */
-static unsigned inptr;
-
-/* Bytes in output buffer: */
-static unsigned outcnt;
-
-/* gzip flag byte */
-#define ASCII_FLAG 0x01 /* bit 0 set: file probably ASCII text */
-#define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gz file */
-#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */
-#define ORIG_NAM 0x08 /* bit 3 set: original file name present */
-#define COMMENT 0x10 /* bit 4 set: file comment present */
-#define ENCRYPTED 0x20 /* bit 5 set: file is encrypted */
-#define RESERVED 0xC0 /* bit 6, 7: reserved */
-
-#define get_byte() (inptr < insize ? inbuf[inptr++] : fill_inbuf())
-
-/* Diagnostic functions */
-#ifdef DEBUG
-# define Assert(cond, msg) do { if (!(cond)) error(msg); } while (0)
-# define Trace(x) do { fprintf x; } while (0)
-# define Tracev(x) do { if (verbose) fprintf x ; } while (0)
-# define Tracevv(x) do { if (verbose > 1) fprintf x ; } while (0)
-# define Tracec(c, x) do { if (verbose && (c)) fprintf x ; } while (0)
-# define Tracecv(c, x) do { if (verbose > 1 && (c)) fprintf x ; } while (0)
-#else
-# define Assert(cond, msg)
-# define Trace(x)
-# define Tracev(x)
-# define Tracevv(x)
-# define Tracec(c, x)
-# define Tracecv(c, x)
-#endif
-static int fill_inbuf(void);
-static void flush_window(void);
static void error(char *m);
/*
@@ -189,13 +131,8 @@ static void error(char *m);
static struct boot_params *real_mode; /* Pointer to real-mode data */
static int quiet;
-extern unsigned char input_data[];
-extern int input_len;
-
-static long bytes_out;
-
static void *memset(void *s, int c, unsigned n);
-static void *memcpy(void *dest, const void *src, unsigned n);
+void *memcpy(void *dest, const void *src, unsigned n);
static void __putstr(int, const char *);
#define putstr(__x) __putstr(0, __x)
@@ -213,7 +150,17 @@ static char *vidmem;
static int vidport;
static int lines, cols;
-#include "../../../../lib/inflate.c"
+#ifdef CONFIG_KERNEL_GZIP
+#include "../../../../lib/decompress_inflate.c"
+#endif
+
+#ifdef CONFIG_KERNEL_BZIP2
+#include "../../../../lib/decompress_bunzip2.c"
+#endif
+
+#ifdef CONFIG_KERNEL_LZMA
+#include "../../../../lib/decompress_unlzma.c"
+#endif
static void scroll(void)
{
@@ -282,7 +229,7 @@ static void *memset(void *s, int c, unsigned n)
return s;
}
-static void *memcpy(void *dest, const void *src, unsigned n)
+void *memcpy(void *dest, const void *src, unsigned n)
{
int i;
const char *s = src;
@@ -293,38 +240,6 @@ static void *memcpy(void *dest, const void *src, unsigned n)
return dest;
}
-/* ===========================================================================
- * Fill the input buffer. This is called only when the buffer is empty
- * and at least one byte is really needed.
- */
-static int fill_inbuf(void)
-{
- error("ran out of input data");
- return 0;
-}
-
-/* ===========================================================================
- * Write the output window window[0..outcnt-1] and update crc and bytes_out.
- * (Used for the decompressed data only.)
- */
-static void flush_window(void)
-{
- /* With my window equal to my output buffer
- * I only need to compute the crc here.
- */
- unsigned long c = crc; /* temporary variable */
- unsigned n;
- unsigned char *in, ch;
-
- in = window;
- for (n = 0; n < outcnt; n++) {
- ch = *in++;
- c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
- }
- crc = c;
- bytes_out += (unsigned long)outcnt;
- outcnt = 0;
-}
static void error(char *x)
{
@@ -407,12 +322,8 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
lines = real_mode->screen_info.orig_video_lines;
cols = real_mode->screen_info.orig_video_cols;
- window = output; /* Output buffer (Normally at 1M) */
free_mem_ptr = heap; /* Heap */
free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
- inbuf = input_data; /* Input buffer */
- insize = input_len;
- inptr = 0;
#ifdef CONFIG_X86_64
if ((unsigned long)output & (__KERNEL_ALIGN - 1))
@@ -430,10 +341,9 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
#endif
#endif
- makecrc();
if (!quiet)
putstr("\nDecompressing Linux... ");
- gunzip();
+ decompress(input_data, input_len, NULL, NULL, output, NULL, error);
parse_elf(output);
if (!quiet)
putstr("done.\nBooting the kernel.\n");
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index a6208dc74633..394d177d721b 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -75,7 +75,14 @@ static inline void default_inquire_remote_apic(int apicid)
#define setup_secondary_clock setup_secondary_APIC_clock
#endif
+#ifdef CONFIG_X86_VSMP
extern int is_vsmp_box(void);
+#else
+static inline int is_vsmp_box(void)
+{
+ return 0;
+}
+#endif
extern void xapic_wait_icr_idle(void);
extern u32 safe_xapic_wait_icr_idle(void);
extern void xapic_icr_write(u32, u32);
@@ -306,7 +313,7 @@ struct apic {
void (*send_IPI_self)(int vector);
/* wakeup_secondary_cpu */
- int (*wakeup_cpu)(int apicid, unsigned long start_eip);
+ int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip);
int trampoline_phys_low;
int trampoline_phys_high;
@@ -324,8 +331,21 @@ struct apic {
u32 (*safe_wait_icr_idle)(void);
};
+/*
+ * Pointer to the local APIC driver in use on this system (there's
+ * always just one such driver in use - the kernel decides via an
+ * early probing process which one it picks - and then sticks to it):
+ */
extern struct apic *apic;
+/*
+ * APIC functionality to boot other CPUs - only used on SMP:
+ */
+#ifdef CONFIG_SMP
+extern atomic_t init_deasserted;
+extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
+#endif
+
static inline u32 apic_read(u32 reg)
{
return apic->read(reg);
@@ -359,6 +379,7 @@ static inline u32 safe_apic_wait_icr_idle(void)
static inline void ack_APIC_irq(void)
{
+#ifdef CONFIG_X86_LOCAL_APIC
/*
* ack_APIC_irq() actually gets compiled as a single instruction
* ... yummie.
@@ -366,6 +387,7 @@ static inline void ack_APIC_irq(void)
/* Docs say use 0 for future compatibility */
apic_write(APIC_EOI, 0);
+#endif
}
static inline unsigned default_get_apic_id(unsigned long x)
@@ -384,9 +406,7 @@ static inline unsigned default_get_apic_id(unsigned long x)
#define DEFAULT_TRAMPOLINE_PHYS_LOW 0x467
#define DEFAULT_TRAMPOLINE_PHYS_HIGH 0x469
-#ifdef CONFIG_X86_32
-extern void es7000_update_apic_to_cluster(void);
-#else
+#ifdef CONFIG_X86_64
extern struct apic apic_flat;
extern struct apic apic_physflat;
extern struct apic apic_x2apic_cluster;
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index dd61616cb73d..6526cf08b0e4 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -10,17 +10,31 @@
#define EXTENDED_VGA 0xfffe /* 80x50 mode */
#define ASK_VGA 0xfffd /* ask for it at bootup */
+#ifdef __KERNEL__
+
/* Physical address where kernel should be loaded. */
#define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \
+ (CONFIG_PHYSICAL_ALIGN - 1)) \
& ~(CONFIG_PHYSICAL_ALIGN - 1))
+#ifdef CONFIG_KERNEL_BZIP2
+#define BOOT_HEAP_SIZE 0x400000
+#else /* !CONFIG_KERNEL_BZIP2 */
+
#ifdef CONFIG_X86_64
#define BOOT_HEAP_SIZE 0x7000
-#define BOOT_STACK_SIZE 0x4000
#else
#define BOOT_HEAP_SIZE 0x4000
+#endif
+
+#endif /* !CONFIG_KERNEL_BZIP2 */
+
+#ifdef CONFIG_X86_64
+#define BOOT_STACK_SIZE 0x4000
+#else
#define BOOT_STACK_SIZE 0x1000
#endif
+#endif /* __KERNEL__ */
+
#endif /* _ASM_X86_BOOT_H */
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index 2f8466540fb5..5b301b7ff5f4 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -5,24 +5,43 @@
#include <linux/mm.h>
/* Caches aren't brain-dead on the intel. */
-#define flush_cache_all() do { } while (0)
-#define flush_cache_mm(mm) do { } while (0)
-#define flush_cache_dup_mm(mm) do { } while (0)
-#define flush_cache_range(vma, start, end) do { } while (0)
-#define flush_cache_page(vma, vmaddr, pfn) do { } while (0)
-#define flush_dcache_page(page) do { } while (0)
-#define flush_dcache_mmap_lock(mapping) do { } while (0)
-#define flush_dcache_mmap_unlock(mapping) do { } while (0)
-#define flush_icache_range(start, end) do { } while (0)
-#define flush_icache_page(vma, pg) do { } while (0)
-#define flush_icache_user_range(vma, pg, adr, len) do { } while (0)
-#define flush_cache_vmap(start, end) do { } while (0)
-#define flush_cache_vunmap(start, end) do { } while (0)
+static inline void flush_cache_all(void) { }
+static inline void flush_cache_mm(struct mm_struct *mm) { }
+static inline void flush_cache_dup_mm(struct mm_struct *mm) { }
+static inline void flush_cache_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end) { }
+static inline void flush_cache_page(struct vm_area_struct *vma,
+ unsigned long vmaddr, unsigned long pfn) { }
+static inline void flush_dcache_page(struct page *page) { }
+static inline void flush_dcache_mmap_lock(struct address_space *mapping) { }
+static inline void flush_dcache_mmap_unlock(struct address_space *mapping) { }
+static inline void flush_icache_range(unsigned long start,
+ unsigned long end) { }
+static inline void flush_icache_page(struct vm_area_struct *vma,
+ struct page *page) { }
+static inline void flush_icache_user_range(struct vm_area_struct *vma,
+ struct page *page,
+ unsigned long addr,
+ unsigned long len) { }
+static inline void flush_cache_vmap(unsigned long start, unsigned long end) { }
+static inline void flush_cache_vunmap(unsigned long start,
+ unsigned long end) { }
-#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
- memcpy((dst), (src), (len))
-#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
- memcpy((dst), (src), (len))
+static inline void copy_to_user_page(struct vm_area_struct *vma,
+ struct page *page, unsigned long vaddr,
+ void *dst, const void *src,
+ unsigned long len)
+{
+ memcpy(dst, src, len);
+}
+
+static inline void copy_from_user_page(struct vm_area_struct *vma,
+ struct page *page, unsigned long vaddr,
+ void *dst, const void *src,
+ unsigned long len)
+{
+ memcpy(dst, src, len);
+}
#define PG_non_WB PG_arch_1
PAGEFLAG(NonWB, non_WB)
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index ca5ffb2856b6..edc90f23e708 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -37,8 +37,6 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
#else /* !CONFIG_X86_32 */
-#define MAX_EFI_IO_PAGES 100
-
extern u64 efi_call0(void *fp);
extern u64 efi_call1(void *fp, u64 arg1);
extern u64 efi_call2(void *fp, u64 arg1, u64 arg2);
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 854d538ae857..c2e6bedaf258 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -33,6 +33,8 @@ BUILD_INTERRUPT3(invalidate_interrupt7,INVALIDATE_TLB_VECTOR_START+7,
smp_invalidate_interrupt)
#endif
+BUILD_INTERRUPT(generic_interrupt, GENERIC_INTERRUPT_VECTOR)
+
/*
* every pentium local APIC has two 'local interrupts', with a
* soft-definable vector attached to both interrupts, one of
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 23696d44a0af..63a79c77d220 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -1,11 +1,145 @@
+/*
+ * fixmap.h: compile-time virtual memory allocation
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998 Ingo Molnar
+ *
+ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+ * x86_32 and x86_64 integration by Gustavo F. Padovan, February 2009
+ */
+
#ifndef _ASM_X86_FIXMAP_H
#define _ASM_X86_FIXMAP_H
+#ifndef __ASSEMBLY__
+#include <linux/kernel.h>
+#include <asm/acpi.h>
+#include <asm/apicdef.h>
+#include <asm/page.h>
+#ifdef CONFIG_X86_32
+#include <linux/threads.h>
+#include <asm/kmap_types.h>
+#else
+#include <asm/vsyscall.h>
+#endif
+
+/*
+ * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall
+ * uses fixmaps that relies on FIXADDR_TOP for proper address calculation.
+ * Because of this, FIXADDR_TOP x86 integration was left as later work.
+ */
+#ifdef CONFIG_X86_32
+/* used by vmalloc.c, vsyscall.lds.S.
+ *
+ * Leave one empty page between vmalloc'ed areas and
+ * the start of the fixmap.
+ */
+extern unsigned long __FIXADDR_TOP;
+#define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP)
+
+#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO)
+#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1)
+#else
+#define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE)
+
+/* Only covers 32bit vsyscalls currently. Need another set for 64bit. */
+#define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL)
+#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE)
+#endif
+
+
+/*
+ * Here we define all the compile-time 'special' virtual
+ * addresses. The point is to have a constant address at
+ * compile time, but to set the physical address only
+ * in the boot process.
+ * for x86_32: We allocate these special addresses
+ * from the end of virtual memory (0xfffff000) backwards.
+ * Also this lets us do fail-safe vmalloc(), we
+ * can guarantee that these special addresses and
+ * vmalloc()-ed addresses never overlap.
+ *
+ * These 'compile-time allocated' memory buffers are
+ * fixed-size 4k pages (or larger if used with an increment
+ * higher than 1). Use set_fixmap(idx,phys) to associate
+ * physical memory with fixmap indices.
+ *
+ * TLB entries of such buffers will not be flushed across
+ * task switches.
+ */
+enum fixed_addresses {
#ifdef CONFIG_X86_32
-# include "fixmap_32.h"
+ FIX_HOLE,
+ FIX_VDSO,
#else
-# include "fixmap_64.h"
+ VSYSCALL_LAST_PAGE,
+ VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE
+ + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
+ VSYSCALL_HPET,
#endif
+ FIX_DBGP_BASE,
+ FIX_EARLYCON_MEM_BASE,
+#ifdef CONFIG_X86_LOCAL_APIC
+ FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
+#endif
+#ifdef CONFIG_X86_IO_APIC
+ FIX_IO_APIC_BASE_0,
+ FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
+#endif
+#ifdef CONFIG_X86_VISWS_APIC
+ FIX_CO_CPU, /* Cobalt timer */
+ FIX_CO_APIC, /* Cobalt APIC Redirection Table */
+ FIX_LI_PCIA, /* Lithium PCI Bridge A */
+ FIX_LI_PCIB, /* Lithium PCI Bridge B */
+#endif
+#ifdef CONFIG_X86_F00F_BUG
+ FIX_F00F_IDT, /* Virtual mapping for IDT */
+#endif
+#ifdef CONFIG_X86_CYCLONE_TIMER
+ FIX_CYCLONE_TIMER, /*cyclone timer register*/
+#endif
+#ifdef CONFIG_X86_32
+ FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
+ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
+#ifdef CONFIG_PCI_MMCONFIG
+ FIX_PCIE_MCFG,
+#endif
+#endif
+#ifdef CONFIG_PARAVIRT
+ FIX_PARAVIRT_BOOTMAP,
+#endif
+ __end_of_permanent_fixed_addresses,
+#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
+ FIX_OHCI1394_BASE,
+#endif
+ /*
+ * 256 temporary boot-time mappings, used by early_ioremap(),
+ * before ioremap() is functional.
+ *
+ * We round it up to the next 256 pages boundary so that we
+ * can have a single pgd entry and a single pte table:
+ */
+#define NR_FIX_BTMAPS 64
+#define FIX_BTMAPS_SLOTS 4
+ FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 -
+ (__end_of_permanent_fixed_addresses & 255),
+ FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1,
+#ifdef CONFIG_X86_32
+ FIX_WP_TEST,
+#endif
+ __end_of_fixed_addresses
+};
+
+
+extern void reserve_top_address(unsigned long reserve);
+
+#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_BOOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
+#define FIXADDR_BOOT_START (FIXADDR_TOP - FIXADDR_BOOT_SIZE)
extern int fixmaps_set;
@@ -69,4 +203,5 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr)
BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
return __virt_to_fix(vaddr);
}
+#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_FIXMAP_H */
diff --git a/arch/x86/include/asm/fixmap_32.h b/arch/x86/include/asm/fixmap_32.h
deleted file mode 100644
index 047d9bab2b31..000000000000
--- a/arch/x86/include/asm/fixmap_32.h
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * fixmap.h: compile-time virtual memory allocation
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 1998 Ingo Molnar
- *
- * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
- */
-
-#ifndef _ASM_X86_FIXMAP_32_H
-#define _ASM_X86_FIXMAP_32_H
-
-
-/* used by vmalloc.c, vsyscall.lds.S.
- *
- * Leave one empty page between vmalloc'ed areas and
- * the start of the fixmap.
- */
-extern unsigned long __FIXADDR_TOP;
-#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO)
-#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1)
-
-#ifndef __ASSEMBLY__
-#include <linux/kernel.h>
-#include <asm/acpi.h>
-#include <asm/apicdef.h>
-#include <asm/page.h>
-#include <linux/threads.h>
-#include <asm/kmap_types.h>
-
-/*
- * Here we define all the compile-time 'special' virtual
- * addresses. The point is to have a constant address at
- * compile time, but to set the physical address only
- * in the boot process. We allocate these special addresses
- * from the end of virtual memory (0xfffff000) backwards.
- * Also this lets us do fail-safe vmalloc(), we
- * can guarantee that these special addresses and
- * vmalloc()-ed addresses never overlap.
- *
- * these 'compile-time allocated' memory buffers are
- * fixed-size 4k pages. (or larger if used with an increment
- * highger than 1) use fixmap_set(idx,phys) to associate
- * physical memory with fixmap indices.
- *
- * TLB entries of such buffers will not be flushed across
- * task switches.
- */
-enum fixed_addresses {
- FIX_HOLE,
- FIX_VDSO,
- FIX_DBGP_BASE,
- FIX_EARLYCON_MEM_BASE,
-#ifdef CONFIG_X86_LOCAL_APIC
- FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
-#endif
-#ifdef CONFIG_X86_IO_APIC
- FIX_IO_APIC_BASE_0,
- FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
-#endif
-#ifdef CONFIG_X86_VISWS_APIC
- FIX_CO_CPU, /* Cobalt timer */
- FIX_CO_APIC, /* Cobalt APIC Redirection Table */
- FIX_LI_PCIA, /* Lithium PCI Bridge A */
- FIX_LI_PCIB, /* Lithium PCI Bridge B */
-#endif
-#ifdef CONFIG_X86_F00F_BUG
- FIX_F00F_IDT, /* Virtual mapping for IDT */
-#endif
-#ifdef CONFIG_X86_CYCLONE_TIMER
- FIX_CYCLONE_TIMER, /*cyclone timer register*/
-#endif
- FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
- FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
-#ifdef CONFIG_PCI_MMCONFIG
- FIX_PCIE_MCFG,
-#endif
-#ifdef CONFIG_PARAVIRT
- FIX_PARAVIRT_BOOTMAP,
-#endif
- __end_of_permanent_fixed_addresses,
- /*
- * 256 temporary boot-time mappings, used by early_ioremap(),
- * before ioremap() is functional.
- *
- * We round it up to the next 256 pages boundary so that we
- * can have a single pgd entry and a single pte table:
- */
-#define NR_FIX_BTMAPS 64
-#define FIX_BTMAPS_SLOTS 4
- FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 -
- (__end_of_permanent_fixed_addresses & 255),
- FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1,
- FIX_WP_TEST,
-#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
- FIX_OHCI1394_BASE,
-#endif
- __end_of_fixed_addresses
-};
-
-extern void reserve_top_address(unsigned long reserve);
-
-
-#define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP)
-
-#define __FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
-#define __FIXADDR_BOOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
-#define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE)
-#define FIXADDR_BOOT_START (FIXADDR_TOP - __FIXADDR_BOOT_SIZE)
-
-#endif /* !__ASSEMBLY__ */
-#endif /* _ASM_X86_FIXMAP_32_H */
diff --git a/arch/x86/include/asm/fixmap_64.h b/arch/x86/include/asm/fixmap_64.h
deleted file mode 100644
index 298d9ba3faeb..000000000000
--- a/arch/x86/include/asm/fixmap_64.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * fixmap.h: compile-time virtual memory allocation
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 1998 Ingo Molnar
- */
-
-#ifndef _ASM_X86_FIXMAP_64_H
-#define _ASM_X86_FIXMAP_64_H
-
-#include <linux/kernel.h>
-#include <asm/acpi.h>
-#include <asm/apicdef.h>
-#include <asm/page.h>
-#include <asm/vsyscall.h>
-#include <asm/efi.h>
-
-/*
- * Here we define all the compile-time 'special' virtual
- * addresses. The point is to have a constant address at
- * compile time, but to set the physical address only
- * in the boot process.
- *
- * These 'compile-time allocated' memory buffers are
- * fixed-size 4k pages (or larger if used with an increment
- * higher than 1). Use set_fixmap(idx,phys) to associate
- * physical memory with fixmap indices.
- *
- * TLB entries of such buffers will not be flushed across
- * task switches.
- */
-
-enum fixed_addresses {
- VSYSCALL_LAST_PAGE,
- VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE
- + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
- VSYSCALL_HPET,
- FIX_DBGP_BASE,
- FIX_EARLYCON_MEM_BASE,
- FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
- FIX_IO_APIC_BASE_0,
- FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
- FIX_EFI_IO_MAP_LAST_PAGE,
- FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE
- + MAX_EFI_IO_PAGES - 1,
-#ifdef CONFIG_PARAVIRT
- FIX_PARAVIRT_BOOTMAP,
-#endif
- __end_of_permanent_fixed_addresses,
-#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
- FIX_OHCI1394_BASE,
-#endif
- /*
- * 256 temporary boot-time mappings, used by early_ioremap(),
- * before ioremap() is functional.
- *
- * We round it up to the next 256 pages boundary so that we
- * can have a single pgd entry and a single pte table:
- */
-#define NR_FIX_BTMAPS 64
-#define FIX_BTMAPS_SLOTS 4
- FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 -
- (__end_of_permanent_fixed_addresses & 255),
- FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1,
- __end_of_fixed_addresses
-};
-
-#define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE)
-#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
-#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
-
-/* Only covers 32bit vsyscalls currently. Need another set for 64bit. */
-#define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL)
-#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE)
-
-#endif /* _ASM_X86_FIXMAP_64_H */
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 176f058e7159..039db6aa8e02 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -12,6 +12,7 @@ typedef struct {
unsigned int apic_timer_irqs; /* arch dependent */
unsigned int irq_spurious_count;
#endif
+ unsigned int generic_irqs; /* arch dependent */
#ifdef CONFIG_SMP
unsigned int irq_resched_count;
unsigned int irq_call_count;
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 370e1c83bb49..b762ea49bd70 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -27,6 +27,7 @@
/* Interrupt handlers registered during init_IRQ */
extern void apic_timer_interrupt(void);
+extern void generic_interrupt(void);
extern void error_interrupt(void);
extern void spurious_interrupt(void);
extern void thermal_interrupt(void);
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 48f0004db8c9..71c9e5183982 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -172,7 +172,13 @@ static inline void __save_init_fpu(struct task_struct *tsk)
#else /* CONFIG_X86_32 */
-extern void finit(void);
+#ifdef CONFIG_MATH_EMULATION
+extern void finit_task(struct task_struct *tsk);
+#else
+static inline void finit_task(struct task_struct *tsk)
+{
+}
+#endif
static inline void tolerant_fwait(void)
{
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
new file mode 100644
index 000000000000..36fb1a6a5109
--- /dev/null
+++ b/arch/x86/include/asm/init.h
@@ -0,0 +1,18 @@
+#ifndef _ASM_X86_INIT_32_H
+#define _ASM_X86_INIT_32_H
+
+#ifdef CONFIG_X86_32
+extern void __init early_ioremap_page_table_range_init(void);
+#endif
+
+extern unsigned long __init
+kernel_physical_mapping_init(unsigned long start,
+ unsigned long end,
+ unsigned long page_size_mask);
+
+
+extern unsigned long __initdata e820_table_start;
+extern unsigned long __meminitdata e820_table_end;
+extern unsigned long __meminitdata e820_table_top;
+
+#endif /* _ASM_X86_INIT_32_H */
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 683d0b4c00fc..e5383e3d2f8c 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -172,8 +172,6 @@ static inline void __iomem *ioremap(resource_size_t offset, unsigned long size)
extern void iounmap(volatile void __iomem *addr);
-extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
-
#ifdef CONFIG_X86_32
# include "io_32.h"
@@ -198,7 +196,6 @@ extern void early_ioremap_reset(void);
extern void __iomem *early_ioremap(unsigned long offset, unsigned long size);
extern void __iomem *early_memremap(unsigned long offset, unsigned long size);
extern void early_iounmap(void __iomem *addr, unsigned long size);
-extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
#define IO_SPACE_LIMIT 0xffff
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 107eb2196691..f38481bcd455 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -36,6 +36,7 @@ static inline int irq_canonicalize(int irq)
extern void fixup_irqs(void);
#endif
+extern void (*generic_interrupt_extension)(void);
extern void init_IRQ(void);
extern void native_init_IRQ(void);
extern bool handle_irq(unsigned irq, struct pt_regs *regs);
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 8a285f356f8a..3cbd79bbb47c 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -112,6 +112,11 @@
#define LOCAL_PERF_VECTOR 0xee
/*
+ * Generic system vector for platform specific use
+ */
+#define GENERIC_INTERRUPT_VECTOR 0xed
+
+/*
* First APIC vector available to drivers: (vectors 0x30-0xee) we
* start at 0x31(0x41) to spread out vectors evenly between priority
* levels. (0x80 is the syscall vector)
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 0ceb6d19ed30..317ff1703d0b 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -9,13 +9,13 @@
# define PAGES_NR 4
#else
# define PA_CONTROL_PAGE 0
-# define PA_TABLE_PAGE 1
-# define PAGES_NR 2
+# define VA_CONTROL_PAGE 1
+# define PA_TABLE_PAGE 2
+# define PA_SWAP_PAGE 3
+# define PAGES_NR 4
#endif
-#ifdef CONFIG_X86_32
# define KEXEC_CONTROL_CODE_MAX_SIZE 2048
-#endif
#ifndef __ASSEMBLY__
@@ -136,10 +136,11 @@ relocate_kernel(unsigned long indirection_page,
unsigned int has_pae,
unsigned int preserve_context);
#else
-NORET_TYPE void
+unsigned long
relocate_kernel(unsigned long indirection_page,
unsigned long page_list,
- unsigned long start_address) ATTRIB_NORET;
+ unsigned long start_address,
+ unsigned int preserve_context);
#endif
#define ARCH_HAS_KIMAGE_ARCH
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 9320e2a8a26a..a0d70b46c27c 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -4,11 +4,6 @@
#undef notrace
#define notrace __attribute__((no_instrument_function))
-#ifdef CONFIG_X86_64
-#define __ALIGN .p2align 4,,15
-#define __ALIGN_STR ".p2align 4,,15"
-#endif
-
#ifdef CONFIG_X86_32
#define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0)))
/*
@@ -50,16 +45,25 @@
__asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \
"g" (arg4), "g" (arg5), "g" (arg6))
-#endif
+#endif /* CONFIG_X86_32 */
+
+#ifdef __ASSEMBLY__
#define GLOBAL(name) \
.globl name; \
name:
+#ifdef CONFIG_X86_64
+#define __ALIGN .p2align 4,,15
+#define __ALIGN_STR ".p2align 4,,15"
+#endif
+
#ifdef CONFIG_X86_ALIGNMENT_16
#define __ALIGN .align 16,0x90
#define __ALIGN_STR ".align 16,0x90"
#endif
+#endif /* __ASSEMBLY__ */
+
#endif /* _ASM_X86_LINKAGE_H */
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index 105fb90a0635..ede6998bd92c 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -91,46 +91,9 @@ static inline int pfn_valid(int pfn)
#endif /* CONFIG_DISCONTIGMEM */
#ifdef CONFIG_NEED_MULTIPLE_NODES
-
-/*
- * Following are macros that are specific to this numa platform.
- */
-#define reserve_bootmem(addr, size, flags) \
- reserve_bootmem_node(NODE_DATA(0), (addr), (size), (flags))
-#define alloc_bootmem(x) \
- __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
-#define alloc_bootmem_nopanic(x) \
- __alloc_bootmem_node_nopanic(NODE_DATA(0), (x), SMP_CACHE_BYTES, \
- __pa(MAX_DMA_ADDRESS))
-#define alloc_bootmem_low(x) \
- __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, 0)
-#define alloc_bootmem_pages(x) \
- __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
-#define alloc_bootmem_pages_nopanic(x) \
- __alloc_bootmem_node_nopanic(NODE_DATA(0), (x), PAGE_SIZE, \
- __pa(MAX_DMA_ADDRESS))
-#define alloc_bootmem_low_pages(x) \
- __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0)
-#define alloc_bootmem_node(pgdat, x) \
-({ \
- struct pglist_data __maybe_unused \
- *__alloc_bootmem_node__pgdat = (pgdat); \
- __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, \
- __pa(MAX_DMA_ADDRESS)); \
-})
-#define alloc_bootmem_pages_node(pgdat, x) \
-({ \
- struct pglist_data __maybe_unused \
- *__alloc_bootmem_node__pgdat = (pgdat); \
- __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, \
- __pa(MAX_DMA_ADDRESS)); \
-})
-#define alloc_bootmem_low_pages_node(pgdat, x) \
-({ \
- struct pglist_data __maybe_unused \
- *__alloc_bootmem_node__pgdat = (pgdat); \
- __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0); \
-})
+/* always use node 0 for bootmem on this numa platform */
+#define bootmem_arch_preferred_node(__bdata, size, align, goal, limit) \
+ (NODE_DATA(0)->bdata)
#endif /* CONFIG_NEED_MULTIPLE_NODES */
#endif /* _ASM_X86_MMZONE_32_H */
diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h
index e9f5db796244..a37229011b56 100644
--- a/arch/x86/include/asm/numa_32.h
+++ b/arch/x86/include/asm/numa_32.h
@@ -4,8 +4,12 @@
extern int pxm_to_nid(int pxm);
extern void numa_remove_cpu(int cpu);
-#ifdef CONFIG_NUMA
+#ifdef CONFIG_HIGHMEM
extern void set_highmem_pages_init(void);
+#else
+static inline void set_highmem_pages_init(void)
+{
+}
#endif
#endif /* _ASM_X86_NUMA_32_H */
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 2d625da6603c..826ad37006ab 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -40,14 +40,8 @@
#ifndef __ASSEMBLY__
-struct pgprot;
-
extern int page_is_ram(unsigned long pagenr);
extern int devmem_is_allowed(unsigned long pagenr);
-extern void map_devmem(unsigned long pfn, unsigned long size,
- struct pgprot vma_prot);
-extern void unmap_devmem(unsigned long pfn, unsigned long size,
- struct pgprot vma_prot);
extern unsigned long max_low_pfn_mapped;
extern unsigned long max_pfn_mapped;
diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
index b0e70056838e..2cd07b9422f4 100644
--- a/arch/x86/include/asm/pat.h
+++ b/arch/x86/include/asm/pat.h
@@ -2,6 +2,7 @@
#define _ASM_X86_PAT_H
#include <linux/types.h>
+#include <asm/pgtable_types.h>
#ifdef CONFIG_X86_PAT
extern int pat_enabled;
@@ -17,5 +18,9 @@ extern int free_memtype(u64 start, u64 end);
extern int kernel_map_sync_memtype(u64 base, unsigned long size,
unsigned long flag);
+extern void map_devmem(unsigned long pfn, unsigned long size,
+ struct pgprot vma_prot);
+extern void unmap_devmem(unsigned long pfn, unsigned long size,
+ struct pgprot vma_prot);
#endif /* _ASM_X86_PAT_H */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index aee103b26d01..8f1d2fbec1d4 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -43,6 +43,14 @@
#else /* ...!ASSEMBLY */
#include <linux/stringify.h>
+#include <asm/sections.h>
+
+#define __addr_to_pcpu_ptr(addr) \
+ (void *)((unsigned long)(addr) - (unsigned long)pcpu_base_addr \
+ + (unsigned long)__per_cpu_start)
+#define __pcpu_ptr_to_addr(ptr) \
+ (void *)((unsigned long)(ptr) + (unsigned long)pcpu_base_addr \
+ - (unsigned long)__per_cpu_start)
#ifdef CONFIG_SMP
#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 1c097a3a6669..d0812e155f1d 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -288,6 +288,8 @@ static inline int is_new_memtype_allowed(unsigned long flags,
return 1;
}
+pmd_t *populate_extra_pmd(unsigned long vaddr);
+pte_t *populate_extra_pte(unsigned long vaddr);
#endif /* __ASSEMBLY__ */
#ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index bd8df3b2fe04..2733fad45f98 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -25,6 +25,11 @@
* area for the same reason. ;)
*/
#define VMALLOC_OFFSET (8 * 1024 * 1024)
+
+#ifndef __ASSEMBLER__
+extern bool __vmalloc_start_set; /* set once high_memory is set */
+#endif
+
#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET)
#ifdef CONFIG_X86_PAE
#define LAST_PKMAP 512
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 4d258ad76a0f..b8238dc8786d 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -273,6 +273,7 @@ typedef struct page *pgtable_t;
extern pteval_t __supported_pte_mask;
extern int nx_enabled;
+extern void set_nx(void);
#define pgprot_writecombine pgprot_writecombine
extern pgprot_t pgprot_writecombine(pgprot_t prot);
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c7a98f738210..76139506c3e4 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -248,7 +248,6 @@ struct x86_hw_tss {
#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap)
#define INVALID_IO_BITMAP_OFFSET 0x8000
-#define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000
struct tss_struct {
/*
@@ -263,11 +262,6 @@ struct tss_struct {
* be within the limit.
*/
unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
- /*
- * Cache the current maximum and the last task that used the bitmap:
- */
- unsigned long io_bitmap_max;
- struct thread_struct *io_bitmap_owner;
/*
* .. and then another 0x100 bytes for the emergency kernel stack:
diff --git a/arch/x86/include/asm/seccomp_32.h b/arch/x86/include/asm/seccomp_32.h
index a6ad87b352c4..b811d6f5780c 100644
--- a/arch/x86/include/asm/seccomp_32.h
+++ b/arch/x86/include/asm/seccomp_32.h
@@ -1,12 +1,6 @@
#ifndef _ASM_X86_SECCOMP_32_H
#define _ASM_X86_SECCOMP_32_H
-#include <linux/thread_info.h>
-
-#ifdef TIF_32BIT
-#error "unexpected TIF_32BIT on i386"
-#endif
-
#include <linux/unistd.h>
#define __NR_seccomp_read __NR_read
diff --git a/arch/x86/include/asm/seccomp_64.h b/arch/x86/include/asm/seccomp_64.h
index 4171bb794e9e..84ec1bd161a5 100644
--- a/arch/x86/include/asm/seccomp_64.h
+++ b/arch/x86/include/asm/seccomp_64.h
@@ -1,14 +1,6 @@
#ifndef _ASM_X86_SECCOMP_64_H
#define _ASM_X86_SECCOMP_64_H
-#include <linux/thread_info.h>
-
-#ifdef TIF_32BIT
-#error "unexpected TIF_32BIT on x86_64"
-#else
-#define TIF_32BIT TIF_IA32
-#endif
-
#include <linux/unistd.h>
#include <asm/ia32_unistd.h>
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 66801cb72f69..05c6f6b11fd5 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -31,7 +31,6 @@ struct x86_quirks {
void (*smp_read_mpc_oem)(struct mpc_oemtable *oemtable,
unsigned short oemsize);
int (*setup_ioapic_ids)(void);
- int (*update_apic)(void);
};
extern void x86_quirk_pre_intr_init(void);
@@ -65,7 +64,11 @@ extern void x86_quirk_time_init(void);
#include <asm/bootparam.h>
/* Interrupt control for vSMPowered x86_64 systems */
+#ifdef CONFIG_X86_VSMP
void vsmp_init(void);
+#else
+static inline void vsmp_init(void) { }
+#endif
void setup_bios_corruption_check(void);
@@ -77,8 +80,6 @@ static inline void visws_early_detect(void) { }
static inline int is_visws_box(void) { return 0; }
#endif
-extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
-extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip);
extern struct x86_quirks *x86_quirks;
extern unsigned long saved_video_mode;
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index c00bfdbdd456..643c59b4bc6e 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -20,6 +20,9 @@
struct task_struct; /* one of the stranger aspects of C forward declarations */
struct task_struct *__switch_to(struct task_struct *prev,
struct task_struct *next);
+struct tss_struct;
+void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
+ struct tss_struct *tss);
#ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index a0ba61386972..5e06259e90e5 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -157,7 +157,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
}
static __always_inline unsigned long __copy_from_user_nocache(void *to,
- const void __user *from, unsigned long n, unsigned long total)
+ const void __user *from, unsigned long n)
{
might_fault();
if (__builtin_constant_p(n)) {
@@ -180,7 +180,7 @@ static __always_inline unsigned long __copy_from_user_nocache(void *to,
static __always_inline unsigned long
__copy_from_user_inatomic_nocache(void *to, const void __user *from,
- unsigned long n, unsigned long total)
+ unsigned long n)
{
return __copy_from_user_ll_nocache_nozero(to, from, n);
}
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index dcaa0404cf7b..8cc687326eb8 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -188,29 +188,18 @@ __copy_to_user_inatomic(void __user *dst, const void *src, unsigned size)
extern long __copy_user_nocache(void *dst, const void __user *src,
unsigned size, int zerorest);
-static inline int __copy_from_user_nocache(void *dst, const void __user *src,
- unsigned size, unsigned long total)
+static inline int
+__copy_from_user_nocache(void *dst, const void __user *src, unsigned size)
{
might_sleep();
- /*
- * In practice this limit means that large file write()s
- * which get chunked to 4K copies get handled via
- * non-temporal stores here. Smaller writes get handled
- * via regular __copy_from_user():
- */
- if (likely(total >= PAGE_SIZE))
- return __copy_user_nocache(dst, src, size, 1);
- else
- return __copy_from_user(dst, src, size);
+ return __copy_user_nocache(dst, src, size, 1);
}
-static inline int __copy_from_user_inatomic_nocache(void *dst,
- const void __user *src, unsigned size, unsigned total)
+static inline int
+__copy_from_user_inatomic_nocache(void *dst, const void __user *src,
+ unsigned size)
{
- if (likely(total >= PAGE_SIZE))
- return __copy_user_nocache(dst, src, size, 0);
- else
- return __copy_from_user_inatomic(dst, src, size);
+ return __copy_user_nocache(dst, src, size, 0);
}
unsigned long
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index 8242bf965812..c0a01b5d985b 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -12,7 +12,6 @@ extern enum uv_system_type get_uv_system_type(void);
extern int is_uv_system(void);
extern void uv_cpu_init(void);
extern void uv_system_init(void);
-extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
struct mm_struct *mm,
unsigned long va,
@@ -24,8 +23,6 @@ static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; }
static inline int is_uv_system(void) { return 0; }
static inline void uv_cpu_init(void) { }
static inline void uv_system_init(void) { }
-static inline int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
-{ return 1; }
static inline const struct cpumask *
uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm,
unsigned long va, unsigned int cpu)
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 777327ef05c1..9f4dfba33b28 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -199,6 +199,10 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
#define SCIR_CPU_ACTIVITY 0x02 /* not idle */
#define SCIR_CPU_HB_INTERVAL (HZ) /* once per second */
+/* Loop through all installed blades */
+#define for_each_possible_blade(bid) \
+ for ((bid) = 0; (bid) < uv_num_possible_blades(); (bid)++)
+
/*
* Macros for converting between kernel virtual addresses, socket local physical
* addresses, and UV global physical addresses.
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 4bd990ee43df..1a918dde46b5 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -164,6 +164,7 @@ static inline pte_t __pte_ma(pteval_t x)
xmaddr_t arbitrary_virt_to_machine(void *address);
+unsigned long arbitrary_virt_to_mfn(void *vaddr);
void make_lowmem_page_readonly(void *vaddr);
void make_lowmem_page_readwrite(void *vaddr);
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index de5657c039e9..339ce35648e6 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -70,7 +70,7 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
-obj-y += vsmp_64.o
+obj-$(CONFIG_X86_VSMP) += vsmp_64.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_MODULES) += module_$(BITS).o
obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o
@@ -111,7 +111,7 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o # NB rename without _64
###
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)
- obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o
+ obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o
obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
obj-$(CONFIG_AUDIT) += audit_64.o
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 3b002995e145..f933822dba18 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -222,7 +222,6 @@ struct apic apic_flat = {
.send_IPI_all = flat_send_IPI_all,
.send_IPI_self = apic_send_IPI_self,
- .wakeup_cpu = NULL,
.trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
.trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
.wait_for_init_deassert = NULL,
@@ -373,7 +372,6 @@ struct apic apic_physflat = {
.send_IPI_all = physflat_send_IPI_all,
.send_IPI_self = apic_send_IPI_self,
- .wakeup_cpu = NULL,
.trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
.trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
.wait_for_init_deassert = NULL,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 0b1093394fdf..d806ecaa948f 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -16,17 +16,17 @@
#include <asm/apic.h>
#include <asm/ipi.h>
-static inline unsigned bigsmp_get_apic_id(unsigned long x)
+static unsigned bigsmp_get_apic_id(unsigned long x)
{
return (x >> 24) & 0xFF;
}
-static inline int bigsmp_apic_id_registered(void)
+static int bigsmp_apic_id_registered(void)
{
return 1;
}
-static inline const cpumask_t *bigsmp_target_cpus(void)
+static const cpumask_t *bigsmp_target_cpus(void)
{
#ifdef CONFIG_SMP
return &cpu_online_map;
@@ -35,13 +35,12 @@ static inline const cpumask_t *bigsmp_target_cpus(void)
#endif
}
-static inline unsigned long
-bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid)
+static unsigned long bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid)
{
return 0;
}
-static inline unsigned long bigsmp_check_apicid_present(int bit)
+static unsigned long bigsmp_check_apicid_present(int bit)
{
return 1;
}
@@ -64,7 +63,7 @@ static inline unsigned long calculate_ldr(int cpu)
* an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
* document number 292116). So here it goes...
*/
-static inline void bigsmp_init_apic_ldr(void)
+static void bigsmp_init_apic_ldr(void)
{
unsigned long val;
int cpu = smp_processor_id();
@@ -74,19 +73,19 @@ static inline void bigsmp_init_apic_ldr(void)
apic_write(APIC_LDR, val);
}
-static inline void bigsmp_setup_apic_routing(void)
+static void bigsmp_setup_apic_routing(void)
{
printk(KERN_INFO
"Enabling APIC mode: Physflat. Using %d I/O APICs\n",
nr_ioapics);
}
-static inline int bigsmp_apicid_to_node(int logical_apicid)
+static int bigsmp_apicid_to_node(int logical_apicid)
{
return apicid_2_node[hard_smp_processor_id()];
}
-static inline int bigsmp_cpu_present_to_apicid(int mps_cpu)
+static int bigsmp_cpu_present_to_apicid(int mps_cpu)
{
if (mps_cpu < nr_cpu_ids)
return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu);
@@ -94,7 +93,7 @@ static inline int bigsmp_cpu_present_to_apicid(int mps_cpu)
return BAD_APICID;
}
-static inline physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid)
+static physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid)
{
return physid_mask_of_physid(phys_apicid);
}
@@ -107,29 +106,24 @@ static inline int bigsmp_cpu_to_logical_apicid(int cpu)
return cpu_physical_id(cpu);
}
-static inline physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map)
+static physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map)
{
/* For clustered we don't have a good way to do this yet - hack */
return physids_promote(0xFFL);
}
-static inline void bigsmp_setup_portio_remap(void)
-{
-}
-
-static inline int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid)
+static int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid)
{
return 1;
}
/* As we are using single CPU as destination, pick only one CPU here */
-static inline unsigned int bigsmp_cpu_mask_to_apicid(const cpumask_t *cpumask)
+static unsigned int bigsmp_cpu_mask_to_apicid(const cpumask_t *cpumask)
{
return bigsmp_cpu_to_logical_apicid(first_cpu(*cpumask));
}
-static inline unsigned int
-bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
const struct cpumask *andmask)
{
int cpu;
@@ -148,7 +142,7 @@ bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
return BAD_APICID;
}
-static inline int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb)
+static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb)
{
return cpuid_apic >> index_msb;
}
@@ -158,12 +152,12 @@ static inline void bigsmp_send_IPI_mask(const struct cpumask *mask, int vector)
default_send_IPI_mask_sequence_phys(mask, vector);
}
-static inline void bigsmp_send_IPI_allbutself(int vector)
+static void bigsmp_send_IPI_allbutself(int vector)
{
default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
}
-static inline void bigsmp_send_IPI_all(int vector)
+static void bigsmp_send_IPI_all(int vector)
{
bigsmp_send_IPI_mask(cpu_online_mask, vector);
}
@@ -256,7 +250,6 @@ struct apic apic_bigsmp = {
.send_IPI_all = bigsmp_send_IPI_all,
.send_IPI_self = default_send_IPI_self,
- .wakeup_cpu = NULL,
.trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
.trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 320f2d2e4e54..19588f2770ee 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -163,22 +163,17 @@ static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
return 0;
}
-static int __init es7000_update_apic(void)
+static int es7000_apic_is_cluster(void)
{
- apic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
-
/* MPENTIUMIII */
if (boot_cpu_data.x86 == 6 &&
- (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) {
- es7000_update_apic_to_cluster();
- apic->wait_for_init_deassert = NULL;
- apic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
- }
+ (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11))
+ return 1;
return 0;
}
-static void __init setup_unisys(void)
+static void setup_unisys(void)
{
/*
* Determine the generation of the ES7000 currently running.
@@ -192,14 +187,12 @@ static void __init setup_unisys(void)
else
es7000_plat = ES7000_CLASSIC;
ioapic_renumber_irq = es7000_rename_gsi;
-
- x86_quirks->update_apic = es7000_update_apic;
}
/*
* Parse the OEM Table:
*/
-static int __init parse_unisys_oem(char *oemptr)
+static int parse_unisys_oem(char *oemptr)
{
int i;
int success = 0;
@@ -261,7 +254,7 @@ static int __init parse_unisys_oem(char *oemptr)
}
#ifdef CONFIG_ACPI
-static int __init find_unisys_acpi_oem_table(unsigned long *oem_addr)
+static int find_unisys_acpi_oem_table(unsigned long *oem_addr)
{
struct acpi_table_header *header = NULL;
struct es7000_oem_table *table;
@@ -292,7 +285,7 @@ static int __init find_unisys_acpi_oem_table(unsigned long *oem_addr)
return 0;
}
-static void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr)
+static void unmap_unisys_acpi_oem_table(unsigned long oem_addr)
{
if (!oem_addr)
return;
@@ -310,8 +303,10 @@ static int es7000_check_dsdt(void)
return 0;
}
+static int es7000_acpi_ret;
+
/* Hook from generic ACPI tables.c */
-static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+static int es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
unsigned long oem_addr = 0;
int check_dsdt;
@@ -332,10 +327,26 @@ static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
*/
unmap_unisys_acpi_oem_table(oem_addr);
}
- return ret;
+
+ es7000_acpi_ret = ret;
+
+ return ret && !es7000_apic_is_cluster();
}
+
+static int es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id)
+{
+ int ret = es7000_acpi_ret;
+
+ return ret && es7000_apic_is_cluster();
+}
+
#else /* !CONFIG_ACPI: */
-static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+static int es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ return 0;
+}
+
+static int es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id)
{
return 0;
}
@@ -349,8 +360,7 @@ static void es7000_spin(int n)
rep_nop();
}
-static int __init
-es7000_mip_write(struct mip_reg *mip_reg)
+static int es7000_mip_write(struct mip_reg *mip_reg)
{
int status = 0;
int spin;
@@ -383,7 +393,7 @@ es7000_mip_write(struct mip_reg *mip_reg)
return status;
}
-static void __init es7000_enable_apic_mode(void)
+static void es7000_enable_apic_mode(void)
{
struct mip_reg es7000_mip_reg;
int mip_status;
@@ -416,11 +426,8 @@ static void es7000_vector_allocation_domain(int cpu, cpumask_t *retmask)
static void es7000_wait_for_init_deassert(atomic_t *deassert)
{
-#ifndef CONFIG_ES7000_CLUSTERED_APIC
while (!atomic_read(deassert))
cpu_relax();
-#endif
- return;
}
static unsigned int es7000_get_apic_id(unsigned long x)
@@ -565,72 +572,24 @@ static int es7000_check_phys_apicid_present(int cpu_physical_apicid)
return 1;
}
-static unsigned int
-es7000_cpu_mask_to_apicid_cluster(const struct cpumask *cpumask)
-{
- int cpus_found = 0;
- int num_bits_set;
- int apicid;
- int cpu;
-
- num_bits_set = cpumask_weight(cpumask);
- /* Return id to all */
- if (num_bits_set == nr_cpu_ids)
- return 0xFF;
- /*
- * The cpus in the mask must all be on the apic cluster. If are not
- * on the same apicid cluster return default value of target_cpus():
- */
- cpu = cpumask_first(cpumask);
- apicid = es7000_cpu_to_logical_apicid(cpu);
-
- while (cpus_found < num_bits_set) {
- if (cpumask_test_cpu(cpu, cpumask)) {
- int new_apicid = es7000_cpu_to_logical_apicid(cpu);
-
- if (APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
- WARN(1, "Not a valid mask!");
-
- return 0xFF;
- }
- apicid = new_apicid;
- cpus_found++;
- }
- cpu++;
- }
- return apicid;
-}
-
static unsigned int es7000_cpu_mask_to_apicid(const cpumask_t *cpumask)
{
- int cpus_found = 0;
- int num_bits_set;
- int apicid;
- int cpu;
+ unsigned int round = 0;
+ int cpu, uninitialized_var(apicid);
- num_bits_set = cpus_weight(*cpumask);
- /* Return id to all */
- if (num_bits_set == nr_cpu_ids)
- return es7000_cpu_to_logical_apicid(0);
/*
- * The cpus in the mask must all be on the apic cluster. If are not
- * on the same apicid cluster return default value of target_cpus():
+ * The cpus in the mask must all be on the apic cluster.
*/
- cpu = first_cpu(*cpumask);
- apicid = es7000_cpu_to_logical_apicid(cpu);
- while (cpus_found < num_bits_set) {
- if (cpu_isset(cpu, *cpumask)) {
- int new_apicid = es7000_cpu_to_logical_apicid(cpu);
+ for_each_cpu(cpu, cpumask) {
+ int new_apicid = es7000_cpu_to_logical_apicid(cpu);
- if (APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
- printk("%s: Not a valid mask!\n", __func__);
+ if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
+ WARN(1, "Not a valid mask!");
- return es7000_cpu_to_logical_apicid(0);
- }
- apicid = new_apicid;
- cpus_found++;
+ return BAD_APICID;
}
- cpu++;
+ apicid = new_apicid;
+ round++;
}
return apicid;
}
@@ -659,37 +618,103 @@ static int es7000_phys_pkg_id(int cpuid_apic, int index_msb)
return cpuid_apic >> index_msb;
}
-void __init es7000_update_apic_to_cluster(void)
-{
- apic->target_cpus = target_cpus_cluster;
- apic->irq_delivery_mode = dest_LowestPrio;
- /* logical delivery broadcast to all procs: */
- apic->irq_dest_mode = 1;
-
- apic->init_apic_ldr = es7000_init_apic_ldr_cluster;
-
- apic->cpu_mask_to_apicid = es7000_cpu_mask_to_apicid_cluster;
-}
-
static int probe_es7000(void)
{
/* probed later in mptable/ACPI hooks */
return 0;
}
-static __init int
-es7000_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
+static int es7000_mps_ret;
+static int es7000_mps_oem_check(struct mpc_table *mpc, char *oem,
+ char *productid)
{
+ int ret = 0;
+
if (mpc->oemptr) {
struct mpc_oemtable *oem_table =
(struct mpc_oemtable *)mpc->oemptr;
if (!strncmp(oem, "UNISYS", 6))
- return parse_unisys_oem((char *)oem_table);
+ ret = parse_unisys_oem((char *)oem_table);
}
- return 0;
+
+ es7000_mps_ret = ret;
+
+ return ret && !es7000_apic_is_cluster();
}
+static int es7000_mps_oem_check_cluster(struct mpc_table *mpc, char *oem,
+ char *productid)
+{
+ int ret = es7000_mps_ret;
+
+ return ret && es7000_apic_is_cluster();
+}
+
+struct apic apic_es7000_cluster = {
+
+ .name = "es7000",
+ .probe = probe_es7000,
+ .acpi_madt_oem_check = es7000_acpi_madt_oem_check_cluster,
+ .apic_id_registered = es7000_apic_id_registered,
+
+ .irq_delivery_mode = dest_LowestPrio,
+ /* logical delivery broadcast to all procs: */
+ .irq_dest_mode = 1,
+
+ .target_cpus = target_cpus_cluster,
+ .disable_esr = 1,
+ .dest_logical = 0,
+ .check_apicid_used = es7000_check_apicid_used,
+ .check_apicid_present = es7000_check_apicid_present,
+
+ .vector_allocation_domain = es7000_vector_allocation_domain,
+ .init_apic_ldr = es7000_init_apic_ldr_cluster,
+
+ .ioapic_phys_id_map = es7000_ioapic_phys_id_map,
+ .setup_apic_routing = es7000_setup_apic_routing,
+ .multi_timer_check = NULL,
+ .apicid_to_node = es7000_apicid_to_node,
+ .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid,
+ .cpu_present_to_apicid = es7000_cpu_present_to_apicid,
+ .apicid_to_cpu_present = es7000_apicid_to_cpu_present,
+ .setup_portio_remap = NULL,
+ .check_phys_apicid_present = es7000_check_phys_apicid_present,
+ .enable_apic_mode = es7000_enable_apic_mode,
+ .phys_pkg_id = es7000_phys_pkg_id,
+ .mps_oem_check = es7000_mps_oem_check_cluster,
+
+ .get_apic_id = es7000_get_apic_id,
+ .set_apic_id = NULL,
+ .apic_id_mask = 0xFF << 24,
+
+ .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and,
+
+ .send_IPI_mask = es7000_send_IPI_mask,
+ .send_IPI_mask_allbutself = NULL,
+ .send_IPI_allbutself = es7000_send_IPI_allbutself,
+ .send_IPI_all = es7000_send_IPI_all,
+ .send_IPI_self = default_send_IPI_self,
+
+ .wakeup_secondary_cpu = wakeup_secondary_cpu_via_mip,
+
+ .trampoline_phys_low = 0x467,
+ .trampoline_phys_high = 0x469,
+
+ .wait_for_init_deassert = NULL,
+
+ /* Nothing to do for most platforms, since cleared by the INIT cycle: */
+ .smp_callin_clear_local_apic = NULL,
+ .inquire_remote_apic = default_inquire_remote_apic,
+
+ .read = native_apic_mem_read,
+ .write = native_apic_mem_write,
+ .icr_read = native_apic_icr_read,
+ .icr_write = native_apic_icr_write,
+ .wait_icr_idle = native_apic_wait_icr_idle,
+ .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+};
struct apic apic_es7000 = {
@@ -737,8 +762,6 @@ struct apic apic_es7000 = {
.send_IPI_all = es7000_send_IPI_all,
.send_IPI_self = default_send_IPI_self,
- .wakeup_cpu = NULL,
-
.trampoline_phys_low = 0x467,
.trampoline_phys_high = 0x469,
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index d9d6d61eed82..ba2fc6465534 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -69,7 +69,7 @@ struct mpc_trans {
/* x86_quirks member */
static int mpc_record;
-static __cpuinitdata struct mpc_trans *translation_table[MAX_MPC_ENTRY];
+static struct mpc_trans *translation_table[MAX_MPC_ENTRY];
int mp_bus_id_to_node[MAX_MP_BUSSES];
int mp_bus_id_to_local[MAX_MP_BUSSES];
@@ -256,13 +256,6 @@ static int __init numaq_setup_ioapic_ids(void)
return 1;
}
-static int __init numaq_update_apic(void)
-{
- apic->wakeup_cpu = wakeup_secondary_cpu_via_nmi;
-
- return 0;
-}
-
static struct x86_quirks numaq_x86_quirks __initdata = {
.arch_pre_time_init = numaq_pre_time_init,
.arch_time_init = NULL,
@@ -278,7 +271,6 @@ static struct x86_quirks numaq_x86_quirks __initdata = {
.mpc_oem_pci_bus = mpc_oem_pci_bus,
.smp_read_mpc_oem = smp_read_mpc_oem,
.setup_ioapic_ids = numaq_setup_ioapic_ids,
- .update_apic = numaq_update_apic,
};
static __init void early_check_numaq(void)
@@ -546,7 +538,7 @@ struct apic apic_numaq = {
.send_IPI_all = numaq_send_IPI_all,
.send_IPI_self = default_send_IPI_self,
- .wakeup_cpu = NULL,
+ .wakeup_secondary_cpu = wakeup_secondary_cpu_via_nmi,
.trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW,
.trampoline_phys_high = NUMAQ_TRAMPOLINE_PHYS_HIGH,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 3a730fa574bb..141c99a1c264 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -138,7 +138,6 @@ struct apic apic_default = {
.send_IPI_all = default_send_IPI_all,
.send_IPI_self = default_send_IPI_self,
- .wakeup_cpu = NULL,
.trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
.trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
@@ -159,6 +158,7 @@ extern struct apic apic_numaq;
extern struct apic apic_summit;
extern struct apic apic_bigsmp;
extern struct apic apic_es7000;
+extern struct apic apic_es7000_cluster;
extern struct apic apic_default;
struct apic *apic = &apic_default;
@@ -176,6 +176,7 @@ static struct apic *apic_probe[] __initdata = {
#endif
#ifdef CONFIG_X86_ES7000
&apic_es7000,
+ &apic_es7000_cluster,
#endif
&apic_default, /* must be last */
NULL,
@@ -197,9 +198,6 @@ static int __init parse_apic(char *arg)
}
}
- if (x86_quirks->update_apic)
- x86_quirks->update_apic();
-
/* Parsed again by __setup for debug/verbose */
return 0;
}
@@ -218,8 +216,6 @@ void __init generic_bigsmp_probe(void)
if (!cmdline_apic && apic == &apic_default) {
if (apic_bigsmp.probe()) {
apic = &apic_bigsmp;
- if (x86_quirks->update_apic)
- x86_quirks->update_apic();
printk(KERN_INFO "Overriding APIC driver with %s\n",
apic->name);
}
@@ -240,9 +236,6 @@ void __init generic_apic_probe(void)
/* Not visible without early console */
if (!apic_probe[i])
panic("Didn't find an APIC driver");
-
- if (x86_quirks->update_apic)
- x86_quirks->update_apic();
}
printk(KERN_INFO "Using APIC driver %s\n", apic->name);
}
@@ -262,8 +255,6 @@ generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
if (!cmdline_apic) {
apic = apic_probe[i];
- if (x86_quirks->update_apic)
- x86_quirks->update_apic();
printk(KERN_INFO "Switched to APIC driver `%s'.\n",
apic->name);
}
@@ -284,8 +275,6 @@ int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
if (!cmdline_apic) {
apic = apic_probe[i];
- if (x86_quirks->update_apic)
- x86_quirks->update_apic();
printk(KERN_INFO "Switched to APIC driver `%s'.\n",
apic->name);
}
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index e7c163661c77..8d7748efe6a8 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -68,9 +68,6 @@ void __init default_setup_apic_routing(void)
apic = &apic_physflat;
printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
}
-
- if (x86_quirks->update_apic)
- x86_quirks->update_apic();
}
/* Same for both flat and physical. */
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 32838b57a945..aac52fa873ff 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -77,9 +77,9 @@ static void summit_send_IPI_all(int vector)
extern int use_cyclone;
#ifdef CONFIG_X86_SUMMIT_NUMA
-extern void setup_summit(void);
+static void setup_summit(void);
#else
-#define setup_summit() {}
+static inline void setup_summit(void) {}
#endif
static int summit_mps_oem_check(struct mpc_table *mpc, char *oem,
@@ -291,33 +291,21 @@ static int summit_check_phys_apicid_present(int boot_cpu_physical_apicid)
static unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask)
{
- int cpus_found = 0;
- int num_bits_set;
- int apicid;
- int cpu;
+ unsigned int round = 0;
+ int cpu, apicid = 0;
- num_bits_set = cpus_weight(*cpumask);
- if (num_bits_set >= nr_cpu_ids)
- return BAD_APICID;
/*
* The cpus in the mask must all be on the apic cluster.
*/
- cpu = first_cpu(*cpumask);
- apicid = summit_cpu_to_logical_apicid(cpu);
-
- while (cpus_found < num_bits_set) {
- if (cpu_isset(cpu, *cpumask)) {
- int new_apicid = summit_cpu_to_logical_apicid(cpu);
+ for_each_cpu(cpu, cpumask) {
+ int new_apicid = summit_cpu_to_logical_apicid(cpu);
- if (APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
- printk("%s: Not a valid mask!\n", __func__);
-
- return BAD_APICID;
- }
- apicid = apicid | new_apicid;
- cpus_found++;
+ if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
+ printk("%s: Not a valid mask!\n", __func__);
+ return BAD_APICID;
}
- cpu++;
+ apicid |= new_apicid;
+ round++;
}
return apicid;
}
@@ -372,15 +360,15 @@ static void summit_vector_allocation_domain(int cpu, cpumask_t *retmask)
}
#ifdef CONFIG_X86_SUMMIT_NUMA
-static struct rio_table_hdr *rio_table_hdr __initdata;
-static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata;
-static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata;
+static struct rio_table_hdr *rio_table_hdr;
+static struct scal_detail *scal_devs[MAX_NUMNODES];
+static struct rio_detail *rio_devs[MAX_NUMNODES*4];
#ifndef CONFIG_X86_NUMAQ
-static int mp_bus_id_to_node[MAX_MP_BUSSES] __initdata;
+static int mp_bus_id_to_node[MAX_MP_BUSSES];
#endif
-static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
+static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
{
int twister = 0, node = 0;
int i, bus, num_buses;
@@ -442,7 +430,7 @@ static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
return bus;
}
-static int __init build_detail_arrays(void)
+static int build_detail_arrays(void)
{
unsigned long ptr;
int i, scal_detail_size, rio_detail_size;
@@ -476,7 +464,7 @@ static int __init build_detail_arrays(void)
return 1;
}
-void __init setup_summit(void)
+void setup_summit(void)
{
unsigned long ptr;
unsigned short offset;
@@ -574,7 +562,6 @@ struct apic apic_summit = {
.send_IPI_all = summit_send_IPI_all,
.send_IPI_self = default_send_IPI_self,
- .wakeup_cpu = NULL,
.trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
.trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 354b9c45601d..8fb87b6dd633 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -224,7 +224,6 @@ struct apic apic_x2apic_cluster = {
.send_IPI_all = x2apic_send_IPI_all,
.send_IPI_self = x2apic_send_IPI_self,
- .wakeup_cpu = NULL,
.trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
.trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
.wait_for_init_deassert = NULL,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 5bcb174409bc..23625b9f98b2 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -213,7 +213,6 @@ struct apic apic_x2apic_phys = {
.send_IPI_all = x2apic_send_IPI_all,
.send_IPI_self = x2apic_send_IPI_self,
- .wakeup_cpu = NULL,
.trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
.trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
.wait_for_init_deassert = NULL,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 20b4ad07c3a1..1bd6da1f8fad 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -7,28 +7,28 @@
*
* Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved.
*/
-
-#include <linux/kernel.h>
-#include <linux/threads.h>
-#include <linux/cpu.h>
#include <linux/cpumask.h>
+#include <linux/hardirq.h>
+#include <linux/proc_fs.h>
+#include <linux/threads.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/string.h>
#include <linux/ctype.h>
-#include <linux/init.h>
#include <linux/sched.h>
-#include <linux/module.h>
-#include <linux/hardirq.h>
#include <linux/timer.h>
-#include <linux/proc_fs.h>
-#include <asm/current.h>
-#include <asm/smp.h>
-#include <asm/apic.h>
-#include <asm/ipi.h>
-#include <asm/pgtable.h>
-#include <asm/uv/uv.h>
+#include <linux/cpu.h>
+#include <linux/init.h>
+
#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
+#include <asm/current.h>
+#include <asm/pgtable.h>
#include <asm/uv/bios.h>
+#include <asm/uv/uv.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+#include <asm/smp.h>
DEFINE_PER_CPU(int, x2apic_extra_bits);
@@ -91,24 +91,28 @@ static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
cpumask_set_cpu(cpu, retmask);
}
-int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
+static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
{
+#ifdef CONFIG_SMP
unsigned long val;
int pnode;
pnode = uv_apicid_to_pnode(phys_apicid);
val = (1UL << UVH_IPI_INT_SEND_SHFT) |
(phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
- (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
+ ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
APIC_DM_INIT;
uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
mdelay(10);
val = (1UL << UVH_IPI_INT_SEND_SHFT) |
(phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
- (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
+ ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
APIC_DM_STARTUP;
uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
+
+ atomic_set(&init_deasserted, 1);
+#endif
return 0;
}
@@ -285,7 +289,7 @@ struct apic apic_x2apic_uv_x = {
.send_IPI_all = uv_send_IPI_all,
.send_IPI_self = uv_send_IPI_self,
- .wakeup_cpu = NULL,
+ .wakeup_secondary_cpu = uv_wakeup_secondary,
.trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
.trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
.wait_for_init_deassert = NULL,
@@ -365,7 +369,7 @@ static __init void map_high(char *id, unsigned long base, int shift,
paddr = base << shift;
bytes = (1UL << shift) * (max_pnode + 1);
printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr,
- paddr + bytes);
+ paddr + bytes);
if (map_type == map_uc)
init_extra_mapping_uc(paddr, bytes);
else
@@ -528,7 +532,7 @@ late_initcall(uv_init_heartbeat);
/*
* Called on each cpu to initialize the per_cpu UV data area.
- * ZZZ hotplug not supported yet
+ * FIXME: hotplug not supported yet
*/
void __cpuinit uv_cpu_init(void)
{
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 25423a5b80ed..f47df59016c5 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -5,6 +5,7 @@
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/apic.h>
+#include <asm/cpu.h>
#ifdef CONFIG_X86_64
# include <asm/numa_64.h>
@@ -141,6 +142,55 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c)
}
}
+static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_SMP
+ /* calling is from identify_secondary_cpu() ? */
+ if (c->cpu_index == boot_cpu_id)
+ return;
+
+ /*
+ * Certain Athlons might work (for various values of 'work') in SMP
+ * but they are not certified as MP capable.
+ */
+ /* Athlon 660/661 is valid. */
+ if ((c->x86_model == 6) && ((c->x86_mask == 0) ||
+ (c->x86_mask == 1)))
+ goto valid_k7;
+
+ /* Duron 670 is valid */
+ if ((c->x86_model == 7) && (c->x86_mask == 0))
+ goto valid_k7;
+
+ /*
+ * Athlon 662, Duron 671, and Athlon >model 7 have capability
+ * bit. It's worth noting that the A5 stepping (662) of some
+ * Athlon XP's have the MP bit set.
+ * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for
+ * more.
+ */
+ if (((c->x86_model == 6) && (c->x86_mask >= 2)) ||
+ ((c->x86_model == 7) && (c->x86_mask >= 1)) ||
+ (c->x86_model > 7))
+ if (cpu_has_mp)
+ goto valid_k7;
+
+ /* If we get here, not a certified SMP capable AMD system. */
+
+ /*
+ * Don't taint if we are running SMP kernel on a single non-MP
+ * approved Athlon
+ */
+ WARN_ONCE(1, "WARNING: This combination of AMD"
+ "processors is not suitable for SMP.\n");
+ if (!test_taint(TAINT_UNSAFE_SMP))
+ add_taint(TAINT_UNSAFE_SMP);
+
+valid_k7:
+ ;
+#endif
+}
+
static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
{
u32 l, h;
@@ -175,6 +225,8 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
}
set_cpu_cap(c, X86_FEATURE_K7);
+
+ amd_k7_smp_check(c);
}
#endif
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 4b1c319d30c3..22590cf688ae 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -601,7 +601,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
if (!data)
return -ENOMEM;
- data->acpi_data = percpu_ptr(acpi_perf_data, cpu);
+ data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu);
per_cpu(drv_data, cpu) = data;
if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index b585e04cbc9e..3178c3acd97e 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -277,7 +277,6 @@ static struct cpufreq_driver p4clockmod_driver = {
.name = "p4-clockmod",
.owner = THIS_MODULE,
.attr = p4clockmod_attr,
- .hide_interface = 1,
};
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 25c559ba8d54..191117f1ad51 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -13,6 +13,7 @@
#include <asm/uaccess.h>
#include <asm/ds.h>
#include <asm/bugs.h>
+#include <asm/cpu.h>
#ifdef CONFIG_X86_64
#include <asm/topology.h>
@@ -110,6 +111,28 @@ static void __cpuinit trap_init_f00f_bug(void)
}
#endif
+static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_SMP
+ /* calling is from identify_secondary_cpu() ? */
+ if (c->cpu_index == boot_cpu_id)
+ return;
+
+ /*
+ * Mask B, Pentium, but not Pentium MMX
+ */
+ if (c->x86 == 5 &&
+ c->x86_mask >= 1 && c->x86_mask <= 4 &&
+ c->x86_model <= 3) {
+ /*
+ * Remember we have B step Pentia with bugs
+ */
+ WARN_ONCE(1, "WARNING: SMP operation may be unreliable"
+ "with B stepping processors.\n");
+ }
+#endif
+}
+
static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
{
unsigned long lo, hi;
@@ -186,6 +209,8 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
#ifdef CONFIG_X86_NUMAQ
numaq_tsc_disable();
#endif
+
+ intel_smp_check(c);
}
#else
static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 01b1244ef1c0..d67e0e48bc2d 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -7,11 +7,10 @@
/*
* Get CPU information for use by the procfs.
*/
-#ifdef CONFIG_X86_32
static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
unsigned int cpu)
{
-#ifdef CONFIG_X86_HT
+#ifdef CONFIG_SMP
if (c->x86_max_cores * smp_num_siblings > 1) {
seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
seq_printf(m, "siblings\t: %d\n",
@@ -24,6 +23,7 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
#endif
}
+#ifdef CONFIG_X86_32
static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
{
/*
@@ -50,22 +50,6 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
c->wp_works_ok ? "yes" : "no");
}
#else
-static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
- unsigned int cpu)
-{
-#ifdef CONFIG_SMP
- if (c->x86_max_cores * smp_num_siblings > 1) {
- seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
- seq_printf(m, "siblings\t: %d\n",
- cpus_weight(per_cpu(cpu_core_map, cpu)));
- seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
- seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
- seq_printf(m, "apicid\t\t: %d\n", c->apicid);
- seq_printf(m, "initial apicid\t: %d\n", c->initial_apicid);
- }
-#endif
-}
-
static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
{
seq_printf(m,
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 169a120587be..87b67e3a765a 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -729,7 +729,7 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task,
spin_unlock_irqrestore(&ds_lock, irq);
- ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
+ ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
ds_resume_pebs(tracer);
return tracer;
@@ -1029,5 +1029,4 @@ void ds_copy_thread(struct task_struct *tsk, struct task_struct *father)
void ds_exit_thread(struct task_struct *tsk)
{
- WARN_ON(tsk->thread.ds_ctx);
}
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index b205272ad394..1736acc4d7aa 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -469,7 +469,7 @@ void __init efi_enter_virtual_mode(void)
efi_memory_desc_t *md;
efi_status_t status;
unsigned long size;
- u64 end, systab, addr, npages;
+ u64 end, systab, addr, npages, end_pfn;
void *p, *va;
efi.systab = NULL;
@@ -481,7 +481,10 @@ void __init efi_enter_virtual_mode(void)
size = md->num_pages << EFI_PAGE_SHIFT;
end = md->phys_addr + size;
- if (PFN_UP(end) <= max_low_pfn_mapped)
+ end_pfn = PFN_UP(end);
+ if (end_pfn <= max_low_pfn_mapped
+ || (end_pfn > (1UL << (32 - PAGE_SHIFT))
+ && end_pfn <= max_pfn_mapped))
va = __va(md->phys_addr);
else
va = efi_ioremap(md->phys_addr, size);
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index a4ee29127fdf..22c3b7828c50 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -100,24 +100,11 @@ void __init efi_call_phys_epilog(void)
void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size)
{
- static unsigned pages_mapped __initdata;
- unsigned i, pages;
- unsigned long offset;
+ unsigned long last_map_pfn;
- pages = PFN_UP(phys_addr + size) - PFN_DOWN(phys_addr);
- offset = phys_addr & ~PAGE_MASK;
- phys_addr &= PAGE_MASK;
-
- if (pages_mapped + pages > MAX_EFI_IO_PAGES)
+ last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
+ if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size)
return NULL;
- for (i = 0; i < pages; i++) {
- __set_fixmap(FIX_EFI_IO_MAP_FIRST_PAGE - pages_mapped,
- phys_addr, PAGE_KERNEL);
- phys_addr += PAGE_SIZE;
- pages_mapped++;
- }
-
- return (void __iomem *)__fix_to_virt(FIX_EFI_IO_MAP_FIRST_PAGE - \
- (pages_mapped - pages)) + offset;
+ return (void __iomem *)__va(phys_addr);
}
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 83d1836b9467..7ba4621c0dfa 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -984,6 +984,8 @@ apicinterrupt UV_BAU_MESSAGE \
#endif
apicinterrupt LOCAL_TIMER_VECTOR \
apic_timer_interrupt smp_apic_timer_interrupt
+apicinterrupt GENERIC_INTERRUPT_VECTOR \
+ generic_interrupt smp_generic_interrupt
#ifdef CONFIG_SMP
apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index b0f61f0dcd0a..f2f8540a7f3d 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -136,7 +136,7 @@ int init_fpu(struct task_struct *tsk)
#ifdef CONFIG_X86_32
if (!HAVE_HWFP) {
memset(tsk->thread.xstate, 0, xstate_size);
- finit();
+ finit_task(tsk);
set_stopped_child_used_math(tsk);
return 0;
}
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index e41980a373ab..99c4d308f16b 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -85,19 +85,8 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
t->io_bitmap_max = bytes;
-#ifdef CONFIG_X86_32
- /*
- * Sets the lazy trigger so that the next I/O operation will
- * reload the correct bitmap.
- * Reset the owner so that a process switch will not set
- * tss->io_bitmap_base to IO_BITMAP_OFFSET.
- */
- tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
- tss->io_bitmap_owner = NULL;
-#else
/* Update the TSS: */
memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated);
-#endif
put_cpu();
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index f13ca1650aaf..b864341dcc45 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -15,6 +15,9 @@
atomic_t irq_err_count;
+/* Function pointer for generic interrupt vector handling */
+void (*generic_interrupt_extension)(void) = NULL;
+
/*
* 'what should we do if we get a hw irq event on an illegal vector'.
* each architecture has to answer this themselves.
@@ -56,6 +59,12 @@ static int show_other_interrupts(struct seq_file *p)
seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
seq_printf(p, " Local timer interrupts\n");
#endif
+ if (generic_interrupt_extension) {
+ seq_printf(p, "PLT: ");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", irq_stats(j)->generic_irqs);
+ seq_printf(p, " Platform interrupts\n");
+ }
#ifdef CONFIG_SMP
seq_printf(p, "RES: ");
for_each_online_cpu(j)
@@ -163,6 +172,8 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
#ifdef CONFIG_X86_LOCAL_APIC
sum += irq_stats(cpu)->apic_timer_irqs;
#endif
+ if (generic_interrupt_extension)
+ sum += irq_stats(cpu)->generic_irqs;
#ifdef CONFIG_SMP
sum += irq_stats(cpu)->irq_resched_count;
sum += irq_stats(cpu)->irq_call_count;
@@ -226,4 +237,27 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
return 1;
}
+/*
+ * Handler for GENERIC_INTERRUPT_VECTOR.
+ */
+void smp_generic_interrupt(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
+ ack_APIC_irq();
+
+ exit_idle();
+
+ irq_enter();
+
+ inc_irq_stat(generic_irqs);
+
+ if (generic_interrupt_extension)
+ generic_interrupt_extension();
+
+ irq_exit();
+
+ set_irq_regs(old_regs);
+}
+
EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 9dc6b2b24275..3b09634a5153 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -16,6 +16,7 @@
#include <linux/cpu.h>
#include <linux/delay.h>
#include <linux/uaccess.h>
+#include <linux/percpu.h>
#include <asm/apic.h>
@@ -55,13 +56,13 @@ static inline void print_stack_overflow(void) { }
union irq_ctx {
struct thread_info tinfo;
u32 stack[THREAD_SIZE/sizeof(u32)];
-};
+} __attribute__((aligned(PAGE_SIZE)));
-static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly;
-static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
+static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx);
+static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx);
-static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
-static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
+static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, hardirq_stack);
+static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, softirq_stack);
static void call_on_stack(void *func, void *stack)
{
@@ -81,7 +82,7 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
u32 *isp, arg1, arg2;
curctx = (union irq_ctx *) current_thread_info();
- irqctx = hardirq_ctx[smp_processor_id()];
+ irqctx = __get_cpu_var(hardirq_ctx);
/*
* this is where we switch to the IRQ stack. However, if we are
@@ -125,34 +126,34 @@ void __cpuinit irq_ctx_init(int cpu)
{
union irq_ctx *irqctx;
- if (hardirq_ctx[cpu])
+ if (per_cpu(hardirq_ctx, cpu))
return;
- irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE];
+ irqctx = &per_cpu(hardirq_stack, cpu);
irqctx->tinfo.task = NULL;
irqctx->tinfo.exec_domain = NULL;
irqctx->tinfo.cpu = cpu;
irqctx->tinfo.preempt_count = HARDIRQ_OFFSET;
irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
- hardirq_ctx[cpu] = irqctx;
+ per_cpu(hardirq_ctx, cpu) = irqctx;
- irqctx = (union irq_ctx *) &softirq_stack[cpu*THREAD_SIZE];
+ irqctx = &per_cpu(softirq_stack, cpu);
irqctx->tinfo.task = NULL;
irqctx->tinfo.exec_domain = NULL;
irqctx->tinfo.cpu = cpu;
irqctx->tinfo.preempt_count = 0;
irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
- softirq_ctx[cpu] = irqctx;
+ per_cpu(softirq_ctx, cpu) = irqctx;
printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n",
- cpu, hardirq_ctx[cpu], softirq_ctx[cpu]);
+ cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu));
}
void irq_ctx_exit(int cpu)
{
- hardirq_ctx[cpu] = NULL;
+ per_cpu(hardirq_ctx, cpu) = NULL;
}
asmlinkage void do_softirq(void)
@@ -169,7 +170,7 @@ asmlinkage void do_softirq(void)
if (local_softirq_pending()) {
curctx = current_thread_info();
- irqctx = softirq_ctx[smp_processor_id()];
+ irqctx = __get_cpu_var(softirq_ctx);
irqctx->tinfo.task = curctx->task;
irqctx->tinfo.previous_esp = current_stack_pointer;
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 50b8c3a3006c..bc1326105448 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -175,6 +175,9 @@ void __init native_init_IRQ(void)
/* self generated IPI for local APIC timer */
alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
+ /* generic IPI for platform specific use */
+ alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt);
+
/* IPI vectors for APIC spurious and error interrupts */
alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index da481a1e3f30..c7a49e0ffbfb 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -147,6 +147,9 @@ static void __init apic_intr_init(void)
/* self generated IPI for local APIC timer */
alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
+ /* generic IPI for platform specific use */
+ alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt);
+
/* IPI vectors for APIC spurious and error interrupts */
alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index f5fc8c781a62..e7368c1da01d 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -14,12 +14,12 @@
#include <linux/ftrace.h>
#include <linux/suspend.h>
#include <linux/gfp.h>
+#include <linux/io.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
-#include <asm/io.h>
#include <asm/apic.h>
#include <asm/cpufeature.h>
#include <asm/desc.h>
@@ -63,7 +63,7 @@ static void load_segments(void)
"\tmovl %%eax,%%fs\n"
"\tmovl %%eax,%%gs\n"
"\tmovl %%eax,%%ss\n"
- ::: "eax", "memory");
+ : : : "eax", "memory");
#undef STR
#undef __STR
}
@@ -205,7 +205,8 @@ void machine_kexec(struct kimage *image)
if (image->preserve_context) {
#ifdef CONFIG_X86_IO_APIC
- /* We need to put APICs in legacy mode so that we can
+ /*
+ * We need to put APICs in legacy mode so that we can
* get timer interrupts in second kernel. kexec/kdump
* paths already have calls to disable_IO_APIC() in
* one form or other. kexec jump path also need
@@ -227,7 +228,8 @@ void machine_kexec(struct kimage *image)
page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
<< PAGE_SHIFT);
- /* The segment registers are funny things, they have both a
+ /*
+ * The segment registers are funny things, they have both a
* visible and an invisible part. Whenever the visible part is
* set to a specific selector, the invisible part is loaded
* with from a table in memory. At no other time is the
@@ -237,11 +239,12 @@ void machine_kexec(struct kimage *image)
* segments, before I zap the gdt with an invalid value.
*/
load_segments();
- /* The gdt & idt are now invalid.
+ /*
+ * The gdt & idt are now invalid.
* If you want to load them you must set up your own idt & gdt.
*/
- set_gdt(phys_to_virt(0),0);
- set_idt(phys_to_virt(0),0);
+ set_gdt(phys_to_virt(0), 0);
+ set_idt(phys_to_virt(0), 0);
/* now call it */
image->start = relocate_kernel_ptr((unsigned long)image->head,
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 6993d51b7fd8..89cea4d44679 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -12,11 +12,47 @@
#include <linux/reboot.h>
#include <linux/numa.h>
#include <linux/ftrace.h>
+#include <linux/io.h>
+#include <linux/suspend.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
-#include <asm/io.h>
+
+static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
+ unsigned long addr)
+{
+ pud_t *pud;
+ pmd_t *pmd;
+ struct page *page;
+ int result = -ENOMEM;
+
+ addr &= PMD_MASK;
+ pgd += pgd_index(addr);
+ if (!pgd_present(*pgd)) {
+ page = kimage_alloc_control_pages(image, 0);
+ if (!page)
+ goto out;
+ pud = (pud_t *)page_address(page);
+ memset(pud, 0, PAGE_SIZE);
+ set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+ }
+ pud = pud_offset(pgd, addr);
+ if (!pud_present(*pud)) {
+ page = kimage_alloc_control_pages(image, 0);
+ if (!page)
+ goto out;
+ pmd = (pmd_t *)page_address(page);
+ memset(pmd, 0, PAGE_SIZE);
+ set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+ }
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_present(*pmd))
+ set_pmd(pmd, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
+ result = 0;
+out:
+ return result;
+}
static void init_level2_page(pmd_t *level2p, unsigned long addr)
{
@@ -83,9 +119,8 @@ static int init_level4_page(struct kimage *image, pgd_t *level4p,
}
level3p = (pud_t *)page_address(page);
result = init_level3_page(image, level3p, addr, last_addr);
- if (result) {
+ if (result)
goto out;
- }
set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE));
addr += PGDIR_SIZE;
}
@@ -156,6 +191,13 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
if (result)
return result;
+ /*
+ * image->start may be outside 0 ~ max_pfn, for example when
+ * jump back to original kernel from kexeced kernel
+ */
+ result = init_one_level2_page(image, level4p, image->start);
+ if (result)
+ return result;
return init_transition_pgtable(image, level4p);
}
@@ -229,20 +271,45 @@ void machine_kexec(struct kimage *image)
{
unsigned long page_list[PAGES_NR];
void *control_page;
+ int save_ftrace_enabled;
- tracer_disable();
+#ifdef CONFIG_KEXEC_JUMP
+ if (kexec_image->preserve_context)
+ save_processor_state();
+#endif
+
+ save_ftrace_enabled = __ftrace_enabled_save();
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
+ if (image->preserve_context) {
+#ifdef CONFIG_X86_IO_APIC
+ /*
+ * We need to put APICs in legacy mode so that we can
+ * get timer interrupts in second kernel. kexec/kdump
+ * paths already have calls to disable_IO_APIC() in
+ * one form or other. kexec jump path also need
+ * one.
+ */
+ disable_IO_APIC();
+#endif
+ }
+
control_page = page_address(image->control_code_page) + PAGE_SIZE;
- memcpy(control_page, relocate_kernel, PAGE_SIZE);
+ memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE);
page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
+ page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
page_list[PA_TABLE_PAGE] =
(unsigned long)__pa(page_address(image->control_code_page));
- /* The segment registers are funny things, they have both a
+ if (image->type == KEXEC_TYPE_DEFAULT)
+ page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
+ << PAGE_SHIFT);
+
+ /*
+ * The segment registers are funny things, they have both a
* visible and an invisible part. Whenever the visible part is
* set to a specific selector, the invisible part is loaded
* with from a table in memory. At no other time is the
@@ -252,15 +319,25 @@ void machine_kexec(struct kimage *image)
* segments, before I zap the gdt with an invalid value.
*/
load_segments();
- /* The gdt & idt are now invalid.
+ /*
+ * The gdt & idt are now invalid.
* If you want to load them you must set up your own idt & gdt.
*/
- set_gdt(phys_to_virt(0),0);
- set_idt(phys_to_virt(0),0);
+ set_gdt(phys_to_virt(0), 0);
+ set_idt(phys_to_virt(0), 0);
/* now call it */
- relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
- image->start);
+ image->start = relocate_kernel((unsigned long)image->head,
+ (unsigned long)page_list,
+ image->start,
+ image->preserve_context);
+
+#ifdef CONFIG_KEXEC_JUMP
+ if (kexec_image->preserve_context)
+ restore_processor_state();
+#endif
+
+ __ftrace_enabled_restore(save_ftrace_enabled);
}
void arch_crash_save_vmcoreinfo(void)
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 37cb1bda1baf..e8192401da47 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -558,6 +558,19 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
static struct mpf_intel *mpf_found;
+static unsigned long __init get_mpc_size(unsigned long physptr)
+{
+ struct mpc_table *mpc;
+ unsigned long size;
+
+ mpc = early_ioremap(physptr, PAGE_SIZE);
+ size = mpc->length;
+ early_iounmap(mpc, PAGE_SIZE);
+ apic_printk(APIC_VERBOSE, " mpc: %lx-%lx\n", physptr, physptr + size);
+
+ return size;
+}
+
/*
* Scan the memory blocks for an SMP configuration block.
*/
@@ -611,12 +624,16 @@ static void __init __get_smp_config(unsigned int early)
construct_default_ISA_mptable(mpf->feature1);
} else if (mpf->physptr) {
+ struct mpc_table *mpc;
+ unsigned long size;
+ size = get_mpc_size(mpf->physptr);
+ mpc = early_ioremap(mpf->physptr, size);
/*
* Read the physical hardware table. Anything here will
* override the defaults.
*/
- if (!smp_read_mpc(phys_to_virt(mpf->physptr), early)) {
+ if (!smp_read_mpc(mpc, early)) {
#ifdef CONFIG_X86_LOCAL_APIC
smp_found_config = 0;
#endif
@@ -624,8 +641,10 @@ static void __init __get_smp_config(unsigned int early)
"BIOS bug, MP table errors detected!...\n");
printk(KERN_ERR "... disabling SMP support. "
"(tell your hw vendor)\n");
+ early_iounmap(mpc, size);
return;
}
+ early_iounmap(mpc, size);
if (early)
return;
@@ -697,10 +716,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
if (!reserve)
return 1;
- reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE,
+ reserve_bootmem_generic(virt_to_phys(mpf), sizeof(*mpf),
BOOTMEM_DEFAULT);
if (mpf->physptr) {
- unsigned long size = PAGE_SIZE;
+ unsigned long size = get_mpc_size(mpf->physptr);
#ifdef CONFIG_X86_32
/*
* We cannot access to MPC table to compute
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 87b69d4fac16..6afa5232dbb7 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -1,8 +1,8 @@
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/mm.h>
-#include <asm/idle.h>
#include <linux/smp.h>
+#include <linux/prctl.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/module.h>
@@ -11,6 +11,9 @@
#include <linux/ftrace.h>
#include <asm/system.h>
#include <asm/apic.h>
+#include <asm/idle.h>
+#include <asm/uaccess.h>
+#include <asm/i387.h>
unsigned long idle_halt;
EXPORT_SYMBOL(idle_halt);
@@ -56,6 +59,192 @@ void arch_task_cache_init(void)
}
/*
+ * Free current thread data structures etc..
+ */
+void exit_thread(void)
+{
+ struct task_struct *me = current;
+ struct thread_struct *t = &me->thread;
+
+ if (me->thread.io_bitmap_ptr) {
+ struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
+
+ kfree(t->io_bitmap_ptr);
+ t->io_bitmap_ptr = NULL;
+ clear_thread_flag(TIF_IO_BITMAP);
+ /*
+ * Careful, clear this in the TSS too:
+ */
+ memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
+ t->io_bitmap_max = 0;
+ put_cpu();
+ }
+
+ ds_exit_thread(current);
+}
+
+void flush_thread(void)
+{
+ struct task_struct *tsk = current;
+
+#ifdef CONFIG_X86_64
+ if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
+ clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
+ if (test_tsk_thread_flag(tsk, TIF_IA32)) {
+ clear_tsk_thread_flag(tsk, TIF_IA32);
+ } else {
+ set_tsk_thread_flag(tsk, TIF_IA32);
+ current_thread_info()->status |= TS_COMPAT;
+ }
+ }
+#endif
+
+ clear_tsk_thread_flag(tsk, TIF_DEBUG);
+
+ tsk->thread.debugreg0 = 0;
+ tsk->thread.debugreg1 = 0;
+ tsk->thread.debugreg2 = 0;
+ tsk->thread.debugreg3 = 0;
+ tsk->thread.debugreg6 = 0;
+ tsk->thread.debugreg7 = 0;
+ memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
+ /*
+ * Forget coprocessor state..
+ */
+ tsk->fpu_counter = 0;
+ clear_fpu(tsk);
+ clear_used_math();
+}
+
+static void hard_disable_TSC(void)
+{
+ write_cr4(read_cr4() | X86_CR4_TSD);
+}
+
+void disable_TSC(void)
+{
+ preempt_disable();
+ if (!test_and_set_thread_flag(TIF_NOTSC))
+ /*
+ * Must flip the CPU state synchronously with
+ * TIF_NOTSC in the current running context.
+ */
+ hard_disable_TSC();
+ preempt_enable();
+}
+
+static void hard_enable_TSC(void)
+{
+ write_cr4(read_cr4() & ~X86_CR4_TSD);
+}
+
+static void enable_TSC(void)
+{
+ preempt_disable();
+ if (test_and_clear_thread_flag(TIF_NOTSC))
+ /*
+ * Must flip the CPU state synchronously with
+ * TIF_NOTSC in the current running context.
+ */
+ hard_enable_TSC();
+ preempt_enable();
+}
+
+int get_tsc_mode(unsigned long adr)
+{
+ unsigned int val;
+
+ if (test_thread_flag(TIF_NOTSC))
+ val = PR_TSC_SIGSEGV;
+ else
+ val = PR_TSC_ENABLE;
+
+ return put_user(val, (unsigned int __user *)adr);
+}
+
+int set_tsc_mode(unsigned int val)
+{
+ if (val == PR_TSC_SIGSEGV)
+ disable_TSC();
+ else if (val == PR_TSC_ENABLE)
+ enable_TSC();
+ else
+ return -EINVAL;
+
+ return 0;
+}
+
+void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
+ struct tss_struct *tss)
+{
+ struct thread_struct *prev, *next;
+
+ prev = &prev_p->thread;
+ next = &next_p->thread;
+
+ if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
+ test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
+ ds_switch_to(prev_p, next_p);
+ else if (next->debugctlmsr != prev->debugctlmsr)
+ update_debugctlmsr(next->debugctlmsr);
+
+ if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
+ set_debugreg(next->debugreg0, 0);
+ set_debugreg(next->debugreg1, 1);
+ set_debugreg(next->debugreg2, 2);
+ set_debugreg(next->debugreg3, 3);
+ /* no 4 and 5 */
+ set_debugreg(next->debugreg6, 6);
+ set_debugreg(next->debugreg7, 7);
+ }
+
+ if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
+ test_tsk_thread_flag(next_p, TIF_NOTSC)) {
+ /* prev and next are different */
+ if (test_tsk_thread_flag(next_p, TIF_NOTSC))
+ hard_disable_TSC();
+ else
+ hard_enable_TSC();
+ }
+
+ if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
+ /*
+ * Copy the relevant range of the IO bitmap.
+ * Normally this is 128 bytes or less:
+ */
+ memcpy(tss->io_bitmap, next->io_bitmap_ptr,
+ max(prev->io_bitmap_max, next->io_bitmap_max));
+ } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
+ /*
+ * Clear any possible leftover bits:
+ */
+ memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
+ }
+}
+
+int sys_fork(struct pt_regs *regs)
+{
+ return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
+}
+
+/*
+ * This is trivial, and on the face of it looks like it
+ * could equally well be done in user mode.
+ *
+ * Not so, for quite unobvious reasons - register pressure.
+ * In user mode vfork() cannot have a stack frame, and if
+ * done by calling the "clone()" system call directly, you
+ * do not have enough call-clobbered registers to hold all
+ * the information you need.
+ */
+int sys_vfork(struct pt_regs *regs)
+{
+ return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
+ NULL, NULL);
+}
+
+
+/*
* Idle related variables and functions
*/
unsigned long boot_option_idle_override = 0;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 646da41a620a..14014d766cad 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -230,55 +230,6 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
}
EXPORT_SYMBOL(kernel_thread);
-/*
- * Free current thread data structures etc..
- */
-void exit_thread(void)
-{
- /* The process may have allocated an io port bitmap... nuke it. */
- if (unlikely(test_thread_flag(TIF_IO_BITMAP))) {
- struct task_struct *tsk = current;
- struct thread_struct *t = &tsk->thread;
- int cpu = get_cpu();
- struct tss_struct *tss = &per_cpu(init_tss, cpu);
-
- kfree(t->io_bitmap_ptr);
- t->io_bitmap_ptr = NULL;
- clear_thread_flag(TIF_IO_BITMAP);
- /*
- * Careful, clear this in the TSS too:
- */
- memset(tss->io_bitmap, 0xff, tss->io_bitmap_max);
- t->io_bitmap_max = 0;
- tss->io_bitmap_owner = NULL;
- tss->io_bitmap_max = 0;
- tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
- put_cpu();
- }
-
- ds_exit_thread(current);
-}
-
-void flush_thread(void)
-{
- struct task_struct *tsk = current;
-
- tsk->thread.debugreg0 = 0;
- tsk->thread.debugreg1 = 0;
- tsk->thread.debugreg2 = 0;
- tsk->thread.debugreg3 = 0;
- tsk->thread.debugreg6 = 0;
- tsk->thread.debugreg7 = 0;
- memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
- clear_tsk_thread_flag(tsk, TIF_DEBUG);
- /*
- * Forget coprocessor state..
- */
- tsk->fpu_counter = 0;
- clear_fpu(tsk);
- clear_used_math();
-}
-
void release_thread(struct task_struct *dead_task)
{
BUG_ON(dead_task->mm);
@@ -366,127 +317,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
}
EXPORT_SYMBOL_GPL(start_thread);
-static void hard_disable_TSC(void)
-{
- write_cr4(read_cr4() | X86_CR4_TSD);
-}
-
-void disable_TSC(void)
-{
- preempt_disable();
- if (!test_and_set_thread_flag(TIF_NOTSC))
- /*
- * Must flip the CPU state synchronously with
- * TIF_NOTSC in the current running context.
- */
- hard_disable_TSC();
- preempt_enable();
-}
-
-static void hard_enable_TSC(void)
-{
- write_cr4(read_cr4() & ~X86_CR4_TSD);
-}
-
-static void enable_TSC(void)
-{
- preempt_disable();
- if (test_and_clear_thread_flag(TIF_NOTSC))
- /*
- * Must flip the CPU state synchronously with
- * TIF_NOTSC in the current running context.
- */
- hard_enable_TSC();
- preempt_enable();
-}
-
-int get_tsc_mode(unsigned long adr)
-{
- unsigned int val;
-
- if (test_thread_flag(TIF_NOTSC))
- val = PR_TSC_SIGSEGV;
- else
- val = PR_TSC_ENABLE;
-
- return put_user(val, (unsigned int __user *)adr);
-}
-
-int set_tsc_mode(unsigned int val)
-{
- if (val == PR_TSC_SIGSEGV)
- disable_TSC();
- else if (val == PR_TSC_ENABLE)
- enable_TSC();
- else
- return -EINVAL;
-
- return 0;
-}
-
-static noinline void
-__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
- struct tss_struct *tss)
-{
- struct thread_struct *prev, *next;
-
- prev = &prev_p->thread;
- next = &next_p->thread;
-
- if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
- test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
- ds_switch_to(prev_p, next_p);
- else if (next->debugctlmsr != prev->debugctlmsr)
- update_debugctlmsr(next->debugctlmsr);
-
- if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
- set_debugreg(next->debugreg0, 0);
- set_debugreg(next->debugreg1, 1);
- set_debugreg(next->debugreg2, 2);
- set_debugreg(next->debugreg3, 3);
- /* no 4 and 5 */
- set_debugreg(next->debugreg6, 6);
- set_debugreg(next->debugreg7, 7);
- }
-
- if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
- test_tsk_thread_flag(next_p, TIF_NOTSC)) {
- /* prev and next are different */
- if (test_tsk_thread_flag(next_p, TIF_NOTSC))
- hard_disable_TSC();
- else
- hard_enable_TSC();
- }
-
- if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
- /*
- * Disable the bitmap via an invalid offset. We still cache
- * the previous bitmap owner and the IO bitmap contents:
- */
- tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
- return;
- }
-
- if (likely(next == tss->io_bitmap_owner)) {
- /*
- * Previous owner of the bitmap (hence the bitmap content)
- * matches the next task, we dont have to do anything but
- * to set a valid offset in the TSS:
- */
- tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
- return;
- }
- /*
- * Lazy TSS's I/O bitmap copy. We set an invalid offset here
- * and we let the task to get a GPF in case an I/O instruction
- * is performed. The handler of the GPF will verify that the
- * faulting task has a valid I/O bitmap and, it true, does the
- * real copy and restart the instruction. This will save us
- * redundant copies when the currently switched task does not
- * perform any I/O during its timeslice.
- */
- tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
-}
/*
* switch_to(x,yn) should switch tasks from x to y.
@@ -600,11 +430,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
return prev_p;
}
-int sys_fork(struct pt_regs *regs)
-{
- return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
-}
-
int sys_clone(struct pt_regs *regs)
{
unsigned long clone_flags;
@@ -621,21 +446,6 @@ int sys_clone(struct pt_regs *regs)
}
/*
- * This is trivial, and on the face of it looks like it
- * could equally well be done in user mode.
- *
- * Not so, for quite unobvious reasons - register pressure.
- * In user mode vfork() cannot have a stack frame, and if
- * done by calling the "clone()" system call directly, you
- * do not have enough call-clobbered registers to hold all
- * the information you need.
- */
-int sys_vfork(struct pt_regs *regs)
-{
- return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, NULL, NULL);
-}
-
-/*
* sys_execve() executes a new program.
*/
int sys_execve(struct pt_regs *regs)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 836ef6575f01..abb7e6a7f0c6 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -237,61 +237,6 @@ void show_regs(struct pt_regs *regs)
show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}
-/*
- * Free current thread data structures etc..
- */
-void exit_thread(void)
-{
- struct task_struct *me = current;
- struct thread_struct *t = &me->thread;
-
- if (me->thread.io_bitmap_ptr) {
- struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
-
- kfree(t->io_bitmap_ptr);
- t->io_bitmap_ptr = NULL;
- clear_thread_flag(TIF_IO_BITMAP);
- /*
- * Careful, clear this in the TSS too:
- */
- memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
- t->io_bitmap_max = 0;
- put_cpu();
- }
-
- ds_exit_thread(current);
-}
-
-void flush_thread(void)
-{
- struct task_struct *tsk = current;
-
- if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
- clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
- if (test_tsk_thread_flag(tsk, TIF_IA32)) {
- clear_tsk_thread_flag(tsk, TIF_IA32);
- } else {
- set_tsk_thread_flag(tsk, TIF_IA32);
- current_thread_info()->status |= TS_COMPAT;
- }
- }
- clear_tsk_thread_flag(tsk, TIF_DEBUG);
-
- tsk->thread.debugreg0 = 0;
- tsk->thread.debugreg1 = 0;
- tsk->thread.debugreg2 = 0;
- tsk->thread.debugreg3 = 0;
- tsk->thread.debugreg6 = 0;
- tsk->thread.debugreg7 = 0;
- memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
- /*
- * Forget coprocessor state..
- */
- tsk->fpu_counter = 0;
- clear_fpu(tsk);
- clear_used_math();
-}
-
void release_thread(struct task_struct *dead_task)
{
if (dead_task->mm) {
@@ -425,118 +370,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
}
EXPORT_SYMBOL_GPL(start_thread);
-static void hard_disable_TSC(void)
-{
- write_cr4(read_cr4() | X86_CR4_TSD);
-}
-
-void disable_TSC(void)
-{
- preempt_disable();
- if (!test_and_set_thread_flag(TIF_NOTSC))
- /*
- * Must flip the CPU state synchronously with
- * TIF_NOTSC in the current running context.
- */
- hard_disable_TSC();
- preempt_enable();
-}
-
-static void hard_enable_TSC(void)
-{
- write_cr4(read_cr4() & ~X86_CR4_TSD);
-}
-
-static void enable_TSC(void)
-{
- preempt_disable();
- if (test_and_clear_thread_flag(TIF_NOTSC))
- /*
- * Must flip the CPU state synchronously with
- * TIF_NOTSC in the current running context.
- */
- hard_enable_TSC();
- preempt_enable();
-}
-
-int get_tsc_mode(unsigned long adr)
-{
- unsigned int val;
-
- if (test_thread_flag(TIF_NOTSC))
- val = PR_TSC_SIGSEGV;
- else
- val = PR_TSC_ENABLE;
-
- return put_user(val, (unsigned int __user *)adr);
-}
-
-int set_tsc_mode(unsigned int val)
-{
- if (val == PR_TSC_SIGSEGV)
- disable_TSC();
- else if (val == PR_TSC_ENABLE)
- enable_TSC();
- else
- return -EINVAL;
-
- return 0;
-}
-
-/*
- * This special macro can be used to load a debugging register
- */
-#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)
-
-static inline void __switch_to_xtra(struct task_struct *prev_p,
- struct task_struct *next_p,
- struct tss_struct *tss)
-{
- struct thread_struct *prev, *next;
-
- prev = &prev_p->thread,
- next = &next_p->thread;
-
- if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
- test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
- ds_switch_to(prev_p, next_p);
- else if (next->debugctlmsr != prev->debugctlmsr)
- update_debugctlmsr(next->debugctlmsr);
-
- if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
- loaddebug(next, 0);
- loaddebug(next, 1);
- loaddebug(next, 2);
- loaddebug(next, 3);
- /* no 4 and 5 */
- loaddebug(next, 6);
- loaddebug(next, 7);
- }
-
- if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
- test_tsk_thread_flag(next_p, TIF_NOTSC)) {
- /* prev and next are different */
- if (test_tsk_thread_flag(next_p, TIF_NOTSC))
- hard_disable_TSC();
- else
- hard_enable_TSC();
- }
-
- if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
- /*
- * Copy the relevant range of the IO bitmap.
- * Normally this is 128 bytes or less:
- */
- memcpy(tss->io_bitmap, next->io_bitmap_ptr,
- max(prev->io_bitmap_max, next->io_bitmap_max));
- } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
- /*
- * Clear any possible leftover bits:
- */
- memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
- }
-}
-
/*
* switch_to(x,y) should switch tasks from x to y.
*
@@ -694,11 +527,6 @@ void set_personality_64bit(void)
current->personality &= ~READ_IMPLIES_EXEC;
}
-asmlinkage long sys_fork(struct pt_regs *regs)
-{
- return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
-}
-
asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
@@ -708,22 +536,6 @@ sys_clone(unsigned long clone_flags, unsigned long newsp,
return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}
-/*
- * This is trivial, and on the face of it looks like it
- * could equally well be done in user mode.
- *
- * Not so, for quite unobvious reasons - register pressure.
- * In user mode vfork() cannot have a stack frame, and if
- * done by calling the "clone()" system call directly, you
- * do not have enough call-clobbered registers to hold all
- * the information you need.
- */
-asmlinkage long sys_vfork(struct pt_regs *regs)
-{
- return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
- NULL, NULL);
-}
-
unsigned long get_wchan(struct task_struct *p)
{
unsigned long stack;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index fb2159a5c817..3d9672e59c16 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1383,7 +1383,7 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
#ifdef CONFIG_X86_32
# define IS_IA32 1
#elif defined CONFIG_IA32_EMULATION
-# define IS_IA32 test_thread_flag(TIF_IA32)
+# define IS_IA32 is_compat_task()
#else
# define IS_IA32 0
#endif
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 1cc18d439bbb..2aef36d8aca2 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -216,6 +216,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq"),
},
},
+ { /* Handle problems with rebooting on Dell XPS710 */
+ .callback = set_bios_reboot,
+ .ident = "Dell XPS710",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Dell XPS710"),
+ },
+ },
{ }
};
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index 2064d0aa8d28..41235531b11c 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -17,7 +17,8 @@
#define PTR(x) (x << 2)
-/* control_page + KEXEC_CONTROL_CODE_MAX_SIZE
+/*
+ * control_page + KEXEC_CONTROL_CODE_MAX_SIZE
* ~ control_page + PAGE_SIZE are used as data storage and stack for
* jumping back
*/
@@ -76,8 +77,10 @@ relocate_kernel:
movl %eax, CP_PA_SWAP_PAGE(%edi)
movl %ebx, CP_PA_BACKUP_PAGES_MAP(%edi)
- /* get physical address of control page now */
- /* this is impossible after page table switch */
+ /*
+ * get physical address of control page now
+ * this is impossible after page table switch
+ */
movl PTR(PA_CONTROL_PAGE)(%ebp), %edi
/* switch to new set of page tables */
@@ -97,7 +100,8 @@ identity_mapped:
/* store the start address on the stack */
pushl %edx
- /* Set cr0 to a known state:
+ /*
+ * Set cr0 to a known state:
* - Paging disabled
* - Alignment check disabled
* - Write protect disabled
@@ -113,7 +117,8 @@ identity_mapped:
/* clear cr4 if applicable */
testl %ecx, %ecx
jz 1f
- /* Set cr4 to a known state:
+ /*
+ * Set cr4 to a known state:
* Setting everything to zero seems safe.
*/
xorl %eax, %eax
@@ -132,15 +137,18 @@ identity_mapped:
call swap_pages
addl $8, %esp
- /* To be certain of avoiding problems with self-modifying code
+ /*
+ * To be certain of avoiding problems with self-modifying code
* I need to execute a serializing instruction here.
* So I flush the TLB, it's handy, and not processor dependent.
*/
xorl %eax, %eax
movl %eax, %cr3
- /* set all of the registers to known values */
- /* leave %esp alone */
+ /*
+ * set all of the registers to known values
+ * leave %esp alone
+ */
testl %esi, %esi
jnz 1f
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index d32cfb27a479..4de8f5b3d476 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -19,29 +19,77 @@
#define PTR(x) (x << 3)
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+/*
+ * control_page + KEXEC_CONTROL_CODE_MAX_SIZE
+ * ~ control_page + PAGE_SIZE are used as data storage and stack for
+ * jumping back
+ */
+#define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset))
+
+/* Minimal CPU state */
+#define RSP DATA(0x0)
+#define CR0 DATA(0x8)
+#define CR3 DATA(0x10)
+#define CR4 DATA(0x18)
+
+/* other data */
+#define CP_PA_TABLE_PAGE DATA(0x20)
+#define CP_PA_SWAP_PAGE DATA(0x28)
+#define CP_PA_BACKUP_PAGES_MAP DATA(0x30)
+
.text
.align PAGE_SIZE
.code64
.globl relocate_kernel
relocate_kernel:
- /* %rdi indirection_page
+ /*
+ * %rdi indirection_page
* %rsi page_list
* %rdx start address
+ * %rcx preserve_context
*/
+ /* Save the CPU context, used for jumping back */
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushf
+
+ movq PTR(VA_CONTROL_PAGE)(%rsi), %r11
+ movq %rsp, RSP(%r11)
+ movq %cr0, %rax
+ movq %rax, CR0(%r11)
+ movq %cr3, %rax
+ movq %rax, CR3(%r11)
+ movq %cr4, %rax
+ movq %rax, CR4(%r11)
+
/* zero out flags, and disable interrupts */
pushq $0
popfq
- /* get physical address of control page now */
- /* this is impossible after page table switch */
+ /*
+ * get physical address of control page now
+ * this is impossible after page table switch
+ */
movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
/* get physical address of page table now too */
- movq PTR(PA_TABLE_PAGE)(%rsi), %rcx
+ movq PTR(PA_TABLE_PAGE)(%rsi), %r9
+
+ /* get physical address of swap page now */
+ movq PTR(PA_SWAP_PAGE)(%rsi), %r10
+
+ /* save some information for jumping back */
+ movq %r9, CP_PA_TABLE_PAGE(%r11)
+ movq %r10, CP_PA_SWAP_PAGE(%r11)
+ movq %rdi, CP_PA_BACKUP_PAGES_MAP(%r11)
/* Switch to the identity mapped page tables */
- movq %rcx, %cr3
+ movq %r9, %cr3
/* setup a new stack at the end of the physical control page */
lea PAGE_SIZE(%r8), %rsp
@@ -55,7 +103,8 @@ identity_mapped:
/* store the start address on the stack */
pushq %rdx
- /* Set cr0 to a known state:
+ /*
+ * Set cr0 to a known state:
* - Paging enabled
* - Alignment check disabled
* - Write protect disabled
@@ -68,7 +117,8 @@ identity_mapped:
orl $(X86_CR0_PG | X86_CR0_PE), %eax
movq %rax, %cr0
- /* Set cr4 to a known state:
+ /*
+ * Set cr4 to a known state:
* - physical address extension enabled
*/
movq $X86_CR4_PAE, %rax
@@ -78,9 +128,87 @@ identity_mapped:
1:
/* Flush the TLB (needed?) */
- movq %rcx, %cr3
+ movq %r9, %cr3
+
+ movq %rcx, %r11
+ call swap_pages
+
+ /*
+ * To be certain of avoiding problems with self-modifying code
+ * I need to execute a serializing instruction here.
+ * So I flush the TLB by reloading %cr3 here, it's handy,
+ * and not processor dependent.
+ */
+ movq %cr3, %rax
+ movq %rax, %cr3
+
+ /*
+ * set all of the registers to known values
+ * leave %rsp alone
+ */
+
+ testq %r11, %r11
+ jnz 1f
+ xorq %rax, %rax
+ xorq %rbx, %rbx
+ xorq %rcx, %rcx
+ xorq %rdx, %rdx
+ xorq %rsi, %rsi
+ xorq %rdi, %rdi
+ xorq %rbp, %rbp
+ xorq %r8, %r8
+ xorq %r9, %r9
+ xorq %r10, %r9
+ xorq %r11, %r11
+ xorq %r12, %r12
+ xorq %r13, %r13
+ xorq %r14, %r14
+ xorq %r15, %r15
+
+ ret
+
+1:
+ popq %rdx
+ leaq PAGE_SIZE(%r10), %rsp
+ call *%rdx
+
+ /* get the re-entry point of the peer system */
+ movq 0(%rsp), %rbp
+ call 1f
+1:
+ popq %r8
+ subq $(1b - relocate_kernel), %r8
+ movq CP_PA_SWAP_PAGE(%r8), %r10
+ movq CP_PA_BACKUP_PAGES_MAP(%r8), %rdi
+ movq CP_PA_TABLE_PAGE(%r8), %rax
+ movq %rax, %cr3
+ lea PAGE_SIZE(%r8), %rsp
+ call swap_pages
+ movq $virtual_mapped, %rax
+ pushq %rax
+ ret
+
+virtual_mapped:
+ movq RSP(%r8), %rsp
+ movq CR4(%r8), %rax
+ movq %rax, %cr4
+ movq CR3(%r8), %rax
+ movq CR0(%r8), %r8
+ movq %rax, %cr3
+ movq %r8, %cr0
+ movq %rbp, %rax
+
+ popf
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ ret
/* Do the copies */
+swap_pages:
movq %rdi, %rcx /* Put the page_list in %rcx */
xorq %rdi, %rdi
xorq %rsi, %rsi
@@ -112,36 +240,27 @@ identity_mapped:
movq %rcx, %rsi /* For ever source page do a copy */
andq $0xfffffffffffff000, %rsi
+ movq %rdi, %rdx
+ movq %rsi, %rax
+
+ movq %r10, %rdi
movq $512, %rcx
rep ; movsq
- jmp 0b
-3:
-
- /* To be certain of avoiding problems with self-modifying code
- * I need to execute a serializing instruction here.
- * So I flush the TLB by reloading %cr3 here, it's handy,
- * and not processor dependent.
- */
- movq %cr3, %rax
- movq %rax, %cr3
- /* set all of the registers to known values */
- /* leave %rsp alone */
+ movq %rax, %rdi
+ movq %rdx, %rsi
+ movq $512, %rcx
+ rep ; movsq
- xorq %rax, %rax
- xorq %rbx, %rbx
- xorq %rcx, %rcx
- xorq %rdx, %rdx
- xorq %rsi, %rsi
- xorq %rdi, %rdi
- xorq %rbp, %rbp
- xorq %r8, %r8
- xorq %r9, %r9
- xorq %r10, %r9
- xorq %r11, %r11
- xorq %r12, %r12
- xorq %r13, %r13
- xorq %r14, %r14
- xorq %r15, %r15
+ movq %rdx, %rdi
+ movq %r10, %rsi
+ movq $512, %rcx
+ rep ; movsq
+ lea PAGE_SIZE(%rax), %rsi
+ jmp 0b
+3:
ret
+
+ .globl kexec_control_code_size
+.set kexec_control_code_size, . - relocate_kernel
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 5b85759e7972..f28c56e6bf94 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -202,7 +202,9 @@ struct ist_info ist_info;
#endif
#else
-struct cpuinfo_x86 boot_cpu_data __read_mostly;
+struct cpuinfo_x86 boot_cpu_data __read_mostly = {
+ .x86_phys_bits = MAX_PHYSMEM_BITS,
+};
EXPORT_SYMBOL(boot_cpu_data);
#endif
@@ -600,19 +602,7 @@ static int __init setup_elfcorehdr(char *arg)
early_param("elfcorehdr", setup_elfcorehdr);
#endif
-static int __init default_update_apic(void)
-{
-#ifdef CONFIG_SMP
- if (!apic->wakeup_cpu)
- apic->wakeup_cpu = wakeup_secondary_cpu_via_init;
-#endif
-
- return 0;
-}
-
-static struct x86_quirks default_x86_quirks __initdata = {
- .update_apic = default_update_apic,
-};
+static struct x86_quirks default_x86_quirks __initdata;
struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
@@ -782,6 +772,9 @@ void __init setup_arch(char **cmdline_p)
finish_e820_parsing();
+ if (efi_enabled)
+ efi_init();
+
dmi_scan_machine();
dmi_check_system(bad_bios_dmi_table);
@@ -801,8 +794,6 @@ void __init setup_arch(char **cmdline_p)
insert_resource(&iomem_resource, &data_resource);
insert_resource(&iomem_resource, &bss_resource);
- if (efi_enabled)
- efi_init();
#ifdef CONFIG_X86_32
if (ppro_with_ram_bug()) {
@@ -875,9 +866,7 @@ void __init setup_arch(char **cmdline_p)
reserve_initrd();
-#ifdef CONFIG_X86_64
vsmp_init();
-#endif
io_delay_init();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index d992e6cff730..efa615f2bf43 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -7,6 +7,7 @@
#include <linux/crash_dump.h>
#include <linux/smp.h>
#include <linux/topology.h>
+#include <linux/pfn.h>
#include <asm/sections.h>
#include <asm/processor.h>
#include <asm/setup.h>
@@ -41,6 +42,352 @@ unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
};
EXPORT_SYMBOL(__per_cpu_offset);
+/*
+ * On x86_64 symbols referenced from code should be reachable using
+ * 32bit relocations. Reserve space for static percpu variables in
+ * modules so that they are always served from the first chunk which
+ * is located at the percpu segment base. On x86_32, anything can
+ * address anywhere. No need to reserve space in the first chunk.
+ */
+#ifdef CONFIG_X86_64
+#define PERCPU_FIRST_CHUNK_RESERVE PERCPU_MODULE_RESERVE
+#else
+#define PERCPU_FIRST_CHUNK_RESERVE 0
+#endif
+
+/**
+ * pcpu_need_numa - determine percpu allocation needs to consider NUMA
+ *
+ * If NUMA is not configured or there is only one NUMA node available,
+ * there is no reason to consider NUMA. This function determines
+ * whether percpu allocation should consider NUMA or not.
+ *
+ * RETURNS:
+ * true if NUMA should be considered; otherwise, false.
+ */
+static bool __init pcpu_need_numa(void)
+{
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+ pg_data_t *last = NULL;
+ unsigned int cpu;
+
+ for_each_possible_cpu(cpu) {
+ int node = early_cpu_to_node(cpu);
+
+ if (node_online(node) && NODE_DATA(node) &&
+ last && last != NODE_DATA(node))
+ return true;
+
+ last = NODE_DATA(node);
+ }
+#endif
+ return false;
+}
+
+/**
+ * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
+ * @cpu: cpu to allocate for
+ * @size: size allocation in bytes
+ * @align: alignment
+ *
+ * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper
+ * does the right thing for NUMA regardless of the current
+ * configuration.
+ *
+ * RETURNS:
+ * Pointer to the allocated area on success, NULL on failure.
+ */
+static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
+ unsigned long align)
+{
+ const unsigned long goal = __pa(MAX_DMA_ADDRESS);
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+ int node = early_cpu_to_node(cpu);
+ void *ptr;
+
+ if (!node_online(node) || !NODE_DATA(node)) {
+ ptr = __alloc_bootmem_nopanic(size, align, goal);
+ pr_info("cpu %d has no node %d or node-local memory\n",
+ cpu, node);
+ pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
+ cpu, size, __pa(ptr));
+ } else {
+ ptr = __alloc_bootmem_node_nopanic(NODE_DATA(node),
+ size, align, goal);
+ pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
+ "%016lx\n", cpu, size, node, __pa(ptr));
+ }
+ return ptr;
+#else
+ return __alloc_bootmem_nopanic(size, align, goal);
+#endif
+}
+
+/*
+ * Remap allocator
+ *
+ * This allocator uses PMD page as unit. A PMD page is allocated for
+ * each cpu and each is remapped into vmalloc area using PMD mapping.
+ * As PMD page is quite large, only part of it is used for the first
+ * chunk. Unused part is returned to the bootmem allocator.
+ *
+ * So, the PMD pages are mapped twice - once to the physical mapping
+ * and to the vmalloc area for the first percpu chunk. The double
+ * mapping does add one more PMD TLB entry pressure but still is much
+ * better than only using 4k mappings while still being NUMA friendly.
+ */
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+static size_t pcpur_size __initdata;
+static void **pcpur_ptrs __initdata;
+
+static struct page * __init pcpur_get_page(unsigned int cpu, int pageno)
+{
+ size_t off = (size_t)pageno << PAGE_SHIFT;
+
+ if (off >= pcpur_size)
+ return NULL;
+
+ return virt_to_page(pcpur_ptrs[cpu] + off);
+}
+
+static ssize_t __init setup_pcpu_remap(size_t static_size)
+{
+ static struct vm_struct vm;
+ pg_data_t *last;
+ size_t ptrs_size, dyn_size;
+ unsigned int cpu;
+ ssize_t ret;
+
+ /*
+ * If large page isn't supported, there's no benefit in doing
+ * this. Also, on non-NUMA, embedding is better.
+ */
+ if (!cpu_has_pse || pcpu_need_numa())
+ return -EINVAL;
+
+ last = NULL;
+ for_each_possible_cpu(cpu) {
+ int node = early_cpu_to_node(cpu);
+
+ if (node_online(node) && NODE_DATA(node) &&
+ last && last != NODE_DATA(node))
+ goto proceed;
+
+ last = NODE_DATA(node);
+ }
+ return -EINVAL;
+
+proceed:
+ /*
+ * Currently supports only single page. Supporting multiple
+ * pages won't be too difficult if it ever becomes necessary.
+ */
+ pcpur_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE +
+ PERCPU_DYNAMIC_RESERVE);
+ if (pcpur_size > PMD_SIZE) {
+ pr_warning("PERCPU: static data is larger than large page, "
+ "can't use large page\n");
+ return -EINVAL;
+ }
+ dyn_size = pcpur_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;
+
+ /* allocate pointer array and alloc large pages */
+ ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0]));
+ pcpur_ptrs = alloc_bootmem(ptrs_size);
+
+ for_each_possible_cpu(cpu) {
+ pcpur_ptrs[cpu] = pcpu_alloc_bootmem(cpu, PMD_SIZE, PMD_SIZE);
+ if (!pcpur_ptrs[cpu])
+ goto enomem;
+
+ /*
+ * Only use pcpur_size bytes and give back the rest.
+ *
+ * Ingo: The 2MB up-rounding bootmem is needed to make
+ * sure the partial 2MB page is still fully RAM - it's
+ * not well-specified to have a PAT-incompatible area
+ * (unmapped RAM, device memory, etc.) in that hole.
+ */
+ free_bootmem(__pa(pcpur_ptrs[cpu] + pcpur_size),
+ PMD_SIZE - pcpur_size);
+
+ memcpy(pcpur_ptrs[cpu], __per_cpu_load, static_size);
+ }
+
+ /* allocate address and map */
+ vm.flags = VM_ALLOC;
+ vm.size = num_possible_cpus() * PMD_SIZE;
+ vm_area_register_early(&vm, PMD_SIZE);
+
+ for_each_possible_cpu(cpu) {
+ pmd_t *pmd;
+
+ pmd = populate_extra_pmd((unsigned long)vm.addr
+ + cpu * PMD_SIZE);
+ set_pmd(pmd, pfn_pmd(page_to_pfn(virt_to_page(pcpur_ptrs[cpu])),
+ PAGE_KERNEL_LARGE));
+ }
+
+ /* we're ready, commit */
+ pr_info("PERCPU: Remapped at %p with large pages, static data "
+ "%zu bytes\n", vm.addr, static_size);
+
+ ret = pcpu_setup_first_chunk(pcpur_get_page, static_size,
+ PERCPU_FIRST_CHUNK_RESERVE,
+ PMD_SIZE, dyn_size, vm.addr, NULL);
+ goto out_free_ar;
+
+enomem:
+ for_each_possible_cpu(cpu)
+ if (pcpur_ptrs[cpu])
+ free_bootmem(__pa(pcpur_ptrs[cpu]), PMD_SIZE);
+ ret = -ENOMEM;
+out_free_ar:
+ free_bootmem(__pa(pcpur_ptrs), ptrs_size);
+ return ret;
+}
+#else
+static ssize_t __init setup_pcpu_remap(size_t static_size)
+{
+ return -EINVAL;
+}
+#endif
+
+/*
+ * Embedding allocator
+ *
+ * The first chunk is sized to just contain the static area plus
+ * module and dynamic reserves, and allocated as a contiguous area
+ * using bootmem allocator and used as-is without being mapped into
+ * vmalloc area. This enables the first chunk to piggy back on the
+ * linear physical PMD mapping and doesn't add any additional pressure
+ * to TLB. Note that if the needed size is smaller than the minimum
+ * unit size, the leftover is returned to the bootmem allocator.
+ */
+static void *pcpue_ptr __initdata;
+static size_t pcpue_size __initdata;
+static size_t pcpue_unit_size __initdata;
+
+static struct page * __init pcpue_get_page(unsigned int cpu, int pageno)
+{
+ size_t off = (size_t)pageno << PAGE_SHIFT;
+
+ if (off >= pcpue_size)
+ return NULL;
+
+ return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size + off);
+}
+
+static ssize_t __init setup_pcpu_embed(size_t static_size)
+{
+ unsigned int cpu;
+ size_t dyn_size;
+
+ /*
+ * If large page isn't supported, there's no benefit in doing
+ * this. Also, embedding allocation doesn't play well with
+ * NUMA.
+ */
+ if (!cpu_has_pse || pcpu_need_numa())
+ return -EINVAL;
+
+ /* allocate and copy */
+ pcpue_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE +
+ PERCPU_DYNAMIC_RESERVE);
+ pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE);
+ dyn_size = pcpue_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;
+
+ pcpue_ptr = pcpu_alloc_bootmem(0, num_possible_cpus() * pcpue_unit_size,
+ PAGE_SIZE);
+ if (!pcpue_ptr)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu) {
+ void *ptr = pcpue_ptr + cpu * pcpue_unit_size;
+
+ free_bootmem(__pa(ptr + pcpue_size),
+ pcpue_unit_size - pcpue_size);
+ memcpy(ptr, __per_cpu_load, static_size);
+ }
+
+ /* we're ready, commit */
+ pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n",
+ pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size);
+
+ return pcpu_setup_first_chunk(pcpue_get_page, static_size,
+ PERCPU_FIRST_CHUNK_RESERVE,
+ pcpue_unit_size, dyn_size,
+ pcpue_ptr, NULL);
+}
+
+/*
+ * 4k page allocator
+ *
+ * This is the basic allocator. Static percpu area is allocated
+ * page-by-page and most of initialization is done by the generic
+ * setup function.
+ */
+static struct page **pcpu4k_pages __initdata;
+static int pcpu4k_nr_static_pages __initdata;
+
+static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno)
+{
+ if (pageno < pcpu4k_nr_static_pages)
+ return pcpu4k_pages[cpu * pcpu4k_nr_static_pages + pageno];
+ return NULL;
+}
+
+static void __init pcpu4k_populate_pte(unsigned long addr)
+{
+ populate_extra_pte(addr);
+}
+
+static ssize_t __init setup_pcpu_4k(size_t static_size)
+{
+ size_t pages_size;
+ unsigned int cpu;
+ int i, j;
+ ssize_t ret;
+
+ pcpu4k_nr_static_pages = PFN_UP(static_size);
+
+ /* unaligned allocations can't be freed, round up to page size */
+ pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * num_possible_cpus()
+ * sizeof(pcpu4k_pages[0]));
+ pcpu4k_pages = alloc_bootmem(pages_size);
+
+ /* allocate and copy */
+ j = 0;
+ for_each_possible_cpu(cpu)
+ for (i = 0; i < pcpu4k_nr_static_pages; i++) {
+ void *ptr;
+
+ ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE);
+ if (!ptr)
+ goto enomem;
+
+ memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE);
+ pcpu4k_pages[j++] = virt_to_page(ptr);
+ }
+
+ /* we're ready, commit */
+ pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n",
+ pcpu4k_nr_static_pages, static_size);
+
+ ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size,
+ PERCPU_FIRST_CHUNK_RESERVE, -1, -1, NULL,
+ pcpu4k_populate_pte);
+ goto out_free_ar;
+
+enomem:
+ while (--j >= 0)
+ free_bootmem(__pa(page_address(pcpu4k_pages[j])), PAGE_SIZE);
+ ret = -ENOMEM;
+out_free_ar:
+ free_bootmem(__pa(pcpu4k_pages), pages_size);
+ return ret;
+}
+
static inline void setup_percpu_segment(int cpu)
{
#ifdef CONFIG_X86_32
@@ -61,38 +408,35 @@ static inline void setup_percpu_segment(int cpu)
*/
void __init setup_per_cpu_areas(void)
{
- ssize_t size;
- char *ptr;
- int cpu;
-
- /* Copy section for each CPU (we discard the original) */
- size = roundup(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
+ size_t static_size = __per_cpu_end - __per_cpu_start;
+ unsigned int cpu;
+ unsigned long delta;
+ size_t pcpu_unit_size;
+ ssize_t ret;
pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
- pr_info("PERCPU: Allocating %zd bytes of per cpu data\n", size);
+ /*
+ * Allocate percpu area. If PSE is supported, try to make use
+ * of large page mappings. Please read comments on top of
+ * each allocator for details.
+ */
+ ret = setup_pcpu_remap(static_size);
+ if (ret < 0)
+ ret = setup_pcpu_embed(static_size);
+ if (ret < 0)
+ ret = setup_pcpu_4k(static_size);
+ if (ret < 0)
+ panic("cannot allocate static percpu area (%zu bytes, err=%zd)",
+ static_size, ret);
- for_each_possible_cpu(cpu) {
-#ifndef CONFIG_NEED_MULTIPLE_NODES
- ptr = alloc_bootmem_pages(size);
-#else
- int node = early_cpu_to_node(cpu);
- if (!node_online(node) || !NODE_DATA(node)) {
- ptr = alloc_bootmem_pages(size);
- pr_info("cpu %d has no node %d or node-local memory\n",
- cpu, node);
- pr_debug("per cpu data for cpu%d at %016lx\n",
- cpu, __pa(ptr));
- } else {
- ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
- pr_debug("per cpu data for cpu%d on node%d at %016lx\n",
- cpu, node, __pa(ptr));
- }
-#endif
+ pcpu_unit_size = ret;
- memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start);
- per_cpu_offset(cpu) = ptr - __per_cpu_start;
+ /* alrighty, percpu areas up and running */
+ delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
+ for_each_possible_cpu(cpu) {
+ per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size;
per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
per_cpu(cpu_number, cpu) = cpu;
setup_percpu_segment(cpu);
@@ -125,8 +469,6 @@ void __init setup_per_cpu_areas(void)
*/
if (cpu == boot_cpu_id)
switch_to_new_gdt(cpu);
-
- DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
}
/* indicate the early static arrays will soon be gone */
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 7cdcd16885ed..d2cc6428c587 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -187,40 +187,35 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
/*
* Set up a signal frame.
*/
-#ifdef CONFIG_X86_32
-static const struct {
- u16 poplmovl;
- u32 val;
- u16 int80;
-} __attribute__((packed)) retcode = {
- 0xb858, /* popl %eax; movl $..., %eax */
- __NR_sigreturn,
- 0x80cd, /* int $0x80 */
-};
-
-static const struct {
- u8 movl;
- u32 val;
- u16 int80;
- u8 pad;
-} __attribute__((packed)) rt_retcode = {
- 0xb8, /* movl $..., %eax */
- __NR_rt_sigreturn,
- 0x80cd, /* int $0x80 */
- 0
-};
/*
* Determine which stack to use..
*/
+static unsigned long align_sigframe(unsigned long sp)
+{
+#ifdef CONFIG_X86_32
+ /*
+ * Align the stack pointer according to the i386 ABI,
+ * i.e. so that on function entry ((sp + 4) & 15) == 0.
+ */
+ sp = ((sp + 4) & -16ul) - 4;
+#else /* !CONFIG_X86_32 */
+ sp = round_down(sp, 16) - 8;
+#endif
+ return sp;
+}
+
static inline void __user *
get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
- void **fpstate)
+ void __user **fpstate)
{
- unsigned long sp;
-
/* Default to using normal stack */
- sp = regs->sp;
+ unsigned long sp = regs->sp;
+
+#ifdef CONFIG_X86_64
+ /* redzone */
+ sp -= 128;
+#endif /* CONFIG_X86_64 */
/*
* If we are on the alternate signal stack and would overflow it, don't.
@@ -234,30 +229,52 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
if (sas_ss_flags(sp) == 0)
sp = current->sas_ss_sp + current->sas_ss_size;
} else {
+#ifdef CONFIG_X86_32
/* This is the legacy signal stack switching. */
if ((regs->ss & 0xffff) != __USER_DS &&
!(ka->sa.sa_flags & SA_RESTORER) &&
ka->sa.sa_restorer)
sp = (unsigned long) ka->sa.sa_restorer;
+#endif /* CONFIG_X86_32 */
}
if (used_math()) {
- sp = sp - sig_xstate_size;
- *fpstate = (struct _fpstate *) sp;
+ sp -= sig_xstate_size;
+#ifdef CONFIG_X86_64
+ sp = round_down(sp, 64);
+#endif /* CONFIG_X86_64 */
+ *fpstate = (void __user *)sp;
+
if (save_i387_xstate(*fpstate) < 0)
return (void __user *)-1L;
}
- sp -= frame_size;
- /*
- * Align the stack pointer according to the i386 ABI,
- * i.e. so that on function entry ((sp + 4) & 15) == 0.
- */
- sp = ((sp + 4) & -16ul) - 4;
-
- return (void __user *) sp;
+ return (void __user *)align_sigframe(sp - frame_size);
}
+#ifdef CONFIG_X86_32
+static const struct {
+ u16 poplmovl;
+ u32 val;
+ u16 int80;
+} __attribute__((packed)) retcode = {
+ 0xb858, /* popl %eax; movl $..., %eax */
+ __NR_sigreturn,
+ 0x80cd, /* int $0x80 */
+};
+
+static const struct {
+ u8 movl;
+ u32 val;
+ u16 int80;
+ u8 pad;
+} __attribute__((packed)) rt_retcode = {
+ 0xb8, /* movl $..., %eax */
+ __NR_rt_sigreturn,
+ 0x80cd, /* int $0x80 */
+ 0
+};
+
static int
__setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
struct pt_regs *regs)
@@ -388,24 +405,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
return 0;
}
#else /* !CONFIG_X86_32 */
-/*
- * Determine which stack to use..
- */
-static void __user *
-get_stack(struct k_sigaction *ka, unsigned long sp, unsigned long size)
-{
- /* Default to using normal stack - redzone*/
- sp -= 128;
-
- /* This is the X/Open sanctioned signal stack switching. */
- if (ka->sa.sa_flags & SA_ONSTACK) {
- if (sas_ss_flags(sp) == 0)
- sp = current->sas_ss_sp + current->sas_ss_size;
- }
-
- return (void __user *)round_down(sp - size, 64);
-}
-
static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
sigset_t *set, struct pt_regs *regs)
{
@@ -414,15 +413,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
int err = 0;
struct task_struct *me = current;
- if (used_math()) {
- fp = get_stack(ka, regs->sp, sig_xstate_size);
- frame = (void __user *)round_down(
- (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
-
- if (save_i387_xstate(fp) < 0)
- return -EFAULT;
- } else
- frame = get_stack(ka, regs->sp, sizeof(struct rt_sigframe)) - 8;
+ frame = get_sigframe(ka, regs, sizeof(struct rt_sigframe), &fp);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
return -EFAULT;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9ce666387f37..ef7d10170c30 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -112,11 +112,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_core_map);
DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);
-static atomic_t init_deasserted;
-
-
-/* Set if we find a B stepping CPU */
-static int __cpuinitdata smp_b_stepping;
+atomic_t init_deasserted;
#if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)
@@ -271,8 +267,6 @@ static void __cpuinit smp_callin(void)
cpumask_set_cpu(cpuid, cpu_callin_mask);
}
-static int __cpuinitdata unsafe_smp;
-
/*
* Activate a secondary processor.
*/
@@ -340,76 +334,6 @@ notrace static void __cpuinit start_secondary(void *unused)
cpu_idle();
}
-static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c)
-{
- /*
- * Mask B, Pentium, but not Pentium MMX
- */
- if (c->x86_vendor == X86_VENDOR_INTEL &&
- c->x86 == 5 &&
- c->x86_mask >= 1 && c->x86_mask <= 4 &&
- c->x86_model <= 3)
- /*
- * Remember we have B step Pentia with bugs
- */
- smp_b_stepping = 1;
-
- /*
- * Certain Athlons might work (for various values of 'work') in SMP
- * but they are not certified as MP capable.
- */
- if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
-
- if (num_possible_cpus() == 1)
- goto valid_k7;
-
- /* Athlon 660/661 is valid. */
- if ((c->x86_model == 6) && ((c->x86_mask == 0) ||
- (c->x86_mask == 1)))
- goto valid_k7;
-
- /* Duron 670 is valid */
- if ((c->x86_model == 7) && (c->x86_mask == 0))
- goto valid_k7;
-
- /*
- * Athlon 662, Duron 671, and Athlon >model 7 have capability
- * bit. It's worth noting that the A5 stepping (662) of some
- * Athlon XP's have the MP bit set.
- * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for
- * more.
- */
- if (((c->x86_model == 6) && (c->x86_mask >= 2)) ||
- ((c->x86_model == 7) && (c->x86_mask >= 1)) ||
- (c->x86_model > 7))
- if (cpu_has_mp)
- goto valid_k7;
-
- /* If we get here, not a certified SMP capable AMD system. */
- unsafe_smp = 1;
- }
-
-valid_k7:
- ;
-}
-
-static void __cpuinit smp_checks(void)
-{
- if (smp_b_stepping)
- printk(KERN_WARNING "WARNING: SMP operation may be unreliable"
- "with B stepping processors.\n");
-
- /*
- * Don't taint if we are running SMP kernel on a single non-MP
- * approved Athlon
- */
- if (unsafe_smp && num_online_cpus() > 1) {
- printk(KERN_INFO "WARNING: This combination of AMD"
- "processors is not suitable for SMP.\n");
- add_taint(TAINT_UNSAFE_SMP);
- }
-}
-
/*
* The bootstrap kernel entry code has set these up. Save them for
* a given CPU
@@ -423,7 +347,6 @@ void __cpuinit smp_store_cpu_info(int id)
c->cpu_index = id;
if (id != 0)
identify_secondary_cpu(c);
- smp_apply_quirks(c);
}
@@ -614,12 +537,6 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
unsigned long send_status, accept_status = 0;
int maxlvt, num_starts, j;
- if (get_uv_system_type() == UV_NON_UNIQUE_APIC) {
- send_status = uv_wakeup_secondary(phys_apicid, start_eip);
- atomic_set(&init_deasserted, 1);
- return send_status;
- }
-
maxlvt = lapic_get_maxlvt();
/*
@@ -748,7 +665,8 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
/*
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
* (ie clustered apic addressing mode), this is a LOGICAL apic ID.
- * Returns zero if CPU booted OK, else error code from ->wakeup_cpu.
+ * Returns zero if CPU booted OK, else error code from
+ * ->wakeup_secondary_cpu.
*/
static int __cpuinit do_boot_cpu(int apicid, int cpu)
{
@@ -835,9 +753,13 @@ do_rest:
}
/*
- * Starting actual IPI sequence...
+ * Kick the secondary CPU. Use the method in the APIC driver
+ * if it's defined - or use an INIT boot APIC message otherwise:
*/
- boot_error = apic->wakeup_cpu(apicid, start_ip);
+ if (apic->wakeup_secondary_cpu)
+ boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
+ else
+ boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip);
if (!boot_error) {
/*
@@ -1194,7 +1116,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
pr_debug("Boot done.\n");
impress_friends();
- smp_checks();
#ifdef CONFIG_X86_IO_APIC
setup_ioapic_dest();
#endif
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index f04549afcfe9..d038b9c45cf8 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -314,8 +314,6 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
int locals = 0;
struct bau_desc *bau_desc;
- WARN_ON(!in_atomic());
-
cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
uv_cpu = uv_blade_processor_id();
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index c05430ac1b44..a1d288327ff0 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -118,47 +118,6 @@ die_if_kernel(const char *str, struct pt_regs *regs, long err)
if (!user_mode_vm(regs))
die(str, regs, err);
}
-
-/*
- * Perform the lazy TSS's I/O bitmap copy. If the TSS has an
- * invalid offset set (the LAZY one) and the faulting thread has
- * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS,
- * we set the offset field correctly and return 1.
- */
-static int lazy_iobitmap_copy(void)
-{
- struct thread_struct *thread;
- struct tss_struct *tss;
- int cpu;
-
- cpu = get_cpu();
- tss = &per_cpu(init_tss, cpu);
- thread = &current->thread;
-
- if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY &&
- thread->io_bitmap_ptr) {
- memcpy(tss->io_bitmap, thread->io_bitmap_ptr,
- thread->io_bitmap_max);
- /*
- * If the previously set map was extending to higher ports
- * than the current one, pad extra space with 0xff (no access).
- */
- if (thread->io_bitmap_max < tss->io_bitmap_max) {
- memset((char *) tss->io_bitmap +
- thread->io_bitmap_max, 0xff,
- tss->io_bitmap_max - thread->io_bitmap_max);
- }
- tss->io_bitmap_max = thread->io_bitmap_max;
- tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
- tss->io_bitmap_owner = thread;
- put_cpu();
-
- return 1;
- }
- put_cpu();
-
- return 0;
-}
#endif
static void __kprobes
@@ -309,11 +268,6 @@ do_general_protection(struct pt_regs *regs, long error_code)
conditional_sti(regs);
#ifdef CONFIG_X86_32
- if (lazy_iobitmap_copy()) {
- /* restart the faulting instruction */
- return;
- }
-
if (regs->flags & X86_VM_MASK)
goto gp_in_vm86;
#endif
diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c
new file mode 100644
index 000000000000..2ffb6c53326e
--- /dev/null
+++ b/arch/x86/kernel/uv_time.c
@@ -0,0 +1,393 @@
+/*
+ * SGI RTC clock/timer routines.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Copyright (c) 2009 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) Dimitri Sivanich
+ */
+#include <linux/clockchips.h>
+
+#include <asm/uv/uv_mmrs.h>
+#include <asm/uv/uv_hub.h>
+#include <asm/uv/bios.h>
+#include <asm/uv/uv.h>
+#include <asm/apic.h>
+#include <asm/cpu.h>
+
+#define RTC_NAME "sgi_rtc"
+
+static cycle_t uv_read_rtc(void);
+static int uv_rtc_next_event(unsigned long, struct clock_event_device *);
+static void uv_rtc_timer_setup(enum clock_event_mode,
+ struct clock_event_device *);
+
+static struct clocksource clocksource_uv = {
+ .name = RTC_NAME,
+ .rating = 400,
+ .read = uv_read_rtc,
+ .mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK,
+ .shift = 10,
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static struct clock_event_device clock_event_device_uv = {
+ .name = RTC_NAME,
+ .features = CLOCK_EVT_FEAT_ONESHOT,
+ .shift = 20,
+ .rating = 400,
+ .irq = -1,
+ .set_next_event = uv_rtc_next_event,
+ .set_mode = uv_rtc_timer_setup,
+ .event_handler = NULL,
+};
+
+static DEFINE_PER_CPU(struct clock_event_device, cpu_ced);
+
+/* There is one of these allocated per node */
+struct uv_rtc_timer_head {
+ spinlock_t lock;
+ /* next cpu waiting for timer, local node relative: */
+ int next_cpu;
+ /* number of cpus on this node: */
+ int ncpus;
+ struct {
+ int lcpu; /* systemwide logical cpu number */
+ u64 expires; /* next timer expiration for this cpu */
+ } cpu[1];
+};
+
+/*
+ * Access to uv_rtc_timer_head via blade id.
+ */
+static struct uv_rtc_timer_head **blade_info __read_mostly;
+
+static int uv_rtc_enable;
+
+/*
+ * Hardware interface routines
+ */
+
+/* Send IPIs to another node */
+static void uv_rtc_send_IPI(int cpu)
+{
+ unsigned long apicid, val;
+ int pnode;
+
+ apicid = cpu_physical_id(cpu);
+ pnode = uv_apicid_to_pnode(apicid);
+ val = (1UL << UVH_IPI_INT_SEND_SHFT) |
+ (apicid << UVH_IPI_INT_APIC_ID_SHFT) |
+ (GENERIC_INTERRUPT_VECTOR << UVH_IPI_INT_VECTOR_SHFT);
+
+ uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
+}
+
+/* Check for an RTC interrupt pending */
+static int uv_intr_pending(int pnode)
+{
+ return uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) &
+ UVH_EVENT_OCCURRED0_RTC1_MASK;
+}
+
+/* Setup interrupt and return non-zero if early expiration occurred. */
+static int uv_setup_intr(int cpu, u64 expires)
+{
+ u64 val;
+ int pnode = uv_cpu_to_pnode(cpu);
+
+ uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG,
+ UVH_RTC1_INT_CONFIG_M_MASK);
+ uv_write_global_mmr64(pnode, UVH_INT_CMPB, -1L);
+
+ uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS,
+ UVH_EVENT_OCCURRED0_RTC1_MASK);
+
+ val = (GENERIC_INTERRUPT_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) |
+ ((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT);
+
+ /* Set configuration */
+ uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, val);
+ /* Initialize comparator value */
+ uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires);
+
+ return (expires < uv_read_rtc() && !uv_intr_pending(pnode));
+}
+
+/*
+ * Per-cpu timer tracking routines
+ */
+
+static __init void uv_rtc_deallocate_timers(void)
+{
+ int bid;
+
+ for_each_possible_blade(bid) {
+ kfree(blade_info[bid]);
+ }
+ kfree(blade_info);
+}
+
+/* Allocate per-node list of cpu timer expiration times. */
+static __init int uv_rtc_allocate_timers(void)
+{
+ int cpu;
+
+ blade_info = kmalloc(uv_possible_blades * sizeof(void *), GFP_KERNEL);
+ if (!blade_info)
+ return -ENOMEM;
+ memset(blade_info, 0, uv_possible_blades * sizeof(void *));
+
+ for_each_present_cpu(cpu) {
+ int nid = cpu_to_node(cpu);
+ int bid = uv_cpu_to_blade_id(cpu);
+ int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id;
+ struct uv_rtc_timer_head *head = blade_info[bid];
+
+ if (!head) {
+ head = kmalloc_node(sizeof(struct uv_rtc_timer_head) +
+ (uv_blade_nr_possible_cpus(bid) *
+ 2 * sizeof(u64)),
+ GFP_KERNEL, nid);
+ if (!head) {
+ uv_rtc_deallocate_timers();
+ return -ENOMEM;
+ }
+ spin_lock_init(&head->lock);
+ head->ncpus = uv_blade_nr_possible_cpus(bid);
+ head->next_cpu = -1;
+ blade_info[bid] = head;
+ }
+
+ head->cpu[bcpu].lcpu = cpu;
+ head->cpu[bcpu].expires = ULLONG_MAX;
+ }
+
+ return 0;
+}
+
+/* Find and set the next expiring timer. */
+static void uv_rtc_find_next_timer(struct uv_rtc_timer_head *head, int pnode)
+{
+ u64 lowest = ULLONG_MAX;
+ int c, bcpu = -1;
+
+ head->next_cpu = -1;
+ for (c = 0; c < head->ncpus; c++) {
+ u64 exp = head->cpu[c].expires;
+ if (exp < lowest) {
+ bcpu = c;
+ lowest = exp;
+ }
+ }
+ if (bcpu >= 0) {
+ head->next_cpu = bcpu;
+ c = head->cpu[bcpu].lcpu;
+ if (uv_setup_intr(c, lowest))
+ /* If we didn't set it up in time, trigger */
+ uv_rtc_send_IPI(c);
+ } else {
+ uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG,
+ UVH_RTC1_INT_CONFIG_M_MASK);
+ }
+}
+
+/*
+ * Set expiration time for current cpu.
+ *
+ * Returns 1 if we missed the expiration time.
+ */
+static int uv_rtc_set_timer(int cpu, u64 expires)
+{
+ int pnode = uv_cpu_to_pnode(cpu);
+ int bid = uv_cpu_to_blade_id(cpu);
+ struct uv_rtc_timer_head *head = blade_info[bid];
+ int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id;
+ u64 *t = &head->cpu[bcpu].expires;
+ unsigned long flags;
+ int next_cpu;
+
+ spin_lock_irqsave(&head->lock, flags);
+
+ next_cpu = head->next_cpu;
+ *t = expires;
+ /* Will this one be next to go off? */
+ if (next_cpu < 0 || bcpu == next_cpu ||
+ expires < head->cpu[next_cpu].expires) {
+ head->next_cpu = bcpu;
+ if (uv_setup_intr(cpu, expires)) {
+ *t = ULLONG_MAX;
+ uv_rtc_find_next_timer(head, pnode);
+ spin_unlock_irqrestore(&head->lock, flags);
+ return 1;
+ }
+ }
+
+ spin_unlock_irqrestore(&head->lock, flags);
+ return 0;
+}
+
+/*
+ * Unset expiration time for current cpu.
+ *
+ * Returns 1 if this timer was pending.
+ */
+static int uv_rtc_unset_timer(int cpu)
+{
+ int pnode = uv_cpu_to_pnode(cpu);
+ int bid = uv_cpu_to_blade_id(cpu);
+ struct uv_rtc_timer_head *head = blade_info[bid];
+ int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id;
+ u64 *t = &head->cpu[bcpu].expires;
+ unsigned long flags;
+ int rc = 0;
+
+ spin_lock_irqsave(&head->lock, flags);
+
+ if (head->next_cpu == bcpu && uv_read_rtc() >= *t)
+ rc = 1;
+
+ *t = ULLONG_MAX;
+
+ /* Was the hardware setup for this timer? */
+ if (head->next_cpu == bcpu)
+ uv_rtc_find_next_timer(head, pnode);
+
+ spin_unlock_irqrestore(&head->lock, flags);
+
+ return rc;
+}
+
+
+/*
+ * Kernel interface routines.
+ */
+
+/*
+ * Read the RTC.
+ */
+static cycle_t uv_read_rtc(void)
+{
+ return (cycle_t)uv_read_local_mmr(UVH_RTC);
+}
+
+/*
+ * Program the next event, relative to now
+ */
+static int uv_rtc_next_event(unsigned long delta,
+ struct clock_event_device *ced)
+{
+ int ced_cpu = cpumask_first(ced->cpumask);
+
+ return uv_rtc_set_timer(ced_cpu, delta + uv_read_rtc());
+}
+
+/*
+ * Setup the RTC timer in oneshot mode
+ */
+static void uv_rtc_timer_setup(enum clock_event_mode mode,
+ struct clock_event_device *evt)
+{
+ int ced_cpu = cpumask_first(evt->cpumask);
+
+ switch (mode) {
+ case CLOCK_EVT_MODE_PERIODIC:
+ case CLOCK_EVT_MODE_ONESHOT:
+ case CLOCK_EVT_MODE_RESUME:
+ /* Nothing to do here yet */
+ break;
+ case CLOCK_EVT_MODE_UNUSED:
+ case CLOCK_EVT_MODE_SHUTDOWN:
+ uv_rtc_unset_timer(ced_cpu);
+ break;
+ }
+}
+
+static void uv_rtc_interrupt(void)
+{
+ struct clock_event_device *ced = &__get_cpu_var(cpu_ced);
+ int cpu = smp_processor_id();
+
+ if (!ced || !ced->event_handler)
+ return;
+
+ if (uv_rtc_unset_timer(cpu) != 1)
+ return;
+
+ ced->event_handler(ced);
+}
+
+static int __init uv_enable_rtc(char *str)
+{
+ uv_rtc_enable = 1;
+
+ return 1;
+}
+__setup("uvrtc", uv_enable_rtc);
+
+static __init void uv_rtc_register_clockevents(struct work_struct *dummy)
+{
+ struct clock_event_device *ced = &__get_cpu_var(cpu_ced);
+
+ *ced = clock_event_device_uv;
+ ced->cpumask = cpumask_of(smp_processor_id());
+ clockevents_register_device(ced);
+}
+
+static __init int uv_rtc_setup_clock(void)
+{
+ int rc;
+
+ if (!uv_rtc_enable || !is_uv_system() || generic_interrupt_extension)
+ return -ENODEV;
+
+ generic_interrupt_extension = uv_rtc_interrupt;
+
+ clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second,
+ clocksource_uv.shift);
+
+ rc = clocksource_register(&clocksource_uv);
+ if (rc) {
+ generic_interrupt_extension = NULL;
+ return rc;
+ }
+
+ /* Setup and register clockevents */
+ rc = uv_rtc_allocate_timers();
+ if (rc) {
+ clocksource_unregister(&clocksource_uv);
+ generic_interrupt_extension = NULL;
+ return rc;
+ }
+
+ clock_event_device_uv.mult = div_sc(sn_rtc_cycles_per_second,
+ NSEC_PER_SEC, clock_event_device_uv.shift);
+
+ clock_event_device_uv.min_delta_ns = NSEC_PER_SEC /
+ sn_rtc_cycles_per_second;
+
+ clock_event_device_uv.max_delta_ns = clocksource_uv.mask *
+ (NSEC_PER_SEC / sn_rtc_cycles_per_second);
+
+ rc = schedule_on_each_cpu(uv_rtc_register_clockevents);
+ if (rc) {
+ clocksource_unregister(&clocksource_uv);
+ generic_interrupt_extension = NULL;
+ uv_rtc_deallocate_timers();
+ }
+
+ return rc;
+}
+arch_initcall(uv_rtc_setup_clock);
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 191a876e9e87..31ffc24eec4d 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -578,7 +578,7 @@ static struct irq_chip piix4_virtual_irq_type = {
static irqreturn_t piix4_master_intr(int irq, void *dev_id)
{
int realirq;
- irq_desc_t *desc;
+ struct irq_desc *desc;
unsigned long flags;
spin_lock_irqsave(&i8259A_lock, flags);
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index fbfced6f6800..5bf54e40c6ef 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -275,3 +275,10 @@ ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
ASSERT((per_cpu__irq_stack_union == 0),
"irq_stack_union is not at start of per-cpu area");
#endif
+
+#ifdef CONFIG_KEXEC
+#include <asm/kexec.h>
+
+ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
+ "kexec control code size is too big")
+#endif
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index c609205df594..74de562812cc 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -22,7 +22,7 @@
#include <asm/paravirt.h>
#include <asm/setup.h>
-#if defined CONFIG_PCI && defined CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT
/*
* Interrupt control on vSMPowered systems:
* ~AC is a shadow of IF. If IF is 'on' AC should be 'off'
@@ -114,7 +114,6 @@ static void __init set_vsmp_pv_ops(void)
}
#endif
-#ifdef CONFIG_PCI
static int is_vsmp = -1;
static void __init detect_vsmp_box(void)
@@ -139,15 +138,6 @@ int is_vsmp_box(void)
return 0;
}
}
-#else
-static void __init detect_vsmp_box(void)
-{
-}
-int is_vsmp_box(void)
-{
- return 0;
-}
-#endif
void __init vsmp_init(void)
{
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index f3a5305b8adf..9fe4ddaa8f6f 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -348,6 +348,11 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
* flush_tlb_user() for both user and kernel mappings unless
* the Page Global Enable (PGE) feature bit is set. */
*dx |= 0x00002000;
+ /* We also lie, and say we're family id 5. 6 or greater
+ * leads to a rdmsr in early_init_intel which we can't handle.
+ * Family ID is returned as bits 8-12 in ax. */
+ *ax &= 0xFFFFF0FF;
+ *ax |= 0x00000500;
break;
case 0x80000000:
/* Futureproof this a little: if they ask how much extended
@@ -594,19 +599,21 @@ static void __init lguest_init_IRQ(void)
/* Some systems map "vectors" to interrupts weirdly. Lguest has
* a straightforward 1 to 1 mapping, so force that here. */
__get_cpu_var(vector_irq)[vector] = i;
- if (vector != SYSCALL_VECTOR) {
- set_intr_gate(vector,
- interrupt[vector-FIRST_EXTERNAL_VECTOR]);
- set_irq_chip_and_handler_name(i, &lguest_irq_controller,
- handle_level_irq,
- "level");
- }
+ if (vector != SYSCALL_VECTOR)
+ set_intr_gate(vector, interrupt[i]);
}
/* This call is required to set up for 4k stacks, where we have
* separate stacks for hard and soft interrupts. */
irq_ctx_init(smp_processor_id());
}
+void lguest_setup_irq(unsigned int irq)
+{
+ irq_to_desc_alloc_cpu(irq, 0);
+ set_irq_chip_and_handler_name(irq, &lguest_irq_controller,
+ handle_level_irq, "level");
+}
+
/*
* Time.
*
diff --git a/arch/x86/math-emu/fpu_aux.c b/arch/x86/math-emu/fpu_aux.c
index 491e737ce547..aa0987088774 100644
--- a/arch/x86/math-emu/fpu_aux.c
+++ b/arch/x86/math-emu/fpu_aux.c
@@ -30,20 +30,29 @@ static void fclex(void)
}
/* Needs to be externally visible */
-void finit(void)
+void finit_task(struct task_struct *tsk)
{
- control_word = 0x037f;
- partial_status = 0;
- top = 0; /* We don't keep top in the status word internally. */
- fpu_tag_word = 0xffff;
+ struct i387_soft_struct *soft = &tsk->thread.xstate->soft;
+ struct address *oaddr, *iaddr;
+ soft->cwd = 0x037f;
+ soft->swd = 0;
+ soft->ftop = 0; /* We don't keep top in the status word internally. */
+ soft->twd = 0xffff;
/* The behaviour is different from that detailed in
Section 15.1.6 of the Intel manual */
- operand_address.offset = 0;
- operand_address.selector = 0;
- instruction_address.offset = 0;
- instruction_address.selector = 0;
- instruction_address.opcode = 0;
- no_ip_update = 1;
+ oaddr = (struct address *)&soft->foo;
+ oaddr->offset = 0;
+ oaddr->selector = 0;
+ iaddr = (struct address *)&soft->fip;
+ iaddr->offset = 0;
+ iaddr->selector = 0;
+ iaddr->opcode = 0;
+ soft->no_update = 1;
+}
+
+void finit(void)
+{
+ finit_task(current);
}
/*
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 2b938a384910..08537747cb58 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,4 +1,4 @@
-obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
+obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
pat.o pgtable.o gup.o
obj-$(CONFIG_SMP) += tlb.o
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index bcc079c282dd..d11745334a67 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -1,5 +1,6 @@
#include <linux/highmem.h>
#include <linux/module.h>
+#include <linux/swap.h> /* for totalram_pages */
void *kmap(struct page *page)
{
@@ -156,3 +157,27 @@ EXPORT_SYMBOL(kmap);
EXPORT_SYMBOL(kunmap);
EXPORT_SYMBOL(kmap_atomic);
EXPORT_SYMBOL(kunmap_atomic);
+
+void __init set_highmem_pages_init(void)
+{
+ struct zone *zone;
+ int nid;
+
+ for_each_zone(zone) {
+ unsigned long zone_start_pfn, zone_end_pfn;
+
+ if (!is_highmem(zone))
+ continue;
+
+ zone_start_pfn = zone->zone_start_pfn;
+ zone_end_pfn = zone_start_pfn + zone->spanned_pages;
+
+ nid = zone_to_nid(zone);
+ printk(KERN_INFO "Initializing %s for node %d (%08lx:%08lx)\n",
+ zone->name, nid, zone_start_pfn, zone_end_pfn);
+
+ add_highpages_with_active_regions(nid, zone_start_pfn,
+ zone_end_pfn);
+ }
+ totalram_pages += totalhigh_pages;
+}
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
new file mode 100644
index 000000000000..15219e0d1243
--- /dev/null
+++ b/arch/x86/mm/init.c
@@ -0,0 +1,393 @@
+#include <linux/ioport.h>
+#include <linux/swap.h>
+
+#include <asm/cacheflush.h>
+#include <asm/e820.h>
+#include <asm/init.h>
+#include <asm/page.h>
+#include <asm/page_types.h>
+#include <asm/sections.h>
+#include <asm/system.h>
+#include <asm/tlbflush.h>
+
+unsigned long __initdata e820_table_start;
+unsigned long __meminitdata e820_table_end;
+unsigned long __meminitdata e820_table_top;
+
+int after_bootmem;
+
+int direct_gbpages
+#ifdef CONFIG_DIRECT_GBPAGES
+ = 1
+#endif
+;
+
+static void __init find_early_table_space(unsigned long end, int use_pse,
+ int use_gbpages)
+{
+ unsigned long puds, pmds, ptes, tables, start;
+
+ puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
+ tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
+
+ if (use_gbpages) {
+ unsigned long extra;
+
+ extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
+ pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
+ } else
+ pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
+
+ tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
+
+ if (use_pse) {
+ unsigned long extra;
+
+ extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
+#ifdef CONFIG_X86_32
+ extra += PMD_SIZE;
+#endif
+ ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ } else
+ ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+ tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
+
+#ifdef CONFIG_X86_32
+ /* for fixmap */
+ tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
+#endif
+
+ /*
+ * RED-PEN putting page tables only on node 0 could
+ * cause a hotspot and fill up ZONE_DMA. The page tables
+ * need roughly 0.5KB per GB.
+ */
+#ifdef CONFIG_X86_32
+ start = 0x7000;
+ e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT,
+ tables, PAGE_SIZE);
+#else /* CONFIG_X86_64 */
+ start = 0x8000;
+ e820_table_start = find_e820_area(start, end, tables, PAGE_SIZE);
+#endif
+ if (e820_table_start == -1UL)
+ panic("Cannot find space for the kernel page tables");
+
+ e820_table_start >>= PAGE_SHIFT;
+ e820_table_end = e820_table_start;
+ e820_table_top = e820_table_start + (tables >> PAGE_SHIFT);
+
+ printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
+ end, e820_table_start << PAGE_SHIFT, e820_table_top << PAGE_SHIFT);
+}
+
+struct map_range {
+ unsigned long start;
+ unsigned long end;
+ unsigned page_size_mask;
+};
+
+#ifdef CONFIG_X86_32
+#define NR_RANGE_MR 3
+#else /* CONFIG_X86_64 */
+#define NR_RANGE_MR 5
+#endif
+
+static int save_mr(struct map_range *mr, int nr_range,
+ unsigned long start_pfn, unsigned long end_pfn,
+ unsigned long page_size_mask)
+{
+ if (start_pfn < end_pfn) {
+ if (nr_range >= NR_RANGE_MR)
+ panic("run out of range for init_memory_mapping\n");
+ mr[nr_range].start = start_pfn<<PAGE_SHIFT;
+ mr[nr_range].end = end_pfn<<PAGE_SHIFT;
+ mr[nr_range].page_size_mask = page_size_mask;
+ nr_range++;
+ }
+
+ return nr_range;
+}
+
+#ifdef CONFIG_X86_64
+static void __init init_gbpages(void)
+{
+ if (direct_gbpages && cpu_has_gbpages)
+ printk(KERN_INFO "Using GB pages for direct mapping\n");
+ else
+ direct_gbpages = 0;
+}
+#else
+static inline void init_gbpages(void)
+{
+}
+#endif
+
+/*
+ * Setup the direct mapping of the physical memory at PAGE_OFFSET.
+ * This runs before bootmem is initialized and gets pages directly from
+ * the physical memory. To access them they are temporarily mapped.
+ */
+unsigned long __init_refok init_memory_mapping(unsigned long start,
+ unsigned long end)
+{
+ unsigned long page_size_mask = 0;
+ unsigned long start_pfn, end_pfn;
+ unsigned long ret = 0;
+ unsigned long pos;
+
+ struct map_range mr[NR_RANGE_MR];
+ int nr_range, i;
+ int use_pse, use_gbpages;
+
+ printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);
+
+ if (!after_bootmem)
+ init_gbpages();
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ /*
+ * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
+ * This will simplify cpa(), which otherwise needs to support splitting
+ * large pages into small in interrupt context, etc.
+ */
+ use_pse = use_gbpages = 0;
+#else
+ use_pse = cpu_has_pse;
+ use_gbpages = direct_gbpages;
+#endif
+
+#ifdef CONFIG_X86_32
+#ifdef CONFIG_X86_PAE
+ set_nx();
+ if (nx_enabled)
+ printk(KERN_INFO "NX (Execute Disable) protection: active\n");
+#endif
+
+ /* Enable PSE if available */
+ if (cpu_has_pse)
+ set_in_cr4(X86_CR4_PSE);
+
+ /* Enable PGE if available */
+ if (cpu_has_pge) {
+ set_in_cr4(X86_CR4_PGE);
+ __supported_pte_mask |= _PAGE_GLOBAL;
+ }
+#endif
+
+ if (use_gbpages)
+ page_size_mask |= 1 << PG_LEVEL_1G;
+ if (use_pse)
+ page_size_mask |= 1 << PG_LEVEL_2M;
+
+ memset(mr, 0, sizeof(mr));
+ nr_range = 0;
+
+ /* head if not big page alignment ? */
+ start_pfn = start >> PAGE_SHIFT;
+ pos = start_pfn << PAGE_SHIFT;
+#ifdef CONFIG_X86_32
+ /*
+ * Don't use a large page for the first 2/4MB of memory
+ * because there are often fixed size MTRRs in there
+ * and overlapping MTRRs into large pages can cause
+ * slowdowns.
+ */
+ if (pos == 0)
+ end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT);
+ else
+ end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
+ << (PMD_SHIFT - PAGE_SHIFT);
+#else /* CONFIG_X86_64 */
+ end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT)
+ << (PMD_SHIFT - PAGE_SHIFT);
+#endif
+ if (end_pfn > (end >> PAGE_SHIFT))
+ end_pfn = end >> PAGE_SHIFT;
+ if (start_pfn < end_pfn) {
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
+ pos = end_pfn << PAGE_SHIFT;
+ }
+
+ /* big page (2M) range */
+ start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
+ << (PMD_SHIFT - PAGE_SHIFT);
+#ifdef CONFIG_X86_32
+ end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
+#else /* CONFIG_X86_64 */
+ end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
+ << (PUD_SHIFT - PAGE_SHIFT);
+ if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)))
+ end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT));
+#endif
+
+ if (start_pfn < end_pfn) {
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
+ page_size_mask & (1<<PG_LEVEL_2M));
+ pos = end_pfn << PAGE_SHIFT;
+ }
+
+#ifdef CONFIG_X86_64
+ /* big page (1G) range */
+ start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
+ << (PUD_SHIFT - PAGE_SHIFT);
+ end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
+ if (start_pfn < end_pfn) {
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
+ page_size_mask &
+ ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
+ pos = end_pfn << PAGE_SHIFT;
+ }
+
+ /* tail is not big page (1G) alignment */
+ start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
+ << (PMD_SHIFT - PAGE_SHIFT);
+ end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
+ if (start_pfn < end_pfn) {
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
+ page_size_mask & (1<<PG_LEVEL_2M));
+ pos = end_pfn << PAGE_SHIFT;
+ }
+#endif
+
+ /* tail is not big page (2M) alignment */
+ start_pfn = pos>>PAGE_SHIFT;
+ end_pfn = end>>PAGE_SHIFT;
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
+
+ /* try to merge same page size and continuous */
+ for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {
+ unsigned long old_start;
+ if (mr[i].end != mr[i+1].start ||
+ mr[i].page_size_mask != mr[i+1].page_size_mask)
+ continue;
+ /* move it */
+ old_start = mr[i].start;
+ memmove(&mr[i], &mr[i+1],
+ (nr_range - 1 - i) * sizeof(struct map_range));
+ mr[i--].start = old_start;
+ nr_range--;
+ }
+
+ for (i = 0; i < nr_range; i++)
+ printk(KERN_DEBUG " %010lx - %010lx page %s\n",
+ mr[i].start, mr[i].end,
+ (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
+ (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
+
+ /*
+ * Find space for the kernel direct mapping tables.
+ *
+ * Later we should allocate these tables in the local node of the
+ * memory mapped. Unfortunately this is done currently before the
+ * nodes are discovered.
+ */
+ if (!after_bootmem)
+ find_early_table_space(end, use_pse, use_gbpages);
+
+#ifdef CONFIG_X86_32
+ for (i = 0; i < nr_range; i++)
+ kernel_physical_mapping_init(mr[i].start, mr[i].end,
+ mr[i].page_size_mask);
+ ret = end;
+#else /* CONFIG_X86_64 */
+ for (i = 0; i < nr_range; i++)
+ ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
+ mr[i].page_size_mask);
+#endif
+
+#ifdef CONFIG_X86_32
+ early_ioremap_page_table_range_init();
+
+ load_cr3(swapper_pg_dir);
+#endif
+
+#ifdef CONFIG_X86_64
+ if (!after_bootmem)
+ mmu_cr4_features = read_cr4();
+#endif
+ __flush_tlb_all();
+
+ if (!after_bootmem && e820_table_end > e820_table_start)
+ reserve_early(e820_table_start << PAGE_SHIFT,
+ e820_table_end << PAGE_SHIFT, "PGTABLE");
+
+ if (!after_bootmem)
+ early_memtest(start, end);
+
+ return ret >> PAGE_SHIFT;
+}
+
+
+/*
+ * devmem_is_allowed() checks to see if /dev/mem access to a certain address
+ * is valid. The argument is a physical page number.
+ *
+ *
+ * On x86, access has to be given to the first megabyte of ram because that area
+ * contains bios code and data regions used by X and dosemu and similar apps.
+ * Access has to be given to non-kernel-ram areas as well, these contain the PCI
+ * mmio resources as well as potential bios/acpi data regions.
+ */
+int devmem_is_allowed(unsigned long pagenr)
+{
+ if (pagenr <= 256)
+ return 1;
+ if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
+ return 0;
+ if (!page_is_ram(pagenr))
+ return 1;
+ return 0;
+}
+
+void free_init_pages(char *what, unsigned long begin, unsigned long end)
+{
+ unsigned long addr = begin;
+
+ if (addr >= end)
+ return;
+
+ /*
+ * If debugging page accesses then do not free this memory but
+ * mark them not present - any buggy init-section access will
+ * create a kernel page fault:
+ */
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n",
+ begin, PAGE_ALIGN(end));
+ set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
+#else
+ /*
+ * We just marked the kernel text read only above, now that
+ * we are going to free part of that, we need to make that
+ * writeable first.
+ */
+ set_memory_rw(begin, (end - begin) >> PAGE_SHIFT);
+
+ printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
+
+ for (; addr < end; addr += PAGE_SIZE) {
+ ClearPageReserved(virt_to_page(addr));
+ init_page_count(virt_to_page(addr));
+ memset((void *)(addr & ~(PAGE_SIZE-1)),
+ POISON_FREE_INITMEM, PAGE_SIZE);
+ free_page(addr);
+ totalram_pages++;
+ }
+#endif
+}
+
+void free_initmem(void)
+{
+ free_init_pages("unused kernel memory",
+ (unsigned long)(&__init_begin),
+ (unsigned long)(&__init_end));
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+ free_init_pages("initrd memory", start, end);
+}
+#endif
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 06708ee94aa4..db81e9a8556b 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -49,8 +49,7 @@
#include <asm/paravirt.h>
#include <asm/setup.h>
#include <asm/cacheflush.h>
-
-unsigned int __VMALLOC_RESERVE = 128 << 20;
+#include <asm/init.h>
unsigned long max_low_pfn_mapped;
unsigned long max_pfn_mapped;
@@ -60,19 +59,14 @@ unsigned long highstart_pfn, highend_pfn;
static noinline int do_test_wp_bit(void);
-
-static unsigned long __initdata table_start;
-static unsigned long __meminitdata table_end;
-static unsigned long __meminitdata table_top;
-
-static int __initdata after_init_bootmem;
+bool __read_mostly __vmalloc_start_set = false;
static __init void *alloc_low_page(void)
{
- unsigned long pfn = table_end++;
+ unsigned long pfn = e820_table_end++;
void *adr;
- if (pfn >= table_top)
+ if (pfn >= e820_table_top)
panic("alloc_low_page: ran out of memory");
adr = __va(pfn * PAGE_SIZE);
@@ -92,7 +86,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
#ifdef CONFIG_X86_PAE
if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
- if (after_init_bootmem)
+ if (after_bootmem)
pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
else
pmd_table = (pmd_t *)alloc_low_page();
@@ -119,7 +113,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
pte_t *page_table = NULL;
- if (after_init_bootmem) {
+ if (after_bootmem) {
#ifdef CONFIG_DEBUG_PAGEALLOC
page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
#endif
@@ -137,6 +131,23 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
return pte_offset_kernel(pmd, 0);
}
+pmd_t * __init populate_extra_pmd(unsigned long vaddr)
+{
+ int pgd_idx = pgd_index(vaddr);
+ int pmd_idx = pmd_index(vaddr);
+
+ return one_md_table_init(swapper_pg_dir + pgd_idx) + pmd_idx;
+}
+
+pte_t * __init populate_extra_pte(unsigned long vaddr)
+{
+ int pte_idx = pte_index(vaddr);
+ pmd_t *pmd;
+
+ pmd = populate_extra_pmd(vaddr);
+ return one_page_table_init(pmd) + pte_idx;
+}
+
static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
unsigned long vaddr, pte_t *lastpte)
{
@@ -153,12 +164,12 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
if (pmd_idx_kmap_begin != pmd_idx_kmap_end
&& (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin
&& (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end
- && ((__pa(pte) >> PAGE_SHIFT) < table_start
- || (__pa(pte) >> PAGE_SHIFT) >= table_end)) {
+ && ((__pa(pte) >> PAGE_SHIFT) < e820_table_start
+ || (__pa(pte) >> PAGE_SHIFT) >= e820_table_end)) {
pte_t *newpte;
int i;
- BUG_ON(after_init_bootmem);
+ BUG_ON(after_bootmem);
newpte = alloc_low_page();
for (i = 0; i < PTRS_PER_PTE; i++)
set_pte(newpte + i, pte[i]);
@@ -227,11 +238,14 @@ static inline int is_kernel_text(unsigned long addr)
* of max_low_pfn pages, by creating page tables starting from address
* PAGE_OFFSET:
*/
-static void __init kernel_physical_mapping_init(pgd_t *pgd_base,
- unsigned long start_pfn,
- unsigned long end_pfn,
- int use_pse)
+unsigned long __init
+kernel_physical_mapping_init(unsigned long start,
+ unsigned long end,
+ unsigned long page_size_mask)
{
+ int use_pse = page_size_mask == (1<<PG_LEVEL_2M);
+ unsigned long start_pfn, end_pfn;
+ pgd_t *pgd_base = swapper_pg_dir;
int pgd_idx, pmd_idx, pte_ofs;
unsigned long pfn;
pgd_t *pgd;
@@ -240,6 +254,9 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base,
unsigned pages_2m, pages_4k;
int mapping_iter;
+ start_pfn = start >> PAGE_SHIFT;
+ end_pfn = end >> PAGE_SHIFT;
+
/*
* First iteration will setup identity mapping using large/small pages
* based on use_pse, with other attributes same as set by
@@ -354,26 +371,6 @@ repeat:
mapping_iter = 2;
goto repeat;
}
-}
-
-/*
- * devmem_is_allowed() checks to see if /dev/mem access to a certain address
- * is valid. The argument is a physical page number.
- *
- *
- * On x86, access has to be given to the first megabyte of ram because that area
- * contains bios code and data regions used by X and dosemu and similar apps.
- * Access has to be given to non-kernel-ram areas as well, these contain the PCI
- * mmio resources as well as potential bios/acpi data regions.
- */
-int devmem_is_allowed(unsigned long pagenr)
-{
- if (pagenr <= 256)
- return 1;
- if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
- return 0;
- if (!page_is_ram(pagenr))
- return 1;
return 0;
}
@@ -469,22 +466,10 @@ void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn,
work_with_active_regions(nid, add_highpages_work_fn, &data);
}
-#ifndef CONFIG_NUMA
-static void __init set_highmem_pages_init(void)
-{
- add_highpages_with_active_regions(0, highstart_pfn, highend_pfn);
-
- totalram_pages += totalhigh_pages;
-}
-#endif /* !CONFIG_NUMA */
-
#else
static inline void permanent_kmaps_init(pgd_t *pgd_base)
{
}
-static inline void set_highmem_pages_init(void)
-{
-}
#endif /* CONFIG_HIGHMEM */
void __init native_pagetable_setup_start(pgd_t *base)
@@ -542,8 +527,9 @@ void __init native_pagetable_setup_done(pgd_t *base)
* be partially populated, and so it avoids stomping on any existing
* mappings.
*/
-static void __init early_ioremap_page_table_range_init(pgd_t *pgd_base)
+void __init early_ioremap_page_table_range_init(void)
{
+ pgd_t *pgd_base = swapper_pg_dir;
unsigned long vaddr, end;
/*
@@ -638,7 +624,7 @@ static int __init noexec_setup(char *str)
}
early_param("noexec", noexec_setup);
-static void __init set_nx(void)
+void __init set_nx(void)
{
unsigned int v[4], l, h;
@@ -790,6 +776,8 @@ void __init initmem_init(unsigned long start_pfn,
#ifdef CONFIG_FLATMEM
max_mapnr = num_physpages;
#endif
+ __vmalloc_start_set = true;
+
printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
pages_to_mb(max_low_pfn));
@@ -811,176 +799,66 @@ static void __init zone_sizes_init(void)
free_area_init_nodes(max_zone_pfns);
}
+static unsigned long __init setup_node_bootmem(int nodeid,
+ unsigned long start_pfn,
+ unsigned long end_pfn,
+ unsigned long bootmap)
+{
+ unsigned long bootmap_size;
+
+ /* don't touch min_low_pfn */
+ bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
+ bootmap >> PAGE_SHIFT,
+ start_pfn, end_pfn);
+ printk(KERN_INFO " node %d low ram: %08lx - %08lx\n",
+ nodeid, start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
+ printk(KERN_INFO " node %d bootmap %08lx - %08lx\n",
+ nodeid, bootmap, bootmap + bootmap_size);
+ free_bootmem_with_active_regions(nodeid, end_pfn);
+ early_res_to_bootmem(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
+
+ return bootmap + bootmap_size;
+}
+
void __init setup_bootmem_allocator(void)
{
- int i;
+ int nodeid;
unsigned long bootmap_size, bootmap;
/*
* Initialize the boot-time allocator (with low memory only):
*/
bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT;
- bootmap = find_e820_area(min_low_pfn<<PAGE_SHIFT,
- max_pfn_mapped<<PAGE_SHIFT, bootmap_size,
+ bootmap = find_e820_area(0, max_pfn_mapped<<PAGE_SHIFT, bootmap_size,
PAGE_SIZE);
if (bootmap == -1L)
panic("Cannot find bootmem map of size %ld\n", bootmap_size);
reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
- /* don't touch min_low_pfn */
- bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT,
- min_low_pfn, max_low_pfn);
printk(KERN_INFO " mapped low ram: 0 - %08lx\n",
max_pfn_mapped<<PAGE_SHIFT);
- printk(KERN_INFO " low ram: %08lx - %08lx\n",
- min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT);
- printk(KERN_INFO " bootmap %08lx - %08lx\n",
- bootmap, bootmap + bootmap_size);
- for_each_online_node(i)
- free_bootmem_with_active_regions(i, max_low_pfn);
- early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
-
- after_init_bootmem = 1;
-}
-
-static void __init find_early_table_space(unsigned long end, int use_pse)
-{
- unsigned long puds, pmds, ptes, tables, start;
-
- puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
- tables = PAGE_ALIGN(puds * sizeof(pud_t));
+ printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT);
- pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
- tables += PAGE_ALIGN(pmds * sizeof(pmd_t));
-
- if (use_pse) {
- unsigned long extra;
-
- extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
- extra += PMD_SIZE;
- ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
- } else
- ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
-
- tables += PAGE_ALIGN(ptes * sizeof(pte_t));
-
- /* for fixmap */
- tables += PAGE_ALIGN(__end_of_fixed_addresses * sizeof(pte_t));
-
- /*
- * RED-PEN putting page tables only on node 0 could
- * cause a hotspot and fill up ZONE_DMA. The page tables
- * need roughly 0.5KB per GB.
- */
- start = 0x7000;
- table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT,
- tables, PAGE_SIZE);
- if (table_start == -1UL)
- panic("Cannot find space for the kernel page tables");
-
- table_start >>= PAGE_SHIFT;
- table_end = table_start;
- table_top = table_start + (tables>>PAGE_SHIFT);
-
- printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
- end, table_start << PAGE_SHIFT,
- (table_start << PAGE_SHIFT) + tables);
-}
+ for_each_online_node(nodeid) {
+ unsigned long start_pfn, end_pfn;
-unsigned long __init_refok init_memory_mapping(unsigned long start,
- unsigned long end)
-{
- pgd_t *pgd_base = swapper_pg_dir;
- unsigned long start_pfn, end_pfn;
- unsigned long big_page_start;
-#ifdef CONFIG_DEBUG_PAGEALLOC
- /*
- * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
- * This will simplify cpa(), which otherwise needs to support splitting
- * large pages into small in interrupt context, etc.
- */
- int use_pse = 0;
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+ start_pfn = node_start_pfn[nodeid];
+ end_pfn = node_end_pfn[nodeid];
+ if (start_pfn > max_low_pfn)
+ continue;
+ if (end_pfn > max_low_pfn)
+ end_pfn = max_low_pfn;
#else
- int use_pse = cpu_has_pse;
+ start_pfn = 0;
+ end_pfn = max_low_pfn;
#endif
-
- /*
- * Find space for the kernel direct mapping tables.
- */
- if (!after_init_bootmem)
- find_early_table_space(end, use_pse);
-
-#ifdef CONFIG_X86_PAE
- set_nx();
- if (nx_enabled)
- printk(KERN_INFO "NX (Execute Disable) protection: active\n");
-#endif
-
- /* Enable PSE if available */
- if (cpu_has_pse)
- set_in_cr4(X86_CR4_PSE);
-
- /* Enable PGE if available */
- if (cpu_has_pge) {
- set_in_cr4(X86_CR4_PGE);
- __supported_pte_mask |= _PAGE_GLOBAL;
- }
-
- /*
- * Don't use a large page for the first 2/4MB of memory
- * because there are often fixed size MTRRs in there
- * and overlapping MTRRs into large pages can cause
- * slowdowns.
- */
- big_page_start = PMD_SIZE;
-
- if (start < big_page_start) {
- start_pfn = start >> PAGE_SHIFT;
- end_pfn = min(big_page_start>>PAGE_SHIFT, end>>PAGE_SHIFT);
- } else {
- /* head is not big page alignment ? */
- start_pfn = start >> PAGE_SHIFT;
- end_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
- << (PMD_SHIFT - PAGE_SHIFT);
- }
- if (start_pfn < end_pfn)
- kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, 0);
-
- /* big page range */
- start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
- << (PMD_SHIFT - PAGE_SHIFT);
- if (start_pfn < (big_page_start >> PAGE_SHIFT))
- start_pfn = big_page_start >> PAGE_SHIFT;
- end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
- if (start_pfn < end_pfn)
- kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn,
- use_pse);
-
- /* tail is not big page alignment ? */
- start_pfn = end_pfn;
- if (start_pfn > (big_page_start>>PAGE_SHIFT)) {
- end_pfn = end >> PAGE_SHIFT;
- if (start_pfn < end_pfn)
- kernel_physical_mapping_init(pgd_base, start_pfn,
- end_pfn, 0);
+ bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn,
+ bootmap);
}
- early_ioremap_page_table_range_init(pgd_base);
-
- load_cr3(swapper_pg_dir);
-
- __flush_tlb_all();
-
- if (!after_init_bootmem)
- reserve_early(table_start << PAGE_SHIFT,
- table_end << PAGE_SHIFT, "PGTABLE");
-
- if (!after_init_bootmem)
- early_memtest(start, end);
-
- return end >> PAGE_SHIFT;
+ after_bootmem = 1;
}
-
/*
* paging_init() sets up the page tables - note that the first 8MB are
* already mapped by head.S.
@@ -1214,52 +1092,6 @@ void mark_rodata_ro(void)
}
#endif
-void free_init_pages(char *what, unsigned long begin, unsigned long end)
-{
-#ifdef CONFIG_DEBUG_PAGEALLOC
- /*
- * If debugging page accesses then do not free this memory but
- * mark them not present - any buggy init-section access will
- * create a kernel page fault:
- */
- printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n",
- begin, PAGE_ALIGN(end));
- set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
-#else
- unsigned long addr;
-
- /*
- * We just marked the kernel text read only above, now that
- * we are going to free part of that, we need to make that
- * writeable first.
- */
- set_memory_rw(begin, (end - begin) >> PAGE_SHIFT);
-
- for (addr = begin; addr < end; addr += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(addr));
- init_page_count(virt_to_page(addr));
- memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
- free_page(addr);
- totalram_pages++;
- }
- printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
-#endif
-}
-
-void free_initmem(void)
-{
- free_init_pages("unused kernel memory",
- (unsigned long)(&__init_begin),
- (unsigned long)(&__init_end));
-}
-
-#ifdef CONFIG_BLK_DEV_INITRD
-void free_initrd_mem(unsigned long start, unsigned long end)
-{
- free_init_pages("initrd memory", start, end);
-}
-#endif
-
int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
int flags)
{
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index e6d36b490250..54efa57d1c03 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -48,6 +48,7 @@
#include <asm/kdebug.h>
#include <asm/numa.h>
#include <asm/cacheflush.h>
+#include <asm/init.h>
/*
* end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
@@ -61,12 +62,6 @@ static unsigned long dma_reserve __initdata;
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-int direct_gbpages
-#ifdef CONFIG_DIRECT_GBPAGES
- = 1
-#endif
-;
-
static int __init parse_direct_gbpages_off(char *arg)
{
direct_gbpages = 0;
@@ -87,12 +82,10 @@ early_param("gbpages", parse_direct_gbpages_on);
* around without checking the pgd every time.
*/
-int after_bootmem;
-
pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP;
EXPORT_SYMBOL_GPL(__supported_pte_mask);
-static int do_not_nx __cpuinitdata;
+static int disable_nx __cpuinitdata;
/*
* noexec=on|off
@@ -107,9 +100,9 @@ static int __init nonx_setup(char *str)
return -EINVAL;
if (!strncmp(str, "on", 2)) {
__supported_pte_mask |= _PAGE_NX;
- do_not_nx = 0;
+ disable_nx = 0;
} else if (!strncmp(str, "off", 3)) {
- do_not_nx = 1;
+ disable_nx = 1;
__supported_pte_mask &= ~_PAGE_NX;
}
return 0;
@@ -121,7 +114,7 @@ void __cpuinit check_efer(void)
unsigned long efer;
rdmsrl(MSR_EFER, efer);
- if (!(efer & EFER_NX) || do_not_nx)
+ if (!(efer & EFER_NX) || disable_nx)
__supported_pte_mask &= ~_PAGE_NX;
}
@@ -168,34 +161,51 @@ static __ref void *spp_getpage(void)
return ptr;
}
-void
-set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
+static pud_t *fill_pud(pgd_t *pgd, unsigned long vaddr)
{
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
+ if (pgd_none(*pgd)) {
+ pud_t *pud = (pud_t *)spp_getpage();
+ pgd_populate(&init_mm, pgd, pud);
+ if (pud != pud_offset(pgd, 0))
+ printk(KERN_ERR "PAGETABLE BUG #00! %p <-> %p\n",
+ pud, pud_offset(pgd, 0));
+ }
+ return pud_offset(pgd, vaddr);
+}
- pud = pud_page + pud_index(vaddr);
+static pmd_t *fill_pmd(pud_t *pud, unsigned long vaddr)
+{
if (pud_none(*pud)) {
- pmd = (pmd_t *) spp_getpage();
+ pmd_t *pmd = (pmd_t *) spp_getpage();
pud_populate(&init_mm, pud, pmd);
- if (pmd != pmd_offset(pud, 0)) {
+ if (pmd != pmd_offset(pud, 0))
printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
- pmd, pmd_offset(pud, 0));
- return;
- }
+ pmd, pmd_offset(pud, 0));
}
- pmd = pmd_offset(pud, vaddr);
+ return pmd_offset(pud, vaddr);
+}
+
+static pte_t *fill_pte(pmd_t *pmd, unsigned long vaddr)
+{
if (pmd_none(*pmd)) {
- pte = (pte_t *) spp_getpage();
+ pte_t *pte = (pte_t *) spp_getpage();
pmd_populate_kernel(&init_mm, pmd, pte);
- if (pte != pte_offset_kernel(pmd, 0)) {
+ if (pte != pte_offset_kernel(pmd, 0))
printk(KERN_ERR "PAGETABLE BUG #02!\n");
- return;
- }
}
+ return pte_offset_kernel(pmd, vaddr);
+}
+
+void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
+{
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ pud = pud_page + pud_index(vaddr);
+ pmd = fill_pmd(pud, vaddr);
+ pte = fill_pte(pmd, vaddr);
- pte = pte_offset_kernel(pmd, vaddr);
set_pte(pte, new_pte);
/*
@@ -205,8 +215,7 @@ set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
__flush_tlb_one(vaddr);
}
-void
-set_pte_vaddr(unsigned long vaddr, pte_t pteval)
+void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
{
pgd_t *pgd;
pud_t *pud_page;
@@ -223,6 +232,24 @@ set_pte_vaddr(unsigned long vaddr, pte_t pteval)
set_pte_vaddr_pud(pud_page, vaddr, pteval);
}
+pmd_t * __init populate_extra_pmd(unsigned long vaddr)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+
+ pgd = pgd_offset_k(vaddr);
+ pud = fill_pud(pgd, vaddr);
+ return fill_pmd(pud, vaddr);
+}
+
+pte_t * __init populate_extra_pte(unsigned long vaddr)
+{
+ pmd_t *pmd;
+
+ pmd = populate_extra_pmd(vaddr);
+ return fill_pte(pmd, vaddr);
+}
+
/*
* Create large page table mappings for a range of physical addresses.
*/
@@ -291,13 +318,9 @@ void __init cleanup_highmap(void)
}
}
-static unsigned long __initdata table_start;
-static unsigned long __meminitdata table_end;
-static unsigned long __meminitdata table_top;
-
static __ref void *alloc_low_page(unsigned long *phys)
{
- unsigned long pfn = table_end++;
+ unsigned long pfn = e820_table_end++;
void *adr;
if (after_bootmem) {
@@ -307,7 +330,7 @@ static __ref void *alloc_low_page(unsigned long *phys)
return adr;
}
- if (pfn >= table_top)
+ if (pfn >= e820_table_top)
panic("alloc_low_page: ran out of memory");
adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE);
@@ -547,58 +570,10 @@ phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
return phys_pud_init(pud, addr, end, page_size_mask);
}
-static void __init find_early_table_space(unsigned long end, int use_pse,
- int use_gbpages)
-{
- unsigned long puds, pmds, ptes, tables, start;
-
- puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
- tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
- if (use_gbpages) {
- unsigned long extra;
- extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
- pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
- } else
- pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
- tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
-
- if (use_pse) {
- unsigned long extra;
- extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
- ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
- } else
- ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
- tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
-
- /*
- * RED-PEN putting page tables only on node 0 could
- * cause a hotspot and fill up ZONE_DMA. The page tables
- * need roughly 0.5KB per GB.
- */
- start = 0x8000;
- table_start = find_e820_area(start, end, tables, PAGE_SIZE);
- if (table_start == -1UL)
- panic("Cannot find space for the kernel page tables");
-
- table_start >>= PAGE_SHIFT;
- table_end = table_start;
- table_top = table_start + (tables >> PAGE_SHIFT);
-
- printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
- end, table_start << PAGE_SHIFT, table_top << PAGE_SHIFT);
-}
-
-static void __init init_gbpages(void)
-{
- if (direct_gbpages && cpu_has_gbpages)
- printk(KERN_INFO "Using GB pages for direct mapping\n");
- else
- direct_gbpages = 0;
-}
-
-static unsigned long __meminit kernel_physical_mapping_init(unsigned long start,
- unsigned long end,
- unsigned long page_size_mask)
+unsigned long __init
+kernel_physical_mapping_init(unsigned long start,
+ unsigned long end,
+ unsigned long page_size_mask)
{
unsigned long next, last_map_addr = end;
@@ -635,174 +610,6 @@ static unsigned long __meminit kernel_physical_mapping_init(unsigned long start,
return last_map_addr;
}
-struct map_range {
- unsigned long start;
- unsigned long end;
- unsigned page_size_mask;
-};
-
-#define NR_RANGE_MR 5
-
-static int save_mr(struct map_range *mr, int nr_range,
- unsigned long start_pfn, unsigned long end_pfn,
- unsigned long page_size_mask)
-{
-
- if (start_pfn < end_pfn) {
- if (nr_range >= NR_RANGE_MR)
- panic("run out of range for init_memory_mapping\n");
- mr[nr_range].start = start_pfn<<PAGE_SHIFT;
- mr[nr_range].end = end_pfn<<PAGE_SHIFT;
- mr[nr_range].page_size_mask = page_size_mask;
- nr_range++;
- }
-
- return nr_range;
-}
-
-/*
- * Setup the direct mapping of the physical memory at PAGE_OFFSET.
- * This runs before bootmem is initialized and gets pages directly from
- * the physical memory. To access them they are temporarily mapped.
- */
-unsigned long __init_refok init_memory_mapping(unsigned long start,
- unsigned long end)
-{
- unsigned long last_map_addr = 0;
- unsigned long page_size_mask = 0;
- unsigned long start_pfn, end_pfn;
- unsigned long pos;
-
- struct map_range mr[NR_RANGE_MR];
- int nr_range, i;
- int use_pse, use_gbpages;
-
- printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);
-
- /*
- * Find space for the kernel direct mapping tables.
- *
- * Later we should allocate these tables in the local node of the
- * memory mapped. Unfortunately this is done currently before the
- * nodes are discovered.
- */
- if (!after_bootmem)
- init_gbpages();
-
-#ifdef CONFIG_DEBUG_PAGEALLOC
- /*
- * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
- * This will simplify cpa(), which otherwise needs to support splitting
- * large pages into small in interrupt context, etc.
- */
- use_pse = use_gbpages = 0;
-#else
- use_pse = cpu_has_pse;
- use_gbpages = direct_gbpages;
-#endif
-
- if (use_gbpages)
- page_size_mask |= 1 << PG_LEVEL_1G;
- if (use_pse)
- page_size_mask |= 1 << PG_LEVEL_2M;
-
- memset(mr, 0, sizeof(mr));
- nr_range = 0;
-
- /* head if not big page alignment ?*/
- start_pfn = start >> PAGE_SHIFT;
- pos = start_pfn << PAGE_SHIFT;
- end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT)
- << (PMD_SHIFT - PAGE_SHIFT);
- if (start_pfn < end_pfn) {
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
- pos = end_pfn << PAGE_SHIFT;
- }
-
- /* big page (2M) range*/
- start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
- << (PMD_SHIFT - PAGE_SHIFT);
- end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
- << (PUD_SHIFT - PAGE_SHIFT);
- if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)))
- end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT));
- if (start_pfn < end_pfn) {
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
- page_size_mask & (1<<PG_LEVEL_2M));
- pos = end_pfn << PAGE_SHIFT;
- }
-
- /* big page (1G) range */
- start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
- << (PUD_SHIFT - PAGE_SHIFT);
- end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
- if (start_pfn < end_pfn) {
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
- page_size_mask &
- ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
- pos = end_pfn << PAGE_SHIFT;
- }
-
- /* tail is not big page (1G) alignment */
- start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
- << (PMD_SHIFT - PAGE_SHIFT);
- end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
- if (start_pfn < end_pfn) {
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
- page_size_mask & (1<<PG_LEVEL_2M));
- pos = end_pfn << PAGE_SHIFT;
- }
-
- /* tail is not big page (2M) alignment */
- start_pfn = pos>>PAGE_SHIFT;
- end_pfn = end>>PAGE_SHIFT;
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
-
- /* try to merge same page size and continuous */
- for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {
- unsigned long old_start;
- if (mr[i].end != mr[i+1].start ||
- mr[i].page_size_mask != mr[i+1].page_size_mask)
- continue;
- /* move it */
- old_start = mr[i].start;
- memmove(&mr[i], &mr[i+1],
- (nr_range - 1 - i) * sizeof (struct map_range));
- mr[i--].start = old_start;
- nr_range--;
- }
-
- for (i = 0; i < nr_range; i++)
- printk(KERN_DEBUG " %010lx - %010lx page %s\n",
- mr[i].start, mr[i].end,
- (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
- (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
-
- if (!after_bootmem)
- find_early_table_space(end, use_pse, use_gbpages);
-
- for (i = 0; i < nr_range; i++)
- last_map_addr = kernel_physical_mapping_init(
- mr[i].start, mr[i].end,
- mr[i].page_size_mask);
-
- if (!after_bootmem)
- mmu_cr4_features = read_cr4();
- __flush_tlb_all();
-
- if (!after_bootmem && table_end > table_start)
- reserve_early(table_start << PAGE_SHIFT,
- table_end << PAGE_SHIFT, "PGTABLE");
-
- printk(KERN_INFO "last_map_addr: %lx end: %lx\n",
- last_map_addr, end);
-
- if (!after_bootmem)
- early_memtest(start, end);
-
- return last_map_addr >> PAGE_SHIFT;
-}
-
#ifndef CONFIG_NUMA
void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn)
{
@@ -874,28 +681,6 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif /* CONFIG_MEMORY_HOTPLUG */
-/*
- * devmem_is_allowed() checks to see if /dev/mem access to a certain address
- * is valid. The argument is a physical page number.
- *
- *
- * On x86, access has to be given to the first megabyte of ram because that area
- * contains bios code and data regions used by X and dosemu and similar apps.
- * Access has to be given to non-kernel-ram areas as well, these contain the PCI
- * mmio resources as well as potential bios/acpi data regions.
- */
-int devmem_is_allowed(unsigned long pagenr)
-{
- if (pagenr <= 256)
- return 1;
- if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
- return 0;
- if (!page_is_ram(pagenr))
- return 1;
- return 0;
-}
-
-
static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
kcore_modules, kcore_vsyscall;
@@ -945,43 +730,6 @@ void __init mem_init(void)
initsize >> 10);
}
-void free_init_pages(char *what, unsigned long begin, unsigned long end)
-{
- unsigned long addr = begin;
-
- if (addr >= end)
- return;
-
- /*
- * If debugging page accesses then do not free this memory but
- * mark them not present - any buggy init-section access will
- * create a kernel page fault:
- */
-#ifdef CONFIG_DEBUG_PAGEALLOC
- printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n",
- begin, PAGE_ALIGN(end));
- set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
-#else
- printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
-
- for (; addr < end; addr += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(addr));
- init_page_count(virt_to_page(addr));
- memset((void *)(addr & ~(PAGE_SIZE-1)),
- POISON_FREE_INITMEM, PAGE_SIZE);
- free_page(addr);
- totalram_pages++;
- }
-#endif
-}
-
-void free_initmem(void)
-{
- free_init_pages("unused kernel memory",
- (unsigned long)(&__init_begin),
- (unsigned long)(&__init_end));
-}
-
#ifdef CONFIG_DEBUG_RODATA
const int rodata_test_data = 0xC3;
EXPORT_SYMBOL_GPL(rodata_test_data);
@@ -1020,13 +768,6 @@ void mark_rodata_ro(void)
#endif
-#ifdef CONFIG_BLK_DEV_INITRD
-void free_initrd_mem(unsigned long start, unsigned long end)
-{
- free_init_pages("initrd memory", start, end);
-}
-#endif
-
int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
int flags)
{
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 433f7bd4648a..aca924a30ee6 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -38,8 +38,7 @@ unsigned long __phys_addr(unsigned long x)
} else {
VIRTUAL_BUG_ON(x < PAGE_OFFSET);
x -= PAGE_OFFSET;
- VIRTUAL_BUG_ON(system_state == SYSTEM_BOOTING ? x > MAXMEM :
- !phys_addr_valid(x));
+ VIRTUAL_BUG_ON(!phys_addr_valid(x));
}
return x;
}
@@ -56,10 +55,8 @@ bool __virt_addr_valid(unsigned long x)
if (x < PAGE_OFFSET)
return false;
x -= PAGE_OFFSET;
- if (system_state == SYSTEM_BOOTING ?
- x > MAXMEM : !phys_addr_valid(x)) {
+ if (!phys_addr_valid(x))
return false;
- }
}
return pfn_valid(x >> PAGE_SHIFT);
@@ -76,10 +73,9 @@ static inline int phys_addr_valid(unsigned long addr)
#ifdef CONFIG_DEBUG_VIRTUAL
unsigned long __phys_addr(unsigned long x)
{
- /* VMALLOC_* aren't constants; not available at the boot time */
+ /* VMALLOC_* aren't constants */
VIRTUAL_BUG_ON(x < PAGE_OFFSET);
- VIRTUAL_BUG_ON(system_state != SYSTEM_BOOTING &&
- is_vmalloc_addr((void *) x));
+ VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x));
return x - PAGE_OFFSET;
}
EXPORT_SYMBOL(__phys_addr);
@@ -89,7 +85,9 @@ bool __virt_addr_valid(unsigned long x)
{
if (x < PAGE_OFFSET)
return false;
- if (system_state != SYSTEM_BOOTING && is_vmalloc_addr((void *) x))
+ if (__vmalloc_start_set && is_vmalloc_addr((void *) x))
+ return false;
+ if (x >= FIXADDR_START)
return false;
return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT);
}
@@ -508,13 +506,19 @@ static inline pte_t * __init early_ioremap_pte(unsigned long addr)
return &bm_pte[pte_index(addr)];
}
+static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata;
+
void __init early_ioremap_init(void)
{
pmd_t *pmd;
+ int i;
if (early_ioremap_debug)
printk(KERN_INFO "early_ioremap_init()\n");
+ for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
+ slot_virt[i] = fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i);
+
pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
memset(bm_pte, 0, sizeof(bm_pte));
pmd_populate_kernel(&init_mm, pmd, bm_pte);
@@ -581,6 +585,7 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx)
static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata;
static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata;
+
static int __init check_early_ioremap_leak(void)
{
int count = 0;
@@ -602,7 +607,8 @@ static int __init check_early_ioremap_leak(void)
}
late_initcall(check_early_ioremap_leak);
-static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot)
+static void __init __iomem *
+__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot)
{
unsigned long offset, last_addr;
unsigned int nrpages;
@@ -668,9 +674,9 @@ static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned lo
--nrpages;
}
if (early_ioremap_debug)
- printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0));
+ printk(KERN_CONT "%08lx + %08lx\n", offset, slot_virt[slot]);
- prev_map[slot] = (void __iomem *)(offset + fix_to_virt(idx0));
+ prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]);
return prev_map[slot];
}
@@ -738,8 +744,3 @@ void __init early_iounmap(void __iomem *addr, unsigned long size)
}
prev_map[slot] = NULL;
}
-
-void __this_fixmap_does_not_exist(void)
-{
- WARN_ON(1);
-}
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 93d82038af4b..6a518dd08a36 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -32,11 +32,14 @@ struct kmmio_fault_page {
struct list_head list;
struct kmmio_fault_page *release_next;
unsigned long page; /* location of the fault page */
+ bool old_presence; /* page presence prior to arming */
+ bool armed;
/*
* Number of times this page has been registered as a part
* of a probe. If zero, page is disarmed and this may be freed.
- * Used only by writers (RCU).
+ * Used only by writers (RCU) and post_kmmio_handler().
+ * Protected by kmmio_lock, when linked into kmmio_page_table.
*/
int count;
};
@@ -105,57 +108,85 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
return NULL;
}
-static void set_page_present(unsigned long addr, bool present,
- unsigned int *pglevel)
+static void set_pmd_presence(pmd_t *pmd, bool present, bool *old)
+{
+ pmdval_t v = pmd_val(*pmd);
+ *old = !!(v & _PAGE_PRESENT);
+ v &= ~_PAGE_PRESENT;
+ if (present)
+ v |= _PAGE_PRESENT;
+ set_pmd(pmd, __pmd(v));
+}
+
+static void set_pte_presence(pte_t *pte, bool present, bool *old)
+{
+ pteval_t v = pte_val(*pte);
+ *old = !!(v & _PAGE_PRESENT);
+ v &= ~_PAGE_PRESENT;
+ if (present)
+ v |= _PAGE_PRESENT;
+ set_pte_atomic(pte, __pte(v));
+}
+
+static int set_page_presence(unsigned long addr, bool present, bool *old)
{
- pteval_t pteval;
- pmdval_t pmdval;
unsigned int level;
- pmd_t *pmd;
pte_t *pte = lookup_address(addr, &level);
if (!pte) {
pr_err("kmmio: no pte for page 0x%08lx\n", addr);
- return;
+ return -1;
}
- if (pglevel)
- *pglevel = level;
-
switch (level) {
case PG_LEVEL_2M:
- pmd = (pmd_t *)pte;
- pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT;
- if (present)
- pmdval |= _PAGE_PRESENT;
- set_pmd(pmd, __pmd(pmdval));
+ set_pmd_presence((pmd_t *)pte, present, old);
break;
-
case PG_LEVEL_4K:
- pteval = pte_val(*pte) & ~_PAGE_PRESENT;
- if (present)
- pteval |= _PAGE_PRESENT;
- set_pte_atomic(pte, __pte(pteval));
+ set_pte_presence(pte, present, old);
break;
-
default:
pr_err("kmmio: unexpected page level 0x%x.\n", level);
- return;
+ return -1;
}
__flush_tlb_one(addr);
+ return 0;
}
-/** Mark the given page as not present. Access to it will trigger a fault. */
-static void arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel)
+/*
+ * Mark the given page as not present. Access to it will trigger a fault.
+ *
+ * Struct kmmio_fault_page is protected by RCU and kmmio_lock, but the
+ * protection is ignored here. RCU read lock is assumed held, so the struct
+ * will not disappear unexpectedly. Furthermore, the caller must guarantee,
+ * that double arming the same virtual address (page) cannot occur.
+ *
+ * Double disarming on the other hand is allowed, and may occur when a fault
+ * and mmiotrace shutdown happen simultaneously.
+ */
+static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
{
- set_page_present(page & PAGE_MASK, false, pglevel);
+ int ret;
+ WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n");
+ if (f->armed) {
+ pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n",
+ f->page, f->count, f->old_presence);
+ }
+ ret = set_page_presence(f->page, false, &f->old_presence);
+ WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page);
+ f->armed = true;
+ return ret;
}
-/** Mark the given page as present. */
-static void disarm_kmmio_fault_page(unsigned long page, unsigned int *pglevel)
+/** Restore the given page to saved presence state. */
+static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
{
- set_page_present(page & PAGE_MASK, true, pglevel);
+ bool tmp;
+ int ret = set_page_presence(f->page, f->old_presence, &tmp);
+ WARN_ONCE(ret < 0,
+ KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page);
+ f->armed = false;
}
/*
@@ -202,28 +233,32 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
ctx = &get_cpu_var(kmmio_ctx);
if (ctx->active) {
- disarm_kmmio_fault_page(faultpage->page, NULL);
if (addr == ctx->addr) {
/*
- * On SMP we sometimes get recursive probe hits on the
- * same address. Context is already saved, fall out.
+ * A second fault on the same page means some other
+ * condition needs handling by do_page_fault(), the
+ * page really not being present is the most common.
*/
- pr_debug("kmmio: duplicate probe hit on CPU %d, for "
- "address 0x%08lx.\n",
- smp_processor_id(), addr);
- ret = 1;
- goto no_kmmio_ctx;
- }
- /*
- * Prevent overwriting already in-flight context.
- * This should not happen, let's hope disarming at least
- * prevents a panic.
- */
- pr_emerg("kmmio: recursive probe hit on CPU %d, "
+ pr_debug("kmmio: secondary hit for 0x%08lx CPU %d.\n",
+ addr, smp_processor_id());
+
+ if (!faultpage->old_presence)
+ pr_info("kmmio: unexpected secondary hit for "
+ "address 0x%08lx on CPU %d.\n", addr,
+ smp_processor_id());
+ } else {
+ /*
+ * Prevent overwriting already in-flight context.
+ * This should not happen, let's hope disarming at
+ * least prevents a panic.
+ */
+ pr_emerg("kmmio: recursive probe hit on CPU %d, "
"for address 0x%08lx. Ignoring.\n",
smp_processor_id(), addr);
- pr_emerg("kmmio: previous hit was at 0x%08lx.\n",
- ctx->addr);
+ pr_emerg("kmmio: previous hit was at 0x%08lx.\n",
+ ctx->addr);
+ disarm_kmmio_fault_page(faultpage);
+ }
goto no_kmmio_ctx;
}
ctx->active++;
@@ -244,7 +279,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
regs->flags &= ~X86_EFLAGS_IF;
/* Now we set present bit in PTE and single step. */
- disarm_kmmio_fault_page(ctx->fpage->page, NULL);
+ disarm_kmmio_fault_page(ctx->fpage);
/*
* If another cpu accesses the same page while we are stepping,
@@ -275,7 +310,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
if (!ctx->active) {
- pr_debug("kmmio: spurious debug trap on CPU %d.\n",
+ pr_warning("kmmio: spurious debug trap on CPU %d.\n",
smp_processor_id());
goto out;
}
@@ -283,7 +318,11 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
if (ctx->probe && ctx->probe->post_handler)
ctx->probe->post_handler(ctx->probe, condition, regs);
- arm_kmmio_fault_page(ctx->fpage->page, NULL);
+ /* Prevent racing against release_kmmio_fault_page(). */
+ spin_lock(&kmmio_lock);
+ if (ctx->fpage->count)
+ arm_kmmio_fault_page(ctx->fpage);
+ spin_unlock(&kmmio_lock);
regs->flags &= ~X86_EFLAGS_TF;
regs->flags |= ctx->saved_flags;
@@ -315,20 +354,24 @@ static int add_kmmio_fault_page(unsigned long page)
f = get_kmmio_fault_page(page);
if (f) {
if (!f->count)
- arm_kmmio_fault_page(f->page, NULL);
+ arm_kmmio_fault_page(f);
f->count++;
return 0;
}
- f = kmalloc(sizeof(*f), GFP_ATOMIC);
+ f = kzalloc(sizeof(*f), GFP_ATOMIC);
if (!f)
return -1;
f->count = 1;
f->page = page;
- list_add_rcu(&f->list, kmmio_page_list(f->page));
- arm_kmmio_fault_page(f->page, NULL);
+ if (arm_kmmio_fault_page(f)) {
+ kfree(f);
+ return -1;
+ }
+
+ list_add_rcu(&f->list, kmmio_page_list(f->page));
return 0;
}
@@ -347,7 +390,7 @@ static void release_kmmio_fault_page(unsigned long page,
f->count--;
BUG_ON(f->count < 0);
if (!f->count) {
- disarm_kmmio_fault_page(f->page, NULL);
+ disarm_kmmio_fault_page(f);
f->release_next = *release_list;
*release_list = f;
}
@@ -408,23 +451,24 @@ static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
static void remove_kmmio_fault_pages(struct rcu_head *head)
{
- struct kmmio_delayed_release *dr = container_of(
- head,
- struct kmmio_delayed_release,
- rcu);
+ struct kmmio_delayed_release *dr =
+ container_of(head, struct kmmio_delayed_release, rcu);
struct kmmio_fault_page *p = dr->release_list;
struct kmmio_fault_page **prevp = &dr->release_list;
unsigned long flags;
+
spin_lock_irqsave(&kmmio_lock, flags);
while (p) {
- if (!p->count)
+ if (!p->count) {
list_del_rcu(&p->list);
- else
+ prevp = &p->release_next;
+ } else {
*prevp = p->release_next;
- prevp = &p->release_next;
+ }
p = p->release_next;
}
spin_unlock_irqrestore(&kmmio_lock, flags);
+
/* This is the real RCU destroy call. */
call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
}
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c
index 0bcd7883d036..605c8be06217 100644
--- a/arch/x86/mm/memtest.c
+++ b/arch/x86/mm/memtest.c
@@ -100,6 +100,9 @@ static int __init parse_memtest(char *arg)
{
if (arg)
memtest_pattern = simple_strtoul(arg, NULL, 0);
+ else
+ memtest_pattern = ARRAY_SIZE(patterns);
+
return 0;
}
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 3957cd6d6454..3daefa04ace5 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -416,39 +416,14 @@ void __init initmem_init(unsigned long start_pfn,
for_each_online_node(nid)
propagate_e820_map_node(nid);
- for_each_online_node(nid)
+ for_each_online_node(nid) {
memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
+ NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
+ }
- NODE_DATA(0)->bdata = &bootmem_node_data[0];
setup_bootmem_allocator();
}
-void __init set_highmem_pages_init(void)
-{
-#ifdef CONFIG_HIGHMEM
- struct zone *zone;
- int nid;
-
- for_each_zone(zone) {
- unsigned long zone_start_pfn, zone_end_pfn;
-
- if (!is_highmem(zone))
- continue;
-
- zone_start_pfn = zone->zone_start_pfn;
- zone_end_pfn = zone_start_pfn + zone->spanned_pages;
-
- nid = zone_to_nid(zone);
- printk(KERN_INFO "Initializing %s for node %d (%08lx:%08lx)\n",
- zone->name, nid, zone_start_pfn, zone_end_pfn);
-
- add_highpages_with_active_regions(nid, zone_start_pfn,
- zone_end_pfn);
- }
- totalram_pages += totalhigh_pages;
-#endif
-}
-
#ifdef CONFIG_MEMORY_HOTPLUG
static int paddr_to_nid(u64 addr)
{
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 86f2ffc43c3d..5b7c7c8464fe 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -313,6 +313,24 @@ int ptep_clear_flush_young(struct vm_area_struct *vma,
return young;
}
+/**
+ * reserve_top_address - reserves a hole in the top of kernel address space
+ * @reserve - size of hole to reserve
+ *
+ * Can be used to relocate the fixmap area and poke a hole in the top
+ * of kernel address space to make room for a hypervisor.
+ */
+void __init reserve_top_address(unsigned long reserve)
+{
+#ifdef CONFIG_X86_32
+ BUG_ON(fixmaps_set > 0);
+ printk(KERN_INFO "Reserving virtual address space above 0x%08x\n",
+ (int)-reserve);
+ __FIXADDR_TOP = -reserve - PAGE_SIZE;
+ __VMALLOC_RESERVE += reserve;
+#endif
+}
+
int fixmaps_set;
void __native_set_fixmap(enum fixed_addresses idx, pte_t pte)
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 0951db9ee519..f2e477c91c1b 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -20,6 +20,8 @@
#include <asm/tlb.h>
#include <asm/tlbflush.h>
+unsigned int __VMALLOC_RESERVE = 128 << 20;
+
/*
* Associate a virtual page frame with a given physical page frame
* and protection flags for that frame.
@@ -97,22 +99,6 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
unsigned long __FIXADDR_TOP = 0xfffff000;
EXPORT_SYMBOL(__FIXADDR_TOP);
-/**
- * reserve_top_address - reserves a hole in the top of kernel address space
- * @reserve - size of hole to reserve
- *
- * Can be used to relocate the fixmap area and poke a hole in the top
- * of kernel address space to make room for a hypervisor.
- */
-void __init reserve_top_address(unsigned long reserve)
-{
- BUG_ON(fixmaps_set > 0);
- printk(KERN_INFO "Reserving virtual address space above 0x%08x\n",
- (int)-reserve);
- __FIXADDR_TOP = -reserve - PAGE_SIZE;
- __VMALLOC_RESERVE += reserve;
-}
-
/*
* vmalloc=size forces the vmalloc area to be exactly 'size'
* bytes. This can be used to increase (or decrease) the
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
index ab50a8d7402c..427fd1b56df5 100644
--- a/arch/x86/mm/testmmiotrace.c
+++ b/arch/x86/mm/testmmiotrace.c
@@ -1,5 +1,5 @@
/*
- * Written by Pekka Paalanen, 2008 <pq@iki.fi>
+ * Written by Pekka Paalanen, 2008-2009 <pq@iki.fi>
*/
#include <linux/module.h>
#include <linux/io.h>
@@ -9,35 +9,74 @@
static unsigned long mmio_address;
module_param(mmio_address, ulong, 0);
-MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB.");
+MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB "
+ "(or 8 MB if read_far is non-zero).");
+
+static unsigned long read_far = 0x400100;
+module_param(read_far, ulong, 0);
+MODULE_PARM_DESC(read_far, " Offset of a 32-bit read within 8 MB "
+ "(default: 0x400100).");
+
+static unsigned v16(unsigned i)
+{
+ return i * 12 + 7;
+}
+
+static unsigned v32(unsigned i)
+{
+ return i * 212371 + 13;
+}
static void do_write_test(void __iomem *p)
{
unsigned int i;
+ pr_info(MODULE_NAME ": write test.\n");
mmiotrace_printk("Write test.\n");
+
for (i = 0; i < 256; i++)
iowrite8(i, p + i);
+
for (i = 1024; i < (5 * 1024); i += 2)
- iowrite16(i * 12 + 7, p + i);
+ iowrite16(v16(i), p + i);
+
for (i = (5 * 1024); i < (16 * 1024); i += 4)
- iowrite32(i * 212371 + 13, p + i);
+ iowrite32(v32(i), p + i);
}
static void do_read_test(void __iomem *p)
{
unsigned int i;
+ unsigned errs[3] = { 0 };
+ pr_info(MODULE_NAME ": read test.\n");
mmiotrace_printk("Read test.\n");
+
for (i = 0; i < 256; i++)
- ioread8(p + i);
+ if (ioread8(p + i) != i)
+ ++errs[0];
+
for (i = 1024; i < (5 * 1024); i += 2)
- ioread16(p + i);
+ if (ioread16(p + i) != v16(i))
+ ++errs[1];
+
for (i = (5 * 1024); i < (16 * 1024); i += 4)
- ioread32(p + i);
+ if (ioread32(p + i) != v32(i))
+ ++errs[2];
+
+ mmiotrace_printk("Read errors: 8-bit %d, 16-bit %d, 32-bit %d.\n",
+ errs[0], errs[1], errs[2]);
}
-static void do_test(void)
+static void do_read_far_test(void __iomem *p)
{
- void __iomem *p = ioremap_nocache(mmio_address, 0x4000);
+ pr_info(MODULE_NAME ": read far test.\n");
+ mmiotrace_printk("Read far test.\n");
+
+ ioread32(p + read_far);
+}
+
+static void do_test(unsigned long size)
+{
+ void __iomem *p = ioremap_nocache(mmio_address, size);
if (!p) {
pr_err(MODULE_NAME ": could not ioremap, aborting.\n");
return;
@@ -45,11 +84,15 @@ static void do_test(void)
mmiotrace_printk("ioremap returned %p.\n", p);
do_write_test(p);
do_read_test(p);
+ if (read_far && read_far < size - 4)
+ do_read_far_test(p);
iounmap(p);
}
static int __init init(void)
{
+ unsigned long size = (read_far) ? (8 << 20) : (16 << 10);
+
if (mmio_address == 0) {
pr_err(MODULE_NAME ": you have to use the module argument "
"mmio_address.\n");
@@ -58,10 +101,11 @@ static int __init init(void)
return -ENXIO;
}
- pr_warning(MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx "
- "in PCI address space, and writing "
- "rubbish in there.\n", mmio_address);
- do_test();
+ pr_warning(MODULE_NAME ": WARNING: mapping %lu kB @ 0x%08lx in PCI "
+ "address space, and writing 16 kB of rubbish in there.\n",
+ size >> 10, mmio_address);
+ do_test(size);
+ pr_info(MODULE_NAME ": All done.\n");
return 0;
}
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index e9f80c744cf3..10131fbdaada 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -78,8 +78,18 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
if (cpu_has_arch_perfmon) {
union cpuid10_eax eax;
eax.full = cpuid_eax(0xa);
- if (counter_width < eax.split.bit_width)
- counter_width = eax.split.bit_width;
+
+ /*
+ * For Core2 (family 6, model 15), don't reset the
+ * counter width:
+ */
+ if (!(eax.split.version_id == 0 &&
+ current_cpu_data.x86 == 6 &&
+ current_cpu_data.x86_model == 15)) {
+
+ if (counter_width < eax.split.bit_width)
+ counter_width = eax.split.bit_width;
+ }
}
/* clear all counters */
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c52f4034c7fd..82cd39a6cbd3 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -103,7 +103,7 @@ static void xen_vcpu_setup(int cpu)
vcpup = &per_cpu(xen_vcpu_info, cpu);
- info.mfn = virt_to_mfn(vcpup);
+ info.mfn = arbitrary_virt_to_mfn(vcpup);
info.offset = offset_in_page(vcpup);
printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %llx, offset %d\n",
@@ -301,8 +301,10 @@ static void xen_load_gdt(const struct desc_ptr *dtr)
frames = mcs.args;
for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
- frames[f] = virt_to_mfn(va);
+ frames[f] = arbitrary_virt_to_mfn((void *)va);
+
make_lowmem_page_readonly((void *)va);
+ make_lowmem_page_readonly(mfn_to_virt(frames[f]));
}
MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct));
@@ -314,7 +316,7 @@ static void load_TLS_descriptor(struct thread_struct *t,
unsigned int cpu, unsigned int i)
{
struct desc_struct *gdt = get_cpu_gdt_table(cpu);
- xmaddr_t maddr = virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
+ xmaddr_t maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
struct multicall_space mc = __xen_mc_entry(0);
MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]);
@@ -488,7 +490,7 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
break;
default: {
- xmaddr_t maddr = virt_to_machine(&dt[entry]);
+ xmaddr_t maddr = arbitrary_virt_to_machine(&dt[entry]);
xen_mc_flush();
if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc))
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 319bd40a57c2..cb6afa4ec95c 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -276,6 +276,13 @@ void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
p2m_top[topidx][idx] = mfn;
}
+unsigned long arbitrary_virt_to_mfn(void *vaddr)
+{
+ xmaddr_t maddr = arbitrary_virt_to_machine(vaddr);
+
+ return PFN_DOWN(maddr.maddr);
+}
+
xmaddr_t arbitrary_virt_to_machine(void *vaddr)
{
unsigned long address = (unsigned long)vaddr;
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 035582ae815d..8d470562ffc9 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -219,6 +219,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
{
struct vcpu_guest_context *ctxt;
struct desc_struct *gdt;
+ unsigned long gdt_mfn;
if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
return 0;
@@ -248,9 +249,12 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
ctxt->ldt_ents = 0;
BUG_ON((unsigned long)gdt & ~PAGE_MASK);
+
+ gdt_mfn = arbitrary_virt_to_mfn(gdt);
make_lowmem_page_readonly(gdt);
+ make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
- ctxt->gdt_frames[0] = virt_to_mfn(gdt);
+ ctxt->gdt_frames[0] = gdt_mfn;
ctxt->gdt_ents = GDT_ENTRIES;
ctxt->user_regs.cs = __KERNEL_CS;