summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/oprofile/common.c2
-rw-r--r--arch/arm/common/locomo.c45
-rw-r--r--arch/i386/Kconfig37
-rw-r--r--arch/i386/boot/Makefile9
-rw-r--r--arch/i386/boot/compressed/misc.c32
-rw-r--r--arch/i386/boot/video.S19
-rw-r--r--arch/i386/kernel/Makefile8
-rw-r--r--arch/i386/kernel/alternative.c118
-rw-r--r--arch/i386/kernel/apic.c16
-rw-r--r--arch/i386/kernel/apm.c6
-rw-r--r--arch/i386/kernel/cpu/amd.c16
-rw-r--r--arch/i386/kernel/cpu/intel.c6
-rw-r--r--arch/i386/kernel/cpu/intel_cacheinfo.c113
-rw-r--r--arch/i386/kernel/crash.c7
-rw-r--r--arch/i386/kernel/entry.S263
-rw-r--r--arch/i386/kernel/hpet.c67
-rw-r--r--arch/i386/kernel/i8253.c118
-rw-r--r--arch/i386/kernel/io_apic.c49
-rw-r--r--arch/i386/kernel/irq.c2
-rw-r--r--arch/i386/kernel/kprobes.c95
-rw-r--r--arch/i386/kernel/nmi.c72
-rw-r--r--arch/i386/kernel/numaq.c10
-rw-r--r--arch/i386/kernel/process.c8
-rw-r--r--arch/i386/kernel/setup.c1
-rw-r--r--arch/i386/kernel/smp.c12
-rw-r--r--arch/i386/kernel/smpboot.c1
-rw-r--r--arch/i386/kernel/time.c157
-rw-r--r--arch/i386/kernel/timers/Makefile9
-rw-r--r--arch/i386/kernel/timers/common.c172
-rw-r--r--arch/i386/kernel/timers/timer.c75
-rw-r--r--arch/i386/kernel/timers/timer_cyclone.c259
-rw-r--r--arch/i386/kernel/timers/timer_hpet.c217
-rw-r--r--arch/i386/kernel/timers/timer_none.c39
-rw-r--r--arch/i386/kernel/timers/timer_pit.c177
-rw-r--r--arch/i386/kernel/timers/timer_pm.c342
-rw-r--r--arch/i386/kernel/timers/timer_tsc.c617
-rw-r--r--arch/i386/kernel/traps.c70
-rw-r--r--arch/i386/kernel/tsc.c478
-rw-r--r--arch/i386/kernel/vmlinux.lds.S9
-rw-r--r--arch/i386/lib/delay.c65
-rw-r--r--arch/i386/mm/fault.c38
-rw-r--r--arch/i386/oprofile/nmi_int.c4
-rw-r--r--arch/i386/oprofile/op_model_athlon.c1
-rw-r--r--arch/i386/oprofile/op_model_p4.c1
-rw-r--r--arch/i386/oprofile/op_model_ppro.c1
-rw-r--r--arch/i386/pci/pcbios.c6
-rw-r--r--arch/ia64/kernel/process.c4
-rw-r--r--arch/ia64/mm/fault.c36
-rw-r--r--arch/m68k/mm/memory.c6
-rw-r--r--arch/m68k/sun3/sun3dvma.c6
-rw-r--r--arch/m68knommu/Kconfig78
-rw-r--r--arch/m68knommu/kernel/vmlinux.lds.S160
-rw-r--r--arch/m68knommu/platform/5307/head.S82
-rw-r--r--arch/m68knommu/platform/68328/head-pilot.S3
-rw-r--r--arch/m68knommu/platform/68328/head-ram.S6
-rw-r--r--arch/mips/oprofile/common.c2
-rw-r--r--arch/powerpc/kernel/time.c2
-rw-r--r--arch/powerpc/mm/fault.c36
-rw-r--r--arch/powerpc/oprofile/common.c2
-rw-r--r--arch/sh/oprofile/op_model_sh7750.c2
-rw-r--r--arch/sparc/kernel/of_device.c2
-rw-r--r--arch/sparc/kernel/prom.c106
-rw-r--r--arch/sparc/lib/Makefile2
-rw-r--r--arch/sparc/lib/iomap.c48
-rw-r--r--arch/sparc64/kernel/auxio.c3
-rw-r--r--arch/sparc64/kernel/irq.c61
-rw-r--r--arch/sparc64/kernel/of_device.c3
-rw-r--r--arch/sparc64/kernel/prom.c107
-rw-r--r--arch/sparc64/mm/fault.c36
-rw-r--r--arch/sparc64/mm/init.c1
-rw-r--r--arch/x86_64/Kconfig51
-rw-r--r--arch/x86_64/Kconfig.debug18
-rw-r--r--arch/x86_64/Makefile4
-rw-r--r--arch/x86_64/boot/Makefile9
-rw-r--r--arch/x86_64/boot/compressed/misc.c46
-rw-r--r--arch/x86_64/boot/tools/build.c6
-rw-r--r--arch/x86_64/boot/video.S19
-rw-r--r--arch/x86_64/defconfig159
-rw-r--r--arch/x86_64/ia32/fpu32.c1
-rw-r--r--arch/x86_64/ia32/ia32_signal.c2
-rw-r--r--arch/x86_64/ia32/ia32entry.S11
-rw-r--r--arch/x86_64/ia32/ptrace32.c43
-rw-r--r--arch/x86_64/ia32/sys_ia32.c25
-rw-r--r--arch/x86_64/kernel/Makefile8
-rw-r--r--arch/x86_64/kernel/aperture.c26
-rw-r--r--arch/x86_64/kernel/apic.c32
-rw-r--r--arch/x86_64/kernel/crash.c4
-rw-r--r--arch/x86_64/kernel/e820.c2
-rw-r--r--arch/x86_64/kernel/entry.S113
-rw-r--r--arch/x86_64/kernel/genapic_flat.c30
-rw-r--r--arch/x86_64/kernel/head64.c2
-rw-r--r--arch/x86_64/kernel/i8259.c14
-rw-r--r--arch/x86_64/kernel/io_apic.c45
-rw-r--r--arch/x86_64/kernel/irq.c30
-rw-r--r--arch/x86_64/kernel/k8.c118
-rw-r--r--arch/x86_64/kernel/mce.c2
-rw-r--r--arch/x86_64/kernel/mce_amd.c506
-rw-r--r--arch/x86_64/kernel/module.c38
-rw-r--r--arch/x86_64/kernel/nmi.c89
-rw-r--r--arch/x86_64/kernel/pci-calgary.c1018
-rw-r--r--arch/x86_64/kernel/pci-dma.c55
-rw-r--r--arch/x86_64/kernel/pci-gart.c155
-rw-r--r--arch/x86_64/kernel/pci-nommu.c9
-rw-r--r--arch/x86_64/kernel/pci-swiotlb.c2
-rw-r--r--arch/x86_64/kernel/pmtimer.c2
-rw-r--r--arch/x86_64/kernel/process.c16
-rw-r--r--arch/x86_64/kernel/reboot.c1
-rw-r--r--arch/x86_64/kernel/setup.c181
-rw-r--r--arch/x86_64/kernel/setup64.c3
-rw-r--r--arch/x86_64/kernel/signal.c3
-rw-r--r--arch/x86_64/kernel/smp.c10
-rw-r--r--arch/x86_64/kernel/smpboot.c23
-rw-r--r--arch/x86_64/kernel/tce.c202
-rw-r--r--arch/x86_64/kernel/time.c87
-rw-r--r--arch/x86_64/kernel/traps.c83
-rw-r--r--arch/x86_64/kernel/vmlinux.lds.S29
-rw-r--r--arch/x86_64/kernel/vsyscall.c4
-rw-r--r--arch/x86_64/kernel/x8664_ksyms.c114
-rw-r--r--arch/x86_64/lib/csum-partial.c1
-rw-r--r--arch/x86_64/lib/csum-wrappers.c1
-rw-r--r--arch/x86_64/lib/delay.c5
-rw-r--r--arch/x86_64/lib/memmove.c4
-rw-r--r--arch/x86_64/lib/usercopy.c13
-rw-r--r--arch/x86_64/mm/fault.c47
-rw-r--r--arch/x86_64/mm/init.c48
-rw-r--r--arch/x86_64/mm/ioremap.c5
-rw-r--r--arch/x86_64/pci/k8-bus.c10
127 files changed, 4714 insertions, 3533 deletions
diff --git a/arch/alpha/oprofile/common.c b/arch/alpha/oprofile/common.c
index ba788cfdc3c6..9fc0eeb4f0ab 100644
--- a/arch/alpha/oprofile/common.c
+++ b/arch/alpha/oprofile/common.c
@@ -112,7 +112,7 @@ op_axp_create_files(struct super_block * sb, struct dentry * root)
for (i = 0; i < model->num_counters; ++i) {
struct dentry *dir;
- char buf[3];
+ char buf[4];
snprintf(buf, sizeof buf, "%d", i);
dir = oprofilefs_mkdir(sb, root, buf);
diff --git a/arch/arm/common/locomo.c b/arch/arm/common/locomo.c
index a7dc1370695b..0dafba3a701d 100644
--- a/arch/arm/common/locomo.c
+++ b/arch/arm/common/locomo.c
@@ -629,21 +629,6 @@ static int locomo_resume(struct platform_device *dev)
#endif
-#define LCM_ALC_EN 0x8000
-
-void frontlight_set(struct locomo *lchip, int duty, int vr, int bpwf)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&lchip->lock, flags);
- locomo_writel(bpwf, lchip->base + LOCOMO_FRONTLIGHT + LOCOMO_ALS);
- udelay(100);
- locomo_writel(duty, lchip->base + LOCOMO_FRONTLIGHT + LOCOMO_ALD);
- locomo_writel(bpwf | LCM_ALC_EN, lchip->base + LOCOMO_FRONTLIGHT + LOCOMO_ALS);
- spin_unlock_irqrestore(&lchip->lock, flags);
-}
-
-
/**
* locomo_probe - probe for a single LoCoMo chip.
* @phys_addr: physical address of device.
@@ -698,14 +683,10 @@ __locomo_probe(struct device *me, struct resource *mem, int irq)
, lchip->base + LOCOMO_GPD);
locomo_writel(0, lchip->base + LOCOMO_GIE);
- /* FrontLight */
+ /* Frontlight */
locomo_writel(0, lchip->base + LOCOMO_FRONTLIGHT + LOCOMO_ALS);
locomo_writel(0, lchip->base + LOCOMO_FRONTLIGHT + LOCOMO_ALD);
- /* Same constants can be used for collie and poodle
- (depending on CONFIG options in original sharp code)? */
- frontlight_set(lchip, 163, 0, 148);
-
/* Longtime timer */
locomo_writel(0, lchip->base + LOCOMO_LTINT);
/* SPI */
@@ -1063,6 +1044,30 @@ void locomo_m62332_senddata(struct locomo_dev *ldev, unsigned int dac_data, int
}
/*
+ * Frontlight control
+ */
+
+static struct locomo *locomo_chip_driver(struct locomo_dev *ldev);
+
+void locomo_frontlight_set(struct locomo_dev *dev, int duty, int vr, int bpwf)
+{
+ unsigned long flags;
+ struct locomo *lchip = locomo_chip_driver(dev);
+
+ if (vr)
+ locomo_gpio_write(dev, LOCOMO_GPIO_FL_VR, 1);
+ else
+ locomo_gpio_write(dev, LOCOMO_GPIO_FL_VR, 0);
+
+ spin_lock_irqsave(&lchip->lock, flags);
+ locomo_writel(bpwf, lchip->base + LOCOMO_FRONTLIGHT + LOCOMO_ALS);
+ udelay(100);
+ locomo_writel(duty, lchip->base + LOCOMO_FRONTLIGHT + LOCOMO_ALD);
+ locomo_writel(bpwf | LOCOMO_ALC_EN, lchip->base + LOCOMO_FRONTLIGHT + LOCOMO_ALS);
+ spin_unlock_irqrestore(&lchip->lock, flags);
+}
+
+/*
* LoCoMo "Register Access Bus."
*
* We model this as a regular bus type, and hang devices directly
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 1596101cfaf8..f3eaf22f273d 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -14,6 +14,10 @@ config X86_32
486, 586, Pentiums, and various instruction-set-compatible chips by
AMD, Cyrix, and others.
+config GENERIC_TIME
+ bool
+ default y
+
config SEMAPHORE_SLEEPERS
bool
default y
@@ -324,6 +328,15 @@ config X86_MCE_P4THERMAL
Enabling this feature will cause a message to be printed when the P4
enters thermal throttling.
+config VM86
+ default y
+ bool "Enable VM86 support" if EMBEDDED
+ help
+ This option is required by programs like DOSEMU to run 16-bit legacy
+ code on X86 processors. It also may be needed by software like
+ XFree86 to initialize some video cards via BIOS. Disabling this
+ option saves about 6k.
+
config TOSHIBA
tristate "Toshiba Laptop support"
---help---
@@ -1046,13 +1059,27 @@ config SCx200
tristate "NatSemi SCx200 support"
depends on !X86_VOYAGER
help
- This provides basic support for the National Semiconductor SCx200
- processor. Right now this is just a driver for the GPIO pins.
+ This provides basic support for National Semiconductor's
+ (now AMD's) Geode processors. The driver probes for the
+ PCI-IDs of several on-chip devices, so its a good dependency
+ for other scx200_* drivers.
- If you don't know what to do here, say N.
+ If compiled as a module, the driver is named scx200.
- This support is also available as a module. If compiled as a
- module, it will be called scx200.
+config SCx200HR_TIMER
+ tristate "NatSemi SCx200 27MHz High-Resolution Timer Support"
+ depends on SCx200 && GENERIC_TIME
+ default y
+ help
+ This driver provides a clocksource built upon the on-chip
+ 27MHz high-resolution timer. Its also a workaround for
+ NSC Geode SC-1100's buggy TSC, which loses time when the
+ processor goes idle (as is done by the scheduler). The
+ other workaround is idle=poll boot option.
+
+config K8_NB
+ def_bool y
+ depends on AGP_AMD64
source "drivers/pcmcia/Kconfig"
diff --git a/arch/i386/boot/Makefile b/arch/i386/boot/Makefile
index 33e55476381b..e97946626064 100644
--- a/arch/i386/boot/Makefile
+++ b/arch/i386/boot/Makefile
@@ -109,8 +109,13 @@ fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf
isoimage: $(BOOTIMAGE)
-rm -rf $(obj)/isoimage
mkdir $(obj)/isoimage
- cp `echo /usr/lib*/syslinux/isolinux.bin | awk '{ print $1; }'` \
- $(obj)/isoimage
+ for i in lib lib64 share end ; do \
+ if [ -f /usr/$$i/syslinux/isolinux.bin ] ; then \
+ cp /usr/$$i/syslinux/isolinux.bin $(obj)/isoimage ; \
+ break ; \
+ fi ; \
+ if [ $$i = end ] ; then exit 1 ; fi ; \
+ done
cp $(BOOTIMAGE) $(obj)/isoimage/linux
echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg
if [ -f '$(FDINITRD)' ] ; then \
diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c
index f19f3a7492a5..b2ccd543410d 100644
--- a/arch/i386/boot/compressed/misc.c
+++ b/arch/i386/boot/compressed/misc.c
@@ -24,14 +24,6 @@
#undef memset
#undef memcpy
-
-/*
- * Why do we do this? Don't ask me..
- *
- * Incomprehensible are the ways of bootloaders.
- */
-static void* memset(void *, int, size_t);
-static void* memcpy(void *, __const void *, size_t);
#define memzero(s, n) memset ((s), 0, (n))
typedef unsigned char uch;
@@ -93,7 +85,7 @@ static unsigned char *real_mode; /* Pointer to real-mode data */
#endif
#define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0))
-extern char input_data[];
+extern unsigned char input_data[];
extern int input_len;
static long bytes_out = 0;
@@ -103,6 +95,9 @@ static unsigned long output_ptr = 0;
static void *malloc(int size);
static void free(void *where);
+static void *memset(void *s, int c, unsigned n);
+static void *memcpy(void *dest, const void *src, unsigned n);
+
static void putstr(const char *);
extern int end;
@@ -205,7 +200,7 @@ static void putstr(const char *s)
outb_p(0xff & (pos >> 1), vidport+1);
}
-static void* memset(void* s, int c, size_t n)
+static void* memset(void* s, int c, unsigned n)
{
int i;
char *ss = (char*)s;
@@ -214,14 +209,13 @@ static void* memset(void* s, int c, size_t n)
return s;
}
-static void* memcpy(void* __dest, __const void* __src,
- size_t __n)
+static void* memcpy(void* dest, const void* src, unsigned n)
{
int i;
- char *d = (char *)__dest, *s = (char *)__src;
+ char *d = (char *)dest, *s = (char *)src;
- for (i=0;i<__n;i++) d[i] = s[i];
- return __dest;
+ for (i=0;i<n;i++) d[i] = s[i];
+ return dest;
}
/* ===========================================================================
@@ -309,7 +303,7 @@ static void setup_normal_output_buffer(void)
#else
if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory");
#endif
- output_data = (char *)__PHYSICAL_START; /* Normally Points to 1M */
+ output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */
free_mem_end_ptr = (long)real_mode;
}
@@ -324,11 +318,9 @@ static void setup_output_buffer_if_we_run_high(struct moveparams *mv)
#ifdef STANDARD_MEMORY_BIOS_CALL
if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory");
#else
- if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) <
- (3*1024))
- error("Less than 4MB of memory");
+ if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory");
#endif
- mv->low_buffer_start = output_data = (char *)LOW_BUFFER_START;
+ mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START;
low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff;
low_buffer_size = low_buffer_end - LOW_BUFFER_START;
diff --git a/arch/i386/boot/video.S b/arch/i386/boot/video.S
index c9343c3a8082..8c2a6faeeae5 100644
--- a/arch/i386/boot/video.S
+++ b/arch/i386/boot/video.S
@@ -1929,7 +1929,7 @@ skip10: movb %ah, %al
ret
store_edid:
-#ifdef CONFIG_FB_FIRMWARE_EDID
+#ifdef CONFIG_FIRMWARE_EDID
pushw %es # just save all registers
pushw %ax
pushw %bx
@@ -1947,6 +1947,22 @@ store_edid:
rep
stosl
+ pushw %es # save ES
+ xorw %di, %di # Report Capability
+ pushw %di
+ popw %es # ES:DI must be 0:0
+ movw $0x4f15, %ax
+ xorw %bx, %bx
+ xorw %cx, %cx
+ int $0x10
+ popw %es # restore ES
+
+ cmpb $0x00, %ah # call successful
+ jne no_edid
+
+ cmpb $0x4f, %al # function supported
+ jne no_edid
+
movw $0x4f15, %ax # do VBE/DDC
movw $0x01, %bx
movw $0x00, %cx
@@ -1954,6 +1970,7 @@ store_edid:
movw $0x140, %di
int $0x10
+no_edid:
popw %di # restore all registers
popw %dx
popw %cx
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 96fb8a020af2..5e70c2fb273a 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -7,10 +7,9 @@ extra-y := head.o init_task.o vmlinux.lds
obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \
ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
pci-dma.o i386_ksyms.o i387.o bootflag.o \
- quirks.o i8237.o topology.o alternative.o
+ quirks.o i8237.o topology.o alternative.o i8253.o tsc.o
obj-y += cpu/
-obj-y += timers/
obj-y += acpi/
obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o
obj-$(CONFIG_MCA) += mca.o
@@ -37,6 +36,8 @@ obj-$(CONFIG_EFI) += efi.o efi_stub.o
obj-$(CONFIG_DOUBLEFAULT) += doublefault.o
obj-$(CONFIG_VM86) += vm86.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+obj-$(CONFIG_HPET_TIMER) += hpet.o
+obj-$(CONFIG_K8_NB) += k8.o
EXTRA_AFLAGS := -traditional
@@ -76,3 +77,6 @@ SYSCFLAGS_vsyscall-syms.o = -r
$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \
$(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE
$(call if_changed,syscall)
+
+k8-y += ../../x86_64/kernel/k8.o
+
diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c
index 5cbd6f99fb2a..50eb0e03777e 100644
--- a/arch/i386/kernel/alternative.c
+++ b/arch/i386/kernel/alternative.c
@@ -4,27 +4,41 @@
#include <asm/alternative.h>
#include <asm/sections.h>
-#define DEBUG 0
-#if DEBUG
-# define DPRINTK(fmt, args...) printk(fmt, args)
-#else
-# define DPRINTK(fmt, args...)
-#endif
+static int no_replacement = 0;
+static int smp_alt_once = 0;
+static int debug_alternative = 0;
+
+static int __init noreplacement_setup(char *s)
+{
+ no_replacement = 1;
+ return 1;
+}
+static int __init bootonly(char *str)
+{
+ smp_alt_once = 1;
+ return 1;
+}
+static int __init debug_alt(char *str)
+{
+ debug_alternative = 1;
+ return 1;
+}
+__setup("noreplacement", noreplacement_setup);
+__setup("smp-alt-boot", bootonly);
+__setup("debug-alternative", debug_alt);
+
+#define DPRINTK(fmt, args...) if (debug_alternative) \
+ printk(KERN_DEBUG fmt, args)
+
+#ifdef GENERIC_NOP1
/* Use inline assembly to define this because the nops are defined
as inline assembly strings in the include files and we cannot
get them easily into strings. */
asm("\t.data\nintelnops: "
GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
GENERIC_NOP7 GENERIC_NOP8);
-asm("\t.data\nk8nops: "
- K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
- K8_NOP7 K8_NOP8);
-asm("\t.data\nk7nops: "
- K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
- K7_NOP7 K7_NOP8);
-
-extern unsigned char intelnops[], k8nops[], k7nops[];
+extern unsigned char intelnops[];
static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
NULL,
intelnops,
@@ -36,6 +50,13 @@ static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
intelnops + 1 + 2 + 3 + 4 + 5 + 6,
intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
+#endif
+
+#ifdef K8_NOP1
+asm("\t.data\nk8nops: "
+ K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
+ K8_NOP7 K8_NOP8);
+extern unsigned char k8nops[];
static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
NULL,
k8nops,
@@ -47,6 +68,13 @@ static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
k8nops + 1 + 2 + 3 + 4 + 5 + 6,
k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
+#endif
+
+#ifdef K7_NOP1
+asm("\t.data\nk7nops: "
+ K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
+ K7_NOP7 K7_NOP8);
+extern unsigned char k7nops[];
static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
NULL,
k7nops,
@@ -58,6 +86,18 @@ static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
k7nops + 1 + 2 + 3 + 4 + 5 + 6,
k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
+#endif
+
+#ifdef CONFIG_X86_64
+
+extern char __vsyscall_0;
+static inline unsigned char** find_nop_table(void)
+{
+ return k8_nops;
+}
+
+#else /* CONFIG_X86_64 */
+
static struct nop {
int cpuid;
unsigned char **noptable;
@@ -67,14 +107,6 @@ static struct nop {
{ -1, NULL }
};
-
-extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
-extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
-extern u8 *__smp_locks[], *__smp_locks_end[];
-
-extern u8 __smp_alt_begin[], __smp_alt_end[];
-
-
static unsigned char** find_nop_table(void)
{
unsigned char **noptable = intel_nops;
@@ -89,6 +121,14 @@ static unsigned char** find_nop_table(void)
return noptable;
}
+#endif /* CONFIG_X86_64 */
+
+extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
+extern u8 *__smp_locks[], *__smp_locks_end[];
+
+extern u8 __smp_alt_begin[], __smp_alt_end[];
+
/* Replace instructions with better alternatives for this CPU type.
This runs before SMP is initialized to avoid SMP problems with
self modifying code. This implies that assymetric systems where
@@ -99,6 +139,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
unsigned char **noptable = find_nop_table();
struct alt_instr *a;
+ u8 *instr;
int diff, i, k;
DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
@@ -106,7 +147,16 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
BUG_ON(a->replacementlen > a->instrlen);
if (!boot_cpu_has(a->cpuid))
continue;
- memcpy(a->instr, a->replacement, a->replacementlen);
+ instr = a->instr;
+#ifdef CONFIG_X86_64
+ /* vsyscall code is not mapped yet. resolve it manually. */
+ if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
+ instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
+ DPRINTK("%s: vsyscall fixup: %p => %p\n",
+ __FUNCTION__, a->instr, instr);
+ }
+#endif
+ memcpy(instr, a->replacement, a->replacementlen);
diff = a->instrlen - a->replacementlen;
/* Pad the rest with nops */
for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
@@ -186,14 +236,6 @@ struct smp_alt_module {
static LIST_HEAD(smp_alt_modules);
static DEFINE_SPINLOCK(smp_alt);
-static int smp_alt_once = 0;
-static int __init bootonly(char *str)
-{
- smp_alt_once = 1;
- return 1;
-}
-__setup("smp-alt-boot", bootonly);
-
void alternatives_smp_module_add(struct module *mod, char *name,
void *locks, void *locks_end,
void *text, void *text_end)
@@ -201,6 +243,9 @@ void alternatives_smp_module_add(struct module *mod, char *name,
struct smp_alt_module *smp;
unsigned long flags;
+ if (no_replacement)
+ return;
+
if (smp_alt_once) {
if (boot_cpu_has(X86_FEATURE_UP))
alternatives_smp_unlock(locks, locks_end,
@@ -235,7 +280,7 @@ void alternatives_smp_module_del(struct module *mod)
struct smp_alt_module *item;
unsigned long flags;
- if (smp_alt_once)
+ if (no_replacement || smp_alt_once)
return;
spin_lock_irqsave(&smp_alt, flags);
@@ -256,7 +301,7 @@ void alternatives_smp_switch(int smp)
struct smp_alt_module *mod;
unsigned long flags;
- if (smp_alt_once)
+ if (no_replacement || smp_alt_once)
return;
BUG_ON(!smp && (num_online_cpus() > 1));
@@ -285,6 +330,13 @@ void alternatives_smp_switch(int smp)
void __init alternative_instructions(void)
{
+ if (no_replacement) {
+ printk(KERN_INFO "(SMP-)alternatives turned off\n");
+ free_init_pages("SMP alternatives",
+ (unsigned long)__smp_alt_begin,
+ (unsigned long)__smp_alt_end);
+ return;
+ }
apply_alternatives(__alt_instructions, __alt_instructions_end);
/* switch to patch-once-at-boottime-only mode and free the
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 5ab59c12335b..7ce09492fc0c 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -36,6 +36,7 @@
#include <asm/arch_hooks.h>
#include <asm/hpet.h>
#include <asm/i8253.h>
+#include <asm/nmi.h>
#include <mach_apic.h>
#include <mach_apicdef.h>
@@ -156,7 +157,7 @@ void clear_local_APIC(void)
maxlvt = get_maxlvt();
/*
- * Masking an LVT entry on a P6 can trigger a local APIC error
+ * Masking an LVT entry can trigger a local APIC error
* if the vector is zero. Mask LVTERR first to prevent this.
*/
if (maxlvt >= 3) {
@@ -1117,7 +1118,18 @@ void disable_APIC_timer(void)
unsigned long v;
v = apic_read(APIC_LVTT);
- apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
+ /*
+ * When an illegal vector value (0-15) is written to an LVT
+ * entry and delivery mode is Fixed, the APIC may signal an
+ * illegal vector error, with out regard to whether the mask
+ * bit is set or whether an interrupt is actually seen on input.
+ *
+ * Boot sequence might call this function when the LVTT has
+ * '0' vector value. So make sure vector field is set to
+ * valid value.
+ */
+ v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
+ apic_write_around(APIC_LVTT, v);
}
}
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index 9e819eb68229..7c5729d1fd06 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -764,9 +764,9 @@ static int apm_do_idle(void)
int idled = 0;
int polling;
- polling = test_thread_flag(TIF_POLLING_NRFLAG);
+ polling = !!(current_thread_info()->status & TS_POLLING);
if (polling) {
- clear_thread_flag(TIF_POLLING_NRFLAG);
+ current_thread_info()->status &= ~TS_POLLING;
smp_mb__after_clear_bit();
}
if (!need_resched()) {
@@ -774,7 +774,7 @@ static int apm_do_idle(void)
ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax);
}
if (polling)
- set_thread_flag(TIF_POLLING_NRFLAG);
+ current_thread_info()->status |= TS_POLLING;
if (!idled)
return 0;
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
index 786d1a57048b..fd0457c9c827 100644
--- a/arch/i386/kernel/cpu/amd.c
+++ b/arch/i386/kernel/cpu/amd.c
@@ -224,15 +224,17 @@ static void __init init_amd(struct cpuinfo_x86 *c)
#ifdef CONFIG_X86_HT
/*
- * On a AMD dual core setup the lower bits of the APIC id
- * distingush the cores. Assumes number of cores is a power
- * of two.
+ * On a AMD multi core setup the lower bits of the APIC id
+ * distingush the cores.
*/
if (c->x86_max_cores > 1) {
int cpu = smp_processor_id();
- unsigned bits = 0;
- while ((1 << bits) < c->x86_max_cores)
- bits++;
+ unsigned bits = (cpuid_ecx(0x80000008) >> 12) & 0xf;
+
+ if (bits == 0) {
+ while ((1 << bits) < c->x86_max_cores)
+ bits++;
+ }
cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1);
phys_proc_id[cpu] >>= bits;
printk(KERN_INFO "CPU %d(%d) -> Core %d\n",
@@ -240,6 +242,8 @@ static void __init init_amd(struct cpuinfo_x86 *c)
}
#endif
+ if (cpuid_eax(0x80000000) >= 0x80000006)
+ num_cache_leaves = 3;
}
static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c
index 5386b29bb5a5..10afc645c540 100644
--- a/arch/i386/kernel/cpu/intel.c
+++ b/arch/i386/kernel/cpu/intel.c
@@ -122,6 +122,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
select_idle_routine(c);
l2 = init_intel_cacheinfo(c);
+ if (c->cpuid_level > 9 ) {
+ unsigned eax = cpuid_eax(10);
+ /* Check for version and the number of counters */
+ if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
+ set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability);
+ }
/* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */
if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
index c8547a6fa7e6..6c37b4fd8ce2 100644
--- a/arch/i386/kernel/cpu/intel_cacheinfo.c
+++ b/arch/i386/kernel/cpu/intel_cacheinfo.c
@@ -4,6 +4,7 @@
* Changes:
* Venkatesh Pallipadi : Adding cache identification through cpuid(4)
* Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
+ * Andi Kleen : CPUID4 emulation on AMD.
*/
#include <linux/init.h>
@@ -130,25 +131,111 @@ struct _cpuid4_info {
cpumask_t shared_cpu_map;
};
-static unsigned short num_cache_leaves;
+unsigned short num_cache_leaves;
+
+/* AMD doesn't have CPUID4. Emulate it here to report the same
+ information to the user. This makes some assumptions about the machine:
+ No L3, L2 not shared, no SMT etc. that is currently true on AMD CPUs.
+
+ In theory the TLBs could be reported as fake type (they are in "dummy").
+ Maybe later */
+union l1_cache {
+ struct {
+ unsigned line_size : 8;
+ unsigned lines_per_tag : 8;
+ unsigned assoc : 8;
+ unsigned size_in_kb : 8;
+ };
+ unsigned val;
+};
+
+union l2_cache {
+ struct {
+ unsigned line_size : 8;
+ unsigned lines_per_tag : 4;
+ unsigned assoc : 4;
+ unsigned size_in_kb : 16;
+ };
+ unsigned val;
+};
+
+static unsigned short assocs[] = {
+ [1] = 1, [2] = 2, [4] = 4, [6] = 8,
+ [8] = 16,
+ [0xf] = 0xffff // ??
+ };
+static unsigned char levels[] = { 1, 1, 2 };
+static unsigned char types[] = { 1, 2, 3 };
+
+static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
+ union _cpuid4_leaf_ebx *ebx,
+ union _cpuid4_leaf_ecx *ecx)
+{
+ unsigned dummy;
+ unsigned line_size, lines_per_tag, assoc, size_in_kb;
+ union l1_cache l1i, l1d;
+ union l2_cache l2;
+
+ eax->full = 0;
+ ebx->full = 0;
+ ecx->full = 0;
+
+ cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
+ cpuid(0x80000006, &dummy, &dummy, &l2.val, &dummy);
+
+ if (leaf > 2 || !l1d.val || !l1i.val || !l2.val)
+ return;
+
+ eax->split.is_self_initializing = 1;
+ eax->split.type = types[leaf];
+ eax->split.level = levels[leaf];
+ eax->split.num_threads_sharing = 0;
+ eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
+
+ if (leaf <= 1) {
+ union l1_cache *l1 = leaf == 0 ? &l1d : &l1i;
+ assoc = l1->assoc;
+ line_size = l1->line_size;
+ lines_per_tag = l1->lines_per_tag;
+ size_in_kb = l1->size_in_kb;
+ } else {
+ assoc = l2.assoc;
+ line_size = l2.line_size;
+ lines_per_tag = l2.lines_per_tag;
+ /* cpu_data has errata corrections for K7 applied */
+ size_in_kb = current_cpu_data.x86_cache_size;
+ }
+
+ if (assoc == 0xf)
+ eax->split.is_fully_associative = 1;
+ ebx->split.coherency_line_size = line_size - 1;
+ ebx->split.ways_of_associativity = assocs[assoc] - 1;
+ ebx->split.physical_line_partition = lines_per_tag - 1;
+ ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
+ (ebx->split.ways_of_associativity + 1) - 1;
+}
static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
{
- unsigned int eax, ebx, ecx, edx;
- union _cpuid4_leaf_eax cache_eax;
+ union _cpuid4_leaf_eax eax;
+ union _cpuid4_leaf_ebx ebx;
+ union _cpuid4_leaf_ecx ecx;
+ unsigned edx;
- cpuid_count(4, index, &eax, &ebx, &ecx, &edx);
- cache_eax.full = eax;
- if (cache_eax.split.type == CACHE_TYPE_NULL)
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ amd_cpuid4(index, &eax, &ebx, &ecx);
+ else
+ cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
+ if (eax.split.type == CACHE_TYPE_NULL)
return -EIO; /* better error ? */
- this_leaf->eax.full = eax;
- this_leaf->ebx.full = ebx;
- this_leaf->ecx.full = ecx;
- this_leaf->size = (this_leaf->ecx.split.number_of_sets + 1) *
- (this_leaf->ebx.split.coherency_line_size + 1) *
- (this_leaf->ebx.split.physical_line_partition + 1) *
- (this_leaf->ebx.split.ways_of_associativity + 1);
+ this_leaf->eax = eax;
+ this_leaf->ebx = ebx;
+ this_leaf->ecx = ecx;
+ this_leaf->size = (ecx.split.number_of_sets + 1) *
+ (ebx.split.coherency_line_size + 1) *
+ (ebx.split.physical_line_partition + 1) *
+ (ebx.split.ways_of_associativity + 1);
return 0;
}
diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c
index 21dc1bbb8067..0c88d3ec8c18 100644
--- a/arch/i386/kernel/crash.c
+++ b/arch/i386/kernel/crash.c
@@ -120,14 +120,9 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu)
return 1;
}
-/*
- * By using the NMI code instead of a vector we just sneak thru the
- * word generator coming out with just what we want. AND it does
- * not matter if clustered_apic_mode is set or not.
- */
static void smp_send_nmi_allbutself(void)
{
- send_IPI_allbutself(APIC_DM_NMI);
+ send_IPI_allbutself(NMI_VECTOR);
}
static void nmi_shootdown_cpus(void)
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index cfc683f153b9..e6e4506e749a 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -48,6 +48,7 @@
#include <asm/smp.h>
#include <asm/page.h>
#include <asm/desc.h>
+#include <asm/dwarf2.h>
#include "irq_vectors.h"
#define nr_syscalls ((syscall_table_size)/4)
@@ -85,31 +86,67 @@ VM_MASK = 0x00020000
#define SAVE_ALL \
cld; \
pushl %es; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ /*CFI_REL_OFFSET es, 0;*/\
pushl %ds; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ /*CFI_REL_OFFSET ds, 0;*/\
pushl %eax; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ CFI_REL_OFFSET eax, 0;\
pushl %ebp; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ CFI_REL_OFFSET ebp, 0;\
pushl %edi; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ CFI_REL_OFFSET edi, 0;\
pushl %esi; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ CFI_REL_OFFSET esi, 0;\
pushl %edx; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ CFI_REL_OFFSET edx, 0;\
pushl %ecx; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ CFI_REL_OFFSET ecx, 0;\
pushl %ebx; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ CFI_REL_OFFSET ebx, 0;\
movl $(__USER_DS), %edx; \
movl %edx, %ds; \
movl %edx, %es;
#define RESTORE_INT_REGS \
popl %ebx; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ CFI_RESTORE ebx;\
popl %ecx; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ CFI_RESTORE ecx;\
popl %edx; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ CFI_RESTORE edx;\
popl %esi; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ CFI_RESTORE esi;\
popl %edi; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ CFI_RESTORE edi;\
popl %ebp; \
- popl %eax
+ CFI_ADJUST_CFA_OFFSET -4;\
+ CFI_RESTORE ebp;\
+ popl %eax; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ CFI_RESTORE eax
#define RESTORE_REGS \
RESTORE_INT_REGS; \
1: popl %ds; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ /*CFI_RESTORE ds;*/\
2: popl %es; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ /*CFI_RESTORE es;*/\
.section .fixup,"ax"; \
3: movl $0,(%esp); \
jmp 1b; \
@@ -122,13 +159,43 @@ VM_MASK = 0x00020000
.long 2b,4b; \
.previous
+#define RING0_INT_FRAME \
+ CFI_STARTPROC simple;\
+ CFI_DEF_CFA esp, 3*4;\
+ /*CFI_OFFSET cs, -2*4;*/\
+ CFI_OFFSET eip, -3*4
+
+#define RING0_EC_FRAME \
+ CFI_STARTPROC simple;\
+ CFI_DEF_CFA esp, 4*4;\
+ /*CFI_OFFSET cs, -2*4;*/\
+ CFI_OFFSET eip, -3*4
+
+#define RING0_PTREGS_FRAME \
+ CFI_STARTPROC simple;\
+ CFI_DEF_CFA esp, OLDESP-EBX;\
+ /*CFI_OFFSET cs, CS-OLDESP;*/\
+ CFI_OFFSET eip, EIP-OLDESP;\
+ /*CFI_OFFSET es, ES-OLDESP;*/\
+ /*CFI_OFFSET ds, DS-OLDESP;*/\
+ CFI_OFFSET eax, EAX-OLDESP;\
+ CFI_OFFSET ebp, EBP-OLDESP;\
+ CFI_OFFSET edi, EDI-OLDESP;\
+ CFI_OFFSET esi, ESI-OLDESP;\
+ CFI_OFFSET edx, EDX-OLDESP;\
+ CFI_OFFSET ecx, ECX-OLDESP;\
+ CFI_OFFSET ebx, EBX-OLDESP
ENTRY(ret_from_fork)
+ CFI_STARTPROC
pushl %eax
+ CFI_ADJUST_CFA_OFFSET -4
call schedule_tail
GET_THREAD_INFO(%ebp)
popl %eax
+ CFI_ADJUST_CFA_OFFSET -4
jmp syscall_exit
+ CFI_ENDPROC
/*
* Return to user mode is not as complex as all this looks,
@@ -139,6 +206,7 @@ ENTRY(ret_from_fork)
# userspace resumption stub bypassing syscall exit tracing
ALIGN
+ RING0_PTREGS_FRAME
ret_from_exception:
preempt_stop
ret_from_intr:
@@ -171,20 +239,33 @@ need_resched:
call preempt_schedule_irq
jmp need_resched
#endif
+ CFI_ENDPROC
/* SYSENTER_RETURN points to after the "sysenter" instruction in
the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */
# sysenter call handler stub
ENTRY(sysenter_entry)
+ CFI_STARTPROC simple
+ CFI_DEF_CFA esp, 0
+ CFI_REGISTER esp, ebp
movl TSS_sysenter_esp0(%esp),%esp
sysenter_past_esp:
sti
pushl $(__USER_DS)
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET ss, 0*/
pushl %ebp
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET esp, 0
pushfl
+ CFI_ADJUST_CFA_OFFSET 4
pushl $(__USER_CS)
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET cs, 0*/
pushl $SYSENTER_RETURN
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET eip, 0
/*
* Load the potential sixth argument from user stack.
@@ -199,6 +280,7 @@ sysenter_past_esp:
.previous
pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
GET_THREAD_INFO(%ebp)
@@ -219,11 +301,14 @@ sysenter_past_esp:
xorl %ebp,%ebp
sti
sysexit
+ CFI_ENDPROC
# system call handler stub
ENTRY(system_call)
+ RING0_INT_FRAME # can't unwind into user space anyway
pushl %eax # save orig_eax
+ CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
GET_THREAD_INFO(%ebp)
testl $TF_MASK,EFLAGS(%esp)
@@ -256,10 +341,12 @@ restore_all:
movb CS(%esp), %al
andl $(VM_MASK | (4 << 8) | 3), %eax
cmpl $((4 << 8) | 3), %eax
+ CFI_REMEMBER_STATE
je ldt_ss # returning to user-space with LDT SS
restore_nocheck:
RESTORE_REGS
addl $4, %esp
+ CFI_ADJUST_CFA_OFFSET -4
1: iret
.section .fixup,"ax"
iret_exc:
@@ -273,6 +360,7 @@ iret_exc:
.long 1b,iret_exc
.previous
+ CFI_RESTORE_STATE
ldt_ss:
larl OLDSS(%esp), %eax
jnz restore_nocheck
@@ -285,11 +373,13 @@ ldt_ss:
* CPUs, which we can try to work around to make
* dosemu and wine happy. */
subl $8, %esp # reserve space for switch16 pointer
+ CFI_ADJUST_CFA_OFFSET 8
cli
movl %esp, %eax
/* Set up the 16bit stack frame with switch32 pointer on top,
* and a switch16 pointer on top of the current frame. */
call setup_x86_bogus_stack
+ CFI_ADJUST_CFA_OFFSET -8 # frame has moved
RESTORE_REGS
lss 20+4(%esp), %esp # switch to 16bit stack
1: iret
@@ -297,9 +387,11 @@ ldt_ss:
.align 4
.long 1b,iret_exc
.previous
+ CFI_ENDPROC
# perform work that needs to be done immediately before resumption
ALIGN
+ RING0_PTREGS_FRAME # can't unwind into user space anyway
work_pending:
testb $_TIF_NEED_RESCHED, %cl
jz work_notifysig
@@ -329,8 +421,10 @@ work_notifysig: # deal with pending signals and
work_notifysig_v86:
#ifdef CONFIG_VM86
pushl %ecx # save ti_flags for do_notify_resume
+ CFI_ADJUST_CFA_OFFSET 4
call save_v86_state # %eax contains pt_regs pointer
popl %ecx
+ CFI_ADJUST_CFA_OFFSET -4
movl %eax, %esp
xorl %edx, %edx
call do_notify_resume
@@ -363,19 +457,21 @@ syscall_exit_work:
movl $1, %edx
call do_syscall_trace
jmp resume_userspace
+ CFI_ENDPROC
- ALIGN
+ RING0_INT_FRAME # can't unwind into user space anyway
syscall_fault:
pushl %eax # save orig_eax
+ CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
GET_THREAD_INFO(%ebp)
movl $-EFAULT,EAX(%esp)
jmp resume_userspace
- ALIGN
syscall_badsys:
movl $-ENOSYS,EAX(%esp)
jmp resume_userspace
+ CFI_ENDPROC
#define FIXUP_ESPFIX_STACK \
movl %esp, %eax; \
@@ -387,16 +483,21 @@ syscall_badsys:
movl %eax, %esp;
#define UNWIND_ESPFIX_STACK \
pushl %eax; \
+ CFI_ADJUST_CFA_OFFSET 4; \
movl %ss, %eax; \
/* see if on 16bit stack */ \
cmpw $__ESPFIX_SS, %ax; \
- jne 28f; \
- movl $__KERNEL_DS, %edx; \
- movl %edx, %ds; \
- movl %edx, %es; \
+ je 28f; \
+27: popl %eax; \
+ CFI_ADJUST_CFA_OFFSET -4; \
+.section .fixup,"ax"; \
+28: movl $__KERNEL_DS, %eax; \
+ movl %eax, %ds; \
+ movl %eax, %es; \
/* switch to 32bit stack */ \
- FIXUP_ESPFIX_STACK \
-28: popl %eax;
+ FIXUP_ESPFIX_STACK; \
+ jmp 27b; \
+.previous
/*
* Build the entry stubs and pointer table with
@@ -408,9 +509,14 @@ ENTRY(interrupt)
vector=0
ENTRY(irq_entries_start)
+ RING0_INT_FRAME
.rept NR_IRQS
ALIGN
+ .if vector
+ CFI_ADJUST_CFA_OFFSET -4
+ .endif
1: pushl $vector-256
+ CFI_ADJUST_CFA_OFFSET 4
jmp common_interrupt
.data
.long 1b
@@ -424,60 +530,99 @@ common_interrupt:
movl %esp,%eax
call do_IRQ
jmp ret_from_intr
+ CFI_ENDPROC
#define BUILD_INTERRUPT(name, nr) \
ENTRY(name) \
+ RING0_INT_FRAME; \
pushl $nr-256; \
- SAVE_ALL \
+ CFI_ADJUST_CFA_OFFSET 4; \
+ SAVE_ALL; \
movl %esp,%eax; \
call smp_/**/name; \
- jmp ret_from_intr;
+ jmp ret_from_intr; \
+ CFI_ENDPROC
/* The include is where all of the SMP etc. interrupts come from */
#include "entry_arch.h"
ENTRY(divide_error)
+ RING0_INT_FRAME
pushl $0 # no error code
+ CFI_ADJUST_CFA_OFFSET 4
pushl $do_divide_error
+ CFI_ADJUST_CFA_OFFSET 4
ALIGN
error_code:
pushl %ds
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET ds, 0*/
pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET eax, 0
xorl %eax, %eax
pushl %ebp
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ebp, 0
pushl %edi
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET edi, 0
pushl %esi
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET esi, 0
pushl %edx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET edx, 0
decl %eax # eax = -1
pushl %ecx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ecx, 0
pushl %ebx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ebx, 0
cld
pushl %es
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET es, 0*/
UNWIND_ESPFIX_STACK
popl %ecx
+ CFI_ADJUST_CFA_OFFSET -4
+ /*CFI_REGISTER es, ecx*/
movl ES(%esp), %edi # get the function address
movl ORIG_EAX(%esp), %edx # get the error code
movl %eax, ORIG_EAX(%esp)
movl %ecx, ES(%esp)
+ /*CFI_REL_OFFSET es, ES*/
movl $(__USER_DS), %ecx
movl %ecx, %ds
movl %ecx, %es
movl %esp,%eax # pt_regs pointer
call *%edi
jmp ret_from_exception
+ CFI_ENDPROC
ENTRY(coprocessor_error)
+ RING0_INT_FRAME
pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
pushl $do_coprocessor_error
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
ENTRY(simd_coprocessor_error)
+ RING0_INT_FRAME
pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
pushl $do_simd_coprocessor_error
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
ENTRY(device_not_available)
+ RING0_INT_FRAME
pushl $-1 # mark this as an int
+ CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
movl %cr0, %eax
testl $0x4, %eax # EM (math emulation bit)
@@ -487,9 +632,12 @@ ENTRY(device_not_available)
jmp ret_from_exception
device_not_available_emulate:
pushl $0 # temporary storage for ORIG_EIP
+ CFI_ADJUST_CFA_OFFSET 4
call math_emulate
addl $4, %esp
+ CFI_ADJUST_CFA_OFFSET -4
jmp ret_from_exception
+ CFI_ENDPROC
/*
* Debug traps and NMI can happen at the one SYSENTER instruction
@@ -514,16 +662,19 @@ label: \
pushl $sysenter_past_esp
KPROBE_ENTRY(debug)
+ RING0_INT_FRAME
cmpl $sysenter_entry,(%esp)
jne debug_stack_correct
FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
debug_stack_correct:
pushl $-1 # mark this as an int
+ CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
xorl %edx,%edx # error code 0
movl %esp,%eax # pt_regs pointer
call do_debug
jmp ret_from_exception
+ CFI_ENDPROC
.previous .text
/*
* NMI is doubly nasty. It can happen _while_ we're handling
@@ -534,14 +685,18 @@ debug_stack_correct:
* fault happened on the sysenter path.
*/
ENTRY(nmi)
+ RING0_INT_FRAME
pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
movl %ss, %eax
cmpw $__ESPFIX_SS, %ax
popl %eax
+ CFI_ADJUST_CFA_OFFSET -4
je nmi_16bit_stack
cmpl $sysenter_entry,(%esp)
je nmi_stack_fixup
pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
movl %esp,%eax
/* Do not access memory above the end of our stack page,
* it might not exist.
@@ -549,16 +704,19 @@ ENTRY(nmi)
andl $(THREAD_SIZE-1),%eax
cmpl $(THREAD_SIZE-20),%eax
popl %eax
+ CFI_ADJUST_CFA_OFFSET -4
jae nmi_stack_correct
cmpl $sysenter_entry,12(%esp)
je nmi_debug_stack_check
nmi_stack_correct:
pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
xorl %edx,%edx # zero error code
movl %esp,%eax # pt_regs pointer
call do_nmi
jmp restore_all
+ CFI_ENDPROC
nmi_stack_fixup:
FIX_STACK(12,nmi_stack_correct, 1)
@@ -574,94 +732,177 @@ nmi_debug_stack_check:
jmp nmi_stack_correct
nmi_16bit_stack:
+ RING0_INT_FRAME
/* create the pointer to lss back */
pushl %ss
+ CFI_ADJUST_CFA_OFFSET 4
pushl %esp
+ CFI_ADJUST_CFA_OFFSET 4
movzwl %sp, %esp
addw $4, (%esp)
/* copy the iret frame of 12 bytes */
.rept 3
pushl 16(%esp)
+ CFI_ADJUST_CFA_OFFSET 4
.endr
pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
FIXUP_ESPFIX_STACK # %eax == %esp
+ CFI_ADJUST_CFA_OFFSET -20 # the frame has now moved
xorl %edx,%edx # zero error code
call do_nmi
RESTORE_REGS
lss 12+4(%esp), %esp # back to 16bit stack
1: iret
+ CFI_ENDPROC
.section __ex_table,"a"
.align 4
.long 1b,iret_exc
.previous
KPROBE_ENTRY(int3)
+ RING0_INT_FRAME
pushl $-1 # mark this as an int
+ CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
xorl %edx,%edx # zero error code
movl %esp,%eax # pt_regs pointer
call do_int3
jmp ret_from_exception
+ CFI_ENDPROC
.previous .text
ENTRY(overflow)
+ RING0_INT_FRAME
pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
pushl $do_overflow
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
ENTRY(bounds)
+ RING0_INT_FRAME
pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
pushl $do_bounds
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
ENTRY(invalid_op)
+ RING0_INT_FRAME
pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
pushl $do_invalid_op
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
ENTRY(coprocessor_segment_overrun)
+ RING0_INT_FRAME
pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
pushl $do_coprocessor_segment_overrun
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
ENTRY(invalid_TSS)
+ RING0_EC_FRAME
pushl $do_invalid_TSS
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
ENTRY(segment_not_present)
+ RING0_EC_FRAME
pushl $do_segment_not_present
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
ENTRY(stack_segment)
+ RING0_EC_FRAME
pushl $do_stack_segment
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
KPROBE_ENTRY(general_protection)
+ RING0_EC_FRAME
pushl $do_general_protection
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
.previous .text
ENTRY(alignment_check)
+ RING0_EC_FRAME
pushl $do_alignment_check
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
KPROBE_ENTRY(page_fault)
+ RING0_EC_FRAME
pushl $do_page_fault
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
.previous .text
#ifdef CONFIG_X86_MCE
ENTRY(machine_check)
+ RING0_INT_FRAME
pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
pushl machine_check_vector
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
#endif
ENTRY(spurious_interrupt_bug)
+ RING0_INT_FRAME
pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
pushl $do_spurious_interrupt_bug
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
+
+#ifdef CONFIG_STACK_UNWIND
+ENTRY(arch_unwind_init_running)
+ CFI_STARTPROC
+ movl 4(%esp), %edx
+ movl (%esp), %ecx
+ leal 4(%esp), %eax
+ movl %ebx, EBX(%edx)
+ xorl %ebx, %ebx
+ movl %ebx, ECX(%edx)
+ movl %ebx, EDX(%edx)
+ movl %esi, ESI(%edx)
+ movl %edi, EDI(%edx)
+ movl %ebp, EBP(%edx)
+ movl %ebx, EAX(%edx)
+ movl $__USER_DS, DS(%edx)
+ movl $__USER_DS, ES(%edx)
+ movl %ebx, ORIG_EAX(%edx)
+ movl %ecx, EIP(%edx)
+ movl 12(%esp), %ecx
+ movl $__KERNEL_CS, CS(%edx)
+ movl %ebx, EFLAGS(%edx)
+ movl %eax, OLDESP(%edx)
+ movl 8(%esp), %eax
+ movl %ecx, 8(%esp)
+ movl EBX(%edx), %ebx
+ movl $__KERNEL_DS, OLDSS(%edx)
+ jmpl *%eax
+ CFI_ENDPROC
+ENDPROC(arch_unwind_init_running)
+#endif
.section .rodata,"a"
#include "syscall_table.S"
diff --git a/arch/i386/kernel/hpet.c b/arch/i386/kernel/hpet.c
new file mode 100644
index 000000000000..c6737c35815d
--- /dev/null
+++ b/arch/i386/kernel/hpet.c
@@ -0,0 +1,67 @@
+#include <linux/clocksource.h>
+#include <linux/errno.h>
+#include <linux/hpet.h>
+#include <linux/init.h>
+
+#include <asm/hpet.h>
+#include <asm/io.h>
+
+#define HPET_MASK CLOCKSOURCE_MASK(32)
+#define HPET_SHIFT 22
+
+/* FSEC = 10^-15 NSEC = 10^-9 */
+#define FSEC_PER_NSEC 1000000
+
+static void *hpet_ptr;
+
+static cycle_t read_hpet(void)
+{
+ return (cycle_t)readl(hpet_ptr);
+}
+
+static struct clocksource clocksource_hpet = {
+ .name = "hpet",
+ .rating = 250,
+ .read = read_hpet,
+ .mask = HPET_MASK,
+ .mult = 0, /* set below */
+ .shift = HPET_SHIFT,
+ .is_continuous = 1,
+};
+
+static int __init init_hpet_clocksource(void)
+{
+ unsigned long hpet_period;
+ void __iomem* hpet_base;
+ u64 tmp;
+
+ if (!hpet_address)
+ return -ENODEV;
+
+ /* calculate the hpet address: */
+ hpet_base =
+ (void __iomem*)ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
+ hpet_ptr = hpet_base + HPET_COUNTER;
+
+ /* calculate the frequency: */
+ hpet_period = readl(hpet_base + HPET_PERIOD);
+
+ /*
+ * hpet period is in femto seconds per cycle
+ * so we need to convert this to ns/cyc units
+ * aproximated by mult/2^shift
+ *
+ * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift
+ * fsec/cyc * 1ns/1000000fsec * 2^shift = mult
+ * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult
+ * (fsec/cyc << shift)/1000000 = mult
+ * (hpet_period << shift)/FSEC_PER_NSEC = mult
+ */
+ tmp = (u64)hpet_period << HPET_SHIFT;
+ do_div(tmp, FSEC_PER_NSEC);
+ clocksource_hpet.mult = (u32)tmp;
+
+ return clocksource_register(&clocksource_hpet);
+}
+
+module_init(init_hpet_clocksource);
diff --git a/arch/i386/kernel/i8253.c b/arch/i386/kernel/i8253.c
new file mode 100644
index 000000000000..477b24daff53
--- /dev/null
+++ b/arch/i386/kernel/i8253.c
@@ -0,0 +1,118 @@
+/*
+ * i8253.c 8253/PIT functions
+ *
+ */
+#include <linux/clocksource.h>
+#include <linux/spinlock.h>
+#include <linux/jiffies.h>
+#include <linux/sysdev.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <asm/smp.h>
+#include <asm/delay.h>
+#include <asm/i8253.h>
+#include <asm/io.h>
+
+#include "io_ports.h"
+
+DEFINE_SPINLOCK(i8253_lock);
+EXPORT_SYMBOL(i8253_lock);
+
+void setup_pit_timer(void)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8253_lock, flags);
+ outb_p(0x34,PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
+ udelay(10);
+ outb_p(LATCH & 0xff , PIT_CH0); /* LSB */
+ udelay(10);
+ outb(LATCH >> 8 , PIT_CH0); /* MSB */
+ spin_unlock_irqrestore(&i8253_lock, flags);
+}
+
+/*
+ * Since the PIT overflows every tick, its not very useful
+ * to just read by itself. So use jiffies to emulate a free
+ * running counter:
+ */
+static cycle_t pit_read(void)
+{
+ unsigned long flags;
+ int count;
+ u32 jifs;
+ static int old_count;
+ static u32 old_jifs;
+
+ spin_lock_irqsave(&i8253_lock, flags);
+ /*
+ * Although our caller may have the read side of xtime_lock,
+ * this is now a seqlock, and we are cheating in this routine
+ * by having side effects on state that we cannot undo if
+ * there is a collision on the seqlock and our caller has to
+ * retry. (Namely, old_jifs and old_count.) So we must treat
+ * jiffies as volatile despite the lock. We read jiffies
+ * before latching the timer count to guarantee that although
+ * the jiffies value might be older than the count (that is,
+ * the counter may underflow between the last point where
+ * jiffies was incremented and the point where we latch the
+ * count), it cannot be newer.
+ */
+ jifs = jiffies;
+ outb_p(0x00, PIT_MODE); /* latch the count ASAP */
+ count = inb_p(PIT_CH0); /* read the latched count */
+ count |= inb_p(PIT_CH0) << 8;
+
+ /* VIA686a test code... reset the latch if count > max + 1 */
+ if (count > LATCH) {
+ outb_p(0x34, PIT_MODE);
+ outb_p(LATCH & 0xff, PIT_CH0);
+ outb(LATCH >> 8, PIT_CH0);
+ count = LATCH - 1;
+ }
+
+ /*
+ * It's possible for count to appear to go the wrong way for a
+ * couple of reasons:
+ *
+ * 1. The timer counter underflows, but we haven't handled the
+ * resulting interrupt and incremented jiffies yet.
+ * 2. Hardware problem with the timer, not giving us continuous time,
+ * the counter does small "jumps" upwards on some Pentium systems,
+ * (see c't 95/10 page 335 for Neptun bug.)
+ *
+ * Previous attempts to handle these cases intelligently were
+ * buggy, so we just do the simple thing now.
+ */
+ if (count > old_count && jifs == old_jifs) {
+ count = old_count;
+ }
+ old_count = count;
+ old_jifs = jifs;
+
+ spin_unlock_irqrestore(&i8253_lock, flags);
+
+ count = (LATCH - 1) - count;
+
+ return (cycle_t)(jifs * LATCH) + count;
+}
+
+static struct clocksource clocksource_pit = {
+ .name = "pit",
+ .rating = 110,
+ .read = pit_read,
+ .mask = CLOCKSOURCE_MASK(32),
+ .mult = 0,
+ .shift = 20,
+};
+
+static int __init init_pit_clocksource(void)
+{
+ if (num_possible_cpus() > 4) /* PIT does not scale! */
+ return 0;
+
+ clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20);
+ return clocksource_register(&clocksource_pit);
+}
+module_init(init_pit_clocksource);
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index a62df3e764c5..72ae414e4d49 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -38,6 +38,7 @@
#include <asm/desc.h>
#include <asm/timer.h>
#include <asm/i8259.h>
+#include <asm/nmi.h>
#include <mach_apic.h>
@@ -50,6 +51,7 @@ atomic_t irq_mis_count;
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
static DEFINE_SPINLOCK(ioapic_lock);
+static DEFINE_SPINLOCK(vector_lock);
int timer_over_8254 __initdata = 1;
@@ -1161,10 +1163,17 @@ u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
int assign_irq_vector(int irq)
{
static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
+ unsigned long flags;
+ int vector;
+
+ BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
- BUG_ON(irq >= NR_IRQ_VECTORS);
- if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
+ spin_lock_irqsave(&vector_lock, flags);
+
+ if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
+ spin_unlock_irqrestore(&vector_lock, flags);
return IO_APIC_VECTOR(irq);
+ }
next:
current_vector += 8;
if (current_vector == SYSCALL_VECTOR)
@@ -1172,16 +1181,21 @@ next:
if (current_vector >= FIRST_SYSTEM_VECTOR) {
offset++;
- if (!(offset%8))
+ if (!(offset%8)) {
+ spin_unlock_irqrestore(&vector_lock, flags);
return -ENOSPC;
+ }
current_vector = FIRST_DEVICE_VECTOR + offset;
}
- vector_irq[current_vector] = irq;
+ vector = current_vector;
+ vector_irq[vector] = irq;
if (irq != AUTO_ASSIGN)
- IO_APIC_VECTOR(irq) = current_vector;
+ IO_APIC_VECTOR(irq) = vector;
- return current_vector;
+ spin_unlock_irqrestore(&vector_lock, flags);
+
+ return vector;
}
static struct hw_interrupt_type ioapic_level_type;
@@ -1193,21 +1207,14 @@ static struct hw_interrupt_type ioapic_edge_type;
static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger)
{
- if (use_pci_vector() && !platform_legacy_irq(irq)) {
- if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
- trigger == IOAPIC_LEVEL)
- irq_desc[vector].handler = &ioapic_level_type;
- else
- irq_desc[vector].handler = &ioapic_edge_type;
- set_intr_gate(vector, interrupt[vector]);
- } else {
- if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
- trigger == IOAPIC_LEVEL)
- irq_desc[irq].handler = &ioapic_level_type;
- else
- irq_desc[irq].handler = &ioapic_edge_type;
- set_intr_gate(vector, interrupt[irq]);
- }
+ unsigned idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
+
+ if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+ trigger == IOAPIC_LEVEL)
+ irq_desc[idx].handler = &ioapic_level_type;
+ else
+ irq_desc[idx].handler = &ioapic_edge_type;
+ set_intr_gate(vector, interrupt[idx]);
}
static void __init setup_IO_APIC_irqs(void)
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index 49ce4c31b713..061533e0cb5e 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -227,7 +227,7 @@ int show_interrupts(struct seq_file *p, void *v)
if (i == 0) {
seq_printf(p, " ");
for_each_online_cpu(j)
- seq_printf(p, "CPU%d ",j);
+ seq_printf(p, "CPU%-8d",j);
seq_putc(p, '\n');
}
diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c
index 395a9a6dff88..727e419ad78a 100644
--- a/arch/i386/kernel/kprobes.c
+++ b/arch/i386/kernel/kprobes.c
@@ -57,34 +57,85 @@ static __always_inline void set_jmp_op(void *from, void *to)
/*
* returns non-zero if opcodes can be boosted.
*/
-static __always_inline int can_boost(kprobe_opcode_t opcode)
+static __always_inline int can_boost(kprobe_opcode_t *opcodes)
{
- switch (opcode & 0xf0 ) {
+#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \
+ (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
+ (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \
+ (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \
+ (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \
+ << (row % 32))
+ /*
+ * Undefined/reserved opcodes, conditional jump, Opcode Extension
+ * Groups, and some special opcodes can not be boost.
+ */
+ static const unsigned long twobyte_is_boostable[256 / 32] = {
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* ------------------------------- */
+ W(0x00, 0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,0)| /* 00 */
+ W(0x10, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 10 */
+ W(0x20, 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0)| /* 20 */
+ W(0x30, 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */
+ W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */
+ W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 50 */
+ W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1)| /* 60 */
+ W(0x70, 0,0,0,0,1,1,1,1,0,0,0,0,0,0,1,1), /* 70 */
+ W(0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 80 */
+ W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */
+ W(0xa0, 1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1)| /* a0 */
+ W(0xb0, 1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1), /* b0 */
+ W(0xc0, 1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,1)| /* c0 */
+ W(0xd0, 0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1), /* d0 */
+ W(0xe0, 0,1,1,0,0,1,0,0,1,1,0,1,1,1,0,1)| /* e0 */
+ W(0xf0, 0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0) /* f0 */
+ /* ------------------------------- */
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ };
+#undef W
+ kprobe_opcode_t opcode;
+ kprobe_opcode_t *orig_opcodes = opcodes;
+retry:
+ if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
+ return 0;
+ opcode = *(opcodes++);
+
+ /* 2nd-byte opcode */
+ if (opcode == 0x0f) {
+ if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
+ return 0;
+ return test_bit(*opcodes, twobyte_is_boostable);
+ }
+
+ switch (opcode & 0xf0) {
+ case 0x60:
+ if (0x63 < opcode && opcode < 0x67)
+ goto retry; /* prefixes */
+ /* can't boost Address-size override and bound */
+ return (opcode != 0x62 && opcode != 0x67);
case 0x70:
return 0; /* can't boost conditional jump */
- case 0x90:
- /* can't boost call and pushf */
- return opcode != 0x9a && opcode != 0x9c;
case 0xc0:
- /* can't boost undefined opcodes and soft-interruptions */
- return (0xc1 < opcode && opcode < 0xc6) ||
- (0xc7 < opcode && opcode < 0xcc) || opcode == 0xcf;
+ /* can't boost software-interruptions */
+ return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf;
case 0xd0:
/* can boost AA* and XLAT */
return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7);
case 0xe0:
- /* can boost in/out and (may be) jmps */
- return (0xe3 < opcode && opcode != 0xe8);
+ /* can boost in/out and absolute jmps */
+ return ((opcode & 0x04) || opcode == 0xea);
case 0xf0:
+ if ((opcode & 0x0c) == 0 && opcode != 0xf1)
+ goto retry; /* lock/rep(ne) prefix */
/* clear and set flags can be boost */
return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe));
default:
- /* currently, can't boost 2 bytes opcodes */
- return opcode != 0x0f;
+ if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e)
+ goto retry; /* prefixes */
+ /* can't boost CS override and call */
+ return (opcode != 0x2e && opcode != 0x9a);
}
}
-
/*
* returns non-zero if opcode modifies the interrupt flag.
*/
@@ -109,7 +160,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
p->opcode = *p->addr;
- if (can_boost(p->opcode)) {
+ if (can_boost(p->addr)) {
p->ainsn.boostable = 0;
} else {
p->ainsn.boostable = -1;
@@ -208,7 +259,9 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
struct kprobe_ctlblk *kcb;
#ifdef CONFIG_PREEMPT
unsigned pre_preempt_count = preempt_count();
-#endif /* CONFIG_PREEMPT */
+#else
+ unsigned pre_preempt_count = 1;
+#endif
addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t));
@@ -285,22 +338,14 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
/* handler has already set things up, so skip ss setup */
return 1;
- if (p->ainsn.boostable == 1 &&
-#ifdef CONFIG_PREEMPT
- !(pre_preempt_count) && /*
- * This enables booster when the direct
- * execution path aren't preempted.
- */
-#endif /* CONFIG_PREEMPT */
- !p->post_handler && !p->break_handler ) {
+ss_probe:
+ if (pre_preempt_count && p->ainsn.boostable == 1 && !p->post_handler){
/* Boost up -- we can execute copied instructions directly */
reset_current_kprobe();
regs->eip = (unsigned long)p->ainsn.insn;
preempt_enable_no_resched();
return 1;
}
-
-ss_probe:
prepare_singlestep(p, regs);
kcb->kprobe_status = KPROBE_HIT_SS;
return 1;
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index d43b498ec745..a76e93146585 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -14,21 +14,17 @@
*/
#include <linux/config.h>
-#include <linux/mm.h>
#include <linux/delay.h>
-#include <linux/bootmem.h>
-#include <linux/smp_lock.h>
#include <linux/interrupt.h>
-#include <linux/mc146818rtc.h>
-#include <linux/kernel_stat.h>
#include <linux/module.h>
#include <linux/nmi.h>
#include <linux/sysdev.h>
#include <linux/sysctl.h>
+#include <linux/percpu.h>
#include <asm/smp.h>
-#include <asm/div64.h>
#include <asm/nmi.h>
+#include <asm/intel_arch_perfmon.h>
#include "mach_traps.h"
@@ -100,6 +96,9 @@ int nmi_active;
(P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
+#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
+#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
+
#ifdef CONFIG_SMP
/* The performance counters used by NMI_LOCAL_APIC don't trigger when
* the CPU is idle. To make sure the NMI watchdog really ticks on all
@@ -212,6 +211,8 @@ static int __init setup_nmi_watchdog(char *str)
__setup("nmi_watchdog=", setup_nmi_watchdog);
+static void disable_intel_arch_watchdog(void);
+
static void disable_lapic_nmi_watchdog(void)
{
if (nmi_active <= 0)
@@ -221,6 +222,10 @@ static void disable_lapic_nmi_watchdog(void)
wrmsr(MSR_K7_EVNTSEL0, 0, 0);
break;
case X86_VENDOR_INTEL:
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+ disable_intel_arch_watchdog();
+ break;
+ }
switch (boot_cpu_data.x86) {
case 6:
if (boot_cpu_data.x86_model > 0xd)
@@ -449,6 +454,53 @@ static int setup_p4_watchdog(void)
return 1;
}
+static void disable_intel_arch_watchdog(void)
+{
+ unsigned ebx;
+
+ /*
+ * Check whether the Architectural PerfMon supports
+ * Unhalted Core Cycles Event or not.
+ * NOTE: Corresponding bit = 0 in ebp indicates event present.
+ */
+ ebx = cpuid_ebx(10);
+ if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+ wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0);
+}
+
+static int setup_intel_arch_watchdog(void)
+{
+ unsigned int evntsel;
+ unsigned ebx;
+
+ /*
+ * Check whether the Architectural PerfMon supports
+ * Unhalted Core Cycles Event or not.
+ * NOTE: Corresponding bit = 0 in ebp indicates event present.
+ */
+ ebx = cpuid_ebx(10);
+ if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+ return 0;
+
+ nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
+
+ clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2);
+ clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2);
+
+ evntsel = ARCH_PERFMON_EVENTSEL_INT
+ | ARCH_PERFMON_EVENTSEL_OS
+ | ARCH_PERFMON_EVENTSEL_USR
+ | ARCH_PERFMON_NMI_EVENT_SEL
+ | ARCH_PERFMON_NMI_EVENT_UMASK;
+
+ wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
+ write_watchdog_counter("INTEL_ARCH_PERFCTR0");
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
+ return 1;
+}
+
void setup_apic_nmi_watchdog (void)
{
switch (boot_cpu_data.x86_vendor) {
@@ -458,6 +510,11 @@ void setup_apic_nmi_watchdog (void)
setup_k7_watchdog();
break;
case X86_VENDOR_INTEL:
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+ if (!setup_intel_arch_watchdog())
+ return;
+ break;
+ }
switch (boot_cpu_data.x86) {
case 6:
if (boot_cpu_data.x86_model > 0xd)
@@ -561,7 +618,8 @@ void nmi_watchdog_tick (struct pt_regs * regs)
wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
apic_write(APIC_LVTPC, APIC_DM_NMI);
}
- else if (nmi_perfctr_msr == MSR_P6_PERFCTR0) {
+ else if (nmi_perfctr_msr == MSR_P6_PERFCTR0 ||
+ nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
/* Only P6 based Pentium M need to re-unmask
* the apic vector but it doesn't hurt
* other P6 variant */
diff --git a/arch/i386/kernel/numaq.c b/arch/i386/kernel/numaq.c
index 5f5b075f860a..0caf14652bad 100644
--- a/arch/i386/kernel/numaq.c
+++ b/arch/i386/kernel/numaq.c
@@ -79,10 +79,12 @@ int __init get_memcfg_numaq(void)
return 1;
}
-static int __init numaq_dsc_disable(void)
+static int __init numaq_tsc_disable(void)
{
- printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
- tsc_disable = 1;
+ if (num_online_nodes() > 1) {
+ printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
+ tsc_disable = 1;
+ }
return 0;
}
-core_initcall(numaq_dsc_disable);
+arch_initcall(numaq_tsc_disable);
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 6259afea46d1..6946b06e2784 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -102,7 +102,7 @@ void default_idle(void)
local_irq_enable();
if (!hlt_counter && boot_cpu_data.hlt_works_ok) {
- clear_thread_flag(TIF_POLLING_NRFLAG);
+ current_thread_info()->status &= ~TS_POLLING;
smp_mb__after_clear_bit();
while (!need_resched()) {
local_irq_disable();
@@ -111,7 +111,7 @@ void default_idle(void)
else
local_irq_enable();
}
- set_thread_flag(TIF_POLLING_NRFLAG);
+ current_thread_info()->status |= TS_POLLING;
} else {
while (!need_resched())
cpu_relax();
@@ -174,7 +174,7 @@ void cpu_idle(void)
{
int cpu = smp_processor_id();
- set_thread_flag(TIF_POLLING_NRFLAG);
+ current_thread_info()->status |= TS_POLLING;
/* endless idle loop with no priority at all */
while (1) {
@@ -312,7 +312,7 @@ void show_regs(struct pt_regs * regs)
cr3 = read_cr3();
cr4 = read_cr4_safe();
printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
- show_trace(NULL, &regs->esp);
+ show_trace(NULL, regs, &regs->esp);
}
/*
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index 6bef9273733e..4a65040cc624 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -1575,6 +1575,7 @@ void __init setup_arch(char **cmdline_p)
conswitchp = &dummy_con;
#endif
#endif
+ tsc_init();
}
static __init int add_pcspkr(void)
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index d134e9643a58..c10789d7a9d3 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -114,7 +114,17 @@ DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_m
static inline int __prepare_ICR (unsigned int shortcut, int vector)
{
- return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL;
+ unsigned int icr = shortcut | APIC_DEST_LOGICAL;
+
+ switch (vector) {
+ default:
+ icr |= APIC_DM_FIXED | vector;
+ break;
+ case NMI_VECTOR:
+ icr |= APIC_DM_NMI;
+ break;
+ }
+ return icr;
}
static inline int __prepare_ICR2 (unsigned int mask)
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index bd0ca5c9f053..bce5470ecb42 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -52,6 +52,7 @@
#include <asm/tlbflush.h>
#include <asm/desc.h>
#include <asm/arch_hooks.h>
+#include <asm/nmi.h>
#include <mach_apic.h>
#include <mach_wakecpu.h>
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index 9d3074759856..5f43d0410122 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -82,13 +82,6 @@ extern unsigned long wall_jiffies;
DEFINE_SPINLOCK(rtc_lock);
EXPORT_SYMBOL(rtc_lock);
-#include <asm/i8253.h>
-
-DEFINE_SPINLOCK(i8253_lock);
-EXPORT_SYMBOL(i8253_lock);
-
-struct timer_opts *cur_timer __read_mostly = &timer_none;
-
/*
* This is a special lock that is owned by the CPU and holds the index
* register we are working with. It is required for NMI access to the
@@ -118,99 +111,19 @@ void rtc_cmos_write(unsigned char val, unsigned char addr)
}
EXPORT_SYMBOL(rtc_cmos_write);
-/*
- * This version of gettimeofday has microsecond resolution
- * and better than microsecond precision on fast x86 machines with TSC.
- */
-void do_gettimeofday(struct timeval *tv)
-{
- unsigned long seq;
- unsigned long usec, sec;
- unsigned long max_ntp_tick;
-
- do {
- unsigned long lost;
-
- seq = read_seqbegin(&xtime_lock);
-
- usec = cur_timer->get_offset();
- lost = jiffies - wall_jiffies;
-
- /*
- * If time_adjust is negative then NTP is slowing the clock
- * so make sure not to go into next possible interval.
- * Better to lose some accuracy than have time go backwards..
- */
- if (unlikely(time_adjust < 0)) {
- max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj;
- usec = min(usec, max_ntp_tick);
-
- if (lost)
- usec += lost * max_ntp_tick;
- }
- else if (unlikely(lost))
- usec += lost * (USEC_PER_SEC / HZ);
-
- sec = xtime.tv_sec;
- usec += (xtime.tv_nsec / 1000);
- } while (read_seqretry(&xtime_lock, seq));
-
- while (usec >= 1000000) {
- usec -= 1000000;
- sec++;
- }
-
- tv->tv_sec = sec;
- tv->tv_usec = usec;
-}
-
-EXPORT_SYMBOL(do_gettimeofday);
-
-int do_settimeofday(struct timespec *tv)
-{
- time_t wtm_sec, sec = tv->tv_sec;
- long wtm_nsec, nsec = tv->tv_nsec;
-
- if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
- return -EINVAL;
-
- write_seqlock_irq(&xtime_lock);
- /*
- * This is revolting. We need to set "xtime" correctly. However, the
- * value in this location is the value at the most recent update of
- * wall time. Discover what correction gettimeofday() would have
- * made, and then undo it!
- */
- nsec -= cur_timer->get_offset() * NSEC_PER_USEC;
- nsec -= (jiffies - wall_jiffies) * TICK_NSEC;
-
- wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
- wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
-
- set_normalized_timespec(&xtime, sec, nsec);
- set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
-
- ntp_clear();
- write_sequnlock_irq(&xtime_lock);
- clock_was_set();
- return 0;
-}
-
-EXPORT_SYMBOL(do_settimeofday);
-
static int set_rtc_mmss(unsigned long nowtime)
{
int retval;
-
- WARN_ON(irqs_disabled());
+ unsigned long flags;
/* gets recalled with irq locally disabled */
- spin_lock_irq(&rtc_lock);
+ /* XXX - does irqsave resolve this? -johnstul */
+ spin_lock_irqsave(&rtc_lock, flags);
if (efi_enabled)
retval = efi_set_rtc_mmss(nowtime);
else
retval = mach_set_rtc_mmss(nowtime);
- spin_unlock_irq(&rtc_lock);
+ spin_unlock_irqrestore(&rtc_lock, flags);
return retval;
}
@@ -218,16 +131,6 @@ static int set_rtc_mmss(unsigned long nowtime)
int timer_ack;
-/* monotonic_clock(): returns # of nanoseconds passed since time_init()
- * Note: This function is required to return accurate
- * time even in the absence of multiple timer ticks.
- */
-unsigned long long monotonic_clock(void)
-{
- return cur_timer->monotonic_clock();
-}
-EXPORT_SYMBOL(monotonic_clock);
-
#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
unsigned long profile_pc(struct pt_regs *regs)
{
@@ -242,11 +145,21 @@ EXPORT_SYMBOL(profile_pc);
#endif
/*
- * timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * This is the same as the above, except we _also_ save the current
+ * Time Stamp Counter value at the time of the timer interrupt, so that
+ * we later on can estimate the time of day more exactly.
*/
-static inline void do_timer_interrupt(int irq, struct pt_regs *regs)
+irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
+ /*
+ * Here we are in the timer irq handler. We just have irqs locally
+ * disabled but we don't know if the timer_bh is running on the other
+ * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
+ * the irq version of write_lock because as just said we have irq
+ * locally disabled. -arca
+ */
+ write_seqlock(&xtime_lock);
+
#ifdef CONFIG_X86_IO_APIC
if (timer_ack) {
/*
@@ -279,27 +192,6 @@ static inline void do_timer_interrupt(int irq, struct pt_regs *regs)
irq = inb_p( 0x61 ); /* read the current state */
outb_p( irq|0x80, 0x61 ); /* reset the IRQ */
}
-}
-
-/*
- * This is the same as the above, except we _also_ save the current
- * Time Stamp Counter value at the time of the timer interrupt, so that
- * we later on can estimate the time of day more exactly.
- */
-irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
-{
- /*
- * Here we are in the timer irq handler. We just have irqs locally
- * disabled but we don't know if the timer_bh is running on the other
- * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
- * the irq version of write_lock because as just said we have irq
- * locally disabled. -arca
- */
- write_seqlock(&xtime_lock);
-
- cur_timer->mark_offset();
-
- do_timer_interrupt(irq, regs);
write_sequnlock(&xtime_lock);
@@ -380,7 +272,6 @@ void notify_arch_cmos_timer(void)
static long clock_cmos_diff, sleep_start;
-static struct timer_opts *last_timer;
static int timer_suspend(struct sys_device *dev, pm_message_t state)
{
/*
@@ -389,10 +280,6 @@ static int timer_suspend(struct sys_device *dev, pm_message_t state)
clock_cmos_diff = -get_cmos_time();
clock_cmos_diff += get_seconds();
sleep_start = get_cmos_time();
- last_timer = cur_timer;
- cur_timer = &timer_none;
- if (last_timer->suspend)
- last_timer->suspend(state);
return 0;
}
@@ -415,10 +302,6 @@ static int timer_resume(struct sys_device *dev)
jiffies_64 += sleep_length;
wall_jiffies += sleep_length;
write_sequnlock_irqrestore(&xtime_lock, flags);
- if (last_timer->resume)
- last_timer->resume();
- cur_timer = last_timer;
- last_timer = NULL;
touch_softlockup_watchdog();
return 0;
}
@@ -460,9 +343,6 @@ static void __init hpet_time_init(void)
printk("Using HPET for base-timer\n");
}
- cur_timer = select_timer();
- printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
-
time_init_hook();
}
#endif
@@ -484,8 +364,5 @@ void __init time_init(void)
set_normalized_timespec(&wall_to_monotonic,
-xtime.tv_sec, -xtime.tv_nsec);
- cur_timer = select_timer();
- printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
-
time_init_hook();
}
diff --git a/arch/i386/kernel/timers/Makefile b/arch/i386/kernel/timers/Makefile
deleted file mode 100644
index 8fa12be658dd..000000000000
--- a/arch/i386/kernel/timers/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
-#
-# Makefile for x86 timers
-#
-
-obj-y := timer.o timer_none.o timer_tsc.o timer_pit.o common.o
-
-obj-$(CONFIG_X86_CYCLONE_TIMER) += timer_cyclone.o
-obj-$(CONFIG_HPET_TIMER) += timer_hpet.o
-obj-$(CONFIG_X86_PM_TIMER) += timer_pm.o
diff --git a/arch/i386/kernel/timers/common.c b/arch/i386/kernel/timers/common.c
deleted file mode 100644
index 8163fe0cf1f0..000000000000
--- a/arch/i386/kernel/timers/common.c
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * Common functions used across the timers go here
- */
-
-#include <linux/init.h>
-#include <linux/timex.h>
-#include <linux/errno.h>
-#include <linux/jiffies.h>
-#include <linux/module.h>
-
-#include <asm/io.h>
-#include <asm/timer.h>
-#include <asm/hpet.h>
-
-#include "mach_timer.h"
-
-/* ------ Calibrate the TSC -------
- * Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset().
- * Too much 64-bit arithmetic here to do this cleanly in C, and for
- * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2)
- * output busy loop as low as possible. We avoid reading the CTC registers
- * directly because of the awkward 8-bit access mechanism of the 82C54
- * device.
- */
-
-#define CALIBRATE_TIME (5 * 1000020/HZ)
-
-unsigned long calibrate_tsc(void)
-{
- mach_prepare_counter();
-
- {
- unsigned long startlow, starthigh;
- unsigned long endlow, endhigh;
- unsigned long count;
-
- rdtsc(startlow,starthigh);
- mach_countup(&count);
- rdtsc(endlow,endhigh);
-
-
- /* Error: ECTCNEVERSET */
- if (count <= 1)
- goto bad_ctc;
-
- /* 64-bit subtract - gcc just messes up with long longs */
- __asm__("subl %2,%0\n\t"
- "sbbl %3,%1"
- :"=a" (endlow), "=d" (endhigh)
- :"g" (startlow), "g" (starthigh),
- "0" (endlow), "1" (endhigh));
-
- /* Error: ECPUTOOFAST */
- if (endhigh)
- goto bad_ctc;
-
- /* Error: ECPUTOOSLOW */
- if (endlow <= CALIBRATE_TIME)
- goto bad_ctc;
-
- __asm__("divl %2"
- :"=a" (endlow), "=d" (endhigh)
- :"r" (endlow), "0" (0), "1" (CALIBRATE_TIME));
-
- return endlow;
- }
-
- /*
- * The CTC wasn't reliable: we got a hit on the very first read,
- * or the CPU was so fast/slow that the quotient wouldn't fit in
- * 32 bits..
- */
-bad_ctc:
- return 0;
-}
-
-#ifdef CONFIG_HPET_TIMER
-/* ------ Calibrate the TSC using HPET -------
- * Return 2^32 * (1 / (TSC clocks per usec)) for getting the CPU freq.
- * Second output is parameter 1 (when non NULL)
- * Set 2^32 * (1 / (tsc per HPET clk)) for delay_hpet().
- * calibrate_tsc() calibrates the processor TSC by comparing
- * it to the HPET timer of known frequency.
- * Too much 64-bit arithmetic here to do this cleanly in C
- */
-#define CALIBRATE_CNT_HPET (5 * hpet_tick)
-#define CALIBRATE_TIME_HPET (5 * KERNEL_TICK_USEC)
-
-unsigned long __devinit calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr)
-{
- unsigned long tsc_startlow, tsc_starthigh;
- unsigned long tsc_endlow, tsc_endhigh;
- unsigned long hpet_start, hpet_end;
- unsigned long result, remain;
-
- hpet_start = hpet_readl(HPET_COUNTER);
- rdtsc(tsc_startlow, tsc_starthigh);
- do {
- hpet_end = hpet_readl(HPET_COUNTER);
- } while ((hpet_end - hpet_start) < CALIBRATE_CNT_HPET);
- rdtsc(tsc_endlow, tsc_endhigh);
-
- /* 64-bit subtract - gcc just messes up with long longs */
- __asm__("subl %2,%0\n\t"
- "sbbl %3,%1"
- :"=a" (tsc_endlow), "=d" (tsc_endhigh)
- :"g" (tsc_startlow), "g" (tsc_starthigh),
- "0" (tsc_endlow), "1" (tsc_endhigh));
-
- /* Error: ECPUTOOFAST */
- if (tsc_endhigh)
- goto bad_calibration;
-
- /* Error: ECPUTOOSLOW */
- if (tsc_endlow <= CALIBRATE_TIME_HPET)
- goto bad_calibration;
-
- ASM_DIV64_REG(result, remain, tsc_endlow, 0, CALIBRATE_TIME_HPET);
- if (remain > (tsc_endlow >> 1))
- result++; /* rounding the result */
-
- if (tsc_hpet_quotient_ptr) {
- unsigned long tsc_hpet_quotient;
-
- ASM_DIV64_REG(tsc_hpet_quotient, remain, tsc_endlow, 0,
- CALIBRATE_CNT_HPET);
- if (remain > (tsc_endlow >> 1))
- tsc_hpet_quotient++; /* rounding the result */
- *tsc_hpet_quotient_ptr = tsc_hpet_quotient;
- }
-
- return result;
-bad_calibration:
- /*
- * the CPU was so fast/slow that the quotient wouldn't fit in
- * 32 bits..
- */
- return 0;
-}
-#endif
-
-
-unsigned long read_timer_tsc(void)
-{
- unsigned long retval;
- rdtscl(retval);
- return retval;
-}
-
-
-/* calculate cpu_khz */
-void init_cpu_khz(void)
-{
- if (cpu_has_tsc) {
- unsigned long tsc_quotient = calibrate_tsc();
- if (tsc_quotient) {
- /* report CPU clock rate in Hz.
- * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
- * clock/second. Our precision is about 100 ppm.
- */
- { unsigned long eax=0, edx=1000;
- __asm__("divl %2"
- :"=a" (cpu_khz), "=d" (edx)
- :"r" (tsc_quotient),
- "0" (eax), "1" (edx));
- printk("Detected %u.%03u MHz processor.\n",
- cpu_khz / 1000, cpu_khz % 1000);
- }
- }
- }
-}
-
diff --git a/arch/i386/kernel/timers/timer.c b/arch/i386/kernel/timers/timer.c
deleted file mode 100644
index 7e39ed8e33f8..000000000000
--- a/arch/i386/kernel/timers/timer.c
+++ /dev/null
@@ -1,75 +0,0 @@
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <asm/timer.h>
-
-#ifdef CONFIG_HPET_TIMER
-/*
- * HPET memory read is slower than tsc reads, but is more dependable as it
- * always runs at constant frequency and reduces complexity due to
- * cpufreq. So, we prefer HPET timer to tsc based one. Also, we cannot use
- * timer_pit when HPET is active. So, we default to timer_tsc.
- */
-#endif
-/* list of timers, ordered by preference, NULL terminated */
-static struct init_timer_opts* __initdata timers[] = {
-#ifdef CONFIG_X86_CYCLONE_TIMER
- &timer_cyclone_init,
-#endif
-#ifdef CONFIG_HPET_TIMER
- &timer_hpet_init,
-#endif
-#ifdef CONFIG_X86_PM_TIMER
- &timer_pmtmr_init,
-#endif
- &timer_tsc_init,
- &timer_pit_init,
- NULL,
-};
-
-static char clock_override[10] __initdata;
-
-static int __init clock_setup(char* str)
-{
- if (str)
- strlcpy(clock_override, str, sizeof(clock_override));
- return 1;
-}
-__setup("clock=", clock_setup);
-
-
-/* The chosen timesource has been found to be bad.
- * Fall back to a known good timesource (the PIT)
- */
-void clock_fallback(void)
-{
- cur_timer = &timer_pit;
-}
-
-/* iterates through the list of timers, returning the first
- * one that initializes successfully.
- */
-struct timer_opts* __init select_timer(void)
-{
- int i = 0;
-
- /* find most preferred working timer */
- while (timers[i]) {
- if (timers[i]->init)
- if (timers[i]->init(clock_override) == 0)
- return timers[i]->opts;
- ++i;
- }
-
- panic("select_timer: Cannot find a suitable timer\n");
- return NULL;
-}
-
-int read_current_timer(unsigned long *timer_val)
-{
- if (cur_timer->read_timer) {
- *timer_val = cur_timer->read_timer();
- return 0;
- }
- return -1;
-}
diff --git a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c
deleted file mode 100644
index 13892a65c941..000000000000
--- a/arch/i386/kernel/timers/timer_cyclone.c
+++ /dev/null
@@ -1,259 +0,0 @@
-/* Cyclone-timer:
- * This code implements timer_ops for the cyclone counter found
- * on IBM x440, x360, and other Summit based systems.
- *
- * Copyright (C) 2002 IBM, John Stultz (johnstul@us.ibm.com)
- */
-
-
-#include <linux/spinlock.h>
-#include <linux/init.h>
-#include <linux/timex.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/jiffies.h>
-
-#include <asm/timer.h>
-#include <asm/io.h>
-#include <asm/pgtable.h>
-#include <asm/fixmap.h>
-#include <asm/i8253.h>
-
-#include "io_ports.h"
-
-/* Number of usecs that the last interrupt was delayed */
-static int delay_at_last_interrupt;
-
-#define CYCLONE_CBAR_ADDR 0xFEB00CD0
-#define CYCLONE_PMCC_OFFSET 0x51A0
-#define CYCLONE_MPMC_OFFSET 0x51D0
-#define CYCLONE_MPCS_OFFSET 0x51A8
-#define CYCLONE_TIMER_FREQ 100000000
-#define CYCLONE_TIMER_MASK (((u64)1<<40)-1) /* 40 bit mask */
-int use_cyclone = 0;
-
-static u32* volatile cyclone_timer; /* Cyclone MPMC0 register */
-static u32 last_cyclone_low;
-static u32 last_cyclone_high;
-static unsigned long long monotonic_base;
-static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
-
-/* helper macro to atomically read both cyclone counter registers */
-#define read_cyclone_counter(low,high) \
- do{ \
- high = cyclone_timer[1]; low = cyclone_timer[0]; \
- } while (high != cyclone_timer[1]);
-
-
-static void mark_offset_cyclone(void)
-{
- unsigned long lost, delay;
- unsigned long delta = last_cyclone_low;
- int count;
- unsigned long long this_offset, last_offset;
-
- write_seqlock(&monotonic_lock);
- last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
-
- spin_lock(&i8253_lock);
- read_cyclone_counter(last_cyclone_low,last_cyclone_high);
-
- /* read values for delay_at_last_interrupt */
- outb_p(0x00, 0x43); /* latch the count ASAP */
-
- count = inb_p(0x40); /* read the latched count */
- count |= inb(0x40) << 8;
-
- /*
- * VIA686a test code... reset the latch if count > max + 1
- * from timer_pit.c - cjb
- */
- if (count > LATCH) {
- outb_p(0x34, PIT_MODE);
- outb_p(LATCH & 0xff, PIT_CH0);
- outb(LATCH >> 8, PIT_CH0);
- count = LATCH - 1;
- }
- spin_unlock(&i8253_lock);
-
- /* lost tick compensation */
- delta = last_cyclone_low - delta;
- delta /= (CYCLONE_TIMER_FREQ/1000000);
- delta += delay_at_last_interrupt;
- lost = delta/(1000000/HZ);
- delay = delta%(1000000/HZ);
- if (lost >= 2)
- jiffies_64 += lost-1;
-
- /* update the monotonic base value */
- this_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
- monotonic_base += (this_offset - last_offset) & CYCLONE_TIMER_MASK;
- write_sequnlock(&monotonic_lock);
-
- /* calculate delay_at_last_interrupt */
- count = ((LATCH-1) - count) * TICK_SIZE;
- delay_at_last_interrupt = (count + LATCH/2) / LATCH;
-
-
- /* catch corner case where tick rollover occured
- * between cyclone and pit reads (as noted when
- * usec delta is > 90% # of usecs/tick)
- */
- if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
- jiffies_64++;
-}
-
-static unsigned long get_offset_cyclone(void)
-{
- u32 offset;
-
- if(!cyclone_timer)
- return delay_at_last_interrupt;
-
- /* Read the cyclone timer */
- offset = cyclone_timer[0];
-
- /* .. relative to previous jiffy */
- offset = offset - last_cyclone_low;
-
- /* convert cyclone ticks to microseconds */
- /* XXX slow, can we speed this up? */
- offset = offset/(CYCLONE_TIMER_FREQ/1000000);
-
- /* our adjusted time offset in microseconds */
- return delay_at_last_interrupt + offset;
-}
-
-static unsigned long long monotonic_clock_cyclone(void)
-{
- u32 now_low, now_high;
- unsigned long long last_offset, this_offset, base;
- unsigned long long ret;
- unsigned seq;
-
- /* atomically read monotonic base & last_offset */
- do {
- seq = read_seqbegin(&monotonic_lock);
- last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
- base = monotonic_base;
- } while (read_seqretry(&monotonic_lock, seq));
-
-
- /* Read the cyclone counter */
- read_cyclone_counter(now_low,now_high);
- this_offset = ((unsigned long long)now_high<<32)|now_low;
-
- /* convert to nanoseconds */
- ret = base + ((this_offset - last_offset)&CYCLONE_TIMER_MASK);
- return ret * (1000000000 / CYCLONE_TIMER_FREQ);
-}
-
-static int __init init_cyclone(char* override)
-{
- u32* reg;
- u32 base; /* saved cyclone base address */
- u32 pageaddr; /* page that contains cyclone_timer register */
- u32 offset; /* offset from pageaddr to cyclone_timer register */
- int i;
-
- /* check clock override */
- if (override[0] && strncmp(override,"cyclone",7))
- return -ENODEV;
-
- /*make sure we're on a summit box*/
- if(!use_cyclone) return -ENODEV;
-
- printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n");
-
- /* find base address */
- pageaddr = (CYCLONE_CBAR_ADDR)&PAGE_MASK;
- offset = (CYCLONE_CBAR_ADDR)&(~PAGE_MASK);
- set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
- reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
- if(!reg){
- printk(KERN_ERR "Summit chipset: Could not find valid CBAR register.\n");
- return -ENODEV;
- }
- base = *reg;
- if(!base){
- printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n");
- return -ENODEV;
- }
-
- /* setup PMCC */
- pageaddr = (base + CYCLONE_PMCC_OFFSET)&PAGE_MASK;
- offset = (base + CYCLONE_PMCC_OFFSET)&(~PAGE_MASK);
- set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
- reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
- if(!reg){
- printk(KERN_ERR "Summit chipset: Could not find valid PMCC register.\n");
- return -ENODEV;
- }
- reg[0] = 0x00000001;
-
- /* setup MPCS */
- pageaddr = (base + CYCLONE_MPCS_OFFSET)&PAGE_MASK;
- offset = (base + CYCLONE_MPCS_OFFSET)&(~PAGE_MASK);
- set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
- reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
- if(!reg){
- printk(KERN_ERR "Summit chipset: Could not find valid MPCS register.\n");
- return -ENODEV;
- }
- reg[0] = 0x00000001;
-
- /* map in cyclone_timer */
- pageaddr = (base + CYCLONE_MPMC_OFFSET)&PAGE_MASK;
- offset = (base + CYCLONE_MPMC_OFFSET)&(~PAGE_MASK);
- set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
- cyclone_timer = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
- if(!cyclone_timer){
- printk(KERN_ERR "Summit chipset: Could not find valid MPMC register.\n");
- return -ENODEV;
- }
-
- /*quick test to make sure its ticking*/
- for(i=0; i<3; i++){
- u32 old = cyclone_timer[0];
- int stall = 100;
- while(stall--) barrier();
- if(cyclone_timer[0] == old){
- printk(KERN_ERR "Summit chipset: Counter not counting! DISABLED\n");
- cyclone_timer = 0;
- return -ENODEV;
- }
- }
-
- init_cpu_khz();
-
- /* Everything looks good! */
- return 0;
-}
-
-
-static void delay_cyclone(unsigned long loops)
-{
- unsigned long bclock, now;
- if(!cyclone_timer)
- return;
- bclock = cyclone_timer[0];
- do {
- rep_nop();
- now = cyclone_timer[0];
- } while ((now-bclock) < loops);
-}
-/************************************************************/
-
-/* cyclone timer_opts struct */
-static struct timer_opts timer_cyclone = {
- .name = "cyclone",
- .mark_offset = mark_offset_cyclone,
- .get_offset = get_offset_cyclone,
- .monotonic_clock = monotonic_clock_cyclone,
- .delay = delay_cyclone,
-};
-
-struct init_timer_opts __initdata timer_cyclone_init = {
- .init = init_cyclone,
- .opts = &timer_cyclone,
-};
diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c
deleted file mode 100644
index 17a6fe7166e7..000000000000
--- a/arch/i386/kernel/timers/timer_hpet.c
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- * This code largely moved from arch/i386/kernel/time.c.
- * See comments there for proper credits.
- */
-
-#include <linux/spinlock.h>
-#include <linux/init.h>
-#include <linux/timex.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/jiffies.h>
-
-#include <asm/timer.h>
-#include <asm/io.h>
-#include <asm/processor.h>
-
-#include "io_ports.h"
-#include "mach_timer.h"
-#include <asm/hpet.h>
-
-static unsigned long hpet_usec_quotient __read_mostly; /* convert hpet clks to usec */
-static unsigned long tsc_hpet_quotient __read_mostly; /* convert tsc to hpet clks */
-static unsigned long hpet_last; /* hpet counter value at last tick*/
-static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
-static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
-static unsigned long long monotonic_base;
-static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
-
-/* convert from cycles(64bits) => nanoseconds (64bits)
- * basic equation:
- * ns = cycles / (freq / ns_per_sec)
- * ns = cycles * (ns_per_sec / freq)
- * ns = cycles * (10^9 / (cpu_khz * 10^3))
- * ns = cycles * (10^6 / cpu_khz)
- *
- * Then we use scaling math (suggested by george@mvista.com) to get:
- * ns = cycles * (10^6 * SC / cpu_khz) / SC
- * ns = cycles * cyc2ns_scale / SC
- *
- * And since SC is a constant power of two, we can convert the div
- * into a shift.
- *
- * We can use khz divisor instead of mhz to keep a better percision, since
- * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
- * (mathieu.desnoyers@polymtl.ca)
- *
- * -johnstul@us.ibm.com "math is hard, lets go shopping!"
- */
-static unsigned long cyc2ns_scale __read_mostly;
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
-
-static inline void set_cyc2ns_scale(unsigned long cpu_khz)
-{
- cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
-}
-
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
- return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
-}
-
-static unsigned long long monotonic_clock_hpet(void)
-{
- unsigned long long last_offset, this_offset, base;
- unsigned seq;
-
- /* atomically read monotonic base & last_offset */
- do {
- seq = read_seqbegin(&monotonic_lock);
- last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- base = monotonic_base;
- } while (read_seqretry(&monotonic_lock, seq));
-
- /* Read the Time Stamp Counter */
- rdtscll(this_offset);
-
- /* return the value in ns */
- return base + cycles_2_ns(this_offset - last_offset);
-}
-
-static unsigned long get_offset_hpet(void)
-{
- register unsigned long eax, edx;
-
- eax = hpet_readl(HPET_COUNTER);
- eax -= hpet_last; /* hpet delta */
- eax = min(hpet_tick, eax);
- /*
- * Time offset = (hpet delta) * ( usecs per HPET clock )
- * = (hpet delta) * ( usecs per tick / HPET clocks per tick)
- * = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
- *
- * Where,
- * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
- *
- * Using a mull instead of a divl saves some cycles in critical path.
- */
- ASM_MUL64_REG(eax, edx, hpet_usec_quotient, eax);
-
- /* our adjusted time offset in microseconds */
- return edx;
-}
-
-static void mark_offset_hpet(void)
-{
- unsigned long long this_offset, last_offset;
- unsigned long offset;
-
- write_seqlock(&monotonic_lock);
- last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- rdtsc(last_tsc_low, last_tsc_high);
-
- if (hpet_use_timer)
- offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
- else
- offset = hpet_readl(HPET_COUNTER);
- if (unlikely(((offset - hpet_last) >= (2*hpet_tick)) && (hpet_last != 0))) {
- int lost_ticks = ((offset - hpet_last) / hpet_tick) - 1;
- jiffies_64 += lost_ticks;
- }
- hpet_last = offset;
-
- /* update the monotonic base value */
- this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- monotonic_base += cycles_2_ns(this_offset - last_offset);
- write_sequnlock(&monotonic_lock);
-}
-
-static void delay_hpet(unsigned long loops)
-{
- unsigned long hpet_start, hpet_end;
- unsigned long eax;
-
- /* loops is the number of cpu cycles. Convert it to hpet clocks */
- ASM_MUL64_REG(eax, loops, tsc_hpet_quotient, loops);
-
- hpet_start = hpet_readl(HPET_COUNTER);
- do {
- rep_nop();
- hpet_end = hpet_readl(HPET_COUNTER);
- } while ((hpet_end - hpet_start) < (loops));
-}
-
-static struct timer_opts timer_hpet;
-
-static int __init init_hpet(char* override)
-{
- unsigned long result, remain;
-
- /* check clock override */
- if (override[0] && strncmp(override,"hpet",4))
- return -ENODEV;
-
- if (!is_hpet_enabled())
- return -ENODEV;
-
- printk("Using HPET for gettimeofday\n");
- if (cpu_has_tsc) {
- unsigned long tsc_quotient = calibrate_tsc_hpet(&tsc_hpet_quotient);
- if (tsc_quotient) {
- /* report CPU clock rate in Hz.
- * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
- * clock/second. Our precision is about 100 ppm.
- */
- { unsigned long eax=0, edx=1000;
- ASM_DIV64_REG(cpu_khz, edx, tsc_quotient,
- eax, edx);
- printk("Detected %u.%03u MHz processor.\n",
- cpu_khz / 1000, cpu_khz % 1000);
- }
- set_cyc2ns_scale(cpu_khz);
- }
- /* set this only when cpu_has_tsc */
- timer_hpet.read_timer = read_timer_tsc;
- }
-
- /*
- * Math to calculate hpet to usec multiplier
- * Look for the comments at get_offset_hpet()
- */
- ASM_DIV64_REG(result, remain, hpet_tick, 0, KERNEL_TICK_USEC);
- if (remain > (hpet_tick >> 1))
- result++; /* rounding the result */
- hpet_usec_quotient = result;
-
- return 0;
-}
-
-static int hpet_resume(void)
-{
- write_seqlock(&monotonic_lock);
- /* Assume this is the last mark offset time */
- rdtsc(last_tsc_low, last_tsc_high);
-
- if (hpet_use_timer)
- hpet_last = hpet_readl(HPET_T0_CMP) - hpet_tick;
- else
- hpet_last = hpet_readl(HPET_COUNTER);
- write_sequnlock(&monotonic_lock);
- return 0;
-}
-/************************************************************/
-
-/* tsc timer_opts struct */
-static struct timer_opts timer_hpet __read_mostly = {
- .name = "hpet",
- .mark_offset = mark_offset_hpet,
- .get_offset = get_offset_hpet,
- .monotonic_clock = monotonic_clock_hpet,
- .delay = delay_hpet,
- .resume = hpet_resume,
-};
-
-struct init_timer_opts __initdata timer_hpet_init = {
- .init = init_hpet,
- .opts = &timer_hpet,
-};
diff --git a/arch/i386/kernel/timers/timer_none.c b/arch/i386/kernel/timers/timer_none.c
deleted file mode 100644
index 4ea2f414dbbd..000000000000
--- a/arch/i386/kernel/timers/timer_none.c
+++ /dev/null
@@ -1,39 +0,0 @@
-#include <linux/init.h>
-#include <asm/timer.h>
-
-static void mark_offset_none(void)
-{
- /* nothing needed */
-}
-
-static unsigned long get_offset_none(void)
-{
- return 0;
-}
-
-static unsigned long long monotonic_clock_none(void)
-{
- return 0;
-}
-
-static void delay_none(unsigned long loops)
-{
- int d0;
- __asm__ __volatile__(
- "\tjmp 1f\n"
- ".align 16\n"
- "1:\tjmp 2f\n"
- ".align 16\n"
- "2:\tdecl %0\n\tjns 2b"
- :"=&a" (d0)
- :"0" (loops));
-}
-
-/* none timer_opts struct */
-struct timer_opts timer_none = {
- .name = "none",
- .mark_offset = mark_offset_none,
- .get_offset = get_offset_none,
- .monotonic_clock = monotonic_clock_none,
- .delay = delay_none,
-};
diff --git a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c
deleted file mode 100644
index b9b6bd56b9ba..000000000000
--- a/arch/i386/kernel/timers/timer_pit.c
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * This code largely moved from arch/i386/kernel/time.c.
- * See comments there for proper credits.
- */
-
-#include <linux/spinlock.h>
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/sysdev.h>
-#include <linux/timex.h>
-#include <asm/delay.h>
-#include <asm/mpspec.h>
-#include <asm/timer.h>
-#include <asm/smp.h>
-#include <asm/io.h>
-#include <asm/arch_hooks.h>
-#include <asm/i8253.h>
-
-#include "do_timer.h"
-#include "io_ports.h"
-
-static int count_p; /* counter in get_offset_pit() */
-
-static int __init init_pit(char* override)
-{
- /* check clock override */
- if (override[0] && strncmp(override,"pit",3))
- printk(KERN_ERR "Warning: clock= override failed. Defaulting "
- "to PIT\n");
- init_cpu_khz();
- count_p = LATCH;
- return 0;
-}
-
-static void mark_offset_pit(void)
-{
- /* nothing needed */
-}
-
-static unsigned long long monotonic_clock_pit(void)
-{
- return 0;
-}
-
-static void delay_pit(unsigned long loops)
-{
- int d0;
- __asm__ __volatile__(
- "\tjmp 1f\n"
- ".align 16\n"
- "1:\tjmp 2f\n"
- ".align 16\n"
- "2:\tdecl %0\n\tjns 2b"
- :"=&a" (d0)
- :"0" (loops));
-}
-
-
-/* This function must be called with xtime_lock held.
- * It was inspired by Steve McCanne's microtime-i386 for BSD. -- jrs
- *
- * However, the pc-audio speaker driver changes the divisor so that
- * it gets interrupted rather more often - it loads 64 into the
- * counter rather than 11932! This has an adverse impact on
- * do_gettimeoffset() -- it stops working! What is also not
- * good is that the interval that our timer function gets called
- * is no longer 10.0002 ms, but 9.9767 ms. To get around this
- * would require using a different timing source. Maybe someone
- * could use the RTC - I know that this can interrupt at frequencies
- * ranging from 8192Hz to 2Hz. If I had the energy, I'd somehow fix
- * it so that at startup, the timer code in sched.c would select
- * using either the RTC or the 8253 timer. The decision would be
- * based on whether there was any other device around that needed
- * to trample on the 8253. I'd set up the RTC to interrupt at 1024 Hz,
- * and then do some jiggery to have a version of do_timer that
- * advanced the clock by 1/1024 s. Every time that reached over 1/100
- * of a second, then do all the old code. If the time was kept correct
- * then do_gettimeoffset could just return 0 - there is no low order
- * divider that can be accessed.
- *
- * Ideally, you would be able to use the RTC for the speaker driver,
- * but it appears that the speaker driver really needs interrupt more
- * often than every 120 us or so.
- *
- * Anyway, this needs more thought.... pjsg (1993-08-28)
- *
- * If you are really that interested, you should be reading
- * comp.protocols.time.ntp!
- */
-
-static unsigned long get_offset_pit(void)
-{
- int count;
- unsigned long flags;
- static unsigned long jiffies_p = 0;
-
- /*
- * cache volatile jiffies temporarily; we have xtime_lock.
- */
- unsigned long jiffies_t;
-
- spin_lock_irqsave(&i8253_lock, flags);
- /* timer count may underflow right here */
- outb_p(0x00, PIT_MODE); /* latch the count ASAP */
-
- count = inb_p(PIT_CH0); /* read the latched count */
-
- /*
- * We do this guaranteed double memory access instead of a _p
- * postfix in the previous port access. Wheee, hackady hack
- */
- jiffies_t = jiffies;
-
- count |= inb_p(PIT_CH0) << 8;
-
- /* VIA686a test code... reset the latch if count > max + 1 */
- if (count > LATCH) {
- outb_p(0x34, PIT_MODE);
- outb_p(LATCH & 0xff, PIT_CH0);
- outb(LATCH >> 8, PIT_CH0);
- count = LATCH - 1;
- }
-
- /*
- * avoiding timer inconsistencies (they are rare, but they happen)...
- * there are two kinds of problems that must be avoided here:
- * 1. the timer counter underflows
- * 2. hardware problem with the timer, not giving us continuous time,
- * the counter does small "jumps" upwards on some Pentium systems,
- * (see c't 95/10 page 335 for Neptun bug.)
- */
-
- if( jiffies_t == jiffies_p ) {
- if( count > count_p ) {
- /* the nutcase */
- count = do_timer_overflow(count);
- }
- } else
- jiffies_p = jiffies_t;
-
- count_p = count;
-
- spin_unlock_irqrestore(&i8253_lock, flags);
-
- count = ((LATCH-1) - count) * TICK_SIZE;
- count = (count + LATCH/2) / LATCH;
-
- return count;
-}
-
-
-/* tsc timer_opts struct */
-struct timer_opts timer_pit = {
- .name = "pit",
- .mark_offset = mark_offset_pit,
- .get_offset = get_offset_pit,
- .monotonic_clock = monotonic_clock_pit,
- .delay = delay_pit,
-};
-
-struct init_timer_opts __initdata timer_pit_init = {
- .init = init_pit,
- .opts = &timer_pit,
-};
-
-void setup_pit_timer(void)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&i8253_lock, flags);
- outb_p(0x34,PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
- udelay(10);
- outb_p(LATCH & 0xff , PIT_CH0); /* LSB */
- udelay(10);
- outb(LATCH >> 8 , PIT_CH0); /* MSB */
- spin_unlock_irqrestore(&i8253_lock, flags);
-}
diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c
deleted file mode 100644
index 144e94a04933..000000000000
--- a/arch/i386/kernel/timers/timer_pm.c
+++ /dev/null
@@ -1,342 +0,0 @@
-/*
- * (C) Dominik Brodowski <linux@brodo.de> 2003
- *
- * Driver to use the Power Management Timer (PMTMR) available in some
- * southbridges as primary timing source for the Linux kernel.
- *
- * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c,
- * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4.
- *
- * This file is licensed under the GPL v2.
- */
-
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <asm/types.h>
-#include <asm/timer.h>
-#include <asm/smp.h>
-#include <asm/io.h>
-#include <asm/arch_hooks.h>
-
-#include <linux/timex.h>
-#include "mach_timer.h"
-
-/* Number of PMTMR ticks expected during calibration run */
-#define PMTMR_TICKS_PER_SEC 3579545
-#define PMTMR_EXPECTED_RATE \
- ((CALIBRATE_LATCH * (PMTMR_TICKS_PER_SEC >> 10)) / (CLOCK_TICK_RATE>>10))
-
-
-/* The I/O port the PMTMR resides at.
- * The location is detected during setup_arch(),
- * in arch/i386/acpi/boot.c */
-u32 pmtmr_ioport = 0;
-
-
-/* value of the Power timer at last timer interrupt */
-static u32 offset_tick;
-static u32 offset_delay;
-
-static unsigned long long monotonic_base;
-static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
-
-#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */
-
-static int pmtmr_need_workaround __read_mostly = 1;
-
-/*helper function to safely read acpi pm timesource*/
-static inline u32 read_pmtmr(void)
-{
- if (pmtmr_need_workaround) {
- u32 v1, v2, v3;
-
- /* It has been reported that because of various broken
- * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time
- * source is not latched, so you must read it multiple
- * times to insure a safe value is read.
- */
- do {
- v1 = inl(pmtmr_ioport);
- v2 = inl(pmtmr_ioport);
- v3 = inl(pmtmr_ioport);
- } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1)
- || (v3 > v1 && v3 < v2));
-
- /* mask the output to 24 bits */
- return v2 & ACPI_PM_MASK;
- }
-
- return inl(pmtmr_ioport) & ACPI_PM_MASK;
-}
-
-
-/*
- * Some boards have the PMTMR running way too fast. We check
- * the PMTMR rate against PIT channel 2 to catch these cases.
- */
-static int verify_pmtmr_rate(void)
-{
- u32 value1, value2;
- unsigned long count, delta;
-
- mach_prepare_counter();
- value1 = read_pmtmr();
- mach_countup(&count);
- value2 = read_pmtmr();
- delta = (value2 - value1) & ACPI_PM_MASK;
-
- /* Check that the PMTMR delta is within 5% of what we expect */
- if (delta < (PMTMR_EXPECTED_RATE * 19) / 20 ||
- delta > (PMTMR_EXPECTED_RATE * 21) / 20) {
- printk(KERN_INFO "PM-Timer running at invalid rate: %lu%% of normal - aborting.\n", 100UL * delta / PMTMR_EXPECTED_RATE);
- return -1;
- }
-
- return 0;
-}
-
-
-static int init_pmtmr(char* override)
-{
- u32 value1, value2;
- unsigned int i;
-
- if (override[0] && strncmp(override,"pmtmr",5))
- return -ENODEV;
-
- if (!pmtmr_ioport)
- return -ENODEV;
-
- /* we use the TSC for delay_pmtmr, so make sure it exists */
- if (!cpu_has_tsc)
- return -ENODEV;
-
- /* "verify" this timing source */
- value1 = read_pmtmr();
- for (i = 0; i < 10000; i++) {
- value2 = read_pmtmr();
- if (value2 == value1)
- continue;
- if (value2 > value1)
- goto pm_good;
- if ((value2 < value1) && ((value2) < 0xFFF))
- goto pm_good;
- printk(KERN_INFO "PM-Timer had inconsistent results: 0x%#x, 0x%#x - aborting.\n", value1, value2);
- return -EINVAL;
- }
- printk(KERN_INFO "PM-Timer had no reasonable result: 0x%#x - aborting.\n", value1);
- return -ENODEV;
-
-pm_good:
- if (verify_pmtmr_rate() != 0)
- return -ENODEV;
-
- init_cpu_khz();
- return 0;
-}
-
-static inline u32 cyc2us(u32 cycles)
-{
- /* The Power Management Timer ticks at 3.579545 ticks per microsecond.
- * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%]
- *
- * Even with HZ = 100, delta is at maximum 35796 ticks, so it can
- * easily be multiplied with 286 (=0x11E) without having to fear
- * u32 overflows.
- */
- cycles *= 286;
- return (cycles >> 10);
-}
-
-/*
- * this gets called during each timer interrupt
- * - Called while holding the writer xtime_lock
- */
-static void mark_offset_pmtmr(void)
-{
- u32 lost, delta, last_offset;
- static int first_run = 1;
- last_offset = offset_tick;
-
- write_seqlock(&monotonic_lock);
-
- offset_tick = read_pmtmr();
-
- /* calculate tick interval */
- delta = (offset_tick - last_offset) & ACPI_PM_MASK;
-
- /* convert to usecs */
- delta = cyc2us(delta);
-
- /* update the monotonic base value */
- monotonic_base += delta * NSEC_PER_USEC;
- write_sequnlock(&monotonic_lock);
-
- /* convert to ticks */
- delta += offset_delay;
- lost = delta / (USEC_PER_SEC / HZ);
- offset_delay = delta % (USEC_PER_SEC / HZ);
-
-
- /* compensate for lost ticks */
- if (lost >= 2)
- jiffies_64 += lost - 1;
-
- /* don't calculate delay for first run,
- or if we've got less then a tick */
- if (first_run || (lost < 1)) {
- first_run = 0;
- offset_delay = 0;
- }
-}
-
-static int pmtmr_resume(void)
-{
- write_seqlock(&monotonic_lock);
- /* Assume this is the last mark offset time */
- offset_tick = read_pmtmr();
- write_sequnlock(&monotonic_lock);
- return 0;
-}
-
-static unsigned long long monotonic_clock_pmtmr(void)
-{
- u32 last_offset, this_offset;
- unsigned long long base, ret;
- unsigned seq;
-
-
- /* atomically read monotonic base & last_offset */
- do {
- seq = read_seqbegin(&monotonic_lock);
- last_offset = offset_tick;
- base = monotonic_base;
- } while (read_seqretry(&monotonic_lock, seq));
-
- /* Read the pmtmr */
- this_offset = read_pmtmr();
-
- /* convert to nanoseconds */
- ret = (this_offset - last_offset) & ACPI_PM_MASK;
- ret = base + (cyc2us(ret) * NSEC_PER_USEC);
- return ret;
-}
-
-static void delay_pmtmr(unsigned long loops)
-{
- unsigned long bclock, now;
-
- rdtscl(bclock);
- do
- {
- rep_nop();
- rdtscl(now);
- } while ((now-bclock) < loops);
-}
-
-
-/*
- * get the offset (in microseconds) from the last call to mark_offset()
- * - Called holding a reader xtime_lock
- */
-static unsigned long get_offset_pmtmr(void)
-{
- u32 now, offset, delta = 0;
-
- offset = offset_tick;
- now = read_pmtmr();
- delta = (now - offset)&ACPI_PM_MASK;
-
- return (unsigned long) offset_delay + cyc2us(delta);
-}
-
-
-/* acpi timer_opts struct */
-static struct timer_opts timer_pmtmr = {
- .name = "pmtmr",
- .mark_offset = mark_offset_pmtmr,
- .get_offset = get_offset_pmtmr,
- .monotonic_clock = monotonic_clock_pmtmr,
- .delay = delay_pmtmr,
- .read_timer = read_timer_tsc,
- .resume = pmtmr_resume,
-};
-
-struct init_timer_opts __initdata timer_pmtmr_init = {
- .init = init_pmtmr,
- .opts = &timer_pmtmr,
-};
-
-#ifdef CONFIG_PCI
-/*
- * PIIX4 Errata:
- *
- * The power management timer may return improper results when read.
- * Although the timer value settles properly after incrementing,
- * while incrementing there is a 3 ns window every 69.8 ns where the
- * timer value is indeterminate (a 4.2% chance that the data will be
- * incorrect when read). As a result, the ACPI free running count up
- * timer specification is violated due to erroneous reads.
- */
-static int __init pmtmr_bug_check(void)
-{
- static struct pci_device_id gray_list[] __initdata = {
- /* these chipsets may have bug. */
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL,
- PCI_DEVICE_ID_INTEL_82801DB_0) },
- { },
- };
- struct pci_dev *dev;
- int pmtmr_has_bug = 0;
- u8 rev;
-
- if (cur_timer != &timer_pmtmr || !pmtmr_need_workaround)
- return 0;
-
- dev = pci_get_device(PCI_VENDOR_ID_INTEL,
- PCI_DEVICE_ID_INTEL_82371AB_3, NULL);
- if (dev) {
- pci_read_config_byte(dev, PCI_REVISION_ID, &rev);
- /* the bug has been fixed in PIIX4M */
- if (rev < 3) {
- printk(KERN_WARNING "* Found PM-Timer Bug on this "
- "chipset. Due to workarounds for a bug,\n"
- "* this time source is slow. Consider trying "
- "other time sources (clock=)\n");
- pmtmr_has_bug = 1;
- }
- pci_dev_put(dev);
- }
-
- if (pci_dev_present(gray_list)) {
- printk(KERN_WARNING "* This chipset may have PM-Timer Bug. Due"
- " to workarounds for a bug,\n"
- "* this time source is slow. If you are sure your timer"
- " does not have\n"
- "* this bug, please use \"pmtmr_good\" to disable the "
- "workaround\n");
- pmtmr_has_bug = 1;
- }
-
- if (!pmtmr_has_bug)
- pmtmr_need_workaround = 0;
-
- return 0;
-}
-device_initcall(pmtmr_bug_check);
-#endif
-
-static int __init pmtr_good_setup(char *__str)
-{
- pmtmr_need_workaround = 0;
- return 1;
-}
-__setup("pmtmr_good", pmtr_good_setup);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
-MODULE_DESCRIPTION("Power Management Timer (PMTMR) as primary timing source for x86");
diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
deleted file mode 100644
index f1187ddb0d0f..000000000000
--- a/arch/i386/kernel/timers/timer_tsc.c
+++ /dev/null
@@ -1,617 +0,0 @@
-/*
- * This code largely moved from arch/i386/kernel/time.c.
- * See comments there for proper credits.
- *
- * 2004-06-25 Jesper Juhl
- * moved mark_offset_tsc below cpufreq_delayed_get to avoid gcc 3.4
- * failing to inline.
- */
-
-#include <linux/spinlock.h>
-#include <linux/init.h>
-#include <linux/timex.h>
-#include <linux/errno.h>
-#include <linux/cpufreq.h>
-#include <linux/string.h>
-#include <linux/jiffies.h>
-
-#include <asm/timer.h>
-#include <asm/io.h>
-/* processor.h for distable_tsc flag */
-#include <asm/processor.h>
-
-#include "io_ports.h"
-#include "mach_timer.h"
-
-#include <asm/hpet.h>
-#include <asm/i8253.h>
-
-#ifdef CONFIG_HPET_TIMER
-static unsigned long hpet_usec_quotient;
-static unsigned long hpet_last;
-static struct timer_opts timer_tsc;
-#endif
-
-static inline void cpufreq_delayed_get(void);
-
-int tsc_disable __devinitdata = 0;
-
-static int use_tsc;
-/* Number of usecs that the last interrupt was delayed */
-static int delay_at_last_interrupt;
-
-static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
-static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
-static unsigned long long monotonic_base;
-static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
-
-/* Avoid compensating for lost ticks before TSCs are synched */
-static int detect_lost_ticks;
-static int __init start_lost_tick_compensation(void)
-{
- detect_lost_ticks = 1;
- return 0;
-}
-late_initcall(start_lost_tick_compensation);
-
-/* convert from cycles(64bits) => nanoseconds (64bits)
- * basic equation:
- * ns = cycles / (freq / ns_per_sec)
- * ns = cycles * (ns_per_sec / freq)
- * ns = cycles * (10^9 / (cpu_khz * 10^3))
- * ns = cycles * (10^6 / cpu_khz)
- *
- * Then we use scaling math (suggested by george@mvista.com) to get:
- * ns = cycles * (10^6 * SC / cpu_khz) / SC
- * ns = cycles * cyc2ns_scale / SC
- *
- * And since SC is a constant power of two, we can convert the div
- * into a shift.
- *
- * We can use khz divisor instead of mhz to keep a better percision, since
- * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
- * (mathieu.desnoyers@polymtl.ca)
- *
- * -johnstul@us.ibm.com "math is hard, lets go shopping!"
- */
-static unsigned long cyc2ns_scale __read_mostly;
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
-
-static inline void set_cyc2ns_scale(unsigned long cpu_khz)
-{
- cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
-}
-
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
- return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
-}
-
-static int count2; /* counter for mark_offset_tsc() */
-
-/* Cached *multiplier* to convert TSC counts to microseconds.
- * (see the equation below).
- * Equal to 2^32 * (1 / (clocks per usec) ).
- * Initialized in time_init.
- */
-static unsigned long fast_gettimeoffset_quotient;
-
-static unsigned long get_offset_tsc(void)
-{
- register unsigned long eax, edx;
-
- /* Read the Time Stamp Counter */
-
- rdtsc(eax,edx);
-
- /* .. relative to previous jiffy (32 bits is enough) */
- eax -= last_tsc_low; /* tsc_low delta */
-
- /*
- * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
- * = (tsc_low delta) * (usecs_per_clock)
- * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
- *
- * Using a mull instead of a divl saves up to 31 clock cycles
- * in the critical path.
- */
-
- __asm__("mull %2"
- :"=a" (eax), "=d" (edx)
- :"rm" (fast_gettimeoffset_quotient),
- "0" (eax));
-
- /* our adjusted time offset in microseconds */
- return delay_at_last_interrupt + edx;
-}
-
-static unsigned long long monotonic_clock_tsc(void)
-{
- unsigned long long last_offset, this_offset, base;
- unsigned seq;
-
- /* atomically read monotonic base & last_offset */
- do {
- seq = read_seqbegin(&monotonic_lock);
- last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- base = monotonic_base;
- } while (read_seqretry(&monotonic_lock, seq));
-
- /* Read the Time Stamp Counter */
- rdtscll(this_offset);
-
- /* return the value in ns */
- return base + cycles_2_ns(this_offset - last_offset);
-}
-
-/*
- * Scheduler clock - returns current time in nanosec units.
- */
-unsigned long long sched_clock(void)
-{
- unsigned long long this_offset;
-
- /*
- * In the NUMA case we dont use the TSC as they are not
- * synchronized across all CPUs.
- */
-#ifndef CONFIG_NUMA
- if (!use_tsc)
-#endif
- /* no locking but a rare wrong value is not a big deal */
- return jiffies_64 * (1000000000 / HZ);
-
- /* Read the Time Stamp Counter */
- rdtscll(this_offset);
-
- /* return the value in ns */
- return cycles_2_ns(this_offset);
-}
-
-static void delay_tsc(unsigned long loops)
-{
- unsigned long bclock, now;
-
- rdtscl(bclock);
- do
- {
- rep_nop();
- rdtscl(now);
- } while ((now-bclock) < loops);
-}
-
-#ifdef CONFIG_HPET_TIMER
-static void mark_offset_tsc_hpet(void)
-{
- unsigned long long this_offset, last_offset;
- unsigned long offset, temp, hpet_current;
-
- write_seqlock(&monotonic_lock);
- last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- /*
- * It is important that these two operations happen almost at
- * the same time. We do the RDTSC stuff first, since it's
- * faster. To avoid any inconsistencies, we need interrupts
- * disabled locally.
- */
- /*
- * Interrupts are just disabled locally since the timer irq
- * has the SA_INTERRUPT flag set. -arca
- */
- /* read Pentium cycle counter */
-
- hpet_current = hpet_readl(HPET_COUNTER);
- rdtsc(last_tsc_low, last_tsc_high);
-
- /* lost tick compensation */
- offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
- if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))
- && detect_lost_ticks) {
- int lost_ticks = (offset - hpet_last) / hpet_tick;
- jiffies_64 += lost_ticks;
- }
- hpet_last = hpet_current;
-
- /* update the monotonic base value */
- this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- monotonic_base += cycles_2_ns(this_offset - last_offset);
- write_sequnlock(&monotonic_lock);
-
- /* calculate delay_at_last_interrupt */
- /*
- * Time offset = (hpet delta) * ( usecs per HPET clock )
- * = (hpet delta) * ( usecs per tick / HPET clocks per tick)
- * = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
- * Where,
- * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
- */
- delay_at_last_interrupt = hpet_current - offset;
- ASM_MUL64_REG(temp, delay_at_last_interrupt,
- hpet_usec_quotient, delay_at_last_interrupt);
-}
-#endif
-
-
-#ifdef CONFIG_CPU_FREQ
-#include <linux/workqueue.h>
-
-static unsigned int cpufreq_delayed_issched = 0;
-static unsigned int cpufreq_init = 0;
-static struct work_struct cpufreq_delayed_get_work;
-
-static void handle_cpufreq_delayed_get(void *v)
-{
- unsigned int cpu;
- for_each_online_cpu(cpu) {
- cpufreq_get(cpu);
- }
- cpufreq_delayed_issched = 0;
-}
-
-/* if we notice lost ticks, schedule a call to cpufreq_get() as it tries
- * to verify the CPU frequency the timing core thinks the CPU is running
- * at is still correct.
- */
-static inline void cpufreq_delayed_get(void)
-{
- if (cpufreq_init && !cpufreq_delayed_issched) {
- cpufreq_delayed_issched = 1;
- printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n");
- schedule_work(&cpufreq_delayed_get_work);
- }
-}
-
-/* If the CPU frequency is scaled, TSC-based delays will need a different
- * loops_per_jiffy value to function properly.
- */
-
-static unsigned int ref_freq = 0;
-static unsigned long loops_per_jiffy_ref = 0;
-
-#ifndef CONFIG_SMP
-static unsigned long fast_gettimeoffset_ref = 0;
-static unsigned int cpu_khz_ref = 0;
-#endif
-
-static int
-time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
- void *data)
-{
- struct cpufreq_freqs *freq = data;
-
- if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
- write_seqlock_irq(&xtime_lock);
- if (!ref_freq) {
- if (!freq->old){
- ref_freq = freq->new;
- goto end;
- }
- ref_freq = freq->old;
- loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
-#ifndef CONFIG_SMP
- fast_gettimeoffset_ref = fast_gettimeoffset_quotient;
- cpu_khz_ref = cpu_khz;
-#endif
- }
-
- if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
- (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
- (val == CPUFREQ_RESUMECHANGE)) {
- if (!(freq->flags & CPUFREQ_CONST_LOOPS))
- cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
-#ifndef CONFIG_SMP
- if (cpu_khz)
- cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
- if (use_tsc) {
- if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
- fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq);
- set_cyc2ns_scale(cpu_khz);
- }
- }
-#endif
- }
-
-end:
- if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
- write_sequnlock_irq(&xtime_lock);
-
- return 0;
-}
-
-static struct notifier_block time_cpufreq_notifier_block = {
- .notifier_call = time_cpufreq_notifier
-};
-
-
-static int __init cpufreq_tsc(void)
-{
- int ret;
- INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
- ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
- CPUFREQ_TRANSITION_NOTIFIER);
- if (!ret)
- cpufreq_init = 1;
- return ret;
-}
-core_initcall(cpufreq_tsc);
-
-#else /* CONFIG_CPU_FREQ */
-static inline void cpufreq_delayed_get(void) { return; }
-#endif
-
-int recalibrate_cpu_khz(void)
-{
-#ifndef CONFIG_SMP
- unsigned int cpu_khz_old = cpu_khz;
-
- if (cpu_has_tsc) {
- local_irq_disable();
- init_cpu_khz();
- local_irq_enable();
- cpu_data[0].loops_per_jiffy =
- cpufreq_scale(cpu_data[0].loops_per_jiffy,
- cpu_khz_old,
- cpu_khz);
- return 0;
- } else
- return -ENODEV;
-#else
- return -ENODEV;
-#endif
-}
-EXPORT_SYMBOL(recalibrate_cpu_khz);
-
-static void mark_offset_tsc(void)
-{
- unsigned long lost,delay;
- unsigned long delta = last_tsc_low;
- int count;
- int countmp;
- static int count1 = 0;
- unsigned long long this_offset, last_offset;
- static int lost_count = 0;
-
- write_seqlock(&monotonic_lock);
- last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- /*
- * It is important that these two operations happen almost at
- * the same time. We do the RDTSC stuff first, since it's
- * faster. To avoid any inconsistencies, we need interrupts
- * disabled locally.
- */
-
- /*
- * Interrupts are just disabled locally since the timer irq
- * has the SA_INTERRUPT flag set. -arca
- */
-
- /* read Pentium cycle counter */
-
- rdtsc(last_tsc_low, last_tsc_high);
-
- spin_lock(&i8253_lock);
- outb_p(0x00, PIT_MODE); /* latch the count ASAP */
-
- count = inb_p(PIT_CH0); /* read the latched count */
- count |= inb(PIT_CH0) << 8;
-
- /*
- * VIA686a test code... reset the latch if count > max + 1
- * from timer_pit.c - cjb
- */
- if (count > LATCH) {
- outb_p(0x34, PIT_MODE);
- outb_p(LATCH & 0xff, PIT_CH0);
- outb(LATCH >> 8, PIT_CH0);
- count = LATCH - 1;
- }
-
- spin_unlock(&i8253_lock);
-
- if (pit_latch_buggy) {
- /* get center value of last 3 time lutch */
- if ((count2 >= count && count >= count1)
- || (count1 >= count && count >= count2)) {
- count2 = count1; count1 = count;
- } else if ((count1 >= count2 && count2 >= count)
- || (count >= count2 && count2 >= count1)) {
- countmp = count;count = count2;
- count2 = count1;count1 = countmp;
- } else {
- count2 = count1; count1 = count; count = count1;
- }
- }
-
- /* lost tick compensation */
- delta = last_tsc_low - delta;
- {
- register unsigned long eax, edx;
- eax = delta;
- __asm__("mull %2"
- :"=a" (eax), "=d" (edx)
- :"rm" (fast_gettimeoffset_quotient),
- "0" (eax));
- delta = edx;
- }
- delta += delay_at_last_interrupt;
- lost = delta/(1000000/HZ);
- delay = delta%(1000000/HZ);
- if (lost >= 2 && detect_lost_ticks) {
- jiffies_64 += lost-1;
-
- /* sanity check to ensure we're not always losing ticks */
- if (lost_count++ > 100) {
- printk(KERN_WARNING "Losing too many ticks!\n");
- printk(KERN_WARNING "TSC cannot be used as a timesource. \n");
- printk(KERN_WARNING "Possible reasons for this are:\n");
- printk(KERN_WARNING " You're running with Speedstep,\n");
- printk(KERN_WARNING " You don't have DMA enabled for your hard disk (see hdparm),\n");
- printk(KERN_WARNING " Incorrect TSC synchronization on an SMP system (see dmesg).\n");
- printk(KERN_WARNING "Falling back to a sane timesource now.\n");
-
- clock_fallback();
- }
- /* ... but give the TSC a fair chance */
- if (lost_count > 25)
- cpufreq_delayed_get();
- } else
- lost_count = 0;
- /* update the monotonic base value */
- this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- monotonic_base += cycles_2_ns(this_offset - last_offset);
- write_sequnlock(&monotonic_lock);
-
- /* calculate delay_at_last_interrupt */
- count = ((LATCH-1) - count) * TICK_SIZE;
- delay_at_last_interrupt = (count + LATCH/2) / LATCH;
-
- /* catch corner case where tick rollover occured
- * between tsc and pit reads (as noted when
- * usec delta is > 90% # of usecs/tick)
- */
- if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
- jiffies_64++;
-}
-
-static int __init init_tsc(char* override)
-{
-
- /* check clock override */
- if (override[0] && strncmp(override,"tsc",3)) {
-#ifdef CONFIG_HPET_TIMER
- if (is_hpet_enabled()) {
- printk(KERN_ERR "Warning: clock= override failed. Defaulting to tsc\n");
- } else
-#endif
- {
- return -ENODEV;
- }
- }
-
- /*
- * If we have APM enabled or the CPU clock speed is variable
- * (CPU stops clock on HLT or slows clock to save power)
- * then the TSC timestamps may diverge by up to 1 jiffy from
- * 'real time' but nothing will break.
- * The most frequent case is that the CPU is "woken" from a halt
- * state by the timer interrupt itself, so we get 0 error. In the
- * rare cases where a driver would "wake" the CPU and request a
- * timestamp, the maximum error is < 1 jiffy. But timestamps are
- * still perfectly ordered.
- * Note that the TSC counter will be reset if APM suspends
- * to disk; this won't break the kernel, though, 'cuz we're
- * smart. See arch/i386/kernel/apm.c.
- */
- /*
- * Firstly we have to do a CPU check for chips with
- * a potentially buggy TSC. At this point we haven't run
- * the ident/bugs checks so we must run this hook as it
- * may turn off the TSC flag.
- *
- * NOTE: this doesn't yet handle SMP 486 machines where only
- * some CPU's have a TSC. Thats never worked and nobody has
- * moaned if you have the only one in the world - you fix it!
- */
-
- count2 = LATCH; /* initialize counter for mark_offset_tsc() */
-
- if (cpu_has_tsc) {
- unsigned long tsc_quotient;
-#ifdef CONFIG_HPET_TIMER
- if (is_hpet_enabled() && hpet_use_timer) {
- unsigned long result, remain;
- printk("Using TSC for gettimeofday\n");
- tsc_quotient = calibrate_tsc_hpet(NULL);
- timer_tsc.mark_offset = &mark_offset_tsc_hpet;
- /*
- * Math to calculate hpet to usec multiplier
- * Look for the comments at get_offset_tsc_hpet()
- */
- ASM_DIV64_REG(result, remain, hpet_tick,
- 0, KERNEL_TICK_USEC);
- if (remain > (hpet_tick >> 1))
- result++; /* rounding the result */
-
- hpet_usec_quotient = result;
- } else
-#endif
- {
- tsc_quotient = calibrate_tsc();
- }
-
- if (tsc_quotient) {
- fast_gettimeoffset_quotient = tsc_quotient;
- use_tsc = 1;
- /*
- * We could be more selective here I suspect
- * and just enable this for the next intel chips ?
- */
- /* report CPU clock rate in Hz.
- * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
- * clock/second. Our precision is about 100 ppm.
- */
- { unsigned long eax=0, edx=1000;
- __asm__("divl %2"
- :"=a" (cpu_khz), "=d" (edx)
- :"r" (tsc_quotient),
- "0" (eax), "1" (edx));
- printk("Detected %u.%03u MHz processor.\n",
- cpu_khz / 1000, cpu_khz % 1000);
- }
- set_cyc2ns_scale(cpu_khz);
- return 0;
- }
- }
- return -ENODEV;
-}
-
-static int tsc_resume(void)
-{
- write_seqlock(&monotonic_lock);
- /* Assume this is the last mark offset time */
- rdtsc(last_tsc_low, last_tsc_high);
-#ifdef CONFIG_HPET_TIMER
- if (is_hpet_enabled() && hpet_use_timer)
- hpet_last = hpet_readl(HPET_COUNTER);
-#endif
- write_sequnlock(&monotonic_lock);
- return 0;
-}
-
-#ifndef CONFIG_X86_TSC
-/* disable flag for tsc. Takes effect by clearing the TSC cpu flag
- * in cpu/common.c */
-static int __init tsc_setup(char *str)
-{
- tsc_disable = 1;
- return 1;
-}
-#else
-static int __init tsc_setup(char *str)
-{
- printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
- "cannot disable TSC.\n");
- return 1;
-}
-#endif
-__setup("notsc", tsc_setup);
-
-
-
-/************************************************************/
-
-/* tsc timer_opts struct */
-static struct timer_opts timer_tsc = {
- .name = "tsc",
- .mark_offset = mark_offset_tsc,
- .get_offset = get_offset_tsc,
- .monotonic_clock = monotonic_clock_tsc,
- .delay = delay_tsc,
- .read_timer = read_timer_tsc,
- .resume = tsc_resume,
-};
-
-struct init_timer_opts __initdata timer_tsc_init = {
- .init = init_tsc,
- .opts = &timer_tsc,
-};
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index dcc14477af1f..78464097470a 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -28,6 +28,7 @@
#include <linux/utsname.h>
#include <linux/kprobes.h>
#include <linux/kexec.h>
+#include <linux/unwind.h>
#ifdef CONFIG_EISA
#include <linux/ioport.h>
@@ -47,7 +48,7 @@
#include <asm/desc.h>
#include <asm/i387.h>
#include <asm/nmi.h>
-
+#include <asm/unwind.h>
#include <asm/smp.h>
#include <asm/arch_hooks.h>
#include <asm/kdebug.h>
@@ -92,6 +93,7 @@ asmlinkage void spurious_interrupt_bug(void);
asmlinkage void machine_check(void);
static int kstack_depth_to_print = 24;
+static int call_trace = 1;
ATOMIC_NOTIFIER_HEAD(i386die_chain);
int register_die_notifier(struct notifier_block *nb)
@@ -170,7 +172,23 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
return ebp;
}
-static void show_trace_log_lvl(struct task_struct *task,
+static asmlinkage int show_trace_unwind(struct unwind_frame_info *info, void *log_lvl)
+{
+ int n = 0;
+ int printed = 0; /* nr of entries already printed on current line */
+
+ while (unwind(info) == 0 && UNW_PC(info)) {
+ ++n;
+ printed = print_addr_and_symbol(UNW_PC(info), log_lvl, printed);
+ if (arch_unw_user_mode(info))
+ break;
+ }
+ if (printed)
+ printk("\n");
+ return n;
+}
+
+static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, char *log_lvl)
{
unsigned long ebp;
@@ -178,6 +196,26 @@ static void show_trace_log_lvl(struct task_struct *task,
if (!task)
task = current;
+ if (call_trace >= 0) {
+ int unw_ret = 0;
+ struct unwind_frame_info info;
+
+ if (regs) {
+ if (unwind_init_frame_info(&info, task, regs) == 0)
+ unw_ret = show_trace_unwind(&info, log_lvl);
+ } else if (task == current)
+ unw_ret = unwind_init_running(&info, show_trace_unwind, log_lvl);
+ else {
+ if (unwind_init_blocked(&info, task) == 0)
+ unw_ret = show_trace_unwind(&info, log_lvl);
+ }
+ if (unw_ret > 0) {
+ if (call_trace > 0)
+ return;
+ printk("%sLegacy call trace:\n", log_lvl);
+ }
+ }
+
if (task == current) {
/* Grab ebp right from our regs */
asm ("movl %%ebp, %0" : "=r" (ebp) : );
@@ -198,13 +236,13 @@ static void show_trace_log_lvl(struct task_struct *task,
}
}
-void show_trace(struct task_struct *task, unsigned long * stack)
+void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack)
{
- show_trace_log_lvl(task, stack, "");
+ show_trace_log_lvl(task, regs, stack, "");
}
-static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp,
- char *log_lvl)
+static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *esp, char *log_lvl)
{
unsigned long *stack;
int i;
@@ -225,13 +263,13 @@ static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp,
printk("%08lx ", *stack++);
}
printk("\n%sCall Trace:\n", log_lvl);
- show_trace_log_lvl(task, esp, log_lvl);
+ show_trace_log_lvl(task, regs, esp, log_lvl);
}
void show_stack(struct task_struct *task, unsigned long *esp)
{
printk(" ");
- show_stack_log_lvl(task, esp, "");
+ show_stack_log_lvl(task, NULL, esp, "");
}
/*
@@ -241,7 +279,7 @@ void dump_stack(void)
{
unsigned long stack;
- show_trace(current, &stack);
+ show_trace(current, NULL, &stack);
}
EXPORT_SYMBOL(dump_stack);
@@ -285,7 +323,7 @@ void show_registers(struct pt_regs *regs)
u8 __user *eip;
printk("\n" KERN_EMERG "Stack: ");
- show_stack_log_lvl(NULL, (unsigned long *)esp, KERN_EMERG);
+ show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG);
printk(KERN_EMERG "Code: ");
@@ -1215,3 +1253,15 @@ static int __init kstack_setup(char *s)
return 1;
}
__setup("kstack=", kstack_setup);
+
+static int __init call_trace_setup(char *s)
+{
+ if (strcmp(s, "old") == 0)
+ call_trace = -1;
+ else if (strcmp(s, "both") == 0)
+ call_trace = 0;
+ else if (strcmp(s, "new") == 0)
+ call_trace = 1;
+ return 1;
+}
+__setup("call_trace=", call_trace_setup);
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
new file mode 100644
index 000000000000..7e0d8dab2075
--- /dev/null
+++ b/arch/i386/kernel/tsc.c
@@ -0,0 +1,478 @@
+/*
+ * This code largely moved from arch/i386/kernel/timer/timer_tsc.c
+ * which was originally moved from arch/i386/kernel/time.c.
+ * See comments there for proper credits.
+ */
+
+#include <linux/clocksource.h>
+#include <linux/workqueue.h>
+#include <linux/cpufreq.h>
+#include <linux/jiffies.h>
+#include <linux/init.h>
+#include <linux/dmi.h>
+
+#include <asm/delay.h>
+#include <asm/tsc.h>
+#include <asm/delay.h>
+#include <asm/io.h>
+
+#include "mach_timer.h"
+
+/*
+ * On some systems the TSC frequency does not
+ * change with the cpu frequency. So we need
+ * an extra value to store the TSC freq
+ */
+unsigned int tsc_khz;
+
+int tsc_disable __cpuinitdata = 0;
+
+#ifdef CONFIG_X86_TSC
+static int __init tsc_setup(char *str)
+{
+ printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
+ "cannot disable TSC.\n");
+ return 1;
+}
+#else
+/*
+ * disable flag for tsc. Takes effect by clearing the TSC cpu flag
+ * in cpu/common.c
+ */
+static int __init tsc_setup(char *str)
+{
+ tsc_disable = 1;
+
+ return 1;
+}
+#endif
+
+__setup("notsc", tsc_setup);
+
+/*
+ * code to mark and check if the TSC is unstable
+ * due to cpufreq or due to unsynced TSCs
+ */
+static int tsc_unstable;
+
+static inline int check_tsc_unstable(void)
+{
+ return tsc_unstable;
+}
+
+void mark_tsc_unstable(void)
+{
+ tsc_unstable = 1;
+}
+EXPORT_SYMBOL_GPL(mark_tsc_unstable);
+
+/* Accellerators for sched_clock()
+ * convert from cycles(64bits) => nanoseconds (64bits)
+ * basic equation:
+ * ns = cycles / (freq / ns_per_sec)
+ * ns = cycles * (ns_per_sec / freq)
+ * ns = cycles * (10^9 / (cpu_khz * 10^3))
+ * ns = cycles * (10^6 / cpu_khz)
+ *
+ * Then we use scaling math (suggested by george@mvista.com) to get:
+ * ns = cycles * (10^6 * SC / cpu_khz) / SC
+ * ns = cycles * cyc2ns_scale / SC
+ *
+ * And since SC is a constant power of two, we can convert the div
+ * into a shift.
+ *
+ * We can use khz divisor instead of mhz to keep a better percision, since
+ * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
+ * (mathieu.desnoyers@polymtl.ca)
+ *
+ * -johnstul@us.ibm.com "math is hard, lets go shopping!"
+ */
+static unsigned long cyc2ns_scale __read_mostly;
+
+#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
+
+static inline void set_cyc2ns_scale(unsigned long cpu_khz)
+{
+ cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
+}
+
+static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+{
+ return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
+}
+
+/*
+ * Scheduler clock - returns current time in nanosec units.
+ */
+unsigned long long sched_clock(void)
+{
+ unsigned long long this_offset;
+
+ /*
+ * in the NUMA case we dont use the TSC as they are not
+ * synchronized across all CPUs.
+ */
+#ifndef CONFIG_NUMA
+ if (!cpu_khz || check_tsc_unstable())
+#endif
+ /* no locking but a rare wrong value is not a big deal */
+ return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
+
+ /* read the Time Stamp Counter: */
+ rdtscll(this_offset);
+
+ /* return the value in ns */
+ return cycles_2_ns(this_offset);
+}
+
+static unsigned long calculate_cpu_khz(void)
+{
+ unsigned long long start, end;
+ unsigned long count;
+ u64 delta64;
+ int i;
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ /* run 3 times to ensure the cache is warm */
+ for (i = 0; i < 3; i++) {
+ mach_prepare_counter();
+ rdtscll(start);
+ mach_countup(&count);
+ rdtscll(end);
+ }
+ /*
+ * Error: ECTCNEVERSET
+ * The CTC wasn't reliable: we got a hit on the very first read,
+ * or the CPU was so fast/slow that the quotient wouldn't fit in
+ * 32 bits..
+ */
+ if (count <= 1)
+ goto err;
+
+ delta64 = end - start;
+
+ /* cpu freq too fast: */
+ if (delta64 > (1ULL<<32))
+ goto err;
+
+ /* cpu freq too slow: */
+ if (delta64 <= CALIBRATE_TIME_MSEC)
+ goto err;
+
+ delta64 += CALIBRATE_TIME_MSEC/2; /* round for do_div */
+ do_div(delta64,CALIBRATE_TIME_MSEC);
+
+ local_irq_restore(flags);
+ return (unsigned long)delta64;
+err:
+ local_irq_restore(flags);
+ return 0;
+}
+
+int recalibrate_cpu_khz(void)
+{
+#ifndef CONFIG_SMP
+ unsigned long cpu_khz_old = cpu_khz;
+
+ if (cpu_has_tsc) {
+ cpu_khz = calculate_cpu_khz();
+ tsc_khz = cpu_khz;
+ cpu_data[0].loops_per_jiffy =
+ cpufreq_scale(cpu_data[0].loops_per_jiffy,
+ cpu_khz_old, cpu_khz);
+ return 0;
+ } else
+ return -ENODEV;
+#else
+ return -ENODEV;
+#endif
+}
+
+EXPORT_SYMBOL(recalibrate_cpu_khz);
+
+void tsc_init(void)
+{
+ if (!cpu_has_tsc || tsc_disable)
+ return;
+
+ cpu_khz = calculate_cpu_khz();
+ tsc_khz = cpu_khz;
+
+ if (!cpu_khz)
+ return;
+
+ printk("Detected %lu.%03lu MHz processor.\n",
+ (unsigned long)cpu_khz / 1000,
+ (unsigned long)cpu_khz % 1000);
+
+ set_cyc2ns_scale(cpu_khz);
+ use_tsc_delay();
+}
+
+#ifdef CONFIG_CPU_FREQ
+
+static unsigned int cpufreq_delayed_issched = 0;
+static unsigned int cpufreq_init = 0;
+static struct work_struct cpufreq_delayed_get_work;
+
+static void handle_cpufreq_delayed_get(void *v)
+{
+ unsigned int cpu;
+
+ for_each_online_cpu(cpu)
+ cpufreq_get(cpu);
+
+ cpufreq_delayed_issched = 0;
+}
+
+/*
+ * if we notice cpufreq oddness, schedule a call to cpufreq_get() as it tries
+ * to verify the CPU frequency the timing core thinks the CPU is running
+ * at is still correct.
+ */
+static inline void cpufreq_delayed_get(void)
+{
+ if (cpufreq_init && !cpufreq_delayed_issched) {
+ cpufreq_delayed_issched = 1;
+ printk(KERN_DEBUG "Checking if CPU frequency changed.\n");
+ schedule_work(&cpufreq_delayed_get_work);
+ }
+}
+
+/*
+ * if the CPU frequency is scaled, TSC-based delays will need a different
+ * loops_per_jiffy value to function properly.
+ */
+static unsigned int ref_freq = 0;
+static unsigned long loops_per_jiffy_ref = 0;
+static unsigned long cpu_khz_ref = 0;
+
+static int
+time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data)
+{
+ struct cpufreq_freqs *freq = data;
+
+ if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
+ write_seqlock_irq(&xtime_lock);
+
+ if (!ref_freq) {
+ if (!freq->old){
+ ref_freq = freq->new;
+ goto end;
+ }
+ ref_freq = freq->old;
+ loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
+ cpu_khz_ref = cpu_khz;
+ }
+
+ if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
+ (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
+ (val == CPUFREQ_RESUMECHANGE)) {
+ if (!(freq->flags & CPUFREQ_CONST_LOOPS))
+ cpu_data[freq->cpu].loops_per_jiffy =
+ cpufreq_scale(loops_per_jiffy_ref,
+ ref_freq, freq->new);
+
+ if (cpu_khz) {
+
+ if (num_online_cpus() == 1)
+ cpu_khz = cpufreq_scale(cpu_khz_ref,
+ ref_freq, freq->new);
+ if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
+ tsc_khz = cpu_khz;
+ set_cyc2ns_scale(cpu_khz);
+ /*
+ * TSC based sched_clock turns
+ * to junk w/ cpufreq
+ */
+ mark_tsc_unstable();
+ }
+ }
+ }
+end:
+ if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
+ write_sequnlock_irq(&xtime_lock);
+
+ return 0;
+}
+
+static struct notifier_block time_cpufreq_notifier_block = {
+ .notifier_call = time_cpufreq_notifier
+};
+
+static int __init cpufreq_tsc(void)
+{
+ int ret;
+
+ INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
+ ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ if (!ret)
+ cpufreq_init = 1;
+
+ return ret;
+}
+
+core_initcall(cpufreq_tsc);
+
+#endif
+
+/* clock source code */
+
+static unsigned long current_tsc_khz = 0;
+static int tsc_update_callback(void);
+
+static cycle_t read_tsc(void)
+{
+ cycle_t ret;
+
+ rdtscll(ret);
+
+ return ret;
+}
+
+static struct clocksource clocksource_tsc = {
+ .name = "tsc",
+ .rating = 300,
+ .read = read_tsc,
+ .mask = CLOCKSOURCE_MASK(64),
+ .mult = 0, /* to be set */
+ .shift = 22,
+ .update_callback = tsc_update_callback,
+ .is_continuous = 1,
+};
+
+static int tsc_update_callback(void)
+{
+ int change = 0;
+
+ /* check to see if we should switch to the safe clocksource: */
+ if (clocksource_tsc.rating != 50 && check_tsc_unstable()) {
+ clocksource_tsc.rating = 50;
+ clocksource_reselect();
+ change = 1;
+ }
+
+ /* only update if tsc_khz has changed: */
+ if (current_tsc_khz != tsc_khz) {
+ current_tsc_khz = tsc_khz;
+ clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz,
+ clocksource_tsc.shift);
+ change = 1;
+ }
+
+ return change;
+}
+
+static int __init dmi_mark_tsc_unstable(struct dmi_system_id *d)
+{
+ printk(KERN_NOTICE "%s detected: marking TSC unstable.\n",
+ d->ident);
+ mark_tsc_unstable();
+ return 0;
+}
+
+/* List of systems that have known TSC problems */
+static struct dmi_system_id __initdata bad_tsc_dmi_table[] = {
+ {
+ .callback = dmi_mark_tsc_unstable,
+ .ident = "IBM Thinkpad 380XD",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+ DMI_MATCH(DMI_BOARD_NAME, "2635FA0"),
+ },
+ },
+ {}
+};
+
+#define TSC_FREQ_CHECK_INTERVAL (10*MSEC_PER_SEC) /* 10sec in MS */
+static struct timer_list verify_tsc_freq_timer;
+
+/* XXX - Probably should add locking */
+static void verify_tsc_freq(unsigned long unused)
+{
+ static u64 last_tsc;
+ static unsigned long last_jiffies;
+
+ u64 now_tsc, interval_tsc;
+ unsigned long now_jiffies, interval_jiffies;
+
+
+ if (check_tsc_unstable())
+ return;
+
+ rdtscll(now_tsc);
+ now_jiffies = jiffies;
+
+ if (!last_jiffies) {
+ goto out;
+ }
+
+ interval_jiffies = now_jiffies - last_jiffies;
+ interval_tsc = now_tsc - last_tsc;
+ interval_tsc *= HZ;
+ do_div(interval_tsc, cpu_khz*1000);
+
+ if (interval_tsc < (interval_jiffies * 3 / 4)) {
+ printk("TSC appears to be running slowly. "
+ "Marking it as unstable\n");
+ mark_tsc_unstable();
+ return;
+ }
+
+out:
+ last_tsc = now_tsc;
+ last_jiffies = now_jiffies;
+ /* set us up to go off on the next interval: */
+ mod_timer(&verify_tsc_freq_timer,
+ jiffies + msecs_to_jiffies(TSC_FREQ_CHECK_INTERVAL));
+}
+
+/*
+ * Make an educated guess if the TSC is trustworthy and synchronized
+ * over all CPUs.
+ */
+static __init int unsynchronized_tsc(void)
+{
+ /*
+ * Intel systems are normally all synchronized.
+ * Exceptions must mark TSC as unstable:
+ */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ return 0;
+
+ /* assume multi socket systems are not synchronized: */
+ return num_possible_cpus() > 1;
+}
+
+static int __init init_tsc_clocksource(void)
+{
+
+ if (cpu_has_tsc && tsc_khz && !tsc_disable) {
+ /* check blacklist */
+ dmi_check_system(bad_tsc_dmi_table);
+
+ if (unsynchronized_tsc()) /* mark unstable if unsynced */
+ mark_tsc_unstable();
+ current_tsc_khz = tsc_khz;
+ clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz,
+ clocksource_tsc.shift);
+ /* lower the rating if we already know its unstable: */
+ if (check_tsc_unstable())
+ clocksource_tsc.rating = 50;
+
+ init_timer(&verify_tsc_freq_timer);
+ verify_tsc_freq_timer.function = verify_tsc_freq;
+ verify_tsc_freq_timer.expires =
+ jiffies + msecs_to_jiffies(TSC_FREQ_CHECK_INTERVAL);
+ add_timer(&verify_tsc_freq_timer);
+
+ return clocksource_register(&clocksource_tsc);
+ }
+
+ return 0;
+}
+
+module_init(init_tsc_clocksource);
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index 7512f39c9f25..2d4f1386e2b1 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -71,6 +71,15 @@ SECTIONS
.data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) }
_edata = .; /* End of data section */
+#ifdef CONFIG_STACK_UNWIND
+ . = ALIGN(4);
+ .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) {
+ __start_unwind = .;
+ *(.eh_frame)
+ __end_unwind = .;
+ }
+#endif
+
. = ALIGN(THREAD_SIZE); /* init_task */
.data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
*(.data.init_task)
diff --git a/arch/i386/lib/delay.c b/arch/i386/lib/delay.c
index c49a6acbee56..3c0714c4b669 100644
--- a/arch/i386/lib/delay.c
+++ b/arch/i386/lib/delay.c
@@ -10,43 +10,92 @@
* we have to worry about.
*/
+#include <linux/module.h>
#include <linux/config.h>
#include <linux/sched.h>
#include <linux/delay.h>
-#include <linux/module.h>
+
#include <asm/processor.h>
#include <asm/delay.h>
#include <asm/timer.h>
#ifdef CONFIG_SMP
-#include <asm/smp.h>
+# include <asm/smp.h>
#endif
-extern struct timer_opts* timer;
+/* simple loop based delay: */
+static void delay_loop(unsigned long loops)
+{
+ int d0;
+
+ __asm__ __volatile__(
+ "\tjmp 1f\n"
+ ".align 16\n"
+ "1:\tjmp 2f\n"
+ ".align 16\n"
+ "2:\tdecl %0\n\tjns 2b"
+ :"=&a" (d0)
+ :"0" (loops));
+}
+
+/* TSC based delay: */
+static void delay_tsc(unsigned long loops)
+{
+ unsigned long bclock, now;
+
+ rdtscl(bclock);
+ do {
+ rep_nop();
+ rdtscl(now);
+ } while ((now-bclock) < loops);
+}
+
+/*
+ * Since we calibrate only once at boot, this
+ * function should be set once at boot and not changed
+ */
+static void (*delay_fn)(unsigned long) = delay_loop;
+
+void use_tsc_delay(void)
+{
+ delay_fn = delay_tsc;
+}
+
+int read_current_timer(unsigned long *timer_val)
+{
+ if (delay_fn == delay_tsc) {
+ rdtscl(*timer_val);
+ return 0;
+ }
+ return -1;
+}
void __delay(unsigned long loops)
{
- cur_timer->delay(loops);
+ delay_fn(loops);
}
inline void __const_udelay(unsigned long xloops)
{
int d0;
+
xloops *= 4;
__asm__("mull %0"
:"=d" (xloops), "=&a" (d0)
- :"1" (xloops),"0" (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4)));
- __delay(++xloops);
+ :"1" (xloops), "0"
+ (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4)));
+
+ __delay(++xloops);
}
void __udelay(unsigned long usecs)
{
- __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
+ __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
}
void __ndelay(unsigned long nsecs)
{
- __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
+ __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
}
EXPORT_SYMBOL(__delay);
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index bd6fe96cc16d..6ee7faaf2c1b 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -30,6 +30,40 @@
extern void die(const char *,struct pt_regs *,long);
+#ifdef CONFIG_KPROBES
+ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
+int register_page_fault_notifier(struct notifier_block *nb)
+{
+ vmalloc_sync_all();
+ return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
+}
+
+int unregister_page_fault_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
+}
+
+static inline int notify_page_fault(enum die_val val, const char *str,
+ struct pt_regs *regs, long err, int trap, int sig)
+{
+ struct die_args args = {
+ .regs = regs,
+ .str = str,
+ .err = err,
+ .trapnr = trap,
+ .signr = sig
+ };
+ return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
+}
+#else
+static inline int notify_page_fault(enum die_val val, const char *str,
+ struct pt_regs *regs, long err, int trap, int sig)
+{
+ return NOTIFY_DONE;
+}
+#endif
+
+
/*
* Unlock any spinlocks which will prevent us from getting the
* message out
@@ -324,7 +358,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
if (unlikely(address >= TASK_SIZE)) {
if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0)
return;
- if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+ if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
SIGSEGV) == NOTIFY_STOP)
return;
/*
@@ -334,7 +368,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
goto bad_area_nosemaphore;
}
- if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+ if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
SIGSEGV) == NOTIFY_STOP)
return;
diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c
index ec0fd3cfa774..fa8a37bcb391 100644
--- a/arch/i386/oprofile/nmi_int.c
+++ b/arch/i386/oprofile/nmi_int.c
@@ -281,9 +281,9 @@ static int nmi_create_files(struct super_block * sb, struct dentry * root)
for (i = 0; i < model->num_counters; ++i) {
struct dentry * dir;
- char buf[2];
+ char buf[4];
- snprintf(buf, 2, "%d", i);
+ snprintf(buf, sizeof(buf), "%d", i);
dir = oprofilefs_mkdir(sb, root, buf);
oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
diff --git a/arch/i386/oprofile/op_model_athlon.c b/arch/i386/oprofile/op_model_athlon.c
index 3ad9a72a5036..693bdea4a52b 100644
--- a/arch/i386/oprofile/op_model_athlon.c
+++ b/arch/i386/oprofile/op_model_athlon.c
@@ -13,6 +13,7 @@
#include <linux/oprofile.h>
#include <asm/ptrace.h>
#include <asm/msr.h>
+#include <asm/nmi.h>
#include "op_x86_model.h"
#include "op_counter.h"
diff --git a/arch/i386/oprofile/op_model_p4.c b/arch/i386/oprofile/op_model_p4.c
index ac8a066035c2..7c61d357b82b 100644
--- a/arch/i386/oprofile/op_model_p4.c
+++ b/arch/i386/oprofile/op_model_p4.c
@@ -14,6 +14,7 @@
#include <asm/ptrace.h>
#include <asm/fixmap.h>
#include <asm/apic.h>
+#include <asm/nmi.h>
#include "op_x86_model.h"
#include "op_counter.h"
diff --git a/arch/i386/oprofile/op_model_ppro.c b/arch/i386/oprofile/op_model_ppro.c
index d719015fc044..5c3ab4b027ad 100644
--- a/arch/i386/oprofile/op_model_ppro.c
+++ b/arch/i386/oprofile/op_model_ppro.c
@@ -14,6 +14,7 @@
#include <asm/ptrace.h>
#include <asm/msr.h>
#include <asm/apic.h>
+#include <asm/nmi.h>
#include "op_x86_model.h"
#include "op_counter.h"
diff --git a/arch/i386/pci/pcbios.c b/arch/i386/pci/pcbios.c
index 1eec0868f4b3..ed1512a175ab 100644
--- a/arch/i386/pci/pcbios.c
+++ b/arch/i386/pci/pcbios.c
@@ -371,8 +371,7 @@ void __devinit pcibios_sort(void)
list_for_each(ln, &pci_devices) {
d = pci_dev_g(ln);
if (d->bus->number == bus && d->devfn == devfn) {
- list_del(&d->global_list);
- list_add_tail(&d->global_list, &sorted_devices);
+ list_move_tail(&d->global_list, &sorted_devices);
if (d == dev)
found = 1;
break;
@@ -390,8 +389,7 @@ void __devinit pcibios_sort(void)
if (!found) {
printk(KERN_WARNING "PCI: Device %s not found by BIOS\n",
pci_name(dev));
- list_del(&dev->global_list);
- list_add_tail(&dev->global_list, &sorted_devices);
+ list_move_tail(&dev->global_list, &sorted_devices);
}
}
list_splice(&sorted_devices, &pci_devices);
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 355d57970ba3..b045c279136c 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -272,9 +272,9 @@ cpu_idle (void)
/* endless idle loop with no priority at all */
while (1) {
if (can_do_pal_halt)
- clear_thread_flag(TIF_POLLING_NRFLAG);
+ current_thread_info()->status &= ~TS_POLLING;
else
- set_thread_flag(TIF_POLLING_NRFLAG);
+ current_thread_info()->status |= TS_POLLING;
if (!need_resched()) {
void (*idle)(void);
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index d98ec49570b8..14ef7cceb208 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -19,6 +19,40 @@
extern void die (char *, struct pt_regs *, long);
+#ifdef CONFIG_KPROBES
+ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
+
+/* Hook to register for page fault notifications */
+int register_page_fault_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
+}
+
+int unregister_page_fault_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
+}
+
+static inline int notify_page_fault(enum die_val val, const char *str,
+ struct pt_regs *regs, long err, int trap, int sig)
+{
+ struct die_args args = {
+ .regs = regs,
+ .str = str,
+ .err = err,
+ .trapnr = trap,
+ .signr = sig
+ };
+ return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
+}
+#else
+static inline int notify_page_fault(enum die_val val, const char *str,
+ struct pt_regs *regs, long err, int trap, int sig)
+{
+ return NOTIFY_DONE;
+}
+#endif
+
/*
* Return TRUE if ADDRESS points at a page in the kernel's mapped segment
* (inside region 5, on ia64) and that page is present.
@@ -84,7 +118,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
/*
* This is to handle the kprobes on user space access instructions
*/
- if (notify_die(DIE_PAGE_FAULT, "page fault", regs, code, TRAP_BRKPT,
+ if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, code, TRAP_BRKPT,
SIGSEGV) == NOTIFY_STOP)
return;
diff --git a/arch/m68k/mm/memory.c b/arch/m68k/mm/memory.c
index d6d582a5abb0..a226668f20c3 100644
--- a/arch/m68k/mm/memory.c
+++ b/arch/m68k/mm/memory.c
@@ -94,8 +94,7 @@ pmd_t *get_pointer_table (void)
PD_MARKBITS(dp) = mask & ~tmp;
if (!PD_MARKBITS(dp)) {
/* move to end of list */
- list_del(dp);
- list_add_tail(dp, &ptable_list);
+ list_move_tail(dp, &ptable_list);
}
return (pmd_t *) (page_address(PD_PAGE(dp)) + off);
}
@@ -123,8 +122,7 @@ int free_pointer_table (pmd_t *ptable)
* move this descriptor to the front of the list, since
* it has one or more free tables.
*/
- list_del(dp);
- list_add(dp, &ptable_list);
+ list_move(dp, &ptable_list);
}
return 0;
}
diff --git a/arch/m68k/sun3/sun3dvma.c b/arch/m68k/sun3/sun3dvma.c
index f04a1d25f1a2..97c7bfde8ae8 100644
--- a/arch/m68k/sun3/sun3dvma.c
+++ b/arch/m68k/sun3/sun3dvma.c
@@ -119,8 +119,7 @@ static inline int refill(void)
if(hole->end == prev->start) {
hole->size += prev->size;
hole->end = prev->end;
- list_del(&(prev->list));
- list_add(&(prev->list), &hole_cache);
+ list_move(&(prev->list), &hole_cache);
ret++;
}
@@ -182,8 +181,7 @@ static inline unsigned long get_baddr(int len, unsigned long align)
#endif
return hole->end;
} else if(hole->size == newlen) {
- list_del(&(hole->list));
- list_add(&(hole->list), &hole_cache);
+ list_move(&(hole->list), &hole_cache);
dvma_entry_use(hole->start) = newlen;
#ifdef DVMA_DEBUG
dvma_allocs++;
diff --git a/arch/m68knommu/Kconfig b/arch/m68knommu/Kconfig
index 6c6980b9b6d4..8b6e723eb82b 100644
--- a/arch/m68knommu/Kconfig
+++ b/arch/m68knommu/Kconfig
@@ -472,38 +472,46 @@ config 4KSTACKS
running more threads on a system and also reduces the pressure
on the VM subsystem for higher order allocations.
-choice
- prompt "RAM size"
- default AUTO
-
-config RAMAUTO
- bool "AUTO"
- ---help---
- Configure the RAM size on your platform. Many platforms can auto
- detect this, on those choose the AUTO option. Otherwise set the
- RAM size you intend using.
-
-config RAM4MB
- bool "4MiB"
- help
- Set RAM size to be 4MiB.
-
-config RAM8MB
- bool "8MiB"
- help
- Set RAM size to be 8MiB.
-
-config RAM16MB
- bool "16MiB"
- help
- Set RAM size to be 16MiB.
-
-config RAM32MB
- bool "32MiB"
- help
- Set RAM size to be 32MiB.
-
-endchoice
+comment "RAM configuration"
+
+config RAMBASE
+ hex "Address of the base of RAM"
+ default "0"
+ help
+ Define the address that RAM starts at. On many platforms this is
+ 0, the base of the address space. And this is the default. Some
+ platforms choose to setup their RAM at other addresses within the
+ processor address space.
+
+config RAMSIZE
+ hex "Size of RAM (in bytes)"
+ default "0x400000"
+ help
+ Define the size of the system RAM. If you select 0 then the
+ kernel will try to probe the RAM size at runtime. This is not
+ supported on all CPU types.
+
+config VECTORBASE
+ hex "Address of the base of system vectors"
+ default "0"
+ help
+ Define the address of the the system vectors. Commonly this is
+ put at the start of RAM, but it doesn't have to be. On ColdFire
+ platforms this address is programmed into the VBR register, thus
+ actually setting the address to use.
+
+config KERNELBASE
+ hex "Address of the base of kernel code"
+ default "0x400"
+ help
+ Typically on m68k systems the kernel will not start at the base
+ of RAM, but usually some small offset from it. Define the start
+ address of the kernel here. The most common setup will have the
+ processor vectors at the base of RAM and then the start of the
+ kernel. On some platforms some RAM is reserved for boot loaders
+ and the kernel starts after that. The 0x400 default was based on
+ a system with the RAM based at address 0, and leaving enough room
+ for the theoretical maximum number of 256 vectors.
choice
prompt "RAM bus width"
@@ -511,7 +519,7 @@ choice
config RAMAUTOBIT
bool "AUTO"
- ---help---
+ help
Select the physical RAM data bus size. Not needed on most platforms,
so you can generally choose AUTO.
@@ -545,7 +553,9 @@ config RAMKERNEL
config ROMKERNEL
bool "ROM"
help
- The kernel will be resident in FLASH/ROM when running.
+ The kernel will be resident in FLASH/ROM when running. This is
+ often referred to as Execute-in-Place (XIP), since the kernel
+ code executes from the position it is stored in the FLASH/ROM.
endchoice
diff --git a/arch/m68knommu/kernel/vmlinux.lds.S b/arch/m68knommu/kernel/vmlinux.lds.S
index a331cc90797c..6a2f0c693254 100644
--- a/arch/m68knommu/kernel/vmlinux.lds.S
+++ b/arch/m68knommu/kernel/vmlinux.lds.S
@@ -1,7 +1,7 @@
/*
* vmlinux.lds.S -- master linker script for m68knommu arch
*
- * (C) Copyright 2002-2004, Greg Ungerer <gerg@snapgear.com>
+ * (C) Copyright 2002-2006, Greg Ungerer <gerg@snapgear.com>
*
* This ends up looking compilcated, because of the number of
* address variations for ram and rom/flash layouts. The real
@@ -22,13 +22,7 @@
#define ROM_START 0x10c10400
#define ROM_LENGTH 0xfec00
#define ROM_END 0x10d00000
-#define RAMVEC_START 0x00000000
-#define RAMVEC_LENGTH 0x400
-#define RAM_START 0x10000400
-#define RAM_LENGTH 0xffc00
-#define RAM_END 0x10100000
-#define _ramend _ram_end_notused
-#define DATA_ADDR RAM_START
+#define DATA_ADDR CONFIG_KERNELBASE
#endif
/*
@@ -41,11 +35,6 @@
#define ROM_START 0x10c10400
#define ROM_LENGTH 0x1efc00
#define ROM_END 0x10e00000
-#define RAMVEC_START 0x00000000
-#define RAMVEC_LENGTH 0x400
-#define RAM_START 0x00020400
-#define RAM_LENGTH 0x7dfc00
-#define RAM_END 0x00800000
#endif
#ifdef CONFIG_ROMKERNEL
#define ROMVEC_START 0x10c10000
@@ -53,11 +42,6 @@
#define ROM_START 0x10c10400
#define ROM_LENGTH 0x1efc00
#define ROM_END 0x10e00000
-#define RAMVEC_START 0x00000000
-#define RAMVEC_LENGTH 0x400
-#define RAM_START 0x00020000
-#define RAM_LENGTH 0x600000
-#define RAM_END 0x00800000
#endif
#ifdef CONFIG_HIMEMKERNEL
#define ROMVEC_START 0x00600000
@@ -65,141 +49,28 @@
#define ROM_START 0x00600400
#define ROM_LENGTH 0x1efc00
#define ROM_END 0x007f0000
-#define RAMVEC_START 0x00000000
-#define RAMVEC_LENGTH 0x400
-#define RAM_START 0x00020000
-#define RAM_LENGTH 0x5e0000
-#define RAM_END 0x00600000
#endif
#endif
-#ifdef CONFIG_DRAGEN2
-#define RAM_START 0x10000
-#define RAM_LENGTH 0x7f0000
-#endif
-
#ifdef CONFIG_UCQUICC
#define ROMVEC_START 0x00000000
#define ROMVEC_LENGTH 0x404
#define ROM_START 0x00000404
#define ROM_LENGTH 0x1ff6fc
#define ROM_END 0x00200000
-#define RAMVEC_START 0x00200000
-#define RAMVEC_LENGTH 0x404
-#define RAM_START 0x00200404
-#define RAM_LENGTH 0x1ff6fc
-#define RAM_END 0x00400000
-#endif
-
-/*
- * The standard Arnewsh 5206 board only has 1MiB of ram. Not normally
- * enough to be useful. Assume the user has fitted something larger,
- * at least 4MiB in size. No point in not letting the kernel completely
- * link, it will be obvious if it is too big when they go to load it.
- */
-#if defined(CONFIG_ARN5206)
-#define RAM_START 0x10000
-#define RAM_LENGTH 0x3f0000
-#endif
-
-/*
- * The Motorola 5206eLITE board only has 1MiB of static RAM.
- */
-#if defined(CONFIG_ELITE)
-#define RAM_START 0x30020000
-#define RAM_LENGTH 0xe0000
-#endif
-
-/*
- * All the Motorola eval boards have the same basic arrangement.
- * The end of RAM will vary depending on how much ram is fitted,
- * but this isn't important here, we assume at least 4MiB.
- */
-#if defined(CONFIG_M5206eC3) || defined(CONFIG_M5249C3) || \
- defined(CONFIG_M5272C3) || defined(CONFIG_M5307C3) || \
- defined(CONFIG_ARN5307) || defined(CONFIG_M5407C3) || \
- defined(CONFIG_M5271EVB) || defined(CONFIG_M5275EVB) || \
- defined(CONFIG_M5235EVB)
-#define RAM_START 0x20000
-#define RAM_LENGTH 0x3e0000
-#endif
-
-/*
- * The Freescale 5208EVB board has 32MB of RAM.
- */
-#if defined(CONFIG_M5208EVB)
-#define RAM_START 0x40020000
-#define RAM_LENGTH 0x01fe0000
-#endif
-
-/*
- * The senTec COBRA5272 board has nearly the same memory layout as
- * the M5272C3. We assume 16MiB ram.
- */
-#if defined(CONFIG_COBRA5272)
-#define RAM_START 0x20000
-#define RAM_LENGTH 0xfe0000
-#endif
-
-#if defined(CONFIG_M5282EVB)
-#define RAM_START 0x10000
-#define RAM_LENGTH 0x3f0000
-#endif
-
-/*
- * The senTec COBRA5282 board has the same memory layout as the M5282EVB.
- */
-#if defined(CONFIG_COBRA5282)
-#define RAM_START 0x10000
-#define RAM_LENGTH 0x3f0000
-#endif
-
-
-/*
- * The EMAC SoM-5282EM module.
- */
-#if defined(CONFIG_SOM5282EM)
-#define RAM_START 0x10000
-#define RAM_LENGTH 0xff0000
-#endif
-
-
-/*
- * These flash boot boards use all of ram for operation. Again the
- * actual memory size is not important here, assume at least 4MiB.
- * They currently have no support for running in flash.
- */
-#if defined(CONFIG_NETtel) || defined(CONFIG_eLIA) || \
- defined(CONFIG_DISKtel) || defined(CONFIG_SECUREEDGEMP3) || \
- defined(CONFIG_HW_FEITH)
-#define RAM_START 0x400
-#define RAM_LENGTH 0x3ffc00
-#endif
-
-/*
- * Sneha Boards mimimun memory
- * The end of RAM will vary depending on how much ram is fitted,
- * but this isn't important here, we assume at least 4MiB.
- */
-#if defined(CONFIG_CPU16B)
-#define RAM_START 0x20000
-#define RAM_LENGTH 0x3e0000
-#endif
-
-#if defined(CONFIG_MOD5272)
-#define RAM_START 0x02000000
-#define RAM_LENGTH 0x00800000
-#define RAMVEC_START 0x20000000
-#define RAMVEC_LENGTH 0x00000400
#endif
#if defined(CONFIG_RAMKERNEL)
+#define RAM_START CONFIG_KERNELBASE
+#define RAM_LENGTH (CONFIG_RAMBASE + CONFIG_RAMSIZE - CONFIG_KERNELBASE)
#define TEXT ram
#define DATA ram
#define INIT ram
#define BSS ram
#endif
#if defined(CONFIG_ROMKERNEL) || defined(CONFIG_HIMEMKERNEL)
+#define RAM_START CONFIG_RAMBASE
+#define RAM_LENGTH CONFIG_RAMSIZE
#define TEXT rom
#define DATA ram
#define INIT ram
@@ -215,13 +86,7 @@ OUTPUT_ARCH(m68k)
ENTRY(_start)
MEMORY {
-#ifdef RAMVEC_START
- ramvec : ORIGIN = RAMVEC_START, LENGTH = RAMVEC_LENGTH
-#endif
ram : ORIGIN = RAM_START, LENGTH = RAM_LENGTH
-#ifdef RAM_END
- eram : ORIGIN = RAM_END, LENGTH = 0
-#endif
#ifdef ROM_START
romvec : ORIGIN = ROMVEC_START, LENGTH = ROMVEC_LENGTH
rom : ORIGIN = ROM_START, LENGTH = ROM_LENGTH
@@ -308,12 +173,6 @@ SECTIONS {
__rom_end = . ;
} > erom
#endif
-#ifdef RAMVEC_START
- . = RAMVEC_START ;
- .ramvec : {
- __ramvec = .;
- } > ramvec
-#endif
.data DATA_ADDR : {
. = ALIGN(4);
@@ -373,12 +232,5 @@ SECTIONS {
_ebss = . ;
} > BSS
-#ifdef RAM_END
- . = RAM_END ;
- .eram : {
- __ramend = . ;
- _ramend = . ;
- } > eram
-#endif
}
diff --git a/arch/m68knommu/platform/5307/head.S b/arch/m68knommu/platform/5307/head.S
index c30c462b99b1..1d9eb301d7ac 100644
--- a/arch/m68knommu/platform/5307/head.S
+++ b/arch/m68knommu/platform/5307/head.S
@@ -3,7 +3,7 @@
/*
* head.S -- common startup code for ColdFire CPUs.
*
- * (C) Copyright 1999-2004, Greg Ungerer (gerg@snapgear.com).
+ * (C) Copyright 1999-2006, Greg Ungerer <gerg@snapgear.com>.
*/
/*****************************************************************************/
@@ -19,47 +19,15 @@
/*****************************************************************************/
/*
- * Define fixed memory sizes. Configuration of a fixed memory size
- * overrides everything else. If the user defined a size we just
- * blindly use it (they know what they are doing right :-)
- */
-#if defined(CONFIG_RAM32MB)
-#define MEM_SIZE 0x02000000 /* memory size 32Mb */
-#elif defined(CONFIG_RAM16MB)
-#define MEM_SIZE 0x01000000 /* memory size 16Mb */
-#elif defined(CONFIG_RAM8MB)
-#define MEM_SIZE 0x00800000 /* memory size 8Mb */
-#elif defined(CONFIG_RAM4MB)
-#define MEM_SIZE 0x00400000 /* memory size 4Mb */
-#elif defined(CONFIG_RAM1MB)
-#define MEM_SIZE 0x00100000 /* memory size 1Mb */
-#endif
-
-/*
- * Memory size exceptions for special cases. Some boards may be set
- * for auto memory sizing, but we can't do it that way for some reason.
- * For example the 5206eLITE board has static RAM, and auto-detecting
- * the SDRAM will do you no good at all. Same goes for the MOD5272.
- */
-#ifdef CONFIG_RAMAUTO
-#if defined(CONFIG_M5206eLITE)
-#define MEM_SIZE 0x00100000 /* 1MiB default memory */
-#endif
-#if defined(CONFIG_MOD5272)
-#define MEM_SIZE 0x00800000 /* 8MiB default memory */
-#endif
-#endif /* CONFIG_RAMAUTO */
-
-
-/*
- * If we don't have a fixed memory size now, then lets build in code
+ * If we don't have a fixed memory size, then lets build in code
* to auto detect the DRAM size. Obviously this is the prefered
- * method, and should work for most boards (it won't work for those
- * that do not have their RAM starting at address 0).
+ * method, and should work for most boards. It won't work for those
+ * that do not have their RAM starting at address 0, and it only
+ * works on SDRAM (not boards fitted with SRAM).
*/
-#if defined(MEM_SIZE)
+#if CONFIG_RAMSIZE != 0
.macro GET_MEM_SIZE
- movel #MEM_SIZE,%d0 /* hard coded memory size */
+ movel #CONFIG_RAMSIZE,%d0 /* hard coded memory size */
.endm
#elif defined(CONFIG_M5206) || defined(CONFIG_M5206e) || \
@@ -98,37 +66,7 @@
.endm
#else
-#error "ERROR: I don't know how to determine your boards memory size?"
-#endif
-
-
-/*
- * Most ColdFire boards have their DRAM starting at address 0.
- * Notable exception is the 5206eLITE board, another is the MOD5272.
- */
-#if defined(CONFIG_M5206eLITE)
-#define MEM_BASE 0x30000000
-#endif
-#if defined(CONFIG_MOD5272)
-#define MEM_BASE 0x02000000
-#define VBR_BASE 0x20000000 /* vectors in SRAM */
-#endif
-#if defined(CONFIG_M5208EVB)
-#define MEM_BASE 0x40000000
-#endif
-
-#ifndef MEM_BASE
-#define MEM_BASE 0x00000000 /* memory base at address 0 */
-#endif
-
-/*
- * The default location for the vectors is at the base of RAM.
- * Some boards might like to use internal SRAM or something like
- * that. If no board specific header defines an alternative then
- * use the base of RAM.
- */
-#ifndef VBR_BASE
-#define VBR_BASE MEM_BASE /* vector address */
+#error "ERROR: I don't know how to probe your boards memory size?"
#endif
/*****************************************************************************/
@@ -191,11 +129,11 @@ _start:
* Create basic memory configuration. Set VBR accordingly,
* and size memory.
*/
- movel #VBR_BASE,%a7
+ movel #CONFIG_VECTORBASE,%a7
movec %a7,%VBR /* set vectors addr */
movel %a7,_ramvec
- movel #MEM_BASE,%a7 /* mark the base of RAM */
+ movel #CONFIG_RAMBASE,%a7 /* mark the base of RAM */
movel %a7,_rambase
GET_MEM_SIZE /* macro code determines size */
diff --git a/arch/m68knommu/platform/68328/head-pilot.S b/arch/m68knommu/platform/68328/head-pilot.S
index c46775fe04be..46b3604f999c 100644
--- a/arch/m68knommu/platform/68328/head-pilot.S
+++ b/arch/m68knommu/platform/68328/head-pilot.S
@@ -21,7 +21,6 @@
.global _start
.global _rambase
-.global __ramvec
.global _ramvec
.global _ramstart
.global _ramend
@@ -121,7 +120,7 @@ L0:
DBG_PUTC('B')
/* Copy command line from beginning of RAM (+16) to end of bss */
- movel #__ramvec, %d7
+ movel #CONFIG_VECTORBASE, %d7
addl #16, %d7
moveal %d7, %a0
moveal #_ebss, %a1
diff --git a/arch/m68knommu/platform/68328/head-ram.S b/arch/m68knommu/platform/68328/head-ram.S
index 6bdc9bce43f2..e8dc9241ff96 100644
--- a/arch/m68knommu/platform/68328/head-ram.S
+++ b/arch/m68knommu/platform/68328/head-ram.S
@@ -1,10 +1,7 @@
#include <linux/config.h>
.global __main
- .global __ram_start
- .global __ram_end
.global __rom_start
- .global __rom_end
.global _rambase
.global _ramstart
@@ -12,6 +9,7 @@
.global splash_bits
.global _start
.global _stext
+ .global _edata
#define DEBUG
#define ROM_OFFSET 0x10C00000
@@ -73,7 +71,7 @@ pclp1:
#ifdef CONFIG_RELOCATE
/* Copy me to RAM */
moveal #__rom_start, %a0
- moveal #__ram_start, %a1
+ moveal #_stext, %a1
moveal #_edata, %a2
/* Copy %a0 to %a1 until %a1 == %a2 */
diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c
index c31e4cff64e0..65eb55400d77 100644
--- a/arch/mips/oprofile/common.c
+++ b/arch/mips/oprofile/common.c
@@ -38,7 +38,7 @@ static int op_mips_create_files(struct super_block * sb, struct dentry * root)
for (i = 0; i < model->num_counters; ++i) {
struct dentry *dir;
- char buf[3];
+ char buf[4];
snprintf(buf, sizeof buf, "%d", i);
dir = oprofilefs_mkdir(sb, root, buf);
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index d20907561f46..7dd5dab789a1 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -102,7 +102,7 @@ EXPORT_SYMBOL(tb_ticks_per_sec); /* for cputime_t conversions */
u64 tb_to_xs;
unsigned tb_to_us;
-#define TICKLEN_SCALE (SHIFT_SCALE - 10)
+#define TICKLEN_SCALE TICK_LENGTH_SHIFT
u64 last_tick_len; /* units are ns / 2^TICKLEN_SCALE */
u64 ticklen_to_xs; /* 0.64 fraction */
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index fdbba4206d59..a0a9e1e0061e 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -40,6 +40,40 @@
#include <asm/kdebug.h>
#include <asm/siginfo.h>
+#ifdef CONFIG_KPROBES
+ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
+
+/* Hook to register for page fault notifications */
+int register_page_fault_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
+}
+
+int unregister_page_fault_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
+}
+
+static inline int notify_page_fault(enum die_val val, const char *str,
+ struct pt_regs *regs, long err, int trap, int sig)
+{
+ struct die_args args = {
+ .regs = regs,
+ .str = str,
+ .err = err,
+ .trapnr = trap,
+ .signr = sig
+ };
+ return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
+}
+#else
+static inline int notify_page_fault(enum die_val val, const char *str,
+ struct pt_regs *regs, long err, int trap, int sig)
+{
+ return NOTIFY_DONE;
+}
+#endif
+
/*
* Check whether the instruction at regs->nip is a store using
* an update addressing form which will update r1.
@@ -142,7 +176,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
is_write = error_code & ESR_DST;
#endif /* CONFIG_4xx || CONFIG_BOOKE */
- if (notify_die(DIE_PAGE_FAULT, "page_fault", regs, error_code,
+ if (notify_page_fault(DIE_PAGE_FAULT, "page_fault", regs, error_code,
11, SIGSEGV) == NOTIFY_STOP)
return 0;
diff --git a/arch/powerpc/oprofile/common.c b/arch/powerpc/oprofile/common.c
index 27ad56bd227e..fd0bbbe7a4de 100644
--- a/arch/powerpc/oprofile/common.c
+++ b/arch/powerpc/oprofile/common.c
@@ -94,7 +94,7 @@ static int op_powerpc_create_files(struct super_block *sb, struct dentry *root)
for (i = 0; i < model->num_counters; ++i) {
struct dentry *dir;
- char buf[3];
+ char buf[4];
snprintf(buf, sizeof buf, "%d", i);
dir = oprofilefs_mkdir(sb, root, buf);
diff --git a/arch/sh/oprofile/op_model_sh7750.c b/arch/sh/oprofile/op_model_sh7750.c
index 5ec9ddcc4b0b..c265185b22a7 100644
--- a/arch/sh/oprofile/op_model_sh7750.c
+++ b/arch/sh/oprofile/op_model_sh7750.c
@@ -198,7 +198,7 @@ static int sh7750_perf_counter_create_files(struct super_block *sb, struct dentr
for (i = 0; i < NR_CNTRS; i++) {
struct dentry *dir;
- char buf[3];
+ char buf[4];
snprintf(buf, sizeof(buf), "%d", i);
dir = oprofilefs_mkdir(sb, root, buf);
diff --git a/arch/sparc/kernel/of_device.c b/arch/sparc/kernel/of_device.c
index 001b8673b4bd..80a809478781 100644
--- a/arch/sparc/kernel/of_device.c
+++ b/arch/sparc/kernel/of_device.c
@@ -138,6 +138,7 @@ struct bus_type ebus_bus_type = {
.suspend = of_device_suspend,
.resume = of_device_resume,
};
+EXPORT_SYMBOL(ebus_bus_type);
#endif
#ifdef CONFIG_SBUS
@@ -149,6 +150,7 @@ struct bus_type sbus_bus_type = {
.suspend = of_device_suspend,
.resume = of_device_resume,
};
+EXPORT_SYMBOL(sbus_bus_type);
#endif
static int __init of_bus_driver_init(void)
diff --git a/arch/sparc/kernel/prom.c b/arch/sparc/kernel/prom.c
index 63b2b9bd778e..946ce6d15819 100644
--- a/arch/sparc/kernel/prom.c
+++ b/arch/sparc/kernel/prom.c
@@ -27,6 +27,11 @@
static struct device_node *allnodes;
+/* use when traversing tree through the allnext, child, sibling,
+ * or parent members of struct device_node.
+ */
+static DEFINE_RWLOCK(devtree_lock);
+
int of_device_is_compatible(struct device_node *device, const char *compat)
{
const char* cp;
@@ -185,6 +190,54 @@ int of_getintprop_default(struct device_node *np, const char *name, int def)
}
EXPORT_SYMBOL(of_getintprop_default);
+int of_set_property(struct device_node *dp, const char *name, void *val, int len)
+{
+ struct property **prevp;
+ void *new_val;
+ int err;
+
+ new_val = kmalloc(len, GFP_KERNEL);
+ if (!new_val)
+ return -ENOMEM;
+
+ memcpy(new_val, val, len);
+
+ err = -ENODEV;
+
+ write_lock(&devtree_lock);
+ prevp = &dp->properties;
+ while (*prevp) {
+ struct property *prop = *prevp;
+
+ if (!strcmp(prop->name, name)) {
+ void *old_val = prop->value;
+ int ret;
+
+ ret = prom_setprop(dp->node, name, val, len);
+ err = -EINVAL;
+ if (ret >= 0) {
+ prop->value = new_val;
+ prop->length = len;
+
+ if (OF_IS_DYNAMIC(prop))
+ kfree(old_val);
+
+ OF_MARK_DYNAMIC(prop);
+
+ err = 0;
+ }
+ break;
+ }
+ prevp = &(*prevp)->next;
+ }
+ write_unlock(&devtree_lock);
+
+ /* XXX Upate procfs if necessary... */
+
+ return err;
+}
+EXPORT_SYMBOL(of_set_property);
+
static unsigned int prom_early_allocated;
static void * __init prom_early_alloc(unsigned long size)
@@ -354,7 +407,9 @@ static char * __init build_full_name(struct device_node *dp)
return n;
}
-static struct property * __init build_one_prop(phandle node, char *prev)
+static unsigned int unique_id;
+
+static struct property * __init build_one_prop(phandle node, char *prev, char *special_name, void *special_val, int special_len)
{
static struct property *tmp = NULL;
struct property *p;
@@ -364,25 +419,34 @@ static struct property * __init build_one_prop(phandle node, char *prev)
p = tmp;
memset(p, 0, sizeof(*p) + 32);
tmp = NULL;
- } else
+ } else {
p = prom_early_alloc(sizeof(struct property) + 32);
+ p->unique_id = unique_id++;
+ }
p->name = (char *) (p + 1);
- if (prev == NULL) {
- prom_firstprop(node, p->name);
+ if (special_name) {
+ p->length = special_len;
+ p->value = prom_early_alloc(special_len);
+ memcpy(p->value, special_val, special_len);
} else {
- prom_nextprop(node, prev, p->name);
- }
- if (strlen(p->name) == 0) {
- tmp = p;
- return NULL;
- }
- p->length = prom_getproplen(node, p->name);
- if (p->length <= 0) {
- p->length = 0;
- } else {
- p->value = prom_early_alloc(p->length);
- len = prom_getproperty(node, p->name, p->value, p->length);
+ if (prev == NULL) {
+ prom_firstprop(node, p->name);
+ } else {
+ prom_nextprop(node, prev, p->name);
+ }
+ if (strlen(p->name) == 0) {
+ tmp = p;
+ return NULL;
+ }
+ p->length = prom_getproplen(node, p->name);
+ if (p->length <= 0) {
+ p->length = 0;
+ } else {
+ p->value = prom_early_alloc(p->length + 1);
+ prom_getproperty(node, p->name, p->value, p->length);
+ ((unsigned char *)p->value)[p->length] = '\0';
+ }
}
return p;
}
@@ -391,9 +455,14 @@ static struct property * __init build_prop_list(phandle node)
{
struct property *head, *tail;
- head = tail = build_one_prop(node, NULL);
+ head = tail = build_one_prop(node, NULL,
+ ".node", &node, sizeof(node));
+
+ tail->next = build_one_prop(node, NULL, NULL, NULL, 0);
+ tail = tail->next;
while(tail) {
- tail->next = build_one_prop(node, tail->name);
+ tail->next = build_one_prop(node, tail->name,
+ NULL, NULL, 0);
tail = tail->next;
}
@@ -422,6 +491,7 @@ static struct device_node * __init create_node(phandle node)
return NULL;
dp = prom_early_alloc(sizeof(*dp));
+ dp->unique_id = unique_id++;
kref_init(&dp->kref);
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index fa5006946062..5db7e1d85385 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -9,3 +9,5 @@ lib-y := mul.o rem.o sdiv.o udiv.o umul.o urem.o ashrdi3.o memcpy.o memset.o \
strncpy_from_user.o divdi3.o udivdi3.o strlen_user.o \
copy_user.o locks.o atomic.o atomic32.o bitops.o \
lshrdi3.o ashldi3.o rwsem.o muldi3.o bitext.o
+
+obj-y += iomap.o
diff --git a/arch/sparc/lib/iomap.c b/arch/sparc/lib/iomap.c
new file mode 100644
index 000000000000..54501c1ca785
--- /dev/null
+++ b/arch/sparc/lib/iomap.c
@@ -0,0 +1,48 @@
+/*
+ * Implement the sparc iomap interfaces
+ */
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <asm/io.h>
+
+/* Create a virtual mapping cookie for an IO port range */
+void __iomem *ioport_map(unsigned long port, unsigned int nr)
+{
+ return (void __iomem *) (unsigned long) port;
+}
+
+void ioport_unmap(void __iomem *addr)
+{
+ /* Nothing to do */
+}
+EXPORT_SYMBOL(ioport_map);
+EXPORT_SYMBOL(ioport_unmap);
+
+/* Create a virtual mapping cookie for a PCI BAR (memory or IO) */
+void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
+{
+ unsigned long start = pci_resource_start(dev, bar);
+ unsigned long len = pci_resource_len(dev, bar);
+ unsigned long flags = pci_resource_flags(dev, bar);
+
+ if (!len || !start)
+ return NULL;
+ if (maxlen && len > maxlen)
+ len = maxlen;
+ if (flags & IORESOURCE_IO)
+ return ioport_map(start, len);
+ if (flags & IORESOURCE_MEM) {
+ if (flags & IORESOURCE_CACHEABLE)
+ return ioremap(start, len);
+ return ioremap_nocache(start, len);
+ }
+ /* What? */
+ return NULL;
+}
+
+void pci_iounmap(struct pci_dev *dev, void __iomem * addr)
+{
+ /* nothing to do */
+}
+EXPORT_SYMBOL(pci_iomap);
+EXPORT_SYMBOL(pci_iounmap);
diff --git a/arch/sparc64/kernel/auxio.c b/arch/sparc64/kernel/auxio.c
index 2c42894b188f..c2c69c167d18 100644
--- a/arch/sparc64/kernel/auxio.c
+++ b/arch/sparc64/kernel/auxio.c
@@ -6,6 +6,7 @@
*/
#include <linux/config.h>
+#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/ioport.h>
@@ -16,8 +17,8 @@
#include <asm/ebus.h>
#include <asm/auxio.h>
-/* This cannot be static, as it is referenced in irq.c */
void __iomem *auxio_register = NULL;
+EXPORT_SYMBOL(auxio_register);
enum auxio_type {
AUXIO_TYPE_NODEV,
diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c
index 31e0fbb0d82c..cc89b06d0178 100644
--- a/arch/sparc64/kernel/irq.c
+++ b/arch/sparc64/kernel/irq.c
@@ -563,67 +563,6 @@ void handler_irq(int irq, struct pt_regs *regs)
irq_exit();
}
-#ifdef CONFIG_BLK_DEV_FD
-extern irqreturn_t floppy_interrupt(int, void *, struct pt_regs *);
-
-/* XXX No easy way to include asm/floppy.h XXX */
-extern unsigned char *pdma_vaddr;
-extern unsigned long pdma_size;
-extern volatile int doing_pdma;
-extern unsigned long fdc_status;
-
-irqreturn_t sparc_floppy_irq(int irq, void *dev_cookie, struct pt_regs *regs)
-{
- if (likely(doing_pdma)) {
- void __iomem *stat = (void __iomem *) fdc_status;
- unsigned char *vaddr = pdma_vaddr;
- unsigned long size = pdma_size;
- u8 val;
-
- while (size) {
- val = readb(stat);
- if (unlikely(!(val & 0x80))) {
- pdma_vaddr = vaddr;
- pdma_size = size;
- return IRQ_HANDLED;
- }
- if (unlikely(!(val & 0x20))) {
- pdma_vaddr = vaddr;
- pdma_size = size;
- doing_pdma = 0;
- goto main_interrupt;
- }
- if (val & 0x40) {
- /* read */
- *vaddr++ = readb(stat + 1);
- } else {
- unsigned char data = *vaddr++;
-
- /* write */
- writeb(data, stat + 1);
- }
- size--;
- }
-
- pdma_vaddr = vaddr;
- pdma_size = size;
-
- /* Send Terminal Count pulse to floppy controller. */
- val = readb(auxio_register);
- val |= AUXIO_AUX1_FTCNT;
- writeb(val, auxio_register);
- val &= ~AUXIO_AUX1_FTCNT;
- writeb(val, auxio_register);
-
- doing_pdma = 0;
- }
-
-main_interrupt:
- return floppy_interrupt(irq, dev_cookie, regs);
-}
-EXPORT_SYMBOL(sparc_floppy_irq);
-#endif
-
struct sun5_timer {
u64 count0;
u64 limit0;
diff --git a/arch/sparc64/kernel/of_device.c b/arch/sparc64/kernel/of_device.c
index 566aa343aa62..768475bbce82 100644
--- a/arch/sparc64/kernel/of_device.c
+++ b/arch/sparc64/kernel/of_device.c
@@ -138,6 +138,7 @@ struct bus_type isa_bus_type = {
.suspend = of_device_suspend,
.resume = of_device_resume,
};
+EXPORT_SYMBOL(isa_bus_type);
struct bus_type ebus_bus_type = {
.name = "ebus",
@@ -147,6 +148,7 @@ struct bus_type ebus_bus_type = {
.suspend = of_device_suspend,
.resume = of_device_resume,
};
+EXPORT_SYMBOL(ebus_bus_type);
#endif
#ifdef CONFIG_SBUS
@@ -158,6 +160,7 @@ struct bus_type sbus_bus_type = {
.suspend = of_device_suspend,
.resume = of_device_resume,
};
+EXPORT_SYMBOL(sbus_bus_type);
#endif
static int __init of_bus_driver_init(void)
diff --git a/arch/sparc64/kernel/prom.c b/arch/sparc64/kernel/prom.c
index e9d703eea806..8e87e7ea0325 100644
--- a/arch/sparc64/kernel/prom.c
+++ b/arch/sparc64/kernel/prom.c
@@ -27,6 +27,11 @@
static struct device_node *allnodes;
+/* use when traversing tree through the allnext, child, sibling,
+ * or parent members of struct device_node.
+ */
+static DEFINE_RWLOCK(devtree_lock);
+
int of_device_is_compatible(struct device_node *device, const char *compat)
{
const char* cp;
@@ -185,6 +190,54 @@ int of_getintprop_default(struct device_node *np, const char *name, int def)
}
EXPORT_SYMBOL(of_getintprop_default);
+int of_set_property(struct device_node *dp, const char *name, void *val, int len)
+{
+ struct property **prevp;
+ void *new_val;
+ int err;
+
+ new_val = kmalloc(len, GFP_KERNEL);
+ if (!new_val)
+ return -ENOMEM;
+
+ memcpy(new_val, val, len);
+
+ err = -ENODEV;
+
+ write_lock(&devtree_lock);
+ prevp = &dp->properties;
+ while (*prevp) {
+ struct property *prop = *prevp;
+
+ if (!strcmp(prop->name, name)) {
+ void *old_val = prop->value;
+ int ret;
+
+ ret = prom_setprop(dp->node, name, val, len);
+ err = -EINVAL;
+ if (ret >= 0) {
+ prop->value = new_val;
+ prop->length = len;
+
+ if (OF_IS_DYNAMIC(prop))
+ kfree(old_val);
+
+ OF_MARK_DYNAMIC(prop);
+
+ err = 0;
+ }
+ break;
+ }
+ prevp = &(*prevp)->next;
+ }
+ write_unlock(&devtree_lock);
+
+ /* XXX Upate procfs if necessary... */
+
+ return err;
+}
+EXPORT_SYMBOL(of_set_property);
+
static unsigned int prom_early_allocated;
static void * __init prom_early_alloc(unsigned long size)
@@ -531,7 +584,9 @@ static char * __init build_full_name(struct device_node *dp)
return n;
}
-static struct property * __init build_one_prop(phandle node, char *prev)
+static unsigned int unique_id;
+
+static struct property * __init build_one_prop(phandle node, char *prev, char *special_name, void *special_val, int special_len)
{
static struct property *tmp = NULL;
struct property *p;
@@ -540,25 +595,35 @@ static struct property * __init build_one_prop(phandle node, char *prev)
p = tmp;
memset(p, 0, sizeof(*p) + 32);
tmp = NULL;
- } else
+ } else {
p = prom_early_alloc(sizeof(struct property) + 32);
+ p->unique_id = unique_id++;
+ }
p->name = (char *) (p + 1);
- if (prev == NULL) {
- prom_firstprop(node, p->name);
+ if (special_name) {
+ strcpy(p->name, special_name);
+ p->length = special_len;
+ p->value = prom_early_alloc(special_len);
+ memcpy(p->value, special_val, special_len);
} else {
- prom_nextprop(node, prev, p->name);
- }
- if (strlen(p->name) == 0) {
- tmp = p;
- return NULL;
- }
- p->length = prom_getproplen(node, p->name);
- if (p->length <= 0) {
- p->length = 0;
- } else {
- p->value = prom_early_alloc(p->length);
- prom_getproperty(node, p->name, p->value, p->length);
+ if (prev == NULL) {
+ prom_firstprop(node, p->name);
+ } else {
+ prom_nextprop(node, prev, p->name);
+ }
+ if (strlen(p->name) == 0) {
+ tmp = p;
+ return NULL;
+ }
+ p->length = prom_getproplen(node, p->name);
+ if (p->length <= 0) {
+ p->length = 0;
+ } else {
+ p->value = prom_early_alloc(p->length + 1);
+ prom_getproperty(node, p->name, p->value, p->length);
+ ((unsigned char *)p->value)[p->length] = '\0';
+ }
}
return p;
}
@@ -567,9 +632,14 @@ static struct property * __init build_prop_list(phandle node)
{
struct property *head, *tail;
- head = tail = build_one_prop(node, NULL);
+ head = tail = build_one_prop(node, NULL,
+ ".node", &node, sizeof(node));
+
+ tail->next = build_one_prop(node, NULL, NULL, NULL, 0);
+ tail = tail->next;
while(tail) {
- tail->next = build_one_prop(node, tail->name);
+ tail->next = build_one_prop(node, tail->name,
+ NULL, NULL, 0);
tail = tail->next;
}
@@ -598,6 +668,7 @@ static struct device_node * __init create_node(phandle node)
return NULL;
dp = prom_early_alloc(sizeof(*dp));
+ dp->unique_id = unique_id++;
kref_init(&dp->kref);
diff --git a/arch/sparc64/mm/fault.c b/arch/sparc64/mm/fault.c
index 6e002aacb961..1605967cce91 100644
--- a/arch/sparc64/mm/fault.c
+++ b/arch/sparc64/mm/fault.c
@@ -31,6 +31,40 @@
#include <asm/kdebug.h>
#include <asm/mmu_context.h>
+#ifdef CONFIG_KPROBES
+ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
+
+/* Hook to register for page fault notifications */
+int register_page_fault_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
+}
+
+int unregister_page_fault_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
+}
+
+static inline int notify_page_fault(enum die_val val, const char *str,
+ struct pt_regs *regs, long err, int trap, int sig)
+{
+ struct die_args args = {
+ .regs = regs,
+ .str = str,
+ .err = err,
+ .trapnr = trap,
+ .signr = sig
+ };
+ return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
+}
+#else
+static inline int notify_page_fault(enum die_val val, const char *str,
+ struct pt_regs *regs, long err, int trap, int sig)
+{
+ return NOTIFY_DONE;
+}
+#endif
+
/*
* To debug kernel to catch accesses to certain virtual/physical addresses.
* Mode = 0 selects physical watchpoints, mode = 1 selects virtual watchpoints.
@@ -263,7 +297,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
fault_code = get_thread_fault_code();
- if (notify_die(DIE_PAGE_FAULT, "page_fault", regs,
+ if (notify_page_fault(DIE_PAGE_FAULT, "page_fault", regs,
fault_code, 0, SIGSEGV) == NOTIFY_STOP)
return;
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 513993414747..5c2bcf354ce6 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -1568,6 +1568,7 @@ pgprot_t PAGE_EXEC __read_mostly;
unsigned long pg_iobits __read_mostly;
unsigned long _PAGE_IE __read_mostly;
+EXPORT_SYMBOL(_PAGE_IE);
unsigned long _PAGE_E __read_mostly;
EXPORT_SYMBOL(_PAGE_E);
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index af44130f0d65..ccc4a7fb97a3 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -386,24 +386,45 @@ config HPET_EMULATE_RTC
bool "Provide RTC interrupt"
depends on HPET_TIMER && RTC=y
-config GART_IOMMU
- bool "K8 GART IOMMU support"
+# Mark as embedded because too many people got it wrong.
+# The code disables itself when not needed.
+config IOMMU
+ bool "IOMMU support" if EMBEDDED
default y
select SWIOTLB
select AGP
depends on PCI
help
- Support for hardware IOMMU in AMD's Opteron/Athlon64 Processors
- and for the bounce buffering software IOMMU.
- Needed to run systems with more than 3GB of memory properly with
- 32-bit PCI devices that do not support DAC (Double Address Cycle).
- The IOMMU can be turned off at runtime with the iommu=off parameter.
- Normally the kernel will take the right choice by itself.
- This option includes a driver for the AMD Opteron/Athlon64 IOMMU
- northbridge and a software emulation used on other systems without
- hardware IOMMU. If unsure, say Y.
-
-# need this always selected by GART_IOMMU for the VIA workaround
+ Support for full DMA access of devices with 32bit memory access only
+ on systems with more than 3GB. This is usually needed for USB,
+ sound, many IDE/SATA chipsets and some other devices.
+ Provides a driver for the AMD Athlon64/Opteron/Turion/Sempron GART
+ based IOMMU and a software bounce buffer based IOMMU used on Intel
+ systems and as fallback.
+ The code is only active when needed (enough memory and limited
+ device) unless CONFIG_IOMMU_DEBUG or iommu=force is specified
+ too.
+
+config CALGARY_IOMMU
+ bool "IBM Calgary IOMMU support"
+ default y
+ select SWIOTLB
+ depends on PCI && EXPERIMENTAL
+ help
+ Support for hardware IOMMUs in IBM's xSeries x366 and x460
+ systems. Needed to run systems with more than 3GB of memory
+ properly with 32-bit PCI devices that do not support DAC
+ (Double Address Cycle). Calgary also supports bus level
+ isolation, where all DMAs pass through the IOMMU. This
+ prevents them from going anywhere except their intended
+ destination. This catches hard-to-find kernel bugs and
+ mis-behaving drivers and devices that do not use the DMA-API
+ properly to set up their DMA buffers. The IOMMU can be
+ turned off at boot time with the iommu=off parameter.
+ Normally the kernel will make the right choice by itself.
+ If unsure, say Y.
+
+# need this always selected by IOMMU for the VIA workaround
config SWIOTLB
bool
@@ -501,6 +522,10 @@ config REORDER
optimal TLB usage. If you have pretty much any version of binutils,
this can increase your kernel build time by roughly one minute.
+config K8_NB
+ def_bool y
+ depends on AGP_AMD64 || IOMMU || (PCI && NUMA)
+
endmenu
#
diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug
index ea31b4c62105..1d92ab56c0f9 100644
--- a/arch/x86_64/Kconfig.debug
+++ b/arch/x86_64/Kconfig.debug
@@ -13,7 +13,7 @@ config DEBUG_RODATA
If in doubt, say "N".
config IOMMU_DEBUG
- depends on GART_IOMMU && DEBUG_KERNEL
+ depends on IOMMU && DEBUG_KERNEL
bool "Enable IOMMU debugging"
help
Force the IOMMU to on even when you have less than 4GB of
@@ -35,6 +35,22 @@ config IOMMU_LEAK
Add a simple leak tracer to the IOMMU code. This is useful when you
are debugging a buggy device driver that leaks IOMMU mappings.
+config DEBUG_STACKOVERFLOW
+ bool "Check for stack overflows"
+ depends on DEBUG_KERNEL
+ help
+ This option will cause messages to be printed if free stack space
+ drops below a certain limit.
+
+config DEBUG_STACK_USAGE
+ bool "Stack utilization instrumentation"
+ depends on DEBUG_KERNEL
+ help
+ Enables the display of the minimum amount of free stack which each
+ task has ever had available in the sysrq-T and sysrq-P debug output.
+
+ This option will slow down process creation somewhat.
+
#config X86_REMOTE_DEBUG
# bool "kgdb debugging stub"
diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile
index e573e2ab5510..431bb4bc36cd 100644
--- a/arch/x86_64/Makefile
+++ b/arch/x86_64/Makefile
@@ -27,6 +27,7 @@ LDFLAGS_vmlinux :=
CHECKFLAGS += -D__x86_64__ -m64
cflags-y :=
+cflags-kernel-y :=
cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
@@ -35,7 +36,7 @@ cflags-y += -m64
cflags-y += -mno-red-zone
cflags-y += -mcmodel=kernel
cflags-y += -pipe
-cflags-$(CONFIG_REORDER) += -ffunction-sections
+cflags-kernel-$(CONFIG_REORDER) += -ffunction-sections
# this makes reading assembly source easier, but produces worse code
# actually it makes the kernel smaller too.
cflags-y += -fno-reorder-blocks
@@ -55,6 +56,7 @@ cflags-y += $(call cc-option,-funit-at-a-time)
cflags-y += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
CFLAGS += $(cflags-y)
+CFLAGS_KERNEL += $(cflags-kernel-y)
AFLAGS += -m64
head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o
diff --git a/arch/x86_64/boot/Makefile b/arch/x86_64/boot/Makefile
index 43ee6c50c277..deb063e7762d 100644
--- a/arch/x86_64/boot/Makefile
+++ b/arch/x86_64/boot/Makefile
@@ -107,8 +107,13 @@ fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf
isoimage: $(BOOTIMAGE)
-rm -rf $(obj)/isoimage
mkdir $(obj)/isoimage
- cp `echo /usr/lib*/syslinux/isolinux.bin | awk '{ print $1; }'` \
- $(obj)/isoimage
+ for i in lib lib64 share end ; do \
+ if [ -f /usr/$$i/syslinux/isolinux.bin ] ; then \
+ cp /usr/$$i/syslinux/isolinux.bin $(obj)/isoimage ; \
+ break ; \
+ fi ; \
+ if [ $$i = end ] ; then exit 1 ; fi ; \
+ done
cp $(BOOTIMAGE) $(obj)/isoimage/linux
echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg
if [ -f '$(FDINITRD)' ] ; then \
diff --git a/arch/x86_64/boot/compressed/misc.c b/arch/x86_64/boot/compressed/misc.c
index cf4b88c416dc..3755b2e394d0 100644
--- a/arch/x86_64/boot/compressed/misc.c
+++ b/arch/x86_64/boot/compressed/misc.c
@@ -77,11 +77,11 @@ static void gzip_release(void **);
*/
static unsigned char *real_mode; /* Pointer to real-mode data */
-#define EXT_MEM_K (*(unsigned short *)(real_mode + 0x2))
+#define RM_EXT_MEM_K (*(unsigned short *)(real_mode + 0x2))
#ifndef STANDARD_MEMORY_BIOS_CALL
-#define ALT_MEM_K (*(unsigned long *)(real_mode + 0x1e0))
+#define RM_ALT_MEM_K (*(unsigned long *)(real_mode + 0x1e0))
#endif
-#define SCREEN_INFO (*(struct screen_info *)(real_mode+0))
+#define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0))
extern unsigned char input_data[];
extern int input_len;
@@ -92,9 +92,9 @@ static unsigned long output_ptr = 0;
static void *malloc(int size);
static void free(void *where);
-
-void* memset(void* s, int c, unsigned n);
-void* memcpy(void* dest, const void* src, unsigned n);
+
+static void *memset(void *s, int c, unsigned n);
+static void *memcpy(void *dest, const void *src, unsigned n);
static void putstr(const char *);
@@ -162,8 +162,8 @@ static void putstr(const char *s)
int x,y,pos;
char c;
- x = SCREEN_INFO.orig_x;
- y = SCREEN_INFO.orig_y;
+ x = RM_SCREEN_INFO.orig_x;
+ y = RM_SCREEN_INFO.orig_y;
while ( ( c = *s++ ) != '\0' ) {
if ( c == '\n' ) {
@@ -184,8 +184,8 @@ static void putstr(const char *s)
}
}
- SCREEN_INFO.orig_x = x;
- SCREEN_INFO.orig_y = y;
+ RM_SCREEN_INFO.orig_x = x;
+ RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
outb_p(14, vidport);
@@ -194,7 +194,7 @@ static void putstr(const char *s)
outb_p(0xff & (pos >> 1), vidport+1);
}
-void* memset(void* s, int c, unsigned n)
+static void* memset(void* s, int c, unsigned n)
{
int i;
char *ss = (char*)s;
@@ -203,7 +203,7 @@ void* memset(void* s, int c, unsigned n)
return s;
}
-void* memcpy(void* dest, const void* src, unsigned n)
+static void* memcpy(void* dest, const void* src, unsigned n)
{
int i;
char *d = (char *)dest, *s = (char *)src;
@@ -278,15 +278,15 @@ static void error(char *x)
putstr(x);
putstr("\n\n -- System halted");
- while(1);
+ while(1); /* Halt */
}
-void setup_normal_output_buffer(void)
+static void setup_normal_output_buffer(void)
{
#ifdef STANDARD_MEMORY_BIOS_CALL
- if (EXT_MEM_K < 1024) error("Less than 2MB of memory");
+ if (RM_EXT_MEM_K < 1024) error("Less than 2MB of memory");
#else
- if ((ALT_MEM_K > EXT_MEM_K ? ALT_MEM_K : EXT_MEM_K) < 1024) error("Less than 2MB of memory");
+ if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory");
#endif
output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */
free_mem_end_ptr = (long)real_mode;
@@ -297,13 +297,13 @@ struct moveparams {
uch *high_buffer_start; int hcount;
};
-void setup_output_buffer_if_we_run_high(struct moveparams *mv)
+static void setup_output_buffer_if_we_run_high(struct moveparams *mv)
{
high_buffer_start = (uch *)(((ulg)&end) + HEAP_SIZE);
#ifdef STANDARD_MEMORY_BIOS_CALL
- if (EXT_MEM_K < (3*1024)) error("Less than 4MB of memory");
+ if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory");
#else
- if ((ALT_MEM_K > EXT_MEM_K ? ALT_MEM_K : EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory");
+ if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory");
#endif
mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START;
low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
@@ -319,7 +319,7 @@ void setup_output_buffer_if_we_run_high(struct moveparams *mv)
mv->high_buffer_start = high_buffer_start;
}
-void close_output_buffer_if_we_run_high(struct moveparams *mv)
+static void close_output_buffer_if_we_run_high(struct moveparams *mv)
{
if (bytes_out > low_buffer_size) {
mv->lcount = low_buffer_size;
@@ -335,7 +335,7 @@ int decompress_kernel(struct moveparams *mv, void *rmode)
{
real_mode = rmode;
- if (SCREEN_INFO.orig_video_mode == 7) {
+ if (RM_SCREEN_INFO.orig_video_mode == 7) {
vidmem = (char *) 0xb0000;
vidport = 0x3b4;
} else {
@@ -343,8 +343,8 @@ int decompress_kernel(struct moveparams *mv, void *rmode)
vidport = 0x3d4;
}
- lines = SCREEN_INFO.orig_video_lines;
- cols = SCREEN_INFO.orig_video_cols;
+ lines = RM_SCREEN_INFO.orig_video_lines;
+ cols = RM_SCREEN_INFO.orig_video_cols;
if (free_mem_ptr < 0x100000) setup_normal_output_buffer();
else setup_output_buffer_if_we_run_high(mv);
diff --git a/arch/x86_64/boot/tools/build.c b/arch/x86_64/boot/tools/build.c
index c44f5e2ec100..eae86691709a 100644
--- a/arch/x86_64/boot/tools/build.c
+++ b/arch/x86_64/boot/tools/build.c
@@ -149,10 +149,8 @@ int main(int argc, char ** argv)
sz = sb.st_size;
fprintf (stderr, "System is %d kB\n", sz/1024);
sys_size = (sz + 15) / 16;
- /* 0x40000*16 = 4.0 MB, reasonable estimate for the current maximum */
- if (sys_size > (is_big_kernel ? 0x40000 : DEF_SYSSIZE))
- die("System is too big. Try using %smodules.",
- is_big_kernel ? "" : "bzImage or ");
+ if (!is_big_kernel && sys_size > DEF_SYSSIZE)
+ die("System is too big. Try using bzImage or modules.");
while (sz > 0) {
int l, n;
diff --git a/arch/x86_64/boot/video.S b/arch/x86_64/boot/video.S
index 32327bb37aff..2aa565c136e5 100644
--- a/arch/x86_64/boot/video.S
+++ b/arch/x86_64/boot/video.S
@@ -1929,6 +1929,7 @@ skip10: movb %ah, %al
ret
store_edid:
+#ifdef CONFIG_FIRMWARE_EDID
pushw %es # just save all registers
pushw %ax
pushw %bx
@@ -1946,6 +1947,22 @@ store_edid:
rep
stosl
+ pushw %es # save ES
+ xorw %di, %di # Report Capability
+ pushw %di
+ popw %es # ES:DI must be 0:0
+ movw $0x4f15, %ax
+ xorw %bx, %bx
+ xorw %cx, %cx
+ int $0x10
+ popw %es # restore ES
+
+ cmpb $0x00, %ah # call successful
+ jne no_edid
+
+ cmpb $0x4f, %al # function supported
+ jne no_edid
+
movw $0x4f15, %ax # do VBE/DDC
movw $0x01, %bx
movw $0x00, %cx
@@ -1953,12 +1970,14 @@ store_edid:
movw $0x140, %di
int $0x10
+no_edid:
popw %di # restore all registers
popw %dx
popw %cx
popw %bx
popw %ax
popw %es
+#endif
ret
# VIDEO_SELECT-only variables
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig
index 69db0c0721d1..e69d403949c8 100644
--- a/arch/x86_64/defconfig
+++ b/arch/x86_64/defconfig
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.17-rc1-git11
-# Sun Apr 16 07:22:36 2006
+# Linux kernel version: 2.6.17-git6
+# Sat Jun 24 00:52:28 2006
#
CONFIG_X86_64=y
CONFIG_64BIT=y
@@ -42,7 +42,6 @@ CONFIG_IKCONFIG_PROC=y
# CONFIG_RELAY is not set
CONFIG_INITRAMFS_SOURCE=""
CONFIG_UID16=y
-CONFIG_VM86=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
# CONFIG_EMBEDDED is not set
CONFIG_KALLSYMS=y
@@ -57,7 +56,6 @@ CONFIG_FUTEX=y
CONFIG_EPOLL=y
CONFIG_SHMEM=y
CONFIG_SLAB=y
-CONFIG_DOUBLEFAULT=y
# CONFIG_TINY_SHMEM is not set
CONFIG_BASE_SMALL=0
# CONFIG_SLOB is not set
@@ -144,7 +142,8 @@ CONFIG_NR_CPUS=32
CONFIG_HOTPLUG_CPU=y
CONFIG_HPET_TIMER=y
CONFIG_HPET_EMULATE_RTC=y
-CONFIG_GART_IOMMU=y
+CONFIG_IOMMU=y
+# CONFIG_CALGARY_IOMMU is not set
CONFIG_SWIOTLB=y
CONFIG_X86_MCE=y
CONFIG_X86_MCE_INTEL=y
@@ -158,6 +157,7 @@ CONFIG_HZ_250=y
# CONFIG_HZ_1000 is not set
CONFIG_HZ=250
# CONFIG_REORDER is not set
+CONFIG_K8_NB=y
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_ISA_DMA_API=y
@@ -293,6 +293,8 @@ CONFIG_IP_PNP_DHCP=y
# CONFIG_INET_IPCOMP is not set
# CONFIG_INET_XFRM_TUNNEL is not set
# CONFIG_INET_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
CONFIG_INET_DIAG=y
CONFIG_INET_TCP_DIAG=y
# CONFIG_TCP_CONG_ADVANCED is not set
@@ -305,7 +307,10 @@ CONFIG_IPV6=y
# CONFIG_INET6_IPCOMP is not set
# CONFIG_INET6_XFRM_TUNNEL is not set
# CONFIG_INET6_TUNNEL is not set
+# CONFIG_INET6_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET6_XFRM_MODE_TUNNEL is not set
# CONFIG_IPV6_TUNNEL is not set
+# CONFIG_NETWORK_SECMARK is not set
# CONFIG_NETFILTER is not set
#
@@ -344,6 +349,7 @@ CONFIG_IPV6=y
# Network testing
#
# CONFIG_NET_PKTGEN is not set
+# CONFIG_NET_TCPPROBE is not set
# CONFIG_HAMRADIO is not set
# CONFIG_IRDA is not set
# CONFIG_BT is not set
@@ -360,6 +366,7 @@ CONFIG_STANDALONE=y
CONFIG_PREVENT_FIRMWARE_BUILD=y
CONFIG_FW_LOADER=y
# CONFIG_DEBUG_DRIVER is not set
+# CONFIG_SYS_HYPERVISOR is not set
#
# Connector - unified userspace <-> kernelspace linker
@@ -526,6 +533,7 @@ CONFIG_SCSI_ATA_PIIX=y
# CONFIG_SCSI_SATA_MV is not set
CONFIG_SCSI_SATA_NV=y
# CONFIG_SCSI_PDC_ADMA is not set
+# CONFIG_SCSI_HPTIOP is not set
# CONFIG_SCSI_SATA_QSTOR is not set
# CONFIG_SCSI_SATA_PROMISE is not set
# CONFIG_SCSI_SATA_SX4 is not set
@@ -591,10 +599,7 @@ CONFIG_IEEE1394=y
#
# Device Drivers
#
-
-#
-# Texas Instruments PCILynx requires I2C
-#
+# CONFIG_IEEE1394_PCILYNX is not set
CONFIG_IEEE1394_OHCI1394=y
#
@@ -645,7 +650,16 @@ CONFIG_VORTEX=y
#
# Tulip family network device support
#
-# CONFIG_NET_TULIP is not set
+CONFIG_NET_TULIP=y
+# CONFIG_DE2104X is not set
+CONFIG_TULIP=y
+# CONFIG_TULIP_MWI is not set
+# CONFIG_TULIP_MMIO is not set
+# CONFIG_TULIP_NAPI is not set
+# CONFIG_DE4X5 is not set
+# CONFIG_WINBOND_840 is not set
+# CONFIG_DM9102 is not set
+# CONFIG_ULI526X is not set
# CONFIG_HP100 is not set
CONFIG_NET_PCI=y
# CONFIG_PCNET32 is not set
@@ -697,6 +711,7 @@ CONFIG_TIGON3=y
# CONFIG_IXGB is not set
CONFIG_S2IO=m
# CONFIG_S2IO_NAPI is not set
+# CONFIG_MYRI10GE is not set
#
# Token Ring devices
@@ -887,7 +902,56 @@ CONFIG_HPET_MMAP=y
#
# I2C support
#
-# CONFIG_I2C is not set
+CONFIG_I2C=m
+CONFIG_I2C_CHARDEV=m
+
+#
+# I2C Algorithms
+#
+# CONFIG_I2C_ALGOBIT is not set
+# CONFIG_I2C_ALGOPCF is not set
+# CONFIG_I2C_ALGOPCA is not set
+
+#
+# I2C Hardware Bus support
+#
+# CONFIG_I2C_ALI1535 is not set
+# CONFIG_I2C_ALI1563 is not set
+# CONFIG_I2C_ALI15X3 is not set
+# CONFIG_I2C_AMD756 is not set
+# CONFIG_I2C_AMD8111 is not set
+# CONFIG_I2C_I801 is not set
+# CONFIG_I2C_I810 is not set
+# CONFIG_I2C_PIIX4 is not set
+CONFIG_I2C_ISA=m
+# CONFIG_I2C_NFORCE2 is not set
+# CONFIG_I2C_OCORES is not set
+# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_PROSAVAGE is not set
+# CONFIG_I2C_SAVAGE4 is not set
+# CONFIG_I2C_SIS5595 is not set
+# CONFIG_I2C_SIS630 is not set
+# CONFIG_I2C_SIS96X is not set
+# CONFIG_I2C_STUB is not set
+# CONFIG_I2C_VIA is not set
+# CONFIG_I2C_VIAPRO is not set
+# CONFIG_I2C_VOODOO3 is not set
+# CONFIG_I2C_PCA_ISA is not set
+
+#
+# Miscellaneous I2C Chip support
+#
+# CONFIG_SENSORS_DS1337 is not set
+# CONFIG_SENSORS_DS1374 is not set
+# CONFIG_SENSORS_EEPROM is not set
+# CONFIG_SENSORS_PCF8574 is not set
+# CONFIG_SENSORS_PCA9539 is not set
+# CONFIG_SENSORS_PCF8591 is not set
+# CONFIG_SENSORS_MAX6875 is not set
+# CONFIG_I2C_DEBUG_CORE is not set
+# CONFIG_I2C_DEBUG_ALGO is not set
+# CONFIG_I2C_DEBUG_BUS is not set
+# CONFIG_I2C_DEBUG_CHIP is not set
#
# SPI support
@@ -898,14 +962,51 @@ CONFIG_HPET_MMAP=y
#
# Dallas's 1-wire bus
#
-# CONFIG_W1 is not set
#
# Hardware Monitoring support
#
CONFIG_HWMON=y
# CONFIG_HWMON_VID is not set
+# CONFIG_SENSORS_ABITUGURU is not set
+# CONFIG_SENSORS_ADM1021 is not set
+# CONFIG_SENSORS_ADM1025 is not set
+# CONFIG_SENSORS_ADM1026 is not set
+# CONFIG_SENSORS_ADM1031 is not set
+# CONFIG_SENSORS_ADM9240 is not set
+# CONFIG_SENSORS_ASB100 is not set
+# CONFIG_SENSORS_ATXP1 is not set
+# CONFIG_SENSORS_DS1621 is not set
# CONFIG_SENSORS_F71805F is not set
+# CONFIG_SENSORS_FSCHER is not set
+# CONFIG_SENSORS_FSCPOS is not set
+# CONFIG_SENSORS_GL518SM is not set
+# CONFIG_SENSORS_GL520SM is not set
+# CONFIG_SENSORS_IT87 is not set
+# CONFIG_SENSORS_LM63 is not set
+# CONFIG_SENSORS_LM75 is not set
+# CONFIG_SENSORS_LM77 is not set
+# CONFIG_SENSORS_LM78 is not set
+# CONFIG_SENSORS_LM80 is not set
+# CONFIG_SENSORS_LM83 is not set
+# CONFIG_SENSORS_LM85 is not set
+# CONFIG_SENSORS_LM87 is not set
+# CONFIG_SENSORS_LM90 is not set
+# CONFIG_SENSORS_LM92 is not set
+# CONFIG_SENSORS_MAX1619 is not set
+# CONFIG_SENSORS_PC87360 is not set
+# CONFIG_SENSORS_SIS5595 is not set
+# CONFIG_SENSORS_SMSC47M1 is not set
+# CONFIG_SENSORS_SMSC47M192 is not set
+CONFIG_SENSORS_SMSC47B397=m
+# CONFIG_SENSORS_VIA686A is not set
+# CONFIG_SENSORS_VT8231 is not set
+# CONFIG_SENSORS_W83781D is not set
+# CONFIG_SENSORS_W83791D is not set
+# CONFIG_SENSORS_W83792D is not set
+# CONFIG_SENSORS_W83L785TS is not set
+# CONFIG_SENSORS_W83627HF is not set
+# CONFIG_SENSORS_W83627EHF is not set
# CONFIG_SENSORS_HDAPS is not set
# CONFIG_HWMON_DEBUG_CHIP is not set
@@ -918,6 +1019,7 @@ CONFIG_HWMON=y
# Multimedia devices
#
# CONFIG_VIDEO_DEV is not set
+CONFIG_VIDEO_V4L2=y
#
# Digital Video Broadcasting Devices
@@ -953,28 +1055,17 @@ CONFIG_SOUND=y
# Open Sound System
#
CONFIG_SOUND_PRIME=y
-CONFIG_OBSOLETE_OSS_DRIVER=y
# CONFIG_SOUND_BT878 is not set
-# CONFIG_SOUND_CMPCI is not set
# CONFIG_SOUND_EMU10K1 is not set
# CONFIG_SOUND_FUSION is not set
-# CONFIG_SOUND_CS4281 is not set
-# CONFIG_SOUND_ES1370 is not set
# CONFIG_SOUND_ES1371 is not set
-# CONFIG_SOUND_ESSSOLO1 is not set
-# CONFIG_SOUND_MAESTRO is not set
-# CONFIG_SOUND_MAESTRO3 is not set
CONFIG_SOUND_ICH=y
-# CONFIG_SOUND_SONICVIBES is not set
# CONFIG_SOUND_TRIDENT is not set
# CONFIG_SOUND_MSNDCLAS is not set
# CONFIG_SOUND_MSNDPIN is not set
# CONFIG_SOUND_VIA82CXXX is not set
# CONFIG_SOUND_OSS is not set
-# CONFIG_SOUND_ALI5455 is not set
-# CONFIG_SOUND_FORTE is not set
-# CONFIG_SOUND_RME96XX is not set
-# CONFIG_SOUND_AD1980 is not set
+# CONFIG_SOUND_TVMIXER is not set
#
# USB support
@@ -1000,6 +1091,7 @@ CONFIG_USB_DEVICEFS=y
CONFIG_USB_EHCI_HCD=y
# CONFIG_USB_EHCI_SPLIT_ISO is not set
# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
+# CONFIG_USB_EHCI_TT_NEWSCHED is not set
# CONFIG_USB_ISP116X_HCD is not set
CONFIG_USB_OHCI_HCD=y
# CONFIG_USB_OHCI_BIG_ENDIAN is not set
@@ -1089,10 +1181,12 @@ CONFIG_USB_MON=y
# CONFIG_USB_LEGOTOWER is not set
# CONFIG_USB_LCD is not set
# CONFIG_USB_LED is not set
+# CONFIG_USB_CY7C63 is not set
# CONFIG_USB_CYTHERM is not set
# CONFIG_USB_PHIDGETKIT is not set
# CONFIG_USB_PHIDGETSERVO is not set
# CONFIG_USB_IDMOUSE is not set
+# CONFIG_USB_APPLEDISPLAY is not set
# CONFIG_USB_SISUSBVGA is not set
# CONFIG_USB_LD is not set
# CONFIG_USB_TEST is not set
@@ -1141,6 +1235,19 @@ CONFIG_USB_MON=y
# CONFIG_RTC_CLASS is not set
#
+# DMA Engine support
+#
+# CONFIG_DMA_ENGINE is not set
+
+#
+# DMA Clients
+#
+
+#
+# DMA Devices
+#
+
+#
# Firmware Drivers
#
# CONFIG_EDD is not set
@@ -1175,6 +1282,7 @@ CONFIG_FS_POSIX_ACL=y
# CONFIG_MINIX_FS is not set
# CONFIG_ROMFS_FS is not set
CONFIG_INOTIFY=y
+CONFIG_INOTIFY_USER=y
# CONFIG_QUOTA is not set
CONFIG_DNOTIFY=y
CONFIG_AUTOFS_FS=y
@@ -1331,7 +1439,8 @@ CONFIG_DETECT_SOFTLOCKUP=y
CONFIG_DEBUG_FS=y
# CONFIG_DEBUG_VM is not set
# CONFIG_FRAME_POINTER is not set
-# CONFIG_UNWIND_INFO is not set
+CONFIG_UNWIND_INFO=y
+CONFIG_STACK_UNWIND=y
# CONFIG_FORCED_INLINING is not set
# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_DEBUG_RODATA is not set
diff --git a/arch/x86_64/ia32/fpu32.c b/arch/x86_64/ia32/fpu32.c
index 1c23095f1813..2c8209a3605a 100644
--- a/arch/x86_64/ia32/fpu32.c
+++ b/arch/x86_64/ia32/fpu32.c
@@ -2,7 +2,6 @@
* Copyright 2002 Andi Kleen, SuSE Labs.
* FXSAVE<->i387 conversion support. Based on code by Gareth Hughes.
* This is used for ptrace, signals and coredumps in 32bit emulation.
- * $Id: fpu32.c,v 1.1 2002/03/21 14:16:32 ak Exp $
*/
#include <linux/sched.h>
diff --git a/arch/x86_64/ia32/ia32_signal.c b/arch/x86_64/ia32/ia32_signal.c
index e0a92439f634..25e5ca22204c 100644
--- a/arch/x86_64/ia32/ia32_signal.c
+++ b/arch/x86_64/ia32/ia32_signal.c
@@ -6,8 +6,6 @@
* 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
* 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
* 2000-12-* x86-64 compatibility mode signal handling by Andi Kleen
- *
- * $Id: ia32_signal.c,v 1.22 2002/07/29 10:34:03 ak Exp $
*/
#include <linux/sched.h>
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index 4ec594ab1a98..c536fa98ea37 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -155,6 +155,7 @@ sysenter_tracesys:
.previous
jmp sysenter_do_call
CFI_ENDPROC
+ENDPROC(ia32_sysenter_target)
/*
* 32bit SYSCALL instruction entry.
@@ -178,7 +179,7 @@ sysenter_tracesys:
*/
ENTRY(ia32_cstar_target)
CFI_STARTPROC32 simple
- CFI_DEF_CFA rsp,0
+ CFI_DEF_CFA rsp,PDA_STACKOFFSET
CFI_REGISTER rip,rcx
/*CFI_REGISTER rflags,r11*/
swapgs
@@ -249,6 +250,7 @@ cstar_tracesys:
.quad 1b,ia32_badarg
.previous
jmp cstar_do_call
+END(ia32_cstar_target)
ia32_badarg:
movq $-EFAULT,%rax
@@ -314,16 +316,13 @@ ia32_tracesys:
LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST
jmp ia32_do_syscall
+END(ia32_syscall)
ia32_badsys:
movq $0,ORIG_RAX-ARGOFFSET(%rsp)
movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
jmp int_ret_from_sys_call
-ni_syscall:
- movq %rax,%rdi
- jmp sys32_ni_syscall
-
quiet_ni_syscall:
movq $-ENOSYS,%rax
ret
@@ -370,10 +369,10 @@ ENTRY(ia32_ptregs_common)
RESTORE_REST
jmp ia32_sysret /* misbalances the return cache */
CFI_ENDPROC
+END(ia32_ptregs_common)
.section .rodata,"a"
.align 8
- .globl ia32_sys_call_table
ia32_sys_call_table:
.quad sys_restart_syscall
.quad sys_exit
diff --git a/arch/x86_64/ia32/ptrace32.c b/arch/x86_64/ia32/ptrace32.c
index 23a4515a73b4..a590b7a0d92d 100644
--- a/arch/x86_64/ia32/ptrace32.c
+++ b/arch/x86_64/ia32/ptrace32.c
@@ -7,8 +7,6 @@
*
* This allows to access 64bit processes too; but there is no way to see the extended
* register contents.
- *
- * $Id: ptrace32.c,v 1.16 2003/03/14 16:06:35 ak Exp $
*/
#include <linux/kernel.h>
@@ -27,6 +25,7 @@
#include <asm/debugreg.h>
#include <asm/i387.h>
#include <asm/fpu32.h>
+#include <asm/ia32.h>
/*
* Determines which flags the user has access to [1 = access, 0 = no access].
@@ -199,6 +198,24 @@ static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
#undef R32
+static long ptrace32_siginfo(unsigned request, u32 pid, u32 addr, u32 data)
+{
+ int ret;
+ compat_siginfo_t *si32 = (compat_siginfo_t *)compat_ptr(data);
+ siginfo_t *si = compat_alloc_user_space(sizeof(siginfo_t));
+ if (request == PTRACE_SETSIGINFO) {
+ ret = copy_siginfo_from_user32(si, si32);
+ if (ret)
+ return ret;
+ }
+ ret = sys_ptrace(request, pid, addr, (unsigned long)si);
+ if (ret)
+ return ret;
+ if (request == PTRACE_GETSIGINFO)
+ ret = copy_siginfo_to_user32(si32, si);
+ return ret;
+}
+
asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
{
struct task_struct *child;
@@ -208,9 +225,19 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
__u32 val;
switch (request) {
- default:
+ case PTRACE_TRACEME:
+ case PTRACE_ATTACH:
+ case PTRACE_KILL:
+ case PTRACE_CONT:
+ case PTRACE_SINGLESTEP:
+ case PTRACE_DETACH:
+ case PTRACE_SYSCALL:
+ case PTRACE_SETOPTIONS:
return sys_ptrace(request, pid, addr, data);
+ default:
+ return -EINVAL;
+
case PTRACE_PEEKTEXT:
case PTRACE_PEEKDATA:
case PTRACE_POKEDATA:
@@ -225,10 +252,11 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
case PTRACE_GETFPXREGS:
case PTRACE_GETEVENTMSG:
break;
- }
- if (request == PTRACE_TRACEME)
- return ptrace_traceme();
+ case PTRACE_SETSIGINFO:
+ case PTRACE_GETSIGINFO:
+ return ptrace32_siginfo(request, pid, addr, data);
+ }
child = ptrace_get_task_struct(pid);
if (IS_ERR(child))
@@ -349,8 +377,7 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
break;
default:
- ret = -EINVAL;
- break;
+ BUG();
}
out:
diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c
index f182b20858e2..dc88154c412b 100644
--- a/arch/x86_64/ia32/sys_ia32.c
+++ b/arch/x86_64/ia32/sys_ia32.c
@@ -508,19 +508,6 @@ sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr, int options)
return compat_sys_wait4(pid, stat_addr, options, NULL);
}
-int sys32_ni_syscall(int call)
-{
- struct task_struct *me = current;
- static char lastcomm[sizeof(me->comm)];
-
- if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
- printk(KERN_INFO "IA32 syscall %d from %s not implemented\n",
- call, me->comm);
- strncpy(lastcomm, me->comm, sizeof(lastcomm));
- }
- return -ENOSYS;
-}
-
/* 32-bit timeval and related flotsam. */
asmlinkage long
@@ -916,7 +903,7 @@ long sys32_vm86_warning(void)
struct task_struct *me = current;
static char lastcomm[sizeof(me->comm)];
if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
- printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n",
+ compat_printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n",
me->comm);
strncpy(lastcomm, me->comm, sizeof(lastcomm));
}
@@ -929,13 +916,3 @@ long sys32_lookup_dcookie(u32 addr_low, u32 addr_high,
return sys_lookup_dcookie(((u64)addr_high << 32) | addr_low, buf, len);
}
-static int __init ia32_init (void)
-{
- printk("IA32 emulation $Id: sys_ia32.c,v 1.32 2002/03/24 13:02:28 ak Exp $\n");
- return 0;
-}
-
-__initcall(ia32_init);
-
-extern unsigned long ia32_sys_call_table[];
-EXPORT_SYMBOL(ia32_sys_call_table);
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index 059c88313f4e..aeb9c560be88 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -8,7 +8,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \
ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \
x8664_ksyms.o i387.o syscall.o vsyscall.o \
setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
- pci-dma.o pci-nommu.o
+ pci-dma.o pci-nommu.o alternative.o
obj-$(CONFIG_X86_MCE) += mce.o
obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
@@ -28,11 +28,13 @@ obj-$(CONFIG_PM) += suspend.o
obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o
obj-$(CONFIG_CPU_FREQ) += cpufreq/
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
-obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o
+obj-$(CONFIG_IOMMU) += pci-gart.o aperture.o
+obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary.o tce.o
obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o
obj-$(CONFIG_X86_VSMP) += vsmp.o
+obj-$(CONFIG_K8_NB) += k8.o
obj-$(CONFIG_MODULES) += module.o
@@ -49,3 +51,5 @@ intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o
quirks-y += ../../i386/kernel/quirks.o
i8237-y += ../../i386/kernel/i8237.o
msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o
+alternative-y += ../../i386/kernel/alternative.o
+
diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c
index 70b9d21ed675..a195ef06ec55 100644
--- a/arch/x86_64/kernel/aperture.c
+++ b/arch/x86_64/kernel/aperture.c
@@ -8,7 +8,6 @@
* because only the bootmem allocator can allocate 32+MB.
*
* Copyright 2002 Andi Kleen, SuSE Labs.
- * $Id: aperture.c,v 1.7 2003/08/01 03:36:18 ak Exp $
*/
#include <linux/config.h>
#include <linux/kernel.h>
@@ -24,6 +23,7 @@
#include <asm/proto.h>
#include <asm/pci-direct.h>
#include <asm/dma.h>
+#include <asm/k8.h>
int iommu_aperture;
int iommu_aperture_disabled __initdata = 0;
@@ -37,8 +37,6 @@ int fix_aperture __initdata = 1;
/* This code runs before the PCI subsystem is initialized, so just
access the northbridge directly. */
-#define NB_ID_3 (PCI_VENDOR_ID_AMD | (0x1103<<16))
-
static u32 __init allocate_aperture(void)
{
pg_data_t *nd0 = NODE_DATA(0);
@@ -68,20 +66,20 @@ static u32 __init allocate_aperture(void)
return (u32)__pa(p);
}
-static int __init aperture_valid(char *name, u64 aper_base, u32 aper_size)
+static int __init aperture_valid(u64 aper_base, u32 aper_size)
{
if (!aper_base)
return 0;
if (aper_size < 64*1024*1024) {
- printk("Aperture from %s too small (%d MB)\n", name, aper_size>>20);
+ printk("Aperture too small (%d MB)\n", aper_size>>20);
return 0;
}
if (aper_base + aper_size >= 0xffffffff) {
- printk("Aperture from %s beyond 4GB. Ignoring.\n",name);
+ printk("Aperture beyond 4GB. Ignoring.\n");
return 0;
}
if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
- printk("Aperture from %s pointing to e820 RAM. Ignoring.\n",name);
+ printk("Aperture pointing to e820 RAM. Ignoring.\n");
return 0;
}
return 1;
@@ -140,7 +138,7 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order)
printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n",
aper, 32 << *order, apsizereg);
- if (!aperture_valid("AGP bridge", aper, (32*1024*1024) << *order))
+ if (!aperture_valid(aper, (32*1024*1024) << *order))
return 0;
return (u32)aper;
}
@@ -208,10 +206,10 @@ void __init iommu_hole_init(void)
fix = 0;
for (num = 24; num < 32; num++) {
- char name[30];
- if (read_pci_config(0, num, 3, 0x00) != NB_ID_3)
- continue;
+ if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
+ continue;
+ iommu_detected = 1;
iommu_aperture = 1;
aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7;
@@ -222,9 +220,7 @@ void __init iommu_hole_init(void)
printk("CPU %d: aperture @ %Lx size %u MB\n", num-24,
aper_base, aper_size>>20);
- sprintf(name, "northbridge cpu %d", num-24);
-
- if (!aperture_valid(name, aper_base, aper_size)) {
+ if (!aperture_valid(aper_base, aper_size)) {
fix = 1;
break;
}
@@ -273,7 +269,7 @@ void __init iommu_hole_init(void)
/* Fix up the north bridges */
for (num = 24; num < 32; num++) {
- if (read_pci_config(0, num, 3, 0x00) != NB_ID_3)
+ if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
continue;
/* Don't enable translation yet. That is done later.
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index 29ef99001e05..b2ead91df218 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -100,7 +100,7 @@ void clear_local_APIC(void)
maxlvt = get_maxlvt();
/*
- * Masking an LVT entry on a P6 can trigger a local APIC error
+ * Masking an LVT entry can trigger a local APIC error
* if the vector is zero. Mask LVTERR first to prevent this.
*/
if (maxlvt >= 3) {
@@ -851,7 +851,18 @@ void disable_APIC_timer(void)
unsigned long v;
v = apic_read(APIC_LVTT);
- apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
+ /*
+ * When an illegal vector value (0-15) is written to an LVT
+ * entry and delivery mode is Fixed, the APIC may signal an
+ * illegal vector error, with out regard to whether the mask
+ * bit is set or whether an interrupt is actually seen on input.
+ *
+ * Boot sequence might call this function when the LVTT has
+ * '0' vector value. So make sure vector field is set to
+ * valid value.
+ */
+ v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
+ apic_write(APIC_LVTT, v);
}
}
@@ -909,15 +920,13 @@ int setup_profiling_timer(unsigned int multiplier)
return -EINVAL;
}
-#ifdef CONFIG_X86_MCE_AMD
-void setup_threshold_lvt(unsigned long lvt_off)
+void setup_APIC_extened_lvt(unsigned char lvt_off, unsigned char vector,
+ unsigned char msg_type, unsigned char mask)
{
- unsigned int v = 0;
- unsigned long reg = (lvt_off << 4) + 0x500;
- v |= THRESHOLD_APIC_VECTOR;
+ unsigned long reg = (lvt_off << 4) + K8_APIC_EXT_LVT_BASE;
+ unsigned int v = (mask << 16) | (msg_type << 8) | vector;
apic_write(reg, v);
}
-#endif /* CONFIG_X86_MCE_AMD */
#undef APIC_DIVISOR
@@ -983,7 +992,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
}
/*
- * oem_force_hpet_timer -- force HPET mode for some boxes.
+ * apic_is_clustered_box() -- Check if we can expect good TSC
*
* Thus far, the major user of this is IBM's Summit2 series:
*
@@ -991,7 +1000,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
* multi-chassis. Use available data to take a good guess.
* If in doubt, go HPET.
*/
-__cpuinit int oem_force_hpet_timer(void)
+__cpuinit int apic_is_clustered_box(void)
{
int i, clusters, zeros;
unsigned id;
@@ -1022,8 +1031,7 @@ __cpuinit int oem_force_hpet_timer(void)
}
/*
- * If clusters > 2, then should be multi-chassis. Return 1 for HPET.
- * Else return 0 to use TSC.
+ * If clusters > 2, then should be multi-chassis.
* May have to revisit this when multi-core + hyperthreaded CPUs come
* out, but AFAIK this will work even for them.
*/
diff --git a/arch/x86_64/kernel/crash.c b/arch/x86_64/kernel/crash.c
index 4e6c3b729e39..8ca04912b1cc 100644
--- a/arch/x86_64/kernel/crash.c
+++ b/arch/x86_64/kernel/crash.c
@@ -111,14 +111,14 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu)
atomic_dec(&waiting_for_crash_ipi);
/* Assume hlt works */
for(;;)
- asm("hlt");
+ halt();
return 1;
}
static void smp_send_nmi_allbutself(void)
{
- send_IPI_allbutself(APIC_DM_NMI);
+ send_IPI_allbutself(NMI_VECTOR);
}
/*
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index 1ef6028f721e..9e94d834624b 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -1,7 +1,6 @@
/*
* Handle the memory map.
* The functions here do the job until bootmem takes over.
- * $Id: e820.c,v 1.4 2002/09/19 19:25:32 ak Exp $
*
* Getting sanitize_e820_map() in sync with i386 version by applying change:
* - Provisions for empty E820 memory regions (reported by certain BIOSes).
@@ -621,6 +620,7 @@ void __init parse_memmapopt(char *p, char **from)
}
unsigned long pci_mem_start = 0xaeedbabe;
+EXPORT_SYMBOL(pci_mem_start);
/*
* Search for the biggest gap in the low 32 bits of the e820
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 586b34c00c48..7290e72b9a34 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -154,6 +154,7 @@ rff_trace:
GET_THREAD_INFO(%rcx)
jmp rff_action
CFI_ENDPROC
+END(ret_from_fork)
/*
* System call entry. Upto 6 arguments in registers are supported.
@@ -188,7 +189,7 @@ rff_trace:
ENTRY(system_call)
CFI_STARTPROC simple
- CFI_DEF_CFA rsp,0
+ CFI_DEF_CFA rsp,PDA_STACKOFFSET
CFI_REGISTER rip,rcx
/*CFI_REGISTER rflags,r11*/
swapgs
@@ -285,6 +286,7 @@ tracesys:
/* Use IRET because user could have changed frame */
jmp int_ret_from_sys_call
CFI_ENDPROC
+END(system_call)
/*
* Syscall return path ending with IRET.
@@ -364,6 +366,7 @@ int_restore_rest:
cli
jmp int_with_check
CFI_ENDPROC
+END(int_ret_from_sys_call)
/*
* Certain special system calls that need to save a complete full stack frame.
@@ -375,6 +378,7 @@ int_restore_rest:
leaq \func(%rip),%rax
leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
jmp ptregscall_common
+END(\label)
.endm
CFI_STARTPROC
@@ -404,6 +408,7 @@ ENTRY(ptregscall_common)
CFI_REL_OFFSET rip, 0
ret
CFI_ENDPROC
+END(ptregscall_common)
ENTRY(stub_execve)
CFI_STARTPROC
@@ -418,6 +423,7 @@ ENTRY(stub_execve)
RESTORE_REST
jmp int_ret_from_sys_call
CFI_ENDPROC
+END(stub_execve)
/*
* sigreturn is special because it needs to restore all registers on return.
@@ -435,6 +441,7 @@ ENTRY(stub_rt_sigreturn)
RESTORE_REST
jmp int_ret_from_sys_call
CFI_ENDPROC
+END(stub_rt_sigreturn)
/*
* initial frame state for interrupts and exceptions
@@ -466,29 +473,18 @@ ENTRY(stub_rt_sigreturn)
/* 0(%rsp): interrupt number */
.macro interrupt func
cld
-#ifdef CONFIG_DEBUG_INFO
- SAVE_ALL
- movq %rsp,%rdi
- /*
- * Setup a stack frame pointer. This allows gdb to trace
- * back to the original stack.
- */
- movq %rsp,%rbp
- CFI_DEF_CFA_REGISTER rbp
-#else
SAVE_ARGS
leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
-#endif
+ pushq %rbp
+ CFI_ADJUST_CFA_OFFSET 8
+ CFI_REL_OFFSET rbp, 0
+ movq %rsp,%rbp
+ CFI_DEF_CFA_REGISTER rbp
testl $3,CS(%rdi)
je 1f
swapgs
1: incl %gs:pda_irqcount # RED-PEN should check preempt count
- movq %gs:pda_irqstackptr,%rax
- cmoveq %rax,%rsp /*todo This needs CFI annotation! */
- pushq %rdi # save old stack
-#ifndef CONFIG_DEBUG_INFO
- CFI_ADJUST_CFA_OFFSET 8
-#endif
+ cmoveq %gs:pda_irqstackptr,%rsp
call \func
.endm
@@ -497,17 +493,11 @@ ENTRY(common_interrupt)
interrupt do_IRQ
/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
- popq %rdi
-#ifndef CONFIG_DEBUG_INFO
- CFI_ADJUST_CFA_OFFSET -8
-#endif
cli
decl %gs:pda_irqcount
-#ifdef CONFIG_DEBUG_INFO
- movq RBP(%rdi),%rbp
+ leaveq
CFI_DEF_CFA_REGISTER rsp
-#endif
- leaq ARGOFFSET(%rdi),%rsp /*todo This needs CFI annotation! */
+ CFI_ADJUST_CFA_OFFSET -8
exit_intr:
GET_THREAD_INFO(%rcx)
testl $3,CS-ARGOFFSET(%rsp)
@@ -589,7 +579,9 @@ retint_kernel:
call preempt_schedule_irq
jmp exit_intr
#endif
+
CFI_ENDPROC
+END(common_interrupt)
/*
* APIC interrupts.
@@ -605,17 +597,21 @@ retint_kernel:
ENTRY(thermal_interrupt)
apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
+END(thermal_interrupt)
ENTRY(threshold_interrupt)
apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
+END(threshold_interrupt)
#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
+END(reschedule_interrupt)
.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
+END(invalidate_interrupt\num)
.endm
INVALIDATE_ENTRY 0
@@ -629,17 +625,21 @@ ENTRY(invalidate_interrupt\num)
ENTRY(call_function_interrupt)
apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
+END(call_function_interrupt)
#endif
#ifdef CONFIG_X86_LOCAL_APIC
ENTRY(apic_timer_interrupt)
apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
+END(apic_timer_interrupt)
ENTRY(error_interrupt)
apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
+END(error_interrupt)
ENTRY(spurious_interrupt)
apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
+END(spurious_interrupt)
#endif
/*
@@ -777,6 +777,7 @@ error_kernelspace:
cmpq $gs_change,RIP(%rsp)
je error_swapgs
jmp error_sti
+END(error_entry)
/* Reload gs selector with exception handling */
/* edi: new selector */
@@ -794,6 +795,7 @@ gs_change:
CFI_ADJUST_CFA_OFFSET -8
ret
CFI_ENDPROC
+ENDPROC(load_gs_index)
.section __ex_table,"a"
.align 8
@@ -847,7 +849,7 @@ ENTRY(kernel_thread)
UNFAKE_STACK_FRAME
ret
CFI_ENDPROC
-
+ENDPROC(kernel_thread)
child_rip:
/*
@@ -860,6 +862,7 @@ child_rip:
# exit
xorl %edi, %edi
call do_exit
+ENDPROC(child_rip)
/*
* execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
@@ -889,19 +892,24 @@ ENTRY(execve)
UNFAKE_STACK_FRAME
ret
CFI_ENDPROC
+ENDPROC(execve)
KPROBE_ENTRY(page_fault)
errorentry do_page_fault
+END(page_fault)
.previous .text
ENTRY(coprocessor_error)
zeroentry do_coprocessor_error
+END(coprocessor_error)
ENTRY(simd_coprocessor_error)
zeroentry do_simd_coprocessor_error
+END(simd_coprocessor_error)
ENTRY(device_not_available)
zeroentry math_state_restore
+END(device_not_available)
/* runs on exception stack */
KPROBE_ENTRY(debug)
@@ -911,6 +919,7 @@ KPROBE_ENTRY(debug)
paranoidentry do_debug, DEBUG_STACK
jmp paranoid_exit
CFI_ENDPROC
+END(debug)
.previous .text
/* runs on exception stack */
@@ -961,6 +970,7 @@ paranoid_schedule:
cli
jmp paranoid_userspace
CFI_ENDPROC
+END(nmi)
.previous .text
KPROBE_ENTRY(int3)
@@ -970,22 +980,28 @@ KPROBE_ENTRY(int3)
paranoidentry do_int3, DEBUG_STACK
jmp paranoid_exit
CFI_ENDPROC
+END(int3)
.previous .text
ENTRY(overflow)
zeroentry do_overflow
+END(overflow)
ENTRY(bounds)
zeroentry do_bounds
+END(bounds)
ENTRY(invalid_op)
zeroentry do_invalid_op
+END(invalid_op)
ENTRY(coprocessor_segment_overrun)
zeroentry do_coprocessor_segment_overrun
+END(coprocessor_segment_overrun)
ENTRY(reserved)
zeroentry do_reserved
+END(reserved)
/* runs on exception stack */
ENTRY(double_fault)
@@ -993,12 +1009,15 @@ ENTRY(double_fault)
paranoidentry do_double_fault
jmp paranoid_exit
CFI_ENDPROC
+END(double_fault)
ENTRY(invalid_TSS)
errorentry do_invalid_TSS
+END(invalid_TSS)
ENTRY(segment_not_present)
errorentry do_segment_not_present
+END(segment_not_present)
/* runs on exception stack */
ENTRY(stack_segment)
@@ -1006,19 +1025,24 @@ ENTRY(stack_segment)
paranoidentry do_stack_segment
jmp paranoid_exit
CFI_ENDPROC
+END(stack_segment)
KPROBE_ENTRY(general_protection)
errorentry do_general_protection
+END(general_protection)
.previous .text
ENTRY(alignment_check)
errorentry do_alignment_check
+END(alignment_check)
ENTRY(divide_error)
zeroentry do_divide_error
+END(divide_error)
ENTRY(spurious_interrupt_bug)
zeroentry do_spurious_interrupt_bug
+END(spurious_interrupt_bug)
#ifdef CONFIG_X86_MCE
/* runs on exception stack */
@@ -1029,6 +1053,7 @@ ENTRY(machine_check)
paranoidentry do_machine_check
jmp paranoid_exit
CFI_ENDPROC
+END(machine_check)
#endif
ENTRY(call_softirq)
@@ -1046,3 +1071,37 @@ ENTRY(call_softirq)
decl %gs:pda_irqcount
ret
CFI_ENDPROC
+ENDPROC(call_softirq)
+
+#ifdef CONFIG_STACK_UNWIND
+ENTRY(arch_unwind_init_running)
+ CFI_STARTPROC
+ movq %r15, R15(%rdi)
+ movq %r14, R14(%rdi)
+ xchgq %rsi, %rdx
+ movq %r13, R13(%rdi)
+ movq %r12, R12(%rdi)
+ xorl %eax, %eax
+ movq %rbp, RBP(%rdi)
+ movq %rbx, RBX(%rdi)
+ movq (%rsp), %rcx
+ movq %rax, R11(%rdi)
+ movq %rax, R10(%rdi)
+ movq %rax, R9(%rdi)
+ movq %rax, R8(%rdi)
+ movq %rax, RAX(%rdi)
+ movq %rax, RCX(%rdi)
+ movq %rax, RDX(%rdi)
+ movq %rax, RSI(%rdi)
+ movq %rax, RDI(%rdi)
+ movq %rax, ORIG_RAX(%rdi)
+ movq %rcx, RIP(%rdi)
+ leaq 8(%rsp), %rcx
+ movq $__KERNEL_CS, CS(%rdi)
+ movq %rax, EFLAGS(%rdi)
+ movq %rcx, RSP(%rdi)
+ movq $__KERNEL_DS, SS(%rdi)
+ jmpq *%rdx
+ CFI_ENDPROC
+ENDPROC(arch_unwind_init_running)
+#endif
diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c
index 1a2ab825be98..21c7066e236a 100644
--- a/arch/x86_64/kernel/genapic_flat.c
+++ b/arch/x86_64/kernel/genapic_flat.c
@@ -78,22 +78,29 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
static void flat_send_IPI_allbutself(int vector)
{
-#ifndef CONFIG_HOTPLUG_CPU
- if (((num_online_cpus()) - 1) >= 1)
- __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
+#ifdef CONFIG_HOTPLUG_CPU
+ int hotplug = 1;
#else
- cpumask_t allbutme = cpu_online_map;
+ int hotplug = 0;
+#endif
+ if (hotplug || vector == NMI_VECTOR) {
+ cpumask_t allbutme = cpu_online_map;
- cpu_clear(smp_processor_id(), allbutme);
+ cpu_clear(smp_processor_id(), allbutme);
- if (!cpus_empty(allbutme))
- flat_send_IPI_mask(allbutme, vector);
-#endif
+ if (!cpus_empty(allbutme))
+ flat_send_IPI_mask(allbutme, vector);
+ } else if (num_online_cpus() > 1) {
+ __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
+ }
}
static void flat_send_IPI_all(int vector)
{
- __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
+ if (vector == NMI_VECTOR)
+ flat_send_IPI_mask(cpu_online_map, vector);
+ else
+ __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
}
static int flat_apic_id_registered(void)
@@ -108,10 +115,7 @@ static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
static unsigned int phys_pkg_id(int index_msb)
{
- u32 ebx;
-
- ebx = cpuid_ebx(1);
- return ((ebx >> 24) & 0xFF) >> index_msb;
+ return hard_smp_processor_id() >> index_msb;
}
struct genapic apic_flat = {
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index cea20a66c150..e6a71c9556d9 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -2,8 +2,6 @@
* linux/arch/x86_64/kernel/head64.c -- prepare to run common code
*
* Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
- *
- * $Id: head64.c,v 1.22 2001/07/06 14:28:20 ak Exp $
*/
#include <linux/init.h>
diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c
index 5ecd34ab8c2b..9b1a4e147321 100644
--- a/arch/x86_64/kernel/i8259.c
+++ b/arch/x86_64/kernel/i8259.c
@@ -44,11 +44,11 @@
BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
BI(x,c) BI(x,d) BI(x,e) BI(x,f)
-#define BUILD_14_IRQS(x) \
+#define BUILD_15_IRQS(x) \
BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
- BI(x,c) BI(x,d)
+ BI(x,c) BI(x,d) BI(x,e)
/*
* ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
@@ -73,13 +73,13 @@ BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd)
#ifdef CONFIG_PCI_MSI
- BUILD_14_IRQS(0xe)
+ BUILD_15_IRQS(0xe)
#endif
#endif
#undef BUILD_16_IRQS
-#undef BUILD_14_IRQS
+#undef BUILD_15_IRQS
#undef BI
@@ -92,11 +92,11 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd)
IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
-#define IRQLIST_14(x) \
+#define IRQLIST_15(x) \
IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
- IRQ(x,c), IRQ(x,d)
+ IRQ(x,c), IRQ(x,d), IRQ(x,e)
void (*interrupt[NR_IRQS])(void) = {
IRQLIST_16(0x0),
@@ -108,7 +108,7 @@ void (*interrupt[NR_IRQS])(void) = {
IRQLIST_16(0xc), IRQLIST_16(0xd)
#ifdef CONFIG_PCI_MSI
- , IRQLIST_14(0xe)
+ , IRQLIST_15(0xe)
#endif
#endif
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 9cc7031b7151..c768d8a036d0 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -41,6 +41,7 @@
#include <asm/mach_apic.h>
#include <asm/acpi.h>
#include <asm/dma.h>
+#include <asm/nmi.h>
#define __apicdebuginit __init
@@ -56,6 +57,7 @@ int timer_over_8254 __initdata = 0;
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
static DEFINE_SPINLOCK(ioapic_lock);
+static DEFINE_SPINLOCK(vector_lock);
/*
* # of IRQ routing registers
@@ -317,7 +319,7 @@ void __init check_ioapic(void)
vendor &= 0xffff;
switch (vendor) {
case PCI_VENDOR_ID_VIA:
-#ifdef CONFIG_GART_IOMMU
+#ifdef CONFIG_IOMMU
if ((end_pfn > MAX_DMA32_PFN ||
force_iommu) &&
!iommu_aperture_allowed) {
@@ -834,10 +836,17 @@ u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
int assign_irq_vector(int irq)
{
static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
+ unsigned long flags;
+ int vector;
BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
- if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
+
+ spin_lock_irqsave(&vector_lock, flags);
+
+ if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
+ spin_unlock_irqrestore(&vector_lock, flags);
return IO_APIC_VECTOR(irq);
+ }
next:
current_vector += 8;
if (current_vector == IA32_SYSCALL_VECTOR)
@@ -849,11 +858,14 @@ next:
current_vector = FIRST_DEVICE_VECTOR + offset;
}
- vector_irq[current_vector] = irq;
+ vector = current_vector;
+ vector_irq[vector] = irq;
if (irq != AUTO_ASSIGN)
- IO_APIC_VECTOR(irq) = current_vector;
+ IO_APIC_VECTOR(irq) = vector;
+
+ spin_unlock_irqrestore(&vector_lock, flags);
- return current_vector;
+ return vector;
}
extern void (*interrupt[NR_IRQS])(void);
@@ -866,21 +878,14 @@ static struct hw_interrupt_type ioapic_edge_type;
static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger)
{
- if (use_pci_vector() && !platform_legacy_irq(irq)) {
- if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
- trigger == IOAPIC_LEVEL)
- irq_desc[vector].handler = &ioapic_level_type;
- else
- irq_desc[vector].handler = &ioapic_edge_type;
- set_intr_gate(vector, interrupt[vector]);
- } else {
- if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
- trigger == IOAPIC_LEVEL)
- irq_desc[irq].handler = &ioapic_level_type;
- else
- irq_desc[irq].handler = &ioapic_edge_type;
- set_intr_gate(vector, interrupt[irq]);
- }
+ unsigned idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
+
+ if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+ trigger == IOAPIC_LEVEL)
+ irq_desc[idx].handler = &ioapic_level_type;
+ else
+ irq_desc[idx].handler = &ioapic_edge_type;
+ set_intr_gate(vector, interrupt[idx]);
}
static void __init setup_IO_APIC_irqs(void)
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c
index d8bd0b345b1e..59518d4d4358 100644
--- a/arch/x86_64/kernel/irq.c
+++ b/arch/x86_64/kernel/irq.c
@@ -26,6 +26,30 @@ atomic_t irq_mis_count;
#endif
#endif
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+/*
+ * Probabilistic stack overflow check:
+ *
+ * Only check the stack in process context, because everything else
+ * runs on the big interrupt stacks. Checking reliably is too expensive,
+ * so we just check from interrupts.
+ */
+static inline void stack_overflow_check(struct pt_regs *regs)
+{
+ u64 curbase = (u64) current->thread_info;
+ static unsigned long warned = -60*HZ;
+
+ if (regs->rsp >= curbase && regs->rsp <= curbase + THREAD_SIZE &&
+ regs->rsp < curbase + sizeof(struct thread_info) + 128 &&
+ time_after(jiffies, warned + 60*HZ)) {
+ printk("do_IRQ: %s near stack overflow (cur:%Lx,rsp:%lx)\n",
+ current->comm, curbase, regs->rsp);
+ show_stack(NULL,NULL);
+ warned = jiffies;
+ }
+}
+#endif
+
/*
* Generic, controller-independent functions:
*/
@@ -39,7 +63,7 @@ int show_interrupts(struct seq_file *p, void *v)
if (i == 0) {
seq_printf(p, " ");
for_each_online_cpu(j)
- seq_printf(p, "CPU%d ",j);
+ seq_printf(p, "CPU%-8d",j);
seq_putc(p, '\n');
}
@@ -96,7 +120,9 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
exit_idle();
irq_enter();
-
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+ stack_overflow_check(regs);
+#endif
__do_IRQ(irq, regs);
irq_exit();
diff --git a/arch/x86_64/kernel/k8.c b/arch/x86_64/kernel/k8.c
new file mode 100644
index 000000000000..6416682d33d0
--- /dev/null
+++ b/arch/x86_64/kernel/k8.c
@@ -0,0 +1,118 @@
+/*
+ * Shared support code for AMD K8 northbridges and derivates.
+ * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2.
+ */
+#include <linux/gfp.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <asm/k8.h>
+
+int num_k8_northbridges;
+EXPORT_SYMBOL(num_k8_northbridges);
+
+static u32 *flush_words;
+
+struct pci_device_id k8_nb_ids[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
+ {}
+};
+EXPORT_SYMBOL(k8_nb_ids);
+
+struct pci_dev **k8_northbridges;
+EXPORT_SYMBOL(k8_northbridges);
+
+static struct pci_dev *next_k8_northbridge(struct pci_dev *dev)
+{
+ do {
+ dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
+ if (!dev)
+ break;
+ } while (!pci_match_id(&k8_nb_ids[0], dev));
+ return dev;
+}
+
+int cache_k8_northbridges(void)
+{
+ int i;
+ struct pci_dev *dev;
+ if (num_k8_northbridges)
+ return 0;
+
+ num_k8_northbridges = 0;
+ dev = NULL;
+ while ((dev = next_k8_northbridge(dev)) != NULL)
+ num_k8_northbridges++;
+
+ k8_northbridges = kmalloc((num_k8_northbridges + 1) * sizeof(void *),
+ GFP_KERNEL);
+ if (!k8_northbridges)
+ return -ENOMEM;
+
+ flush_words = kmalloc(num_k8_northbridges * sizeof(u32), GFP_KERNEL);
+ if (!flush_words) {
+ kfree(k8_northbridges);
+ return -ENOMEM;
+ }
+
+ dev = NULL;
+ i = 0;
+ while ((dev = next_k8_northbridge(dev)) != NULL) {
+ k8_northbridges[i++] = dev;
+ pci_read_config_dword(dev, 0x9c, &flush_words[i]);
+ }
+ k8_northbridges[i] = NULL;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cache_k8_northbridges);
+
+/* Ignores subdevice/subvendor but as far as I can figure out
+ they're useless anyways */
+int __init early_is_k8_nb(u32 device)
+{
+ struct pci_device_id *id;
+ u32 vendor = device & 0xffff;
+ device >>= 16;
+ for (id = k8_nb_ids; id->vendor; id++)
+ if (vendor == id->vendor && device == id->device)
+ return 1;
+ return 0;
+}
+
+void k8_flush_garts(void)
+{
+ int flushed, i;
+ unsigned long flags;
+ static DEFINE_SPINLOCK(gart_lock);
+
+ /* Avoid races between AGP and IOMMU. In theory it's not needed
+ but I'm not sure if the hardware won't lose flush requests
+ when another is pending. This whole thing is so expensive anyways
+ that it doesn't matter to serialize more. -AK */
+ spin_lock_irqsave(&gart_lock, flags);
+ flushed = 0;
+ for (i = 0; i < num_k8_northbridges; i++) {
+ pci_write_config_dword(k8_northbridges[i], 0x9c,
+ flush_words[i]|1);
+ flushed++;
+ }
+ for (i = 0; i < num_k8_northbridges; i++) {
+ u32 w;
+ /* Make sure the hardware actually executed the flush*/
+ for (;;) {
+ pci_read_config_dword(k8_northbridges[i],
+ 0x9c, &w);
+ if (!(w & 1))
+ break;
+ cpu_relax();
+ }
+ }
+ spin_unlock_irqrestore(&gart_lock, flags);
+ if (!flushed)
+ printk("nothing to flush?\n");
+}
+EXPORT_SYMBOL_GPL(k8_flush_garts);
+
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index c69fc43cee7b..acd5816b1a6f 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -562,7 +562,7 @@ static struct sysdev_class mce_sysclass = {
set_kset_name("machinecheck"),
};
-static DEFINE_PER_CPU(struct sys_device, device_mce);
+DEFINE_PER_CPU(struct sys_device, device_mce);
/* Why are there no generic functions for this? */
#define ACCESSOR(name, var, start) \
diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c
index d13b241ad094..335200aa2737 100644
--- a/arch/x86_64/kernel/mce_amd.c
+++ b/arch/x86_64/kernel/mce_amd.c
@@ -1,5 +1,5 @@
/*
- * (c) 2005 Advanced Micro Devices, Inc.
+ * (c) 2005, 2006 Advanced Micro Devices, Inc.
* Your use of this code is subject to the terms and conditions of the
* GNU general public license version 2. See "COPYING" or
* http://www.gnu.org/licenses/gpl.html
@@ -8,9 +8,10 @@
*
* Support : jacob.shin@amd.com
*
- * MC4_MISC0 DRAM ECC Error Threshold available under AMD K8 Rev F.
- * MC4_MISC0 exists per physical processor.
+ * April 2006
+ * - added support for AMD Family 0x10 processors
*
+ * All MC4_MISCi registers are shared between multi-cores
*/
#include <linux/cpu.h>
@@ -29,32 +30,45 @@
#include <asm/percpu.h>
#include <asm/idle.h>
-#define PFX "mce_threshold: "
-#define VERSION "version 1.00.9"
-#define NR_BANKS 5
-#define THRESHOLD_MAX 0xFFF
-#define INT_TYPE_APIC 0x00020000
-#define MASK_VALID_HI 0x80000000
-#define MASK_LVTOFF_HI 0x00F00000
-#define MASK_COUNT_EN_HI 0x00080000
-#define MASK_INT_TYPE_HI 0x00060000
-#define MASK_OVERFLOW_HI 0x00010000
+#define PFX "mce_threshold: "
+#define VERSION "version 1.1.1"
+#define NR_BANKS 6
+#define NR_BLOCKS 9
+#define THRESHOLD_MAX 0xFFF
+#define INT_TYPE_APIC 0x00020000
+#define MASK_VALID_HI 0x80000000
+#define MASK_LVTOFF_HI 0x00F00000
+#define MASK_COUNT_EN_HI 0x00080000
+#define MASK_INT_TYPE_HI 0x00060000
+#define MASK_OVERFLOW_HI 0x00010000
#define MASK_ERR_COUNT_HI 0x00000FFF
-#define MASK_OVERFLOW 0x0001000000000000L
+#define MASK_BLKPTR_LO 0xFF000000
+#define MCG_XBLK_ADDR 0xC0000400
-struct threshold_bank {
+struct threshold_block {
+ unsigned int block;
+ unsigned int bank;
unsigned int cpu;
- u8 bank;
- u8 interrupt_enable;
+ u32 address;
+ u16 interrupt_enable;
u16 threshold_limit;
struct kobject kobj;
+ struct list_head miscj;
};
-static struct threshold_bank threshold_defaults = {
+/* defaults used early on boot */
+static struct threshold_block threshold_defaults = {
.interrupt_enable = 0,
.threshold_limit = THRESHOLD_MAX,
};
+struct threshold_bank {
+ struct kobject kobj;
+ struct threshold_block *blocks;
+ cpumask_t cpus;
+};
+static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
+
#ifdef CONFIG_SMP
static unsigned char shared_bank[NR_BANKS] = {
0, 0, 0, 0, 1
@@ -68,12 +82,12 @@ static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
*/
/* must be called with correct cpu affinity */
-static void threshold_restart_bank(struct threshold_bank *b,
+static void threshold_restart_bank(struct threshold_block *b,
int reset, u16 old_limit)
{
u32 mci_misc_hi, mci_misc_lo;
- rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi);
+ rdmsr(b->address, mci_misc_lo, mci_misc_hi);
if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
reset = 1; /* limit cannot be lower than err count */
@@ -94,35 +108,57 @@ static void threshold_restart_bank(struct threshold_bank *b,
(mci_misc_hi &= ~MASK_INT_TYPE_HI);
mci_misc_hi |= MASK_COUNT_EN_HI;
- wrmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi);
+ wrmsr(b->address, mci_misc_lo, mci_misc_hi);
}
+/* cpu init entry point, called from mce.c with preempt off */
void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
{
- int bank;
- u32 mci_misc_lo, mci_misc_hi;
+ unsigned int bank, block;
unsigned int cpu = smp_processor_id();
+ u32 low = 0, high = 0, address = 0;
for (bank = 0; bank < NR_BANKS; ++bank) {
- rdmsr(MSR_IA32_MC0_MISC + bank * 4, mci_misc_lo, mci_misc_hi);
+ for (block = 0; block < NR_BLOCKS; ++block) {
+ if (block == 0)
+ address = MSR_IA32_MC0_MISC + bank * 4;
+ else if (block == 1)
+ address = MCG_XBLK_ADDR
+ + ((low & MASK_BLKPTR_LO) >> 21);
+ else
+ ++address;
+
+ if (rdmsr_safe(address, &low, &high))
+ continue;
- /* !valid, !counter present, bios locked */
- if (!(mci_misc_hi & MASK_VALID_HI) ||
- !(mci_misc_hi & MASK_VALID_HI >> 1) ||
- (mci_misc_hi & MASK_VALID_HI >> 2))
- continue;
+ if (!(high & MASK_VALID_HI)) {
+ if (block)
+ continue;
+ else
+ break;
+ }
- per_cpu(bank_map, cpu) |= (1 << bank);
+ if (!(high & MASK_VALID_HI >> 1) ||
+ (high & MASK_VALID_HI >> 2))
+ continue;
+ if (!block)
+ per_cpu(bank_map, cpu) |= (1 << bank);
#ifdef CONFIG_SMP
- if (shared_bank[bank] && cpu_core_id[cpu])
- continue;
+ if (shared_bank[bank] && c->cpu_core_id)
+ break;
#endif
+ high &= ~MASK_LVTOFF_HI;
+ high |= K8_APIC_EXT_LVT_ENTRY_THRESHOLD << 20;
+ wrmsr(address, low, high);
- setup_threshold_lvt((mci_misc_hi & MASK_LVTOFF_HI) >> 20);
- threshold_defaults.cpu = cpu;
- threshold_defaults.bank = bank;
- threshold_restart_bank(&threshold_defaults, 0, 0);
+ setup_APIC_extened_lvt(K8_APIC_EXT_LVT_ENTRY_THRESHOLD,
+ THRESHOLD_APIC_VECTOR,
+ K8_APIC_EXT_INT_MSG_FIX, 0);
+
+ threshold_defaults.address = address;
+ threshold_restart_bank(&threshold_defaults, 0, 0);
+ }
}
}
@@ -137,8 +173,9 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
*/
asmlinkage void mce_threshold_interrupt(void)
{
- int bank;
+ unsigned int bank, block;
struct mce m;
+ u32 low = 0, high = 0, address = 0;
ack_APIC_irq();
exit_idle();
@@ -150,15 +187,42 @@ asmlinkage void mce_threshold_interrupt(void)
/* assume first bank caused it */
for (bank = 0; bank < NR_BANKS; ++bank) {
- m.bank = MCE_THRESHOLD_BASE + bank;
- rdmsrl(MSR_IA32_MC0_MISC + bank * 4, m.misc);
+ for (block = 0; block < NR_BLOCKS; ++block) {
+ if (block == 0)
+ address = MSR_IA32_MC0_MISC + bank * 4;
+ else if (block == 1)
+ address = MCG_XBLK_ADDR
+ + ((low & MASK_BLKPTR_LO) >> 21);
+ else
+ ++address;
+
+ if (rdmsr_safe(address, &low, &high))
+ continue;
- if (m.misc & MASK_OVERFLOW) {
- mce_log(&m);
- goto out;
+ if (!(high & MASK_VALID_HI)) {
+ if (block)
+ continue;
+ else
+ break;
+ }
+
+ if (!(high & MASK_VALID_HI >> 1) ||
+ (high & MASK_VALID_HI >> 2))
+ continue;
+
+ if (high & MASK_OVERFLOW_HI) {
+ rdmsrl(address, m.misc);
+ rdmsrl(MSR_IA32_MC0_STATUS + bank * 4,
+ m.status);
+ m.bank = K8_MCE_THRESHOLD_BASE
+ + bank * NR_BLOCKS
+ + block;
+ mce_log(&m);
+ goto out;
+ }
}
}
- out:
+out:
irq_exit();
}
@@ -166,20 +230,12 @@ asmlinkage void mce_threshold_interrupt(void)
* Sysfs Interface
*/
-static struct sysdev_class threshold_sysclass = {
- set_kset_name("threshold"),
-};
-
-static DEFINE_PER_CPU(struct sys_device, device_threshold);
-
struct threshold_attr {
- struct attribute attr;
- ssize_t(*show) (struct threshold_bank *, char *);
- ssize_t(*store) (struct threshold_bank *, const char *, size_t count);
+ struct attribute attr;
+ ssize_t(*show) (struct threshold_block *, char *);
+ ssize_t(*store) (struct threshold_block *, const char *, size_t count);
};
-static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
-
static cpumask_t affinity_set(unsigned int cpu)
{
cpumask_t oldmask = current->cpus_allowed;
@@ -194,15 +250,15 @@ static void affinity_restore(cpumask_t oldmask)
set_cpus_allowed(current, oldmask);
}
-#define SHOW_FIELDS(name) \
- static ssize_t show_ ## name(struct threshold_bank * b, char *buf) \
- { \
- return sprintf(buf, "%lx\n", (unsigned long) b->name); \
- }
+#define SHOW_FIELDS(name) \
+static ssize_t show_ ## name(struct threshold_block * b, char *buf) \
+{ \
+ return sprintf(buf, "%lx\n", (unsigned long) b->name); \
+}
SHOW_FIELDS(interrupt_enable)
SHOW_FIELDS(threshold_limit)
-static ssize_t store_interrupt_enable(struct threshold_bank *b,
+static ssize_t store_interrupt_enable(struct threshold_block *b,
const char *buf, size_t count)
{
char *end;
@@ -219,7 +275,7 @@ static ssize_t store_interrupt_enable(struct threshold_bank *b,
return end - buf;
}
-static ssize_t store_threshold_limit(struct threshold_bank *b,
+static ssize_t store_threshold_limit(struct threshold_block *b,
const char *buf, size_t count)
{
char *end;
@@ -242,18 +298,18 @@ static ssize_t store_threshold_limit(struct threshold_bank *b,
return end - buf;
}
-static ssize_t show_error_count(struct threshold_bank *b, char *buf)
+static ssize_t show_error_count(struct threshold_block *b, char *buf)
{
u32 high, low;
cpumask_t oldmask;
oldmask = affinity_set(b->cpu);
- rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, low, high); /* ignore low 32 */
+ rdmsr(b->address, low, high);
affinity_restore(oldmask);
return sprintf(buf, "%x\n",
(high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit));
}
-static ssize_t store_error_count(struct threshold_bank *b,
+static ssize_t store_error_count(struct threshold_block *b,
const char *buf, size_t count)
{
cpumask_t oldmask;
@@ -269,13 +325,13 @@ static ssize_t store_error_count(struct threshold_bank *b,
.store = _store, \
};
-#define ATTR_FIELDS(name) \
- static struct threshold_attr name = \
+#define RW_ATTR(name) \
+static struct threshold_attr name = \
THRESHOLD_ATTR(name, 0644, show_## name, store_## name)
-ATTR_FIELDS(interrupt_enable);
-ATTR_FIELDS(threshold_limit);
-ATTR_FIELDS(error_count);
+RW_ATTR(interrupt_enable);
+RW_ATTR(threshold_limit);
+RW_ATTR(error_count);
static struct attribute *default_attrs[] = {
&interrupt_enable.attr,
@@ -284,12 +340,12 @@ static struct attribute *default_attrs[] = {
NULL
};
-#define to_bank(k) container_of(k,struct threshold_bank,kobj)
-#define to_attr(a) container_of(a,struct threshold_attr,attr)
+#define to_block(k) container_of(k, struct threshold_block, kobj)
+#define to_attr(a) container_of(a, struct threshold_attr, attr)
static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
- struct threshold_bank *b = to_bank(kobj);
+ struct threshold_block *b = to_block(kobj);
struct threshold_attr *a = to_attr(attr);
ssize_t ret;
ret = a->show ? a->show(b, buf) : -EIO;
@@ -299,7 +355,7 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
static ssize_t store(struct kobject *kobj, struct attribute *attr,
const char *buf, size_t count)
{
- struct threshold_bank *b = to_bank(kobj);
+ struct threshold_block *b = to_block(kobj);
struct threshold_attr *a = to_attr(attr);
ssize_t ret;
ret = a->store ? a->store(b, buf, count) : -EIO;
@@ -316,69 +372,174 @@ static struct kobj_type threshold_ktype = {
.default_attrs = default_attrs,
};
+static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
+ unsigned int bank,
+ unsigned int block,
+ u32 address)
+{
+ int err;
+ u32 low, high;
+ struct threshold_block *b = NULL;
+
+ if ((bank >= NR_BANKS) || (block >= NR_BLOCKS))
+ return 0;
+
+ if (rdmsr_safe(address, &low, &high))
+ goto recurse;
+
+ if (!(high & MASK_VALID_HI)) {
+ if (block)
+ goto recurse;
+ else
+ return 0;
+ }
+
+ if (!(high & MASK_VALID_HI >> 1) ||
+ (high & MASK_VALID_HI >> 2))
+ goto recurse;
+
+ b = kzalloc(sizeof(struct threshold_block), GFP_KERNEL);
+ if (!b)
+ return -ENOMEM;
+ memset(b, 0, sizeof(struct threshold_block));
+
+ b->block = block;
+ b->bank = bank;
+ b->cpu = cpu;
+ b->address = address;
+ b->interrupt_enable = 0;
+ b->threshold_limit = THRESHOLD_MAX;
+
+ INIT_LIST_HEAD(&b->miscj);
+
+ if (per_cpu(threshold_banks, cpu)[bank]->blocks)
+ list_add(&b->miscj,
+ &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
+ else
+ per_cpu(threshold_banks, cpu)[bank]->blocks = b;
+
+ kobject_set_name(&b->kobj, "misc%i", block);
+ b->kobj.parent = &per_cpu(threshold_banks, cpu)[bank]->kobj;
+ b->kobj.ktype = &threshold_ktype;
+ err = kobject_register(&b->kobj);
+ if (err)
+ goto out_free;
+recurse:
+ if (!block) {
+ address = (low & MASK_BLKPTR_LO) >> 21;
+ if (!address)
+ return 0;
+ address += MCG_XBLK_ADDR;
+ } else
+ ++address;
+
+ err = allocate_threshold_blocks(cpu, bank, ++block, address);
+ if (err)
+ goto out_free;
+
+ return err;
+
+out_free:
+ if (b) {
+ kobject_unregister(&b->kobj);
+ kfree(b);
+ }
+ return err;
+}
+
/* symlinks sibling shared banks to first core. first core owns dir/files. */
-static __cpuinit int threshold_create_bank(unsigned int cpu, int bank)
+static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
{
- int err = 0;
+ int i, err = 0;
struct threshold_bank *b = NULL;
+ cpumask_t oldmask = CPU_MASK_NONE;
+ char name[32];
+
+ sprintf(name, "threshold_bank%i", bank);
#ifdef CONFIG_SMP
- if (cpu_core_id[cpu] && shared_bank[bank]) { /* symlink */
- char name[16];
- unsigned lcpu = first_cpu(cpu_core_map[cpu]);
- if (cpu_core_id[lcpu])
- goto out; /* first core not up yet */
+ if (cpu_data[cpu].cpu_core_id && shared_bank[bank]) { /* symlink */
+ i = first_cpu(cpu_core_map[cpu]);
+
+ /* first core not up yet */
+ if (cpu_data[i].cpu_core_id)
+ goto out;
+
+ /* already linked */
+ if (per_cpu(threshold_banks, cpu)[bank])
+ goto out;
+
+ b = per_cpu(threshold_banks, i)[bank];
- b = per_cpu(threshold_banks, lcpu)[bank];
if (!b)
goto out;
- sprintf(name, "bank%i", bank);
- err = sysfs_create_link(&per_cpu(device_threshold, cpu).kobj,
+
+ err = sysfs_create_link(&per_cpu(device_mce, cpu).kobj,
&b->kobj, name);
if (err)
goto out;
+
+ b->cpus = cpu_core_map[cpu];
per_cpu(threshold_banks, cpu)[bank] = b;
goto out;
}
#endif
- b = kmalloc(sizeof(struct threshold_bank), GFP_KERNEL);
+ b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
if (!b) {
err = -ENOMEM;
goto out;
}
memset(b, 0, sizeof(struct threshold_bank));
- b->cpu = cpu;
- b->bank = bank;
- b->interrupt_enable = 0;
- b->threshold_limit = THRESHOLD_MAX;
- kobject_set_name(&b->kobj, "bank%i", bank);
- b->kobj.parent = &per_cpu(device_threshold, cpu).kobj;
- b->kobj.ktype = &threshold_ktype;
-
+ kobject_set_name(&b->kobj, "threshold_bank%i", bank);
+ b->kobj.parent = &per_cpu(device_mce, cpu).kobj;
+#ifndef CONFIG_SMP
+ b->cpus = CPU_MASK_ALL;
+#else
+ b->cpus = cpu_core_map[cpu];
+#endif
err = kobject_register(&b->kobj);
- if (err) {
- kfree(b);
- goto out;
- }
+ if (err)
+ goto out_free;
+
per_cpu(threshold_banks, cpu)[bank] = b;
- out:
+
+ oldmask = affinity_set(cpu);
+ err = allocate_threshold_blocks(cpu, bank, 0,
+ MSR_IA32_MC0_MISC + bank * 4);
+ affinity_restore(oldmask);
+
+ if (err)
+ goto out_free;
+
+ for_each_cpu_mask(i, b->cpus) {
+ if (i == cpu)
+ continue;
+
+ err = sysfs_create_link(&per_cpu(device_mce, i).kobj,
+ &b->kobj, name);
+ if (err)
+ goto out;
+
+ per_cpu(threshold_banks, i)[bank] = b;
+ }
+
+ goto out;
+
+out_free:
+ per_cpu(threshold_banks, cpu)[bank] = NULL;
+ kfree(b);
+out:
return err;
}
/* create dir/files for all valid threshold banks */
static __cpuinit int threshold_create_device(unsigned int cpu)
{
- int bank;
+ unsigned int bank;
int err = 0;
- per_cpu(device_threshold, cpu).id = cpu;
- per_cpu(device_threshold, cpu).cls = &threshold_sysclass;
- err = sysdev_register(&per_cpu(device_threshold, cpu));
- if (err)
- goto out;
-
for (bank = 0; bank < NR_BANKS; ++bank) {
if (!(per_cpu(bank_map, cpu) & 1 << bank))
continue;
@@ -386,7 +547,7 @@ static __cpuinit int threshold_create_device(unsigned int cpu)
if (err)
goto out;
}
- out:
+out:
return err;
}
@@ -397,92 +558,85 @@ static __cpuinit int threshold_create_device(unsigned int cpu)
* of shared sysfs dir/files, and rest of the cores will be symlinked to it.
*/
-/* cpu hotplug call removes all symlinks before first core dies */
+static __cpuinit void deallocate_threshold_block(unsigned int cpu,
+ unsigned int bank)
+{
+ struct threshold_block *pos = NULL;
+ struct threshold_block *tmp = NULL;
+ struct threshold_bank *head = per_cpu(threshold_banks, cpu)[bank];
+
+ if (!head)
+ return;
+
+ list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) {
+ kobject_unregister(&pos->kobj);
+ list_del(&pos->miscj);
+ kfree(pos);
+ }
+
+ kfree(per_cpu(threshold_banks, cpu)[bank]->blocks);
+ per_cpu(threshold_banks, cpu)[bank]->blocks = NULL;
+}
+
static __cpuinit void threshold_remove_bank(unsigned int cpu, int bank)
{
+ int i = 0;
struct threshold_bank *b;
- char name[16];
+ char name[32];
b = per_cpu(threshold_banks, cpu)[bank];
+
if (!b)
return;
- if (shared_bank[bank] && atomic_read(&b->kobj.kref.refcount) > 2) {
- sprintf(name, "bank%i", bank);
- sysfs_remove_link(&per_cpu(device_threshold, cpu).kobj, name);
- per_cpu(threshold_banks, cpu)[bank] = NULL;
- } else {
- kobject_unregister(&b->kobj);
- kfree(per_cpu(threshold_banks, cpu)[bank]);
+
+ if (!b->blocks)
+ goto free_out;
+
+ sprintf(name, "threshold_bank%i", bank);
+
+ /* sibling symlink */
+ if (shared_bank[bank] && b->blocks->cpu != cpu) {
+ sysfs_remove_link(&per_cpu(device_mce, cpu).kobj, name);
+ per_cpu(threshold_banks, i)[bank] = NULL;
+ return;
+ }
+
+ /* remove all sibling symlinks before unregistering */
+ for_each_cpu_mask(i, b->cpus) {
+ if (i == cpu)
+ continue;
+
+ sysfs_remove_link(&per_cpu(device_mce, i).kobj, name);
+ per_cpu(threshold_banks, i)[bank] = NULL;
}
+
+ deallocate_threshold_block(cpu, bank);
+
+free_out:
+ kobject_unregister(&b->kobj);
+ kfree(b);
+ per_cpu(threshold_banks, cpu)[bank] = NULL;
}
static __cpuinit void threshold_remove_device(unsigned int cpu)
{
- int bank;
+ unsigned int bank;
for (bank = 0; bank < NR_BANKS; ++bank) {
if (!(per_cpu(bank_map, cpu) & 1 << bank))
continue;
threshold_remove_bank(cpu, bank);
}
- sysdev_unregister(&per_cpu(device_threshold, cpu));
}
-/* link all existing siblings when first core comes up */
-static __cpuinit int threshold_create_symlinks(unsigned int cpu)
-{
- int bank, err = 0;
- unsigned int lcpu = 0;
-
- if (cpu_core_id[cpu])
- return 0;
- for_each_cpu_mask(lcpu, cpu_core_map[cpu]) {
- if (lcpu == cpu)
- continue;
- for (bank = 0; bank < NR_BANKS; ++bank) {
- if (!(per_cpu(bank_map, cpu) & 1 << bank))
- continue;
- if (!shared_bank[bank])
- continue;
- err = threshold_create_bank(lcpu, bank);
- }
- }
- return err;
-}
-
-/* remove all symlinks before first core dies. */
-static __cpuinit void threshold_remove_symlinks(unsigned int cpu)
-{
- int bank;
- unsigned int lcpu = 0;
- if (cpu_core_id[cpu])
- return;
- for_each_cpu_mask(lcpu, cpu_core_map[cpu]) {
- if (lcpu == cpu)
- continue;
- for (bank = 0; bank < NR_BANKS; ++bank) {
- if (!(per_cpu(bank_map, cpu) & 1 << bank))
- continue;
- if (!shared_bank[bank])
- continue;
- threshold_remove_bank(lcpu, bank);
- }
- }
-}
#else /* !CONFIG_HOTPLUG_CPU */
-static __cpuinit void threshold_create_symlinks(unsigned int cpu)
-{
-}
-static __cpuinit void threshold_remove_symlinks(unsigned int cpu)
-{
-}
static void threshold_remove_device(unsigned int cpu)
{
}
#endif
/* get notified when a cpu comes on/off */
-static int threshold_cpu_callback(struct notifier_block *nfb,
+static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
/* cpu was unsigned int to begin with */
@@ -494,13 +648,6 @@ static int threshold_cpu_callback(struct notifier_block *nfb,
switch (action) {
case CPU_ONLINE:
threshold_create_device(cpu);
- threshold_create_symlinks(cpu);
- break;
- case CPU_DOWN_PREPARE:
- threshold_remove_symlinks(cpu);
- break;
- case CPU_DOWN_FAILED:
- threshold_create_symlinks(cpu);
break;
case CPU_DEAD:
threshold_remove_device(cpu);
@@ -512,29 +659,22 @@ static int threshold_cpu_callback(struct notifier_block *nfb,
return NOTIFY_OK;
}
-static struct notifier_block threshold_cpu_notifier = {
+static struct notifier_block threshold_cpu_notifier __cpuinitdata = {
.notifier_call = threshold_cpu_callback,
};
static __init int threshold_init_device(void)
{
- int err;
- int lcpu = 0;
-
- err = sysdev_class_register(&threshold_sysclass);
- if (err)
- goto out;
+ unsigned lcpu = 0;
/* to hit CPUs online before the notifier is up */
for_each_online_cpu(lcpu) {
- err = threshold_create_device(lcpu);
+ int err = threshold_create_device(lcpu);
if (err)
- goto out;
+ return err;
}
register_cpu_notifier(&threshold_cpu_notifier);
-
- out:
- return err;
+ return 0;
}
device_initcall(threshold_init_device);
diff --git a/arch/x86_64/kernel/module.c b/arch/x86_64/kernel/module.c
index bac195c74bcc..9d0958ff547f 100644
--- a/arch/x86_64/kernel/module.c
+++ b/arch/x86_64/kernel/module.c
@@ -145,26 +145,38 @@ int apply_relocate(Elf_Shdr *sechdrs,
return -ENOSYS;
}
-extern void apply_alternatives(void *start, void *end);
-
int module_finalize(const Elf_Ehdr *hdr,
- const Elf_Shdr *sechdrs,
- struct module *me)
+ const Elf_Shdr *sechdrs,
+ struct module *me)
{
- const Elf_Shdr *s;
+ const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL;
char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
- /* look for .altinstructions to patch */
- for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
- void *seg;
- if (strcmp(".altinstructions", secstrings + s->sh_name))
- continue;
- seg = (void *)s->sh_addr;
- apply_alternatives(seg, seg + s->sh_size);
- }
+ for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
+ if (!strcmp(".text", secstrings + s->sh_name))
+ text = s;
+ if (!strcmp(".altinstructions", secstrings + s->sh_name))
+ alt = s;
+ if (!strcmp(".smp_locks", secstrings + s->sh_name))
+ locks= s;
+ }
+
+ if (alt) {
+ /* patch .altinstructions */
+ void *aseg = (void *)alt->sh_addr;
+ apply_alternatives(aseg, aseg + alt->sh_size);
+ }
+ if (locks && text) {
+ void *lseg = (void *)locks->sh_addr;
+ void *tseg = (void *)text->sh_addr;
+ alternatives_smp_module_add(me, me->name,
+ lseg, lseg + locks->sh_size,
+ tseg, tseg + text->sh_size);
+ }
return 0;
}
void module_arch_cleanup(struct module *mod)
{
+ alternatives_smp_module_del(mod);
}
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 4e6357fe0ec3..399489c93132 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -15,11 +15,7 @@
#include <linux/config.h>
#include <linux/mm.h>
#include <linux/delay.h>
-#include <linux/bootmem.h>
-#include <linux/smp_lock.h>
#include <linux/interrupt.h>
-#include <linux/mc146818rtc.h>
-#include <linux/kernel_stat.h>
#include <linux/module.h>
#include <linux/sysdev.h>
#include <linux/nmi.h>
@@ -27,14 +23,11 @@
#include <linux/kprobes.h>
#include <asm/smp.h>
-#include <asm/mtrr.h>
-#include <asm/mpspec.h>
#include <asm/nmi.h>
-#include <asm/msr.h>
#include <asm/proto.h>
#include <asm/kdebug.h>
-#include <asm/local.h>
#include <asm/mce.h>
+#include <asm/intel_arch_perfmon.h>
/*
* lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
@@ -74,6 +67,9 @@ static unsigned int nmi_p4_cccr_val;
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
+#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
+#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
+
#define MSR_P4_MISC_ENABLE 0x1A0
#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12)
@@ -105,7 +101,10 @@ static __cpuinit inline int nmi_known_cpu(void)
case X86_VENDOR_AMD:
return boot_cpu_data.x86 == 15;
case X86_VENDOR_INTEL:
- return boot_cpu_data.x86 == 15;
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+ return 1;
+ else
+ return (boot_cpu_data.x86 == 15);
}
return 0;
}
@@ -211,6 +210,8 @@ int __init setup_nmi_watchdog(char *str)
__setup("nmi_watchdog=", setup_nmi_watchdog);
+static void disable_intel_arch_watchdog(void);
+
static void disable_lapic_nmi_watchdog(void)
{
if (nmi_active <= 0)
@@ -223,6 +224,8 @@ static void disable_lapic_nmi_watchdog(void)
if (boot_cpu_data.x86 == 15) {
wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
+ } else if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+ disable_intel_arch_watchdog();
}
break;
}
@@ -375,6 +378,53 @@ static void setup_k7_watchdog(void)
wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
}
+static void disable_intel_arch_watchdog(void)
+{
+ unsigned ebx;
+
+ /*
+ * Check whether the Architectural PerfMon supports
+ * Unhalted Core Cycles Event or not.
+ * NOTE: Corresponding bit = 0 in ebp indicates event present.
+ */
+ ebx = cpuid_ebx(10);
+ if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+ wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0);
+}
+
+static int setup_intel_arch_watchdog(void)
+{
+ unsigned int evntsel;
+ unsigned ebx;
+
+ /*
+ * Check whether the Architectural PerfMon supports
+ * Unhalted Core Cycles Event or not.
+ * NOTE: Corresponding bit = 0 in ebp indicates event present.
+ */
+ ebx = cpuid_ebx(10);
+ if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+ return 0;
+
+ nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
+
+ clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2);
+ clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2);
+
+ evntsel = ARCH_PERFMON_EVENTSEL_INT
+ | ARCH_PERFMON_EVENTSEL_OS
+ | ARCH_PERFMON_EVENTSEL_USR
+ | ARCH_PERFMON_NMI_EVENT_SEL
+ | ARCH_PERFMON_NMI_EVENT_UMASK;
+
+ wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
+ wrmsrl(MSR_ARCH_PERFMON_PERFCTR0, -((u64)cpu_khz * 1000 / nmi_hz));
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
+ return 1;
+}
+
static int setup_p4_watchdog(void)
{
@@ -428,10 +478,16 @@ void setup_apic_nmi_watchdog(void)
setup_k7_watchdog();
break;
case X86_VENDOR_INTEL:
- if (boot_cpu_data.x86 != 15)
- return;
- if (!setup_p4_watchdog())
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+ if (!setup_intel_arch_watchdog())
+ return;
+ } else if (boot_cpu_data.x86 == 15) {
+ if (!setup_p4_watchdog())
+ return;
+ } else {
return;
+ }
+
break;
default:
@@ -516,7 +572,14 @@ void __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
*/
wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
apic_write(APIC_LVTPC, APIC_DM_NMI);
- }
+ } else if (nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
+ /*
+ * For Intel based architectural perfmon
+ * - LVTPC is masked on interrupt and must be
+ * unmasked by the LVTPC handler.
+ */
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ }
wrmsrl(nmi_perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
}
}
diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c
new file mode 100644
index 000000000000..d91cb843f54d
--- /dev/null
+++ b/arch/x86_64/kernel/pci-calgary.c
@@ -0,0 +1,1018 @@
+/*
+ * Derived from arch/powerpc/kernel/iommu.c
+ *
+ * Copyright (C) 2006 Jon Mason <jdmason@us.ibm.com>, IBM Corporation
+ * Copyright (C) 2006 Muli Ben-Yehuda <muli@il.ibm.com>, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/bitops.h>
+#include <linux/pci_ids.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <asm/proto.h>
+#include <asm/calgary.h>
+#include <asm/tce.h>
+#include <asm/pci-direct.h>
+#include <asm/system.h>
+#include <asm/dma.h>
+
+#define PCI_DEVICE_ID_IBM_CALGARY 0x02a1
+#define PCI_VENDOR_DEVICE_ID_CALGARY \
+ (PCI_VENDOR_ID_IBM | PCI_DEVICE_ID_IBM_CALGARY << 16)
+
+/* we need these for register space address calculation */
+#define START_ADDRESS 0xfe000000
+#define CHASSIS_BASE 0
+#define ONE_BASED_CHASSIS_NUM 1
+
+/* register offsets inside the host bridge space */
+#define PHB_CSR_OFFSET 0x0110
+#define PHB_PLSSR_OFFSET 0x0120
+#define PHB_CONFIG_RW_OFFSET 0x0160
+#define PHB_IOBASE_BAR_LOW 0x0170
+#define PHB_IOBASE_BAR_HIGH 0x0180
+#define PHB_MEM_1_LOW 0x0190
+#define PHB_MEM_1_HIGH 0x01A0
+#define PHB_IO_ADDR_SIZE 0x01B0
+#define PHB_MEM_1_SIZE 0x01C0
+#define PHB_MEM_ST_OFFSET 0x01D0
+#define PHB_AER_OFFSET 0x0200
+#define PHB_CONFIG_0_HIGH 0x0220
+#define PHB_CONFIG_0_LOW 0x0230
+#define PHB_CONFIG_0_END 0x0240
+#define PHB_MEM_2_LOW 0x02B0
+#define PHB_MEM_2_HIGH 0x02C0
+#define PHB_MEM_2_SIZE_HIGH 0x02D0
+#define PHB_MEM_2_SIZE_LOW 0x02E0
+#define PHB_DOSHOLE_OFFSET 0x08E0
+
+/* PHB_CONFIG_RW */
+#define PHB_TCE_ENABLE 0x20000000
+#define PHB_SLOT_DISABLE 0x1C000000
+#define PHB_DAC_DISABLE 0x01000000
+#define PHB_MEM2_ENABLE 0x00400000
+#define PHB_MCSR_ENABLE 0x00100000
+/* TAR (Table Address Register) */
+#define TAR_SW_BITS 0x0000ffffffff800fUL
+#define TAR_VALID 0x0000000000000008UL
+/* CSR (Channel/DMA Status Register) */
+#define CSR_AGENT_MASK 0xffe0ffff
+
+#define MAX_NUM_OF_PHBS 8 /* how many PHBs in total? */
+#define MAX_PHB_BUS_NUM (MAX_NUM_OF_PHBS * 2) /* max dev->bus->number */
+#define PHBS_PER_CALGARY 4
+
+/* register offsets in Calgary's internal register space */
+static const unsigned long tar_offsets[] = {
+ 0x0580 /* TAR0 */,
+ 0x0588 /* TAR1 */,
+ 0x0590 /* TAR2 */,
+ 0x0598 /* TAR3 */
+};
+
+static const unsigned long split_queue_offsets[] = {
+ 0x4870 /* SPLIT QUEUE 0 */,
+ 0x5870 /* SPLIT QUEUE 1 */,
+ 0x6870 /* SPLIT QUEUE 2 */,
+ 0x7870 /* SPLIT QUEUE 3 */
+};
+
+static const unsigned long phb_offsets[] = {
+ 0x8000 /* PHB0 */,
+ 0x9000 /* PHB1 */,
+ 0xA000 /* PHB2 */,
+ 0xB000 /* PHB3 */
+};
+
+void* tce_table_kva[MAX_NUM_OF_PHBS * MAX_NUMNODES];
+unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED;
+static int translate_empty_slots __read_mostly = 0;
+static int calgary_detected __read_mostly = 0;
+
+/*
+ * the bitmap of PHBs the user requested that we disable
+ * translation on.
+ */
+static DECLARE_BITMAP(translation_disabled, MAX_NUMNODES * MAX_PHB_BUS_NUM);
+
+static void tce_cache_blast(struct iommu_table *tbl);
+
+/* enable this to stress test the chip's TCE cache */
+#ifdef CONFIG_IOMMU_DEBUG
+static inline void tce_cache_blast_stress(struct iommu_table *tbl)
+{
+ tce_cache_blast(tbl);
+}
+#else
+static inline void tce_cache_blast_stress(struct iommu_table *tbl)
+{
+}
+#endif /* BLAST_TCE_CACHE_ON_UNMAP */
+
+static inline unsigned int num_dma_pages(unsigned long dma, unsigned int dmalen)
+{
+ unsigned int npages;
+
+ npages = PAGE_ALIGN(dma + dmalen) - (dma & PAGE_MASK);
+ npages >>= PAGE_SHIFT;
+
+ return npages;
+}
+
+static inline int translate_phb(struct pci_dev* dev)
+{
+ int disabled = test_bit(dev->bus->number, translation_disabled);
+ return !disabled;
+}
+
+static void iommu_range_reserve(struct iommu_table *tbl,
+ unsigned long start_addr, unsigned int npages)
+{
+ unsigned long index;
+ unsigned long end;
+
+ index = start_addr >> PAGE_SHIFT;
+
+ /* bail out if we're asked to reserve a region we don't cover */
+ if (index >= tbl->it_size)
+ return;
+
+ end = index + npages;
+ if (end > tbl->it_size) /* don't go off the table */
+ end = tbl->it_size;
+
+ while (index < end) {
+ if (test_bit(index, tbl->it_map))
+ printk(KERN_ERR "Calgary: entry already allocated at "
+ "0x%lx tbl %p dma 0x%lx npages %u\n",
+ index, tbl, start_addr, npages);
+ ++index;
+ }
+ set_bit_string(tbl->it_map, start_addr >> PAGE_SHIFT, npages);
+}
+
+static unsigned long iommu_range_alloc(struct iommu_table *tbl,
+ unsigned int npages)
+{
+ unsigned long offset;
+
+ BUG_ON(npages == 0);
+
+ offset = find_next_zero_string(tbl->it_map, tbl->it_hint,
+ tbl->it_size, npages);
+ if (offset == ~0UL) {
+ tce_cache_blast(tbl);
+ offset = find_next_zero_string(tbl->it_map, 0,
+ tbl->it_size, npages);
+ if (offset == ~0UL) {
+ printk(KERN_WARNING "Calgary: IOMMU full.\n");
+ if (panic_on_overflow)
+ panic("Calgary: fix the allocator.\n");
+ else
+ return bad_dma_address;
+ }
+ }
+
+ set_bit_string(tbl->it_map, offset, npages);
+ tbl->it_hint = offset + npages;
+ BUG_ON(tbl->it_hint > tbl->it_size);
+
+ return offset;
+}
+
+static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *vaddr,
+ unsigned int npages, int direction)
+{
+ unsigned long entry, flags;
+ dma_addr_t ret = bad_dma_address;
+
+ spin_lock_irqsave(&tbl->it_lock, flags);
+
+ entry = iommu_range_alloc(tbl, npages);
+
+ if (unlikely(entry == bad_dma_address))
+ goto error;
+
+ /* set the return dma address */
+ ret = (entry << PAGE_SHIFT) | ((unsigned long)vaddr & ~PAGE_MASK);
+
+ /* put the TCEs in the HW table */
+ tce_build(tbl, entry, npages, (unsigned long)vaddr & PAGE_MASK,
+ direction);
+
+ spin_unlock_irqrestore(&tbl->it_lock, flags);
+
+ return ret;
+
+error:
+ spin_unlock_irqrestore(&tbl->it_lock, flags);
+ printk(KERN_WARNING "Calgary: failed to allocate %u pages in "
+ "iommu %p\n", npages, tbl);
+ return bad_dma_address;
+}
+
+static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
+ unsigned int npages)
+{
+ unsigned long entry;
+ unsigned long i;
+
+ entry = dma_addr >> PAGE_SHIFT;
+
+ BUG_ON(entry + npages > tbl->it_size);
+
+ tce_free(tbl, entry, npages);
+
+ for (i = 0; i < npages; ++i) {
+ if (!test_bit(entry + i, tbl->it_map))
+ printk(KERN_ERR "Calgary: bit is off at 0x%lx "
+ "tbl %p dma 0x%Lx entry 0x%lx npages %u\n",
+ entry + i, tbl, dma_addr, entry, npages);
+ }
+
+ __clear_bit_string(tbl->it_map, entry, npages);
+
+ tce_cache_blast_stress(tbl);
+}
+
+static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
+ unsigned int npages)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&tbl->it_lock, flags);
+
+ __iommu_free(tbl, dma_addr, npages);
+
+ spin_unlock_irqrestore(&tbl->it_lock, flags);
+}
+
+static void __calgary_unmap_sg(struct iommu_table *tbl,
+ struct scatterlist *sglist, int nelems, int direction)
+{
+ while (nelems--) {
+ unsigned int npages;
+ dma_addr_t dma = sglist->dma_address;
+ unsigned int dmalen = sglist->dma_length;
+
+ if (dmalen == 0)
+ break;
+
+ npages = num_dma_pages(dma, dmalen);
+ __iommu_free(tbl, dma, npages);
+ sglist++;
+ }
+}
+
+void calgary_unmap_sg(struct device *dev, struct scatterlist *sglist,
+ int nelems, int direction)
+{
+ unsigned long flags;
+ struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
+
+ if (!translate_phb(to_pci_dev(dev)))
+ return;
+
+ spin_lock_irqsave(&tbl->it_lock, flags);
+
+ __calgary_unmap_sg(tbl, sglist, nelems, direction);
+
+ spin_unlock_irqrestore(&tbl->it_lock, flags);
+}
+
+static int calgary_nontranslate_map_sg(struct device* dev,
+ struct scatterlist *sg, int nelems, int direction)
+{
+ int i;
+
+ for (i = 0; i < nelems; i++ ) {
+ struct scatterlist *s = &sg[i];
+ BUG_ON(!s->page);
+ s->dma_address = virt_to_bus(page_address(s->page) +s->offset);
+ s->dma_length = s->length;
+ }
+ return nelems;
+}
+
+int calgary_map_sg(struct device *dev, struct scatterlist *sg,
+ int nelems, int direction)
+{
+ struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
+ unsigned long flags;
+ unsigned long vaddr;
+ unsigned int npages;
+ unsigned long entry;
+ int i;
+
+ if (!translate_phb(to_pci_dev(dev)))
+ return calgary_nontranslate_map_sg(dev, sg, nelems, direction);
+
+ spin_lock_irqsave(&tbl->it_lock, flags);
+
+ for (i = 0; i < nelems; i++ ) {
+ struct scatterlist *s = &sg[i];
+ BUG_ON(!s->page);
+
+ vaddr = (unsigned long)page_address(s->page) + s->offset;
+ npages = num_dma_pages(vaddr, s->length);
+
+ entry = iommu_range_alloc(tbl, npages);
+ if (entry == bad_dma_address) {
+ /* makes sure unmap knows to stop */
+ s->dma_length = 0;
+ goto error;
+ }
+
+ s->dma_address = (entry << PAGE_SHIFT) | s->offset;
+
+ /* insert into HW table */
+ tce_build(tbl, entry, npages, vaddr & PAGE_MASK,
+ direction);
+
+ s->dma_length = s->length;
+ }
+
+ spin_unlock_irqrestore(&tbl->it_lock, flags);
+
+ return nelems;
+error:
+ __calgary_unmap_sg(tbl, sg, nelems, direction);
+ for (i = 0; i < nelems; i++) {
+ sg[i].dma_address = bad_dma_address;
+ sg[i].dma_length = 0;
+ }
+ spin_unlock_irqrestore(&tbl->it_lock, flags);
+ return 0;
+}
+
+dma_addr_t calgary_map_single(struct device *dev, void *vaddr,
+ size_t size, int direction)
+{
+ dma_addr_t dma_handle = bad_dma_address;
+ unsigned long uaddr;
+ unsigned int npages;
+ struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
+
+ uaddr = (unsigned long)vaddr;
+ npages = num_dma_pages(uaddr, size);
+
+ if (translate_phb(to_pci_dev(dev)))
+ dma_handle = iommu_alloc(tbl, vaddr, npages, direction);
+ else
+ dma_handle = virt_to_bus(vaddr);
+
+ return dma_handle;
+}
+
+void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle,
+ size_t size, int direction)
+{
+ struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
+ unsigned int npages;
+
+ if (!translate_phb(to_pci_dev(dev)))
+ return;
+
+ npages = num_dma_pages(dma_handle, size);
+ iommu_free(tbl, dma_handle, npages);
+}
+
+void* calgary_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag)
+{
+ void *ret = NULL;
+ dma_addr_t mapping;
+ unsigned int npages, order;
+ struct iommu_table *tbl;
+
+ tbl = to_pci_dev(dev)->bus->self->sysdata;
+
+ size = PAGE_ALIGN(size); /* size rounded up to full pages */
+ npages = size >> PAGE_SHIFT;
+ order = get_order(size);
+
+ /* alloc enough pages (and possibly more) */
+ ret = (void *)__get_free_pages(flag, order);
+ if (!ret)
+ goto error;
+ memset(ret, 0, size);
+
+ if (translate_phb(to_pci_dev(dev))) {
+ /* set up tces to cover the allocated range */
+ mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL);
+ if (mapping == bad_dma_address)
+ goto free;
+
+ *dma_handle = mapping;
+ } else /* non translated slot */
+ *dma_handle = virt_to_bus(ret);
+
+ return ret;
+
+free:
+ free_pages((unsigned long)ret, get_order(size));
+ ret = NULL;
+error:
+ return ret;
+}
+
+static struct dma_mapping_ops calgary_dma_ops = {
+ .alloc_coherent = calgary_alloc_coherent,
+ .map_single = calgary_map_single,
+ .unmap_single = calgary_unmap_single,
+ .map_sg = calgary_map_sg,
+ .unmap_sg = calgary_unmap_sg,
+};
+
+static inline int busno_to_phbid(unsigned char num)
+{
+ return bus_to_phb(num) % PHBS_PER_CALGARY;
+}
+
+static inline unsigned long split_queue_offset(unsigned char num)
+{
+ size_t idx = busno_to_phbid(num);
+
+ return split_queue_offsets[idx];
+}
+
+static inline unsigned long tar_offset(unsigned char num)
+{
+ size_t idx = busno_to_phbid(num);
+
+ return tar_offsets[idx];
+}
+
+static inline unsigned long phb_offset(unsigned char num)
+{
+ size_t idx = busno_to_phbid(num);
+
+ return phb_offsets[idx];
+}
+
+static inline void __iomem* calgary_reg(void __iomem *bar, unsigned long offset)
+{
+ unsigned long target = ((unsigned long)bar) | offset;
+ return (void __iomem*)target;
+}
+
+static void tce_cache_blast(struct iommu_table *tbl)
+{
+ u64 val;
+ u32 aer;
+ int i = 0;
+ void __iomem *bbar = tbl->bbar;
+ void __iomem *target;
+
+ /* disable arbitration on the bus */
+ target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_AER_OFFSET);
+ aer = readl(target);
+ writel(0, target);
+
+ /* read plssr to ensure it got there */
+ target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_PLSSR_OFFSET);
+ val = readl(target);
+
+ /* poll split queues until all DMA activity is done */
+ target = calgary_reg(bbar, split_queue_offset(tbl->it_busno));
+ do {
+ val = readq(target);
+ i++;
+ } while ((val & 0xff) != 0xff && i < 100);
+ if (i == 100)
+ printk(KERN_WARNING "Calgary: PCI bus not quiesced, "
+ "continuing anyway\n");
+
+ /* invalidate TCE cache */
+ target = calgary_reg(bbar, tar_offset(tbl->it_busno));
+ writeq(tbl->tar_val, target);
+
+ /* enable arbitration */
+ target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_AER_OFFSET);
+ writel(aer, target);
+ (void)readl(target); /* flush */
+}
+
+static void __init calgary_reserve_mem_region(struct pci_dev *dev, u64 start,
+ u64 limit)
+{
+ unsigned int numpages;
+
+ limit = limit | 0xfffff;
+ limit++;
+
+ numpages = ((limit - start) >> PAGE_SHIFT);
+ iommu_range_reserve(dev->sysdata, start, numpages);
+}
+
+static void __init calgary_reserve_peripheral_mem_1(struct pci_dev *dev)
+{
+ void __iomem *target;
+ u64 low, high, sizelow;
+ u64 start, limit;
+ struct iommu_table *tbl = dev->sysdata;
+ unsigned char busnum = dev->bus->number;
+ void __iomem *bbar = tbl->bbar;
+
+ /* peripheral MEM_1 region */
+ target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_LOW);
+ low = be32_to_cpu(readl(target));
+ target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_HIGH);
+ high = be32_to_cpu(readl(target));
+ target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_SIZE);
+ sizelow = be32_to_cpu(readl(target));
+
+ start = (high << 32) | low;
+ limit = sizelow;
+
+ calgary_reserve_mem_region(dev, start, limit);
+}
+
+static void __init calgary_reserve_peripheral_mem_2(struct pci_dev *dev)
+{
+ void __iomem *target;
+ u32 val32;
+ u64 low, high, sizelow, sizehigh;
+ u64 start, limit;
+ struct iommu_table *tbl = dev->sysdata;
+ unsigned char busnum = dev->bus->number;
+ void __iomem *bbar = tbl->bbar;
+
+ /* is it enabled? */
+ target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET);
+ val32 = be32_to_cpu(readl(target));
+ if (!(val32 & PHB_MEM2_ENABLE))
+ return;
+
+ target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_LOW);
+ low = be32_to_cpu(readl(target));
+ target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_HIGH);
+ high = be32_to_cpu(readl(target));
+ target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_SIZE_LOW);
+ sizelow = be32_to_cpu(readl(target));
+ target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_SIZE_HIGH);
+ sizehigh = be32_to_cpu(readl(target));
+
+ start = (high << 32) | low;
+ limit = (sizehigh << 32) | sizelow;
+
+ calgary_reserve_mem_region(dev, start, limit);
+}
+
+/*
+ * some regions of the IO address space do not get translated, so we
+ * must not give devices IO addresses in those regions. The regions
+ * are the 640KB-1MB region and the two PCI peripheral memory holes.
+ * Reserve all of them in the IOMMU bitmap to avoid giving them out
+ * later.
+ */
+static void __init calgary_reserve_regions(struct pci_dev *dev)
+{
+ unsigned int npages;
+ void __iomem *bbar;
+ unsigned char busnum;
+ u64 start;
+ struct iommu_table *tbl = dev->sysdata;
+
+ bbar = tbl->bbar;
+ busnum = dev->bus->number;
+
+ /* reserve bad_dma_address in case it's a legal address */
+ iommu_range_reserve(tbl, bad_dma_address, 1);
+
+ /* avoid the BIOS/VGA first 640KB-1MB region */
+ start = (640 * 1024);
+ npages = ((1024 - 640) * 1024) >> PAGE_SHIFT;
+ iommu_range_reserve(tbl, start, npages);
+
+ /* reserve the two PCI peripheral memory regions in IO space */
+ calgary_reserve_peripheral_mem_1(dev);
+ calgary_reserve_peripheral_mem_2(dev);
+}
+
+static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar)
+{
+ u64 val64;
+ u64 table_phys;
+ void __iomem *target;
+ int ret;
+ struct iommu_table *tbl;
+
+ /* build TCE tables for each PHB */
+ ret = build_tce_table(dev, bbar);
+ if (ret)
+ return ret;
+
+ calgary_reserve_regions(dev);
+
+ /* set TARs for each PHB */
+ target = calgary_reg(bbar, tar_offset(dev->bus->number));
+ val64 = be64_to_cpu(readq(target));
+
+ /* zero out all TAR bits under sw control */
+ val64 &= ~TAR_SW_BITS;
+
+ tbl = dev->sysdata;
+ table_phys = (u64)__pa(tbl->it_base);
+ val64 |= table_phys;
+
+ BUG_ON(specified_table_size > TCE_TABLE_SIZE_8M);
+ val64 |= (u64) specified_table_size;
+
+ tbl->tar_val = cpu_to_be64(val64);
+ writeq(tbl->tar_val, target);
+ readq(target); /* flush */
+
+ return 0;
+}
+
+static void __init calgary_free_tar(struct pci_dev *dev)
+{
+ u64 val64;
+ struct iommu_table *tbl = dev->sysdata;
+ void __iomem *target;
+
+ target = calgary_reg(tbl->bbar, tar_offset(dev->bus->number));
+ val64 = be64_to_cpu(readq(target));
+ val64 &= ~TAR_SW_BITS;
+ writeq(cpu_to_be64(val64), target);
+ readq(target); /* flush */
+
+ kfree(tbl);
+ dev->sysdata = NULL;
+}
+
+static void calgary_watchdog(unsigned long data)
+{
+ struct pci_dev *dev = (struct pci_dev *)data;
+ struct iommu_table *tbl = dev->sysdata;
+ void __iomem *bbar = tbl->bbar;
+ u32 val32;
+ void __iomem *target;
+
+ target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_CSR_OFFSET);
+ val32 = be32_to_cpu(readl(target));
+
+ /* If no error, the agent ID in the CSR is not valid */
+ if (val32 & CSR_AGENT_MASK) {
+ printk(KERN_EMERG "calgary_watchdog: DMA error on bus %d, "
+ "CSR = %#x\n", dev->bus->number, val32);
+ writel(0, target);
+
+ /* Disable bus that caused the error */
+ target = calgary_reg(bbar, phb_offset(tbl->it_busno) |
+ PHB_CONFIG_RW_OFFSET);
+ val32 = be32_to_cpu(readl(target));
+ val32 |= PHB_SLOT_DISABLE;
+ writel(cpu_to_be32(val32), target);
+ readl(target); /* flush */
+ } else {
+ /* Reset the timer */
+ mod_timer(&tbl->watchdog_timer, jiffies + 2 * HZ);
+ }
+}
+
+static void __init calgary_enable_translation(struct pci_dev *dev)
+{
+ u32 val32;
+ unsigned char busnum;
+ void __iomem *target;
+ void __iomem *bbar;
+ struct iommu_table *tbl;
+
+ busnum = dev->bus->number;
+ tbl = dev->sysdata;
+ bbar = tbl->bbar;
+
+ /* enable TCE in PHB Config Register */
+ target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET);
+ val32 = be32_to_cpu(readl(target));
+ val32 |= PHB_TCE_ENABLE | PHB_DAC_DISABLE | PHB_MCSR_ENABLE;
+
+ printk(KERN_INFO "Calgary: enabling translation on PHB %d\n", busnum);
+ printk(KERN_INFO "Calgary: errant DMAs will now be prevented on this "
+ "bus.\n");
+
+ writel(cpu_to_be32(val32), target);
+ readl(target); /* flush */
+
+ init_timer(&tbl->watchdog_timer);
+ tbl->watchdog_timer.function = &calgary_watchdog;
+ tbl->watchdog_timer.data = (unsigned long)dev;
+ mod_timer(&tbl->watchdog_timer, jiffies);
+}
+
+static void __init calgary_disable_translation(struct pci_dev *dev)
+{
+ u32 val32;
+ unsigned char busnum;
+ void __iomem *target;
+ void __iomem *bbar;
+ struct iommu_table *tbl;
+
+ busnum = dev->bus->number;
+ tbl = dev->sysdata;
+ bbar = tbl->bbar;
+
+ /* disable TCE in PHB Config Register */
+ target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET);
+ val32 = be32_to_cpu(readl(target));
+ val32 &= ~(PHB_TCE_ENABLE | PHB_DAC_DISABLE | PHB_MCSR_ENABLE);
+
+ printk(KERN_INFO "Calgary: disabling translation on PHB %d!\n", busnum);
+ writel(cpu_to_be32(val32), target);
+ readl(target); /* flush */
+
+ del_timer_sync(&tbl->watchdog_timer);
+}
+
+static inline unsigned int __init locate_register_space(struct pci_dev *dev)
+{
+ int rionodeid;
+ u32 address;
+
+ rionodeid = (dev->bus->number % 15 > 4) ? 3 : 2;
+ /*
+ * register space address calculation as follows:
+ * FE0MB-8MB*OneBasedChassisNumber+1MB*(RioNodeId-ChassisBase)
+ * ChassisBase is always zero for x366/x260/x460
+ * RioNodeId is 2 for first Calgary, 3 for second Calgary
+ */
+ address = START_ADDRESS -
+ (0x800000 * (ONE_BASED_CHASSIS_NUM + dev->bus->number / 15)) +
+ (0x100000) * (rionodeid - CHASSIS_BASE);
+ return address;
+}
+
+static int __init calgary_init_one_nontraslated(struct pci_dev *dev)
+{
+ dev->sysdata = NULL;
+ dev->bus->self = dev;
+
+ return 0;
+}
+
+static int __init calgary_init_one(struct pci_dev *dev)
+{
+ u32 address;
+ void __iomem *bbar;
+ int ret;
+
+ address = locate_register_space(dev);
+ /* map entire 1MB of Calgary config space */
+ bbar = ioremap_nocache(address, 1024 * 1024);
+ if (!bbar) {
+ ret = -ENODATA;
+ goto done;
+ }
+
+ ret = calgary_setup_tar(dev, bbar);
+ if (ret)
+ goto iounmap;
+
+ dev->bus->self = dev;
+ calgary_enable_translation(dev);
+
+ return 0;
+
+iounmap:
+ iounmap(bbar);
+done:
+ return ret;
+}
+
+static int __init calgary_init(void)
+{
+ int i, ret = -ENODEV;
+ struct pci_dev *dev = NULL;
+
+ for (i = 0; i <= num_online_nodes() * MAX_NUM_OF_PHBS; i++) {
+ dev = pci_get_device(PCI_VENDOR_ID_IBM,
+ PCI_DEVICE_ID_IBM_CALGARY,
+ dev);
+ if (!dev)
+ break;
+ if (!translate_phb(dev)) {
+ calgary_init_one_nontraslated(dev);
+ continue;
+ }
+ if (!tce_table_kva[i] && !translate_empty_slots) {
+ pci_dev_put(dev);
+ continue;
+ }
+ ret = calgary_init_one(dev);
+ if (ret)
+ goto error;
+ }
+
+ return ret;
+
+error:
+ for (i--; i >= 0; i--) {
+ dev = pci_find_device_reverse(PCI_VENDOR_ID_IBM,
+ PCI_DEVICE_ID_IBM_CALGARY,
+ dev);
+ if (!translate_phb(dev)) {
+ pci_dev_put(dev);
+ continue;
+ }
+ if (!tce_table_kva[i] && !translate_empty_slots)
+ continue;
+ calgary_disable_translation(dev);
+ calgary_free_tar(dev);
+ pci_dev_put(dev);
+ }
+
+ return ret;
+}
+
+static inline int __init determine_tce_table_size(u64 ram)
+{
+ int ret;
+
+ if (specified_table_size != TCE_TABLE_SIZE_UNSPECIFIED)
+ return specified_table_size;
+
+ /*
+ * Table sizes are from 0 to 7 (TCE_TABLE_SIZE_64K to
+ * TCE_TABLE_SIZE_8M). Table size 0 has 8K entries and each
+ * larger table size has twice as many entries, so shift the
+ * max ram address by 13 to divide by 8K and then look at the
+ * order of the result to choose between 0-7.
+ */
+ ret = get_order(ram >> 13);
+ if (ret > TCE_TABLE_SIZE_8M)
+ ret = TCE_TABLE_SIZE_8M;
+
+ return ret;
+}
+
+void __init detect_calgary(void)
+{
+ u32 val;
+ int bus, table_idx;
+ void *tbl;
+ int detected = 0;
+
+ /*
+ * if the user specified iommu=off or iommu=soft or we found
+ * another HW IOMMU already, bail out.
+ */
+ if (swiotlb || no_iommu || iommu_detected)
+ return;
+
+ specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE);
+
+ for (bus = 0, table_idx = 0;
+ bus <= num_online_nodes() * MAX_PHB_BUS_NUM;
+ bus++) {
+ BUG_ON(bus > MAX_NUMNODES * MAX_PHB_BUS_NUM);
+ if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY)
+ continue;
+ if (test_bit(bus, translation_disabled)) {
+ printk(KERN_INFO "Calgary: translation is disabled for "
+ "PHB 0x%x\n", bus);
+ /* skip this phb, don't allocate a tbl for it */
+ tce_table_kva[table_idx] = NULL;
+ table_idx++;
+ continue;
+ }
+ /*
+ * scan the first slot of the PCI bus to see if there
+ * are any devices present
+ */
+ val = read_pci_config(bus, 1, 0, 0);
+ if (val != 0xffffffff || translate_empty_slots) {
+ tbl = alloc_tce_table();
+ if (!tbl)
+ goto cleanup;
+ detected = 1;
+ } else
+ tbl = NULL;
+
+ tce_table_kva[table_idx] = tbl;
+ table_idx++;
+ }
+
+ if (detected) {
+ iommu_detected = 1;
+ calgary_detected = 1;
+ printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected. "
+ "TCE table spec is %d.\n", specified_table_size);
+ }
+ return;
+
+cleanup:
+ for (--table_idx; table_idx >= 0; --table_idx)
+ if (tce_table_kva[table_idx])
+ free_tce_table(tce_table_kva[table_idx]);
+}
+
+int __init calgary_iommu_init(void)
+{
+ int ret;
+
+ if (no_iommu || swiotlb)
+ return -ENODEV;
+
+ if (!calgary_detected)
+ return -ENODEV;
+
+ /* ok, we're trying to use Calgary - let's roll */
+ printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n");
+
+ ret = calgary_init();
+ if (ret) {
+ printk(KERN_ERR "PCI-DMA: Calgary init failed %d, "
+ "falling back to no_iommu\n", ret);
+ if (end_pfn > MAX_DMA32_PFN)
+ printk(KERN_ERR "WARNING more than 4GB of memory, "
+ "32bit PCI may malfunction.\n");
+ return ret;
+ }
+
+ force_iommu = 1;
+ dma_ops = &calgary_dma_ops;
+
+ return 0;
+}
+
+static int __init calgary_parse_options(char *p)
+{
+ unsigned int bridge;
+ size_t len;
+ char* endp;
+
+ while (*p) {
+ if (!strncmp(p, "64k", 3))
+ specified_table_size = TCE_TABLE_SIZE_64K;
+ else if (!strncmp(p, "128k", 4))
+ specified_table_size = TCE_TABLE_SIZE_128K;
+ else if (!strncmp(p, "256k", 4))
+ specified_table_size = TCE_TABLE_SIZE_256K;
+ else if (!strncmp(p, "512k", 4))
+ specified_table_size = TCE_TABLE_SIZE_512K;
+ else if (!strncmp(p, "1M", 2))
+ specified_table_size = TCE_TABLE_SIZE_1M;
+ else if (!strncmp(p, "2M", 2))
+ specified_table_size = TCE_TABLE_SIZE_2M;
+ else if (!strncmp(p, "4M", 2))
+ specified_table_size = TCE_TABLE_SIZE_4M;
+ else if (!strncmp(p, "8M", 2))
+ specified_table_size = TCE_TABLE_SIZE_8M;
+
+ len = strlen("translate_empty_slots");
+ if (!strncmp(p, "translate_empty_slots", len))
+ translate_empty_slots = 1;
+
+ len = strlen("disable");
+ if (!strncmp(p, "disable", len)) {
+ p += len;
+ if (*p == '=')
+ ++p;
+ if (*p == '\0')
+ break;
+ bridge = simple_strtol(p, &endp, 0);
+ if (p == endp)
+ break;
+
+ if (bridge <= (num_online_nodes() * MAX_PHB_BUS_NUM)) {
+ printk(KERN_INFO "Calgary: disabling "
+ "translation for PHB 0x%x\n", bridge);
+ set_bit(bridge, translation_disabled);
+ }
+ }
+
+ p = strpbrk(p, ",");
+ if (!p)
+ break;
+
+ p++; /* skip ',' */
+ }
+ return 1;
+}
+__setup("calgary=", calgary_parse_options);
diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c
index a9275c9557cf..9c44f4f2433d 100644
--- a/arch/x86_64/kernel/pci-dma.c
+++ b/arch/x86_64/kernel/pci-dma.c
@@ -9,6 +9,7 @@
#include <linux/module.h>
#include <asm/io.h>
#include <asm/proto.h>
+#include <asm/calgary.h>
int iommu_merge __read_mostly = 0;
EXPORT_SYMBOL(iommu_merge);
@@ -33,12 +34,15 @@ int panic_on_overflow __read_mostly = 0;
int force_iommu __read_mostly= 0;
#endif
+/* Set this to 1 if there is a HW IOMMU in the system */
+int iommu_detected __read_mostly = 0;
+
/* Dummy device used for NULL arguments (normally ISA). Better would
be probably a smaller DMA mask, but this is bug-to-bug compatible
to i386. */
struct device fallback_dev = {
.bus_id = "fallback device",
- .coherent_dma_mask = 0xffffffff,
+ .coherent_dma_mask = DMA_32BIT_MASK,
.dma_mask = &fallback_dev.coherent_dma_mask,
};
@@ -77,7 +81,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
dev = &fallback_dev;
dma_mask = dev->coherent_dma_mask;
if (dma_mask == 0)
- dma_mask = 0xffffffff;
+ dma_mask = DMA_32BIT_MASK;
/* Don't invoke OOM killer */
gfp |= __GFP_NORETRY;
@@ -90,7 +94,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
larger than 16MB and in this case we have a chance of
finding fitting memory in the next higher zone first. If
not retry with true GFP_DMA. -AK */
- if (dma_mask <= 0xffffffff)
+ if (dma_mask <= DMA_32BIT_MASK)
gfp |= GFP_DMA32;
again:
@@ -111,7 +115,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
/* Don't use the 16MB ZONE_DMA unless absolutely
needed. It's better to use remapping first. */
- if (dma_mask < 0xffffffff && !(gfp & GFP_DMA)) {
+ if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) {
gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
goto again;
}
@@ -174,7 +178,7 @@ int dma_supported(struct device *dev, u64 mask)
/* Copied from i386. Doesn't make much sense, because it will
only work for pci_alloc_coherent.
The caller just has to use GFP_DMA in this case. */
- if (mask < 0x00ffffff)
+ if (mask < DMA_24BIT_MASK)
return 0;
/* Tell the device to use SAC when IOMMU force is on. This
@@ -189,7 +193,7 @@ int dma_supported(struct device *dev, u64 mask)
SAC for these. Assume all masks <= 40 bits are of this
type. Normally this doesn't make any difference, but gives
more gentle handling of IOMMU overflow. */
- if (iommu_sac_force && (mask >= 0xffffffffffULL)) {
+ if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) {
printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id,mask);
return 0;
}
@@ -266,7 +270,7 @@ __init int iommu_setup(char *p)
swiotlb = 1;
#endif
-#ifdef CONFIG_GART_IOMMU
+#ifdef CONFIG_IOMMU
gart_parse_options(p);
#endif
@@ -276,3 +280,40 @@ __init int iommu_setup(char *p)
}
return 1;
}
+__setup("iommu=", iommu_setup);
+
+void __init pci_iommu_alloc(void)
+{
+ /*
+ * The order of these functions is important for
+ * fall-back/fail-over reasons
+ */
+#ifdef CONFIG_IOMMU
+ iommu_hole_init();
+#endif
+
+#ifdef CONFIG_CALGARY_IOMMU
+ detect_calgary();
+#endif
+
+#ifdef CONFIG_SWIOTLB
+ pci_swiotlb_init();
+#endif
+}
+
+static int __init pci_iommu_init(void)
+{
+#ifdef CONFIG_CALGARY_IOMMU
+ calgary_iommu_init();
+#endif
+
+#ifdef CONFIG_IOMMU
+ gart_iommu_init();
+#endif
+
+ no_iommu_init();
+ return 0;
+}
+
+/* Must execute after PCI subsystem */
+fs_initcall(pci_iommu_init);
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index 82a7c9bfdfa0..4ca674d16b09 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -32,6 +32,7 @@
#include <asm/kdebug.h>
#include <asm/swiotlb.h>
#include <asm/dma.h>
+#include <asm/k8.h>
unsigned long iommu_bus_base; /* GART remapping area (physical) */
static unsigned long iommu_size; /* size of remapping area bytes */
@@ -46,8 +47,6 @@ u32 *iommu_gatt_base; /* Remapping table */
also seen with Qlogic at least). */
int iommu_fullflush = 1;
-#define MAX_NB 8
-
/* Allocation bitmap for the remapping area */
static DEFINE_SPINLOCK(iommu_bitmap_lock);
static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */
@@ -63,13 +62,6 @@ static u32 gart_unmapped_entry;
#define to_pages(addr,size) \
(round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
-#define for_all_nb(dev) \
- dev = NULL; \
- while ((dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1103, dev))!=NULL)
-
-static struct pci_dev *northbridges[MAX_NB];
-static u32 northbridge_flush_word[MAX_NB];
-
#define EMERGENCY_PAGES 32 /* = 128KB */
#ifdef CONFIG_AGP
@@ -93,7 +85,7 @@ static unsigned long alloc_iommu(int size)
offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size);
if (offset == -1) {
need_flush = 1;
- offset = find_next_zero_string(iommu_gart_bitmap,0,next_bit,size);
+ offset = find_next_zero_string(iommu_gart_bitmap,0,iommu_pages,size);
}
if (offset != -1) {
set_bit_string(iommu_gart_bitmap, offset, size);
@@ -120,44 +112,17 @@ static void free_iommu(unsigned long offset, int size)
/*
* Use global flush state to avoid races with multiple flushers.
*/
-static void flush_gart(struct device *dev)
+static void flush_gart(void)
{
unsigned long flags;
- int flushed = 0;
- int i, max;
-
spin_lock_irqsave(&iommu_bitmap_lock, flags);
- if (need_flush) {
- max = 0;
- for (i = 0; i < MAX_NB; i++) {
- if (!northbridges[i])
- continue;
- pci_write_config_dword(northbridges[i], 0x9c,
- northbridge_flush_word[i] | 1);
- flushed++;
- max = i;
- }
- for (i = 0; i <= max; i++) {
- u32 w;
- if (!northbridges[i])
- continue;
- /* Make sure the hardware actually executed the flush. */
- for (;;) {
- pci_read_config_dword(northbridges[i], 0x9c, &w);
- if (!(w & 1))
- break;
- cpu_relax();
- }
- }
- if (!flushed)
- printk("nothing to flush?\n");
+ if (need_flush) {
+ k8_flush_garts();
need_flush = 0;
}
spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
}
-
-
#ifdef CONFIG_IOMMU_LEAK
#define SET_LEAK(x) if (iommu_leak_tab) \
@@ -266,7 +231,7 @@ static dma_addr_t gart_map_simple(struct device *dev, char *buf,
size_t size, int dir)
{
dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir);
- flush_gart(dev);
+ flush_gart();
return map;
}
@@ -289,6 +254,28 @@ dma_addr_t gart_map_single(struct device *dev, void *addr, size_t size, int dir)
}
/*
+ * Free a DMA mapping.
+ */
+void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
+ size_t size, int direction)
+{
+ unsigned long iommu_page;
+ int npages;
+ int i;
+
+ if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE ||
+ dma_addr >= iommu_bus_base + iommu_size)
+ return;
+ iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
+ npages = to_pages(dma_addr, size);
+ for (i = 0; i < npages; i++) {
+ iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
+ CLEAR_LEAK(iommu_page + i);
+ }
+ free_iommu(iommu_page, npages);
+}
+
+/*
* Wrapper for pci_unmap_single working with scatterlists.
*/
void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
@@ -299,7 +286,7 @@ void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int di
struct scatterlist *s = &sg[i];
if (!s->dma_length || !s->length)
break;
- dma_unmap_single(dev, s->dma_address, s->dma_length, dir);
+ gart_unmap_single(dev, s->dma_address, s->dma_length, dir);
}
}
@@ -329,7 +316,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
s->dma_address = addr;
s->dma_length = s->length;
}
- flush_gart(dev);
+ flush_gart();
return nents;
}
@@ -436,13 +423,13 @@ int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0)
goto error;
out++;
- flush_gart(dev);
+ flush_gart();
if (out < nents)
sg[out].dma_length = 0;
return out;
error:
- flush_gart(NULL);
+ flush_gart();
gart_unmap_sg(dev, sg, nents, dir);
/* When it was forced or merged try again in a dumb way */
if (force_iommu || iommu_merge) {
@@ -458,28 +445,6 @@ error:
return 0;
}
-/*
- * Free a DMA mapping.
- */
-void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
- size_t size, int direction)
-{
- unsigned long iommu_page;
- int npages;
- int i;
-
- if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE ||
- dma_addr >= iommu_bus_base + iommu_size)
- return;
- iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
- npages = to_pages(dma_addr, size);
- for (i = 0; i < npages; i++) {
- iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
- CLEAR_LEAK(iommu_page + i);
- }
- free_iommu(iommu_page, npages);
-}
-
static int no_agp;
static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
@@ -532,10 +497,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
void *gatt;
unsigned aper_base, new_aper_base;
unsigned aper_size, gatt_size, new_aper_size;
-
+ int i;
+
printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
aper_size = aper_base = info->aper_size = 0;
- for_all_nb(dev) {
+ dev = NULL;
+ for (i = 0; i < num_k8_northbridges; i++) {
+ dev = k8_northbridges[i];
new_aper_base = read_aperture(dev, &new_aper_size);
if (!new_aper_base)
goto nommu;
@@ -558,11 +526,12 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
panic("Cannot allocate GATT table");
memset(gatt, 0, gatt_size);
agp_gatt_table = gatt;
-
- for_all_nb(dev) {
+
+ for (i = 0; i < num_k8_northbridges; i++) {
u32 ctl;
u32 gatt_reg;
+ dev = k8_northbridges[i];
gatt_reg = __pa(gatt) >> 12;
gatt_reg <<= 4;
pci_write_config_dword(dev, 0x98, gatt_reg);
@@ -573,7 +542,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
pci_write_config_dword(dev, 0x90, ctl);
}
- flush_gart(NULL);
+ flush_gart();
printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10);
return 0;
@@ -602,15 +571,19 @@ static struct dma_mapping_ops gart_dma_ops = {
.unmap_sg = gart_unmap_sg,
};
-static int __init pci_iommu_init(void)
+void __init gart_iommu_init(void)
{
struct agp_kern_info info;
unsigned long aper_size;
unsigned long iommu_start;
- struct pci_dev *dev;
unsigned long scratch;
long i;
+ if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) {
+ printk(KERN_INFO "PCI-GART: No AMD northbridge found.\n");
+ return;
+ }
+
#ifndef CONFIG_AGP_AMD64
no_agp = 1;
#else
@@ -622,7 +595,11 @@ static int __init pci_iommu_init(void)
#endif
if (swiotlb)
- return -1;
+ return;
+
+ /* Did we detect a different HW IOMMU? */
+ if (iommu_detected && !iommu_aperture)
+ return;
if (no_iommu ||
(!force_iommu && end_pfn <= MAX_DMA32_PFN) ||
@@ -634,15 +611,7 @@ static int __init pci_iommu_init(void)
"but IOMMU not available.\n"
KERN_ERR "WARNING 32bit PCI may malfunction.\n");
}
- return -1;
- }
-
- i = 0;
- for_all_nb(dev)
- i++;
- if (i > MAX_NB) {
- printk(KERN_ERR "PCI-GART: Too many northbridges (%ld). Disabled\n", i);
- return -1;
+ return;
}
printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n");
@@ -707,26 +676,10 @@ static int __init pci_iommu_init(void)
for (i = EMERGENCY_PAGES; i < iommu_pages; i++)
iommu_gatt_base[i] = gart_unmapped_entry;
- for_all_nb(dev) {
- u32 flag;
- int cpu = PCI_SLOT(dev->devfn) - 24;
- if (cpu >= MAX_NB)
- continue;
- northbridges[cpu] = dev;
- pci_read_config_dword(dev, 0x9c, &flag); /* cache flush word */
- northbridge_flush_word[cpu] = flag;
- }
-
- flush_gart(NULL);
-
+ flush_gart();
dma_ops = &gart_dma_ops;
-
- return 0;
}
-/* Must execute after PCI subsystem */
-fs_initcall(pci_iommu_init);
-
void gart_parse_options(char *p)
{
int arg;
diff --git a/arch/x86_64/kernel/pci-nommu.c b/arch/x86_64/kernel/pci-nommu.c
index 1f6ecc62061d..c4c3cc36ac5b 100644
--- a/arch/x86_64/kernel/pci-nommu.c
+++ b/arch/x86_64/kernel/pci-nommu.c
@@ -4,6 +4,8 @@
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/string.h>
+#include <linux/dma-mapping.h>
+
#include <asm/proto.h>
#include <asm/processor.h>
#include <asm/dma.h>
@@ -12,10 +14,11 @@ static int
check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
{
if (hwdev && bus + size > *hwdev->dma_mask) {
- if (*hwdev->dma_mask >= 0xffffffffULL)
+ if (*hwdev->dma_mask >= DMA_32BIT_MASK)
printk(KERN_ERR
- "nommu_%s: overflow %Lx+%lu of device mask %Lx\n",
- name, (long long)bus, size, (long long)*hwdev->dma_mask);
+ "nommu_%s: overflow %Lx+%zu of device mask %Lx\n",
+ name, (long long)bus, size,
+ (long long)*hwdev->dma_mask);
return 0;
}
return 1;
diff --git a/arch/x86_64/kernel/pci-swiotlb.c b/arch/x86_64/kernel/pci-swiotlb.c
index 990ed67896f2..ebdb77fe2057 100644
--- a/arch/x86_64/kernel/pci-swiotlb.c
+++ b/arch/x86_64/kernel/pci-swiotlb.c
@@ -31,7 +31,7 @@ struct dma_mapping_ops swiotlb_dma_ops = {
void pci_swiotlb_init(void)
{
/* don't initialize swiotlb if iommu=off (no_iommu=1) */
- if (!iommu_aperture && !no_iommu &&
+ if (!iommu_detected && !no_iommu &&
(end_pfn > MAX_DMA32_PFN || force_iommu))
swiotlb = 1;
if (swiotlb) {
diff --git a/arch/x86_64/kernel/pmtimer.c b/arch/x86_64/kernel/pmtimer.c
index bf421ed26808..7554458dc9cb 100644
--- a/arch/x86_64/kernel/pmtimer.c
+++ b/arch/x86_64/kernel/pmtimer.c
@@ -27,7 +27,7 @@
/* The I/O port the PMTMR resides at.
* The location is detected during setup_arch(),
* in arch/i386/kernel/acpi/boot.c */
-u32 pmtmr_ioport;
+u32 pmtmr_ioport __read_mostly;
/* value of the Power timer at last timer interrupt */
static u32 offset_delay;
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index fb903e65e079..ca56e19b8b6e 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -10,7 +10,6 @@
* Andi Kleen.
*
* CPU hotplug support - ashok.raj@intel.com
- * $Id: process.c,v 1.38 2002/01/15 10:08:03 ak Exp $
*/
/*
@@ -64,6 +63,7 @@ EXPORT_SYMBOL(boot_option_idle_override);
* Powermanagement idle function, if any..
*/
void (*pm_idle)(void);
+EXPORT_SYMBOL(pm_idle);
static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
static ATOMIC_NOTIFIER_HEAD(idle_notifier);
@@ -111,7 +111,7 @@ static void default_idle(void)
{
local_irq_enable();
- clear_thread_flag(TIF_POLLING_NRFLAG);
+ current_thread_info()->status &= ~TS_POLLING;
smp_mb__after_clear_bit();
while (!need_resched()) {
local_irq_disable();
@@ -120,7 +120,7 @@ static void default_idle(void)
else
local_irq_enable();
}
- set_thread_flag(TIF_POLLING_NRFLAG);
+ current_thread_info()->status |= TS_POLLING;
}
/*
@@ -203,8 +203,7 @@ static inline void play_dead(void)
*/
void cpu_idle (void)
{
- set_thread_flag(TIF_POLLING_NRFLAG);
-
+ current_thread_info()->status |= TS_POLLING;
/* endless idle loop with no priority at all */
while (1) {
while (!need_resched()) {
@@ -335,7 +334,7 @@ void show_regs(struct pt_regs *regs)
{
printk("CPU %d:", smp_processor_id());
__show_regs(regs);
- show_trace(&regs->rsp);
+ show_trace(NULL, regs, (void *)(regs + 1));
}
/*
@@ -365,8 +364,11 @@ void flush_thread(void)
struct task_struct *tsk = current;
struct thread_info *t = current_thread_info();
- if (t->flags & _TIF_ABI_PENDING)
+ if (t->flags & _TIF_ABI_PENDING) {
t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);
+ if (t->flags & _TIF_IA32)
+ current_thread_info()->status |= TS_COMPAT;
+ }
tsk->thread.debugreg0 = 0;
tsk->thread.debugreg1 = 0;
diff --git a/arch/x86_64/kernel/reboot.c b/arch/x86_64/kernel/reboot.c
index 57117b8beb2b..2d6769847456 100644
--- a/arch/x86_64/kernel/reboot.c
+++ b/arch/x86_64/kernel/reboot.c
@@ -20,6 +20,7 @@
* Power off function, if any
*/
void (*pm_power_off)(void);
+EXPORT_SYMBOL(pm_power_off);
static long no_idt[3];
static enum {
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index fb850b52b4da..1129918ede82 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -5,8 +5,6 @@
*
* Nov 2001 Dave Jones <davej@suse.de>
* Forked from i386 setup code.
- *
- * $Id$
*/
/*
@@ -65,9 +63,7 @@
#include <asm/setup.h>
#include <asm/mach_apic.h>
#include <asm/numa.h>
-#include <asm/swiotlb.h>
#include <asm/sections.h>
-#include <asm/gart-mapping.h>
#include <asm/dmi.h>
/*
@@ -75,6 +71,7 @@
*/
struct cpuinfo_x86 boot_cpu_data __read_mostly;
+EXPORT_SYMBOL(boot_cpu_data);
unsigned long mmu_cr4_features;
@@ -103,12 +100,14 @@ char dmi_alloc_data[DMI_MAX_DATA];
* Setup options
*/
struct screen_info screen_info;
+EXPORT_SYMBOL(screen_info);
struct sys_desc_table_struct {
unsigned short length;
unsigned char table[0];
};
struct edid_info edid_info;
+EXPORT_SYMBOL_GPL(edid_info);
struct e820map e820;
extern int root_mountflags;
@@ -473,80 +472,6 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
}
#endif
-/* Use inline assembly to define this because the nops are defined
- as inline assembly strings in the include files and we cannot
- get them easily into strings. */
-asm("\t.data\nk8nops: "
- K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
- K8_NOP7 K8_NOP8);
-
-extern unsigned char k8nops[];
-static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
- NULL,
- k8nops,
- k8nops + 1,
- k8nops + 1 + 2,
- k8nops + 1 + 2 + 3,
- k8nops + 1 + 2 + 3 + 4,
- k8nops + 1 + 2 + 3 + 4 + 5,
- k8nops + 1 + 2 + 3 + 4 + 5 + 6,
- k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
-};
-
-extern char __vsyscall_0;
-
-/* Replace instructions with better alternatives for this CPU type.
-
- This runs before SMP is initialized to avoid SMP problems with
- self modifying code. This implies that assymetric systems where
- APs have less capabilities than the boot processor are not handled.
- In this case boot with "noreplacement". */
-void apply_alternatives(void *start, void *end)
-{
- struct alt_instr *a;
- int diff, i, k;
- for (a = start; (void *)a < end; a++) {
- u8 *instr;
-
- if (!boot_cpu_has(a->cpuid))
- continue;
-
- BUG_ON(a->replacementlen > a->instrlen);
- instr = a->instr;
- /* vsyscall code is not mapped yet. resolve it manually. */
- if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END)
- instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
- __inline_memcpy(instr, a->replacement, a->replacementlen);
- diff = a->instrlen - a->replacementlen;
-
- /* Pad the rest with nops */
- for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
- k = diff;
- if (k > ASM_NOP_MAX)
- k = ASM_NOP_MAX;
- __inline_memcpy(instr + i, k8_nops[k], k);
- }
- }
-}
-
-static int no_replacement __initdata = 0;
-
-void __init alternative_instructions(void)
-{
- extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
- if (no_replacement)
- return;
- apply_alternatives(__alt_instructions, __alt_instructions_end);
-}
-
-static int __init noreplacement_setup(char *s)
-{
- no_replacement = 1;
- return 1;
-}
-
-__setup("noreplacement", noreplacement_setup);
-
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
struct edd edd;
#ifdef CONFIG_EDD_MODULE
@@ -779,10 +704,6 @@ void __init setup_arch(char **cmdline_p)
e820_setup_gap();
-#ifdef CONFIG_GART_IOMMU
- iommu_hole_init();
-#endif
-
#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
conswitchp = &vga_con;
@@ -867,24 +788,32 @@ static int nearby_node(int apicid)
static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
- int cpu = smp_processor_id();
unsigned bits;
#ifdef CONFIG_NUMA
+ int cpu = smp_processor_id();
int node = 0;
unsigned apicid = hard_smp_processor_id();
#endif
+ unsigned ecx = cpuid_ecx(0x80000008);
+
+ c->x86_max_cores = (ecx & 0xff) + 1;
- bits = 0;
- while ((1 << bits) < c->x86_max_cores)
- bits++;
+ /* CPU telling us the core id bits shift? */
+ bits = (ecx >> 12) & 0xF;
+
+ /* Otherwise recompute */
+ if (bits == 0) {
+ while ((1 << bits) < c->x86_max_cores)
+ bits++;
+ }
/* Low order bits define the core id (index of core in socket) */
- cpu_core_id[cpu] = phys_proc_id[cpu] & ((1 << bits)-1);
+ c->cpu_core_id = c->phys_proc_id & ((1 << bits)-1);
/* Convert the APIC ID into the socket ID */
- phys_proc_id[cpu] = phys_pkg_id(bits);
+ c->phys_proc_id = phys_pkg_id(bits);
#ifdef CONFIG_NUMA
- node = phys_proc_id[cpu];
+ node = c->phys_proc_id;
if (apicid_to_node[apicid] != NUMA_NO_NODE)
node = apicid_to_node[apicid];
if (!node_online(node)) {
@@ -897,7 +826,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
but in the same order as the HT nodeids.
If that doesn't result in a usable node fall back to the
path for the previous case. */
- int ht_nodeid = apicid - (phys_proc_id[0] << bits);
+ int ht_nodeid = apicid - (cpu_data[0].phys_proc_id << bits);
if (ht_nodeid >= 0 &&
apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
node = apicid_to_node[ht_nodeid];
@@ -907,15 +836,13 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
}
numa_set_node(cpu, node);
- printk(KERN_INFO "CPU %d/%x(%d) -> Node %d -> Core %d\n",
- cpu, apicid, c->x86_max_cores, node, cpu_core_id[cpu]);
+ printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
#endif
#endif
}
-static int __init init_amd(struct cpuinfo_x86 *c)
+static void __init init_amd(struct cpuinfo_x86 *c)
{
- int r;
unsigned level;
#ifdef CONFIG_SMP
@@ -948,8 +875,8 @@ static int __init init_amd(struct cpuinfo_x86 *c)
if (c->x86 >= 6)
set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability);
- r = get_model_name(c);
- if (!r) {
+ level = get_model_name(c);
+ if (!level) {
switch (c->x86) {
case 15:
/* Should distinguish Models here, but this is only
@@ -964,13 +891,12 @@ static int __init init_amd(struct cpuinfo_x86 *c)
if (c->x86_power & (1<<8))
set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
- if (c->extended_cpuid_level >= 0x80000008) {
- c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
-
+ /* Multi core CPU? */
+ if (c->extended_cpuid_level >= 0x80000008)
amd_detect_cmp(c);
- }
- return r;
+ /* Fix cpuid4 emulation for more */
+ num_cache_leaves = 3;
}
static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -978,13 +904,14 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
#ifdef CONFIG_SMP
u32 eax, ebx, ecx, edx;
int index_msb, core_bits;
- int cpu = smp_processor_id();
cpuid(1, &eax, &ebx, &ecx, &edx);
- if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
+ if (!cpu_has(c, X86_FEATURE_HT))
return;
+ if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
+ goto out;
smp_num_siblings = (ebx & 0xff0000) >> 16;
@@ -999,10 +926,7 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
}
index_msb = get_count_order(smp_num_siblings);
- phys_proc_id[cpu] = phys_pkg_id(index_msb);
-
- printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
- phys_proc_id[cpu]);
+ c->phys_proc_id = phys_pkg_id(index_msb);
smp_num_siblings = smp_num_siblings / c->x86_max_cores;
@@ -1010,13 +934,15 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
core_bits = get_count_order(c->x86_max_cores);
- cpu_core_id[cpu] = phys_pkg_id(index_msb) &
+ c->cpu_core_id = phys_pkg_id(index_msb) &
((1 << core_bits) - 1);
-
- if (c->x86_max_cores > 1)
- printk(KERN_INFO "CPU: Processor Core ID: %d\n",
- cpu_core_id[cpu]);
}
+out:
+ if ((c->x86_max_cores * smp_num_siblings) > 1) {
+ printk(KERN_INFO "CPU: Physical Processor ID: %d\n", c->phys_proc_id);
+ printk(KERN_INFO "CPU: Processor Core ID: %d\n", c->cpu_core_id);
+ }
+
#endif
}
@@ -1025,15 +951,12 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
*/
static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
{
- unsigned int eax;
+ unsigned int eax, t;
if (c->cpuid_level < 4)
return 1;
- __asm__("cpuid"
- : "=a" (eax)
- : "0" (4), "c" (0)
- : "bx", "dx");
+ cpuid_count(4, 0, &eax, &t, &t, &t);
if (eax & 0x1f)
return ((eax >> 26) + 1);
@@ -1046,16 +969,17 @@ static void srat_detect_node(void)
#ifdef CONFIG_NUMA
unsigned node;
int cpu = smp_processor_id();
+ int apicid = hard_smp_processor_id();
/* Don't do the funky fallback heuristics the AMD version employs
for now. */
- node = apicid_to_node[hard_smp_processor_id()];
+ node = apicid_to_node[apicid];
if (node == NUMA_NO_NODE)
node = first_node(node_online_map);
numa_set_node(cpu, node);
if (acpi_numa > 0)
- printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node);
+ printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
#endif
}
@@ -1065,6 +989,13 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
unsigned n;
init_intel_cacheinfo(c);
+ if (c->cpuid_level > 9 ) {
+ unsigned eax = cpuid_eax(10);
+ /* Check for version and the number of counters */
+ if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
+ set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability);
+ }
+
n = c->extended_cpuid_level;
if (n >= 0x80000008) {
unsigned eax = cpuid_eax(0x80000008);
@@ -1156,7 +1087,7 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
}
#ifdef CONFIG_SMP
- phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
+ c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
#endif
}
@@ -1283,7 +1214,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL,
- NULL, "fxsr_opt", "rdtscp", NULL, NULL, "lm", "3dnowext", "3dnow",
+ NULL, "fxsr_opt", NULL, "rdtscp", NULL, "lm", "3dnowext", "3dnow",
/* Transmeta-defined */
"recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
@@ -1294,7 +1225,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
/* Other (Linux-defined) */
"cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL,
"constant_tsc", NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ "up", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@@ -1364,9 +1295,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
#ifdef CONFIG_SMP
if (smp_num_siblings * c->x86_max_cores > 1) {
int cpu = c - cpu_data;
- seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]);
+ seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu]));
- seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]);
+ seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
}
#endif
@@ -1440,7 +1371,7 @@ struct seq_operations cpuinfo_op = {
.show = show_cpuinfo,
};
-#ifdef CONFIG_INPUT_PCSPKR
+#if defined(CONFIG_INPUT_PCSPKR) || defined(CONFIG_INPUT_PCSPKR_MODULE)
#include <linux/platform_device.h>
static __init int add_pcspkr(void)
{
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
index 8a691fa6d393..f5934cb4a2b6 100644
--- a/arch/x86_64/kernel/setup64.c
+++ b/arch/x86_64/kernel/setup64.c
@@ -3,7 +3,6 @@
* Copyright (C) 1995 Linus Torvalds
* Copyright 2001, 2002, 2003 SuSE Labs / Andi Kleen.
* See setup.c for older changelog.
- * $Id: setup64.c,v 1.12 2002/03/21 10:09:17 ak Exp $
*/
#include <linux/config.h>
#include <linux/init.h>
@@ -31,6 +30,7 @@ char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,};
cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(_cpu_pda);
struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned;
struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
@@ -38,6 +38,7 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned")));
unsigned long __supported_pte_mask __read_mostly = ~0UL;
+EXPORT_SYMBOL(__supported_pte_mask);
static int do_not_nx __cpuinitdata = 0;
/* noexec=on|off
diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c
index e5f5ce7909a3..28161170fb0a 100644
--- a/arch/x86_64/kernel/signal.c
+++ b/arch/x86_64/kernel/signal.c
@@ -7,8 +7,6 @@
* 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
* 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
* 2000-2002 x86-64 support by Andi Kleen
- *
- * $Id: signal.c,v 1.18 2001/10/17 22:30:37 ak Exp $
*/
#include <linux/sched.h>
@@ -239,7 +237,6 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
rsp = regs->rsp - 128;
/* This is the X/Open sanctioned signal stack switching. */
- /* RED-PEN: redzone on that stack? */
if (ka->sa.sa_flags & SA_ONSTACK) {
if (sas_ss_flags(rsp) == 0)
rsp = current->sas_ss_sp + current->sas_ss_size;
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index 4a6628b14d99..8188bae9c6d5 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -224,6 +224,7 @@ void flush_tlb_current_task(void)
flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
preempt_enable();
}
+EXPORT_SYMBOL(flush_tlb_current_task);
void flush_tlb_mm (struct mm_struct * mm)
{
@@ -244,6 +245,7 @@ void flush_tlb_mm (struct mm_struct * mm)
preempt_enable();
}
+EXPORT_SYMBOL(flush_tlb_mm);
void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
{
@@ -266,6 +268,7 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
preempt_enable();
}
+EXPORT_SYMBOL(flush_tlb_page);
static void do_flush_tlb_all(void* info)
{
@@ -443,6 +446,7 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
spin_unlock(&call_lock);
return 0;
}
+EXPORT_SYMBOL(smp_call_function);
void smp_stop_cpu(void)
{
@@ -460,7 +464,7 @@ static void smp_really_stop_cpu(void *dummy)
{
smp_stop_cpu();
for (;;)
- asm("hlt");
+ halt();
}
void smp_send_stop(void)
@@ -520,13 +524,13 @@ asmlinkage void smp_call_function_interrupt(void)
int safe_smp_processor_id(void)
{
- int apicid, i;
+ unsigned apicid, i;
if (disable_apic)
return 0;
apicid = hard_smp_processor_id();
- if (x86_cpu_to_apicid[apicid] == apicid)
+ if (apicid < NR_CPUS && x86_cpu_to_apicid[apicid] == apicid)
return apicid;
for (i = 0; i < NR_CPUS; ++i) {
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 71a7222cf9ce..4e9755179ecf 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -63,13 +63,11 @@
/* Number of siblings per CPU package */
int smp_num_siblings = 1;
-/* Package ID of each logical CPU */
-u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
-/* core ID of each logical CPU */
-u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
+EXPORT_SYMBOL(smp_num_siblings);
/* Last level cache ID of each logical CPU */
u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};
+EXPORT_SYMBOL(cpu_llc_id);
/* Bitmask of currently online CPUs */
cpumask_t cpu_online_map __read_mostly;
@@ -82,18 +80,21 @@ EXPORT_SYMBOL(cpu_online_map);
*/
cpumask_t cpu_callin_map;
cpumask_t cpu_callout_map;
+EXPORT_SYMBOL(cpu_callout_map);
cpumask_t cpu_possible_map;
EXPORT_SYMBOL(cpu_possible_map);
/* Per CPU bogomips and other parameters */
struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
+EXPORT_SYMBOL(cpu_data);
/* Set when the idlers are all forked */
int smp_threads_ready;
/* representing HT siblings of each logical CPU */
cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(cpu_sibling_map);
/* representing HT and core siblings of each logical CPU */
cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
@@ -472,8 +473,8 @@ static inline void set_cpu_sibling_map(int cpu)
if (smp_num_siblings > 1) {
for_each_cpu_mask(i, cpu_sibling_setup_map) {
- if (phys_proc_id[cpu] == phys_proc_id[i] &&
- cpu_core_id[cpu] == cpu_core_id[i]) {
+ if (c[cpu].phys_proc_id == c[i].phys_proc_id &&
+ c[cpu].cpu_core_id == c[i].cpu_core_id) {
cpu_set(i, cpu_sibling_map[cpu]);
cpu_set(cpu, cpu_sibling_map[i]);
cpu_set(i, cpu_core_map[cpu]);
@@ -500,7 +501,7 @@ static inline void set_cpu_sibling_map(int cpu)
cpu_set(i, c[cpu].llc_shared_map);
cpu_set(cpu, c[i].llc_shared_map);
}
- if (phys_proc_id[cpu] == phys_proc_id[i]) {
+ if (c[cpu].phys_proc_id == c[i].phys_proc_id) {
cpu_set(i, cpu_core_map[cpu]);
cpu_set(cpu, cpu_core_map[i]);
/*
@@ -797,6 +798,8 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
}
+ alternatives_smp_switch(1);
+
c_idle.idle = get_idle_for_cpu(cpu);
if (c_idle.idle) {
@@ -1199,8 +1202,8 @@ static void remove_siblinginfo(int cpu)
cpu_clear(cpu, cpu_sibling_map[sibling]);
cpus_clear(cpu_sibling_map[cpu]);
cpus_clear(cpu_core_map[cpu]);
- phys_proc_id[cpu] = BAD_APICID;
- cpu_core_id[cpu] = BAD_APICID;
+ c[cpu].phys_proc_id = 0;
+ c[cpu].cpu_core_id = 0;
cpu_clear(cpu, cpu_sibling_setup_map);
}
@@ -1259,6 +1262,8 @@ void __cpu_die(unsigned int cpu)
/* They ack this in play_dead by setting CPU_DEAD */
if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
printk ("CPU %d is now offline\n", cpu);
+ if (1 == num_online_cpus())
+ alternatives_smp_switch(0);
return;
}
msleep(100);
diff --git a/arch/x86_64/kernel/tce.c b/arch/x86_64/kernel/tce.c
new file mode 100644
index 000000000000..8d4c67f61b8e
--- /dev/null
+++ b/arch/x86_64/kernel/tce.c
@@ -0,0 +1,202 @@
+/*
+ * Derived from arch/powerpc/platforms/pseries/iommu.c
+ *
+ * Copyright (C) 2006 Jon Mason <jdmason@us.ibm.com>, IBM Corporation
+ * Copyright (C) 2006 Muli Ben-Yehuda <muli@il.ibm.com>, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/bootmem.h>
+#include <asm/tce.h>
+#include <asm/calgary.h>
+#include <asm/proto.h>
+
+/* flush a tce at 'tceaddr' to main memory */
+static inline void flush_tce(void* tceaddr)
+{
+ /* a single tce can't cross a cache line */
+ if (cpu_has_clflush)
+ asm volatile("clflush (%0)" :: "r" (tceaddr));
+ else
+ asm volatile("wbinvd":::"memory");
+}
+
+void tce_build(struct iommu_table *tbl, unsigned long index,
+ unsigned int npages, unsigned long uaddr, int direction)
+{
+ u64* tp;
+ u64 t;
+ u64 rpn;
+
+ t = (1 << TCE_READ_SHIFT);
+ if (direction != DMA_TO_DEVICE)
+ t |= (1 << TCE_WRITE_SHIFT);
+
+ tp = ((u64*)tbl->it_base) + index;
+
+ while (npages--) {
+ rpn = (virt_to_bus((void*)uaddr)) >> PAGE_SHIFT;
+ t &= ~TCE_RPN_MASK;
+ t |= (rpn << TCE_RPN_SHIFT);
+
+ *tp = cpu_to_be64(t);
+ flush_tce(tp);
+
+ uaddr += PAGE_SIZE;
+ tp++;
+ }
+}
+
+void tce_free(struct iommu_table *tbl, long index, unsigned int npages)
+{
+ u64* tp;
+
+ tp = ((u64*)tbl->it_base) + index;
+
+ while (npages--) {
+ *tp = cpu_to_be64(0);
+ flush_tce(tp);
+ tp++;
+ }
+}
+
+static inline unsigned int table_size_to_number_of_entries(unsigned char size)
+{
+ /*
+ * size is the order of the table, 0-7
+ * smallest table is 8K entries, so shift result by 13 to
+ * multiply by 8K
+ */
+ return (1 << size) << 13;
+}
+
+static int tce_table_setparms(struct pci_dev *dev, struct iommu_table *tbl)
+{
+ unsigned int bitmapsz;
+ unsigned int tce_table_index;
+ unsigned long bmppages;
+ int ret;
+
+ tbl->it_busno = dev->bus->number;
+
+ /* set the tce table size - measured in entries */
+ tbl->it_size = table_size_to_number_of_entries(specified_table_size);
+
+ tce_table_index = bus_to_phb(tbl->it_busno);
+ tbl->it_base = (unsigned long)tce_table_kva[tce_table_index];
+ if (!tbl->it_base) {
+ printk(KERN_ERR "Calgary: iommu_table_setparms: "
+ "no table allocated?!\n");
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ /*
+ * number of bytes needed for the bitmap size in number of
+ * entries; we need one bit per entry
+ */
+ bitmapsz = tbl->it_size / BITS_PER_BYTE;
+ bmppages = __get_free_pages(GFP_KERNEL, get_order(bitmapsz));
+ if (!bmppages) {
+ printk(KERN_ERR "Calgary: cannot allocate bitmap\n");
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ tbl->it_map = (unsigned long*)bmppages;
+
+ memset(tbl->it_map, 0, bitmapsz);
+
+ tbl->it_hint = 0;
+
+ spin_lock_init(&tbl->it_lock);
+
+ return 0;
+
+done:
+ return ret;
+}
+
+int build_tce_table(struct pci_dev *dev, void __iomem *bbar)
+{
+ struct iommu_table *tbl;
+ int ret;
+
+ if (dev->sysdata) {
+ printk(KERN_ERR "Calgary: dev %p has sysdata %p\n",
+ dev, dev->sysdata);
+ BUG();
+ }
+
+ tbl = kzalloc(sizeof(struct iommu_table), GFP_KERNEL);
+ if (!tbl) {
+ printk(KERN_ERR "Calgary: error allocating iommu_table\n");
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ ret = tce_table_setparms(dev, tbl);
+ if (ret)
+ goto free_tbl;
+
+ tce_free(tbl, 0, tbl->it_size);
+
+ tbl->bbar = bbar;
+
+ /*
+ * NUMA is already using the bus's sysdata pointer, so we use
+ * the bus's pci_dev's sysdata instead.
+ */
+ dev->sysdata = tbl;
+
+ return 0;
+
+free_tbl:
+ kfree(tbl);
+done:
+ return ret;
+}
+
+void* alloc_tce_table(void)
+{
+ unsigned int size;
+
+ size = table_size_to_number_of_entries(specified_table_size);
+ size *= TCE_ENTRY_SIZE;
+
+ return __alloc_bootmem_low(size, size, 0);
+}
+
+void free_tce_table(void *tbl)
+{
+ unsigned int size;
+
+ if (!tbl)
+ return;
+
+ size = table_size_to_number_of_entries(specified_table_size);
+ size *= TCE_ENTRY_SIZE;
+
+ free_bootmem(__pa(tbl), size);
+}
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 7392570f975d..ebbee6f59ff5 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -8,7 +8,7 @@
* Copyright (c) 1995 Markus Kuhn
* Copyright (c) 1996 Ingo Molnar
* Copyright (c) 1998 Andrea Arcangeli
- * Copyright (c) 2002 Vojtech Pavlik
+ * Copyright (c) 2002,2006 Vojtech Pavlik
* Copyright (c) 2003 Andi Kleen
* RTC support code taken from arch/i386/kernel/timers/time_hpet.c
*/
@@ -51,14 +51,21 @@ extern int using_apic_timer;
static char *time_init_gtod(void);
DEFINE_SPINLOCK(rtc_lock);
+EXPORT_SYMBOL(rtc_lock);
DEFINE_SPINLOCK(i8253_lock);
int nohpet __initdata = 0;
static int notsc __initdata = 0;
-#undef HPET_HACK_ENABLE_DANGEROUS
+#define USEC_PER_TICK (USEC_PER_SEC / HZ)
+#define NSEC_PER_TICK (NSEC_PER_SEC / HZ)
+#define FSEC_PER_TICK (FSEC_PER_SEC / HZ)
+
+#define NS_SCALE 10 /* 2^10, carefully chosen */
+#define US_SCALE 32 /* 2^32, arbitralrily chosen */
unsigned int cpu_khz; /* TSC clocks / usec, not used here */
+EXPORT_SYMBOL(cpu_khz);
static unsigned long hpet_period; /* fsecs / HPET clock */
unsigned long hpet_tick; /* HPET clocks / interrupt */
int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */
@@ -90,7 +97,7 @@ static inline unsigned int do_gettimeoffset_tsc(void)
t = get_cycles_sync();
if (t < vxtime.last_tsc)
t = vxtime.last_tsc; /* hack */
- x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32;
+ x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> US_SCALE;
return x;
}
@@ -98,7 +105,7 @@ static inline unsigned int do_gettimeoffset_hpet(void)
{
/* cap counter read to one tick to avoid inconsistencies */
unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last;
- return (min(counter,hpet_tick) * vxtime.quot) >> 32;
+ return (min(counter,hpet_tick) * vxtime.quot) >> US_SCALE;
}
unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc;
@@ -118,7 +125,7 @@ void do_gettimeofday(struct timeval *tv)
seq = read_seqbegin(&xtime_lock);
sec = xtime.tv_sec;
- usec = xtime.tv_nsec / 1000;
+ usec = xtime.tv_nsec / NSEC_PER_USEC;
/* i386 does some correction here to keep the clock
monotonous even when ntpd is fixing drift.
@@ -129,14 +136,14 @@ void do_gettimeofday(struct timeval *tv)
in arch/x86_64/kernel/vsyscall.c and export all needed
variables in vmlinux.lds. -AK */
- t = (jiffies - wall_jiffies) * (1000000L / HZ) +
+ t = (jiffies - wall_jiffies) * USEC_PER_TICK +
do_gettimeoffset();
usec += t;
} while (read_seqretry(&xtime_lock, seq));
- tv->tv_sec = sec + usec / 1000000;
- tv->tv_usec = usec % 1000000;
+ tv->tv_sec = sec + usec / USEC_PER_SEC;
+ tv->tv_usec = usec % USEC_PER_SEC;
}
EXPORT_SYMBOL(do_gettimeofday);
@@ -157,8 +164,8 @@ int do_settimeofday(struct timespec *tv)
write_seqlock_irq(&xtime_lock);
- nsec -= do_gettimeoffset() * 1000 +
- (jiffies - wall_jiffies) * (NSEC_PER_SEC/HZ);
+ nsec -= do_gettimeoffset() * NSEC_PER_USEC +
+ (jiffies - wall_jiffies) * NSEC_PER_TICK;
wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
@@ -288,7 +295,7 @@ unsigned long long monotonic_clock(void)
this_offset = hpet_readl(HPET_COUNTER);
} while (read_seqretry(&xtime_lock, seq));
offset = (this_offset - last_offset);
- offset *= (NSEC_PER_SEC/HZ) / hpet_tick;
+ offset *= NSEC_PER_TICK / hpet_tick;
} else {
do {
seq = read_seqbegin(&xtime_lock);
@@ -297,7 +304,8 @@ unsigned long long monotonic_clock(void)
base = monotonic_base;
} while (read_seqretry(&xtime_lock, seq));
this_offset = get_cycles_sync();
- offset = (this_offset - last_offset)*1000 / cpu_khz;
+ /* FIXME: 1000 or 1000000? */
+ offset = (this_offset - last_offset)*1000 / cpu_khz;
}
return base + offset;
}
@@ -382,7 +390,7 @@ void main_timer_handler(struct pt_regs *regs)
}
monotonic_base +=
- (offset - vxtime.last)*(NSEC_PER_SEC/HZ) / hpet_tick;
+ (offset - vxtime.last) * NSEC_PER_TICK / hpet_tick;
vxtime.last = offset;
#ifdef CONFIG_X86_PM_TIMER
@@ -391,24 +399,25 @@ void main_timer_handler(struct pt_regs *regs)
#endif
} else {
offset = (((tsc - vxtime.last_tsc) *
- vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ);
+ vxtime.tsc_quot) >> US_SCALE) - USEC_PER_TICK;
if (offset < 0)
offset = 0;
- if (offset > (USEC_PER_SEC / HZ)) {
- lost = offset / (USEC_PER_SEC / HZ);
- offset %= (USEC_PER_SEC / HZ);
+ if (offset > USEC_PER_TICK) {
+ lost = offset / USEC_PER_TICK;
+ offset %= USEC_PER_TICK;
}
- monotonic_base += (tsc - vxtime.last_tsc)*1000000/cpu_khz ;
+ /* FIXME: 1000 or 1000000? */
+ monotonic_base += (tsc - vxtime.last_tsc) * 1000000 / cpu_khz;
vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot;
if ((((tsc - vxtime.last_tsc) *
- vxtime.tsc_quot) >> 32) < offset)
+ vxtime.tsc_quot) >> US_SCALE) < offset)
vxtime.last_tsc = tsc -
- (((long) offset << 32) / vxtime.tsc_quot) - 1;
+ (((long) offset << US_SCALE) / vxtime.tsc_quot) - 1;
}
if (lost > 0) {
@@ -468,16 +477,15 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
}
static unsigned int cyc2ns_scale __read_mostly;
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
static inline void set_cyc2ns_scale(unsigned long cpu_khz)
{
- cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
+ cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz;
}
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
{
- return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
+ return (cyc * cyc2ns_scale) >> NS_SCALE;
}
unsigned long long sched_clock(void)
@@ -490,7 +498,7 @@ unsigned long long sched_clock(void)
Disadvantage is a small drift between CPUs in some configurations,
but that should be tolerable. */
if (__vxtime.mode == VXTIME_HPET)
- return (hpet_readl(HPET_COUNTER) * vxtime.quot) >> 32;
+ return (hpet_readl(HPET_COUNTER) * vxtime.quot) >> US_SCALE;
#endif
/* Could do CPU core sync here. Opteron can execute rdtsc speculatively,
@@ -633,7 +641,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
if (!(freq->flags & CPUFREQ_CONST_LOOPS))
- vxtime.tsc_quot = (1000L << 32) / cpu_khz;
+ vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
}
set_cyc2ns_scale(cpu_khz_ref);
@@ -789,8 +797,8 @@ static int hpet_timer_stop_set_go(unsigned long tick)
if (hpet_use_timer) {
hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
HPET_TN_32BIT, HPET_T0_CFG);
- hpet_writel(hpet_tick, HPET_T0_CMP);
- hpet_writel(hpet_tick, HPET_T0_CMP); /* AK: why twice? */
+ hpet_writel(hpet_tick, HPET_T0_CMP); /* next interrupt */
+ hpet_writel(hpet_tick, HPET_T0_CMP); /* period */
cfg |= HPET_CFG_LEGACY;
}
/*
@@ -825,8 +833,7 @@ static int hpet_init(void)
if (hpet_period < 100000 || hpet_period > 100000000)
return -1;
- hpet_tick = (1000000000L * (USEC_PER_SEC / HZ) + hpet_period / 2) /
- hpet_period;
+ hpet_tick = (FSEC_PER_TICK + hpet_period / 2) / hpet_period;
hpet_use_timer = (id & HPET_ID_LEGSUP);
@@ -890,18 +897,6 @@ void __init time_init(void)
char *timename;
char *gtod;
-#ifdef HPET_HACK_ENABLE_DANGEROUS
- if (!vxtime.hpet_address) {
- printk(KERN_WARNING "time.c: WARNING: Enabling HPET base "
- "manually!\n");
- outl(0x800038a0, 0xcf8);
- outl(0xff000001, 0xcfc);
- outl(0x800038a0, 0xcf8);
- vxtime.hpet_address = inl(0xcfc) & 0xfffffffe;
- printk(KERN_WARNING "time.c: WARNING: Enabled HPET "
- "at %#lx.\n", vxtime.hpet_address);
- }
-#endif
if (nohpet)
vxtime.hpet_address = 0;
@@ -912,7 +907,7 @@ void __init time_init(void)
-xtime.tv_sec, -xtime.tv_nsec);
if (!hpet_init())
- vxtime_hz = (1000000000000000L + hpet_period / 2) / hpet_period;
+ vxtime_hz = (FSEC_PER_SEC + hpet_period / 2) / hpet_period;
else
vxtime.hpet_address = 0;
@@ -941,8 +936,8 @@ void __init time_init(void)
vxtime_hz / 1000000, vxtime_hz % 1000000, timename, gtod);
printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
cpu_khz / 1000, cpu_khz % 1000);
- vxtime.quot = (1000000L << 32) / vxtime_hz;
- vxtime.tsc_quot = (1000L << 32) / cpu_khz;
+ vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz;
+ vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
vxtime.last_tsc = get_cycles_sync();
setup_irq(0, &irq0);
@@ -956,10 +951,10 @@ void __init time_init(void)
__cpuinit int unsynchronized_tsc(void)
{
#ifdef CONFIG_SMP
- if (oem_force_hpet_timer())
+ if (apic_is_clustered_box())
return 1;
/* Intel systems are normally all synchronized. Exceptions
- are handled in the OEM check above. */
+ are handled in the check above. */
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
return 0;
#endif
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index cea335e8746c..3d11a2fe45b7 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -6,8 +6,6 @@
*
* Pentium III FXSR, SSE support
* Gareth Hughes <gareth@valinux.com>, May 2000
- *
- * $Id: traps.c,v 1.36 2002/03/24 11:09:10 ak Exp $
*/
/*
@@ -31,6 +29,7 @@
#include <linux/nmi.h>
#include <linux/kprobes.h>
#include <linux/kexec.h>
+#include <linux/unwind.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -41,7 +40,7 @@
#include <asm/i387.h>
#include <asm/kdebug.h>
#include <asm/processor.h>
-
+#include <asm/unwind.h>
#include <asm/smp.h>
#include <asm/pgalloc.h>
#include <asm/pda.h>
@@ -71,6 +70,7 @@ asmlinkage void machine_check(void);
asmlinkage void spurious_interrupt_bug(void);
ATOMIC_NOTIFIER_HEAD(die_chain);
+EXPORT_SYMBOL(die_chain);
int register_die_notifier(struct notifier_block *nb)
{
@@ -107,7 +107,8 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
preempt_enable_no_resched();
}
-static int kstack_depth_to_print = 10;
+static int kstack_depth_to_print = 12;
+static int call_trace = 1;
#ifdef CONFIG_KALLSYMS
#include <linux/kallsyms.h>
@@ -191,6 +192,25 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
return NULL;
}
+static int show_trace_unwind(struct unwind_frame_info *info, void *context)
+{
+ int i = 11, n = 0;
+
+ while (unwind(info) == 0 && UNW_PC(info)) {
+ ++n;
+ if (i > 50) {
+ printk("\n ");
+ i = 7;
+ } else
+ i += printk(" ");
+ i += printk_address(UNW_PC(info));
+ if (arch_unw_user_mode(info))
+ break;
+ }
+ printk("\n");
+ return n;
+}
+
/*
* x86-64 can have upto three kernel stacks:
* process stack
@@ -198,15 +218,39 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
* severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
*/
-void show_trace(unsigned long *stack)
+void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack)
{
const unsigned cpu = safe_smp_processor_id();
unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
- int i;
+ int i = 11;
unsigned used = 0;
printk("\nCall Trace:");
+ if (!tsk)
+ tsk = current;
+
+ if (call_trace >= 0) {
+ int unw_ret = 0;
+ struct unwind_frame_info info;
+
+ if (regs) {
+ if (unwind_init_frame_info(&info, tsk, regs) == 0)
+ unw_ret = show_trace_unwind(&info, NULL);
+ } else if (tsk == current)
+ unw_ret = unwind_init_running(&info, show_trace_unwind, NULL);
+ else {
+ if (unwind_init_blocked(&info, tsk) == 0)
+ unw_ret = show_trace_unwind(&info, NULL);
+ }
+ if (unw_ret > 0) {
+ if (call_trace > 0)
+ return;
+ printk("Legacy call trace:");
+ i = 18;
+ }
+ }
+
#define HANDLE_STACK(cond) \
do while (cond) { \
unsigned long addr = *stack++; \
@@ -229,7 +273,7 @@ void show_trace(unsigned long *stack)
} \
} while (0)
- for(i = 11; ; ) {
+ for(; ; ) {
const char *id;
unsigned long *estack_end;
estack_end = in_exception_stack(cpu, (unsigned long)stack,
@@ -264,7 +308,7 @@ void show_trace(unsigned long *stack)
printk("\n");
}
-void show_stack(struct task_struct *tsk, unsigned long * rsp)
+static void _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long * rsp)
{
unsigned long *stack;
int i;
@@ -298,7 +342,12 @@ void show_stack(struct task_struct *tsk, unsigned long * rsp)
printk("%016lx ", *stack++);
touch_nmi_watchdog();
}
- show_trace((unsigned long *)rsp);
+ show_trace(tsk, regs, rsp);
+}
+
+void show_stack(struct task_struct *tsk, unsigned long * rsp)
+{
+ _show_stack(tsk, NULL, rsp);
}
/*
@@ -307,7 +356,7 @@ void show_stack(struct task_struct *tsk, unsigned long * rsp)
void dump_stack(void)
{
unsigned long dummy;
- show_trace(&dummy);
+ show_trace(NULL, NULL, &dummy);
}
EXPORT_SYMBOL(dump_stack);
@@ -334,7 +383,7 @@ void show_registers(struct pt_regs *regs)
if (in_kernel) {
printk("Stack: ");
- show_stack(NULL, (unsigned long*)rsp);
+ _show_stack(NULL, regs, (unsigned long*)rsp);
printk("\nCode: ");
if (regs->rip < PAGE_OFFSET)
@@ -383,6 +432,7 @@ void out_of_line_bug(void)
{
BUG();
}
+EXPORT_SYMBOL(out_of_line_bug);
#endif
static DEFINE_SPINLOCK(die_lock);
@@ -1012,3 +1062,14 @@ static int __init kstack_setup(char *s)
}
__setup("kstack=", kstack_setup);
+static int __init call_trace_setup(char *s)
+{
+ if (strcmp(s, "old") == 0)
+ call_trace = -1;
+ else if (strcmp(s, "both") == 0)
+ call_trace = 0;
+ else if (strcmp(s, "new") == 0)
+ call_trace = 1;
+ return 1;
+}
+__setup("call_trace=", call_trace_setup);
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index b81f473c4a19..1c6a5f322919 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -45,6 +45,15 @@ SECTIONS
RODATA
+#ifdef CONFIG_STACK_UNWIND
+ . = ALIGN(8);
+ .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) {
+ __start_unwind = .;
+ *(.eh_frame)
+ __end_unwind = .;
+ }
+#endif
+
/* Data */
.data : AT(ADDR(.data) - LOAD_OFFSET) {
*(.data)
@@ -131,6 +140,26 @@ SECTIONS
*(.data.page_aligned)
}
+ /* might get freed after init */
+ . = ALIGN(4096);
+ __smp_alt_begin = .;
+ __smp_alt_instructions = .;
+ .smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) {
+ *(.smp_altinstructions)
+ }
+ __smp_alt_instructions_end = .;
+ . = ALIGN(8);
+ __smp_locks = .;
+ .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
+ *(.smp_locks)
+ }
+ __smp_locks_end = .;
+ .smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) {
+ *(.smp_altinstr_replacement)
+ }
+ . = ALIGN(4096);
+ __smp_alt_end = .;
+
. = ALIGN(4096); /* Init code and data */
__init_begin = .;
.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index 9468fb20b0bc..f603037df162 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -107,7 +107,7 @@ static __always_inline long time_syscall(long *t)
int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
{
- if (unlikely(!__sysctl_vsyscall))
+ if (!__sysctl_vsyscall)
return gettimeofday(tv,tz);
if (tv)
do_vgettimeofday(tv);
@@ -120,7 +120,7 @@ int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
* unlikely */
time_t __vsyscall(1) vtime(time_t *t)
{
- if (unlikely(!__sysctl_vsyscall))
+ if (!__sysctl_vsyscall)
return time_syscall(t);
else if (t)
*t = __xtime.tv_sec;
diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c
index 1def21c9f7cd..370952c4ff22 100644
--- a/arch/x86_64/kernel/x8664_ksyms.c
+++ b/arch/x86_64/kernel/x8664_ksyms.c
@@ -1,66 +1,21 @@
+/* Exports for assembly files.
+ All C exports should go in the respective C files. */
+
#include <linux/config.h>
#include <linux/module.h>
#include <linux/smp.h>
-#include <linux/user.h>
-#include <linux/sched.h>
-#include <linux/in6.h>
-#include <linux/interrupt.h>
-#include <linux/smp_lock.h>
-#include <linux/pm.h>
-#include <linux/pci.h>
-#include <linux/apm_bios.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/syscalls.h>
-#include <linux/tty.h>
#include <asm/semaphore.h>
#include <asm/processor.h>
-#include <asm/i387.h>
#include <asm/uaccess.h>
-#include <asm/checksum.h>
-#include <asm/io.h>
-#include <asm/delay.h>
-#include <asm/irq.h>
-#include <asm/mmx.h>
-#include <asm/desc.h>
#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
-#include <asm/nmi.h>
-#include <asm/kdebug.h>
-#include <asm/unistd.h>
-#include <asm/tlbflush.h>
-#include <asm/kdebug.h>
-
-extern spinlock_t rtc_lock;
-#ifdef CONFIG_SMP
-extern void __write_lock_failed(rwlock_t *rw);
-extern void __read_lock_failed(rwlock_t *rw);
-#endif
-
-/* platform dependent support */
-EXPORT_SYMBOL(boot_cpu_data);
-//EXPORT_SYMBOL(dump_fpu);
-EXPORT_SYMBOL(__ioremap);
-EXPORT_SYMBOL(ioremap_nocache);
-EXPORT_SYMBOL(iounmap);
EXPORT_SYMBOL(kernel_thread);
-EXPORT_SYMBOL(pm_idle);
-EXPORT_SYMBOL(pm_power_off);
EXPORT_SYMBOL(__down_failed);
EXPORT_SYMBOL(__down_failed_interruptible);
EXPORT_SYMBOL(__down_failed_trylock);
EXPORT_SYMBOL(__up_wakeup);
-/* Networking helper routines. */
-EXPORT_SYMBOL(csum_partial_copy_nocheck);
-EXPORT_SYMBOL(ip_compute_csum);
-/* Delay loops */
-EXPORT_SYMBOL(__udelay);
-EXPORT_SYMBOL(__ndelay);
-EXPORT_SYMBOL(__delay);
-EXPORT_SYMBOL(__const_udelay);
EXPORT_SYMBOL(__get_user_1);
EXPORT_SYMBOL(__get_user_2);
@@ -71,42 +26,20 @@ EXPORT_SYMBOL(__put_user_2);
EXPORT_SYMBOL(__put_user_4);
EXPORT_SYMBOL(__put_user_8);
-EXPORT_SYMBOL(strncpy_from_user);
-EXPORT_SYMBOL(__strncpy_from_user);
-EXPORT_SYMBOL(clear_user);
-EXPORT_SYMBOL(__clear_user);
EXPORT_SYMBOL(copy_user_generic);
EXPORT_SYMBOL(copy_from_user);
EXPORT_SYMBOL(copy_to_user);
-EXPORT_SYMBOL(copy_in_user);
-EXPORT_SYMBOL(strnlen_user);
-
-#ifdef CONFIG_PCI
-EXPORT_SYMBOL(pci_mem_start);
-#endif
EXPORT_SYMBOL(copy_page);
EXPORT_SYMBOL(clear_page);
-EXPORT_SYMBOL(_cpu_pda);
#ifdef CONFIG_SMP
-EXPORT_SYMBOL(cpu_data);
+extern void FASTCALL( __write_lock_failed(rwlock_t *rw));
+extern void FASTCALL( __read_lock_failed(rwlock_t *rw));
EXPORT_SYMBOL(__write_lock_failed);
EXPORT_SYMBOL(__read_lock_failed);
-
-EXPORT_SYMBOL(smp_call_function);
-EXPORT_SYMBOL(cpu_callout_map);
-#endif
-
-#ifdef CONFIG_VT
-EXPORT_SYMBOL(screen_info);
#endif
-EXPORT_SYMBOL(rtc_lock);
-
-EXPORT_SYMBOL_GPL(set_nmi_callback);
-EXPORT_SYMBOL_GPL(unset_nmi_callback);
-
/* Export string functions. We normally rely on gcc builtin for most of these,
but gcc sometimes decides not to inline them. */
#undef memcpy
@@ -114,51 +47,14 @@ EXPORT_SYMBOL_GPL(unset_nmi_callback);
#undef memmove
extern void * memset(void *,int,__kernel_size_t);
-extern size_t strlen(const char *);
-extern void * memmove(void * dest,const void *src,size_t count);
extern void * memcpy(void *,const void *,__kernel_size_t);
extern void * __memcpy(void *,const void *,__kernel_size_t);
EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(__memcpy);
-#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM
-/* prototypes are wrong, these are assembly with custom calling functions */
-extern void rwsem_down_read_failed_thunk(void);
-extern void rwsem_wake_thunk(void);
-extern void rwsem_downgrade_thunk(void);
-extern void rwsem_down_write_failed_thunk(void);
-EXPORT_SYMBOL(rwsem_down_read_failed_thunk);
-EXPORT_SYMBOL(rwsem_wake_thunk);
-EXPORT_SYMBOL(rwsem_downgrade_thunk);
-EXPORT_SYMBOL(rwsem_down_write_failed_thunk);
-#endif
-
EXPORT_SYMBOL(empty_zero_page);
-
-EXPORT_SYMBOL(die_chain);
-
-#ifdef CONFIG_SMP
-EXPORT_SYMBOL(cpu_sibling_map);
-EXPORT_SYMBOL(smp_num_siblings);
-#endif
-
-#ifdef CONFIG_BUG
-EXPORT_SYMBOL(out_of_line_bug);
-#endif
-
EXPORT_SYMBOL(init_level4_pgt);
-
-extern unsigned long __supported_pte_mask;
-EXPORT_SYMBOL(__supported_pte_mask);
-
-#ifdef CONFIG_SMP
-EXPORT_SYMBOL(flush_tlb_page);
-#endif
-
-EXPORT_SYMBOL(cpu_khz);
-
EXPORT_SYMBOL(load_gs_index);
diff --git a/arch/x86_64/lib/csum-partial.c b/arch/x86_64/lib/csum-partial.c
index 5384e227cdf6..c493735218da 100644
--- a/arch/x86_64/lib/csum-partial.c
+++ b/arch/x86_64/lib/csum-partial.c
@@ -147,4 +147,5 @@ unsigned short ip_compute_csum(unsigned char * buff, int len)
{
return csum_fold(csum_partial(buff,len,0));
}
+EXPORT_SYMBOL(ip_compute_csum);
diff --git a/arch/x86_64/lib/csum-wrappers.c b/arch/x86_64/lib/csum-wrappers.c
index 94323f20816e..b1320ec58428 100644
--- a/arch/x86_64/lib/csum-wrappers.c
+++ b/arch/x86_64/lib/csum-wrappers.c
@@ -109,6 +109,7 @@ csum_partial_copy_nocheck(const unsigned char *src, unsigned char *dst, int len,
{
return csum_partial_copy_generic(src,dst,len,sum,NULL,NULL);
}
+EXPORT_SYMBOL(csum_partial_copy_nocheck);
unsigned short csum_ipv6_magic(struct in6_addr *saddr, struct in6_addr *daddr,
__u32 len, unsigned short proto, unsigned int sum)
diff --git a/arch/x86_64/lib/delay.c b/arch/x86_64/lib/delay.c
index 03c460cbdd1c..b6cd3cca2f45 100644
--- a/arch/x86_64/lib/delay.c
+++ b/arch/x86_64/lib/delay.c
@@ -9,6 +9,7 @@
*/
#include <linux/config.h>
+#include <linux/module.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <asm/delay.h>
@@ -36,18 +37,22 @@ void __delay(unsigned long loops)
}
while((now-bclock) < loops);
}
+EXPORT_SYMBOL(__delay);
inline void __const_udelay(unsigned long xloops)
{
__delay((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32);
}
+EXPORT_SYMBOL(__const_udelay);
void __udelay(unsigned long usecs)
{
__const_udelay(usecs * 0x000010c6); /* 2**32 / 1000000 */
}
+EXPORT_SYMBOL(__udelay);
void __ndelay(unsigned long nsecs)
{
__const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
}
+EXPORT_SYMBOL(__ndelay);
diff --git a/arch/x86_64/lib/memmove.c b/arch/x86_64/lib/memmove.c
index e93d5255fdc9..751ebae8ec42 100644
--- a/arch/x86_64/lib/memmove.c
+++ b/arch/x86_64/lib/memmove.c
@@ -3,12 +3,13 @@
*/
#define _STRING_C
#include <linux/string.h>
+#include <linux/module.h>
#undef memmove
void *memmove(void * dest,const void *src,size_t count)
{
if (dest < src) {
- __inline_memcpy(dest,src,count);
+ return memcpy(dest,src,count);
} else {
char *p = (char *) dest + count;
char *s = (char *) src + count;
@@ -17,3 +18,4 @@ void *memmove(void * dest,const void *src,size_t count)
}
return dest;
}
+EXPORT_SYMBOL(memmove);
diff --git a/arch/x86_64/lib/usercopy.c b/arch/x86_64/lib/usercopy.c
index 9bc2c295818e..893d43f838cc 100644
--- a/arch/x86_64/lib/usercopy.c
+++ b/arch/x86_64/lib/usercopy.c
@@ -5,6 +5,7 @@
* Copyright 1997 Linus Torvalds
* Copyright 2002 Andi Kleen <ak@suse.de>
*/
+#include <linux/module.h>
#include <asm/uaccess.h>
/*
@@ -47,15 +48,17 @@ __strncpy_from_user(char *dst, const char __user *src, long count)
__do_strncpy_from_user(dst, src, count, res);
return res;
}
+EXPORT_SYMBOL(__strncpy_from_user);
long
strncpy_from_user(char *dst, const char __user *src, long count)
{
long res = -EFAULT;
if (access_ok(VERIFY_READ, src, 1))
- __do_strncpy_from_user(dst, src, count, res);
+ return __strncpy_from_user(dst, src, count);
return res;
}
+EXPORT_SYMBOL(strncpy_from_user);
/*
* Zero Userspace
@@ -94,7 +97,7 @@ unsigned long __clear_user(void __user *addr, unsigned long size)
[zero] "r" (0UL), [eight] "r" (8UL));
return size;
}
-
+EXPORT_SYMBOL(__clear_user);
unsigned long clear_user(void __user *to, unsigned long n)
{
@@ -102,6 +105,7 @@ unsigned long clear_user(void __user *to, unsigned long n)
return __clear_user(to, n);
return n;
}
+EXPORT_SYMBOL(clear_user);
/*
* Return the size of a string (including the ending 0)
@@ -125,6 +129,7 @@ long __strnlen_user(const char __user *s, long n)
s++;
}
}
+EXPORT_SYMBOL(__strnlen_user);
long strnlen_user(const char __user *s, long n)
{
@@ -132,6 +137,7 @@ long strnlen_user(const char __user *s, long n)
return 0;
return __strnlen_user(s, n);
}
+EXPORT_SYMBOL(strnlen_user);
long strlen_user(const char __user *s)
{
@@ -147,6 +153,7 @@ long strlen_user(const char __user *s)
s++;
}
}
+EXPORT_SYMBOL(strlen_user);
unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len)
{
@@ -155,3 +162,5 @@ unsigned long copy_in_user(void __user *to, const void __user *from, unsigned le
}
return len;
}
+EXPORT_SYMBOL(copy_in_user);
+
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 55250593d8c9..08dc696f54ee 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -41,6 +41,41 @@
#define PF_RSVD (1<<3)
#define PF_INSTR (1<<4)
+#ifdef CONFIG_KPROBES
+ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
+
+/* Hook to register for page fault notifications */
+int register_page_fault_notifier(struct notifier_block *nb)
+{
+ vmalloc_sync_all();
+ return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
+}
+
+int unregister_page_fault_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
+}
+
+static inline int notify_page_fault(enum die_val val, const char *str,
+ struct pt_regs *regs, long err, int trap, int sig)
+{
+ struct die_args args = {
+ .regs = regs,
+ .str = str,
+ .err = err,
+ .trapnr = trap,
+ .signr = sig
+ };
+ return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
+}
+#else
+static inline int notify_page_fault(enum die_val val, const char *str,
+ struct pt_regs *regs, long err, int trap, int sig)
+{
+ return NOTIFY_DONE;
+}
+#endif
+
void bust_spinlocks(int yes)
{
int loglevel_save = console_loglevel;
@@ -160,7 +195,7 @@ void dump_pagetable(unsigned long address)
printk("PGD %lx ", pgd_val(*pgd));
if (!pgd_present(*pgd)) goto ret;
- pud = __pud_offset_k((pud_t *)pgd_page(*pgd), address);
+ pud = pud_offset(pgd, address);
if (bad_address(pud)) goto bad;
printk("PUD %lx ", pud_val(*pud));
if (!pud_present(*pud)) goto ret;
@@ -348,7 +383,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
if (vmalloc_fault(address) >= 0)
return;
}
- if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+ if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
SIGSEGV) == NOTIFY_STOP)
return;
/*
@@ -358,7 +393,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
goto bad_area_nosemaphore;
}
- if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+ if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
SIGSEGV) == NOTIFY_STOP)
return;
@@ -410,8 +445,10 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
if (error_code & 4) {
- // XXX: align red zone size with ABI
- if (address + 128 < regs->rsp)
+ /* Allow userspace just enough access below the stack pointer
+ * to let the 'enter' instruction work.
+ */
+ if (address + 65536 + 32 * sizeof(unsigned long) < regs->rsp)
goto bad_area;
}
if (expand_stack(vma, address))
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index 4ba34e95d835..02add1d1dfa8 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -41,8 +41,6 @@
#include <asm/proto.h>
#include <asm/smp.h>
#include <asm/sections.h>
-#include <asm/dma-mapping.h>
-#include <asm/swiotlb.h>
#ifndef Dprintk
#define Dprintk(x...)
@@ -90,8 +88,6 @@ void show_mem(void)
printk(KERN_INFO "%lu pages swap cached\n",cached);
}
-/* References to section boundaries */
-
int after_bootmem;
static __init void *spp_getpage(void)
@@ -261,9 +257,10 @@ phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
for (i = 0; i < PTRS_PER_PMD; pmd++, i++, address += PMD_SIZE) {
unsigned long entry;
- if (address > end) {
- for (; i < PTRS_PER_PMD; i++, pmd++)
- set_pmd(pmd, __pmd(0));
+ if (address >= end) {
+ if (!after_bootmem)
+ for (; i < PTRS_PER_PMD; i++, pmd++)
+ set_pmd(pmd, __pmd(0));
break;
}
entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
@@ -341,7 +338,8 @@ static void __init find_early_table_space(unsigned long end)
table_end = table_start;
early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
- end, table_start << PAGE_SHIFT, table_end << PAGE_SHIFT);
+ end, table_start << PAGE_SHIFT,
+ (table_start << PAGE_SHIFT) + tables);
}
/* Setup the direct mapping of the physical memory at PAGE_OFFSET.
@@ -372,7 +370,7 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end)
pud_t *pud;
if (after_bootmem)
- pud = pud_offset_k(pgd, start & PGDIR_MASK);
+ pud = pud_offset(pgd, start & PGDIR_MASK);
else
pud = alloc_low_page(&map, &pud_phys);
@@ -587,10 +585,7 @@ void __init mem_init(void)
{
long codesize, reservedpages, datasize, initsize;
-#ifdef CONFIG_SWIOTLB
- pci_swiotlb_init();
-#endif
- no_iommu_init();
+ pci_iommu_alloc();
/* How many end-of-memory variables you have, grandma! */
max_low_pfn = end_pfn;
@@ -644,20 +639,29 @@ void __init mem_init(void)
#endif
}
-void free_initmem(void)
+void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
unsigned long addr;
- addr = (unsigned long)(&__init_begin);
- for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
+ if (begin >= end)
+ return;
+
+ printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
+ for (addr = begin; addr < end; addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
init_page_count(virt_to_page(addr));
memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE);
free_page(addr);
totalram_pages++;
}
+}
+
+void free_initmem(void)
+{
memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin);
- printk ("Freeing unused kernel memory: %luk freed\n", (__init_end - __init_begin) >> 10);
+ free_init_pages("unused kernel memory",
+ (unsigned long)(&__init_begin),
+ (unsigned long)(&__init_end));
}
#ifdef CONFIG_DEBUG_RODATA
@@ -686,15 +690,7 @@ void mark_rodata_ro(void)
#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
- if (start >= end)
- return;
- printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
- for (; start < end; start += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(start));
- init_page_count(virt_to_page(start));
- free_page(start);
- totalram_pages++;
- }
+ free_init_pages("initrd memory", start, end);
}
#endif
diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c
index ae207064201e..45d7d823c3b8 100644
--- a/arch/x86_64/mm/ioremap.c
+++ b/arch/x86_64/mm/ioremap.c
@@ -11,6 +11,7 @@
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/slab.h>
+#include <linux/module.h>
#include <asm/io.h>
#include <asm/pgalloc.h>
#include <asm/fixmap.h>
@@ -219,6 +220,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l
}
return (__force void __iomem *) (offset + (char *)addr);
}
+EXPORT_SYMBOL(__ioremap);
/**
* ioremap_nocache - map bus memory into CPU space
@@ -246,6 +248,7 @@ void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
{
return __ioremap(phys_addr, size, _PAGE_PCD);
}
+EXPORT_SYMBOL(ioremap_nocache);
/**
* iounmap - Free a IO remapping
@@ -291,3 +294,5 @@ void iounmap(volatile void __iomem *addr)
BUG_ON(p != o || o == NULL);
kfree(p);
}
+EXPORT_SYMBOL(iounmap);
+
diff --git a/arch/x86_64/pci/k8-bus.c b/arch/x86_64/pci/k8-bus.c
index 3acf60ded2a0..b50a7c7c47f8 100644
--- a/arch/x86_64/pci/k8-bus.c
+++ b/arch/x86_64/pci/k8-bus.c
@@ -2,6 +2,7 @@
#include <linux/pci.h>
#include <asm/mpspec.h>
#include <linux/cpumask.h>
+#include <asm/k8.h>
/*
* This discovers the pcibus <-> node mapping on AMD K8.
@@ -18,7 +19,6 @@
#define NR_LDT_BUS_NUMBER_REGISTERS 3
#define SECONDARY_LDT_BUS_NUMBER(dword) ((dword >> 8) & 0xFF)
#define SUBORDINATE_LDT_BUS_NUMBER(dword) ((dword >> 16) & 0xFF)
-#define PCI_DEVICE_ID_K8HTCONFIG 0x1100
/**
* fill_mp_bus_to_cpumask()
@@ -28,8 +28,7 @@
__init static int
fill_mp_bus_to_cpumask(void)
{
- struct pci_dev *nb_dev = NULL;
- int i, j;
+ int i, j, k;
u32 ldtbus, nid;
static int lbnr[3] = {
LDT_BUS_NUMBER_REGISTER_0,
@@ -37,8 +36,9 @@ fill_mp_bus_to_cpumask(void)
LDT_BUS_NUMBER_REGISTER_2
};
- while ((nb_dev = pci_get_device(PCI_VENDOR_ID_AMD,
- PCI_DEVICE_ID_K8HTCONFIG, nb_dev))) {
+ cache_k8_northbridges();
+ for (k = 0; k < num_k8_northbridges; k++) {
+ struct pci_dev *nb_dev = k8_northbridges[k];
pci_read_config_dword(nb_dev, NODE_ID_REGISTER, &nid);
for (i = 0; i < NR_LDT_BUS_NUMBER_REGISTERS; i++) {