diff options
Diffstat (limited to 'arch/powerpc/kernel')
58 files changed, 5560 insertions, 1949 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 9ed551b6c172..c287980b7e65 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -4,7 +4,6 @@ ifeq ($(CONFIG_PPC64),y) EXTRA_CFLAGS += -mno-minimal-toc -CFLAGS_ioctl32.o += -Ifs/ endif ifeq ($(CONFIG_PPC32),y) CFLAGS_prom_init.o += -fPIC @@ -12,16 +11,17 @@ CFLAGS_btext.o += -fPIC endif obj-y := semaphore.o cputable.o ptrace.o syscalls.o \ - irq.o align.o signal_32.o pmc.o vdso.o + irq.o align.o signal_32.o pmc.o vdso.o \ + init_task.o process.o obj-y += vdso32/ obj-$(CONFIG_PPC64) += setup_64.o binfmt_elf32.o sys_ppc32.o \ signal_64.o ptrace32.o systbl.o \ - paca.o ioctl32.o cpu_setup_power4.o \ - firmware.o sysfs.o udbg.o idle_64.o + paca.o cpu_setup_power4.o \ + firmware.o sysfs.o idle_64.o obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o obj-$(CONFIG_POWER4) += idle_power4.o -obj-$(CONFIG_PPC_OF) += of_device.o +obj-$(CONFIG_PPC_OF) += of_device.o prom_parse.o procfs-$(CONFIG_PPC64) := proc_ppc64.o obj-$(CONFIG_PROC_FS) += $(procfs-y) rtaspci-$(CONFIG_PPC64) := rtas_pci.o @@ -30,12 +30,10 @@ obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o obj-$(CONFIG_RTAS_PROC) += rtas-proc.o obj-$(CONFIG_LPARCFG) += lparcfg.o obj-$(CONFIG_IBMVIO) += vio.o +obj-$(CONFIG_IBMEBUS) += ibmebus.o obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o -obj-$(CONFIG_PPC_PSERIES) += udbg_16550.o -obj-$(CONFIG_PPC_MAPLE) += udbg_16550.o -udbgscc-$(CONFIG_PPC64) := udbg_scc.o -obj-$(CONFIG_PPC_PMAC) += $(udbgscc-y) obj64-$(CONFIG_PPC_MULTIPLATFORM) += nvram_64.o +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o ifeq ($(CONFIG_PPC_MERGE),y) @@ -47,26 +45,26 @@ extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o extra-$(CONFIG_8xx) := head_8xx.o extra-y += vmlinux.lds -obj-y += process.o init_task.o time.o \ - prom.o traps.o setup-common.o +obj-y += time.o prom.o traps.o setup-common.o udbg.o obj-$(CONFIG_PPC32) += entry_32.o setup_32.o misc_32.o systbl.o obj-$(CONFIG_PPC64) += misc_64.o dma_64.o iommu.o -obj-$(CONFIG_PPC_OF) += prom_init.o +obj-$(CONFIG_PPC_MULTIPLATFORM) += prom_init.o obj-$(CONFIG_MODULES) += ppc_ksyms.o obj-$(CONFIG_BOOTX_TEXT) += btext.o obj-$(CONFIG_6xx) += idle_6xx.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_KPROBES) += kprobes.o - +obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o module-$(CONFIG_PPC64) += module_64.o obj-$(CONFIG_MODULES) += $(module-y) pci64-$(CONFIG_PPC64) += pci_64.o pci_dn.o pci_iommu.o \ pci_direct_iommu.o iomap.o -obj-$(CONFIG_PCI) += $(pci64-y) - -kexec64-$(CONFIG_PPC64) += machine_kexec_64.o -obj-$(CONFIG_KEXEC) += $(kexec64-y) +pci32-$(CONFIG_PPC32) := pci_32.o +obj-$(CONFIG_PCI) += $(pci64-y) $(pci32-y) +kexec-$(CONFIG_PPC64) := machine_kexec_64.o crash.o +kexec-$(CONFIG_PPC32) := machine_kexec_32.o +obj-$(CONFIG_KEXEC) += machine_kexec.o $(kexec-y) ifeq ($(CONFIG_PPC_ISERIES),y) $(obj)/head_64.o: $(obj)/lparmap.s diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 91538d2445bf..840aad43a98b 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -92,9 +92,9 @@ int main(void) DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); - DEFINE(TI_SC_NOERR, offsetof(struct thread_info, syscall_noerror)); -#ifdef CONFIG_PPC32 + DEFINE(TI_SIGFRAME, offsetof(struct thread_info, nvgprs_frame)); DEFINE(TI_TASK, offsetof(struct thread_info, task)); +#ifdef CONFIG_PPC32 DEFINE(TI_EXECDOMAIN, offsetof(struct thread_info, exec_domain)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); #endif /* CONFIG_PPC32 */ @@ -131,13 +131,11 @@ int main(void) DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas)); DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas)); #endif /* CONFIG_HUGETLB_PAGE */ - DEFINE(PACADEFAULTDECR, offsetof(struct paca_struct, default_decr)); DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen)); DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc)); DEFINE(PACA_EXSLB, offsetof(struct paca_struct, exslb)); - DEFINE(PACA_EXDSI, offsetof(struct paca_struct, exdsi)); DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp)); - DEFINE(PACALPPACA, offsetof(struct paca_struct, lppaca)); + DEFINE(PACALPPACAPTR, offsetof(struct paca_struct, lppaca_ptr)); DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0)); diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index bdfba92b2b38..6223d39177cb 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -31,15 +31,18 @@ static void draw_byte_32(unsigned char *bits, unsigned int *base, int rb); static void draw_byte_16(unsigned char *bits, unsigned int *base, int rb); static void draw_byte_8(unsigned char *bits, unsigned int *base, int rb); -static int g_loc_X; -static int g_loc_Y; -static int g_max_loc_X; -static int g_max_loc_Y; +#define __force_data __attribute__((__section__(".data"))) -static int dispDeviceRowBytes; -static int dispDeviceDepth; -static int dispDeviceRect[4]; -static unsigned char *dispDeviceBase, *logicalDisplayBase; +static int g_loc_X __force_data; +static int g_loc_Y __force_data; +static int g_max_loc_X __force_data; +static int g_max_loc_Y __force_data; + +static int dispDeviceRowBytes __force_data; +static int dispDeviceDepth __force_data; +static int dispDeviceRect[4] __force_data; +static unsigned char *dispDeviceBase __force_data; +static unsigned char *logicalDisplayBase __force_data; unsigned long disp_BAT[2] __initdata = {0, 0}; @@ -47,7 +50,7 @@ unsigned long disp_BAT[2] __initdata = {0, 0}; static unsigned char vga_font[cmapsz]; -int boot_text_mapped; +int boot_text_mapped __force_data = 0; int force_printk_to_btext = 0; #ifdef CONFIG_PPC32 @@ -57,7 +60,7 @@ int force_printk_to_btext = 0; * * The display is mapped to virtual address 0xD0000000, rather * than 1:1, because some some CHRP machines put the frame buffer - * in the region starting at 0xC0000000 (KERNELBASE). + * in the region starting at 0xC0000000 (PAGE_OFFSET). * This mapping is temporary and will disappear as soon as the * setup done by MMU_Init() is applied. * @@ -66,10 +69,9 @@ int force_printk_to_btext = 0; * is really badly aligned, but I didn't encounter this case * yet. */ -void __init -btext_prepare_BAT(void) +void __init btext_prepare_BAT(void) { - unsigned long vaddr = KERNELBASE + 0x10000000; + unsigned long vaddr = PAGE_OFFSET + 0x10000000; unsigned long addr; unsigned long lowbits; @@ -95,12 +97,13 @@ btext_prepare_BAT(void) } #endif -/* This function will enable the early boot text when doing OF booting. This - * way, xmon output should work too + +/* This function can be used to enable the early boot text when doing + * OF booting or within bootx init. It must be followed by a btext_unmap() + * call before the logical address becomes unuseable */ -void __init -btext_setup_display(int width, int height, int depth, int pitch, - unsigned long address) +void __init btext_setup_display(int width, int height, int depth, int pitch, + unsigned long address) { g_loc_X = 0; g_loc_Y = 0; @@ -116,6 +119,11 @@ btext_setup_display(int width, int height, int depth, int pitch, boot_text_mapped = 1; } +void __init btext_unmap(void) +{ + boot_text_mapped = 0; +} + /* Here's a small text engine to use during early boot * or for debugging purposes * @@ -127,7 +135,7 @@ btext_setup_display(int width, int height, int depth, int pitch, * changes. */ -void map_boot_text(void) +static void map_boot_text(void) { unsigned long base, offset, size; unsigned char *vbase; @@ -175,8 +183,9 @@ int btext_initialize(struct device_node *np) if (prop) address = *prop; - /* FIXME: Add support for PCI reg properties */ - + /* FIXME: Add support for PCI reg properties. Right now, only + * reliable on macs + */ if (address == 0) return -EINVAL; @@ -184,7 +193,6 @@ int btext_initialize(struct device_node *np) g_loc_Y = 0; g_max_loc_X = width / 8; g_max_loc_Y = height / 16; - logicalDisplayBase = (unsigned char *)address; dispDeviceBase = (unsigned char *)address; dispDeviceRowBytes = pitch; dispDeviceDepth = depth; @@ -197,14 +205,12 @@ int btext_initialize(struct device_node *np) return 0; } -void __init init_boot_display(void) +int __init btext_find_display(int allow_nonstdout) { char *name; struct device_node *np = NULL; int rc = -ENODEV; - printk("trying to initialize btext ...\n"); - name = (char *)get_property(of_chosen, "linux,stdout-path", NULL); if (name != NULL) { np = of_find_node_by_path(name); @@ -218,8 +224,8 @@ void __init init_boot_display(void) } if (np) rc = btext_initialize(np); - if (rc == 0) - return; + if (rc == 0 || !allow_nonstdout) + return rc; for (np = NULL; (np = of_find_node_by_type(np, "display"));) { if (get_property(np, "linux,opened", NULL)) { @@ -228,8 +234,9 @@ void __init init_boot_display(void) printk("result: %d\n", rc); } if (rc == 0) - return; + break; } + return rc; } /* Calc the base address of a given point (x,y) */ @@ -277,44 +284,83 @@ EXPORT_SYMBOL(btext_update_display); void btext_clearscreen(void) { - unsigned long *base = (unsigned long *)calc_base(0, 0); + unsigned int *base = (unsigned int *)calc_base(0, 0); unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) * - (dispDeviceDepth >> 3)) >> 3; + (dispDeviceDepth >> 3)) >> 2; int i,j; for (i=0; i<(dispDeviceRect[3] - dispDeviceRect[1]); i++) { - unsigned long *ptr = base; + unsigned int *ptr = base; for(j=width; j; --j) *(ptr++) = 0; - base += (dispDeviceRowBytes >> 3); + base += (dispDeviceRowBytes >> 2); } } +void btext_flushscreen(void) +{ + unsigned int *base = (unsigned int *)calc_base(0, 0); + unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) * + (dispDeviceDepth >> 3)) >> 2; + int i,j; + + for (i=0; i < (dispDeviceRect[3] - dispDeviceRect[1]); i++) + { + unsigned int *ptr = base; + for(j = width; j > 0; j -= 8) { + __asm__ __volatile__ ("dcbst 0,%0" :: "r" (ptr)); + ptr += 8; + } + base += (dispDeviceRowBytes >> 2); + } + __asm__ __volatile__ ("sync" ::: "memory"); +} + +void btext_flushline(void) +{ + unsigned int *base = (unsigned int *)calc_base(0, g_loc_Y << 4); + unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) * + (dispDeviceDepth >> 3)) >> 2; + int i,j; + + for (i=0; i < 16; i++) + { + unsigned int *ptr = base; + for(j = width; j > 0; j -= 8) { + __asm__ __volatile__ ("dcbst 0,%0" :: "r" (ptr)); + ptr += 8; + } + base += (dispDeviceRowBytes >> 2); + } + __asm__ __volatile__ ("sync" ::: "memory"); +} + + #ifndef NO_SCROLL static void scrollscreen(void) { - unsigned long *src = (unsigned long *)calc_base(0,16); - unsigned long *dst = (unsigned long *)calc_base(0,0); + unsigned int *src = (unsigned int *)calc_base(0,16); + unsigned int *dst = (unsigned int *)calc_base(0,0); unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) * - (dispDeviceDepth >> 3)) >> 3; + (dispDeviceDepth >> 3)) >> 2; int i,j; for (i=0; i<(dispDeviceRect[3] - dispDeviceRect[1] - 16); i++) { - unsigned long *src_ptr = src; - unsigned long *dst_ptr = dst; + unsigned int *src_ptr = src; + unsigned int *dst_ptr = dst; for(j=width; j; --j) *(dst_ptr++) = *(src_ptr++); - src += (dispDeviceRowBytes >> 3); - dst += (dispDeviceRowBytes >> 3); + src += (dispDeviceRowBytes >> 2); + dst += (dispDeviceRowBytes >> 2); } for (i=0; i<16; i++) { - unsigned long *dst_ptr = dst; + unsigned int *dst_ptr = dst; for(j=width; j; --j) *(dst_ptr++) = 0; - dst += (dispDeviceRowBytes >> 3); + dst += (dispDeviceRowBytes >> 2); } } #endif /* ndef NO_SCROLL */ @@ -377,6 +423,14 @@ void btext_drawstring(const char *c) btext_drawchar(*c++); } +void btext_drawtext(const char *c, unsigned int len) +{ + if (!boot_text_mapped) + return; + while (len--) + btext_drawchar(*c++); +} + void btext_drawhex(unsigned long v) { char *hex_table = "0123456789abcdef"; diff --git a/arch/powerpc/kernel/cpu_setup_power4.S b/arch/powerpc/kernel/cpu_setup_power4.S index cca942fe6115..b61d86e7ceb6 100644 --- a/arch/powerpc/kernel/cpu_setup_power4.S +++ b/arch/powerpc/kernel/cpu_setup_power4.S @@ -130,7 +130,7 @@ _GLOBAL(__save_cpu_setup) mfcr r7 /* Get storage ptr */ - LOADADDR(r5,cpu_state_storage) + LOAD_REG_IMMEDIATE(r5,cpu_state_storage) /* We only deal with 970 for now */ mfspr r0,SPRN_PVR @@ -164,7 +164,7 @@ _GLOBAL(__restore_cpu_setup) /* Get storage ptr (FIXME when using anton reloc as we * are running with translation disabled here */ - LOADADDR(r5,cpu_state_storage) + LOAD_REG_IMMEDIATE(r5,cpu_state_storage) /* We only deal with 970 for now */ mfspr r0,SPRN_PVR diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 1d85cedbbb7b..10696456a4c6 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -55,7 +55,8 @@ extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec); #define COMMON_USER_POWER4 (COMMON_USER_PPC64 | PPC_FEATURE_POWER4) #define COMMON_USER_POWER5 (COMMON_USER_PPC64 | PPC_FEATURE_POWER5) #define COMMON_USER_POWER5_PLUS (COMMON_USER_PPC64 | PPC_FEATURE_POWER5_PLUS) - +#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ + PPC_FEATURE_BOOKE) /* We only set the spe features if the kernel was compiled with * spe support @@ -78,10 +79,9 @@ struct cpu_spec cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 8, .cpu_setup = __setup_cpu_power3, -#ifdef CONFIG_OPROFILE .oprofile_cpu_type = "ppc64/power3", - .oprofile_model = &op_model_rs64, -#endif + .oprofile_type = PPC_OPROFILE_RS64, + .platform = "power3", }, { /* Power3+ */ .pvr_mask = 0xffff0000, @@ -93,10 +93,9 @@ struct cpu_spec cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 8, .cpu_setup = __setup_cpu_power3, -#ifdef CONFIG_OPROFILE .oprofile_cpu_type = "ppc64/power3", - .oprofile_model = &op_model_rs64, -#endif + .oprofile_type = PPC_OPROFILE_RS64, + .platform = "power3", }, { /* Northstar */ .pvr_mask = 0xffff0000, @@ -108,10 +107,9 @@ struct cpu_spec cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 8, .cpu_setup = __setup_cpu_power3, -#ifdef CONFIG_OPROFILE .oprofile_cpu_type = "ppc64/rs64", - .oprofile_model = &op_model_rs64, -#endif + .oprofile_type = PPC_OPROFILE_RS64, + .platform = "rs64", }, { /* Pulsar */ .pvr_mask = 0xffff0000, @@ -123,10 +121,9 @@ struct cpu_spec cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 8, .cpu_setup = __setup_cpu_power3, -#ifdef CONFIG_OPROFILE .oprofile_cpu_type = "ppc64/rs64", - .oprofile_model = &op_model_rs64, -#endif + .oprofile_type = PPC_OPROFILE_RS64, + .platform = "rs64", }, { /* I-star */ .pvr_mask = 0xffff0000, @@ -138,10 +135,9 @@ struct cpu_spec cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 8, .cpu_setup = __setup_cpu_power3, -#ifdef CONFIG_OPROFILE .oprofile_cpu_type = "ppc64/rs64", - .oprofile_model = &op_model_rs64, -#endif + .oprofile_type = PPC_OPROFILE_RS64, + .platform = "rs64", }, { /* S-star */ .pvr_mask = 0xffff0000, @@ -153,10 +149,9 @@ struct cpu_spec cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 8, .cpu_setup = __setup_cpu_power3, -#ifdef CONFIG_OPROFILE .oprofile_cpu_type = "ppc64/rs64", - .oprofile_model = &op_model_rs64, -#endif + .oprofile_type = PPC_OPROFILE_RS64, + .platform = "rs64", }, { /* Power4 */ .pvr_mask = 0xffff0000, @@ -168,10 +163,9 @@ struct cpu_spec cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 8, .cpu_setup = __setup_cpu_power4, -#ifdef CONFIG_OPROFILE .oprofile_cpu_type = "ppc64/power4", - .oprofile_model = &op_model_rs64, -#endif + .oprofile_type = PPC_OPROFILE_POWER4, + .platform = "power4", }, { /* Power4+ */ .pvr_mask = 0xffff0000, @@ -183,10 +177,9 @@ struct cpu_spec cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 8, .cpu_setup = __setup_cpu_power4, -#ifdef CONFIG_OPROFILE .oprofile_cpu_type = "ppc64/power4", - .oprofile_model = &op_model_power4, -#endif + .oprofile_type = PPC_OPROFILE_POWER4, + .platform = "power4", }, { /* PPC970 */ .pvr_mask = 0xffff0000, @@ -199,10 +192,9 @@ struct cpu_spec cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 8, .cpu_setup = __setup_cpu_ppc970, -#ifdef CONFIG_OPROFILE .oprofile_cpu_type = "ppc64/970", - .oprofile_model = &op_model_power4, -#endif + .oprofile_type = PPC_OPROFILE_POWER4, + .platform = "ppc970", }, #endif /* CONFIG_PPC64 */ #if defined(CONFIG_PPC64) || defined(CONFIG_POWER4) @@ -221,10 +213,9 @@ struct cpu_spec cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 8, .cpu_setup = __setup_cpu_ppc970, -#ifdef CONFIG_OPROFILE .oprofile_cpu_type = "ppc64/970", - .oprofile_model = &op_model_power4, -#endif + .oprofile_type = PPC_OPROFILE_POWER4, + .platform = "ppc970", }, #endif /* defined(CONFIG_PPC64) || defined(CONFIG_POWER4) */ #ifdef CONFIG_PPC64 @@ -238,10 +229,9 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_ppc970, -#ifdef CONFIG_OPROFILE .oprofile_cpu_type = "ppc64/970", - .oprofile_model = &op_model_power4, -#endif + .oprofile_type = PPC_OPROFILE_POWER4, + .platform = "ppc970", }, { /* Power5 GR */ .pvr_mask = 0xffff0000, @@ -253,27 +243,25 @@ struct cpu_spec cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .cpu_setup = __setup_cpu_power4, -#ifdef CONFIG_OPROFILE .oprofile_cpu_type = "ppc64/power5", - .oprofile_model = &op_model_power4, -#endif + .oprofile_type = PPC_OPROFILE_POWER4, + .platform = "power5", }, { /* Power5 GS */ .pvr_mask = 0xffff0000, .pvr_value = 0x003b0000, - .cpu_name = "POWER5 (gs)", + .cpu_name = "POWER5+ (gs)", .cpu_features = CPU_FTRS_POWER5, .cpu_user_features = COMMON_USER_POWER5_PLUS, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, .cpu_setup = __setup_cpu_power4, -#ifdef CONFIG_OPROFILE - .oprofile_cpu_type = "ppc64/power5", - .oprofile_model = &op_model_power4, -#endif + .oprofile_cpu_type = "ppc64/power5+", + .oprofile_type = PPC_OPROFILE_POWER4, + .platform = "power5+", }, - { /* BE DD1.x */ + { /* Cell Broadband Engine */ .pvr_mask = 0xffff0000, .pvr_value = 0x00700000, .cpu_name = "Cell Broadband Engine", @@ -283,6 +271,7 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_be, + .platform = "ppc-cell-be", }, { /* default match */ .pvr_mask = 0x00000000, @@ -294,6 +283,7 @@ struct cpu_spec cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .cpu_setup = __setup_cpu_power4, + .platform = "power4", } #endif /* CONFIG_PPC64 */ #ifdef CONFIG_PPC32 @@ -307,6 +297,7 @@ struct cpu_spec cpu_specs[] = { PPC_FEATURE_UNIFIED_CACHE | PPC_FEATURE_NO_TB, .icache_bsize = 32, .dcache_bsize = 32, + .platform = "ppc601", }, { /* 603 */ .pvr_mask = 0xffff0000, @@ -316,7 +307,8 @@ struct cpu_spec cpu_specs[] = { .cpu_user_features = COMMON_USER, .icache_bsize = 32, .dcache_bsize = 32, - .cpu_setup = __setup_cpu_603 + .cpu_setup = __setup_cpu_603, + .platform = "ppc603", }, { /* 603e */ .pvr_mask = 0xffff0000, @@ -326,7 +318,8 @@ struct cpu_spec cpu_specs[] = { .cpu_user_features = COMMON_USER, .icache_bsize = 32, .dcache_bsize = 32, - .cpu_setup = __setup_cpu_603 + .cpu_setup = __setup_cpu_603, + .platform = "ppc603", }, { /* 603ev */ .pvr_mask = 0xffff0000, @@ -336,7 +329,8 @@ struct cpu_spec cpu_specs[] = { .cpu_user_features = COMMON_USER, .icache_bsize = 32, .dcache_bsize = 32, - .cpu_setup = __setup_cpu_603 + .cpu_setup = __setup_cpu_603, + .platform = "ppc603", }, { /* 604 */ .pvr_mask = 0xffff0000, @@ -347,7 +341,8 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 2, - .cpu_setup = __setup_cpu_604 + .cpu_setup = __setup_cpu_604, + .platform = "ppc604", }, { /* 604e */ .pvr_mask = 0xfffff000, @@ -358,7 +353,8 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 4, - .cpu_setup = __setup_cpu_604 + .cpu_setup = __setup_cpu_604, + .platform = "ppc604", }, { /* 604r */ .pvr_mask = 0xffff0000, @@ -369,7 +365,8 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 4, - .cpu_setup = __setup_cpu_604 + .cpu_setup = __setup_cpu_604, + .platform = "ppc604", }, { /* 604ev */ .pvr_mask = 0xffff0000, @@ -380,7 +377,8 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 4, - .cpu_setup = __setup_cpu_604 + .cpu_setup = __setup_cpu_604, + .platform = "ppc604", }, { /* 740/750 (0x4202, don't support TAU ?) */ .pvr_mask = 0xffffffff, @@ -391,7 +389,8 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 4, - .cpu_setup = __setup_cpu_750 + .cpu_setup = __setup_cpu_750, + .platform = "ppc750", }, { /* 750CX (80100 and 8010x?) */ .pvr_mask = 0xfffffff0, @@ -402,7 +401,8 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 4, - .cpu_setup = __setup_cpu_750cx + .cpu_setup = __setup_cpu_750cx, + .platform = "ppc750", }, { /* 750CX (82201 and 82202) */ .pvr_mask = 0xfffffff0, @@ -413,7 +413,8 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 4, - .cpu_setup = __setup_cpu_750cx + .cpu_setup = __setup_cpu_750cx, + .platform = "ppc750", }, { /* 750CXe (82214) */ .pvr_mask = 0xfffffff0, @@ -424,7 +425,8 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 4, - .cpu_setup = __setup_cpu_750cx + .cpu_setup = __setup_cpu_750cx, + .platform = "ppc750", }, { /* 750CXe "Gekko" (83214) */ .pvr_mask = 0xffffffff, @@ -435,7 +437,8 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 4, - .cpu_setup = __setup_cpu_750cx + .cpu_setup = __setup_cpu_750cx, + .platform = "ppc750", }, { /* 745/755 */ .pvr_mask = 0xfffff000, @@ -446,7 +449,8 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 4, - .cpu_setup = __setup_cpu_750 + .cpu_setup = __setup_cpu_750, + .platform = "ppc750", }, { /* 750FX rev 1.x */ .pvr_mask = 0xffffff00, @@ -457,7 +461,8 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 4, - .cpu_setup = __setup_cpu_750 + .cpu_setup = __setup_cpu_750, + .platform = "ppc750", }, { /* 750FX rev 2.0 must disable HID0[DPM] */ .pvr_mask = 0xffffffff, @@ -468,7 +473,8 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 4, - .cpu_setup = __setup_cpu_750 + .cpu_setup = __setup_cpu_750, + .platform = "ppc750", }, { /* 750FX (All revs except 2.0) */ .pvr_mask = 0xffff0000, @@ -479,7 +485,8 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 4, - .cpu_setup = __setup_cpu_750fx + .cpu_setup = __setup_cpu_750fx, + .platform = "ppc750", }, { /* 750GX */ .pvr_mask = 0xffff0000, @@ -490,7 +497,8 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 4, - .cpu_setup = __setup_cpu_750fx + .cpu_setup = __setup_cpu_750fx, + .platform = "ppc750", }, { /* 740/750 (L2CR bit need fixup for 740) */ .pvr_mask = 0xffff0000, @@ -501,7 +509,8 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 4, - .cpu_setup = __setup_cpu_750 + .cpu_setup = __setup_cpu_750, + .platform = "ppc750", }, { /* 7400 rev 1.1 ? (no TAU) */ .pvr_mask = 0xffffffff, @@ -512,7 +521,8 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 4, - .cpu_setup = __setup_cpu_7400 + .cpu_setup = __setup_cpu_7400, + .platform = "ppc7400", }, { /* 7400 */ .pvr_mask = 0xffff0000, @@ -523,7 +533,8 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 4, - .cpu_setup = __setup_cpu_7400 + .cpu_setup = __setup_cpu_7400, + .platform = "ppc7400", }, { /* 7410 */ .pvr_mask = 0xffff0000, @@ -534,7 +545,8 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 4, - .cpu_setup = __setup_cpu_7410 + .cpu_setup = __setup_cpu_7410, + .platform = "ppc7400", }, { /* 7450 2.0 - no doze/nap */ .pvr_mask = 0xffffffff, @@ -545,7 +557,10 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 6, - .cpu_setup = __setup_cpu_745x + .cpu_setup = __setup_cpu_745x, + .oprofile_cpu_type = "ppc/7450", + .oprofile_type = PPC_OPROFILE_G4, + .platform = "ppc7450", }, { /* 7450 2.1 */ .pvr_mask = 0xffffffff, @@ -556,7 +571,10 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 6, - .cpu_setup = __setup_cpu_745x + .cpu_setup = __setup_cpu_745x, + .oprofile_cpu_type = "ppc/7450", + .oprofile_type = PPC_OPROFILE_G4, + .platform = "ppc7450", }, { /* 7450 2.3 and newer */ .pvr_mask = 0xffff0000, @@ -567,7 +585,10 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 6, - .cpu_setup = __setup_cpu_745x + .cpu_setup = __setup_cpu_745x, + .oprofile_cpu_type = "ppc/7450", + .oprofile_type = PPC_OPROFILE_G4, + .platform = "ppc7450", }, { /* 7455 rev 1.x */ .pvr_mask = 0xffffff00, @@ -578,7 +599,10 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 6, - .cpu_setup = __setup_cpu_745x + .cpu_setup = __setup_cpu_745x, + .oprofile_cpu_type = "ppc/7450", + .oprofile_type = PPC_OPROFILE_G4, + .platform = "ppc7450", }, { /* 7455 rev 2.0 */ .pvr_mask = 0xffffffff, @@ -589,7 +613,10 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 6, - .cpu_setup = __setup_cpu_745x + .cpu_setup = __setup_cpu_745x, + .oprofile_cpu_type = "ppc/7450", + .oprofile_type = PPC_OPROFILE_G4, + .platform = "ppc7450", }, { /* 7455 others */ .pvr_mask = 0xffff0000, @@ -600,7 +627,10 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 6, - .cpu_setup = __setup_cpu_745x + .cpu_setup = __setup_cpu_745x, + .oprofile_cpu_type = "ppc/7450", + .oprofile_type = PPC_OPROFILE_G4, + .platform = "ppc7450", }, { /* 7447/7457 Rev 1.0 */ .pvr_mask = 0xffffffff, @@ -611,7 +641,10 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 6, - .cpu_setup = __setup_cpu_745x + .cpu_setup = __setup_cpu_745x, + .oprofile_cpu_type = "ppc/7450", + .oprofile_type = PPC_OPROFILE_G4, + .platform = "ppc7450", }, { /* 7447/7457 Rev 1.1 */ .pvr_mask = 0xffffffff, @@ -622,7 +655,10 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 6, - .cpu_setup = __setup_cpu_745x + .cpu_setup = __setup_cpu_745x, + .oprofile_cpu_type = "ppc/7450", + .oprofile_type = PPC_OPROFILE_G4, + .platform = "ppc7450", }, { /* 7447/7457 Rev 1.2 and later */ .pvr_mask = 0xffff0000, @@ -633,7 +669,10 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 6, - .cpu_setup = __setup_cpu_745x + .cpu_setup = __setup_cpu_745x, + .oprofile_cpu_type = "ppc/7450", + .oprofile_type = PPC_OPROFILE_G4, + .platform = "ppc7450", }, { /* 7447A */ .pvr_mask = 0xffff0000, @@ -644,7 +683,10 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 6, - .cpu_setup = __setup_cpu_745x + .cpu_setup = __setup_cpu_745x, + .oprofile_cpu_type = "ppc/7450", + .oprofile_type = PPC_OPROFILE_G4, + .platform = "ppc7450", }, { /* 7448 */ .pvr_mask = 0xffff0000, @@ -655,7 +697,10 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 6, - .cpu_setup = __setup_cpu_745x + .cpu_setup = __setup_cpu_745x, + .oprofile_cpu_type = "ppc/7450", + .oprofile_type = PPC_OPROFILE_G4, + .platform = "ppc7450", }, { /* 82xx (8240, 8245, 8260 are all 603e cores) */ .pvr_mask = 0x7fff0000, @@ -665,7 +710,8 @@ struct cpu_spec cpu_specs[] = { .cpu_user_features = COMMON_USER, .icache_bsize = 32, .dcache_bsize = 32, - .cpu_setup = __setup_cpu_603 + .cpu_setup = __setup_cpu_603, + .platform = "ppc603", }, { /* All G2_LE (603e core, plus some) have the same pvr */ .pvr_mask = 0x7fff0000, @@ -675,7 +721,8 @@ struct cpu_spec cpu_specs[] = { .cpu_user_features = COMMON_USER, .icache_bsize = 32, .dcache_bsize = 32, - .cpu_setup = __setup_cpu_603 + .cpu_setup = __setup_cpu_603, + .platform = "ppc603", }, { /* e300 (a 603e core, plus some) on 83xx */ .pvr_mask = 0x7fff0000, @@ -685,7 +732,8 @@ struct cpu_spec cpu_specs[] = { .cpu_user_features = COMMON_USER, .icache_bsize = 32, .dcache_bsize = 32, - .cpu_setup = __setup_cpu_603 + .cpu_setup = __setup_cpu_603, + .platform = "ppc603", }, { /* default match, we assume split I/D cache & TB (non-601)... */ .pvr_mask = 0x00000000, @@ -695,6 +743,7 @@ struct cpu_spec cpu_specs[] = { .cpu_user_features = COMMON_USER, .icache_bsize = 32, .dcache_bsize = 32, + .platform = "ppc603", }, #endif /* CLASSIC_PPC */ #ifdef CONFIG_8xx @@ -708,6 +757,7 @@ struct cpu_spec cpu_specs[] = { .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, .icache_bsize = 16, .dcache_bsize = 16, + .platform = "ppc823", }, #endif /* CONFIG_8xx */ #ifdef CONFIG_40x @@ -719,6 +769,7 @@ struct cpu_spec cpu_specs[] = { .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, .icache_bsize = 16, .dcache_bsize = 16, + .platform = "ppc403", }, { /* 403GCX */ .pvr_mask = 0xffffff00, @@ -729,6 +780,7 @@ struct cpu_spec cpu_specs[] = { PPC_FEATURE_HAS_MMU | PPC_FEATURE_NO_TB, .icache_bsize = 16, .dcache_bsize = 16, + .platform = "ppc403", }, { /* 403G ?? */ .pvr_mask = 0xffff0000, @@ -738,6 +790,7 @@ struct cpu_spec cpu_specs[] = { .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, .icache_bsize = 16, .dcache_bsize = 16, + .platform = "ppc403", }, { /* 405GP */ .pvr_mask = 0xffff0000, @@ -748,6 +801,7 @@ struct cpu_spec cpu_specs[] = { PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, .icache_bsize = 32, .dcache_bsize = 32, + .platform = "ppc405", }, { /* STB 03xxx */ .pvr_mask = 0xffff0000, @@ -758,6 +812,7 @@ struct cpu_spec cpu_specs[] = { PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, .icache_bsize = 32, .dcache_bsize = 32, + .platform = "ppc405", }, { /* STB 04xxx */ .pvr_mask = 0xffff0000, @@ -768,6 +823,7 @@ struct cpu_spec cpu_specs[] = { PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, .icache_bsize = 32, .dcache_bsize = 32, + .platform = "ppc405", }, { /* NP405L */ .pvr_mask = 0xffff0000, @@ -778,6 +834,7 @@ struct cpu_spec cpu_specs[] = { PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, .icache_bsize = 32, .dcache_bsize = 32, + .platform = "ppc405", }, { /* NP4GS3 */ .pvr_mask = 0xffff0000, @@ -788,6 +845,7 @@ struct cpu_spec cpu_specs[] = { PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, .icache_bsize = 32, .dcache_bsize = 32, + .platform = "ppc405", }, { /* NP405H */ .pvr_mask = 0xffff0000, @@ -798,6 +856,7 @@ struct cpu_spec cpu_specs[] = { PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, .icache_bsize = 32, .dcache_bsize = 32, + .platform = "ppc405", }, { /* 405GPr */ .pvr_mask = 0xffff0000, @@ -808,6 +867,7 @@ struct cpu_spec cpu_specs[] = { PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, .icache_bsize = 32, .dcache_bsize = 32, + .platform = "ppc405", }, { /* STBx25xx */ .pvr_mask = 0xffff0000, @@ -818,6 +878,7 @@ struct cpu_spec cpu_specs[] = { PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, .icache_bsize = 32, .dcache_bsize = 32, + .platform = "ppc405", }, { /* 405LP */ .pvr_mask = 0xffff0000, @@ -827,6 +888,7 @@ struct cpu_spec cpu_specs[] = { .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, .icache_bsize = 32, .dcache_bsize = 32, + .platform = "ppc405", }, { /* Xilinx Virtex-II Pro */ .pvr_mask = 0xffff0000, @@ -837,6 +899,7 @@ struct cpu_spec cpu_specs[] = { PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, .icache_bsize = 32, .dcache_bsize = 32, + .platform = "ppc405", }, { /* 405EP */ .pvr_mask = 0xffff0000, @@ -847,6 +910,7 @@ struct cpu_spec cpu_specs[] = { PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, .icache_bsize = 32, .dcache_bsize = 32, + .platform = "ppc405", }, #endif /* CONFIG_40x */ @@ -856,81 +920,90 @@ struct cpu_spec cpu_specs[] = { .pvr_value = 0x40000850, .cpu_name = "440EP Rev. A", .cpu_features = CPU_FTRS_44X, - .cpu_user_features = COMMON_USER, /* 440EP has an FPU */ + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, .icache_bsize = 32, .dcache_bsize = 32, + .platform = "ppc440", }, { .pvr_mask = 0xf0000fff, .pvr_value = 0x400008d3, .cpu_name = "440EP Rev. B", .cpu_features = CPU_FTRS_44X, - .cpu_user_features = COMMON_USER, /* 440EP has an FPU */ + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, .icache_bsize = 32, .dcache_bsize = 32, + .platform = "ppc440", }, { /* 440GP Rev. B */ .pvr_mask = 0xf0000fff, .pvr_value = 0x40000440, .cpu_name = "440GP Rev. B", .cpu_features = CPU_FTRS_44X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, + .cpu_user_features = COMMON_USER_BOOKE, .icache_bsize = 32, .dcache_bsize = 32, + .platform = "ppc440gp", }, { /* 440GP Rev. C */ .pvr_mask = 0xf0000fff, .pvr_value = 0x40000481, .cpu_name = "440GP Rev. C", .cpu_features = CPU_FTRS_44X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, + .cpu_user_features = COMMON_USER_BOOKE, .icache_bsize = 32, .dcache_bsize = 32, + .platform = "ppc440gp", }, { /* 440GX Rev. A */ .pvr_mask = 0xf0000fff, .pvr_value = 0x50000850, .cpu_name = "440GX Rev. A", .cpu_features = CPU_FTRS_44X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, + .cpu_user_features = COMMON_USER_BOOKE, .icache_bsize = 32, .dcache_bsize = 32, + .platform = "ppc440", }, { /* 440GX Rev. B */ .pvr_mask = 0xf0000fff, .pvr_value = 0x50000851, .cpu_name = "440GX Rev. B", .cpu_features = CPU_FTRS_44X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, + .cpu_user_features = COMMON_USER_BOOKE, .icache_bsize = 32, .dcache_bsize = 32, + .platform = "ppc440", }, { /* 440GX Rev. C */ .pvr_mask = 0xf0000fff, .pvr_value = 0x50000892, .cpu_name = "440GX Rev. C", .cpu_features = CPU_FTRS_44X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, + .cpu_user_features = COMMON_USER_BOOKE, .icache_bsize = 32, .dcache_bsize = 32, + .platform = "ppc440", }, { /* 440GX Rev. F */ .pvr_mask = 0xf0000fff, .pvr_value = 0x50000894, .cpu_name = "440GX Rev. F", .cpu_features = CPU_FTRS_44X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, + .cpu_user_features = COMMON_USER_BOOKE, .icache_bsize = 32, .dcache_bsize = 32, + .platform = "ppc440", }, { /* 440SP Rev. A */ .pvr_mask = 0xff000fff, .pvr_value = 0x53000891, .cpu_name = "440SP Rev. A", .cpu_features = CPU_FTRS_44X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, + .cpu_user_features = COMMON_USER_BOOKE, .icache_bsize = 32, .dcache_bsize = 32, + .platform = "ppc440", }, { /* 440SPe Rev. A */ .pvr_mask = 0xff000fff, @@ -938,9 +1011,10 @@ struct cpu_spec cpu_specs[] = { .cpu_name = "440SPe Rev. A", .cpu_features = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, + .cpu_user_features = COMMON_USER_BOOKE, .icache_bsize = 32, .dcache_bsize = 32, + .platform = "ppc440", }, #endif /* CONFIG_44x */ #ifdef CONFIG_FSL_BOOKE @@ -950,10 +1024,11 @@ struct cpu_spec cpu_specs[] = { .cpu_name = "e200z5", /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */ .cpu_features = CPU_FTRS_E200, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_EFP_SINGLE | + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_HAS_EFP_SINGLE | PPC_FEATURE_UNIFIED_CACHE, .dcache_bsize = 32, + .platform = "ppc5554", }, { /* e200z6 */ .pvr_mask = 0xfff00000, @@ -961,11 +1036,12 @@ struct cpu_spec cpu_specs[] = { .cpu_name = "e200z6", /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */ .cpu_features = CPU_FTRS_E200, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_SPE_COMP | + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_SPE_COMP | PPC_FEATURE_HAS_EFP_SINGLE | PPC_FEATURE_UNIFIED_CACHE, .dcache_bsize = 32, + .platform = "ppc5554", }, { /* e500 */ .pvr_mask = 0xffff0000, @@ -973,12 +1049,15 @@ struct cpu_spec cpu_specs[] = { .cpu_name = "e500", /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */ .cpu_features = CPU_FTRS_E500, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_SPE_COMP | + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_SPE_COMP | PPC_FEATURE_HAS_EFP_SINGLE, .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 4, + .oprofile_cpu_type = "ppc/e500", + .oprofile_type = PPC_OPROFILE_BOOKE, + .platform = "ppc8540", }, { /* e500v2 */ .pvr_mask = 0xffff0000, @@ -986,12 +1065,16 @@ struct cpu_spec cpu_specs[] = { .cpu_name = "e500v2", /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */ .cpu_features = CPU_FTRS_E500_2, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_SPE_COMP | - PPC_FEATURE_HAS_EFP_SINGLE | PPC_FEATURE_HAS_EFP_DOUBLE, + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_SPE_COMP | + PPC_FEATURE_HAS_EFP_SINGLE | + PPC_FEATURE_HAS_EFP_DOUBLE, .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 4, + .oprofile_cpu_type = "ppc/e500", + .oprofile_type = PPC_OPROFILE_BOOKE, + .platform = "ppc8548", }, #endif #if !CLASSIC_PPC @@ -1003,6 +1086,7 @@ struct cpu_spec cpu_specs[] = { .cpu_user_features = PPC_FEATURE_32, .icache_bsize = 32, .dcache_bsize = 32, + .platform = "powerpc", } #endif /* !CLASSIC_PPC */ #endif /* CONFIG_PPC32 */ diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c new file mode 100644 index 000000000000..8c21d378f5d2 --- /dev/null +++ b/arch/powerpc/kernel/crash.c @@ -0,0 +1,197 @@ +/* + * Architecture specific (PPC64) functions for kexec based crash dumps. + * + * Copyright (C) 2005, IBM Corp. + * + * Created by: Haren Myneni + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + * + */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/smp.h> +#include <linux/reboot.h> +#include <linux/kexec.h> +#include <linux/bootmem.h> +#include <linux/crash_dump.h> +#include <linux/delay.h> +#include <linux/elf.h> +#include <linux/elfcore.h> +#include <linux/init.h> +#include <linux/types.h> + +#include <asm/processor.h> +#include <asm/machdep.h> +#include <asm/kdump.h> +#include <asm/lmb.h> +#include <asm/firmware.h> +#include <asm/smp.h> + +#ifdef DEBUG +#include <asm/udbg.h> +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +/* This keeps a track of which one is crashing cpu. */ +int crashing_cpu = -1; + +static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data, + size_t data_len) +{ + struct elf_note note; + + note.n_namesz = strlen(name) + 1; + note.n_descsz = data_len; + note.n_type = type; + memcpy(buf, ¬e, sizeof(note)); + buf += (sizeof(note) +3)/4; + memcpy(buf, name, note.n_namesz); + buf += (note.n_namesz + 3)/4; + memcpy(buf, data, note.n_descsz); + buf += (note.n_descsz + 3)/4; + + return buf; +} + +static void final_note(u32 *buf) +{ + struct elf_note note; + + note.n_namesz = 0; + note.n_descsz = 0; + note.n_type = 0; + memcpy(buf, ¬e, sizeof(note)); +} + +static void crash_save_this_cpu(struct pt_regs *regs, int cpu) +{ + struct elf_prstatus prstatus; + u32 *buf; + + if ((cpu < 0) || (cpu >= NR_CPUS)) + return; + + /* Using ELF notes here is opportunistic. + * I need a well defined structure format + * for the data I pass, and I need tags + * on the data to indicate what information I have + * squirrelled away. ELF notes happen to provide + * all of that that no need to invent something new. + */ + buf = (u32*)per_cpu_ptr(crash_notes, cpu); + if (!buf) + return; + + memset(&prstatus, 0, sizeof(prstatus)); + prstatus.pr_pid = current->pid; + elf_core_copy_regs(&prstatus.pr_reg, regs); + buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus, + sizeof(prstatus)); + final_note(buf); +} + +#ifdef CONFIG_SMP +static atomic_t waiting_for_crash_ipi; + +void crash_ipi_callback(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + + if (cpu == crashing_cpu) + return; + + if (!cpu_online(cpu)) + return; + + if (ppc_md.kexec_cpu_down) + ppc_md.kexec_cpu_down(1, 1); + + local_irq_disable(); + + crash_save_this_cpu(regs, cpu); + atomic_dec(&waiting_for_crash_ipi); + kexec_smp_wait(); + /* NOTREACHED */ +} + +static void crash_kexec_prepare_cpus(void) +{ + unsigned int msecs; + + atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); + + crash_send_ipi(crash_ipi_callback); + smp_wmb(); + + /* + * FIXME: Until we will have the way to stop other CPUSs reliabally, + * the crash CPU will send an IPI and wait for other CPUs to + * respond. If not, proceed the kexec boot even though we failed to + * capture other CPU states. + */ + msecs = 1000000; + while ((atomic_read(&waiting_for_crash_ipi) > 0) && (--msecs > 0)) { + barrier(); + mdelay(1); + } + + /* Would it be better to replace the trap vector here? */ + + /* + * FIXME: In case if we do not get all CPUs, one possibility: ask the + * user to do soft reset such that we get all. + * IPI handler is already set by the panic cpu initially. Therefore, + * all cpus could invoke this handler from die() and the panic CPU + * will call machine_kexec() directly from this handler to do + * kexec boot. + */ + if (atomic_read(&waiting_for_crash_ipi)) + printk(KERN_ALERT "done waiting: %d cpus not responding\n", + atomic_read(&waiting_for_crash_ipi)); + /* Leave the IPI callback set */ +} +#else +static void crash_kexec_prepare_cpus(void) +{ + /* + * move the secondarys to us so that we can copy + * the new kernel 0-0x100 safely + * + * do this if kexec in setup.c ? + */ + smp_release_cpus(); +} + +#endif + +void default_machine_crash_shutdown(struct pt_regs *regs) +{ + /* + * This function is only called after the system + * has paniced or is otherwise in a critical state. + * The minimum amount of code to allow a kexec'd kernel + * to run successfully needs to happen here. + * + * In practice this means stopping other cpus in + * an SMP system. + * The kernel is broken so disable interrupts. + */ + local_irq_disable(); + + if (ppc_md.kexec_cpu_down) + ppc_md.kexec_cpu_down(1, 0); + + /* + * Make a note of crashing cpu. Will be used in machine_kexec + * such that another IPI will not be sent. + */ + crashing_cpu = smp_processor_id(); + crash_kexec_prepare_cpus(); + crash_save_this_cpu(regs, crashing_cpu); +} diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c new file mode 100644 index 000000000000..211d72653ea6 --- /dev/null +++ b/arch/powerpc/kernel/crash_dump.c @@ -0,0 +1,111 @@ +/* + * Routines for doing kexec-based kdump. + * + * Copyright (C) 2005, IBM Corp. + * + * Created by: Michael Ellerman + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#undef DEBUG + +#include <linux/crash_dump.h> +#include <linux/bootmem.h> +#include <asm/kdump.h> +#include <asm/lmb.h> +#include <asm/firmware.h> +#include <asm/uaccess.h> + +#ifdef DEBUG +#include <asm/udbg.h> +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +static void __init create_trampoline(unsigned long addr) +{ + /* The maximum range of a single instruction branch, is the current + * instruction's address + (32 MB - 4) bytes. For the trampoline we + * need to branch to current address + 32 MB. So we insert a nop at + * the trampoline address, then the next instruction (+ 4 bytes) + * does a branch to (32 MB - 4). The net effect is that when we + * branch to "addr" we jump to ("addr" + 32 MB). Although it requires + * two instructions it doesn't require any registers. + */ + create_instruction(addr, 0x60000000); /* nop */ + create_branch(addr + 4, addr + PHYSICAL_START, 0); +} + +void __init kdump_setup(void) +{ + unsigned long i; + + DBG(" -> kdump_setup()\n"); + + for (i = KDUMP_TRAMPOLINE_START; i < KDUMP_TRAMPOLINE_END; i += 8) { + create_trampoline(i); + } + + create_trampoline(__pa(system_reset_fwnmi) - PHYSICAL_START); + create_trampoline(__pa(machine_check_fwnmi) - PHYSICAL_START); + + DBG(" <- kdump_setup()\n"); +} + +#ifdef CONFIG_PROC_VMCORE +static int __init parse_elfcorehdr(char *p) +{ + if (p) + elfcorehdr_addr = memparse(p, &p); + + return 0; +} +__setup("elfcorehdr=", parse_elfcorehdr); +#endif + +static int __init parse_savemaxmem(char *p) +{ + if (p) + saved_max_pfn = (memparse(p, &p) >> PAGE_SHIFT) - 1; + + return 0; +} +__setup("savemaxmem=", parse_savemaxmem); + +/* + * copy_oldmem_page - copy one page from "oldmem" + * @pfn: page frame number to be copied + * @buf: target memory address for the copy; this can be in kernel address + * space or user address space (see @userbuf) + * @csize: number of bytes to copy + * @offset: offset in bytes into the page (based on pfn) to begin the copy + * @userbuf: if set, @buf is in user address space, use copy_to_user(), + * otherwise @buf is in kernel address space, use memcpy(). + * + * Copy a page from "oldmem". For this page, there is no pte mapped + * in the current kernel. We stitch up a pte, similar to kmap_atomic. + */ +ssize_t copy_oldmem_page(unsigned long pfn, char *buf, + size_t csize, unsigned long offset, int userbuf) +{ + void *vaddr; + + if (!csize) + return 0; + + vaddr = __ioremap(pfn << PAGE_SHIFT, PAGE_SIZE, 0); + + if (userbuf) { + if (copy_to_user((char __user *)buf, (vaddr + offset), csize)) { + iounmap(vaddr); + return -EFAULT; + } + } else + memcpy(buf, (vaddr + offset), csize); + + iounmap(vaddr); + return csize; +} diff --git a/arch/powerpc/kernel/dma_64.c b/arch/powerpc/kernel/dma_64.c index 7c3419656ccc..36aaa7663f02 100644 --- a/arch/powerpc/kernel/dma_64.c +++ b/arch/powerpc/kernel/dma_64.c @@ -10,6 +10,7 @@ /* Include the busses we support */ #include <linux/pci.h> #include <asm/vio.h> +#include <asm/ibmebus.h> #include <asm/scatterlist.h> #include <asm/bug.h> @@ -23,6 +24,10 @@ static struct dma_mapping_ops *get_dma_ops(struct device *dev) if (dev->bus == &vio_bus_type) return &vio_dma_ops; #endif +#ifdef CONFIG_IBMEBUS + if (dev->bus == &ibmebus_bus_type) + return &ibmebus_dma_ops; +#endif return NULL; } @@ -47,6 +52,10 @@ int dma_set_mask(struct device *dev, u64 dma_mask) if (dev->bus == &vio_bus_type) return -EIO; #endif /* CONFIG_IBMVIO */ +#ifdef CONFIG_IBMEBUS + if (dev->bus == &ibmebus_bus_type) + return -EIO; +#endif BUG(); return 0; } diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 2e99ae41723c..f20a67261ec7 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -200,8 +200,6 @@ _GLOBAL(DoSyscall) bl do_show_syscall #endif /* SHOW_SYSCALLS */ rlwinm r10,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */ - li r11,0 - stb r11,TI_SC_NOERR(r10) lwz r11,TI_FLAGS(r10) andi. r11,r11,_TIF_SYSCALL_T_OR_A bne- syscall_dotrace @@ -222,25 +220,21 @@ ret_from_syscall: bl do_show_syscall_exit #endif mr r6,r3 - li r11,-_LAST_ERRNO - cmplw 0,r3,r11 rlwinm r12,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */ - blt+ 30f - lbz r11,TI_SC_NOERR(r12) - cmpwi r11,0 - bne 30f - neg r3,r3 - lwz r10,_CCR(r1) /* Set SO bit in CR */ - oris r10,r10,0x1000 - stw r10,_CCR(r1) - /* disable interrupts so current_thread_info()->flags can't change */ -30: LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */ + LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */ SYNC MTMSRD(r10) lwz r9,TI_FLAGS(r12) - andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SIGPENDING|_TIF_NEED_RESCHED) + li r8,-_LAST_ERRNO + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_RESTOREALL|_TIF_RESTORE_SIGMASK) bne- syscall_exit_work + cmplw 0,r3,r8 + blt+ syscall_exit_cont + lwz r11,_CCR(r1) /* Load CR */ + neg r3,r3 + oris r11,r11,0x1000 /* Set SO bit in CR */ + stw r11,_CCR(r1) syscall_exit_cont: #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) /* If the process has its own DBCR0 value, load it up. The single @@ -292,46 +286,113 @@ syscall_dotrace: b syscall_dotrace_cont syscall_exit_work: - stw r6,RESULT(r1) /* Save result */ + andi. r0,r9,_TIF_RESTOREALL + bne- 2f + cmplw 0,r3,r8 + blt+ 1f + andi. r0,r9,_TIF_NOERROR + bne- 1f + lwz r11,_CCR(r1) /* Load CR */ + neg r3,r3 + oris r11,r11,0x1000 /* Set SO bit in CR */ + stw r11,_CCR(r1) + +1: stw r6,RESULT(r1) /* Save result */ stw r3,GPR3(r1) /* Update return value */ - andi. r0,r9,_TIF_SYSCALL_T_OR_A - beq 5f - ori r10,r10,MSR_EE - SYNC - MTMSRD(r10) /* re-enable interrupts */ +2: andi. r0,r9,(_TIF_PERSYSCALL_MASK) + beq 4f + + /* Clear per-syscall TIF flags if any are set, but _leave_ + _TIF_SAVE_NVGPRS set in r9 since we haven't dealt with that + yet. */ + + li r11,_TIF_PERSYSCALL_MASK + addi r12,r12,TI_FLAGS +3: lwarx r8,0,r12 + andc r8,r8,r11 +#ifdef CONFIG_IBM405_ERR77 + dcbt 0,r12 +#endif + stwcx. r8,0,r12 + bne- 3b + subi r12,r12,TI_FLAGS + +4: /* Anything which requires enabling interrupts? */ + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_SAVE_NVGPRS) + beq 7f + + /* Save NVGPRS if they're not saved already */ lwz r4,_TRAP(r1) andi. r4,r4,1 - beq 4f + beq 5f SAVE_NVGPRS(r1) li r4,0xc00 stw r4,_TRAP(r1) -4: + + /* Re-enable interrupts */ +5: ori r10,r10,MSR_EE + SYNC + MTMSRD(r10) + + andi. r0,r9,_TIF_SAVE_NVGPRS + bne save_user_nvgprs + +save_user_nvgprs_cont: + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) + beq 7f + addi r3,r1,STACK_FRAME_OVERHEAD bl do_syscall_trace_leave REST_NVGPRS(r1) -2: - lwz r3,GPR3(r1) + +6: lwz r3,GPR3(r1) LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */ SYNC MTMSRD(r10) /* disable interrupts again */ rlwinm r12,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */ lwz r9,TI_FLAGS(r12) -5: +7: andi. r0,r9,_TIF_NEED_RESCHED - bne 1f + bne 8f lwz r5,_MSR(r1) andi. r5,r5,MSR_PR - beq syscall_exit_cont - andi. r0,r9,_TIF_SIGPENDING - beq syscall_exit_cont + beq ret_from_except + andi. r0,r9,_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK + beq ret_from_except b do_user_signal -1: +8: ori r10,r10,MSR_EE SYNC MTMSRD(r10) /* re-enable interrupts */ bl schedule - b 2b + b 6b + +save_user_nvgprs: + lwz r8,TI_SIGFRAME(r12) + +.macro savewords start, end + 1: stw \start,4*(\start)(r8) + .section __ex_table,"a" + .align 2 + .long 1b,save_user_nvgprs_fault + .previous + .if \end - \start + savewords "(\start+1)",\end + .endif +.endm + savewords 14,31 + b save_user_nvgprs_cont + + +save_user_nvgprs_fault: + li r3,11 /* SIGSEGV */ + lwz r4,TI_TASK(r12) + bl force_sigsegv + rlwinm r12,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */ + lwz r9,TI_FLAGS(r12) + b save_user_nvgprs_cont + #ifdef SHOW_SYSCALLS do_show_syscall: #ifdef SHOW_SYSCALLS_TASK @@ -401,28 +462,10 @@ show_syscalls_task: #endif /* SHOW_SYSCALLS */ /* - * The sigsuspend and rt_sigsuspend system calls can call do_signal - * and thus put the process into the stopped state where we might - * want to examine its user state with ptrace. Therefore we need - * to save all the nonvolatile registers (r13 - r31) before calling - * the C code. + * The fork/clone functions need to copy the full register set into + * the child process. Therefore we need to save all the nonvolatile + * registers (r13 - r31) before calling the C code. */ - .globl ppc_sigsuspend -ppc_sigsuspend: - SAVE_NVGPRS(r1) - lwz r0,_TRAP(r1) - rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ - stw r0,_TRAP(r1) /* register set saved */ - b sys_sigsuspend - - .globl ppc_rt_sigsuspend -ppc_rt_sigsuspend: - SAVE_NVGPRS(r1) - lwz r0,_TRAP(r1) - rlwinm r0,r0,0,0,30 - stw r0,_TRAP(r1) - b sys_rt_sigsuspend - .globl ppc_fork ppc_fork: SAVE_NVGPRS(r1) @@ -447,14 +490,6 @@ ppc_clone: stw r0,_TRAP(r1) /* register set saved */ b sys_clone - .globl ppc_swapcontext -ppc_swapcontext: - SAVE_NVGPRS(r1) - lwz r0,_TRAP(r1) - rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ - stw r0,_TRAP(r1) /* register set saved */ - b sys_swapcontext - /* * Top-level page fault handling. * This is in assembler because if do_page_fault tells us that @@ -626,16 +661,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_601) .long ret_from_except #endif - .globl sigreturn_exit -sigreturn_exit: - subi r1,r3,STACK_FRAME_OVERHEAD - rlwinm r12,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */ - lwz r9,TI_FLAGS(r12) - andi. r0,r9,_TIF_SYSCALL_T_OR_A - beq+ ret_from_except_full - bl do_syscall_trace_leave - /* fall through */ - .globl ret_from_except_full ret_from_except_full: REST_NVGPRS(r1) @@ -658,7 +683,7 @@ user_exc_return: /* r10 contains MSR_KERNEL here */ /* Check current_thread_info()->flags */ rlwinm r9,r1,0,0,(31-THREAD_SHIFT) lwz r9,TI_FLAGS(r9) - andi. r0,r9,(_TIF_SIGPENDING|_TIF_NEED_RESCHED) + andi. r0,r9,(_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_RESTOREALL|_TIF_RESTORE_SIGMASK) bne do_work restore_user: @@ -892,7 +917,7 @@ recheck: lwz r9,TI_FLAGS(r9) andi. r0,r9,_TIF_NEED_RESCHED bne- do_resched - andi. r0,r9,_TIF_SIGPENDING + andi. r0,r9,_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK beq restore_user do_user_signal: /* r10 contains MSR_KERNEL here */ ori r10,r10,MSR_EE @@ -963,7 +988,7 @@ _GLOBAL(enter_rtas) stwu r1,-INT_FRAME_SIZE(r1) mflr r0 stw r0,INT_FRAME_SIZE+4(r1) - LOADADDR(r4, rtas) + LOAD_REG_ADDR(r4, rtas) lis r6,1f@ha /* physical return address for rtas */ addi r6,r6,1f@l tophys(r6,r6) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index bce33a38399f..388f861b8ed1 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -113,9 +113,7 @@ system_call_common: addi r9,r1,STACK_FRAME_OVERHEAD #endif clrrdi r11,r1,THREAD_SHIFT - li r12,0 ld r10,TI_FLAGS(r11) - stb r12,TI_SC_NOERR(r11) andi. r11,r10,_TIF_SYSCALL_T_OR_A bne- syscall_dotrace syscall_dotrace_cont: @@ -144,24 +142,12 @@ system_call: /* label this so stack traces look sane */ bctrl /* Call handler */ syscall_exit: + std r3,RESULT(r1) #ifdef SHOW_SYSCALLS - std r3,GPR3(r1) bl .do_show_syscall_exit - ld r3,GPR3(r1) + ld r3,RESULT(r1) #endif - std r3,RESULT(r1) - ld r5,_CCR(r1) - li r10,-_LAST_ERRNO - cmpld r3,r10 clrrdi r12,r1,THREAD_SHIFT - bge- syscall_error -syscall_error_cont: - - /* check for syscall tracing or audit */ - ld r9,TI_FLAGS(r12) - andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) - bne- syscall_exit_trace -syscall_exit_trace_cont: /* disable interrupts so current_thread_info()->flags can't change, and so that we don't get interrupted after loading SRR0/1. */ @@ -173,8 +159,13 @@ syscall_exit_trace_cont: rotldi r10,r10,16 mtmsrd r10,1 ld r9,TI_FLAGS(r12) - andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SIGPENDING|_TIF_NEED_RESCHED) + li r11,-_LAST_ERRNO + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_RESTOREALL|_TIF_SAVE_NVGPRS|_TIF_NOERROR|_TIF_RESTORE_SIGMASK) bne- syscall_exit_work + cmpld r3,r11 + ld r5,_CCR(r1) + bge- syscall_error +syscall_error_cont: ld r7,_NIP(r1) stdcx. r0,0,r1 /* to clear the reservation */ andi. r6,r8,MSR_PR @@ -193,21 +184,12 @@ syscall_exit_trace_cont: rfid b . /* prevent speculative execution */ -syscall_enosys: - li r3,-ENOSYS - std r3,RESULT(r1) - clrrdi r12,r1,THREAD_SHIFT - ld r5,_CCR(r1) - -syscall_error: - lbz r11,TI_SC_NOERR(r12) - cmpwi 0,r11,0 - bne- syscall_error_cont - neg r3,r3 +syscall_error: oris r5,r5,0x1000 /* Set SO bit in CR */ + neg r3,r3 std r5,_CCR(r1) b syscall_error_cont - + /* Traced system call support */ syscall_dotrace: bl .save_nvgprs @@ -225,21 +207,69 @@ syscall_dotrace: ld r10,TI_FLAGS(r10) b syscall_dotrace_cont -syscall_exit_trace: - std r3,GPR3(r1) - bl .save_nvgprs +syscall_enosys: + li r3,-ENOSYS + b syscall_exit + +syscall_exit_work: + /* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr. + If TIF_NOERROR is set, just save r3 as it is. */ + + andi. r0,r9,_TIF_RESTOREALL + bne- 2f + cmpld r3,r11 /* r10 is -LAST_ERRNO */ + blt+ 1f + andi. r0,r9,_TIF_NOERROR + bne- 1f + ld r5,_CCR(r1) + neg r3,r3 + oris r5,r5,0x1000 /* Set SO bit in CR */ + std r5,_CCR(r1) +1: std r3,GPR3(r1) +2: andi. r0,r9,(_TIF_PERSYSCALL_MASK) + beq 4f + + /* Clear per-syscall TIF flags if any are set, but _leave_ + _TIF_SAVE_NVGPRS set in r9 since we haven't dealt with that + yet. */ + + li r11,_TIF_PERSYSCALL_MASK + addi r12,r12,TI_FLAGS +3: ldarx r10,0,r12 + andc r10,r10,r11 + stdcx. r10,0,r12 + bne- 3b + subi r12,r12,TI_FLAGS + +4: bl .save_nvgprs + /* Anything else left to do? */ + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_SAVE_NVGPRS) + beq .ret_from_except_lite + + /* Re-enable interrupts */ + mfmsr r10 + ori r10,r10,MSR_EE + mtmsrd r10,1 + + andi. r0,r9,_TIF_SAVE_NVGPRS + bne save_user_nvgprs + + /* If tracing, re-enable interrupts and do it */ +save_user_nvgprs_cont: + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) + beq 5f + addi r3,r1,STACK_FRAME_OVERHEAD bl .do_syscall_trace_leave REST_NVGPRS(r1) - ld r3,GPR3(r1) - ld r5,_CCR(r1) clrrdi r12,r1,THREAD_SHIFT - b syscall_exit_trace_cont -/* Stuff to do on exit from a system call. */ -syscall_exit_work: - std r3,GPR3(r1) - std r5,_CCR(r1) + /* Disable interrupts again and handle other work if any */ +5: mfmsr r10 + rldicl r10,r10,48,1 + rotldi r10,r10,16 + mtmsrd r10,1 + b .ret_from_except_lite /* Save non-volatile GPRs, if not already saved. */ @@ -252,6 +282,52 @@ _GLOBAL(save_nvgprs) std r0,_TRAP(r1) blr + +save_user_nvgprs: + ld r10,TI_SIGFRAME(r12) + andi. r0,r9,_TIF_32BIT + beq- save_user_nvgprs_64 + + /* 32-bit save to userspace */ + +.macro savewords start, end + 1: stw \start,4*(\start)(r10) + .section __ex_table,"a" + .align 3 + .llong 1b,save_user_nvgprs_fault + .previous + .if \end - \start + savewords "(\start+1)",\end + .endif +.endm + savewords 14,31 + b save_user_nvgprs_cont + +save_user_nvgprs_64: + /* 64-bit save to userspace */ + +.macro savelongs start, end + 1: std \start,8*(\start)(r10) + .section __ex_table,"a" + .align 3 + .llong 1b,save_user_nvgprs_fault + .previous + .if \end - \start + savelongs "(\start+1)",\end + .endif +.endm + savelongs 14,31 + b save_user_nvgprs_cont + +save_user_nvgprs_fault: + li r3,11 /* SIGSEGV */ + ld r4,TI_TASK(r12) + bl .force_sigsegv + + clrrdi r12,r1,THREAD_SHIFT + ld r9,TI_FLAGS(r12) + b save_user_nvgprs_cont + /* * The sigsuspend and rt_sigsuspend system calls can call do_signal * and thus put the process into the stopped state where we might @@ -260,35 +336,6 @@ _GLOBAL(save_nvgprs) * the C code. Similarly, fork, vfork and clone need the full * register state on the stack so that it can be copied to the child. */ -_GLOBAL(ppc32_sigsuspend) - bl .save_nvgprs - bl .compat_sys_sigsuspend - b 70f - -_GLOBAL(ppc64_rt_sigsuspend) - bl .save_nvgprs - bl .sys_rt_sigsuspend - b 70f - -_GLOBAL(ppc32_rt_sigsuspend) - bl .save_nvgprs - bl .compat_sys_rt_sigsuspend -70: cmpdi 0,r3,0 - /* If it returned an error, we need to return via syscall_exit to set - the SO bit in cr0 and potentially stop for ptrace. */ - bne syscall_exit - /* If sigsuspend() returns zero, we are going into a signal handler. We - may need to call audit_syscall_exit() to mark the exit from sigsuspend() */ -#ifdef CONFIG_AUDITSYSCALL - ld r3,PACACURRENT(r13) - ld r4,AUDITCONTEXT(r3) - cmpdi 0,r4,0 - beq .ret_from_except /* No audit_context: Leave immediately. */ - li r4, 2 /* AUDITSC_FAILURE */ - li r5,-4 /* It's always -EINTR */ - bl .audit_syscall_exit -#endif - b .ret_from_except _GLOBAL(ppc_fork) bl .save_nvgprs @@ -305,37 +352,6 @@ _GLOBAL(ppc_clone) bl .sys_clone b syscall_exit -_GLOBAL(ppc32_swapcontext) - bl .save_nvgprs - bl .compat_sys_swapcontext - b 80f - -_GLOBAL(ppc64_swapcontext) - bl .save_nvgprs - bl .sys_swapcontext - b 80f - -_GLOBAL(ppc32_sigreturn) - bl .compat_sys_sigreturn - b 80f - -_GLOBAL(ppc32_rt_sigreturn) - bl .compat_sys_rt_sigreturn - b 80f - -_GLOBAL(ppc64_rt_sigreturn) - bl .sys_rt_sigreturn - -80: cmpdi 0,r3,0 - blt syscall_exit - clrrdi r4,r1,THREAD_SHIFT - ld r4,TI_FLAGS(r4) - andi. r4,r4,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) - beq+ 81f - addi r3,r1,STACK_FRAME_OVERHEAD - bl .do_syscall_trace_leave -81: b .ret_from_except - _GLOBAL(ret_from_fork) bl .schedule_tail REST_NVGPRS(r1) @@ -495,7 +511,8 @@ restore: cmpdi 0,r5,0 beq 4f /* Check for pending interrupts (iSeries) */ - ld r3,PACALPPACA+LPPACAANYINT(r13) + ld r3,PACALPPACAPTR(r13) + ld r3,LPPACAANYINT(r3) cmpdi r3,0 beq+ 4f /* skip do_IRQ if no interrupts */ @@ -673,9 +690,8 @@ _GLOBAL(enter_rtas) std r6,PACASAVEDMSR(r13) /* Setup our real return addr */ - SET_REG_TO_LABEL(r4,.rtas_return_loc) - SET_REG_TO_CONST(r9,KERNELBASE) - sub r4,r4,r9 + LOAD_REG_ADDR(r4,.rtas_return_loc) + clrldi r4,r4,2 /* convert to realmode address */ mtlr r4 li r0,0 @@ -690,7 +706,7 @@ _GLOBAL(enter_rtas) sync /* disable interrupts so SRR0/1 */ mtmsrd r0 /* don't get trashed */ - SET_REG_TO_LABEL(r4,rtas) + LOAD_REG_ADDR(r4, rtas) ld r5,RTASENTRY(r4) /* get the rtas->entry value */ ld r4,RTASBASE(r4) /* get the rtas->base value */ @@ -702,8 +718,7 @@ _GLOBAL(enter_rtas) _STATIC(rtas_return_loc) /* relocation is off at this point */ mfspr r4,SPRN_SPRG3 /* Get PACA */ - SET_REG_TO_CONST(r5, KERNELBASE) - sub r4,r4,r5 /* RELOC the PACA base pointer */ + clrldi r4,r4,2 /* convert to realmode address */ mfmsr r6 li r0,MSR_RI @@ -712,7 +727,7 @@ _STATIC(rtas_return_loc) mtmsrd r6 ld r1,PACAR1(r4) /* Restore our SP */ - LOADADDR(r3,.rtas_restore_regs) + LOAD_REG_IMMEDIATE(r3,.rtas_restore_regs) ld r4,PACASAVEDMSR(r4) /* Restore our MSR */ mtspr SPRN_SRR0,r3 diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index b780b42c95fc..e4362dfa37fb 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -39,9 +39,9 @@ _GLOBAL(load_up_fpu) * to another. Instead we call giveup_fpu in switch_to. */ #ifndef CONFIG_SMP - LOADBASE(r3, last_task_used_math) + LOAD_REG_ADDRBASE(r3, last_task_used_math) toreal(r3) - PPC_LL r4,OFF(last_task_used_math)(r3) + PPC_LL r4,ADDROFF(last_task_used_math)(r3) PPC_LCMPI 0,r4,0 beq 1f toreal(r4) @@ -77,7 +77,7 @@ _GLOBAL(load_up_fpu) #ifndef CONFIG_SMP subi r4,r5,THREAD fromreal(r4) - PPC_STL r4,OFF(last_task_used_math)(r3) + PPC_STL r4,ADDROFF(last_task_used_math)(r3) #endif /* CONFIG_SMP */ /* restore registers and return */ /* we haven't used ctr or xer or lr */ @@ -113,8 +113,8 @@ _GLOBAL(giveup_fpu) 1: #ifndef CONFIG_SMP li r5,0 - LOADBASE(r4,last_task_used_math) - PPC_STL r5,OFF(last_task_used_math)(r4) + LOAD_REG_ADDRBASE(r4,last_task_used_math) + PPC_STL r5,ADDROFF(last_task_used_math)(r4) #endif /* CONFIG_SMP */ blr diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index ccdf94731e30..03b25f9359f8 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -120,10 +120,25 @@ __start: * because OF may have I/O devices mapped into that area * (particularly on CHRP). */ +#ifdef CONFIG_PPC_MULTIPLATFORM cmpwi 0,r5,0 beq 1f bl prom_init trap +#endif + +/* + * Check for BootX signature when supporting PowerMac and branch to + * appropriate trampoline if it's present + */ +#ifdef CONFIG_PPC_PMAC +1: lis r31,0x426f + ori r31,r31,0x6f58 + cmpw 0,r3,r31 + bne 1f + bl bootx_init + trap +#endif /* CONFIG_PPC_PMAC */ 1: mr r31,r3 /* save parameters */ mr r30,r4 @@ -153,6 +168,9 @@ __after_mmu_off: bl flush_tlbs bl initial_bats +#if !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT) + bl setup_disp_bat +#endif /* * Call setup_cpu for CPU 0 and initialize 6xx Idle @@ -450,16 +468,11 @@ SystemCall: * by executing an altivec instruction. */ . = 0xf00 - b Trap_0f + b PerformanceMonitor . = 0xf20 b AltiVecUnavailable -Trap_0f: - EXCEPTION_PROLOG - addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE(0xf00, unknown_exception) - /* * Handle TLB miss for instruction on 603/603e. * Note: we get an alternate set of r0 - r3 to use automatically. @@ -703,6 +716,11 @@ AltiVecUnavailable: #endif /* CONFIG_ALTIVEC */ EXC_XFER_EE_LITE(0xf20, altivec_unavailable_exception) +PerformanceMonitor: + EXCEPTION_PROLOG + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_STD(0xf00, performance_monitor_exception) + #ifdef CONFIG_ALTIVEC /* Note that the AltiVec support is closely modeled after the FP * support. Changes to one are likely to be applicable to the @@ -1306,6 +1324,32 @@ initial_bats: blr +#if !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT) +setup_disp_bat: + /* + * setup the display bat prepared for us in prom.c + */ + mflr r8 + bl reloc_offset + mtlr r8 + addis r8,r3,disp_BAT@ha + addi r8,r8,disp_BAT@l + cmpwi cr0,r8,0 + beqlr + lwz r11,0(r8) + lwz r8,4(r8) + mfspr r9,SPRN_PVR + rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */ + cmpwi 0,r9,1 + beq 1f + mtspr SPRN_DBAT3L,r8 + mtspr SPRN_DBAT3U,r11 + blr +1: mtspr SPRN_IBAT3L,r8 + mtspr SPRN_IBAT3U,r11 + blr +#endif /* !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT) */ + #ifdef CONFIG_8260 /* Jump into the system reset for the rom. * We first disable the MMU, and then jump to the ROM reset address. diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 8a8bf79ef044..308268466342 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -154,11 +154,15 @@ _GLOBAL(__secondary_hold) bne 100b #ifdef CONFIG_HMT - b .hmt_init + SET_REG_IMMEDIATE(r4, .hmt_init) + mtctr r4 + bctr #else #ifdef CONFIG_SMP + LOAD_REG_IMMEDIATE(r4, .pSeries_secondary_smp_init) + mtctr r4 mr r3,r24 - b .pSeries_secondary_smp_init + bctr #else BUG_OPCODE #endif @@ -200,6 +204,21 @@ exception_marker: #define EX_R3 64 #define EX_LR 72 +/* + * We're short on space and time in the exception prolog, so we can't + * use the normal SET_REG_IMMEDIATE macro. Normally we just need the + * low halfword of the address, but for Kdump we need the whole low + * word. + */ +#ifdef CONFIG_CRASH_DUMP +#define LOAD_HANDLER(reg, label) \ + oris reg,reg,(label)@h; /* virt addr of handler ... */ \ + ori reg,reg,(label)@l; /* .. and the rest */ +#else +#define LOAD_HANDLER(reg, label) \ + ori reg,reg,(label)@l; /* virt addr of handler ... */ +#endif + #define EXCEPTION_PROLOG_PSERIES(area, label) \ mfspr r13,SPRN_SPRG3; /* get paca address into r13 */ \ std r9,area+EX_R9(r13); /* save r9 - r12 */ \ @@ -212,7 +231,7 @@ exception_marker: clrrdi r12,r13,32; /* get high part of &label */ \ mfmsr r10; \ mfspr r11,SPRN_SRR0; /* save SRR0 */ \ - ori r12,r12,(label)@l; /* virt addr of handler */ \ + LOAD_HANDLER(r12,label) \ ori r10,r10,MSR_IR|MSR_DR|MSR_RI; \ mtspr SPRN_SRR0,r12; \ mfspr r12,SPRN_SRR1; /* and SRR1 */ \ @@ -236,8 +255,9 @@ exception_marker: #define EXCEPTION_PROLOG_ISERIES_2 \ mfmsr r10; \ - ld r11,PACALPPACA+LPPACASRR0(r13); \ - ld r12,PACALPPACA+LPPACASRR1(r13); \ + ld r12,PACALPPACAPTR(r13); \ + ld r11,LPPACASRR0(r12); \ + ld r12,LPPACASRR1(r12); \ ori r10,r10,MSR_RI; \ mtmsrd r10,1 @@ -553,6 +573,7 @@ slb_miss_user_pseries: * Vectors for the FWNMI option. Share common code. */ .globl system_reset_fwnmi + .align 7 system_reset_fwnmi: HMT_MEDIUM mtspr SPRN_SPRG1,r13 /* save r13 */ @@ -560,6 +581,7 @@ system_reset_fwnmi: EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common) .globl machine_check_fwnmi + .align 7 machine_check_fwnmi: HMT_MEDIUM mtspr SPRN_SPRG1,r13 /* save r13 */ @@ -614,7 +636,8 @@ data_access_slb_iSeries: std r12,PACA_EXSLB+EX_R12(r13) mfspr r10,SPRN_SPRG1 std r10,PACA_EXSLB+EX_R13(r13) - ld r12,PACALPPACA+LPPACASRR1(r13); + ld r12,PACALPPACAPTR(r13) + ld r12,LPPACASRR1(r12) b .slb_miss_realmode STD_EXCEPTION_ISERIES(0x400, instruction_access, PACA_EXGEN) @@ -624,7 +647,8 @@ instruction_access_slb_iSeries: mtspr SPRN_SPRG1,r13 /* save r13 */ mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ std r3,PACA_EXSLB+EX_R3(r13) - ld r3,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */ + ld r3,PACALPPACAPTR(r13) + ld r3,LPPACASRR0(r3) /* get SRR0 value */ std r9,PACA_EXSLB+EX_R9(r13) mfcr r9 #ifdef __DISABLED__ @@ -636,7 +660,8 @@ instruction_access_slb_iSeries: std r12,PACA_EXSLB+EX_R12(r13) mfspr r10,SPRN_SPRG1 std r10,PACA_EXSLB+EX_R13(r13) - ld r12,PACALPPACA+LPPACASRR1(r13); + ld r12,PACALPPACAPTR(r13) + ld r12,LPPACASRR1(r12) b .slb_miss_realmode #ifdef __DISABLED__ @@ -693,7 +718,7 @@ system_reset_iSeries: lbz r23,PACAPROCSTART(r13) /* Test if this processor * should start */ sync - LOADADDR(r3,current_set) + LOAD_REG_IMMEDIATE(r3,current_set) sldi r28,r24,3 /* get current_set[cpu#] */ ldx r3,r3,r28 addi r1,r3,THREAD_SIZE @@ -725,16 +750,19 @@ iSeries_secondary_smp_loop: .globl decrementer_iSeries_masked decrementer_iSeries_masked: li r11,1 - stb r11,PACALPPACA+LPPACADECRINT(r13) - lwz r12,PACADEFAULTDECR(r13) + ld r12,PACALPPACAPTR(r13) + stb r11,LPPACADECRINT(r12) + LOAD_REG_ADDRBASE(r12,tb_ticks_per_jiffy) + lwz r12,ADDROFF(tb_ticks_per_jiffy)(r12) mtspr SPRN_DEC,r12 /* fall through */ .globl hardware_interrupt_iSeries_masked hardware_interrupt_iSeries_masked: mtcrf 0x80,r9 /* Restore regs */ - ld r11,PACALPPACA+LPPACASRR0(r13) - ld r12,PACALPPACA+LPPACASRR1(r13) + ld r12,PACALPPACAPTR(r13) + ld r11,LPPACASRR0(r12) + ld r12,LPPACASRR1(r12) mtspr SPRN_SRR0,r11 mtspr SPRN_SRR1,r12 ld r9,PACA_EXGEN+EX_R9(r13) @@ -973,7 +1001,8 @@ _GLOBAL(slb_miss_realmode) ld r3,PACA_EXSLB+EX_R3(r13) lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ #ifdef CONFIG_PPC_ISERIES - ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */ + ld r11,PACALPPACAPTR(r13) + ld r11,LPPACASRR0(r11) /* get SRR0 value */ #endif /* CONFIG_PPC_ISERIES */ mtlr r10 @@ -1345,7 +1374,7 @@ _GLOBAL(do_stab_bolted) * fixed address (the linker can't compute (u64)&initial_stab >> * PAGE_SHIFT). */ - . = STAB0_PHYS_ADDR /* 0x6000 */ + . = STAB0_OFFSET /* 0x6000 */ .globl initial_stab initial_stab: .space 4096 @@ -1391,7 +1420,7 @@ _GLOBAL(pSeries_secondary_smp_init) * physical cpu id in r24, we need to search the pacas to find * which logical id maps to our physical one. */ - LOADADDR(r13, paca) /* Get base vaddr of paca array */ + LOAD_REG_IMMEDIATE(r13, paca) /* Get base vaddr of paca array */ li r5,0 /* logical cpu id */ 1: lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */ cmpw r6,r24 /* Compare to our id */ @@ -1425,8 +1454,8 @@ _GLOBAL(pSeries_secondary_smp_init) #ifdef CONFIG_PPC_ISERIES _STATIC(__start_initialization_iSeries) /* Clear out the BSS */ - LOADADDR(r11,__bss_stop) - LOADADDR(r8,__bss_start) + LOAD_REG_IMMEDIATE(r11,__bss_stop) + LOAD_REG_IMMEDIATE(r8,__bss_start) sub r11,r11,r8 /* bss size */ addi r11,r11,7 /* round up to an even double word */ rldicl. r11,r11,61,3 /* shift right by 3 */ @@ -1437,17 +1466,17 @@ _STATIC(__start_initialization_iSeries) 3: stdu r0,8(r8) bdnz 3b 4: - LOADADDR(r1,init_thread_union) + LOAD_REG_IMMEDIATE(r1,init_thread_union) addi r1,r1,THREAD_SIZE li r0,0 stdu r0,-STACK_FRAME_OVERHEAD(r1) - LOADADDR(r3,cpu_specs) - LOADADDR(r4,cur_cpu_spec) + LOAD_REG_IMMEDIATE(r3,cpu_specs) + LOAD_REG_IMMEDIATE(r4,cur_cpu_spec) li r5,0 bl .identify_cpu - LOADADDR(r2,__toc_start) + LOAD_REG_IMMEDIATE(r2,__toc_start) addi r2,r2,0x4000 addi r2,r2,0x4000 @@ -1485,11 +1514,13 @@ _STATIC(__mmu_off) * */ _GLOBAL(__start_initialization_multiplatform) +#ifdef CONFIG_PPC_MULTIPLATFORM /* * Are we booted from a PROM Of-type client-interface ? */ cmpldi cr0,r5,0 bne .__boot_from_prom /* yes -> prom */ +#endif /* Save parameters */ mr r31,r3 @@ -1505,11 +1536,12 @@ _GLOBAL(__start_initialization_multiplatform) li r24,0 /* Switch off MMU if not already */ - LOADADDR(r4, .__after_prom_start - KERNELBASE) + LOAD_REG_IMMEDIATE(r4, .__after_prom_start - KERNELBASE) add r4,r4,r30 bl .__mmu_off b .__after_prom_start +#ifdef CONFIG_PPC_MULTIPLATFORM _STATIC(__boot_from_prom) /* Save parameters */ mr r31,r3 @@ -1524,7 +1556,7 @@ _STATIC(__boot_from_prom) /* put a relocation offset into r3 */ bl .reloc_offset - LOADADDR(r2,__toc_start) + LOAD_REG_IMMEDIATE(r2,__toc_start) addi r2,r2,0x4000 addi r2,r2,0x4000 @@ -1542,6 +1574,7 @@ _STATIC(__boot_from_prom) bl .prom_init /* We never return */ trap +#endif /* * At this point, r3 contains the physical address we are running at, @@ -1550,7 +1583,7 @@ _STATIC(__boot_from_prom) _STATIC(__after_prom_start) /* - * We need to run with __start at physical address 0. + * We need to run with __start at physical address PHYSICAL_START. * This will leave some code in the first 256B of * real memory, which are reserved for software use. * The remainder of the first page is loaded with the fixed @@ -1563,9 +1596,9 @@ _STATIC(__after_prom_start) */ bl .reloc_offset mr r26,r3 - SET_REG_TO_CONST(r27,KERNELBASE) + LOAD_REG_IMMEDIATE(r27, KERNELBASE) - li r3,0 /* target addr */ + LOAD_REG_IMMEDIATE(r3, PHYSICAL_START) /* target addr */ // XXX FIXME: Use phys returned by OF (r30) add r4,r27,r26 /* source addr */ @@ -1573,7 +1606,7 @@ _STATIC(__after_prom_start) /* i.e. where we are running */ /* the source addr */ - LOADADDR(r5,copy_to_here) /* # bytes of memory to copy */ + LOAD_REG_IMMEDIATE(r5,copy_to_here) /* # bytes of memory to copy */ sub r5,r5,r27 li r6,0x100 /* Start offset, the first 0x100 */ @@ -1583,11 +1616,11 @@ _STATIC(__after_prom_start) /* this includes the code being */ /* executed here. */ - LOADADDR(r0, 4f) /* Jump to the copy of this code */ + LOAD_REG_IMMEDIATE(r0, 4f) /* Jump to the copy of this code */ mtctr r0 /* that we just made/relocated */ bctr -4: LOADADDR(r5,klimit) +4: LOAD_REG_IMMEDIATE(r5,klimit) add r5,r5,r26 ld r5,0(r5) /* get the value of klimit */ sub r5,r5,r27 @@ -1669,7 +1702,7 @@ _GLOBAL(pmac_secondary_start) mtmsrd r3 /* RI on */ /* Set up a paca value for this processor. */ - LOADADDR(r4, paca) /* Get base vaddr of paca array */ + LOAD_REG_IMMEDIATE(r4, paca) /* Get base vaddr of paca array */ mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */ add r13,r13,r4 /* for this processor. */ mtspr SPRN_SPRG3,r13 /* Save vaddr of paca in SPRG3 */ @@ -1706,7 +1739,7 @@ _GLOBAL(__secondary_start) bl .early_setup_secondary /* Initialize the kernel stack. Just a repeat for iSeries. */ - LOADADDR(r3,current_set) + LOAD_REG_ADDR(r3, current_set) sldi r28,r24,3 /* get current_set[cpu#] */ ldx r1,r3,r28 addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD @@ -1717,8 +1750,8 @@ _GLOBAL(__secondary_start) mtlr r7 /* enable MMU and jump to start_secondary */ - LOADADDR(r3,.start_secondary_prolog) - SET_REG_TO_CONST(r4, MSR_KERNEL) + LOAD_REG_ADDR(r3, .start_secondary_prolog) + LOAD_REG_IMMEDIATE(r4, MSR_KERNEL) #ifdef DO_SOFT_DISABLE ori r4,r4,MSR_EE #endif @@ -1767,8 +1800,8 @@ _STATIC(start_here_multiplatform) * be detached from the kernel completely. Besides, we need * to clear it now for kexec-style entry. */ - LOADADDR(r11,__bss_stop) - LOADADDR(r8,__bss_start) + LOAD_REG_IMMEDIATE(r11,__bss_stop) + LOAD_REG_IMMEDIATE(r8,__bss_start) sub r11,r11,r8 /* bss size */ addi r11,r11,7 /* round up to an even double word */ rldicl. r11,r11,61,3 /* shift right by 3 */ @@ -1806,7 +1839,7 @@ _STATIC(start_here_multiplatform) /* up the htab. This is done because we have relocated the */ /* kernel but are still running in real mode. */ - LOADADDR(r3,init_thread_union) + LOAD_REG_IMMEDIATE(r3,init_thread_union) add r3,r3,r26 /* set up a stack pointer (physical address) */ @@ -1815,14 +1848,14 @@ _STATIC(start_here_multiplatform) stdu r0,-STACK_FRAME_OVERHEAD(r1) /* set up the TOC (physical address) */ - LOADADDR(r2,__toc_start) + LOAD_REG_IMMEDIATE(r2,__toc_start) addi r2,r2,0x4000 addi r2,r2,0x4000 add r2,r2,r26 - LOADADDR(r3,cpu_specs) + LOAD_REG_IMMEDIATE(r3, cpu_specs) add r3,r3,r26 - LOADADDR(r4,cur_cpu_spec) + LOAD_REG_IMMEDIATE(r4,cur_cpu_spec) add r4,r4,r26 mr r5,r26 bl .identify_cpu @@ -1838,15 +1871,15 @@ _STATIC(start_here_multiplatform) * nowhere it can be initialized differently before we reach this * code */ - LOADADDR(r27, boot_cpuid) + LOAD_REG_IMMEDIATE(r27, boot_cpuid) add r27,r27,r26 lwz r27,0(r27) - LOADADDR(r24, paca) /* Get base vaddr of paca array */ + LOAD_REG_IMMEDIATE(r24, paca) /* Get base vaddr of paca array */ mulli r13,r27,PACA_SIZE /* Calculate vaddr of right paca */ add r13,r13,r24 /* for this processor. */ add r13,r13,r26 /* convert to physical addr */ - mtspr SPRN_SPRG3,r13 /* PPPBBB: Temp... -Peter */ + mtspr SPRN_SPRG3,r13 /* Do very early kernel initializations, including initial hash table, * stab and slb setup before we turn on relocation. */ @@ -1855,8 +1888,8 @@ _STATIC(start_here_multiplatform) mr r3,r31 bl .early_setup - LOADADDR(r3,.start_here_common) - SET_REG_TO_CONST(r4, MSR_KERNEL) + LOAD_REG_IMMEDIATE(r3, .start_here_common) + LOAD_REG_IMMEDIATE(r4, MSR_KERNEL) mtspr SPRN_SRR0,r3 mtspr SPRN_SRR1,r4 rfid @@ -1870,7 +1903,7 @@ _STATIC(start_here_common) /* The following code sets up the SP and TOC now that we are */ /* running with translation enabled. */ - LOADADDR(r3,init_thread_union) + LOAD_REG_IMMEDIATE(r3,init_thread_union) /* set up the stack */ addi r1,r3,THREAD_SIZE @@ -1883,16 +1916,16 @@ _STATIC(start_here_common) li r3,0 bl .do_cpu_ftr_fixups - LOADADDR(r26, boot_cpuid) + LOAD_REG_IMMEDIATE(r26, boot_cpuid) lwz r26,0(r26) - LOADADDR(r24, paca) /* Get base vaddr of paca array */ + LOAD_REG_IMMEDIATE(r24, paca) /* Get base vaddr of paca array */ mulli r13,r26,PACA_SIZE /* Calculate vaddr of right paca */ add r13,r13,r24 /* for this processor. */ mtspr SPRN_SPRG3,r13 /* ptr to current */ - LOADADDR(r4,init_task) + LOAD_REG_IMMEDIATE(r4, init_task) std r4,PACACURRENT(r13) /* Load the TOC */ @@ -1915,7 +1948,7 @@ _STATIC(start_here_common) _GLOBAL(hmt_init) #ifdef CONFIG_HMT - LOADADDR(r5, hmt_thread_data) + LOAD_REG_IMMEDIATE(r5, hmt_thread_data) mfspr r7,SPRN_PVR srwi r7,r7,16 cmpwi r7,0x34 /* Pulsar */ @@ -1936,7 +1969,7 @@ _GLOBAL(hmt_init) b 101f __hmt_secondary_hold: - LOADADDR(r5, hmt_thread_data) + LOAD_REG_IMMEDIATE(r5, hmt_thread_data) clrldi r5,r5,4 li r7,0 mfspr r6,SPRN_PIR @@ -1964,7 +1997,7 @@ __hmt_secondary_hold: #ifdef CONFIG_HMT _GLOBAL(hmt_start_secondary) - LOADADDR(r4,__hmt_secondary_hold) + LOAD_REG_IMMEDIATE(r4,__hmt_secondary_hold) clrldi r4,r4,4 mtspr SPRN_NIADORM, r4 mfspr r4, SPRN_MSRDORM diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c new file mode 100644 index 000000000000..e47d40ac6f39 --- /dev/null +++ b/arch/powerpc/kernel/ibmebus.c @@ -0,0 +1,396 @@ +/* + * IBM PowerPC IBM eBus Infrastructure Support. + * + * Copyright (c) 2005 IBM Corporation + * Heiko J Schick <schickhj@de.ibm.com> + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/init.h> +#include <linux/console.h> +#include <linux/kobject.h> +#include <linux/dma-mapping.h> +#include <linux/interrupt.h> +#include <asm/ibmebus.h> +#include <asm/abs_addr.h> + +static struct ibmebus_dev ibmebus_bus_device = { /* fake "parent" device */ + .name = ibmebus_bus_device.ofdev.dev.bus_id, + .ofdev.dev.bus_id = "ibmebus", + .ofdev.dev.bus = &ibmebus_bus_type, +}; + +static void *ibmebus_alloc_coherent(struct device *dev, + size_t size, + dma_addr_t *dma_handle, + gfp_t flag) +{ + void *mem; + + mem = kmalloc(size, flag); + *dma_handle = (dma_addr_t)mem; + + return mem; +} + +static void ibmebus_free_coherent(struct device *dev, + size_t size, void *vaddr, + dma_addr_t dma_handle) +{ + kfree(vaddr); +} + +static dma_addr_t ibmebus_map_single(struct device *dev, + void *ptr, + size_t size, + enum dma_data_direction direction) +{ + return (dma_addr_t)(ptr); +} + +static void ibmebus_unmap_single(struct device *dev, + dma_addr_t dma_addr, + size_t size, + enum dma_data_direction direction) +{ + return; +} + +static int ibmebus_map_sg(struct device *dev, + struct scatterlist *sg, + int nents, enum dma_data_direction direction) +{ + int i; + + for (i = 0; i < nents; i++) { + sg[i].dma_address = (dma_addr_t)page_address(sg[i].page) + + sg[i].offset; + sg[i].dma_length = sg[i].length; + } + + return nents; +} + +static void ibmebus_unmap_sg(struct device *dev, + struct scatterlist *sg, + int nents, enum dma_data_direction direction) +{ + return; +} + +static int ibmebus_dma_supported(struct device *dev, u64 mask) +{ + return 1; +} + +struct dma_mapping_ops ibmebus_dma_ops = { + .alloc_coherent = ibmebus_alloc_coherent, + .free_coherent = ibmebus_free_coherent, + .map_single = ibmebus_map_single, + .unmap_single = ibmebus_unmap_single, + .map_sg = ibmebus_map_sg, + .unmap_sg = ibmebus_unmap_sg, + .dma_supported = ibmebus_dma_supported, +}; + +static int ibmebus_bus_probe(struct device *dev) +{ + struct ibmebus_dev *ibmebusdev = to_ibmebus_dev(dev); + struct ibmebus_driver *ibmebusdrv = to_ibmebus_driver(dev->driver); + const struct of_device_id *id; + int error = -ENODEV; + + if (!ibmebusdrv->probe) + return error; + + id = of_match_device(ibmebusdrv->id_table, &ibmebusdev->ofdev); + if (id) { + error = ibmebusdrv->probe(ibmebusdev, id); + } + + return error; +} + +static int ibmebus_bus_remove(struct device *dev) +{ + struct ibmebus_dev *ibmebusdev = to_ibmebus_dev(dev); + struct ibmebus_driver *ibmebusdrv = to_ibmebus_driver(dev->driver); + + if (ibmebusdrv->remove) { + return ibmebusdrv->remove(ibmebusdev); + } + + return 0; +} + +static void __devinit ibmebus_dev_release(struct device *dev) +{ + of_node_put(to_ibmebus_dev(dev)->ofdev.node); + kfree(to_ibmebus_dev(dev)); +} + +static ssize_t ibmebusdev_show_name(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%s\n", to_ibmebus_dev(dev)->name); +} +static DEVICE_ATTR(name, S_IRUSR | S_IRGRP | S_IROTH, ibmebusdev_show_name, + NULL); + +static struct ibmebus_dev* __devinit ibmebus_register_device_common( + struct ibmebus_dev *dev, char *name) +{ + int err = 0; + + dev->name = name; + dev->ofdev.dev.parent = &ibmebus_bus_device.ofdev.dev; + dev->ofdev.dev.bus = &ibmebus_bus_type; + dev->ofdev.dev.release = ibmebus_dev_release; + + /* An ibmebusdev is based on a of_device. We have to change the + * bus type to use our own DMA mapping operations. + */ + if ((err = of_device_register(&dev->ofdev)) != 0) { + printk(KERN_ERR "%s: failed to register device (%d).\n", + __FUNCTION__, err); + return NULL; + } + + device_create_file(&dev->ofdev.dev, &dev_attr_name); + + return dev; +} + +static struct ibmebus_dev* __devinit ibmebus_register_device_node( + struct device_node *dn) +{ + struct ibmebus_dev *dev; + char *loc_code; + int length; + + loc_code = (char *)get_property(dn, "ibm,loc-code", NULL); + if (!loc_code) { + printk(KERN_WARNING "%s: node %s missing 'ibm,loc-code'\n", + __FUNCTION__, dn->name ? dn->name : "<unknown>"); + return NULL; + } + + if (strlen(loc_code) == 0) { + printk(KERN_WARNING "%s: 'ibm,loc-code' is invalid\n", + __FUNCTION__); + return NULL; + } + + dev = kmalloc(sizeof(struct ibmebus_dev), GFP_KERNEL); + if (!dev) { + return NULL; + } + memset(dev, 0, sizeof(struct ibmebus_dev)); + + dev->ofdev.node = of_node_get(dn); + + length = strlen(loc_code); + memcpy(dev->ofdev.dev.bus_id, loc_code + + (length - min(length, BUS_ID_SIZE - 1)), + min(length, BUS_ID_SIZE - 1)); + + /* Register with generic device framework. */ + if (ibmebus_register_device_common(dev, dn->name) == NULL) { + kfree(dev); + return NULL; + } + + return dev; +} + +static void ibmebus_probe_of_nodes(char* name) +{ + struct device_node *dn = NULL; + + while ((dn = of_find_node_by_name(dn, name))) { + if (ibmebus_register_device_node(dn) == NULL) { + of_node_put(dn); + + return; + } + } + + of_node_put(dn); + + return; +} + +static void ibmebus_add_devices_by_id(struct of_device_id *idt) +{ + while (strlen(idt->name) > 0) { + ibmebus_probe_of_nodes(idt->name); + idt++; + } + + return; +} + +static int ibmebus_match_helper(struct device *dev, void *data) +{ + if (strcmp((char*)data, to_ibmebus_dev(dev)->name) == 0) + return 1; + + return 0; +} + +static int ibmebus_unregister_device(struct device *dev) +{ + device_remove_file(dev, &dev_attr_name); + of_device_unregister(to_of_device(dev)); + + return 0; +} + +static void ibmebus_remove_devices_by_id(struct of_device_id *idt) +{ + struct device *dev; + + while (strlen(idt->name) > 0) { + while ((dev = bus_find_device(&ibmebus_bus_type, NULL, + (void*)idt->name, + ibmebus_match_helper))) { + ibmebus_unregister_device(dev); + } + idt++; + + } + + return; +} + +int ibmebus_register_driver(struct ibmebus_driver *drv) +{ + int err = 0; + + drv->driver.name = drv->name; + drv->driver.bus = &ibmebus_bus_type; + drv->driver.probe = ibmebus_bus_probe; + drv->driver.remove = ibmebus_bus_remove; + + if ((err = driver_register(&drv->driver) != 0)) + return err; + + ibmebus_add_devices_by_id(drv->id_table); + + return 0; +} +EXPORT_SYMBOL(ibmebus_register_driver); + +void ibmebus_unregister_driver(struct ibmebus_driver *drv) +{ + driver_unregister(&drv->driver); + ibmebus_remove_devices_by_id(drv->id_table); +} +EXPORT_SYMBOL(ibmebus_unregister_driver); + +int ibmebus_request_irq(struct ibmebus_dev *dev, + u32 ist, + irqreturn_t (*handler)(int, void*, struct pt_regs *), + unsigned long irq_flags, const char * devname, + void *dev_id) +{ + unsigned int irq = virt_irq_create_mapping(ist); + + if (irq == NO_IRQ) + return -EINVAL; + + irq = irq_offset_up(irq); + + return request_irq(irq, handler, + irq_flags, devname, dev_id); +} +EXPORT_SYMBOL(ibmebus_request_irq); + +void ibmebus_free_irq(struct ibmebus_dev *dev, u32 ist, void *dev_id) +{ + unsigned int irq = virt_irq_create_mapping(ist); + + irq = irq_offset_up(irq); + free_irq(irq, dev_id); + + return; +} +EXPORT_SYMBOL(ibmebus_free_irq); + +static int ibmebus_bus_match(struct device *dev, struct device_driver *drv) +{ + const struct ibmebus_dev *ebus_dev = to_ibmebus_dev(dev); + struct ibmebus_driver *ebus_drv = to_ibmebus_driver(drv); + const struct of_device_id *ids = ebus_drv->id_table; + const struct of_device_id *found_id; + + if (!ids) + return 0; + + found_id = of_match_device(ids, &ebus_dev->ofdev); + if (found_id) + return 1; + + return 0; +} + +struct bus_type ibmebus_bus_type = { + .name = "ibmebus", + .match = ibmebus_bus_match, +}; +EXPORT_SYMBOL(ibmebus_bus_type); + +static int __init ibmebus_bus_init(void) +{ + int err; + + printk(KERN_INFO "IBM eBus Device Driver\n"); + + err = bus_register(&ibmebus_bus_type); + if (err) { + printk(KERN_ERR ":%s: failed to register IBM eBus.\n", + __FUNCTION__); + return err; + } + + err = device_register(&ibmebus_bus_device.ofdev.dev); + if (err) { + printk(KERN_WARNING "%s: device_register returned %i\n", + __FUNCTION__, err); + bus_unregister(&ibmebus_bus_type); + + return err; + } + + return 0; +} +__initcall(ibmebus_bus_init); diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S index 1494e2f177f7..c16b4afab582 100644 --- a/arch/powerpc/kernel/idle_power4.S +++ b/arch/powerpc/kernel/idle_power4.S @@ -38,14 +38,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP) /* We must dynamically check for the NAP feature as it * can be cleared by CPU init after the fixups are done */ - LOADBASE(r3,cur_cpu_spec) - ld r4,OFF(cur_cpu_spec)(r3) + LOAD_REG_ADDRBASE(r3,cur_cpu_spec) + ld r4,ADDROFF(cur_cpu_spec)(r3) ld r4,CPU_SPEC_FEATURES(r4) andi. r0,r4,CPU_FTR_CAN_NAP beqlr /* Now check if user or arch enabled NAP mode */ - LOADBASE(r3,powersave_nap) - lwz r4,OFF(powersave_nap)(r3) + LOAD_REG_ADDRBASE(r3,powersave_nap) + lwz r4,ADDROFF(powersave_nap)(r3) cmpwi 0,r4,0 beqlr diff --git a/arch/powerpc/kernel/ioctl32.c b/arch/powerpc/kernel/ioctl32.c deleted file mode 100644 index 0fa3d27fef01..000000000000 --- a/arch/powerpc/kernel/ioctl32.c +++ /dev/null @@ -1,45 +0,0 @@ -/* - * ioctl32.c: Conversion between 32bit and 64bit native ioctls. - * - * Based on sparc64 ioctl32.c by: - * - * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com) - * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) - * - * ppc64 changes: - * - * Copyright (C) 2000 Ken Aaker (kdaaker@rchland.vnet.ibm.com) - * Copyright (C) 2001 Anton Blanchard (antonb@au.ibm.com) - * - * These routines maintain argument size conversion between 32bit and 64bit - * ioctls. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#define INCLUDES -#include "compat_ioctl.c" -#include <linux/syscalls.h> - -#define CODE -#include "compat_ioctl.c" - -#define HANDLE_IOCTL(cmd,handler) { cmd, (ioctl_trans_handler_t)handler, NULL }, -#define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL(cmd,sys_ioctl) - -#define IOCTL_TABLE_START \ - struct ioctl_trans ioctl_start[] = { -#define IOCTL_TABLE_END \ - }; - -IOCTL_TABLE_START -#include <linux/compat_ioctl.h> -#define DECLARES -#include "compat_ioctl.c" - -IOCTL_TABLE_END - -int ioctl_table_size = ARRAY_SIZE(ioctl_start); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 5a71ed9612fe..d1fffce86df9 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -31,7 +31,6 @@ * to reduce code space and undefined function references. */ -#include <linux/errno.h> #include <linux/module.h> #include <linux/threads.h> #include <linux/kernel_stat.h> @@ -44,18 +43,12 @@ #include <linux/config.h> #include <linux/init.h> #include <linux/slab.h> -#include <linux/pci.h> #include <linux/delay.h> #include <linux/irq.h> -#include <linux/proc_fs.h> -#include <linux/random.h> #include <linux/seq_file.h> #include <linux/cpumask.h> #include <linux/profile.h> #include <linux/bitops.h> -#ifdef CONFIG_PPC64 -#include <linux/kallsyms.h> -#endif #include <asm/uaccess.h> #include <asm/system.h> @@ -66,8 +59,7 @@ #include <asm/prom.h> #include <asm/ptrace.h> #include <asm/machdep.h> -#ifdef CONFIG_PPC64 -#include <asm/iseries/it_lp_queue.h> +#ifdef CONFIG_PPC_ISERIES #include <asm/paca.h> #endif @@ -78,10 +70,6 @@ EXPORT_SYMBOL(__irq_offset_value); static int ppc_spurious_interrupts; -#if defined(CONFIG_PPC_ISERIES) && defined(CONFIG_SMP) -extern void iSeries_smp_message_recv(struct pt_regs *); -#endif - #ifdef CONFIG_PPC32 #define NR_MASK_WORDS ((NR_IRQS + 31) / 32) @@ -195,49 +183,6 @@ void fixup_irqs(cpumask_t map) } #endif -#ifdef CONFIG_PPC_ISERIES -void do_IRQ(struct pt_regs *regs) -{ - struct paca_struct *lpaca; - - irq_enter(); - -#ifdef CONFIG_DEBUG_STACKOVERFLOW - /* Debugging check for stack overflow: is there less than 2KB free? */ - { - long sp; - - sp = __get_SP() & (THREAD_SIZE-1); - - if (unlikely(sp < (sizeof(struct thread_info) + 2048))) { - printk("do_IRQ: stack overflow: %ld\n", - sp - sizeof(struct thread_info)); - dump_stack(); - } - } -#endif - - lpaca = get_paca(); -#ifdef CONFIG_SMP - if (lpaca->lppaca.int_dword.fields.ipi_cnt) { - lpaca->lppaca.int_dword.fields.ipi_cnt = 0; - iSeries_smp_message_recv(regs); - } -#endif /* CONFIG_SMP */ - if (hvlpevent_is_pending()) - process_hvlpevents(regs); - - irq_exit(); - - if (lpaca->lppaca.int_dword.fields.decr_int) { - lpaca->lppaca.int_dword.fields.decr_int = 0; - /* Signal a fake decrementer interrupt */ - timer_interrupt(regs); - } -} - -#else /* CONFIG_PPC_ISERIES */ - void do_IRQ(struct pt_regs *regs) { int irq; @@ -286,16 +231,20 @@ void do_IRQ(struct pt_regs *regs) } else #endif __do_IRQ(irq, regs); - } else -#ifdef CONFIG_PPC32 - if (irq != -2) -#endif - /* That's not SMP safe ... but who cares ? */ - ppc_spurious_interrupts++; + } else if (irq != -2) + /* That's not SMP safe ... but who cares ? */ + ppc_spurious_interrupts++; + irq_exit(); -} -#endif /* CONFIG_PPC_ISERIES */ +#ifdef CONFIG_PPC_ISERIES + if (get_lppaca()->int_dword.fields.decr_int) { + get_lppaca()->int_dword.fields.decr_int = 0; + /* Signal a fake decrementer interrupt */ + timer_interrupt(regs); + } +#endif +} void __init init_IRQ(void) { diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 5368f9c2e6bf..cfab48566db1 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -35,7 +35,6 @@ #include <asm/kdebug.h> #include <asm/sstep.h> -static DECLARE_MUTEX(kprobe_mutex); DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); @@ -54,19 +53,17 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) /* insn must be on a special executable page on ppc64 */ if (!ret) { - down(&kprobe_mutex); p->ainsn.insn = get_insn_slot(); - up(&kprobe_mutex); if (!p->ainsn.insn) ret = -ENOMEM; } - return ret; -} -void __kprobes arch_copy_kprobe(struct kprobe *p) -{ - memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); - p->opcode = *p->addr; + if (!ret) { + memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + p->opcode = *p->addr; + } + + return ret; } void __kprobes arch_arm_kprobe(struct kprobe *p) @@ -182,6 +179,18 @@ static inline int kprobe_handler(struct pt_regs *regs) kcb->kprobe_status = KPROBE_REENTER; return 1; } else { + if (*addr != BREAKPOINT_INSTRUCTION) { + /* If trap variant, then it belongs not to us */ + kprobe_opcode_t cur_insn = *addr; + if (is_trap(cur_insn)) + goto no_kprobe; + /* The breakpoint instruction was removed by + * another cpu right after we hit, no further + * handling of this interrupt is appropriate + */ + ret = 1; + goto no_kprobe; + } p = __get_cpu_var(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { goto ss_probe; diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c new file mode 100644 index 000000000000..f970ace208d3 --- /dev/null +++ b/arch/powerpc/kernel/legacy_serial.c @@ -0,0 +1,557 @@ +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/serial.h> +#include <linux/serial_8250.h> +#include <linux/serial_core.h> +#include <linux/console.h> +#include <linux/pci.h> +#include <asm/io.h> +#include <asm/mmu.h> +#include <asm/prom.h> +#include <asm/serial.h> +#include <asm/udbg.h> +#include <asm/pci-bridge.h> +#include <asm/ppc-pci.h> + +#undef DEBUG + +#ifdef DEBUG +#define DBG(fmt...) do { printk(fmt); } while(0) +#else +#define DBG(fmt...) do { } while(0) +#endif + +#define MAX_LEGACY_SERIAL_PORTS 8 + +static struct plat_serial8250_port +legacy_serial_ports[MAX_LEGACY_SERIAL_PORTS+1]; +static struct legacy_serial_info { + struct device_node *np; + unsigned int speed; + unsigned int clock; + phys_addr_t taddr; +} legacy_serial_infos[MAX_LEGACY_SERIAL_PORTS]; +static unsigned int legacy_serial_count; +static int legacy_serial_console = -1; + +static int __init add_legacy_port(struct device_node *np, int want_index, + int iotype, phys_addr_t base, + phys_addr_t taddr, unsigned long irq, + unsigned int flags) +{ + u32 *clk, *spd, clock = BASE_BAUD * 16; + int index; + + /* get clock freq. if present */ + clk = (u32 *)get_property(np, "clock-frequency", NULL); + if (clk && *clk) + clock = *clk; + + /* get default speed if present */ + spd = (u32 *)get_property(np, "current-speed", NULL); + + /* If we have a location index, then try to use it */ + if (want_index >= 0 && want_index < MAX_LEGACY_SERIAL_PORTS) + index = want_index; + else + index = legacy_serial_count; + + /* if our index is still out of range, that mean that + * array is full, we could scan for a free slot but that + * make little sense to bother, just skip the port + */ + if (index >= MAX_LEGACY_SERIAL_PORTS) + return -1; + if (index >= legacy_serial_count) + legacy_serial_count = index + 1; + + /* Check if there is a port who already claimed our slot */ + if (legacy_serial_infos[index].np != 0) { + /* if we still have some room, move it, else override */ + if (legacy_serial_count < MAX_LEGACY_SERIAL_PORTS) { + printk(KERN_INFO "Moved legacy port %d -> %d\n", + index, legacy_serial_count); + legacy_serial_ports[legacy_serial_count] = + legacy_serial_ports[index]; + legacy_serial_infos[legacy_serial_count] = + legacy_serial_infos[index]; + legacy_serial_count++; + } else { + printk(KERN_INFO "Replacing legacy port %d\n", index); + } + } + + /* Now fill the entry */ + memset(&legacy_serial_ports[index], 0, + sizeof(struct plat_serial8250_port)); + if (iotype == UPIO_PORT) + legacy_serial_ports[index].iobase = base; + else + legacy_serial_ports[index].mapbase = base; + legacy_serial_ports[index].iotype = iotype; + legacy_serial_ports[index].uartclk = clock; + legacy_serial_ports[index].irq = irq; + legacy_serial_ports[index].flags = flags; + legacy_serial_infos[index].taddr = taddr; + legacy_serial_infos[index].np = of_node_get(np); + legacy_serial_infos[index].clock = clock; + legacy_serial_infos[index].speed = spd ? *spd : 0; + + printk(KERN_INFO "Found legacy serial port %d for %s\n", + index, np->full_name); + printk(KERN_INFO " %s=%llx, taddr=%llx, irq=%lx, clk=%d, speed=%d\n", + (iotype == UPIO_PORT) ? "port" : "mem", + (unsigned long long)base, (unsigned long long)taddr, irq, + legacy_serial_ports[index].uartclk, + legacy_serial_infos[index].speed); + + return index; +} + +static int __init add_legacy_soc_port(struct device_node *np, + struct device_node *soc_dev) +{ + phys_addr_t addr; + u32 *addrp; + unsigned int flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ; + + /* We only support ports that have a clock frequency properly + * encoded in the device-tree. + */ + if (get_property(np, "clock-frequency", NULL) == NULL) + return -1; + + /* Get the address */ + addrp = of_get_address(soc_dev, 0, NULL, NULL); + if (addrp == NULL) + return -1; + + addr = of_translate_address(soc_dev, addrp); + + /* Add port, irq will be dealt with later. We passed a translated + * IO port value. It will be fixed up later along with the irq + */ + return add_legacy_port(np, -1, UPIO_MEM, addr, addr, NO_IRQ, flags); +} + +#ifdef CONFIG_ISA +static int __init add_legacy_isa_port(struct device_node *np, + struct device_node *isa_brg) +{ + u32 *reg; + char *typep; + int index = -1; + phys_addr_t taddr; + + /* Get the ISA port number */ + reg = (u32 *)get_property(np, "reg", NULL); + if (reg == NULL) + return -1; + + /* Verify it's an IO port, we don't support anything else */ + if (!(reg[0] & 0x00000001)) + return -1; + + /* Now look for an "ibm,aix-loc" property that gives us ordering + * if any... + */ + typep = (char *)get_property(np, "ibm,aix-loc", NULL); + + /* If we have a location index, then use it */ + if (typep && *typep == 'S') + index = simple_strtol(typep+1, NULL, 0) - 1; + + /* Translate ISA address */ + taddr = of_translate_address(np, reg); + + /* Add port, irq will be dealt with later */ + return add_legacy_port(np, index, UPIO_PORT, reg[1], taddr, NO_IRQ, UPF_BOOT_AUTOCONF); + +} +#endif + +#ifdef CONFIG_PCI +static int __init add_legacy_pci_port(struct device_node *np, + struct device_node *pci_dev) +{ + phys_addr_t addr, base; + u32 *addrp; + unsigned int flags; + int iotype, index = -1, lindex = 0; + + /* We only support ports that have a clock frequency properly + * encoded in the device-tree (that is have an fcode). Anything + * else can't be used that early and will be normally probed by + * the generic 8250_pci driver later on. The reason is that 8250 + * compatible UARTs on PCI need all sort of quirks (port offsets + * etc...) that this code doesn't know about + */ + if (get_property(np, "clock-frequency", NULL) == NULL) + return -1; + + /* Get the PCI address. Assume BAR 0 */ + addrp = of_get_pci_address(pci_dev, 0, NULL, &flags); + if (addrp == NULL) + return -1; + + /* We only support BAR 0 for now */ + iotype = (flags & IORESOURCE_MEM) ? UPIO_MEM : UPIO_PORT; + addr = of_translate_address(pci_dev, addrp); + + /* Set the IO base to the same as the translated address for MMIO, + * or to the domain local IO base for PIO (it will be fixed up later) + */ + if (iotype == UPIO_MEM) + base = addr; + else + base = addrp[2]; + + /* Try to guess an index... If we have subdevices of the pci dev, + * we get to their "reg" property + */ + if (np != pci_dev) { + u32 *reg = (u32 *)get_property(np, "reg", NULL); + if (reg && (*reg < 4)) + index = lindex = *reg; + } + + /* Local index means it's the Nth port in the PCI chip. Unfortunately + * the offset to add here is device specific. We know about those + * EXAR ports and we default to the most common case. If your UART + * doesn't work for these settings, you'll have to add your own special + * cases here + */ + if (device_is_compatible(pci_dev, "pci13a8,152") || + device_is_compatible(pci_dev, "pci13a8,154") || + device_is_compatible(pci_dev, "pci13a8,158")) { + addr += 0x200 * lindex; + base += 0x200 * lindex; + } else { + addr += 8 * lindex; + base += 8 * lindex; + } + + /* Add port, irq will be dealt with later. We passed a translated + * IO port value. It will be fixed up later along with the irq + */ + return add_legacy_port(np, index, iotype, base, addr, NO_IRQ, UPF_BOOT_AUTOCONF); +} +#endif + +/* + * This is called very early, as part of setup_system() or eventually + * setup_arch(), basically before anything else in this file. This function + * will try to build a list of all the available 8250-compatible serial ports + * in the machine using the Open Firmware device-tree. It currently only deals + * with ISA and PCI busses but could be extended. It allows a very early boot + * console to be initialized, that list is also used later to provide 8250 with + * the machine non-PCI ports and to properly pick the default console port + */ +void __init find_legacy_serial_ports(void) +{ + struct device_node *np, *stdout = NULL; + char *path; + int index; + + DBG(" -> find_legacy_serial_port()\n"); + + /* Now find out if one of these is out firmware console */ + path = (char *)get_property(of_chosen, "linux,stdout-path", NULL); + if (path != NULL) { + stdout = of_find_node_by_path(path); + if (stdout) + DBG("stdout is %s\n", stdout->full_name); + } else { + DBG(" no linux,stdout-path !\n"); + } + + /* First fill our array with SOC ports */ + for (np = NULL; (np = of_find_compatible_node(np, "serial", "ns16550")) != NULL;) { + struct device_node *soc = of_get_parent(np); + if (soc && !strcmp(soc->type, "soc")) { + index = add_legacy_soc_port(np, np); + if (index >= 0 && np == stdout) + legacy_serial_console = index; + } + of_node_put(soc); + } + +#ifdef CONFIG_ISA + /* First fill our array with ISA ports */ + for (np = NULL; (np = of_find_node_by_type(np, "serial"));) { + struct device_node *isa = of_get_parent(np); + if (isa && !strcmp(isa->name, "isa")) { + index = add_legacy_isa_port(np, isa); + if (index >= 0 && np == stdout) + legacy_serial_console = index; + } + of_node_put(isa); + } +#endif + +#ifdef CONFIG_PCI + /* Next, try to locate PCI ports */ + for (np = NULL; (np = of_find_all_nodes(np));) { + struct device_node *pci, *parent = of_get_parent(np); + if (parent && !strcmp(parent->name, "isa")) { + of_node_put(parent); + continue; + } + if (strcmp(np->name, "serial") && strcmp(np->type, "serial")) { + of_node_put(parent); + continue; + } + /* Check for known pciclass, and also check wether we have + * a device with child nodes for ports or not + */ + if (device_is_compatible(np, "pciclass,0700") || + device_is_compatible(np, "pciclass,070002")) + pci = np; + else if (device_is_compatible(parent, "pciclass,0700") || + device_is_compatible(parent, "pciclass,070002")) + pci = parent; + else { + of_node_put(parent); + continue; + } + index = add_legacy_pci_port(np, pci); + if (index >= 0 && np == stdout) + legacy_serial_console = index; + of_node_put(parent); + } +#endif + + DBG("legacy_serial_console = %d\n", legacy_serial_console); + + /* udbg is 64 bits only for now, that will change soon though ... */ + while (legacy_serial_console >= 0) { + struct legacy_serial_info *info = + &legacy_serial_infos[legacy_serial_console]; + void __iomem *addr; + + if (info->taddr == 0) + break; + addr = ioremap(info->taddr, 0x1000); + if (addr == NULL) + break; + if (info->speed == 0) + info->speed = udbg_probe_uart_speed(addr, info->clock); + DBG("default console speed = %d\n", info->speed); + udbg_init_uart(addr, info->speed, info->clock); + break; + } + + DBG(" <- find_legacy_serial_port()\n"); +} + +static struct platform_device serial_device = { + .name = "serial8250", + .id = PLAT8250_DEV_PLATFORM, + .dev = { + .platform_data = legacy_serial_ports, + }, +}; + +static void __init fixup_port_irq(int index, + struct device_node *np, + struct plat_serial8250_port *port) +{ + DBG("fixup_port_irq(%d)\n", index); + + /* Check for interrupts in that node */ + if (np->n_intrs > 0) { + port->irq = np->intrs[0].line; + DBG(" port %d (%s), irq=%d\n", + index, np->full_name, port->irq); + return; + } + + /* Check for interrupts in the parent */ + np = of_get_parent(np); + if (np == NULL) + return; + + if (np->n_intrs > 0) { + port->irq = np->intrs[0].line; + DBG(" port %d (%s), irq=%d\n", + index, np->full_name, port->irq); + } + of_node_put(np); +} + +static void __init fixup_port_pio(int index, + struct device_node *np, + struct plat_serial8250_port *port) +{ +#ifdef CONFIG_PCI + struct pci_controller *hose; + + DBG("fixup_port_pio(%d)\n", index); + + hose = pci_find_hose_for_OF_device(np); + if (hose) { + unsigned long offset = (unsigned long)hose->io_base_virt - +#ifdef CONFIG_PPC64 + pci_io_base; +#else + isa_io_base; +#endif + DBG("port %d, IO %lx -> %lx\n", + index, port->iobase, port->iobase + offset); + port->iobase += offset; + } +#endif +} + +static void __init fixup_port_mmio(int index, + struct device_node *np, + struct plat_serial8250_port *port) +{ + DBG("fixup_port_mmio(%d)\n", index); + + port->membase = ioremap(port->mapbase, 0x100); +} + +/* + * This is called as an arch initcall, hopefully before the PCI bus is + * probed and/or the 8250 driver loaded since we need to register our + * platform devices before 8250 PCI ones are detected as some of them + * must properly "override" the platform ones. + * + * This function fixes up the interrupt value for platform ports as it + * couldn't be done earlier before interrupt maps have been parsed. It + * also "corrects" the IO address for PIO ports for the same reason, + * since earlier, the PHBs virtual IO space wasn't assigned yet. It then + * registers all those platform ports for use by the 8250 driver when it + * finally loads. + */ +static int __init serial_dev_init(void) +{ + int i; + + if (legacy_serial_count == 0) + return -ENODEV; + + /* + * Before we register the platfrom serial devices, we need + * to fixup their interrutps and their IO ports. + */ + DBG("Fixing serial ports interrupts and IO ports ...\n"); + + for (i = 0; i < legacy_serial_count; i++) { + struct plat_serial8250_port *port = &legacy_serial_ports[i]; + struct device_node *np = legacy_serial_infos[i].np; + + if (port->irq == NO_IRQ) + fixup_port_irq(i, np, port); + if (port->iotype == UPIO_PORT) + fixup_port_pio(i, np, port); + if (port->iotype == UPIO_MEM) + fixup_port_mmio(i, np, port); + } + + DBG("Registering platform serial ports\n"); + + return platform_device_register(&serial_device); +} +arch_initcall(serial_dev_init); + + +/* + * This is called very early, as part of console_init() (typically just after + * time_init()). This function is respondible for trying to find a good + * default console on serial ports. It tries to match the open firmware + * default output with one of the available serial console drivers, either + * one of the platform serial ports that have been probed earlier by + * find_legacy_serial_ports() or some more platform specific ones. + */ +static int __init check_legacy_serial_console(void) +{ + struct device_node *prom_stdout = NULL; + int speed = 0, offset = 0; + char *name; + u32 *spd; + + DBG(" -> check_legacy_serial_console()\n"); + + /* The user has requested a console so this is already set up. */ + if (strstr(saved_command_line, "console=")) { + DBG(" console was specified !\n"); + return -EBUSY; + } + + if (!of_chosen) { + DBG(" of_chosen is NULL !\n"); + return -ENODEV; + } + + if (legacy_serial_console < 0) { + DBG(" legacy_serial_console not found !\n"); + return -ENODEV; + } + /* We are getting a weird phandle from OF ... */ + /* ... So use the full path instead */ + name = (char *)get_property(of_chosen, "linux,stdout-path", NULL); + if (name == NULL) { + DBG(" no linux,stdout-path !\n"); + return -ENODEV; + } + prom_stdout = of_find_node_by_path(name); + if (!prom_stdout) { + DBG(" can't find stdout package %s !\n", name); + return -ENODEV; + } + DBG("stdout is %s\n", prom_stdout->full_name); + + name = (char *)get_property(prom_stdout, "name", NULL); + if (!name) { + DBG(" stdout package has no name !\n"); + goto not_found; + } + spd = (u32 *)get_property(prom_stdout, "current-speed", NULL); + if (spd) + speed = *spd; + + if (0) + ; +#ifdef CONFIG_SERIAL_8250_CONSOLE + else if (strcmp(name, "serial") == 0) { + int i; + /* Look for it in probed array */ + for (i = 0; i < legacy_serial_count; i++) { + if (prom_stdout != legacy_serial_infos[i].np) + continue; + offset = i; + speed = legacy_serial_infos[i].speed; + break; + } + if (i >= legacy_serial_count) + goto not_found; + } +#endif /* CONFIG_SERIAL_8250_CONSOLE */ +#ifdef CONFIG_SERIAL_PMACZILOG_CONSOLE + else if (strcmp(name, "ch-a") == 0) + offset = 0; + else if (strcmp(name, "ch-b") == 0) + offset = 1; +#endif /* CONFIG_SERIAL_PMACZILOG_CONSOLE */ + else + goto not_found; + of_node_put(prom_stdout); + + DBG("Found serial console at ttyS%d\n", offset); + + if (speed) { + static char __initdata opt[16]; + sprintf(opt, "%d", speed); + return add_preferred_console("ttyS", offset, opt); + } else + return add_preferred_console("ttyS", offset, NULL); + + not_found: + DBG("No preferred console found !\n"); + of_node_put(prom_stdout); + return -ENODEV; +} +console_initcall(check_legacy_serial_console); + diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index 9dda16ccde78..1ae96a8ed7e2 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -55,15 +55,13 @@ static unsigned long get_purr(void) { unsigned long sum_purr = 0; int cpu; - struct paca_struct *lpaca; for_each_cpu(cpu) { - lpaca = paca + cpu; - sum_purr += lpaca->lppaca.emulated_time_base; + sum_purr += lppaca[cpu].emulated_time_base; #ifdef PURR_DEBUG printk(KERN_INFO "get_purr for cpu (%d) has value (%ld) \n", - cpu, lpaca->lppaca.emulated_time_base); + cpu, lppaca[cpu].emulated_time_base); #endif } return sum_purr; @@ -79,12 +77,11 @@ static int lparcfg_data(struct seq_file *m, void *v) unsigned long pool_id, lp_index; int shared, entitled_capacity, max_entitled_capacity; int processors, max_processors; - struct paca_struct *lpaca = get_paca(); unsigned long purr = get_purr(); seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS); - shared = (int)(lpaca->lppaca_ptr->shared_proc); + shared = (int)(get_lppaca()->shared_proc); seq_printf(m, "serial_number=%c%c%c%c%c%c%c\n", e2a(xItExtVpdPanel.mfgID[2]), e2a(xItExtVpdPanel.mfgID[3]), @@ -402,7 +399,7 @@ static int lparcfg_data(struct seq_file *m, void *v) (h_resource >> 0 * 8) & 0xffff); /* pool related entries are apropriate for shared configs */ - if (paca[0].lppaca.shared_proc) { + if (lppaca[0].shared_proc) { h_pic(&pool_idle_time, &pool_procs); @@ -451,7 +448,7 @@ static int lparcfg_data(struct seq_file *m, void *v) seq_printf(m, "partition_potential_processors=%d\n", partition_potential_processors); - seq_printf(m, "shared_processor_mode=%d\n", paca[0].lppaca.shared_proc); + seq_printf(m, "shared_processor_mode=%d\n", lppaca[0].shared_proc); return 0; } diff --git a/arch/powerpc/kernel/lparmap.c b/arch/powerpc/kernel/lparmap.c index 5a05a797485f..584d1e3c013d 100644 --- a/arch/powerpc/kernel/lparmap.c +++ b/arch/powerpc/kernel/lparmap.c @@ -7,7 +7,7 @@ * 2 of the License, or (at your option) any later version. */ #include <asm/mmu.h> -#include <asm/page.h> +#include <asm/pgtable.h> #include <asm/iseries/lpar_map.h> const struct LparMap __attribute__((__section__(".text"))) xLparMap = { @@ -16,16 +16,16 @@ const struct LparMap __attribute__((__section__(".text"))) xLparMap = { .xSegmentTableOffs = STAB0_PAGE, .xEsids = { - { .xKernelEsid = GET_ESID(KERNELBASE), - .xKernelVsid = KERNEL_VSID(KERNELBASE), }, - { .xKernelEsid = GET_ESID(VMALLOCBASE), - .xKernelVsid = KERNEL_VSID(VMALLOCBASE), }, + { .xKernelEsid = GET_ESID(PAGE_OFFSET), + .xKernelVsid = KERNEL_VSID(PAGE_OFFSET), }, + { .xKernelEsid = GET_ESID(VMALLOC_START), + .xKernelVsid = KERNEL_VSID(VMALLOC_START), }, }, .xRanges = { { .xPages = HvPagesToMap, .xOffset = 0, - .xVPN = KERNEL_VSID(KERNELBASE) << (SID_SHIFT - HW_PAGE_SHIFT), + .xVPN = KERNEL_VSID(PAGE_OFFSET) << (SID_SHIFT - HW_PAGE_SHIFT), }, }, }; diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c new file mode 100644 index 000000000000..a81ca1b841ec --- /dev/null +++ b/arch/powerpc/kernel/machine_kexec.c @@ -0,0 +1,61 @@ +/* + * Code to handle transition of Linux booting another kernel. + * + * Copyright (C) 2002-2003 Eric Biederman <ebiederm@xmission.com> + * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz + * Copyright (C) 2005 IBM Corporation. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <linux/kexec.h> +#include <linux/reboot.h> +#include <linux/threads.h> +#include <asm/machdep.h> + +void machine_crash_shutdown(struct pt_regs *regs) +{ + if (ppc_md.machine_crash_shutdown) + ppc_md.machine_crash_shutdown(regs); +} + +/* + * Do what every setup is needed on image and the + * reboot code buffer to allow us to avoid allocations + * later. + */ +int machine_kexec_prepare(struct kimage *image) +{ + if (ppc_md.machine_kexec_prepare) + return ppc_md.machine_kexec_prepare(image); + /* + * Fail if platform doesn't provide its own machine_kexec_prepare + * implementation. + */ + return -ENOSYS; +} + +void machine_kexec_cleanup(struct kimage *image) +{ + if (ppc_md.machine_kexec_cleanup) + ppc_md.machine_kexec_cleanup(image); +} + +/* + * Do not allocate memory (or fail in any way) in machine_kexec(). + * We are past the point of no return, committed to rebooting now. + */ +NORET_TYPE void machine_kexec(struct kimage *image) +{ + if (ppc_md.machine_kexec) + ppc_md.machine_kexec(image); + else { + /* + * Fall back to normal restart if platform doesn't provide + * its own kexec function, and user insist to kexec... + */ + machine_restart(NULL); + } + for(;;); +} diff --git a/arch/powerpc/kernel/machine_kexec_32.c b/arch/powerpc/kernel/machine_kexec_32.c new file mode 100644 index 000000000000..443606134dff --- /dev/null +++ b/arch/powerpc/kernel/machine_kexec_32.c @@ -0,0 +1,65 @@ +/* + * PPC32 code to handle Linux booting another kernel. + * + * Copyright (C) 2002-2003 Eric Biederman <ebiederm@xmission.com> + * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz + * Copyright (C) 2005 IBM Corporation. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <linux/kexec.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <asm/cacheflush.h> +#include <asm/hw_irq.h> +#include <asm/io.h> + +typedef NORET_TYPE void (*relocate_new_kernel_t)( + unsigned long indirection_page, + unsigned long reboot_code_buffer, + unsigned long start_address) ATTRIB_NORET; + +/* + * This is a generic machine_kexec function suitable at least for + * non-OpenFirmware embedded platforms. + * It merely copies the image relocation code to the control page and + * jumps to it. + * A platform specific function may just call this one. + */ +void default_machine_kexec(struct kimage *image) +{ + const extern unsigned char relocate_new_kernel[]; + const extern unsigned int relocate_new_kernel_size; + unsigned long page_list; + unsigned long reboot_code_buffer, reboot_code_buffer_phys; + relocate_new_kernel_t rnk; + + /* Interrupts aren't acceptable while we reboot */ + local_irq_disable(); + + page_list = image->head; + + /* we need both effective and real address here */ + reboot_code_buffer = + (unsigned long)page_address(image->control_code_page); + reboot_code_buffer_phys = virt_to_phys((void *)reboot_code_buffer); + + /* copy our kernel relocation code to the control code page */ + memcpy((void *)reboot_code_buffer, relocate_new_kernel, + relocate_new_kernel_size); + + flush_icache_range(reboot_code_buffer, + reboot_code_buffer + KEXEC_CONTROL_CODE_SIZE); + printk(KERN_INFO "Bye!\n"); + + /* now call it */ + rnk = (relocate_new_kernel_t) reboot_code_buffer; + (*rnk)(page_list, reboot_code_buffer_phys, image->start); +} + +int default_machine_kexec_prepare(struct kimage *image) +{ + return 0; +} diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 97c51e452be7..d6431440c54f 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -1,5 +1,5 @@ /* - * machine_kexec.c - handle transition of Linux booting another kernel + * PPC64 code to handle Linux booting another kernel. * * Copyright (C) 2004-2005, IBM Corp. * @@ -28,21 +28,7 @@ #define HASH_GROUP_SIZE 0x80 /* size of each hash group, asm/mmu.h */ -/* Have this around till we move it into crash specific file */ -note_buf_t crash_notes[NR_CPUS]; - -/* Dummy for now. Not sure if we need to have a crash shutdown in here - * and if what it will achieve. Letting it be now to compile the code - * in generic kexec environment - */ -void machine_crash_shutdown(struct pt_regs *regs) -{ - /* do nothing right now */ - /* smp_relase_cpus() if we want smp on panic kernel */ - /* cpu_irq_down to isolate us until we are ready */ -} - -int machine_kexec_prepare(struct kimage *image) +int default_machine_kexec_prepare(struct kimage *image) { int i; unsigned long begin, end; /* limits of segment */ @@ -111,11 +97,6 @@ int machine_kexec_prepare(struct kimage *image) return 0; } -void machine_kexec_cleanup(struct kimage *image) -{ - /* we do nothing in prepare that needs to be undone */ -} - #define IND_FLAGS (IND_DESTINATION | IND_INDIRECTION | IND_DONE | IND_SOURCE) static void copy_segments(unsigned long ind) @@ -172,9 +153,8 @@ void kexec_copy_flush(struct kimage *image) * including ones that were in place on the original copy */ for (i = 0; i < nr_segments; i++) - flush_icache_range(ranges[i].mem + KERNELBASE, - ranges[i].mem + KERNELBASE + - ranges[i].memsz); + flush_icache_range((unsigned long)__va(ranges[i].mem), + (unsigned long)__va(ranges[i].mem + ranges[i].memsz)); } #ifdef CONFIG_SMP @@ -283,13 +263,20 @@ extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start, void (*clear_all)(void)) ATTRIB_NORET; /* too late to fail here */ -void machine_kexec(struct kimage *image) +void default_machine_kexec(struct kimage *image) { - /* prepare control code if any */ - /* shutdown other cpus into our wait loop and quiesce interrupts */ - kexec_prepare_cpus(); + /* + * If the kexec boot is the normal one, need to shutdown other cpus + * into our wait loop and quiesce interrupts. + * Otherwise, in the case of crashed mode (crashing_cpu >= 0), + * stopping other CPUs and collecting their pt_regs is done before + * using debugger IPI. + */ + + if (crashing_cpu == -1) + kexec_prepare_cpus(); /* switch to a staticly allocated stack. Based on irq stack code. * XXX: the task struct will likely be invalid once we do the copy! diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 624a983a9676..be982023409e 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -5,6 +5,10 @@ * Largely rewritten by Cort Dougan (cort@cs.nmt.edu) * and Paul Mackerras. * + * kexec bits: + * Copyright (C) 2002-2003 Eric Biederman <ebiederm@xmission.com> + * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -24,6 +28,8 @@ #include <asm/ppc_asm.h> #include <asm/thread_info.h> #include <asm/asm-offsets.h> +#include <asm/processor.h> +#include <asm/kexec.h> .text @@ -62,7 +68,7 @@ _GLOBAL(reloc_offset) mflr r0 bl 1f 1: mflr r3 - LOADADDR(r4,1b) + LOAD_REG_IMMEDIATE(r4,1b) subf r3,r4,r3 mtlr r0 blr @@ -74,7 +80,7 @@ _GLOBAL(add_reloc_offset) mflr r0 bl 1f 1: mflr r5 - LOADADDR(r4,1b) + LOAD_REG_IMMEDIATE(r4,1b) subf r5,r4,r5 add r3,r3,r5 mtlr r0 @@ -1006,3 +1012,110 @@ _GLOBAL(execve) */ _GLOBAL(__main) blr + +#ifdef CONFIG_KEXEC + /* + * Must be relocatable PIC code callable as a C function. + */ + .globl relocate_new_kernel +relocate_new_kernel: + /* r3 = page_list */ + /* r4 = reboot_code_buffer */ + /* r5 = start_address */ + + li r0, 0 + + /* + * Set Machine Status Register to a known status, + * switch the MMU off and jump to 1: in a single step. + */ + + mr r8, r0 + ori r8, r8, MSR_RI|MSR_ME + mtspr SPRN_SRR1, r8 + addi r8, r4, 1f - relocate_new_kernel + mtspr SPRN_SRR0, r8 + sync + rfi + +1: + /* from this point address translation is turned off */ + /* and interrupts are disabled */ + + /* set a new stack at the bottom of our page... */ + /* (not really needed now) */ + addi r1, r4, KEXEC_CONTROL_CODE_SIZE - 8 /* for LR Save+Back Chain */ + stw r0, 0(r1) + + /* Do the copies */ + li r6, 0 /* checksum */ + mr r0, r3 + b 1f + +0: /* top, read another word for the indirection page */ + lwzu r0, 4(r3) + +1: + /* is it a destination page? (r8) */ + rlwinm. r7, r0, 0, 31, 31 /* IND_DESTINATION (1<<0) */ + beq 2f + + rlwinm r8, r0, 0, 0, 19 /* clear kexec flags, page align */ + b 0b + +2: /* is it an indirection page? (r3) */ + rlwinm. r7, r0, 0, 30, 30 /* IND_INDIRECTION (1<<1) */ + beq 2f + + rlwinm r3, r0, 0, 0, 19 /* clear kexec flags, page align */ + subi r3, r3, 4 + b 0b + +2: /* are we done? */ + rlwinm. r7, r0, 0, 29, 29 /* IND_DONE (1<<2) */ + beq 2f + b 3f + +2: /* is it a source page? (r9) */ + rlwinm. r7, r0, 0, 28, 28 /* IND_SOURCE (1<<3) */ + beq 0b + + rlwinm r9, r0, 0, 0, 19 /* clear kexec flags, page align */ + + li r7, PAGE_SIZE / 4 + mtctr r7 + subi r9, r9, 4 + subi r8, r8, 4 +9: + lwzu r0, 4(r9) /* do the copy */ + xor r6, r6, r0 + stwu r0, 4(r8) + dcbst 0, r8 + sync + icbi 0, r8 + bdnz 9b + + addi r9, r9, 4 + addi r8, r8, 4 + b 0b + +3: + + /* To be certain of avoiding problems with self-modifying code + * execute a serializing instruction here. + */ + isync + sync + + /* jump to the entry point, usually the setup routine */ + mtlr r5 + blrl + +1: b 1b + +relocate_new_kernel_end: + + .globl relocate_new_kernel_size +relocate_new_kernel_size: + .long relocate_new_kernel_end - relocate_new_kernel +#endif diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index ae48a002f81a..2778cce058e2 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -39,7 +39,7 @@ _GLOBAL(reloc_offset) mflr r0 bl 1f 1: mflr r3 - LOADADDR(r4,1b) + LOAD_REG_IMMEDIATE(r4,1b) subf r3,r4,r3 mtlr r0 blr @@ -51,7 +51,7 @@ _GLOBAL(add_reloc_offset) mflr r0 bl 1f 1: mflr r5 - LOADADDR(r4,1b) + LOAD_REG_IMMEDIATE(r4,1b) subf r5,r4,r5 add r3,r3,r5 mtlr r0 @@ -498,15 +498,15 @@ _GLOBAL(identify_cpu) */ _GLOBAL(do_cpu_ftr_fixups) /* Get CPU 0 features */ - LOADADDR(r6,cur_cpu_spec) + LOAD_REG_IMMEDIATE(r6,cur_cpu_spec) sub r6,r6,r3 ld r4,0(r6) sub r4,r4,r3 ld r4,CPU_SPEC_FEATURES(r4) /* Get the fixup table */ - LOADADDR(r6,__start___ftr_fixup) + LOAD_REG_IMMEDIATE(r6,__start___ftr_fixup) sub r6,r6,r3 - LOADADDR(r7,__stop___ftr_fixup) + LOAD_REG_IMMEDIATE(r7,__stop___ftr_fixup) sub r7,r7,r3 /* Do the fixup */ 1: cmpld r6,r7 diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index c0fcd29918ce..fd7db8d542db 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -80,80 +80,74 @@ static loff_t dev_nvram_llseek(struct file *file, loff_t offset, int origin) static ssize_t dev_nvram_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - ssize_t len; - char *tmp_buffer; - int size; + ssize_t ret; + char *tmp = NULL; + ssize_t size; - if (ppc_md.nvram_size == NULL) - return -ENODEV; + ret = -ENODEV; + if (!ppc_md.nvram_size) + goto out; + + ret = 0; size = ppc_md.nvram_size(); + if (*ppos >= size || size < 0) + goto out; - if (!access_ok(VERIFY_WRITE, buf, count)) - return -EFAULT; - if (*ppos >= size) - return 0; - if (count > size) - count = size; + count = min_t(size_t, count, size - *ppos); + count = min(count, PAGE_SIZE); - tmp_buffer = (char *) kmalloc(count, GFP_KERNEL); - if (!tmp_buffer) { - printk(KERN_ERR "dev_read_nvram: kmalloc failed\n"); - return -ENOMEM; - } + ret = -ENOMEM; + tmp = kmalloc(count, GFP_KERNEL); + if (!tmp) + goto out; - len = ppc_md.nvram_read(tmp_buffer, count, ppos); - if ((long)len <= 0) { - kfree(tmp_buffer); - return len; - } + ret = ppc_md.nvram_read(tmp, count, ppos); + if (ret <= 0) + goto out; - if (copy_to_user(buf, tmp_buffer, len)) { - kfree(tmp_buffer); - return -EFAULT; - } + if (copy_to_user(buf, tmp, ret)) + ret = -EFAULT; - kfree(tmp_buffer); - return len; +out: + kfree(tmp); + return ret; } static ssize_t dev_nvram_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) + size_t count, loff_t *ppos) { - ssize_t len; - char * tmp_buffer; - int size; + ssize_t ret; + char *tmp = NULL; + ssize_t size; - if (ppc_md.nvram_size == NULL) - return -ENODEV; + ret = -ENODEV; + if (!ppc_md.nvram_size) + goto out; + + ret = 0; size = ppc_md.nvram_size(); + if (*ppos >= size || size < 0) + goto out; - if (!access_ok(VERIFY_READ, buf, count)) - return -EFAULT; - if (*ppos >= size) - return 0; - if (count > size) - count = size; + count = min_t(size_t, count, size - *ppos); + count = min(count, PAGE_SIZE); - tmp_buffer = (char *) kmalloc(count, GFP_KERNEL); - if (!tmp_buffer) { - printk(KERN_ERR "dev_nvram_write: kmalloc failed\n"); - return -ENOMEM; - } - - if (copy_from_user(tmp_buffer, buf, count)) { - kfree(tmp_buffer); - return -EFAULT; - } + ret = -ENOMEM; + tmp = kmalloc(count, GFP_KERNEL); + if (!tmp) + goto out; - len = ppc_md.nvram_write(tmp_buffer, count, ppos); - if ((long)len <= 0) { - kfree(tmp_buffer); - return len; - } + ret = -EFAULT; + if (copy_from_user(tmp, buf, count)) + goto out; + + ret = ppc_md.nvram_write(tmp, count, ppos); + +out: + kfree(tmp); + return ret; - kfree(tmp_buffer); - return len; } static int dev_nvram_ioctl(struct inode *inode, struct file *file, diff --git a/arch/powerpc/kernel/of_device.c b/arch/powerpc/kernel/of_device.c index 7065e40e2f42..22d83d4d1af5 100644 --- a/arch/powerpc/kernel/of_device.c +++ b/arch/powerpc/kernel/of_device.c @@ -132,6 +132,8 @@ static int of_device_resume(struct device * dev) struct bus_type of_platform_bus_type = { .name = "of_platform", .match = of_platform_bus_match, + .probe = of_device_probe, + .remove = of_device_remove, .suspend = of_device_suspend, .resume = of_device_resume, }; @@ -150,8 +152,6 @@ int of_register_driver(struct of_platform_driver *drv) /* initialize common driver fields */ drv->driver.name = drv->name; drv->driver.bus = &of_platform_bus_type; - drv->driver.probe = of_device_probe; - drv->driver.remove = of_device_remove; /* register with core */ count = driver_register(&drv->driver); diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index a7b68f911eb1..5d1b708086bd 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -17,6 +17,7 @@ #include <asm/page.h> #include <asm/lppaca.h> #include <asm/iseries/it_lp_queue.h> +#include <asm/iseries/it_lp_reg_save.h> #include <asm/paca.h> @@ -24,10 +25,31 @@ * field correctly */ extern unsigned long __toc_start; +/* + * iSeries structure which the hypervisor knows about - this structure + * should not cross a page boundary. The vpa_init/register_vpa call + * is now known to fail if the lppaca structure crosses a page + * boundary. The lppaca is also used on POWER5 pSeries boxes. The + * lppaca is 640 bytes long, and cannot readily change since the + * hypervisor knows its layout, so a 1kB alignment will suffice to + * ensure that it doesn't cross a page boundary. + */ +struct lppaca lppaca[] = { + [0 ... (NR_CPUS-1)] = { + .desc = 0xd397d781, /* "LpPa" */ + .size = sizeof(struct lppaca), + .dyn_proc_status = 2, + .decr_val = 0x00ff0000, + .fpregs_in_use = 1, + .end_of_quantum = 0xfffffffffffffffful, + .slb_count = 64, + .vmxregs_in_use = 0, + }, +}; + /* The Paca is an array with one entry per processor. Each contains an * lppaca, which contains the information shared between the - * hypervisor and Linux. Each also contains an ItLpRegSave area which - * is used by the hypervisor to save registers. + * hypervisor and Linux. * On systems with hardware multi-threading, there are two threads * per processor. The Paca array must contain an entry for each thread. * The VPD Areas will give a max logical processors = 2 * max physical @@ -35,33 +57,18 @@ extern unsigned long __toc_start; * processor (not thread). */ #define PACA_INIT_COMMON(number, start, asrr, asrv) \ + .lppaca_ptr = &lppaca[number], \ .lock_token = 0x8000, \ .paca_index = (number), /* Paca Index */ \ - .default_decr = 0x00ff0000, /* Initial Decr */ \ .kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL, \ .stab_real = (asrr), /* Real pointer to segment table */ \ .stab_addr = (asrv), /* Virt pointer to segment table */ \ .cpu_start = (start), /* Processor start */ \ - .hw_cpu_id = 0xffff, \ - .lppaca = { \ - .desc = 0xd397d781, /* "LpPa" */ \ - .size = sizeof(struct lppaca), \ - .dyn_proc_status = 2, \ - .decr_val = 0x00ff0000, \ - .fpregs_in_use = 1, \ - .end_of_quantum = 0xfffffffffffffffful, \ - .slb_count = 64, \ - .vmxregs_in_use = 0, \ - }, \ + .hw_cpu_id = 0xffff, #ifdef CONFIG_PPC_ISERIES #define PACA_INIT_ISERIES(number) \ - .lppaca_ptr = &paca[number].lppaca, \ - .reg_save_ptr = &paca[number].reg_save, \ - .reg_save = { \ - .xDesc = 0xd397d9e2, /* "LpRS" */ \ - .xSize = sizeof(struct ItLpRegSave) \ - } + .reg_save_ptr = &iseries_reg_save[number], #define PACA_INIT(number) \ { \ diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c new file mode 100644 index 000000000000..704c846b2b0f --- /dev/null +++ b/arch/powerpc/kernel/pci_32.c @@ -0,0 +1,1897 @@ +/* + * Common pmac/prep/chrp pci routines. -- Cort + */ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/capability.h> +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/bootmem.h> + +#include <asm/processor.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/sections.h> +#include <asm/pci-bridge.h> +#include <asm/byteorder.h> +#include <asm/irq.h> +#include <asm/uaccess.h> +#include <asm/machdep.h> + +#undef DEBUG + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) +#endif + +unsigned long isa_io_base = 0; +unsigned long isa_mem_base = 0; +unsigned long pci_dram_offset = 0; +int pcibios_assign_bus_offset = 1; + +void pcibios_make_OF_bus_map(void); + +static int pci_relocate_bridge_resource(struct pci_bus *bus, int i); +static int probe_resource(struct pci_bus *parent, struct resource *pr, + struct resource *res, struct resource **conflict); +static void update_bridge_base(struct pci_bus *bus, int i); +static void pcibios_fixup_resources(struct pci_dev* dev); +static void fixup_broken_pcnet32(struct pci_dev* dev); +static int reparent_resources(struct resource *parent, struct resource *res); +static void fixup_cpc710_pci64(struct pci_dev* dev); +#ifdef CONFIG_PPC_OF +static u8* pci_to_OF_bus_map; +#endif + +/* By default, we don't re-assign bus numbers. We do this only on + * some pmacs + */ +int pci_assign_all_buses; + +struct pci_controller* hose_head; +struct pci_controller** hose_tail = &hose_head; + +static int pci_bus_count; + +static void +fixup_broken_pcnet32(struct pci_dev* dev) +{ + if ((dev->class>>8 == PCI_CLASS_NETWORK_ETHERNET)) { + dev->vendor = PCI_VENDOR_ID_AMD; + pci_write_config_word(dev, PCI_VENDOR_ID, PCI_VENDOR_ID_AMD); + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TRIDENT, PCI_ANY_ID, fixup_broken_pcnet32); + +static void +fixup_cpc710_pci64(struct pci_dev* dev) +{ + /* Hide the PCI64 BARs from the kernel as their content doesn't + * fit well in the resource management + */ + dev->resource[0].start = dev->resource[0].end = 0; + dev->resource[0].flags = 0; + dev->resource[1].start = dev->resource[1].end = 0; + dev->resource[1].flags = 0; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CPC710_PCI64, fixup_cpc710_pci64); + +static void +pcibios_fixup_resources(struct pci_dev *dev) +{ + struct pci_controller* hose = (struct pci_controller *)dev->sysdata; + int i; + unsigned long offset; + + if (!hose) { + printk(KERN_ERR "No hose for PCI dev %s!\n", pci_name(dev)); + return; + } + for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { + struct resource *res = dev->resource + i; + if (!res->flags) + continue; + if (res->end == 0xffffffff) { + DBG("PCI:%s Resource %d [%08lx-%08lx] is unassigned\n", + pci_name(dev), i, res->start, res->end); + res->end -= res->start; + res->start = 0; + res->flags |= IORESOURCE_UNSET; + continue; + } + offset = 0; + if (res->flags & IORESOURCE_MEM) { + offset = hose->pci_mem_offset; + } else if (res->flags & IORESOURCE_IO) { + offset = (unsigned long) hose->io_base_virt + - isa_io_base; + } + if (offset != 0) { + res->start += offset; + res->end += offset; +#ifdef DEBUG + printk("Fixup res %d (%lx) of dev %s: %lx -> %lx\n", + i, res->flags, pci_name(dev), + res->start - offset, res->start); +#endif + } + } + + /* Call machine specific resource fixup */ + if (ppc_md.pcibios_fixup_resources) + ppc_md.pcibios_fixup_resources(dev); +} +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_resources); + +void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, + struct resource *res) +{ + unsigned long offset = 0; + struct pci_controller *hose = dev->sysdata; + + if (hose && res->flags & IORESOURCE_IO) + offset = (unsigned long)hose->io_base_virt - isa_io_base; + else if (hose && res->flags & IORESOURCE_MEM) + offset = hose->pci_mem_offset; + region->start = res->start - offset; + region->end = res->end - offset; +} +EXPORT_SYMBOL(pcibios_resource_to_bus); + +void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, + struct pci_bus_region *region) +{ + unsigned long offset = 0; + struct pci_controller *hose = dev->sysdata; + + if (hose && res->flags & IORESOURCE_IO) + offset = (unsigned long)hose->io_base_virt - isa_io_base; + else if (hose && res->flags & IORESOURCE_MEM) + offset = hose->pci_mem_offset; + res->start = region->start + offset; + res->end = region->end + offset; +} +EXPORT_SYMBOL(pcibios_bus_to_resource); + +/* + * We need to avoid collisions with `mirrored' VGA ports + * and other strange ISA hardware, so we always want the + * addresses to be allocated in the 0x000-0x0ff region + * modulo 0x400. + * + * Why? Because some silly external IO cards only decode + * the low 10 bits of the IO address. The 0x00-0xff region + * is reserved for motherboard devices that decode all 16 + * bits, so it's ok to allocate at, say, 0x2800-0x28ff, + * but we want to try to avoid allocating at 0x2900-0x2bff + * which might have be mirrored at 0x0100-0x03ff.. + */ +void pcibios_align_resource(void *data, struct resource *res, unsigned long size, + unsigned long align) +{ + struct pci_dev *dev = data; + + if (res->flags & IORESOURCE_IO) { + unsigned long start = res->start; + + if (size > 0x100) { + printk(KERN_ERR "PCI: I/O Region %s/%d too large" + " (%ld bytes)\n", pci_name(dev), + dev->resource - res, size); + } + + if (start & 0x300) { + start = (start + 0x3ff) & ~0x3ff; + res->start = start; + } + } +} +EXPORT_SYMBOL(pcibios_align_resource); + +/* + * Handle resources of PCI devices. If the world were perfect, we could + * just allocate all the resource regions and do nothing more. It isn't. + * On the other hand, we cannot just re-allocate all devices, as it would + * require us to know lots of host bridge internals. So we attempt to + * keep as much of the original configuration as possible, but tweak it + * when it's found to be wrong. + * + * Known BIOS problems we have to work around: + * - I/O or memory regions not configured + * - regions configured, but not enabled in the command register + * - bogus I/O addresses above 64K used + * - expansion ROMs left enabled (this may sound harmless, but given + * the fact the PCI specs explicitly allow address decoders to be + * shared between expansion ROMs and other resource regions, it's + * at least dangerous) + * + * Our solution: + * (1) Allocate resources for all buses behind PCI-to-PCI bridges. + * This gives us fixed barriers on where we can allocate. + * (2) Allocate resources for all enabled devices. If there is + * a collision, just mark the resource as unallocated. Also + * disable expansion ROMs during this step. + * (3) Try to allocate resources for disabled devices. If the + * resources were assigned correctly, everything goes well, + * if they weren't, they won't disturb allocation of other + * resources. + * (4) Assign new addresses to resources which were either + * not configured at all or misconfigured. If explicitly + * requested by the user, configure expansion ROM address + * as well. + */ + +static void __init +pcibios_allocate_bus_resources(struct list_head *bus_list) +{ + struct pci_bus *bus; + int i; + struct resource *res, *pr; + + /* Depth-First Search on bus tree */ + list_for_each_entry(bus, bus_list, node) { + for (i = 0; i < 4; ++i) { + if ((res = bus->resource[i]) == NULL || !res->flags + || res->start > res->end) + continue; + if (bus->parent == NULL) + pr = (res->flags & IORESOURCE_IO)? + &ioport_resource: &iomem_resource; + else { + pr = pci_find_parent_resource(bus->self, res); + if (pr == res) { + /* this happens when the generic PCI + * code (wrongly) decides that this + * bridge is transparent -- paulus + */ + continue; + } + } + + DBG("PCI: bridge rsrc %lx..%lx (%lx), parent %p\n", + res->start, res->end, res->flags, pr); + if (pr) { + if (request_resource(pr, res) == 0) + continue; + /* + * Must be a conflict with an existing entry. + * Move that entry (or entries) under the + * bridge resource and try again. + */ + if (reparent_resources(pr, res) == 0) + continue; + } + printk(KERN_ERR "PCI: Cannot allocate resource region " + "%d of PCI bridge %d\n", i, bus->number); + if (pci_relocate_bridge_resource(bus, i)) + bus->resource[i] = NULL; + } + pcibios_allocate_bus_resources(&bus->children); + } +} + +/* + * Reparent resource children of pr that conflict with res + * under res, and make res replace those children. + */ +static int __init +reparent_resources(struct resource *parent, struct resource *res) +{ + struct resource *p, **pp; + struct resource **firstpp = NULL; + + for (pp = &parent->child; (p = *pp) != NULL; pp = &p->sibling) { + if (p->end < res->start) + continue; + if (res->end < p->start) + break; + if (p->start < res->start || p->end > res->end) + return -1; /* not completely contained */ + if (firstpp == NULL) + firstpp = pp; + } + if (firstpp == NULL) + return -1; /* didn't find any conflicting entries? */ + res->parent = parent; + res->child = *firstpp; + res->sibling = *pp; + *firstpp = res; + *pp = NULL; + for (p = res->child; p != NULL; p = p->sibling) { + p->parent = res; + DBG(KERN_INFO "PCI: reparented %s [%lx..%lx] under %s\n", + p->name, p->start, p->end, res->name); + } + return 0; +} + +/* + * A bridge has been allocated a range which is outside the range + * of its parent bridge, so it needs to be moved. + */ +static int __init +pci_relocate_bridge_resource(struct pci_bus *bus, int i) +{ + struct resource *res, *pr, *conflict; + unsigned long try, size; + int j; + struct pci_bus *parent = bus->parent; + + if (parent == NULL) { + /* shouldn't ever happen */ + printk(KERN_ERR "PCI: can't move host bridge resource\n"); + return -1; + } + res = bus->resource[i]; + if (res == NULL) + return -1; + pr = NULL; + for (j = 0; j < 4; j++) { + struct resource *r = parent->resource[j]; + if (!r) + continue; + if ((res->flags ^ r->flags) & (IORESOURCE_IO | IORESOURCE_MEM)) + continue; + if (!((res->flags ^ r->flags) & IORESOURCE_PREFETCH)) { + pr = r; + break; + } + if (res->flags & IORESOURCE_PREFETCH) + pr = r; + } + if (pr == NULL) + return -1; + size = res->end - res->start; + if (pr->start > pr->end || size > pr->end - pr->start) + return -1; + try = pr->end; + for (;;) { + res->start = try - size; + res->end = try; + if (probe_resource(bus->parent, pr, res, &conflict) == 0) + break; + if (conflict->start <= pr->start + size) + return -1; + try = conflict->start - 1; + } + if (request_resource(pr, res)) { + DBG(KERN_ERR "PCI: huh? couldn't move to %lx..%lx\n", + res->start, res->end); + return -1; /* "can't happen" */ + } + update_bridge_base(bus, i); + printk(KERN_INFO "PCI: bridge %d resource %d moved to %lx..%lx\n", + bus->number, i, res->start, res->end); + return 0; +} + +static int __init +probe_resource(struct pci_bus *parent, struct resource *pr, + struct resource *res, struct resource **conflict) +{ + struct pci_bus *bus; + struct pci_dev *dev; + struct resource *r; + int i; + + for (r = pr->child; r != NULL; r = r->sibling) { + if (r->end >= res->start && res->end >= r->start) { + *conflict = r; + return 1; + } + } + list_for_each_entry(bus, &parent->children, node) { + for (i = 0; i < 4; ++i) { + if ((r = bus->resource[i]) == NULL) + continue; + if (!r->flags || r->start > r->end || r == res) + continue; + if (pci_find_parent_resource(bus->self, r) != pr) + continue; + if (r->end >= res->start && res->end >= r->start) { + *conflict = r; + return 1; + } + } + } + list_for_each_entry(dev, &parent->devices, bus_list) { + for (i = 0; i < 6; ++i) { + r = &dev->resource[i]; + if (!r->flags || (r->flags & IORESOURCE_UNSET)) + continue; + if (pci_find_parent_resource(dev, r) != pr) + continue; + if (r->end >= res->start && res->end >= r->start) { + *conflict = r; + return 1; + } + } + } + return 0; +} + +static void __init +update_bridge_base(struct pci_bus *bus, int i) +{ + struct resource *res = bus->resource[i]; + u8 io_base_lo, io_limit_lo; + u16 mem_base, mem_limit; + u16 cmd; + unsigned long start, end, off; + struct pci_dev *dev = bus->self; + struct pci_controller *hose = dev->sysdata; + + if (!hose) { + printk("update_bridge_base: no hose?\n"); + return; + } + pci_read_config_word(dev, PCI_COMMAND, &cmd); + pci_write_config_word(dev, PCI_COMMAND, + cmd & ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY)); + if (res->flags & IORESOURCE_IO) { + off = (unsigned long) hose->io_base_virt - isa_io_base; + start = res->start - off; + end = res->end - off; + io_base_lo = (start >> 8) & PCI_IO_RANGE_MASK; + io_limit_lo = (end >> 8) & PCI_IO_RANGE_MASK; + if (end > 0xffff) { + pci_write_config_word(dev, PCI_IO_BASE_UPPER16, + start >> 16); + pci_write_config_word(dev, PCI_IO_LIMIT_UPPER16, + end >> 16); + io_base_lo |= PCI_IO_RANGE_TYPE_32; + } else + io_base_lo |= PCI_IO_RANGE_TYPE_16; + pci_write_config_byte(dev, PCI_IO_BASE, io_base_lo); + pci_write_config_byte(dev, PCI_IO_LIMIT, io_limit_lo); + + } else if ((res->flags & (IORESOURCE_MEM | IORESOURCE_PREFETCH)) + == IORESOURCE_MEM) { + off = hose->pci_mem_offset; + mem_base = ((res->start - off) >> 16) & PCI_MEMORY_RANGE_MASK; + mem_limit = ((res->end - off) >> 16) & PCI_MEMORY_RANGE_MASK; + pci_write_config_word(dev, PCI_MEMORY_BASE, mem_base); + pci_write_config_word(dev, PCI_MEMORY_LIMIT, mem_limit); + + } else if ((res->flags & (IORESOURCE_MEM | IORESOURCE_PREFETCH)) + == (IORESOURCE_MEM | IORESOURCE_PREFETCH)) { + off = hose->pci_mem_offset; + mem_base = ((res->start - off) >> 16) & PCI_PREF_RANGE_MASK; + mem_limit = ((res->end - off) >> 16) & PCI_PREF_RANGE_MASK; + pci_write_config_word(dev, PCI_PREF_MEMORY_BASE, mem_base); + pci_write_config_word(dev, PCI_PREF_MEMORY_LIMIT, mem_limit); + + } else { + DBG(KERN_ERR "PCI: ugh, bridge %s res %d has flags=%lx\n", + pci_name(dev), i, res->flags); + } + pci_write_config_word(dev, PCI_COMMAND, cmd); +} + +static inline void alloc_resource(struct pci_dev *dev, int idx) +{ + struct resource *pr, *r = &dev->resource[idx]; + + DBG("PCI:%s: Resource %d: %08lx-%08lx (f=%lx)\n", + pci_name(dev), idx, r->start, r->end, r->flags); + pr = pci_find_parent_resource(dev, r); + if (!pr || request_resource(pr, r) < 0) { + printk(KERN_ERR "PCI: Cannot allocate resource region %d" + " of device %s\n", idx, pci_name(dev)); + if (pr) + DBG("PCI: parent is %p: %08lx-%08lx (f=%lx)\n", + pr, pr->start, pr->end, pr->flags); + /* We'll assign a new address later */ + r->flags |= IORESOURCE_UNSET; + r->end -= r->start; + r->start = 0; + } +} + +static void __init +pcibios_allocate_resources(int pass) +{ + struct pci_dev *dev = NULL; + int idx, disabled; + u16 command; + struct resource *r; + + for_each_pci_dev(dev) { + pci_read_config_word(dev, PCI_COMMAND, &command); + for (idx = 0; idx < 6; idx++) { + r = &dev->resource[idx]; + if (r->parent) /* Already allocated */ + continue; + if (!r->flags || (r->flags & IORESOURCE_UNSET)) + continue; /* Not assigned at all */ + if (r->flags & IORESOURCE_IO) + disabled = !(command & PCI_COMMAND_IO); + else + disabled = !(command & PCI_COMMAND_MEMORY); + if (pass == disabled) + alloc_resource(dev, idx); + } + if (pass) + continue; + r = &dev->resource[PCI_ROM_RESOURCE]; + if (r->flags & IORESOURCE_ROM_ENABLE) { + /* Turn the ROM off, leave the resource region, but keep it unregistered. */ + u32 reg; + DBG("PCI: Switching off ROM of %s\n", pci_name(dev)); + r->flags &= ~IORESOURCE_ROM_ENABLE; + pci_read_config_dword(dev, dev->rom_base_reg, ®); + pci_write_config_dword(dev, dev->rom_base_reg, + reg & ~PCI_ROM_ADDRESS_ENABLE); + } + } +} + +static void __init +pcibios_assign_resources(void) +{ + struct pci_dev *dev = NULL; + int idx; + struct resource *r; + + for_each_pci_dev(dev) { + int class = dev->class >> 8; + + /* Don't touch classless devices and host bridges */ + if (!class || class == PCI_CLASS_BRIDGE_HOST) + continue; + + for (idx = 0; idx < 6; idx++) { + r = &dev->resource[idx]; + + /* + * We shall assign a new address to this resource, + * either because the BIOS (sic) forgot to do so + * or because we have decided the old address was + * unusable for some reason. + */ + if ((r->flags & IORESOURCE_UNSET) && r->end && + (!ppc_md.pcibios_enable_device_hook || + !ppc_md.pcibios_enable_device_hook(dev, 1))) { + r->flags &= ~IORESOURCE_UNSET; + pci_assign_resource(dev, idx); + } + } + +#if 0 /* don't assign ROMs */ + r = &dev->resource[PCI_ROM_RESOURCE]; + r->end -= r->start; + r->start = 0; + if (r->end) + pci_assign_resource(dev, PCI_ROM_RESOURCE); +#endif + } +} + + +int +pcibios_enable_resources(struct pci_dev *dev, int mask) +{ + u16 cmd, old_cmd; + int idx; + struct resource *r; + + pci_read_config_word(dev, PCI_COMMAND, &cmd); + old_cmd = cmd; + for (idx=0; idx<6; idx++) { + /* Only set up the requested stuff */ + if (!(mask & (1<<idx))) + continue; + + r = &dev->resource[idx]; + if (r->flags & IORESOURCE_UNSET) { + printk(KERN_ERR "PCI: Device %s not available because of resource collisions\n", pci_name(dev)); + return -EINVAL; + } + if (r->flags & IORESOURCE_IO) + cmd |= PCI_COMMAND_IO; + if (r->flags & IORESOURCE_MEM) + cmd |= PCI_COMMAND_MEMORY; + } + if (dev->resource[PCI_ROM_RESOURCE].start) + cmd |= PCI_COMMAND_MEMORY; + if (cmd != old_cmd) { + printk("PCI: Enabling device %s (%04x -> %04x)\n", pci_name(dev), old_cmd, cmd); + pci_write_config_word(dev, PCI_COMMAND, cmd); + } + return 0; +} + +static int next_controller_index; + +struct pci_controller * __init +pcibios_alloc_controller(void) +{ + struct pci_controller *hose; + + hose = (struct pci_controller *)alloc_bootmem(sizeof(*hose)); + memset(hose, 0, sizeof(struct pci_controller)); + + *hose_tail = hose; + hose_tail = &hose->next; + + hose->index = next_controller_index++; + + return hose; +} + +#ifdef CONFIG_PPC_OF +/* + * Functions below are used on OpenFirmware machines. + */ +static void +make_one_node_map(struct device_node* node, u8 pci_bus) +{ + int *bus_range; + int len; + + if (pci_bus >= pci_bus_count) + return; + bus_range = (int *) get_property(node, "bus-range", &len); + if (bus_range == NULL || len < 2 * sizeof(int)) { + printk(KERN_WARNING "Can't get bus-range for %s, " + "assuming it starts at 0\n", node->full_name); + pci_to_OF_bus_map[pci_bus] = 0; + } else + pci_to_OF_bus_map[pci_bus] = bus_range[0]; + + for (node=node->child; node != 0;node = node->sibling) { + struct pci_dev* dev; + unsigned int *class_code, *reg; + + class_code = (unsigned int *) get_property(node, "class-code", NULL); + if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI && + (*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS)) + continue; + reg = (unsigned int *)get_property(node, "reg", NULL); + if (!reg) + continue; + dev = pci_find_slot(pci_bus, ((reg[0] >> 8) & 0xff)); + if (!dev || !dev->subordinate) + continue; + make_one_node_map(node, dev->subordinate->number); + } +} + +void +pcibios_make_OF_bus_map(void) +{ + int i; + struct pci_controller* hose; + u8* of_prop_map; + + pci_to_OF_bus_map = (u8*)kmalloc(pci_bus_count, GFP_KERNEL); + if (!pci_to_OF_bus_map) { + printk(KERN_ERR "Can't allocate OF bus map !\n"); + return; + } + + /* We fill the bus map with invalid values, that helps + * debugging. + */ + for (i=0; i<pci_bus_count; i++) + pci_to_OF_bus_map[i] = 0xff; + + /* For each hose, we begin searching bridges */ + for(hose=hose_head; hose; hose=hose->next) { + struct device_node* node; + node = (struct device_node *)hose->arch_data; + if (!node) + continue; + make_one_node_map(node, hose->first_busno); + } + of_prop_map = get_property(find_path_device("/"), "pci-OF-bus-map", NULL); + if (of_prop_map) + memcpy(of_prop_map, pci_to_OF_bus_map, pci_bus_count); +#ifdef DEBUG + printk("PCI->OF bus map:\n"); + for (i=0; i<pci_bus_count; i++) { + if (pci_to_OF_bus_map[i] == 0xff) + continue; + printk("%d -> %d\n", i, pci_to_OF_bus_map[i]); + } +#endif +} + +typedef int (*pci_OF_scan_iterator)(struct device_node* node, void* data); + +static struct device_node* +scan_OF_pci_childs(struct device_node* node, pci_OF_scan_iterator filter, void* data) +{ + struct device_node* sub_node; + + for (; node != 0;node = node->sibling) { + unsigned int *class_code; + + if (filter(node, data)) + return node; + + /* For PCI<->PCI bridges or CardBus bridges, we go down + * Note: some OFs create a parent node "multifunc-device" as + * a fake root for all functions of a multi-function device, + * we go down them as well. + */ + class_code = (unsigned int *) get_property(node, "class-code", NULL); + if ((!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI && + (*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS)) && + strcmp(node->name, "multifunc-device")) + continue; + sub_node = scan_OF_pci_childs(node->child, filter, data); + if (sub_node) + return sub_node; + } + return NULL; +} + +static int +scan_OF_pci_childs_iterator(struct device_node* node, void* data) +{ + unsigned int *reg; + u8* fdata = (u8*)data; + + reg = (unsigned int *) get_property(node, "reg", NULL); + if (reg && ((reg[0] >> 8) & 0xff) == fdata[1] + && ((reg[0] >> 16) & 0xff) == fdata[0]) + return 1; + return 0; +} + +static struct device_node* +scan_OF_childs_for_device(struct device_node* node, u8 bus, u8 dev_fn) +{ + u8 filter_data[2] = {bus, dev_fn}; + + return scan_OF_pci_childs(node, scan_OF_pci_childs_iterator, filter_data); +} + +/* + * Scans the OF tree for a device node matching a PCI device + */ +struct device_node * +pci_busdev_to_OF_node(struct pci_bus *bus, int devfn) +{ + struct pci_controller *hose; + struct device_node *node; + int busnr; + + if (!have_of) + return NULL; + + /* Lookup the hose */ + busnr = bus->number; + hose = pci_bus_to_hose(busnr); + if (!hose) + return NULL; + + /* Check it has an OF node associated */ + node = (struct device_node *) hose->arch_data; + if (!node) + return NULL; + + /* Fixup bus number according to what OF think it is. */ +#ifdef CONFIG_PPC_PMAC + /* The G5 need a special case here. Basically, we don't remap all + * busses on it so we don't create the pci-OF-map. However, we do + * remap the AGP bus and so have to deal with it. A future better + * fix has to be done by making the remapping per-host and always + * filling the pci_to_OF map. --BenH + */ + if (_machine == _MACH_Pmac && busnr >= 0xf0) + busnr -= 0xf0; + else +#endif + if (pci_to_OF_bus_map) + busnr = pci_to_OF_bus_map[busnr]; + if (busnr == 0xff) + return NULL; + + /* Now, lookup childs of the hose */ + return scan_OF_childs_for_device(node->child, busnr, devfn); +} +EXPORT_SYMBOL(pci_busdev_to_OF_node); + +struct device_node* +pci_device_to_OF_node(struct pci_dev *dev) +{ + return pci_busdev_to_OF_node(dev->bus, dev->devfn); +} +EXPORT_SYMBOL(pci_device_to_OF_node); + +/* This routine is meant to be used early during boot, when the + * PCI bus numbers have not yet been assigned, and you need to + * issue PCI config cycles to an OF device. + * It could also be used to "fix" RTAS config cycles if you want + * to set pci_assign_all_buses to 1 and still use RTAS for PCI + * config cycles. + */ +struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node) +{ + if (!have_of) + return NULL; + while(node) { + struct pci_controller* hose; + for (hose=hose_head;hose;hose=hose->next) + if (hose->arch_data == node) + return hose; + node=node->parent; + } + return NULL; +} + +static int +find_OF_pci_device_filter(struct device_node* node, void* data) +{ + return ((void *)node == data); +} + +/* + * Returns the PCI device matching a given OF node + */ +int +pci_device_from_OF_node(struct device_node* node, u8* bus, u8* devfn) +{ + unsigned int *reg; + struct pci_controller* hose; + struct pci_dev* dev = NULL; + + if (!have_of) + return -ENODEV; + /* Make sure it's really a PCI device */ + hose = pci_find_hose_for_OF_device(node); + if (!hose || !hose->arch_data) + return -ENODEV; + if (!scan_OF_pci_childs(((struct device_node*)hose->arch_data)->child, + find_OF_pci_device_filter, (void *)node)) + return -ENODEV; + reg = (unsigned int *) get_property(node, "reg", NULL); + if (!reg) + return -ENODEV; + *bus = (reg[0] >> 16) & 0xff; + *devfn = ((reg[0] >> 8) & 0xff); + + /* Ok, here we need some tweak. If we have already renumbered + * all busses, we can't rely on the OF bus number any more. + * the pci_to_OF_bus_map is not enough as several PCI busses + * may match the same OF bus number. + */ + if (!pci_to_OF_bus_map) + return 0; + + for_each_pci_dev(dev) + if (pci_to_OF_bus_map[dev->bus->number] == *bus && + dev->devfn == *devfn) { + *bus = dev->bus->number; + pci_dev_put(dev); + return 0; + } + + return -ENODEV; +} +EXPORT_SYMBOL(pci_device_from_OF_node); + +void __init +pci_process_bridge_OF_ranges(struct pci_controller *hose, + struct device_node *dev, int primary) +{ + static unsigned int static_lc_ranges[256] __initdata; + unsigned int *dt_ranges, *lc_ranges, *ranges, *prev; + unsigned int size; + int rlen = 0, orig_rlen; + int memno = 0; + struct resource *res; + int np, na = prom_n_addr_cells(dev); + np = na + 5; + + /* First we try to merge ranges to fix a problem with some pmacs + * that can have more than 3 ranges, fortunately using contiguous + * addresses -- BenH + */ + dt_ranges = (unsigned int *) get_property(dev, "ranges", &rlen); + if (!dt_ranges) + return; + /* Sanity check, though hopefully that never happens */ + if (rlen > sizeof(static_lc_ranges)) { + printk(KERN_WARNING "OF ranges property too large !\n"); + rlen = sizeof(static_lc_ranges); + } + lc_ranges = static_lc_ranges; + memcpy(lc_ranges, dt_ranges, rlen); + orig_rlen = rlen; + + /* Let's work on a copy of the "ranges" property instead of damaging + * the device-tree image in memory + */ + ranges = lc_ranges; + prev = NULL; + while ((rlen -= np * sizeof(unsigned int)) >= 0) { + if (prev) { + if (prev[0] == ranges[0] && prev[1] == ranges[1] && + (prev[2] + prev[na+4]) == ranges[2] && + (prev[na+2] + prev[na+4]) == ranges[na+2]) { + prev[na+4] += ranges[na+4]; + ranges[0] = 0; + ranges += np; + continue; + } + } + prev = ranges; + ranges += np; + } + + /* + * The ranges property is laid out as an array of elements, + * each of which comprises: + * cells 0 - 2: a PCI address + * cells 3 or 3+4: a CPU physical address + * (size depending on dev->n_addr_cells) + * cells 4+5 or 5+6: the size of the range + */ + ranges = lc_ranges; + rlen = orig_rlen; + while (ranges && (rlen -= np * sizeof(unsigned int)) >= 0) { + res = NULL; + size = ranges[na+4]; + switch ((ranges[0] >> 24) & 0x3) { + case 1: /* I/O space */ + if (ranges[2] != 0) + break; + hose->io_base_phys = ranges[na+2]; + /* limit I/O space to 16MB */ + if (size > 0x01000000) + size = 0x01000000; + hose->io_base_virt = ioremap(ranges[na+2], size); + if (primary) + isa_io_base = (unsigned long) hose->io_base_virt; + res = &hose->io_resource; + res->flags = IORESOURCE_IO; + res->start = ranges[2]; + DBG("PCI: IO 0x%lx -> 0x%lx\n", + res->start, res->start + size - 1); + break; + case 2: /* memory space */ + memno = 0; + if (ranges[1] == 0 && ranges[2] == 0 + && ranges[na+4] <= (16 << 20)) { + /* 1st 16MB, i.e. ISA memory area */ + if (primary) + isa_mem_base = ranges[na+2]; + memno = 1; + } + while (memno < 3 && hose->mem_resources[memno].flags) + ++memno; + if (memno == 0) + hose->pci_mem_offset = ranges[na+2] - ranges[2]; + if (memno < 3) { + res = &hose->mem_resources[memno]; + res->flags = IORESOURCE_MEM; + if(ranges[0] & 0x40000000) + res->flags |= IORESOURCE_PREFETCH; + res->start = ranges[na+2]; + DBG("PCI: MEM[%d] 0x%lx -> 0x%lx\n", memno, + res->start, res->start + size - 1); + } + break; + } + if (res != NULL) { + res->name = dev->full_name; + res->end = res->start + size - 1; + res->parent = NULL; + res->sibling = NULL; + res->child = NULL; + } + ranges += np; + } +} + +/* We create the "pci-OF-bus-map" property now so it appears in the + * /proc device tree + */ +void __init +pci_create_OF_bus_map(void) +{ + struct property* of_prop; + + of_prop = (struct property*) alloc_bootmem(sizeof(struct property) + 256); + if (of_prop && find_path_device("/")) { + memset(of_prop, -1, sizeof(struct property) + 256); + of_prop->name = "pci-OF-bus-map"; + of_prop->length = 256; + of_prop->value = (unsigned char *)&of_prop[1]; + prom_add_property(find_path_device("/"), of_prop); + } +} + +static ssize_t pci_show_devspec(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct pci_dev *pdev; + struct device_node *np; + + pdev = to_pci_dev (dev); + np = pci_device_to_OF_node(pdev); + if (np == NULL || np->full_name == NULL) + return 0; + return sprintf(buf, "%s", np->full_name); +} +static DEVICE_ATTR(devspec, S_IRUGO, pci_show_devspec, NULL); + +#else /* CONFIG_PPC_OF */ +void pcibios_make_OF_bus_map(void) +{ +} +#endif /* CONFIG_PPC_OF */ + +/* Add sysfs properties */ +void pcibios_add_platform_entries(struct pci_dev *pdev) +{ +#ifdef CONFIG_PPC_OF + device_create_file(&pdev->dev, &dev_attr_devspec); +#endif /* CONFIG_PPC_OF */ +} + + +#ifdef CONFIG_PPC_PMAC +/* + * This set of routines checks for PCI<->PCI bridges that have closed + * IO resources and have child devices. It tries to re-open an IO + * window on them. + * + * This is a _temporary_ fix to workaround a problem with Apple's OF + * closing IO windows on P2P bridges when the OF drivers of cards + * below this bridge don't claim any IO range (typically ATI or + * Adaptec). + * + * A more complete fix would be to use drivers/pci/setup-bus.c, which + * involves a working pcibios_fixup_pbus_ranges(), some more care about + * ordering when creating the host bus resources, and maybe a few more + * minor tweaks + */ + +/* Initialize bridges with base/limit values we have collected */ +static void __init +do_update_p2p_io_resource(struct pci_bus *bus, int enable_vga) +{ + struct pci_dev *bridge = bus->self; + struct pci_controller* hose = (struct pci_controller *)bridge->sysdata; + u32 l; + u16 w; + struct resource res; + + if (bus->resource[0] == NULL) + return; + res = *(bus->resource[0]); + + DBG("Remapping Bus %d, bridge: %s\n", bus->number, pci_name(bridge)); + res.start -= ((unsigned long) hose->io_base_virt - isa_io_base); + res.end -= ((unsigned long) hose->io_base_virt - isa_io_base); + DBG(" IO window: %08lx-%08lx\n", res.start, res.end); + + /* Set up the top and bottom of the PCI I/O segment for this bus. */ + pci_read_config_dword(bridge, PCI_IO_BASE, &l); + l &= 0xffff000f; + l |= (res.start >> 8) & 0x00f0; + l |= res.end & 0xf000; + pci_write_config_dword(bridge, PCI_IO_BASE, l); + + if ((l & PCI_IO_RANGE_TYPE_MASK) == PCI_IO_RANGE_TYPE_32) { + l = (res.start >> 16) | (res.end & 0xffff0000); + pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, l); + } + + pci_read_config_word(bridge, PCI_COMMAND, &w); + w |= PCI_COMMAND_IO; + pci_write_config_word(bridge, PCI_COMMAND, w); + +#if 0 /* Enabling this causes XFree 4.2.0 to hang during PCI probe */ + if (enable_vga) { + pci_read_config_word(bridge, PCI_BRIDGE_CONTROL, &w); + w |= PCI_BRIDGE_CTL_VGA; + pci_write_config_word(bridge, PCI_BRIDGE_CONTROL, w); + } +#endif +} + +/* This function is pretty basic and actually quite broken for the + * general case, it's enough for us right now though. It's supposed + * to tell us if we need to open an IO range at all or not and what + * size. + */ +static int __init +check_for_io_childs(struct pci_bus *bus, struct resource* res, int *found_vga) +{ + struct pci_dev *dev; + int i; + int rc = 0; + +#define push_end(res, size) do { unsigned long __sz = (size) ; \ + res->end = ((res->end + __sz) / (__sz + 1)) * (__sz + 1) + __sz; \ + } while (0) + + list_for_each_entry(dev, &bus->devices, bus_list) { + u16 class = dev->class >> 8; + + if (class == PCI_CLASS_DISPLAY_VGA || + class == PCI_CLASS_NOT_DEFINED_VGA) + *found_vga = 1; + if (class >> 8 == PCI_BASE_CLASS_BRIDGE && dev->subordinate) + rc |= check_for_io_childs(dev->subordinate, res, found_vga); + if (class == PCI_CLASS_BRIDGE_CARDBUS) + push_end(res, 0xfff); + + for (i=0; i<PCI_NUM_RESOURCES; i++) { + struct resource *r; + unsigned long r_size; + + if (dev->class >> 8 == PCI_CLASS_BRIDGE_PCI + && i >= PCI_BRIDGE_RESOURCES) + continue; + r = &dev->resource[i]; + r_size = r->end - r->start; + if (r_size < 0xfff) + r_size = 0xfff; + if (r->flags & IORESOURCE_IO && (r_size) != 0) { + rc = 1; + push_end(res, r_size); + } + } + } + + return rc; +} + +/* Here we scan all P2P bridges of a given level that have a closed + * IO window. Note that the test for the presence of a VGA card should + * be improved to take into account already configured P2P bridges, + * currently, we don't see them and might end up configuring 2 bridges + * with VGA pass through enabled + */ +static void __init +do_fixup_p2p_level(struct pci_bus *bus) +{ + struct pci_bus *b; + int i, parent_io; + int has_vga = 0; + + for (parent_io=0; parent_io<4; parent_io++) + if (bus->resource[parent_io] + && bus->resource[parent_io]->flags & IORESOURCE_IO) + break; + if (parent_io >= 4) + return; + + list_for_each_entry(b, &bus->children, node) { + struct pci_dev *d = b->self; + struct pci_controller* hose = (struct pci_controller *)d->sysdata; + struct resource *res = b->resource[0]; + struct resource tmp_res; + unsigned long max; + int found_vga = 0; + + memset(&tmp_res, 0, sizeof(tmp_res)); + tmp_res.start = bus->resource[parent_io]->start; + + /* We don't let low addresses go through that closed P2P bridge, well, + * that may not be necessary but I feel safer that way + */ + if (tmp_res.start == 0) + tmp_res.start = 0x1000; + + if (!list_empty(&b->devices) && res && res->flags == 0 && + res != bus->resource[parent_io] && + (d->class >> 8) == PCI_CLASS_BRIDGE_PCI && + check_for_io_childs(b, &tmp_res, &found_vga)) { + u8 io_base_lo; + + printk(KERN_INFO "Fixing up IO bus %s\n", b->name); + + if (found_vga) { + if (has_vga) { + printk(KERN_WARNING "Skipping VGA, already active" + " on bus segment\n"); + found_vga = 0; + } else + has_vga = 1; + } + pci_read_config_byte(d, PCI_IO_BASE, &io_base_lo); + + if ((io_base_lo & PCI_IO_RANGE_TYPE_MASK) == PCI_IO_RANGE_TYPE_32) + max = ((unsigned long) hose->io_base_virt + - isa_io_base) + 0xffffffff; + else + max = ((unsigned long) hose->io_base_virt + - isa_io_base) + 0xffff; + + *res = tmp_res; + res->flags = IORESOURCE_IO; + res->name = b->name; + + /* Find a resource in the parent where we can allocate */ + for (i = 0 ; i < 4; i++) { + struct resource *r = bus->resource[i]; + if (!r) + continue; + if ((r->flags & IORESOURCE_IO) == 0) + continue; + DBG("Trying to allocate from %08lx, size %08lx from parent" + " res %d: %08lx -> %08lx\n", + res->start, res->end, i, r->start, r->end); + + if (allocate_resource(r, res, res->end + 1, res->start, max, + res->end + 1, NULL, NULL) < 0) { + DBG("Failed !\n"); + continue; + } + do_update_p2p_io_resource(b, found_vga); + break; + } + } + do_fixup_p2p_level(b); + } +} + +static void +pcibios_fixup_p2p_bridges(void) +{ + struct pci_bus *b; + + list_for_each_entry(b, &pci_root_buses, node) + do_fixup_p2p_level(b); +} + +#endif /* CONFIG_PPC_PMAC */ + +static int __init +pcibios_init(void) +{ + struct pci_controller *hose; + struct pci_bus *bus; + int next_busno; + + printk(KERN_INFO "PCI: Probing PCI hardware\n"); + + /* Scan all of the recorded PCI controllers. */ + for (next_busno = 0, hose = hose_head; hose; hose = hose->next) { + if (pci_assign_all_buses) + hose->first_busno = next_busno; + hose->last_busno = 0xff; + bus = pci_scan_bus(hose->first_busno, hose->ops, hose); + hose->last_busno = bus->subordinate; + if (pci_assign_all_buses || next_busno <= hose->last_busno) + next_busno = hose->last_busno + pcibios_assign_bus_offset; + } + pci_bus_count = next_busno; + + /* OpenFirmware based machines need a map of OF bus + * numbers vs. kernel bus numbers since we may have to + * remap them. + */ + if (pci_assign_all_buses && have_of) + pcibios_make_OF_bus_map(); + + /* Do machine dependent PCI interrupt routing */ + if (ppc_md.pci_swizzle && ppc_md.pci_map_irq) + pci_fixup_irqs(ppc_md.pci_swizzle, ppc_md.pci_map_irq); + + /* Call machine dependent fixup */ + if (ppc_md.pcibios_fixup) + ppc_md.pcibios_fixup(); + + /* Allocate and assign resources */ + pcibios_allocate_bus_resources(&pci_root_buses); + pcibios_allocate_resources(0); + pcibios_allocate_resources(1); +#ifdef CONFIG_PPC_PMAC + pcibios_fixup_p2p_bridges(); +#endif /* CONFIG_PPC_PMAC */ + pcibios_assign_resources(); + + /* Call machine dependent post-init code */ + if (ppc_md.pcibios_after_init) + ppc_md.pcibios_after_init(); + + return 0; +} + +subsys_initcall(pcibios_init); + +unsigned char __init +common_swizzle(struct pci_dev *dev, unsigned char *pinp) +{ + struct pci_controller *hose = dev->sysdata; + + if (dev->bus->number != hose->first_busno) { + u8 pin = *pinp; + do { + pin = bridge_swizzle(pin, PCI_SLOT(dev->devfn)); + /* Move up the chain of bridges. */ + dev = dev->bus->self; + } while (dev->bus->self); + *pinp = pin; + + /* The slot is the idsel of the last bridge. */ + } + return PCI_SLOT(dev->devfn); +} + +unsigned long resource_fixup(struct pci_dev * dev, struct resource * res, + unsigned long start, unsigned long size) +{ + return start; +} + +void __init pcibios_fixup_bus(struct pci_bus *bus) +{ + struct pci_controller *hose = (struct pci_controller *) bus->sysdata; + unsigned long io_offset; + struct resource *res; + int i; + + io_offset = (unsigned long)hose->io_base_virt - isa_io_base; + if (bus->parent == NULL) { + /* This is a host bridge - fill in its resources */ + hose->bus = bus; + + bus->resource[0] = res = &hose->io_resource; + if (!res->flags) { + if (io_offset) + printk(KERN_ERR "I/O resource not set for host" + " bridge %d\n", hose->index); + res->start = 0; + res->end = IO_SPACE_LIMIT; + res->flags = IORESOURCE_IO; + } + res->start += io_offset; + res->end += io_offset; + + for (i = 0; i < 3; ++i) { + res = &hose->mem_resources[i]; + if (!res->flags) { + if (i > 0) + continue; + printk(KERN_ERR "Memory resource not set for " + "host bridge %d\n", hose->index); + res->start = hose->pci_mem_offset; + res->end = ~0U; + res->flags = IORESOURCE_MEM; + } + bus->resource[i+1] = res; + } + } else { + /* This is a subordinate bridge */ + pci_read_bridge_bases(bus); + + for (i = 0; i < 4; ++i) { + if ((res = bus->resource[i]) == NULL) + continue; + if (!res->flags) + continue; + if (io_offset && (res->flags & IORESOURCE_IO)) { + res->start += io_offset; + res->end += io_offset; + } else if (hose->pci_mem_offset + && (res->flags & IORESOURCE_MEM)) { + res->start += hose->pci_mem_offset; + res->end += hose->pci_mem_offset; + } + } + } + + if (ppc_md.pcibios_fixup_bus) + ppc_md.pcibios_fixup_bus(bus); +} + +char __init *pcibios_setup(char *str) +{ + return str; +} + +/* the next one is stolen from the alpha port... */ +void __init +pcibios_update_irq(struct pci_dev *dev, int irq) +{ + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); + /* XXX FIXME - update OF device tree node interrupt property */ +} + +int pcibios_enable_device(struct pci_dev *dev, int mask) +{ + u16 cmd, old_cmd; + int idx; + struct resource *r; + + if (ppc_md.pcibios_enable_device_hook) + if (ppc_md.pcibios_enable_device_hook(dev, 0)) + return -EINVAL; + + pci_read_config_word(dev, PCI_COMMAND, &cmd); + old_cmd = cmd; + for (idx=0; idx<6; idx++) { + r = &dev->resource[idx]; + if (r->flags & IORESOURCE_UNSET) { + printk(KERN_ERR "PCI: Device %s not available because of resource collisions\n", pci_name(dev)); + return -EINVAL; + } + if (r->flags & IORESOURCE_IO) + cmd |= PCI_COMMAND_IO; + if (r->flags & IORESOURCE_MEM) + cmd |= PCI_COMMAND_MEMORY; + } + if (cmd != old_cmd) { + printk("PCI: Enabling device %s (%04x -> %04x)\n", + pci_name(dev), old_cmd, cmd); + pci_write_config_word(dev, PCI_COMMAND, cmd); + } + return 0; +} + +struct pci_controller* +pci_bus_to_hose(int bus) +{ + struct pci_controller* hose = hose_head; + + for (; hose; hose = hose->next) + if (bus >= hose->first_busno && bus <= hose->last_busno) + return hose; + return NULL; +} + +void __iomem * +pci_bus_io_base(unsigned int bus) +{ + struct pci_controller *hose; + + hose = pci_bus_to_hose(bus); + if (!hose) + return NULL; + return hose->io_base_virt; +} + +unsigned long +pci_bus_io_base_phys(unsigned int bus) +{ + struct pci_controller *hose; + + hose = pci_bus_to_hose(bus); + if (!hose) + return 0; + return hose->io_base_phys; +} + +unsigned long +pci_bus_mem_base_phys(unsigned int bus) +{ + struct pci_controller *hose; + + hose = pci_bus_to_hose(bus); + if (!hose) + return 0; + return hose->pci_mem_offset; +} + +unsigned long +pci_resource_to_bus(struct pci_dev *pdev, struct resource *res) +{ + /* Hack alert again ! See comments in chrp_pci.c + */ + struct pci_controller* hose = + (struct pci_controller *)pdev->sysdata; + if (hose && res->flags & IORESOURCE_MEM) + return res->start - hose->pci_mem_offset; + /* We may want to do something with IOs here... */ + return res->start; +} + + +static struct resource *__pci_mmap_make_offset(struct pci_dev *dev, + unsigned long *offset, + enum pci_mmap_state mmap_state) +{ + struct pci_controller *hose = pci_bus_to_hose(dev->bus->number); + unsigned long io_offset = 0; + int i, res_bit; + + if (hose == 0) + return NULL; /* should never happen */ + + /* If memory, add on the PCI bridge address offset */ + if (mmap_state == pci_mmap_mem) { + *offset += hose->pci_mem_offset; + res_bit = IORESOURCE_MEM; + } else { + io_offset = hose->io_base_virt - ___IO_BASE; + *offset += io_offset; + res_bit = IORESOURCE_IO; + } + + /* + * Check that the offset requested corresponds to one of the + * resources of the device. + */ + for (i = 0; i <= PCI_ROM_RESOURCE; i++) { + struct resource *rp = &dev->resource[i]; + int flags = rp->flags; + + /* treat ROM as memory (should be already) */ + if (i == PCI_ROM_RESOURCE) + flags |= IORESOURCE_MEM; + + /* Active and same type? */ + if ((flags & res_bit) == 0) + continue; + + /* In the range of this resource? */ + if (*offset < (rp->start & PAGE_MASK) || *offset > rp->end) + continue; + + /* found it! construct the final physical address */ + if (mmap_state == pci_mmap_io) + *offset += hose->io_base_phys - io_offset; + return rp; + } + + return NULL; +} + +/* + * Set vm_page_prot of VMA, as appropriate for this architecture, for a pci + * device mapping. + */ +static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp, + pgprot_t protection, + enum pci_mmap_state mmap_state, + int write_combine) +{ + unsigned long prot = pgprot_val(protection); + + /* Write combine is always 0 on non-memory space mappings. On + * memory space, if the user didn't pass 1, we check for a + * "prefetchable" resource. This is a bit hackish, but we use + * this to workaround the inability of /sysfs to provide a write + * combine bit + */ + if (mmap_state != pci_mmap_mem) + write_combine = 0; + else if (write_combine == 0) { + if (rp->flags & IORESOURCE_PREFETCH) + write_combine = 1; + } + + /* XXX would be nice to have a way to ask for write-through */ + prot |= _PAGE_NO_CACHE; + if (write_combine) + prot &= ~_PAGE_GUARDED; + else + prot |= _PAGE_GUARDED; + + printk("PCI map for %s:%lx, prot: %lx\n", pci_name(dev), rp->start, + prot); + + return __pgprot(prot); +} + +/* + * This one is used by /dev/mem and fbdev who have no clue about the + * PCI device, it tries to find the PCI device first and calls the + * above routine + */ +pgprot_t pci_phys_mem_access_prot(struct file *file, + unsigned long pfn, + unsigned long size, + pgprot_t protection) +{ + struct pci_dev *pdev = NULL; + struct resource *found = NULL; + unsigned long prot = pgprot_val(protection); + unsigned long offset = pfn << PAGE_SHIFT; + int i; + + if (page_is_ram(pfn)) + return prot; + + prot |= _PAGE_NO_CACHE | _PAGE_GUARDED; + + for_each_pci_dev(pdev) { + for (i = 0; i <= PCI_ROM_RESOURCE; i++) { + struct resource *rp = &pdev->resource[i]; + int flags = rp->flags; + + /* Active and same type? */ + if ((flags & IORESOURCE_MEM) == 0) + continue; + /* In the range of this resource? */ + if (offset < (rp->start & PAGE_MASK) || + offset > rp->end) + continue; + found = rp; + break; + } + if (found) + break; + } + if (found) { + if (found->flags & IORESOURCE_PREFETCH) + prot &= ~_PAGE_GUARDED; + pci_dev_put(pdev); + } + + DBG("non-PCI map for %lx, prot: %lx\n", offset, prot); + + return __pgprot(prot); +} + + +/* + * Perform the actual remap of the pages for a PCI device mapping, as + * appropriate for this architecture. The region in the process to map + * is described by vm_start and vm_end members of VMA, the base physical + * address is found in vm_pgoff. + * The pci device structure is provided so that architectures may make mapping + * decisions on a per-device or per-bus basis. + * + * Returns a negative error code on failure, zero on success. + */ +int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state, + int write_combine) +{ + unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; + struct resource *rp; + int ret; + + rp = __pci_mmap_make_offset(dev, &offset, mmap_state); + if (rp == NULL) + return -EINVAL; + + vma->vm_pgoff = offset >> PAGE_SHIFT; + vma->vm_flags |= VM_SHM | VM_LOCKED | VM_IO; + vma->vm_page_prot = __pci_mmap_set_pgprot(dev, rp, + vma->vm_page_prot, + mmap_state, write_combine); + + ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + vma->vm_end - vma->vm_start, vma->vm_page_prot); + + return ret; +} + +/* Obsolete functions. Should be removed once the symbios driver + * is fixed + */ +unsigned long +phys_to_bus(unsigned long pa) +{ + struct pci_controller *hose; + int i; + + for (hose = hose_head; hose; hose = hose->next) { + for (i = 0; i < 3; ++i) { + if (pa >= hose->mem_resources[i].start + && pa <= hose->mem_resources[i].end) { + /* + * XXX the hose->pci_mem_offset really + * only applies to mem_resources[0]. + * We need a way to store an offset for + * the others. -- paulus + */ + if (i == 0) + pa -= hose->pci_mem_offset; + return pa; + } + } + } + /* hmmm, didn't find it */ + return 0; +} + +unsigned long +pci_phys_to_bus(unsigned long pa, int busnr) +{ + struct pci_controller* hose = pci_bus_to_hose(busnr); + if (!hose) + return pa; + return pa - hose->pci_mem_offset; +} + +unsigned long +pci_bus_to_phys(unsigned int ba, int busnr) +{ + struct pci_controller* hose = pci_bus_to_hose(busnr); + if (!hose) + return ba; + return ba + hose->pci_mem_offset; +} + +/* Provide information on locations of various I/O regions in physical + * memory. Do this on a per-card basis so that we choose the right + * root bridge. + * Note that the returned IO or memory base is a physical address + */ + +long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn) +{ + struct pci_controller* hose; + long result = -EOPNOTSUPP; + + /* Argh ! Please forgive me for that hack, but that's the + * simplest way to get existing XFree to not lockup on some + * G5 machines... So when something asks for bus 0 io base + * (bus 0 is HT root), we return the AGP one instead. + */ +#ifdef CONFIG_PPC_PMAC + if (_machine == _MACH_Pmac && machine_is_compatible("MacRISC4")) + if (bus == 0) + bus = 0xf0; +#endif /* CONFIG_PPC_PMAC */ + + hose = pci_bus_to_hose(bus); + if (!hose) + return -ENODEV; + + switch (which) { + case IOBASE_BRIDGE_NUMBER: + return (long)hose->first_busno; + case IOBASE_MEMORY: + return (long)hose->pci_mem_offset; + case IOBASE_IO: + return (long)hose->io_base_phys; + case IOBASE_ISA_IO: + return (long)isa_io_base; + case IOBASE_ISA_MEM: + return (long)isa_mem_base; + } + + return result; +} + +void pci_resource_to_user(const struct pci_dev *dev, int bar, + const struct resource *rsrc, + u64 *start, u64 *end) +{ + struct pci_controller *hose = pci_bus_to_hose(dev->bus->number); + unsigned long offset = 0; + + if (hose == NULL) + return; + + if (rsrc->flags & IORESOURCE_IO) + offset = ___IO_BASE - hose->io_base_virt + hose->io_base_phys; + + *start = rsrc->start + offset; + *end = rsrc->end + offset; +} + +void __init +pci_init_resource(struct resource *res, unsigned long start, unsigned long end, + int flags, char *name) +{ + res->start = start; + res->end = end; + res->flags = flags; + res->name = name; + res->parent = NULL; + res->sibling = NULL; + res->child = NULL; +} + +void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max) +{ + unsigned long start = pci_resource_start(dev, bar); + unsigned long len = pci_resource_len(dev, bar); + unsigned long flags = pci_resource_flags(dev, bar); + + if (!len) + return NULL; + if (max && len > max) + len = max; + if (flags & IORESOURCE_IO) + return ioport_map(start, len); + if (flags & IORESOURCE_MEM) + /* Not checking IORESOURCE_CACHEABLE because PPC does + * not currently distinguish between ioremap and + * ioremap_nocache. + */ + return ioremap(start, len); + /* What? */ + return NULL; +} + +void pci_iounmap(struct pci_dev *dev, void __iomem *addr) +{ + /* Nothing to do */ +} +EXPORT_SYMBOL(pci_iomap); +EXPORT_SYMBOL(pci_iounmap); + +unsigned long pci_address_to_pio(phys_addr_t address) +{ + struct pci_controller* hose = hose_head; + + for (; hose; hose = hose->next) { + unsigned int size = hose->io_resource.end - + hose->io_resource.start + 1; + if (address >= hose->io_base_phys && + address < (hose->io_base_phys + size)) { + unsigned long base = + (unsigned long)hose->io_base_virt - _IO_BASE; + return base + (address - hose->io_base_phys); + } + } + return (unsigned int)-1; +} +EXPORT_SYMBOL(pci_address_to_pio); + +/* + * Null PCI config access functions, for the case when we can't + * find a hose. + */ +#define NULL_PCI_OP(rw, size, type) \ +static int \ +null_##rw##_config_##size(struct pci_dev *dev, int offset, type val) \ +{ \ + return PCIBIOS_DEVICE_NOT_FOUND; \ +} + +static int +null_read_config(struct pci_bus *bus, unsigned int devfn, int offset, + int len, u32 *val) +{ + return PCIBIOS_DEVICE_NOT_FOUND; +} + +static int +null_write_config(struct pci_bus *bus, unsigned int devfn, int offset, + int len, u32 val) +{ + return PCIBIOS_DEVICE_NOT_FOUND; +} + +static struct pci_ops null_pci_ops = +{ + null_read_config, + null_write_config +}; + +/* + * These functions are used early on before PCI scanning is done + * and all of the pci_dev and pci_bus structures have been created. + */ +static struct pci_bus * +fake_pci_bus(struct pci_controller *hose, int busnr) +{ + static struct pci_bus bus; + + if (hose == 0) { + hose = pci_bus_to_hose(busnr); + if (hose == 0) + printk(KERN_ERR "Can't find hose for PCI bus %d!\n", busnr); + } + bus.number = busnr; + bus.sysdata = hose; + bus.ops = hose? hose->ops: &null_pci_ops; + return &bus; +} + +#define EARLY_PCI_OP(rw, size, type) \ +int early_##rw##_config_##size(struct pci_controller *hose, int bus, \ + int devfn, int offset, type value) \ +{ \ + return pci_bus_##rw##_config_##size(fake_pci_bus(hose, bus), \ + devfn, offset, value); \ +} + +EARLY_PCI_OP(read, byte, u8 *) +EARLY_PCI_OP(read, word, u16 *) +EARLY_PCI_OP(read, dword, u32 *) +EARLY_PCI_OP(write, byte, u8) +EARLY_PCI_OP(write, word, u16) +EARLY_PCI_OP(write, dword, u32) diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 8b6008ab217d..c367520bc1c3 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -34,7 +34,7 @@ #ifdef DEBUG #include <asm/udbg.h> -#define DBG(fmt...) udbg_printf(fmt) +#define DBG(fmt...) printk(fmt) #else #define DBG(fmt...) #endif @@ -53,6 +53,7 @@ EXPORT_SYMBOL(io_page_mask); #ifdef CONFIG_PPC_MULTIPLATFORM static void fixup_resource(struct resource *res, struct pci_dev *dev); static void do_bus_setup(struct pci_bus *bus); +static void phbs_remap_io(void); #endif /* pci_io_base -- the base address from which io bars are offsets. @@ -251,7 +252,8 @@ void pcibios_free_controller(struct pci_controller *phb) kfree(phb); } -static void __init pcibios_claim_one_bus(struct pci_bus *b) +#ifndef CONFIG_PPC_ISERIES +void __devinit pcibios_claim_one_bus(struct pci_bus *b) { struct pci_dev *dev; struct pci_bus *child_bus; @@ -271,8 +273,10 @@ static void __init pcibios_claim_one_bus(struct pci_bus *b) list_for_each_entry(child_bus, &b->children, node) pcibios_claim_one_bus(child_bus); } +#ifdef CONFIG_HOTPLUG +EXPORT_SYMBOL_GPL(pcibios_claim_one_bus); +#endif -#ifndef CONFIG_PPC_ISERIES static void __init pcibios_claim_of_setup(void) { struct pci_bus *b; @@ -323,6 +327,7 @@ static void pci_parse_of_addrs(struct device_node *node, struct pci_dev *dev) addrs = (u32 *) get_property(node, "assigned-addresses", &proplen); if (!addrs) return; + DBG(" parse addresses (%d bytes) @ %p\n", proplen, addrs); for (; proplen >= 20; proplen -= 20, addrs += 5) { flags = pci_parse_of_flags(addrs[0]); if (!flags) @@ -332,6 +337,9 @@ static void pci_parse_of_addrs(struct device_node *node, struct pci_dev *dev) if (!size) continue; i = addrs[0] & 0xff; + DBG(" base: %llx, size: %llx, i: %x\n", + (unsigned long long)base, (unsigned long long)size, i); + if (PCI_BASE_ADDRESS_0 <= i && i <= PCI_BASE_ADDRESS_5) { res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2]; } else if (i == dev->rom_base_reg) { @@ -362,6 +370,8 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, if (type == NULL) type = ""; + DBG(" create device, devfn: %x, type: %s\n", devfn, type); + memset(dev, 0, sizeof(struct pci_dev)); dev->bus = bus; dev->sysdata = node; @@ -375,12 +385,14 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, dev->subsystem_vendor = get_int_prop(node, "subsystem-vendor-id", 0); dev->subsystem_device = get_int_prop(node, "subsystem-id", 0); - dev->cfg_size = 256; /*pci_cfg_space_size(dev);*/ + dev->cfg_size = pci_cfg_space_size(dev); sprintf(pci_name(dev), "%04x:%02x:%02x.%d", pci_domain_nr(bus), dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn)); dev->class = get_int_prop(node, "class-code", 0); + DBG(" class: 0x%x\n", dev->class); + dev->current_state = 4; /* unknown power state */ if (!strcmp(type, "pci")) { @@ -402,6 +414,8 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, pci_parse_of_addrs(node, dev); + DBG(" adding to system ...\n"); + pci_device_add(dev, bus); /* XXX pci_scan_msi_device(dev); */ @@ -418,15 +432,21 @@ void __devinit of_scan_bus(struct device_node *node, int reglen, devfn; struct pci_dev *dev; + DBG("of_scan_bus(%s) bus no %d... \n", node->full_name, bus->number); + while ((child = of_get_next_child(node, child)) != NULL) { + DBG(" * %s\n", child->full_name); reg = (u32 *) get_property(child, "reg", ®len); if (reg == NULL || reglen < 20) continue; devfn = (reg[0] >> 8) & 0xff; + /* create a new pci_dev for this device */ dev = of_create_pci_dev(child, bus, devfn); if (!dev) continue; + DBG("dev header type: %x\n", dev->hdr_type); + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) of_scan_pci_bridge(child, dev); @@ -446,16 +466,18 @@ void __devinit of_scan_pci_bridge(struct device_node *node, unsigned int flags; u64 size; + DBG("of_scan_pci_bridge(%s)\n", node->full_name); + /* parse bus-range property */ busrange = (u32 *) get_property(node, "bus-range", &len); if (busrange == NULL || len != 8) { - printk(KERN_ERR "Can't get bus-range for PCI-PCI bridge %s\n", + printk(KERN_DEBUG "Can't get bus-range for PCI-PCI bridge %s\n", node->full_name); return; } ranges = (u32 *) get_property(node, "ranges", &len); if (ranges == NULL) { - printk(KERN_ERR "Can't get ranges for PCI-PCI bridge %s\n", + printk(KERN_DEBUG "Can't get ranges for PCI-PCI bridge %s\n", node->full_name); return; } @@ -509,10 +531,13 @@ void __devinit of_scan_pci_bridge(struct device_node *node, } sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus), bus->number); + DBG(" bus name: %s\n", bus->name); mode = PCI_PROBE_NORMAL; if (ppc_md.pci_probe_mode) mode = ppc_md.pci_probe_mode(bus); + DBG(" probe mode: %d\n", mode); + if (mode == PCI_PROBE_DEVTREE) of_scan_bus(node, bus); else if (mode == PCI_PROBE_NORMAL) @@ -528,6 +553,8 @@ void __devinit scan_phb(struct pci_controller *hose) int i, mode; struct resource *res; + DBG("Scanning PHB %s\n", node ? node->full_name : "<NO NAME>"); + bus = pci_create_bus(NULL, hose->first_busno, hose->ops, node); if (bus == NULL) { printk(KERN_ERR "Failed to create bus for PCI domain %04x\n", @@ -552,8 +579,9 @@ void __devinit scan_phb(struct pci_controller *hose) mode = PCI_PROBE_NORMAL; #ifdef CONFIG_PPC_MULTIPLATFORM - if (ppc_md.pci_probe_mode) + if (node && ppc_md.pci_probe_mode) mode = ppc_md.pci_probe_mode(bus); + DBG(" probe mode: %d\n", mode); if (mode == PCI_PROBE_DEVTREE) { bus->subordinate = hose->last_busno; of_scan_bus(node, bus); @@ -842,8 +870,7 @@ pgprot_t pci_phys_mem_access_prot(struct file *file, * Returns a negative error code on failure, zero on success. */ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, - enum pci_mmap_state mmap_state, - int write_combine) + enum pci_mmap_state mmap_state, int write_combine) { unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; struct resource *rp; @@ -896,6 +923,25 @@ static void __devinit pci_process_ISA_OF_ranges(struct device_node *isa_node, unsigned long phb_io_base_phys, void __iomem * phb_io_base_virt) { + /* Remove these asap */ + + struct pci_address { + u32 a_hi; + u32 a_mid; + u32 a_lo; + }; + + struct isa_address { + u32 a_hi; + u32 a_lo; + }; + + struct isa_range { + struct isa_address isa_addr; + struct pci_address pci_addr; + unsigned int size; + }; + struct isa_range *range; unsigned long pci_addr; unsigned int isa_addr; @@ -1173,7 +1219,7 @@ int remap_bus_range(struct pci_bus *bus) } EXPORT_SYMBOL(remap_bus_range); -void phbs_remap_io(void) +static void phbs_remap_io(void) { struct pci_controller *hose, *tmp; @@ -1223,6 +1269,7 @@ void __devinit pcibios_fixup_device_resources(struct pci_dev *dev, } EXPORT_SYMBOL(pcibios_fixup_device_resources); + static void __devinit do_bus_setup(struct pci_bus *bus) { struct pci_dev *dev; @@ -1306,8 +1353,38 @@ void pci_resource_to_user(const struct pci_dev *dev, int bar, *end = rsrc->end + offset; } +struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node) +{ + if (!have_of) + return NULL; + while(node) { + struct pci_controller *hose, *tmp; + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) + if (hose->arch_data == node) + return hose; + node = node->parent; + } + return NULL; +} + #endif /* CONFIG_PPC_MULTIPLATFORM */ +unsigned long pci_address_to_pio(phys_addr_t address) +{ + struct pci_controller *hose, *tmp; + + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { + if (address >= hose->io_base_phys && + address < (hose->io_base_phys + hose->pci_io_size)) { + unsigned long base = + (unsigned long)hose->io_base_virt - pci_io_base; + return base + (address - hose->io_base_phys); + } + } + return (unsigned int)-1; +} +EXPORT_SYMBOL_GPL(pci_address_to_pio); + #define IOBASE_BRIDGE_NUMBER 0 #define IOBASE_MEMORY 1 diff --git a/arch/powerpc/kernel/pmc.c b/arch/powerpc/kernel/pmc.c index 2d333cc84082..e6fb194fe537 100644 --- a/arch/powerpc/kernel/pmc.c +++ b/arch/powerpc/kernel/pmc.c @@ -43,8 +43,13 @@ static void dummy_perf(struct pt_regs *regs) mtspr(SPRN_MMCR0, mmcr0); } #else +/* Ensure exceptions are disabled */ static void dummy_perf(struct pt_regs *regs) { + unsigned int mmcr0 = mfspr(SPRN_MMCR0); + + mmcr0 &= ~(MMCR0_PMXE); + mtspr(SPRN_MMCR0, mmcr0); } #endif diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 94db25708456..d9a459c144d8 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -76,11 +76,6 @@ EXPORT_SYMBOL(single_step_exception); EXPORT_SYMBOL(sys_sigreturn); #endif -#if defined(CONFIG_PPC_PREP) -EXPORT_SYMBOL(_prep_type); -EXPORT_SYMBOL(ucSystemType); -#endif - EXPORT_SYMBOL(strcpy); EXPORT_SYMBOL(strncpy); EXPORT_SYMBOL(strcat); @@ -235,8 +230,7 @@ EXPORT_SYMBOL(__down_interruptible); EXPORT_SYMBOL(cpm_install_handler); EXPORT_SYMBOL(cpm_free_handler); #endif /* CONFIG_8xx */ -#if defined(CONFIG_8xx) || defined(CONFIG_40x) || defined(CONFIG_85xx) ||\ - defined(CONFIG_83xx) +#if defined(CONFIG_8xx) || defined(CONFIG_40x) EXPORT_SYMBOL(__res); #endif @@ -249,7 +243,6 @@ EXPORT_SYMBOL(set_context); extern long mol_trampoline; EXPORT_SYMBOL(mol_trampoline); /* For MOL */ EXPORT_SYMBOL(flush_hash_pages); /* For MOL */ -EXPORT_SYMBOL_GPL(__handle_mm_fault); /* For MOL */ #ifdef CONFIG_SMP extern int mmu_hash_lock; EXPORT_SYMBOL(mmu_hash_lock); /* For MOL */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 105d5609ff57..57703994a063 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -201,13 +201,13 @@ int dump_spe(struct pt_regs *regs, elf_vrregset_t *evrregs) } #endif /* CONFIG_SPE */ +#ifndef CONFIG_SMP /* * If we are doing lazy switching of CPU state (FP, altivec or SPE), * and the current task has some state, discard it. */ -static inline void discard_lazy_cpu_state(void) +void discard_lazy_cpu_state(void) { -#ifndef CONFIG_SMP preempt_disable(); if (last_task_used_math == current) last_task_used_math = NULL; @@ -220,9 +220,10 @@ static inline void discard_lazy_cpu_state(void) last_task_used_spe = NULL; #endif preempt_enable(); -#endif /* CONFIG_SMP */ } +#endif /* CONFIG_SMP */ +#ifdef CONFIG_PPC_MERGE /* XXX for now */ int set_dabr(unsigned long dabr) { if (ppc_md.set_dabr) @@ -231,6 +232,7 @@ int set_dabr(unsigned long dabr) mtspr(SPRN_DABR, dabr); return 0; } +#endif #ifdef CONFIG_PPC64 DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array); @@ -424,7 +426,7 @@ void show_regs(struct pt_regs * regs) if (trap == 0x300 || trap == 0x600) printk("DAR: "REG", DSISR: "REG"\n", regs->dar, regs->dsisr); printk("TASK = %p[%d] '%s' THREAD: %p", - current, current->pid, current->comm, current->thread_info); + current, current->pid, current->comm, task_thread_info(current)); #ifdef CONFIG_SMP printk(" CPU: %d", smp_processor_id()); @@ -503,7 +505,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, { struct pt_regs *childregs, *kregs; extern void ret_from_fork(void); - unsigned long sp = (unsigned long)p->thread_info + THREAD_SIZE; + unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE; CHECK_FULL_REGS(regs); /* Copy registers */ @@ -516,7 +518,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, #ifdef CONFIG_PPC32 childregs->gpr[2] = (unsigned long) p; #else - clear_ti_thread_flag(p->thread_info, TIF_32BIT); + clear_tsk_thread_flag(p, TIF_32BIT); #endif p->thread.regs = NULL; /* no user register state */ } else { @@ -588,10 +590,8 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) * set. Do it now. */ if (!current->thread.regs) { - unsigned long childregs = (unsigned long)current->thread_info + - THREAD_SIZE; - childregs -= sizeof(struct pt_regs); - current->thread.regs = (struct pt_regs *)childregs; + struct pt_regs *regs = task_stack_page(current) + THREAD_SIZE; + current->thread.regs = regs - 1; } memset(regs->gpr, 0, sizeof(regs->gpr)); @@ -767,7 +767,7 @@ out: static int validate_sp(unsigned long sp, struct task_struct *p, unsigned long nbytes) { - unsigned long stack_page = (unsigned long)p->thread_info; + unsigned long stack_page = (unsigned long)task_stack_page(p); if (sp >= stack_page + sizeof(struct thread_struct) && sp <= stack_page + THREAD_SIZE - nbytes) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 3bf968e74095..d50c8df0183e 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -29,6 +29,7 @@ #include <linux/initrd.h> #include <linux/bitops.h> #include <linux/module.h> +#include <linux/kexec.h> #include <asm/prom.h> #include <asm/rtas.h> @@ -37,6 +38,7 @@ #include <asm/processor.h> #include <asm/irq.h> #include <asm/io.h> +#include <asm/kdump.h> #include <asm/smp.h> #include <asm/system.h> #include <asm/mmu.h> @@ -55,21 +57,6 @@ #define DBG(fmt...) #endif -struct pci_reg_property { - struct pci_address addr; - u32 size_hi; - u32 size_lo; -}; - -struct isa_reg_property { - u32 space; - u32 address; - u32 size; -}; - - -typedef int interpret_func(struct device_node *, unsigned long *, - int, int, int); static int __initdata dt_root_addr_cells; static int __initdata dt_root_size_cells; @@ -311,6 +298,16 @@ static int __devinit finish_node_interrupts(struct device_node *np, int i, j, n, sense; unsigned int *irq, virq; struct device_node *ic; + int trace = 0; + + //#define TRACE(fmt...) do { if (trace) { printk(fmt); mdelay(1000); } } while(0) +#define TRACE(fmt...) + + if (!strcmp(np->name, "smu-doorbell")) + trace = 1; + + TRACE("Finishing SMU doorbell ! num_interrupt_controllers = %d\n", + num_interrupt_controllers); if (num_interrupt_controllers == 0) { /* @@ -345,11 +342,12 @@ static int __devinit finish_node_interrupts(struct device_node *np, } ints = (unsigned int *) get_property(np, "interrupts", &intlen); + TRACE("ints=%p, intlen=%d\n", ints, intlen); if (ints == NULL) return 0; intrcells = prom_n_intr_cells(np); intlen /= intrcells * sizeof(unsigned int); - + TRACE("intrcells=%d, new intlen=%d\n", intrcells, intlen); np->intrs = prom_alloc(intlen * sizeof(*(np->intrs)), mem_start); if (!np->intrs) return -ENOMEM; @@ -360,6 +358,7 @@ static int __devinit finish_node_interrupts(struct device_node *np, intrcount = 0; for (i = 0; i < intlen; ++i, ints += intrcells) { n = map_interrupt(&irq, &ic, np, ints, intrcells); + TRACE("map, irq=%d, ic=%p, n=%d\n", irq, ic, n); if (n <= 0) continue; @@ -370,6 +369,7 @@ static int __devinit finish_node_interrupts(struct device_node *np, np->intrs[intrcount].sense = map_isa_senses[sense]; } else { virq = virt_irq_create_mapping(irq[0]); + TRACE("virq=%d\n", virq); #ifdef CONFIG_PPC64 if (virq == NO_IRQ) { printk(KERN_CRIT "Could not allocate interrupt" @@ -379,6 +379,12 @@ static int __devinit finish_node_interrupts(struct device_node *np, #endif np->intrs[intrcount].line = irq_offset_up(virq); sense = (n > 1)? (irq[1] & 3): 1; + + /* Apple uses bits in there in a different way, let's + * only keep the real sense bit on macs + */ + if (_machine == PLATFORM_POWERMAC) + sense &= 0x1; np->intrs[intrcount].sense = map_mpic_senses[sense]; } @@ -388,12 +394,13 @@ static int __devinit finish_node_interrupts(struct device_node *np, char *name = get_property(ic->parent, "name", NULL); if (name && !strcmp(name, "u3")) np->intrs[intrcount].line += 128; - else if (!(name && !strcmp(name, "mac-io"))) + else if (!(name && (!strcmp(name, "mac-io") || + !strcmp(name, "u4")))) /* ignore other cascaded controllers, such as the k2-sata-root */ break; } -#endif +#endif /* CONFIG_PPC64 */ if (n > 2) { printk("hmmm, got %d intr cells for %s:", n, np->full_name); @@ -408,234 +415,19 @@ static int __devinit finish_node_interrupts(struct device_node *np, return 0; } -static int __devinit interpret_pci_props(struct device_node *np, - unsigned long *mem_start, - int naddrc, int nsizec, - int measure_only) -{ - struct address_range *adr; - struct pci_reg_property *pci_addrs; - int i, l, n_addrs; - - pci_addrs = (struct pci_reg_property *) - get_property(np, "assigned-addresses", &l); - if (!pci_addrs) - return 0; - - n_addrs = l / sizeof(*pci_addrs); - - adr = prom_alloc(n_addrs * sizeof(*adr), mem_start); - if (!adr) - return -ENOMEM; - - if (measure_only) - return 0; - - np->addrs = adr; - np->n_addrs = n_addrs; - - for (i = 0; i < n_addrs; i++) { - adr[i].space = pci_addrs[i].addr.a_hi; - adr[i].address = pci_addrs[i].addr.a_lo | - ((u64)pci_addrs[i].addr.a_mid << 32); - adr[i].size = pci_addrs[i].size_lo; - } - - return 0; -} - -static int __init interpret_dbdma_props(struct device_node *np, - unsigned long *mem_start, - int naddrc, int nsizec, - int measure_only) -{ - struct reg_property32 *rp; - struct address_range *adr; - unsigned long base_address; - int i, l; - struct device_node *db; - - base_address = 0; - if (!measure_only) { - for (db = np->parent; db != NULL; db = db->parent) { - if (!strcmp(db->type, "dbdma") && db->n_addrs != 0) { - base_address = db->addrs[0].address; - break; - } - } - } - - rp = (struct reg_property32 *) get_property(np, "reg", &l); - if (rp != 0 && l >= sizeof(struct reg_property32)) { - i = 0; - adr = (struct address_range *) (*mem_start); - while ((l -= sizeof(struct reg_property32)) >= 0) { - if (!measure_only) { - adr[i].space = 2; - adr[i].address = rp[i].address + base_address; - adr[i].size = rp[i].size; - } - ++i; - } - np->addrs = adr; - np->n_addrs = i; - (*mem_start) += i * sizeof(struct address_range); - } - - return 0; -} - -static int __init interpret_macio_props(struct device_node *np, - unsigned long *mem_start, - int naddrc, int nsizec, - int measure_only) -{ - struct reg_property32 *rp; - struct address_range *adr; - unsigned long base_address; - int i, l; - struct device_node *db; - - base_address = 0; - if (!measure_only) { - for (db = np->parent; db != NULL; db = db->parent) { - if (!strcmp(db->type, "mac-io") && db->n_addrs != 0) { - base_address = db->addrs[0].address; - break; - } - } - } - - rp = (struct reg_property32 *) get_property(np, "reg", &l); - if (rp != 0 && l >= sizeof(struct reg_property32)) { - i = 0; - adr = (struct address_range *) (*mem_start); - while ((l -= sizeof(struct reg_property32)) >= 0) { - if (!measure_only) { - adr[i].space = 2; - adr[i].address = rp[i].address + base_address; - adr[i].size = rp[i].size; - } - ++i; - } - np->addrs = adr; - np->n_addrs = i; - (*mem_start) += i * sizeof(struct address_range); - } - - return 0; -} - -static int __init interpret_isa_props(struct device_node *np, - unsigned long *mem_start, - int naddrc, int nsizec, - int measure_only) -{ - struct isa_reg_property *rp; - struct address_range *adr; - int i, l; - - rp = (struct isa_reg_property *) get_property(np, "reg", &l); - if (rp != 0 && l >= sizeof(struct isa_reg_property)) { - i = 0; - adr = (struct address_range *) (*mem_start); - while ((l -= sizeof(struct isa_reg_property)) >= 0) { - if (!measure_only) { - adr[i].space = rp[i].space; - adr[i].address = rp[i].address; - adr[i].size = rp[i].size; - } - ++i; - } - np->addrs = adr; - np->n_addrs = i; - (*mem_start) += i * sizeof(struct address_range); - } - - return 0; -} - -static int __init interpret_root_props(struct device_node *np, - unsigned long *mem_start, - int naddrc, int nsizec, - int measure_only) -{ - struct address_range *adr; - int i, l; - unsigned int *rp; - int rpsize = (naddrc + nsizec) * sizeof(unsigned int); - - rp = (unsigned int *) get_property(np, "reg", &l); - if (rp != 0 && l >= rpsize) { - i = 0; - adr = (struct address_range *) (*mem_start); - while ((l -= rpsize) >= 0) { - if (!measure_only) { - adr[i].space = 0; - adr[i].address = rp[naddrc - 1]; - adr[i].size = rp[naddrc + nsizec - 1]; - } - ++i; - rp += naddrc + nsizec; - } - np->addrs = adr; - np->n_addrs = i; - (*mem_start) += i * sizeof(struct address_range); - } - - return 0; -} - static int __devinit finish_node(struct device_node *np, unsigned long *mem_start, - interpret_func *ifunc, - int naddrc, int nsizec, int measure_only) { struct device_node *child; - int *ip, rc = 0; - - /* get the device addresses and interrupts */ - if (ifunc != NULL) - rc = ifunc(np, mem_start, naddrc, nsizec, measure_only); - if (rc) - goto out; + int rc = 0; rc = finish_node_interrupts(np, mem_start, measure_only); if (rc) goto out; - /* Look for #address-cells and #size-cells properties. */ - ip = (int *) get_property(np, "#address-cells", NULL); - if (ip != NULL) - naddrc = *ip; - ip = (int *) get_property(np, "#size-cells", NULL); - if (ip != NULL) - nsizec = *ip; - - if (!strcmp(np->name, "device-tree") || np->parent == NULL) - ifunc = interpret_root_props; - else if (np->type == 0) - ifunc = NULL; - else if (!strcmp(np->type, "pci") || !strcmp(np->type, "vci")) - ifunc = interpret_pci_props; - else if (!strcmp(np->type, "dbdma")) - ifunc = interpret_dbdma_props; - else if (!strcmp(np->type, "mac-io") || ifunc == interpret_macio_props) - ifunc = interpret_macio_props; - else if (!strcmp(np->type, "isa")) - ifunc = interpret_isa_props; - else if (!strcmp(np->name, "uni-n") || !strcmp(np->name, "u3")) - ifunc = interpret_root_props; - else if (!((ifunc == interpret_dbdma_props - || ifunc == interpret_macio_props) - && (!strcmp(np->type, "escc") - || !strcmp(np->type, "media-bay")))) - ifunc = NULL; - for (child = np->child; child != NULL; child = child->sibling) { - rc = finish_node(child, mem_start, ifunc, - naddrc, nsizec, measure_only); + rc = finish_node(child, mem_start, measure_only); if (rc) goto out; } @@ -697,10 +489,10 @@ void __init finish_device_tree(void) * reason and then remove those additional 16 bytes */ size = 16; - finish_node(allnodes, &size, NULL, 0, 0, 1); + finish_node(allnodes, &size, 1); size -= 16; end = start = (unsigned long) __va(lmb_alloc(size, 128)); - finish_node(allnodes, &end, NULL, 0, 0, 0); + finish_node(allnodes, &end, 0); BUG_ON(end != start + size); DBG(" <- finish_device_tree\n"); @@ -1180,7 +972,7 @@ static int __init early_init_dt_scan_chosen(unsigned long node, #endif #ifdef CONFIG_PPC_RTAS - /* To help early debugging via the front panel, we retreive a minimal + /* To help early debugging via the front panel, we retrieve a minimal * set of RTAS infos now if available */ { @@ -1197,6 +989,16 @@ static int __init early_init_dt_scan_chosen(unsigned long node, } #endif /* CONFIG_PPC_RTAS */ +#ifdef CONFIG_KEXEC + lprop = (u64*)of_get_flat_dt_prop(node, "linux,crashkernel-base", NULL); + if (lprop) + crashk_res.start = *lprop; + + lprop = (u64*)of_get_flat_dt_prop(node, "linux,crashkernel-size", NULL); + if (lprop) + crashk_res.end = crashk_res.start + *lprop - 1; +#endif + /* break now */ return 1; } @@ -1263,7 +1065,9 @@ static int __init early_init_dt_scan_memory(unsigned long node, } else if (strcmp(type, "memory") != 0) return 0; - reg = (cell_t *)of_get_flat_dt_prop(node, "reg", &l); + reg = (cell_t *)of_get_flat_dt_prop(node, "linux,usable-memory", &l); + if (reg == NULL) + reg = (cell_t *)of_get_flat_dt_prop(node, "reg", &l); if (reg == NULL) return 0; @@ -1296,17 +1100,37 @@ static int __init early_init_dt_scan_memory(unsigned long node, static void __init early_reserve_mem(void) { - unsigned long base, size; - unsigned long *reserve_map; + u64 base, size; + u64 *reserve_map; - reserve_map = (unsigned long *)(((unsigned long)initial_boot_params) + + reserve_map = (u64 *)(((unsigned long)initial_boot_params) + initial_boot_params->off_mem_rsvmap); +#ifdef CONFIG_PPC32 + /* + * Handle the case where we might be booting from an old kexec + * image that setup the mem_rsvmap as pairs of 32-bit values + */ + if (*reserve_map > 0xffffffffull) { + u32 base_32, size_32; + u32 *reserve_map_32 = (u32 *)reserve_map; + + while (1) { + base_32 = *(reserve_map_32++); + size_32 = *(reserve_map_32++); + if (size_32 == 0) + break; + DBG("reserving: %lx -> %lx\n", base_32, size_32); + lmb_reserve(base_32, size_32); + } + return; + } +#endif while (1) { base = *(reserve_map++); size = *(reserve_map++); if (size == 0) break; - DBG("reserving: %lx -> %lx\n", base, size); + DBG("reserving: %llx -> %llx\n", base, size); lmb_reserve(base, size); } @@ -1335,11 +1159,14 @@ void __init early_init_devtree(void *params) of_scan_flat_dt(early_init_dt_scan_memory, NULL); lmb_enforce_memory_limit(memory_limit); lmb_analyze(); - lmb_reserve(0, __pa(klimit)); DBG("Phys. mem: %lx\n", lmb_phys_mem_size()); /* Reserve LMB regions used by kernel, initrd, dt, etc... */ + lmb_reserve(PHYSICAL_START, __pa(klimit) - PHYSICAL_START); +#ifdef CONFIG_CRASH_DUMP + lmb_reserve(0, KDUMP_RESERVE_LIMIT); +#endif early_reserve_mem(); DBG("Scanning CPUs ...\n"); @@ -1800,9 +1627,13 @@ static void of_node_release(struct kref *kref) kfree(prop->value); kfree(prop); prop = next; + + if (!prop) { + prop = node->deadprops; + node->deadprops = NULL; + } } kfree(node->intrs); - kfree(node->addrs); kfree(node->full_name); kfree(node->data); kfree(node); @@ -1884,9 +1715,7 @@ void of_detach_node(const struct device_node *np) * This should probably be split up into smaller chunks. */ -static int of_finish_dynamic_node(struct device_node *node, - unsigned long *unused1, int unused2, - int unused3, int unused4) +static int of_finish_dynamic_node(struct device_node *node) { struct device_node *parent = of_get_parent(node); int err = 0; @@ -1907,7 +1736,8 @@ static int of_finish_dynamic_node(struct device_node *node, return -ENODEV; /* fix up new node's linux_phandle field */ - if ((ibm_phandle = (unsigned int *)get_property(node, "ibm,phandle", NULL))) + if ((ibm_phandle = (unsigned int *)get_property(node, + "ibm,phandle", NULL))) node->linux_phandle = *ibm_phandle; out: @@ -1922,7 +1752,9 @@ static int prom_reconfig_notifier(struct notifier_block *nb, switch (action) { case PSERIES_RECONFIG_ADD: - err = finish_node(node, NULL, of_finish_dynamic_node, 0, 0, 0); + err = of_finish_dynamic_node(node); + if (!err) + finish_node(node, NULL, 0); if (err < 0) { printk(KERN_ERR "finish_node returned %d\n", err); err = NOTIFY_BAD; @@ -1947,22 +1779,32 @@ static int __init prom_reconfig_setup(void) __initcall(prom_reconfig_setup); #endif -/* - * Find a property with a given name for a given node - * and return the value. - */ -unsigned char *get_property(struct device_node *np, const char *name, - int *lenp) +struct property *of_find_property(struct device_node *np, const char *name, + int *lenp) { struct property *pp; + read_lock(&devtree_lock); for (pp = np->properties; pp != 0; pp = pp->next) if (strcmp(pp->name, name) == 0) { if (lenp != 0) *lenp = pp->length; - return pp->value; + break; } - return NULL; + read_unlock(&devtree_lock); + + return pp; +} + +/* + * Find a property with a given name for a given node + * and return the value. + */ +unsigned char *get_property(struct device_node *np, const char *name, + int *lenp) +{ + struct property *pp = of_find_property(np,name,lenp); + return pp ? pp->value : NULL; } EXPORT_SYMBOL(get_property); @@ -1996,175 +1838,82 @@ int prom_add_property(struct device_node* np, struct property* prop) return 0; } -/* I quickly hacked that one, check against spec ! */ -static inline unsigned long -bus_space_to_resource_flags(unsigned int bus_space) +/* + * Remove a property from a node. Note that we don't actually + * remove it, since we have given out who-knows-how-many pointers + * to the data using get-property. Instead we just move the property + * to the "dead properties" list, so it won't be found any more. + */ +int prom_remove_property(struct device_node *np, struct property *prop) { - u8 space = (bus_space >> 24) & 0xf; - if (space == 0) - space = 0x02; - if (space == 0x02) - return IORESOURCE_MEM; - else if (space == 0x01) - return IORESOURCE_IO; - else { - printk(KERN_WARNING "prom.c: bus_space_to_resource_flags(), space: %x\n", - bus_space); - return 0; - } -} + struct property **next; + int found = 0; -#ifdef CONFIG_PCI -static struct resource *find_parent_pci_resource(struct pci_dev* pdev, - struct address_range *range) -{ - unsigned long mask; - int i; - - /* Check this one */ - mask = bus_space_to_resource_flags(range->space); - for (i=0; i<DEVICE_COUNT_RESOURCE; i++) { - if ((pdev->resource[i].flags & mask) == mask && - pdev->resource[i].start <= range->address && - pdev->resource[i].end > range->address) { - if ((range->address + range->size - 1) > pdev->resource[i].end) { - /* Add better message */ - printk(KERN_WARNING "PCI/OF resource overlap !\n"); - return NULL; - } - break; - } + write_lock(&devtree_lock); + next = &np->properties; + while (*next) { + if (*next == prop) { + /* found the node */ + *next = prop->next; + prop->next = np->deadprops; + np->deadprops = prop; + found = 1; + break; + } + next = &(*next)->next; } - if (i == DEVICE_COUNT_RESOURCE) - return NULL; - return &pdev->resource[i]; + write_unlock(&devtree_lock); + + if (!found) + return -ENODEV; + +#ifdef CONFIG_PROC_DEVICETREE + /* try to remove the proc node as well */ + if (np->pde) + proc_device_tree_remove_prop(np->pde, prop); +#endif /* CONFIG_PROC_DEVICETREE */ + + return 0; } /* - * Request an OF device resource. Currently handles child of PCI devices, - * or other nodes attached to the root node. Ultimately, put some - * link to resources in the OF node. + * Update a property in a node. Note that we don't actually + * remove it, since we have given out who-knows-how-many pointers + * to the data using get-property. Instead we just move the property + * to the "dead properties" list, and add the new property to the + * property list */ -struct resource *request_OF_resource(struct device_node* node, int index, - const char* name_postfix) +int prom_update_property(struct device_node *np, + struct property *newprop, + struct property *oldprop) { - struct pci_dev* pcidev; - u8 pci_bus, pci_devfn; - unsigned long iomask; - struct device_node* nd; - struct resource* parent; - struct resource *res = NULL; - int nlen, plen; - - if (index >= node->n_addrs) - goto fail; - - /* Sanity check on bus space */ - iomask = bus_space_to_resource_flags(node->addrs[index].space); - if (iomask & IORESOURCE_MEM) - parent = &iomem_resource; - else if (iomask & IORESOURCE_IO) - parent = &ioport_resource; - else - goto fail; - - /* Find a PCI parent if any */ - nd = node; - pcidev = NULL; - while (nd) { - if (!pci_device_from_OF_node(nd, &pci_bus, &pci_devfn)) - pcidev = pci_find_slot(pci_bus, pci_devfn); - if (pcidev) break; - nd = nd->parent; - } - if (pcidev) - parent = find_parent_pci_resource(pcidev, &node->addrs[index]); - if (!parent) { - printk(KERN_WARNING "request_OF_resource(%s), parent not found\n", - node->name); - goto fail; - } - - res = __request_region(parent, node->addrs[index].address, - node->addrs[index].size, NULL); - if (!res) - goto fail; - nlen = strlen(node->name); - plen = name_postfix ? strlen(name_postfix) : 0; - res->name = (const char *)kmalloc(nlen+plen+1, GFP_KERNEL); - if (res->name) { - strcpy((char *)res->name, node->name); - if (plen) - strcpy((char *)res->name+nlen, name_postfix); - } - return res; -fail: - return NULL; -} -EXPORT_SYMBOL(request_OF_resource); + struct property **next; + int found = 0; -int release_OF_resource(struct device_node *node, int index) -{ - struct pci_dev* pcidev; - u8 pci_bus, pci_devfn; - unsigned long iomask, start, end; - struct device_node* nd; - struct resource* parent; - struct resource *res = NULL; - - if (index >= node->n_addrs) - return -EINVAL; - - /* Sanity check on bus space */ - iomask = bus_space_to_resource_flags(node->addrs[index].space); - if (iomask & IORESOURCE_MEM) - parent = &iomem_resource; - else if (iomask & IORESOURCE_IO) - parent = &ioport_resource; - else - return -EINVAL; - - /* Find a PCI parent if any */ - nd = node; - pcidev = NULL; - while(nd) { - if (!pci_device_from_OF_node(nd, &pci_bus, &pci_devfn)) - pcidev = pci_find_slot(pci_bus, pci_devfn); - if (pcidev) break; - nd = nd->parent; - } - if (pcidev) - parent = find_parent_pci_resource(pcidev, &node->addrs[index]); - if (!parent) { - printk(KERN_WARNING "release_OF_resource(%s), parent not found\n", - node->name); - return -ENODEV; + write_lock(&devtree_lock); + next = &np->properties; + while (*next) { + if (*next == oldprop) { + /* found the node */ + newprop->next = oldprop->next; + *next = newprop; + oldprop->next = np->deadprops; + np->deadprops = oldprop; + found = 1; + break; + } + next = &(*next)->next; } + write_unlock(&devtree_lock); - /* Find us in the parent and its childs */ - res = parent->child; - start = node->addrs[index].address; - end = start + node->addrs[index].size - 1; - while (res) { - if (res->start == start && res->end == end && - (res->flags & IORESOURCE_BUSY)) - break; - if (res->start <= start && res->end >= end) - res = res->child; - else - res = res->sibling; - } - if (!res) + if (!found) return -ENODEV; - if (res->name) { - kfree(res->name); - res->name = NULL; - } - release_resource(res); - kfree(res); +#ifdef CONFIG_PROC_DEVICETREE + /* try to add to proc as well if it was initialized */ + if (np->pde) + proc_device_tree_update_prop(np->pde, newprop, oldprop); +#endif /* CONFIG_PROC_DEVICETREE */ return 0; } -EXPORT_SYMBOL(release_OF_resource); -#endif /* CONFIG_PCI */ diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index bcdc209dca85..7881ec96ef11 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -137,8 +137,8 @@ struct prom_t { }; struct mem_map_entry { - unsigned long base; - unsigned long size; + u64 base; + u64 size; }; typedef u32 cell_t; @@ -192,6 +192,11 @@ static unsigned long __initdata alloc_bottom; static unsigned long __initdata rmo_top; static unsigned long __initdata ram_top; +#ifdef CONFIG_KEXEC +static unsigned long __initdata prom_crashk_base; +static unsigned long __initdata prom_crashk_size; +#endif + static struct mem_map_entry __initdata mem_reserve_map[MEM_RESERVE_MAP_SIZE]; static int __initdata mem_reserve_cnt; @@ -553,7 +558,8 @@ unsigned long prom_memparse(const char *ptr, const char **retptr) static void __init early_cmdline_parse(void) { struct prom_t *_prom = &RELOC(prom); - char *opt, *p; + const char *opt; + char *p; int l = 0; RELOC(prom_cmd_line[0]) = 0; @@ -590,6 +596,35 @@ static void __init early_cmdline_parse(void) RELOC(prom_memory_limit) = ALIGN(RELOC(prom_memory_limit), 0x1000000); #endif } + +#ifdef CONFIG_KEXEC + /* + * crashkernel=size@addr specifies the location to reserve for + * crash kernel. + */ + opt = strstr(RELOC(prom_cmd_line), RELOC("crashkernel=")); + if (opt) { + opt += 12; + RELOC(prom_crashk_size) = + prom_memparse(opt, (const char **)&opt); + + if (ALIGN(RELOC(prom_crashk_size), 0x1000000) != + RELOC(prom_crashk_size)) { + prom_printf("Warning: crashkernel size is not " + "aligned to 16MB\n"); + } + + /* + * At present, the crash kernel always run at 32MB. + * Just ignore whatever user passed. + */ + RELOC(prom_crashk_base) = 0x2000000; + if (*opt == '@') { + prom_printf("Warning: PPC64 kdump kernel always runs " + "at 32 MB\n"); + } + } +#endif } #ifdef CONFIG_PPC_PSERIES @@ -863,9 +898,9 @@ static unsigned long __init prom_next_cell(int s, cell_t **cellp) * If problems seem to show up, it would be a good start to track * them down. */ -static void reserve_mem(unsigned long base, unsigned long size) +static void reserve_mem(u64 base, u64 size) { - unsigned long top = base + size; + u64 top = base + size; unsigned long cnt = RELOC(mem_reserve_cnt); if (size == 0) @@ -1011,6 +1046,12 @@ static void __init prom_init_mem(void) prom_printf(" alloc_top_hi : %x\n", RELOC(alloc_top_high)); prom_printf(" rmo_top : %x\n", RELOC(rmo_top)); prom_printf(" ram_top : %x\n", RELOC(ram_top)); +#ifdef CONFIG_KEXEC + if (RELOC(prom_crashk_base)) { + prom_printf(" crashk_base : %x\n", RELOC(prom_crashk_base)); + prom_printf(" crashk_size : %x\n", RELOC(prom_crashk_size)); + } +#endif } @@ -1500,6 +1541,8 @@ static int __init prom_find_machine_type(void) #ifdef CONFIG_PPC64 if (strstr(p, RELOC("Momentum,Maple"))) return PLATFORM_MAPLE; + if (strstr(p, RELOC("IBM,CPB"))) + return PLATFORM_CELL; #endif i += sl + 1; } @@ -1994,7 +2037,7 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4) if (r3 && r4 && r4 != 0xdeadbeef) { unsigned long val; - RELOC(prom_initrd_start) = (r3 >= KERNELBASE) ? __pa(r3) : r3; + RELOC(prom_initrd_start) = is_kernel_addr(r3) ? __pa(r3) : r3; RELOC(prom_initrd_end) = RELOC(prom_initrd_start) + r4; val = RELOC(prom_initrd_start); @@ -2094,6 +2137,10 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, */ prom_init_mem(); +#ifdef CONFIG_KEXEC + if (RELOC(prom_crashk_base)) + reserve_mem(RELOC(prom_crashk_base), RELOC(prom_crashk_size)); +#endif /* * Determine which cpu is actually running right _now_ */ @@ -2150,6 +2197,16 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, } #endif +#ifdef CONFIG_KEXEC + if (RELOC(prom_crashk_base)) { + prom_setprop(_prom->chosen, "/chosen", "linux,crashkernel-base", + PTRRELOC(&prom_crashk_base), + sizeof(RELOC(prom_crashk_base))); + prom_setprop(_prom->chosen, "/chosen", "linux,crashkernel-size", + PTRRELOC(&prom_crashk_size), + sizeof(RELOC(prom_crashk_size))); + } +#endif /* * Fixup any known bugs in the device-tree */ diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c new file mode 100644 index 000000000000..a8099c806150 --- /dev/null +++ b/arch/powerpc/kernel/prom_parse.c @@ -0,0 +1,548 @@ +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/pci_regs.h> +#include <linux/module.h> +#include <linux/ioport.h> +#include <asm/prom.h> +#include <asm/pci-bridge.h> + +#ifdef DEBUG +#define DBG(fmt...) do { printk(fmt); } while(0) +#else +#define DBG(fmt...) do { } while(0) +#endif + +#ifdef CONFIG_PPC64 +#define PRu64 "%lx" +#else +#define PRu64 "%llx" +#endif + +/* Max address size we deal with */ +#define OF_MAX_ADDR_CELLS 4 +#define OF_CHECK_COUNTS(na, ns) ((na) > 0 && (na) <= OF_MAX_ADDR_CELLS && \ + (ns) > 0) + +/* Debug utility */ +#ifdef DEBUG +static void of_dump_addr(const char *s, u32 *addr, int na) +{ + printk("%s", s); + while(na--) + printk(" %08x", *(addr++)); + printk("\n"); +} +#else +static void of_dump_addr(const char *s, u32 *addr, int na) { } +#endif + +/* Read a big address */ +static inline u64 of_read_addr(u32 *cell, int size) +{ + u64 r = 0; + while (size--) + r = (r << 32) | *(cell++); + return r; +} + +/* Callbacks for bus specific translators */ +struct of_bus { + const char *name; + const char *addresses; + int (*match)(struct device_node *parent); + void (*count_cells)(struct device_node *child, + int *addrc, int *sizec); + u64 (*map)(u32 *addr, u32 *range, int na, int ns, int pna); + int (*translate)(u32 *addr, u64 offset, int na); + unsigned int (*get_flags)(u32 *addr); +}; + + +/* + * Default translator (generic bus) + */ + +static void of_bus_default_count_cells(struct device_node *dev, + int *addrc, int *sizec) +{ + if (addrc) + *addrc = prom_n_addr_cells(dev); + if (sizec) + *sizec = prom_n_size_cells(dev); +} + +static u64 of_bus_default_map(u32 *addr, u32 *range, int na, int ns, int pna) +{ + u64 cp, s, da; + + cp = of_read_addr(range, na); + s = of_read_addr(range + na + pna, ns); + da = of_read_addr(addr, na); + + DBG("OF: default map, cp="PRu64", s="PRu64", da="PRu64"\n", + cp, s, da); + + if (da < cp || da >= (cp + s)) + return OF_BAD_ADDR; + return da - cp; +} + +static int of_bus_default_translate(u32 *addr, u64 offset, int na) +{ + u64 a = of_read_addr(addr, na); + memset(addr, 0, na * 4); + a += offset; + if (na > 1) + addr[na - 2] = a >> 32; + addr[na - 1] = a & 0xffffffffu; + + return 0; +} + +static unsigned int of_bus_default_get_flags(u32 *addr) +{ + return IORESOURCE_MEM; +} + + +/* + * PCI bus specific translator + */ + +static int of_bus_pci_match(struct device_node *np) +{ + /* "vci" is for the /chaos bridge on 1st-gen PCI powermacs */ + return !strcmp(np->type, "pci") || !strcmp(np->type, "vci"); +} + +static void of_bus_pci_count_cells(struct device_node *np, + int *addrc, int *sizec) +{ + if (addrc) + *addrc = 3; + if (sizec) + *sizec = 2; +} + +static u64 of_bus_pci_map(u32 *addr, u32 *range, int na, int ns, int pna) +{ + u64 cp, s, da; + + /* Check address type match */ + if ((addr[0] ^ range[0]) & 0x03000000) + return OF_BAD_ADDR; + + /* Read address values, skipping high cell */ + cp = of_read_addr(range + 1, na - 1); + s = of_read_addr(range + na + pna, ns); + da = of_read_addr(addr + 1, na - 1); + + DBG("OF: PCI map, cp="PRu64", s="PRu64", da="PRu64"\n", cp, s, da); + + if (da < cp || da >= (cp + s)) + return OF_BAD_ADDR; + return da - cp; +} + +static int of_bus_pci_translate(u32 *addr, u64 offset, int na) +{ + return of_bus_default_translate(addr + 1, offset, na - 1); +} + +static unsigned int of_bus_pci_get_flags(u32 *addr) +{ + unsigned int flags = 0; + u32 w = addr[0]; + + switch((w >> 24) & 0x03) { + case 0x01: + flags |= IORESOURCE_IO; + case 0x02: /* 32 bits */ + case 0x03: /* 64 bits */ + flags |= IORESOURCE_MEM; + } + if (w & 0x40000000) + flags |= IORESOURCE_PREFETCH; + return flags; +} + +/* + * ISA bus specific translator + */ + +static int of_bus_isa_match(struct device_node *np) +{ + return !strcmp(np->name, "isa"); +} + +static void of_bus_isa_count_cells(struct device_node *child, + int *addrc, int *sizec) +{ + if (addrc) + *addrc = 2; + if (sizec) + *sizec = 1; +} + +static u64 of_bus_isa_map(u32 *addr, u32 *range, int na, int ns, int pna) +{ + u64 cp, s, da; + + /* Check address type match */ + if ((addr[0] ^ range[0]) & 0x00000001) + return OF_BAD_ADDR; + + /* Read address values, skipping high cell */ + cp = of_read_addr(range + 1, na - 1); + s = of_read_addr(range + na + pna, ns); + da = of_read_addr(addr + 1, na - 1); + + DBG("OF: ISA map, cp="PRu64", s="PRu64", da="PRu64"\n", cp, s, da); + + if (da < cp || da >= (cp + s)) + return OF_BAD_ADDR; + return da - cp; +} + +static int of_bus_isa_translate(u32 *addr, u64 offset, int na) +{ + return of_bus_default_translate(addr + 1, offset, na - 1); +} + +static unsigned int of_bus_isa_get_flags(u32 *addr) +{ + unsigned int flags = 0; + u32 w = addr[0]; + + if (w & 1) + flags |= IORESOURCE_IO; + else + flags |= IORESOURCE_MEM; + return flags; +} + + +/* + * Array of bus specific translators + */ + +static struct of_bus of_busses[] = { + /* PCI */ + { + .name = "pci", + .addresses = "assigned-addresses", + .match = of_bus_pci_match, + .count_cells = of_bus_pci_count_cells, + .map = of_bus_pci_map, + .translate = of_bus_pci_translate, + .get_flags = of_bus_pci_get_flags, + }, + /* ISA */ + { + .name = "isa", + .addresses = "reg", + .match = of_bus_isa_match, + .count_cells = of_bus_isa_count_cells, + .map = of_bus_isa_map, + .translate = of_bus_isa_translate, + .get_flags = of_bus_isa_get_flags, + }, + /* Default */ + { + .name = "default", + .addresses = "reg", + .match = NULL, + .count_cells = of_bus_default_count_cells, + .map = of_bus_default_map, + .translate = of_bus_default_translate, + .get_flags = of_bus_default_get_flags, + }, +}; + +static struct of_bus *of_match_bus(struct device_node *np) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(of_busses); i ++) + if (!of_busses[i].match || of_busses[i].match(np)) + return &of_busses[i]; + BUG(); + return NULL; +} + +static int of_translate_one(struct device_node *parent, struct of_bus *bus, + struct of_bus *pbus, u32 *addr, + int na, int ns, int pna) +{ + u32 *ranges; + unsigned int rlen; + int rone; + u64 offset = OF_BAD_ADDR; + + /* Normally, an absence of a "ranges" property means we are + * crossing a non-translatable boundary, and thus the addresses + * below the current not cannot be converted to CPU physical ones. + * Unfortunately, while this is very clear in the spec, it's not + * what Apple understood, and they do have things like /uni-n or + * /ht nodes with no "ranges" property and a lot of perfectly + * useable mapped devices below them. Thus we treat the absence of + * "ranges" as equivalent to an empty "ranges" property which means + * a 1:1 translation at that level. It's up to the caller not to try + * to translate addresses that aren't supposed to be translated in + * the first place. --BenH. + */ + ranges = (u32 *)get_property(parent, "ranges", &rlen); + if (ranges == NULL || rlen == 0) { + offset = of_read_addr(addr, na); + memset(addr, 0, pna * 4); + DBG("OF: no ranges, 1:1 translation\n"); + goto finish; + } + + DBG("OF: walking ranges...\n"); + + /* Now walk through the ranges */ + rlen /= 4; + rone = na + pna + ns; + for (; rlen >= rone; rlen -= rone, ranges += rone) { + offset = bus->map(addr, ranges, na, ns, pna); + if (offset != OF_BAD_ADDR) + break; + } + if (offset == OF_BAD_ADDR) { + DBG("OF: not found !\n"); + return 1; + } + memcpy(addr, ranges + na, 4 * pna); + + finish: + of_dump_addr("OF: parent translation for:", addr, pna); + DBG("OF: with offset: "PRu64"\n", offset); + + /* Translate it into parent bus space */ + return pbus->translate(addr, offset, pna); +} + + +/* + * Translate an address from the device-tree into a CPU physical address, + * this walks up the tree and applies the various bus mappings on the + * way. + * + * Note: We consider that crossing any level with #size-cells == 0 to mean + * that translation is impossible (that is we are not dealing with a value + * that can be mapped to a cpu physical address). This is not really specified + * that way, but this is traditionally the way IBM at least do things + */ +u64 of_translate_address(struct device_node *dev, u32 *in_addr) +{ + struct device_node *parent = NULL; + struct of_bus *bus, *pbus; + u32 addr[OF_MAX_ADDR_CELLS]; + int na, ns, pna, pns; + u64 result = OF_BAD_ADDR; + + DBG("OF: ** translation for device %s **\n", dev->full_name); + + /* Increase refcount at current level */ + of_node_get(dev); + + /* Get parent & match bus type */ + parent = of_get_parent(dev); + if (parent == NULL) + goto bail; + bus = of_match_bus(parent); + + /* Cound address cells & copy address locally */ + bus->count_cells(dev, &na, &ns); + if (!OF_CHECK_COUNTS(na, ns)) { + printk(KERN_ERR "prom_parse: Bad cell count for %s\n", + dev->full_name); + goto bail; + } + memcpy(addr, in_addr, na * 4); + + DBG("OF: bus is %s (na=%d, ns=%d) on %s\n", + bus->name, na, ns, parent->full_name); + of_dump_addr("OF: translating address:", addr, na); + + /* Translate */ + for (;;) { + /* Switch to parent bus */ + of_node_put(dev); + dev = parent; + parent = of_get_parent(dev); + + /* If root, we have finished */ + if (parent == NULL) { + DBG("OF: reached root node\n"); + result = of_read_addr(addr, na); + break; + } + + /* Get new parent bus and counts */ + pbus = of_match_bus(parent); + pbus->count_cells(dev, &pna, &pns); + if (!OF_CHECK_COUNTS(pna, pns)) { + printk(KERN_ERR "prom_parse: Bad cell count for %s\n", + dev->full_name); + break; + } + + DBG("OF: parent bus is %s (na=%d, ns=%d) on %s\n", + pbus->name, pna, pns, parent->full_name); + + /* Apply bus translation */ + if (of_translate_one(dev, bus, pbus, addr, na, ns, pna)) + break; + + /* Complete the move up one level */ + na = pna; + ns = pns; + bus = pbus; + + of_dump_addr("OF: one level translation:", addr, na); + } + bail: + of_node_put(parent); + of_node_put(dev); + + return result; +} +EXPORT_SYMBOL(of_translate_address); + +u32 *of_get_address(struct device_node *dev, int index, u64 *size, + unsigned int *flags) +{ + u32 *prop; + unsigned int psize; + struct device_node *parent; + struct of_bus *bus; + int onesize, i, na, ns; + + /* Get parent & match bus type */ + parent = of_get_parent(dev); + if (parent == NULL) + return NULL; + bus = of_match_bus(parent); + bus->count_cells(dev, &na, &ns); + of_node_put(parent); + if (!OF_CHECK_COUNTS(na, ns)) + return NULL; + + /* Get "reg" or "assigned-addresses" property */ + prop = (u32 *)get_property(dev, bus->addresses, &psize); + if (prop == NULL) + return NULL; + psize /= 4; + + onesize = na + ns; + for (i = 0; psize >= onesize; psize -= onesize, prop += onesize, i++) + if (i == index) { + if (size) + *size = of_read_addr(prop + na, ns); + if (flags) + *flags = bus->get_flags(prop); + return prop; + } + return NULL; +} +EXPORT_SYMBOL(of_get_address); + +u32 *of_get_pci_address(struct device_node *dev, int bar_no, u64 *size, + unsigned int *flags) +{ + u32 *prop; + unsigned int psize; + struct device_node *parent; + struct of_bus *bus; + int onesize, i, na, ns; + + /* Get parent & match bus type */ + parent = of_get_parent(dev); + if (parent == NULL) + return NULL; + bus = of_match_bus(parent); + if (strcmp(bus->name, "pci")) + return NULL; + bus->count_cells(dev, &na, &ns); + of_node_put(parent); + if (!OF_CHECK_COUNTS(na, ns)) + return NULL; + + /* Get "reg" or "assigned-addresses" property */ + prop = (u32 *)get_property(dev, bus->addresses, &psize); + if (prop == NULL) + return NULL; + psize /= 4; + + onesize = na + ns; + for (i = 0; psize >= onesize; psize -= onesize, prop += onesize, i++) + if ((prop[0] & 0xff) == ((bar_no * 4) + PCI_BASE_ADDRESS_0)) { + if (size) + *size = of_read_addr(prop + na, ns); + if (flags) + *flags = bus->get_flags(prop); + return prop; + } + return NULL; +} +EXPORT_SYMBOL(of_get_pci_address); + +static int __of_address_to_resource(struct device_node *dev, u32 *addrp, + u64 size, unsigned int flags, + struct resource *r) +{ + u64 taddr; + + if ((flags & (IORESOURCE_IO | IORESOURCE_MEM)) == 0) + return -EINVAL; + taddr = of_translate_address(dev, addrp); + if (taddr == OF_BAD_ADDR) + return -EINVAL; + memset(r, 0, sizeof(struct resource)); + if (flags & IORESOURCE_IO) { + unsigned long port; + port = pci_address_to_pio(taddr); + if (port == (unsigned long)-1) + return -EINVAL; + r->start = port; + r->end = port + size - 1; + } else { + r->start = taddr; + r->end = taddr + size - 1; + } + r->flags = flags; + r->name = dev->name; + return 0; +} + +int of_address_to_resource(struct device_node *dev, int index, + struct resource *r) +{ + u32 *addrp; + u64 size; + unsigned int flags; + + addrp = of_get_address(dev, index, &size, &flags); + if (addrp == NULL) + return -EINVAL; + return __of_address_to_resource(dev, addrp, size, flags, r); +} +EXPORT_SYMBOL_GPL(of_address_to_resource); + +int of_pci_address_to_resource(struct device_node *dev, int bar, + struct resource *r) +{ + u32 *addrp; + u64 size; + unsigned int flags; + + addrp = of_get_pci_address(dev, bar, &size, &flags); + if (addrp == NULL) + return -EINVAL; + return __of_address_to_resource(dev, addrp, size, flags, r); +} +EXPORT_SYMBOL_GPL(of_pci_address_to_resource); diff --git a/arch/powerpc/kernel/ptrace-common.h b/arch/powerpc/kernel/ptrace-common.h index b1babb729673..5ccbdbe0d5c9 100644 --- a/arch/powerpc/kernel/ptrace-common.h +++ b/arch/powerpc/kernel/ptrace-common.h @@ -62,7 +62,7 @@ static inline void set_single_step(struct task_struct *task) struct pt_regs *regs = task->thread.regs; if (regs != NULL) regs->msr |= MSR_SE; - set_ti_thread_flag(task->thread_info, TIF_SINGLESTEP); + set_tsk_thread_flag(task, TIF_SINGLESTEP); } static inline void clear_single_step(struct task_struct *task) @@ -70,7 +70,7 @@ static inline void clear_single_step(struct task_struct *task) struct pt_regs *regs = task->thread.regs; if (regs != NULL) regs->msr &= ~MSR_SE; - clear_ti_thread_flag(task->thread_info, TIF_SINGLESTEP); + clear_tsk_thread_flag(task, TIF_SINGLESTEP); } #ifdef CONFIG_ALTIVEC diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c index 61762640b877..826ee3d056de 100644 --- a/arch/powerpc/kernel/ptrace32.c +++ b/arch/powerpc/kernel/ptrace32.c @@ -45,33 +45,19 @@ long compat_sys_ptrace(int request, int pid, unsigned long addr, unsigned long data) { struct task_struct *child; - int ret = -EPERM; + int ret; lock_kernel(); if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) - goto out; - ret = security_ptrace(current->parent, current); - if (ret) - goto out; - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; - ret = 0; + ret = ptrace_traceme(); goto out; } - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) - goto out; - ret = -EPERM; - if (pid == 1) /* you may not mess with init */ - goto out_tsk; + child = ptrace_get_task_struct(pid); + if (IS_ERR(child)) { + ret = PTR_ERR(child); + goto out; + } if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 4283fa33f784..7fe4a5c944c9 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -17,6 +17,7 @@ #include <linux/spinlock.h> #include <linux/module.h> #include <linux/init.h> +#include <linux/capability.h> #include <linux/delay.h> #include <asm/prom.h> @@ -29,11 +30,17 @@ #include <asm/delay.h> #include <asm/uaccess.h> #include <asm/lmb.h> +#include <asm/udbg.h> struct rtas_t rtas = { .lock = SPIN_LOCK_UNLOCKED }; +struct rtas_suspend_me_data { + long waiting; + struct rtas_args *args; +}; + EXPORT_SYMBOL(rtas); DEFINE_SPINLOCK(rtas_data_buf_lock); @@ -52,7 +59,7 @@ EXPORT_SYMBOL(rtas_flash_term_hook); * are designed only for very early low-level debugging, which * is why the token is hard-coded to 10. */ -void call_rtas_display_status(unsigned char c) +static void call_rtas_display_status(char c) { struct rtas_args *args = &rtas.args; unsigned long s; @@ -65,14 +72,14 @@ void call_rtas_display_status(unsigned char c) args->nargs = 1; args->nret = 1; args->rets = (rtas_arg_t *)&(args->args[1]); - args->args[0] = (int)c; + args->args[0] = (unsigned char)c; enter_rtas(__pa(args)); spin_unlock_irqrestore(&rtas.lock, s); } -void call_rtas_display_status_delay(unsigned char c) +static void call_rtas_display_status_delay(char c) { static int pending_newline = 0; /* did last write end with unprinted newline? */ static int width = 16; @@ -96,6 +103,11 @@ void call_rtas_display_status_delay(unsigned char c) } } +void __init udbg_init_rtas(void) +{ + udbg_putc = call_rtas_display_status_delay; +} + void rtas_progress(char *s, unsigned short hex) { struct device_node *root; @@ -549,6 +561,80 @@ void rtas_os_term(char *str) } while (status == RTAS_BUSY); } +static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE; +#ifdef CONFIG_PPC_PSERIES +static void rtas_percpu_suspend_me(void *info) +{ + long rc; + long flags; + struct rtas_suspend_me_data *data = + (struct rtas_suspend_me_data *)info; + + /* + * We use "waiting" to indicate our state. As long + * as it is >0, we are still trying to all join up. + * If it goes to 0, we have successfully joined up and + * one thread got H_Continue. If any error happens, + * we set it to <0. + */ + local_irq_save(flags); + do { + rc = plpar_hcall_norets(H_JOIN); + smp_rmb(); + } while (rc == H_Success && data->waiting > 0); + if (rc == H_Success) + goto out; + + if (rc == H_Continue) { + data->waiting = 0; + rtas_call(ibm_suspend_me_token, 0, 1, + data->args->args); + } else { + data->waiting = -EBUSY; + printk(KERN_ERR "Error on H_Join hypervisor call\n"); + } + +out: + /* before we restore interrupts, make sure we don't + * generate a spurious soft lockup errors + */ + touch_softlockup_watchdog(); + local_irq_restore(flags); + return; +} + +static int rtas_ibm_suspend_me(struct rtas_args *args) +{ + int i; + + struct rtas_suspend_me_data data; + + data.waiting = 1; + data.args = args; + + /* Call function on all CPUs. One of us will make the + * rtas call + */ + if (on_each_cpu(rtas_percpu_suspend_me, &data, 1, 0)) + data.waiting = -EINVAL; + + if (data.waiting != 0) + printk(KERN_ERR "Error doing global join\n"); + + /* Prod each CPU. This won't hurt, and will wake + * anyone we successfully put to sleep with H_Join + */ + for_each_cpu(i) + plpar_hcall_norets(H_PROD, i); + + return data.waiting; +} +#else /* CONFIG_PPC_PSERIES */ +static int rtas_ibm_suspend_me(struct rtas_args *args) +{ + return -ENOSYS; +} +#endif asmlinkage int ppc_rtas(struct rtas_args __user *uargs) { @@ -556,6 +642,7 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) unsigned long flags; char *buff_copy, *errbuf = NULL; int nargs; + int rc; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -574,6 +661,17 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) nargs * sizeof(rtas_arg_t)) != 0) return -EFAULT; + if (args.token == RTAS_UNKNOWN_SERVICE) + return -EINVAL; + + /* Need to handle ibm,suspend_me call specially */ + if (args.token == ibm_suspend_me_token) { + rc = rtas_ibm_suspend_me(&args); + if (rc) + return rc; + goto copy_return; + } + buff_copy = get_errorlog_buffer(); spin_lock_irqsave(&rtas.lock, flags); @@ -597,6 +695,7 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) kfree(buff_copy); } + copy_return: /* Copy out args. */ if (copy_to_user(uargs->args + nargs, args.args + nargs, @@ -632,7 +731,7 @@ void rtas_stop_self(void) } /* - * Call early during boot, before mem init or bootmem, to retreive the RTAS + * Call early during boot, before mem init or bootmem, to retrieve the RTAS * informations from the device-tree and allocate the RMO buffer for userland * accesses. */ @@ -668,8 +767,10 @@ void __init rtas_initialize(void) * the stop-self token if any */ #ifdef CONFIG_PPC64 - if (_machine == PLATFORM_PSERIES_LPAR) + if (_machine == PLATFORM_PSERIES_LPAR) { rtas_region = min(lmb.rmo_size, RTAS_INSTANTIATE_MAX); + ibm_suspend_me_token = rtas_token("ibm,suspend-me"); + } #endif rtas_rmo_buf = lmb_alloc_base(RTAS_RMOBUF_MAX, PAGE_SIZE, rtas_region); diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index 60dec2401c26..5579f6559912 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c @@ -72,7 +72,7 @@ static int of_device_available(struct device_node * dn) return 0; } -static int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val) +int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val) { int returnval = -1; unsigned long buid, addr; @@ -188,39 +188,19 @@ int is_python(struct device_node *dev) return 0; } -static int get_phb_reg_prop(struct device_node *dev, - unsigned int addr_size_words, - struct reg_property64 *reg) +static void python_countermeasures(struct device_node *dev) { - unsigned int *ui_ptr = NULL, len; - - /* Found a PHB, now figure out where his registers are mapped. */ - ui_ptr = (unsigned int *)get_property(dev, "reg", &len); - if (ui_ptr == NULL) - return 1; - - if (addr_size_words == 1) { - reg->address = ((struct reg_property32 *)ui_ptr)->address; - reg->size = ((struct reg_property32 *)ui_ptr)->size; - } else { - *reg = *((struct reg_property64 *)ui_ptr); - } - - return 0; -} - -static void python_countermeasures(struct device_node *dev, - unsigned int addr_size_words) -{ - struct reg_property64 reg_struct; + struct resource registers; void __iomem *chip_regs; volatile u32 val; - if (get_phb_reg_prop(dev, addr_size_words, ®_struct)) + if (of_address_to_resource(dev, 0, ®isters)) { + printk(KERN_ERR "Can't get address for Python workarounds !\n"); return; + } /* Python's register file is 1 MB in size. */ - chip_regs = ioremap(reg_struct.address & ~(0xfffffUL), 0x100000); + chip_regs = ioremap(registers.start & ~(0xfffffUL), 0x100000); /* * Firmware doesn't always clear this bit which is critical @@ -301,11 +281,10 @@ static int phb_set_bus_ranges(struct device_node *dev, } static int __devinit setup_phb(struct device_node *dev, - struct pci_controller *phb, - unsigned int addr_size_words) + struct pci_controller *phb) { if (is_python(dev)) - python_countermeasures(dev, addr_size_words); + python_countermeasures(dev); if (phb_set_bus_ranges(dev, phb)) return 1; @@ -320,8 +299,8 @@ unsigned long __init find_and_init_phbs(void) { struct device_node *node; struct pci_controller *phb; - unsigned int root_size_cells = 0; unsigned int index; + unsigned int root_size_cells = 0; unsigned int *opprop = NULL; struct device_node *root = of_find_node_by_path("/"); @@ -343,10 +322,11 @@ unsigned long __init find_and_init_phbs(void) phb = pcibios_alloc_controller(node); if (!phb) continue; - setup_phb(node, phb, root_size_cells); + setup_phb(node, phb); pci_process_bridge_OF_ranges(phb, node, 0); pci_setup_phb_io(phb, index == 0); #ifdef CONFIG_PPC_PSERIES + /* XXX This code need serious fixing ... --BenH */ if (ppc64_interrupt_controller == IC_OPEN_PIC && pSeries_mpic) { int addr = root_size_cells * (index + 2) - 1; mpic_assign_isu(pSeries_mpic, index, opprop[addr]); @@ -381,22 +361,17 @@ unsigned long __init find_and_init_phbs(void) struct pci_controller * __devinit init_phb_dynamic(struct device_node *dn) { - struct device_node *root = of_find_node_by_path("/"); - unsigned int root_size_cells = 0; struct pci_controller *phb; int primary; - root_size_cells = prom_n_size_cells(root); - primary = list_empty(&hose_list); phb = pcibios_alloc_controller(dn); if (!phb) return NULL; - setup_phb(dn, phb, root_size_cells); + setup_phb(dn, phb); pci_process_bridge_OF_ranges(phb, dn, primary); pci_setup_phb_io_dynamic(phb, primary); - of_node_put(root); pci_devs_phb_init_dynamic(phb); scan_phb(phb); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index bd3eb4292b53..be12041c0fc5 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -93,14 +93,15 @@ EXPORT_SYMBOL(ppc_do_canonicalize_irqs); /* also used by kexec */ void machine_shutdown(void) { - if (ppc_md.nvram_sync) - ppc_md.nvram_sync(); + if (ppc_md.machine_shutdown) + ppc_md.machine_shutdown(); } void machine_restart(char *cmd) { machine_shutdown(); - ppc_md.restart(cmd); + if (ppc_md.restart) + ppc_md.restart(cmd); #ifdef CONFIG_SMP smp_send_stop(); #endif @@ -112,7 +113,8 @@ void machine_restart(char *cmd) void machine_power_off(void) { machine_shutdown(); - ppc_md.power_off(); + if (ppc_md.power_off) + ppc_md.power_off(); #ifdef CONFIG_SMP smp_send_stop(); #endif @@ -129,7 +131,8 @@ EXPORT_SYMBOL_GPL(pm_power_off); void machine_halt(void) { machine_shutdown(); - ppc_md.halt(); + if (ppc_md.halt) + ppc_md.halt(); #ifdef CONFIG_SMP smp_send_stop(); #endif @@ -294,129 +297,6 @@ struct seq_operations cpuinfo_op = { .show = show_cpuinfo, }; -#ifdef CONFIG_PPC_MULTIPLATFORM -static int __init set_preferred_console(void) -{ - struct device_node *prom_stdout = NULL; - char *name; - u32 *spd; - int offset = 0; - - DBG(" -> set_preferred_console()\n"); - - /* The user has requested a console so this is already set up. */ - if (strstr(saved_command_line, "console=")) { - DBG(" console was specified !\n"); - return -EBUSY; - } - - if (!of_chosen) { - DBG(" of_chosen is NULL !\n"); - return -ENODEV; - } - /* We are getting a weird phandle from OF ... */ - /* ... So use the full path instead */ - name = (char *)get_property(of_chosen, "linux,stdout-path", NULL); - if (name == NULL) { - DBG(" no linux,stdout-path !\n"); - return -ENODEV; - } - prom_stdout = of_find_node_by_path(name); - if (!prom_stdout) { - DBG(" can't find stdout package %s !\n", name); - return -ENODEV; - } - DBG("stdout is %s\n", prom_stdout->full_name); - - name = (char *)get_property(prom_stdout, "name", NULL); - if (!name) { - DBG(" stdout package has no name !\n"); - goto not_found; - } - spd = (u32 *)get_property(prom_stdout, "current-speed", NULL); - - if (0) - ; -#ifdef CONFIG_SERIAL_8250_CONSOLE - else if (strcmp(name, "serial") == 0) { - int i; - u32 *reg = (u32 *)get_property(prom_stdout, "reg", &i); - if (i > 8) { - switch (reg[1]) { - case 0x3f8: - offset = 0; - break; - case 0x2f8: - offset = 1; - break; - case 0x898: - offset = 2; - break; - case 0x890: - offset = 3; - break; - default: - /* We dont recognise the serial port */ - goto not_found; - } - } - } -#endif /* CONFIG_SERIAL_8250_CONSOLE */ -#ifdef CONFIG_PPC_PSERIES - else if (strcmp(name, "vty") == 0) { - u32 *reg = (u32 *)get_property(prom_stdout, "reg", NULL); - char *compat = (char *)get_property(prom_stdout, "compatible", NULL); - - if (reg && compat && (strcmp(compat, "hvterm-protocol") == 0)) { - /* Host Virtual Serial Interface */ - switch (reg[0]) { - case 0x30000000: - offset = 0; - break; - case 0x30000001: - offset = 1; - break; - default: - goto not_found; - } - of_node_put(prom_stdout); - DBG("Found hvsi console at offset %d\n", offset); - return add_preferred_console("hvsi", offset, NULL); - } else { - /* pSeries LPAR virtual console */ - of_node_put(prom_stdout); - DBG("Found hvc console\n"); - return add_preferred_console("hvc", 0, NULL); - } - } -#endif /* CONFIG_PPC_PSERIES */ -#ifdef CONFIG_SERIAL_PMACZILOG_CONSOLE - else if (strcmp(name, "ch-a") == 0) - offset = 0; - else if (strcmp(name, "ch-b") == 0) - offset = 1; -#endif /* CONFIG_SERIAL_PMACZILOG_CONSOLE */ - else - goto not_found; - of_node_put(prom_stdout); - - DBG("Found serial console at ttyS%d\n", offset); - - if (spd) { - static char __initdata opt[16]; - sprintf(opt, "%d", *spd); - return add_preferred_console("ttyS", offset, opt); - } else - return add_preferred_console("ttyS", offset, NULL); - - not_found: - DBG("No preferred console found !\n"); - of_node_put(prom_stdout); - return -ENODEV; -} -console_initcall(set_preferred_console); -#endif /* CONFIG_PPC_MULTIPLATFORM */ - void __init check_for_initrd(void) { #ifdef CONFIG_BLK_DEV_INITRD @@ -442,7 +322,7 @@ void __init check_for_initrd(void) /* If we were passed an initrd, set the ROOT_DEV properly if the values * look sensible. If not, clear initrd reference. */ - if (initrd_start >= KERNELBASE && initrd_end >= KERNELBASE && + if (is_kernel_addr(initrd_start) && is_kernel_addr(initrd_end) && initrd_end > initrd_start) ROOT_DEV = Root_RAM0; else diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index e5694335bf10..db72a92943bf 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -39,6 +39,8 @@ #include <asm/nvram.h> #include <asm/xmon.h> #include <asm/time.h> +#include <asm/serial.h> +#include <asm/udbg.h> #include "setup.h" @@ -172,12 +174,23 @@ void __init platform_init(void) */ void __init machine_init(unsigned long dt_ptr, unsigned long phys) { + /* If btext is enabled, we might have a BAT setup for early display, + * thus we do enable some very basic udbg output + */ +#ifdef CONFIG_BOOTX_TEXT + udbg_putc = btext_drawchar; +#endif + + /* Do some early initialization based on the flat device tree */ early_init_devtree(__va(dt_ptr)); + /* Check default command line */ #ifdef CONFIG_CMDLINE - strlcpy(cmd_line, CONFIG_CMDLINE, sizeof(cmd_line)); + if (cmd_line[0] == 0) + strlcpy(cmd_line, CONFIG_CMDLINE, sizeof(cmd_line)); #endif /* CONFIG_CMDLINE */ + /* Base init based on machine type */ platform_init(); #ifdef CONFIG_6xx @@ -282,25 +295,20 @@ void __init setup_arch(char **cmdline_p) unflatten_device_tree(); check_for_initrd(); - finish_device_tree(); - smp_setup_cpu_maps(); + if (ppc_md.init_early) + ppc_md.init_early(); -#ifdef CONFIG_BOOTX_TEXT - init_boot_display(); -#endif + find_legacy_serial_ports(); + finish_device_tree(); -#ifdef CONFIG_PPC_PMAC - /* This could be called "early setup arch", it must be done - * now because xmon need it - */ - if (_machine == _MACH_Pmac) - pmac_feature_init(); /* New cool way */ -#endif + smp_setup_cpu_maps(); #ifdef CONFIG_XMON_DEFAULT xmon_init(1); #endif + /* Register early console */ + register_early_udbg_console(); #if defined(CONFIG_KGDB) if (ppc_md.kgdb_map_scc) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index e3fb78397dc6..e29b275e09e0 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -33,7 +33,9 @@ #include <linux/unistd.h> #include <linux/serial.h> #include <linux/serial_8250.h> +#include <linux/bootmem.h> #include <asm/io.h> +#include <asm/kdump.h> #include <asm/prom.h> #include <asm/processor.h> #include <asm/pgtable.h> @@ -69,33 +71,6 @@ #define DBG(fmt...) #endif -/* - * Here are some early debugging facilities. You can enable one - * but your kernel will not boot on anything else if you do so - */ - -/* This one is for use on LPAR machines that support an HVC console - * on vterm 0 - */ -extern void udbg_init_debug_lpar(void); -/* This one is for use on Apple G5 machines - */ -extern void udbg_init_pmac_realmode(void); -/* That's RTAS panel debug */ -extern void call_rtas_display_status_delay(unsigned char c); -/* Here's maple real mode debug */ -extern void udbg_init_maple_realmode(void); - -#define EARLY_DEBUG_INIT() do {} while(0) - -#if 0 -#define EARLY_DEBUG_INIT() udbg_init_debug_lpar() -#define EARLY_DEBUG_INIT() udbg_init_maple_realmode() -#define EARLY_DEBUG_INIT() udbg_init_pmac_realmode() -#define EARLY_DEBUG_INIT() \ - do { udbg_putc = call_rtas_display_status_delay; } while(0) -#endif - int have_of = 1; int boot_cpuid = 0; int boot_cpuid_phys = 0; @@ -236,11 +211,8 @@ void __init early_setup(unsigned long dt_ptr) struct paca_struct *lpaca = get_paca(); static struct machdep_calls **mach; - /* - * Enable early debugging if any specified (see top of - * this file) - */ - EARLY_DEBUG_INIT(); + /* Enable early debugging if any specified (see udbg.h) */ + udbg_early_init(); DBG(" -> early_setup()\n"); @@ -268,6 +240,10 @@ void __init early_setup(unsigned long dt_ptr) } ppc_md = **mach; +#ifdef CONFIG_CRASH_DUMP + kdump_setup(); +#endif + DBG("Found, Initializing memory management...\n"); /* @@ -317,6 +293,7 @@ void early_setup_secondary(void) void smp_release_cpus(void) { extern unsigned long __secondary_hold_spinloop; + unsigned long *ptr; DBG(" -> smp_release_cpus()\n"); @@ -327,7 +304,9 @@ void smp_release_cpus(void) * This is useless but harmless on iSeries, secondaries are already * waiting on their paca spinloops. */ - __secondary_hold_spinloop = 1; + ptr = (unsigned long *)((unsigned long)&__secondary_hold_spinloop + - PHYSICAL_START); + *ptr = 1; mb(); DBG(" <- smp_release_cpus()\n"); @@ -430,7 +409,7 @@ void __init setup_system(void) /* * Fill the ppc64_caches & systemcfg structures with informations - * retreived from the device-tree. Need to be called before + * retrieved from the device-tree. Need to be called before * finish_device_tree() since the later requires some of the * informations filled up here to properly parse the interrupt * tree. @@ -459,16 +438,19 @@ void __init setup_system(void) */ ppc_md.init_early(); + /* + * We can discover serial ports now since the above did setup the + * hash table management for us, thus ioremap works. We do that early + * so that further code can be debugged + */ + find_legacy_serial_ports(); + /* * "Finish" the device-tree, that is do the actual parsing of * some of the properties like the interrupt map */ finish_device_tree(); -#ifdef CONFIG_BOOTX_TEXT - init_boot_display(); -#endif - /* * Initialize xmon */ @@ -507,6 +489,9 @@ void __init setup_system(void) ppc64_caches.iline_size); printk("htab_address = 0x%p\n", htab_address); printk("htab_hash_mask = 0x%lx\n", htab_hash_mask); +#if PHYSICAL_START > 0 + printk("physical_start = 0x%x\n", PHYSICAL_START); +#endif printk("-----------------------------------------------------\n"); mm_init_ppc64(); @@ -657,187 +642,6 @@ void ppc64_terminate_msg(unsigned int src, const char *msg) printk("[terminate]%04x %s\n", src, msg); } -#ifndef CONFIG_PPC_ISERIES -/* - * This function can be used by platforms to "find" legacy serial ports. - * It works for "serial" nodes under an "isa" node, and will try to - * respect the "ibm,aix-loc" property if any. It works with up to 8 - * ports. - */ - -#define MAX_LEGACY_SERIAL_PORTS 8 -static struct plat_serial8250_port serial_ports[MAX_LEGACY_SERIAL_PORTS+1]; -static unsigned int old_serial_count; - -void __init generic_find_legacy_serial_ports(u64 *physport, - unsigned int *default_speed) -{ - struct device_node *np; - u32 *sizeprop; - - struct isa_reg_property { - u32 space; - u32 address; - u32 size; - }; - struct pci_reg_property { - struct pci_address addr; - u32 size_hi; - u32 size_lo; - }; - - DBG(" -> generic_find_legacy_serial_port()\n"); - - *physport = 0; - if (default_speed) - *default_speed = 0; - - np = of_find_node_by_path("/"); - if (!np) - return; - - /* First fill our array */ - for (np = NULL; (np = of_find_node_by_type(np, "serial"));) { - struct device_node *isa, *pci; - struct isa_reg_property *reg; - unsigned long phys_size, addr_size, io_base; - u32 *rangesp; - u32 *interrupts, *clk, *spd; - char *typep; - int index, rlen, rentsize; - - /* Ok, first check if it's under an "isa" parent */ - isa = of_get_parent(np); - if (!isa || strcmp(isa->name, "isa")) { - DBG("%s: no isa parent found\n", np->full_name); - continue; - } - - /* Now look for an "ibm,aix-loc" property that gives us ordering - * if any... - */ - typep = (char *)get_property(np, "ibm,aix-loc", NULL); - - /* Get the ISA port number */ - reg = (struct isa_reg_property *)get_property(np, "reg", NULL); - if (reg == NULL) - goto next_port; - /* We assume the interrupt number isn't translated ... */ - interrupts = (u32 *)get_property(np, "interrupts", NULL); - /* get clock freq. if present */ - clk = (u32 *)get_property(np, "clock-frequency", NULL); - /* get default speed if present */ - spd = (u32 *)get_property(np, "current-speed", NULL); - /* Default to locate at end of array */ - index = old_serial_count; /* end of the array by default */ - - /* If we have a location index, then use it */ - if (typep && *typep == 'S') { - index = simple_strtol(typep+1, NULL, 0) - 1; - /* if index is out of range, use end of array instead */ - if (index >= MAX_LEGACY_SERIAL_PORTS) - index = old_serial_count; - /* if our index is still out of range, that mean that - * array is full, we could scan for a free slot but that - * make little sense to bother, just skip the port - */ - if (index >= MAX_LEGACY_SERIAL_PORTS) - goto next_port; - if (index >= old_serial_count) - old_serial_count = index + 1; - /* Check if there is a port who already claimed our slot */ - if (serial_ports[index].iobase != 0) { - /* if we still have some room, move it, else override */ - if (old_serial_count < MAX_LEGACY_SERIAL_PORTS) { - DBG("Moved legacy port %d -> %d\n", index, - old_serial_count); - serial_ports[old_serial_count++] = - serial_ports[index]; - } else { - DBG("Replacing legacy port %d\n", index); - } - } - } - if (index >= MAX_LEGACY_SERIAL_PORTS) - goto next_port; - if (index >= old_serial_count) - old_serial_count = index + 1; - - /* Now fill the entry */ - memset(&serial_ports[index], 0, sizeof(struct plat_serial8250_port)); - serial_ports[index].uartclk = clk ? *clk : BASE_BAUD * 16; - serial_ports[index].iobase = reg->address; - serial_ports[index].irq = interrupts ? interrupts[0] : 0; - serial_ports[index].flags = ASYNC_BOOT_AUTOCONF; - - DBG("Added legacy port, index: %d, port: %x, irq: %d, clk: %d\n", - index, - serial_ports[index].iobase, - serial_ports[index].irq, - serial_ports[index].uartclk); - - /* Get phys address of IO reg for port 1 */ - if (index != 0) - goto next_port; - - pci = of_get_parent(isa); - if (!pci) { - DBG("%s: no pci parent found\n", np->full_name); - goto next_port; - } - - rangesp = (u32 *)get_property(pci, "ranges", &rlen); - if (rangesp == NULL) { - of_node_put(pci); - goto next_port; - } - rlen /= 4; - - /* we need the #size-cells of the PCI bridge node itself */ - phys_size = 1; - sizeprop = (u32 *)get_property(pci, "#size-cells", NULL); - if (sizeprop != NULL) - phys_size = *sizeprop; - /* we need the parent #addr-cells */ - addr_size = prom_n_addr_cells(pci); - rentsize = 3 + addr_size + phys_size; - io_base = 0; - for (;rlen >= rentsize; rlen -= rentsize,rangesp += rentsize) { - if (((rangesp[0] >> 24) & 0x3) != 1) - continue; /* not IO space */ - io_base = rangesp[3]; - if (addr_size == 2) - io_base = (io_base << 32) | rangesp[4]; - } - if (io_base != 0) { - *physport = io_base + reg->address; - if (default_speed && spd) - *default_speed = *spd; - } - of_node_put(pci); - next_port: - of_node_put(isa); - } - - DBG(" <- generic_find_legacy_serial_port()\n"); -} - -static struct platform_device serial_device = { - .name = "serial8250", - .id = PLAT8250_DEV_PLATFORM, - .dev = { - .platform_data = serial_ports, - }, -}; - -static int __init serial_dev_init(void) -{ - return platform_device_register(&serial_device); -} -arch_initcall(serial_dev_init); - -#endif /* CONFIG_PPC_ISERIES */ - int check_legacy_ioport(unsigned long base_port) { if (ppc_md.check_legacy_ioport == NULL) @@ -851,3 +655,28 @@ void cpu_die(void) if (ppc_md.cpu_die) ppc_md.cpu_die(); } + +#ifdef CONFIG_SMP +void __init setup_per_cpu_areas(void) +{ + int i; + unsigned long size; + char *ptr; + + /* Copy section for each CPU (we discard the original) */ + size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES); +#ifdef CONFIG_MODULES + if (size < PERCPU_ENOUGH_ROOM) + size = PERCPU_ENOUGH_ROOM; +#endif + + for_each_cpu(i) { + ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size); + if (!ptr) + panic("Cannot allocate cpu data for CPU %d\n", i); + + paca[i].data_offset = ptr - __per_cpu_start; + memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + } +} +#endif diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 5a2eba60dd39..3747ab0dac3f 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -76,7 +76,6 @@ * registers from *regs. This is what we need * to do when a signal has been delivered. */ -#define sigreturn_exit(regs) return 0 #define GP_REGS_SIZE min(sizeof(elf_gregset_t32), sizeof(struct pt_regs32)) #undef __SIGNAL_FRAMESIZE @@ -156,9 +155,17 @@ static inline int save_general_regs(struct pt_regs *regs, elf_greg_t64 *gregs = (elf_greg_t64 *)regs; int i; - for (i = 0; i <= PT_RESULT; i ++) + if (!FULL_REGS(regs)) { + set_thread_flag(TIF_SAVE_NVGPRS); + current_thread_info()->nvgprs_frame = frame->mc_gregs; + } + + for (i = 0; i <= PT_RESULT; i ++) { + if (i == 14 && !FULL_REGS(regs)) + i = 32; if (__put_user((unsigned int)gregs[i], &frame->mc_gregs[i])) return -EFAULT; + } return 0; } @@ -179,8 +186,6 @@ static inline int restore_general_regs(struct pt_regs *regs, #else /* CONFIG_PPC64 */ -extern void sigreturn_exit(struct pt_regs *); - #define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs)) static inline int put_sigset_t(sigset_t __user *uset, sigset_t *set) @@ -214,6 +219,15 @@ static inline int get_old_sigaction(struct k_sigaction *new_ka, static inline int save_general_regs(struct pt_regs *regs, struct mcontext __user *frame) { + if (!FULL_REGS(regs)) { + /* Zero out the unsaved GPRs to avoid information + leak, and set TIF_SAVE_NVGPRS to ensure that the + registers do actually get saved later. */ + memset(®s->gpr[14], 0, 18 * sizeof(unsigned long)); + current_thread_info()->nvgprs_frame = &frame->mc_gregs; + set_thread_flag(TIF_SAVE_NVGPRS); + } + return __copy_to_user(&frame->mc_gregs, regs, GP_REGS_SIZE); } @@ -238,8 +252,7 @@ int do_signal(sigset_t *oldset, struct pt_regs *regs); /* * Atomically swap in the new signal mask, and wait for a signal. */ -long sys_sigsuspend(old_sigset_t mask, int p2, int p3, int p4, int p6, int p7, - struct pt_regs *regs) +long sys_sigsuspend(old_sigset_t mask) { sigset_t saveset; @@ -250,51 +263,10 @@ long sys_sigsuspend(old_sigset_t mask, int p2, int p3, int p4, int p6, int p7, recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); - regs->result = -EINTR; - regs->gpr[3] = EINTR; - regs->ccr |= 0x10000000; - while (1) { - current->state = TASK_INTERRUPTIBLE; - schedule(); - if (do_signal(&saveset, regs)) - sigreturn_exit(regs); - } -} - -long sys_rt_sigsuspend( -#ifdef CONFIG_PPC64 - compat_sigset_t __user *unewset, -#else - sigset_t __user *unewset, -#endif - size_t sigsetsize, int p3, int p4, - int p6, int p7, struct pt_regs *regs) -{ - sigset_t saveset, newset; - - /* XXX: Don't preclude handling different sized sigset_t's. */ - if (sigsetsize != sizeof(sigset_t)) - return -EINVAL; - - if (get_sigset_t(&newset, unewset)) - return -EFAULT; - sigdelsetmask(&newset, ~_BLOCKABLE); - - spin_lock_irq(¤t->sighand->siglock); - saveset = current->blocked; - current->blocked = newset; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - - regs->result = -EINTR; - regs->gpr[3] = EINTR; - regs->ccr |= 0x10000000; - while (1) { - current->state = TASK_INTERRUPTIBLE; - schedule(); - if (do_signal(&saveset, regs)) - sigreturn_exit(regs); - } + current->state = TASK_INTERRUPTIBLE; + schedule(); + set_thread_flag(TIF_RESTORE_SIGMASK); + return -ERESTARTNOHAND; } #ifdef CONFIG_PPC32 @@ -391,9 +363,6 @@ struct rt_sigframe { static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, int sigret) { -#ifdef CONFIG_PPC32 - CHECK_FULL_REGS(regs); -#endif /* Make sure floating point registers are stored in regs */ flush_fp_to_thread(current); @@ -482,6 +451,15 @@ static long restore_user_regs(struct pt_regs *regs, if (err) return 1; + /* + * Do this before updating the thread state in + * current->thread.fpr/vr/evr. That way, if we get preempted + * and another task grabs the FPU/Altivec/SPE, it won't be + * tempted to save the current CPU state into the thread_struct + * and corrupt what we are writing there. + */ + discard_lazy_cpu_state(); + /* force the process to reload the FP registers from current->thread when it next does FP instructions */ regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1); @@ -523,18 +501,6 @@ static long restore_user_regs(struct pt_regs *regs, return 1; #endif /* CONFIG_SPE */ -#ifndef CONFIG_SMP - preempt_disable(); - if (last_task_used_math == current) - last_task_used_math = NULL; - if (last_task_used_altivec == current) - last_task_used_altivec = NULL; -#ifdef CONFIG_SPE - if (last_task_used_spe == current) - last_task_used_spe = NULL; -#endif - preempt_enable(); -#endif return 0; } @@ -828,12 +794,6 @@ static int handle_rt_signal(unsigned long sig, struct k_sigaction *ka, regs->gpr[6] = (unsigned long) rt_sf; regs->nip = (unsigned long) ka->sa.sa_handler; regs->trap = 0; -#ifdef CONFIG_PPC64 - regs->result = 0; - - if (test_thread_flag(TIF_SINGLESTEP)) - ptrace_notify(SIGTRAP); -#endif return 1; badframe: @@ -911,8 +871,8 @@ long sys_swapcontext(struct ucontext __user *old_ctx, */ if (do_setcontext(new_ctx, regs, 0)) do_exit(SIGSEGV); - sigreturn_exit(regs); - /* doesn't actually return back to here */ + + set_thread_flag(TIF_RESTOREALL); return 0; } @@ -945,12 +905,11 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, * nobody does any... */ compat_sys_sigaltstack((u32)(u64)&rt_sf->uc.uc_stack, 0, 0, 0, 0, 0, regs); - return (int)regs->result; #else do_sigaltstack(&rt_sf->uc.uc_stack, NULL, regs->gpr[1]); - sigreturn_exit(regs); /* doesn't return here */ - return 0; #endif + set_thread_flag(TIF_RESTOREALL); + return 0; bad: force_sig(SIGSEGV, current); @@ -1041,9 +1000,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx, */ do_sigaltstack(&ctx->uc_stack, NULL, regs->gpr[1]); - sigreturn_exit(regs); - /* doesn't actually return back to here */ - + set_thread_flag(TIF_RESTOREALL); out: return 0; } @@ -1107,12 +1064,6 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka, regs->gpr[4] = (unsigned long) sc; regs->nip = (unsigned long) ka->sa.sa_handler; regs->trap = 0; -#ifdef CONFIG_PPC64 - regs->result = 0; - - if (test_thread_flag(TIF_SINGLESTEP)) - ptrace_notify(SIGTRAP); -#endif return 1; @@ -1160,12 +1111,8 @@ long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, || restore_user_regs(regs, sr, 1)) goto badframe; -#ifdef CONFIG_PPC64 - return (int)regs->result; -#else - sigreturn_exit(regs); /* doesn't return */ + set_thread_flag(TIF_RESTOREALL); return 0; -#endif badframe: force_sig(SIGSEGV, current); @@ -1181,7 +1128,7 @@ int do_signal(sigset_t *oldset, struct pt_regs *regs) { siginfo_t info; struct k_sigaction ka; - unsigned int frame, newsp; + unsigned int newsp; int signr, ret; #ifdef CONFIG_PPC32 @@ -1192,11 +1139,11 @@ int do_signal(sigset_t *oldset, struct pt_regs *regs) } #endif - if (!oldset) + if (test_thread_flag(TIF_RESTORE_SIGMASK)) + oldset = ¤t->saved_sigmask; + else if (!oldset) oldset = ¤t->blocked; - newsp = frame = 0; - signr = get_signal_to_deliver(&info, &ka, regs, NULL); #ifdef CONFIG_PPC32 no_signal: @@ -1226,8 +1173,14 @@ no_signal: } } - if (signr == 0) + if (signr == 0) { + /* No signal to deliver -- put the saved sigmask back */ + if (test_thread_flag(TIF_RESTORE_SIGMASK)) { + clear_thread_flag(TIF_RESTORE_SIGMASK); + sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); + } return 0; /* no signals delivered */ + } if ((ka.sa.sa_flags & SA_ONSTACK) && current->sas_ss_size && !on_sig_stack(regs->gpr[1])) @@ -1260,6 +1213,10 @@ no_signal: sigaddset(¤t->blocked, signr); recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); + /* A signal was successfully delivered; the saved sigmask is in + its frame, and we can clear the TIF_RESTORE_SIGMASK flag */ + if (test_thread_flag(TIF_RESTORE_SIGMASK)) + clear_thread_flag(TIF_RESTORE_SIGMASK); } return ret; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 1decf2785530..b3193116e686 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -67,40 +67,6 @@ struct rt_sigframe { char abigap[288]; } __attribute__ ((aligned (16))); - -/* - * Atomically swap in the new signal mask, and wait for a signal. - */ -long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize, int p3, int p4, - int p6, int p7, struct pt_regs *regs) -{ - sigset_t saveset, newset; - - /* XXX: Don't preclude handling different sized sigset_t's. */ - if (sigsetsize != sizeof(sigset_t)) - return -EINVAL; - - if (copy_from_user(&newset, unewset, sizeof(newset))) - return -EFAULT; - sigdelsetmask(&newset, ~_BLOCKABLE); - - spin_lock_irq(¤t->sighand->siglock); - saveset = current->blocked; - current->blocked = newset; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - - regs->result = -EINTR; - regs->gpr[3] = EINTR; - regs->ccr |= 0x10000000; - while (1) { - current->state = TASK_INTERRUPTIBLE; - schedule(); - if (do_signal(&saveset, regs)) - return 0; - } -} - long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, unsigned long r5, unsigned long r6, unsigned long r7, unsigned long r8, struct pt_regs *regs) @@ -152,6 +118,14 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, err |= __put_user(0, &sc->v_regs); #endif /* CONFIG_ALTIVEC */ err |= __put_user(&sc->gp_regs, &sc->regs); + if (!FULL_REGS(regs)) { + /* Zero out the unsaved GPRs to avoid information + leak, and set TIF_SAVE_NVGPRS to ensure that the + registers do actually get saved later. */ + memset(®s->gpr[14], 0, 18 * sizeof(unsigned long)); + set_thread_flag(TIF_SAVE_NVGPRS); + current_thread_info()->nvgprs_frame = &sc->gp_regs; + } err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE); err |= __copy_to_user(&sc->fp_regs, ¤t->thread.fpr, FP_REGS_SIZE); err |= __put_user(signr, &sc->signal); @@ -197,10 +171,20 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig, if (!sig) regs->gpr[13] = save_r13; - err |= __copy_from_user(¤t->thread.fpr, &sc->fp_regs, FP_REGS_SIZE); if (set != NULL) err |= __get_user(set->sig[0], &sc->oldmask); + /* + * Do this before updating the thread state in + * current->thread.fpr/vr. That way, if we get preempted + * and another task grabs the FPU/Altivec, it won't be + * tempted to save the current CPU state into the thread_struct + * and corrupt what we are writing there. + */ + discard_lazy_cpu_state(); + + err |= __copy_from_user(¤t->thread.fpr, &sc->fp_regs, FP_REGS_SIZE); + #ifdef CONFIG_ALTIVEC err |= __get_user(v_regs, &sc->v_regs); err |= __get_user(msr, &sc->gp_regs[PT_MSR]); @@ -219,14 +203,6 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig, current->thread.vrsave = 0; #endif /* CONFIG_ALTIVEC */ -#ifndef CONFIG_SMP - preempt_disable(); - if (last_task_used_math == current) - last_task_used_math = NULL; - if (last_task_used_altivec == current) - last_task_used_altivec = NULL; - preempt_enable(); -#endif /* Force reload of FP/VEC */ regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC); @@ -340,6 +316,7 @@ int sys_swapcontext(struct ucontext __user *old_ctx, do_exit(SIGSEGV); /* This returns like rt_sigreturn */ + set_thread_flag(TIF_RESTOREALL); return 0; } @@ -372,7 +349,8 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, */ do_sigaltstack(&uc->uc_stack, NULL, regs->gpr[1]); - return regs->result; + set_thread_flag(TIF_RESTOREALL); + return 0; badframe: #if DEBUG_SIG @@ -454,9 +432,6 @@ static int setup_rt_frame(int signr, struct k_sigaction *ka, siginfo_t *info, if (err) goto badframe; - if (test_thread_flag(TIF_SINGLESTEP)) - ptrace_notify(SIGTRAP); - return 1; badframe: @@ -502,6 +477,8 @@ static inline void syscall_restart(struct pt_regs *regs, struct k_sigaction *ka) * we only get here if there is a handler, we dont restart. */ regs->result = -EINTR; + regs->gpr[3] = EINTR; + regs->ccr |= 0x10000000; break; case -ERESTARTSYS: /* ERESTARTSYS means to restart the syscall if there is no @@ -509,6 +486,8 @@ static inline void syscall_restart(struct pt_regs *regs, struct k_sigaction *ka) */ if (!(ka->sa.sa_flags & SA_RESTART)) { regs->result = -EINTR; + regs->gpr[3] = EINTR; + regs->ccr |= 0x10000000; break; } /* fallthrough */ @@ -541,11 +520,15 @@ int do_signal(sigset_t *oldset, struct pt_regs *regs) if (test_thread_flag(TIF_32BIT)) return do_signal32(oldset, regs); - if (!oldset) + if (test_thread_flag(TIF_RESTORE_SIGMASK)) + oldset = ¤t->saved_sigmask; + else if (!oldset) oldset = ¤t->blocked; signr = get_signal_to_deliver(&info, &ka, regs, NULL); if (signr > 0) { + int ret; + /* Whee! Actually deliver the signal. */ if (TRAP(regs) == 0x0C00) syscall_restart(regs, &ka); @@ -558,7 +541,14 @@ int do_signal(sigset_t *oldset, struct pt_regs *regs) if (current->thread.dabr) set_dabr(current->thread.dabr); - return handle_signal(signr, &ka, &info, oldset, regs); + ret = handle_signal(signr, &ka, &info, oldset, regs); + + /* If a signal was successfully delivered, the saved sigmask is in + its frame, and we can clear the TIF_RESTORE_SIGMASK flag */ + if (ret && test_thread_flag(TIF_RESTORE_SIGMASK)) + clear_thread_flag(TIF_RESTORE_SIGMASK); + + return ret; } if (TRAP(regs) == 0x0C00) { /* System Call! */ @@ -574,6 +564,11 @@ int do_signal(sigset_t *oldset, struct pt_regs *regs) regs->result = 0; } } + /* No signal to deliver -- put the saved sigmask back */ + if (test_thread_flag(TIF_RESTORE_SIGMASK)) { + clear_thread_flag(TIF_RESTORE_SIGMASK); + sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); + } return 0; } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 30374d2f88e5..c8458c531b25 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -31,6 +31,7 @@ #include <linux/sysdev.h> #include <linux/cpu.h> #include <linux/notifier.h> +#include <linux/topology.h> #include <asm/ptrace.h> #include <asm/atomic.h> @@ -75,6 +76,8 @@ void smp_call_function_interrupt(void); int smt_enabled_at_boot = 1; +static void (*crash_ipi_function_ptr)(struct pt_regs *) = NULL; + #ifdef CONFIG_MPIC int __init smp_mpic_probe(void) { @@ -123,11 +126,16 @@ void smp_message_recv(int msg, struct pt_regs *regs) /* XXX Do we have to do this? */ set_need_resched(); break; -#ifdef CONFIG_DEBUGGER case PPC_MSG_DEBUGGER_BREAK: + if (crash_ipi_function_ptr) { + crash_ipi_function_ptr(regs); + break; + } +#ifdef CONFIG_DEBUGGER debugger_ipi(regs); break; -#endif +#endif /* CONFIG_DEBUGGER */ + /* FALLTHROUGH */ default: printk("SMP %d: smp_message_recv(): unknown msg %d\n", smp_processor_id(), msg); @@ -147,6 +155,17 @@ void smp_send_debugger_break(int cpu) } #endif +#ifdef CONFIG_KEXEC +void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) +{ + crash_ipi_function_ptr = crash_ipi_callback; + if (crash_ipi_callback) { + mb(); + smp_ops->message_pass(MSG_ALL_BUT_SELF, PPC_MSG_DEBUGGER_BREAK); + } +} +#endif + static void stop_this_cpu(void *dummy) { local_irq_disable(); @@ -319,8 +338,8 @@ static void __init smp_create_idle(unsigned int cpu) #ifdef CONFIG_PPC64 paca[cpu].__current = p; #endif - current_set[cpu] = p->thread_info; - p->thread_info->cpu = cpu; + current_set[cpu] = task_thread_info(p); + task_thread_info(p)->cpu = cpu; } void __init smp_prepare_cpus(unsigned int max_cpus) @@ -356,7 +375,7 @@ void __devinit smp_prepare_boot_cpu(void) #ifdef CONFIG_PPC64 paca[boot_cpuid].__current = current; #endif - current_set[boot_cpuid] = current->thread_info; + current_set[boot_cpuid] = task_thread_info(current); } #ifdef CONFIG_HOTPLUG_CPU @@ -452,10 +471,6 @@ int __devinit __cpu_up(unsigned int cpu) if (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu)) return -EINVAL; -#ifdef CONFIG_PPC64 - paca[cpu].default_decr = tb_ticks_per_jiffy; -#endif - /* Make sure callin-map entry is 0 (can be leftover a CPU * hotplug */ @@ -554,6 +569,8 @@ void __init smp_cpus_done(unsigned int max_cpus) smp_ops->setup_cpu(boot_cpuid); set_cpus_allowed(current, old_mask); + + dump_numa_cpu_topology(); } #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index 9c921d1c4084..475249dc2350 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -552,30 +552,6 @@ asmlinkage long compat_sys_sched_rr_get_interval(u32 pid, struct compat_timespec return ret; } -asmlinkage int compat_sys_pciconfig_read(u32 bus, u32 dfn, u32 off, u32 len, u32 ubuf) -{ - return sys_pciconfig_read((unsigned long) bus, - (unsigned long) dfn, - (unsigned long) off, - (unsigned long) len, - compat_ptr(ubuf)); -} - -asmlinkage int compat_sys_pciconfig_write(u32 bus, u32 dfn, u32 off, u32 len, u32 ubuf) -{ - return sys_pciconfig_write((unsigned long) bus, - (unsigned long) dfn, - (unsigned long) off, - (unsigned long) len, - compat_ptr(ubuf)); -} - -asmlinkage int compat_sys_pciconfig_iobase(u32 which, u32 in_bus, u32 in_devfn) -{ - return sys_pciconfig_iobase(which, in_bus, in_devfn); -} - - /* Note: it is necessary to treat mode as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -956,38 +932,6 @@ long ppc32_fadvise64(int fd, u32 unused, u32 offset_high, u32 offset_low, advice); } -long ppc32_timer_create(clockid_t clock, - struct compat_sigevent __user *ev32, - timer_t __user *timer_id) -{ - sigevent_t event; - timer_t t; - long err; - mm_segment_t savefs; - - if (ev32 == NULL) - return sys_timer_create(clock, NULL, timer_id); - - if (get_compat_sigevent(&event, ev32)) - return -EFAULT; - - if (!access_ok(VERIFY_WRITE, timer_id, sizeof(timer_t))) - return -EFAULT; - - savefs = get_fs(); - set_fs(KERNEL_DS); - /* The __user pointer casts are valid due to the set_fs() */ - err = sys_timer_create(clock, - (sigevent_t __user *) &event, - (timer_t __user *) &t); - set_fs(savefs); - - if (err == 0) - err = __put_user(t, timer_id); - - return err; -} - asmlinkage long compat_sys_add_key(const char __user *_type, const char __user *_description, const void __user *_payload, diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index 91b93d917b64..ad895c99813b 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -43,9 +43,6 @@ #include <asm/time.h> #include <asm/unistd.h> -extern unsigned long wall_jiffies; - - /* * sys_ipc() is the de-multiplexer for the SysV IPC calls.. * @@ -311,31 +308,6 @@ int sys_olduname(struct oldold_utsname __user *name) return error? -EFAULT: 0; } -#ifdef CONFIG_PPC64 -time_t sys64_time(time_t __user * tloc) -{ - time_t secs; - time_t usecs; - - long tb_delta = tb_ticks_since(tb_last_stamp); - tb_delta += (jiffies - wall_jiffies) * tb_ticks_per_jiffy; - - secs = xtime.tv_sec; - usecs = (xtime.tv_nsec/1000) + tb_delta / tb_ticks_per_usec; - while (usecs >= USEC_PER_SEC) { - ++secs; - usecs -= USEC_PER_SEC; - } - - if (tloc) { - if (put_user(secs,tloc)) - secs = -EFAULT; - } - - return secs; -} -#endif - long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, u32 len_high, u32 len_low) { diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S index 65eaea91b499..007b15ee36d2 100644 --- a/arch/powerpc/kernel/systbl.S +++ b/arch/powerpc/kernel/systbl.S @@ -54,7 +54,7 @@ SYSCALL(link) SYSCALL(unlink) COMPAT_SYS(execve) SYSCALL(chdir) -SYSX(sys64_time,compat_sys_time,sys_time) +COMPAT_SYS(time) SYSCALL(mknod) SYSCALL(chmod) SYSCALL(lchown) @@ -113,7 +113,7 @@ SYSCALL(sgetmask) COMPAT_SYS(ssetmask) SYSCALL(setreuid) SYSCALL(setregid) -SYSX(sys_ni_syscall,ppc32_sigsuspend,ppc_sigsuspend) +SYS32ONLY(sigsuspend) COMPAT_SYS(sigpending) COMPAT_SYS(sethostname) COMPAT_SYS(setrlimit) @@ -160,7 +160,7 @@ SYSCALL(swapoff) COMPAT_SYS(sysinfo) COMPAT_SYS(ipc) SYSCALL(fsync) -SYSX(sys_ni_syscall,ppc32_sigreturn,sys_sigreturn) +SYS32ONLY(sigreturn) PPC_SYS(clone) COMPAT_SYS(setdomainname) PPC_SYS(newuname) @@ -213,13 +213,13 @@ COMPAT_SYS(nfsservctl) SYSCALL(setresgid) SYSCALL(getresgid) COMPAT_SYS(prctl) -SYSX(ppc64_rt_sigreturn,ppc32_rt_sigreturn,sys_rt_sigreturn) +COMPAT_SYS(rt_sigreturn) COMPAT_SYS(rt_sigaction) COMPAT_SYS(rt_sigprocmask) COMPAT_SYS(rt_sigpending) COMPAT_SYS(rt_sigtimedwait) COMPAT_SYS(rt_sigqueueinfo) -SYSX(ppc64_rt_sigsuspend,ppc32_rt_sigsuspend,ppc_rt_sigsuspend) +COMPAT_SYS(rt_sigsuspend) COMPAT_SYS(pread64) COMPAT_SYS(pwrite64) SYSCALL(chown) @@ -239,9 +239,9 @@ SYS32ONLY(ftruncate64) SYSX(sys_ni_syscall,sys_stat64,sys_stat64) SYSX(sys_ni_syscall,sys_lstat64,sys_lstat64) SYSX(sys_ni_syscall,sys_fstat64,sys_fstat64) -COMPAT_SYS(pciconfig_read) -COMPAT_SYS(pciconfig_write) -COMPAT_SYS(pciconfig_iobase) +SYSCALL(pciconfig_read) +SYSCALL(pciconfig_write) +SYSCALL(pciconfig_iobase) SYSCALL(ni_syscall) SYSCALL(getdents64) SYSCALL(pivot_root) @@ -281,7 +281,7 @@ SYSCALL(epoll_create) SYSCALL(epoll_ctl) SYSCALL(epoll_wait) SYSCALL(remap_file_pages) -SYSX(sys_timer_create,ppc32_timer_create,sys_timer_create) +SYSX(sys_timer_create,compat_sys_timer_create,sys_timer_create) COMPAT_SYS(timer_settime) COMPAT_SYS(timer_gettime) SYSCALL(timer_getoverrun) @@ -290,7 +290,7 @@ COMPAT_SYS(clock_settime) COMPAT_SYS(clock_gettime) COMPAT_SYS(clock_getres) COMPAT_SYS(clock_nanosleep) -SYSX(ppc64_swapcontext,ppc32_swapcontext,ppc_swapcontext) +COMPAT_SYS(swapcontext) COMPAT_SYS(tgkill) COMPAT_SYS(utimes) COMPAT_SYS(statfs64) @@ -319,3 +319,7 @@ COMPAT_SYS(ioprio_get) SYSCALL(inotify_init) SYSCALL(inotify_add_watch) SYSCALL(inotify_rm_watch) +SYSCALL(spu_run) +SYSCALL(spu_create) +COMPAT_SYS(pselect6) +COMPAT_SYS(ppoll) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index de8479769bb7..c4a294d657b9 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -431,7 +431,7 @@ void timer_interrupt(struct pt_regs * regs) profile_tick(CPU_PROFILING, regs); #ifdef CONFIG_PPC_ISERIES - get_paca()->lppaca.int_dword.fields.decr_int = 0; + get_lppaca()->int_dword.fields.decr_int = 0; #endif while ((ticks = tb_ticks_since(per_cpu(last_jiffy, cpu))) @@ -699,10 +699,6 @@ void __init time_init(void) div128_by_32(1024*1024, 0, tb_ticks_per_sec, &res); tb_to_xs = res.result_low; -#ifdef CONFIG_PPC64 - get_paca()->default_decr = tb_ticks_per_jiffy; -#endif - /* * Compute scale factor for sched_clock. * The calibrate_decr() function has set tb_ticks_per_sec, diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 1511454c4690..7509aa6474f2 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -31,6 +31,7 @@ #include <linux/prctl.h> #include <linux/delay.h> #include <linux/kprobes.h> +#include <linux/kexec.h> #include <asm/kdebug.h> #include <asm/pgtable.h> @@ -95,7 +96,7 @@ static DEFINE_SPINLOCK(die_lock); int die(const char *str, struct pt_regs *regs, long err) { - static int die_counter; + static int die_counter, crash_dump_start = 0; int nl = 0; if (debugger(regs)) @@ -156,7 +157,21 @@ int die(const char *str, struct pt_regs *regs, long err) print_modules(); show_regs(regs); bust_spinlocks(0); + + if (!crash_dump_start && kexec_should_crash(current)) { + crash_dump_start = 1; + spin_unlock_irq(&die_lock); + crash_kexec(regs); + /* NOTREACHED */ + } spin_unlock_irq(&die_lock); + if (crash_dump_start) + /* + * Only for soft-reset: Other CPUs will be responded to an IPI + * sent by first kexec CPU. + */ + for(;;) + ; if (in_interrupt()) panic("Fatal exception in interrupt"); @@ -215,8 +230,10 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) void system_reset_exception(struct pt_regs *regs) { /* See if any machine dependent calls */ - if (ppc_md.system_reset_exception) - ppc_md.system_reset_exception(regs); + if (ppc_md.system_reset_exception) { + if (ppc_md.system_reset_exception(regs)) + return; + } die("System Reset", regs, SIGABRT); @@ -886,12 +903,10 @@ void altivec_unavailable_exception(struct pt_regs *regs) die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT); } -#if defined(CONFIG_PPC64) || defined(CONFIG_E500) void performance_monitor_exception(struct pt_regs *regs) { perf_irq(regs); } -#endif #ifdef CONFIG_8xx void SoftwareEmulation(struct pt_regs *regs) diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index 0d878e72fc44..3774e80094f5 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -15,11 +15,36 @@ #include <linux/sched.h> #include <linux/console.h> #include <asm/processor.h> +#include <asm/udbg.h> -void (*udbg_putc)(unsigned char c); -unsigned char (*udbg_getc)(void); +void (*udbg_putc)(char c); +int (*udbg_getc)(void); int (*udbg_getc_poll)(void); +/* + * Early debugging facilities. You can enable _one_ of these via .config, + * if you do so your kernel _will not boot_ on anything else. Be careful. + */ +void __init udbg_early_init(void) +{ +#if defined(CONFIG_PPC_EARLY_DEBUG_LPAR) + /* For LPAR machines that have an HVC console on vterm 0 */ + udbg_init_debug_lpar(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_G5) + /* For use on Apple G5 machines */ + udbg_init_pmac_realmode(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_RTAS) + /* RTAS panel debug */ + udbg_init_rtas(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_MAPLE) + /* Maple real mode debug */ + udbg_init_maple_realmode(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_ISERIES) + /* For iSeries - hit Ctrl-x Ctrl-x to see the output */ + udbg_init_iseries(); +#endif +} + /* udbg library, used by xmon et al */ void udbg_puts(const char *s) { @@ -57,8 +82,8 @@ int udbg_write(const char *s, int n) int udbg_read(char *buf, int buflen) { - char c, *p = buf; - int i; + char *p = buf; + int i, c; if (!udbg_getc) return 0; @@ -66,8 +91,11 @@ int udbg_read(char *buf, int buflen) for (i = 0; i < buflen; ++i) { do { c = udbg_getc(); + if (c == -1 && i == 0) + return -1; + } while (c == 0x11 || c == 0x13); - if (c == 0) + if (c == 0 || c == -1) break; *p++ = c; } @@ -78,7 +106,7 @@ int udbg_read(char *buf, int buflen) #define UDBG_BUFSIZE 256 void udbg_printf(const char *fmt, ...) { - unsigned char buf[UDBG_BUFSIZE]; + char buf[UDBG_BUFSIZE]; va_list args; va_start(args, fmt); @@ -87,6 +115,12 @@ void udbg_printf(const char *fmt, ...) va_end(args); } +void __init udbg_progress(char *s, unsigned short hex) +{ + udbg_puts(s); + udbg_puts("\n"); +} + /* * Early boot console based on udbg */ @@ -99,7 +133,7 @@ static void udbg_console_write(struct console *con, const char *s, static struct console udbg_console = { .name = "udbg", .write = udbg_console_write, - .flags = CON_PRINTBUFFER, + .flags = CON_PRINTBUFFER | CON_ENABLED, .index = -1, }; @@ -107,15 +141,19 @@ static int early_console_initialized; void __init disable_early_printk(void) { +#if 1 if (!early_console_initialized) return; unregister_console(&udbg_console); early_console_initialized = 0; +#endif } /* called by setup_system */ void register_early_udbg_console(void) { + if (early_console_initialized) + return; early_console_initialized = 1; register_console(&udbg_console); } diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index 9313574ab935..2da65a9c93f6 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -43,9 +43,11 @@ struct NS16550 { #define LSR_TEMT 0x40 /* Xmitter empty */ #define LSR_ERR 0x80 /* Error */ +#define LCR_DLAB 0x80 + static volatile struct NS16550 __iomem *udbg_comport; -static void udbg_550_putc(unsigned char c) +static void udbg_550_putc(char c) { if (udbg_comport) { while ((in_8(&udbg_comport->lsr) & LSR_THRE) == 0) @@ -67,39 +69,80 @@ static int udbg_550_getc_poll(void) return -1; } -static unsigned char udbg_550_getc(void) +static int udbg_550_getc(void) { if (udbg_comport) { while ((in_8(&udbg_comport->lsr) & LSR_DR) == 0) /* wait for char */; return in_8(&udbg_comport->rbr); } - return 0; + return -1; } -void udbg_init_uart(void __iomem *comport, unsigned int speed) +void udbg_init_uart(void __iomem *comport, unsigned int speed, + unsigned int clock) { - u16 dll = speed ? (115200 / speed) : 12; + unsigned int dll, base_bauds = clock / 16; + + if (speed == 0) + speed = 9600; + dll = base_bauds / speed; if (comport) { udbg_comport = (struct NS16550 __iomem *)comport; out_8(&udbg_comport->lcr, 0x00); out_8(&udbg_comport->ier, 0xff); out_8(&udbg_comport->ier, 0x00); - out_8(&udbg_comport->lcr, 0x80); /* Access baud rate */ - out_8(&udbg_comport->dll, dll & 0xff); /* 1 = 115200, 2 = 57600, - 3 = 38400, 12 = 9600 baud */ - out_8(&udbg_comport->dlm, dll >> 8); /* dll >> 8 which should be zero - for fast rates; */ - out_8(&udbg_comport->lcr, 0x03); /* 8 data, 1 stop, no parity */ - out_8(&udbg_comport->mcr, 0x03); /* RTS/DTR */ - out_8(&udbg_comport->fcr ,0x07); /* Clear & enable FIFOs */ + out_8(&udbg_comport->lcr, LCR_DLAB); + out_8(&udbg_comport->dll, dll & 0xff); + out_8(&udbg_comport->dlm, dll >> 8); + /* 8 data, 1 stop, no parity */ + out_8(&udbg_comport->lcr, 0x03); + /* RTS/DTR */ + out_8(&udbg_comport->mcr, 0x03); + /* Clear & enable FIFOs */ + out_8(&udbg_comport->fcr ,0x07); udbg_putc = udbg_550_putc; udbg_getc = udbg_550_getc; udbg_getc_poll = udbg_550_getc_poll; } } +unsigned int udbg_probe_uart_speed(void __iomem *comport, unsigned int clock) +{ + unsigned int dll, dlm, divisor, prescaler, speed; + u8 old_lcr; + volatile struct NS16550 __iomem *port = comport; + + old_lcr = in_8(&port->lcr); + + /* select divisor latch registers. */ + out_8(&port->lcr, LCR_DLAB); + + /* now, read the divisor */ + dll = in_8(&port->dll); + dlm = in_8(&port->dlm); + divisor = dlm << 8 | dll; + + /* check prescaling */ + if (in_8(&port->mcr) & 0x80) + prescaler = 4; + else + prescaler = 1; + + /* restore the LCR */ + out_8(&port->lcr, old_lcr); + + /* calculate speed */ + speed = (clock / prescaler) / (divisor * 16); + + /* sanity check */ + if (speed < 0 || speed > (clock / 16)) + speed = 9600; + + return speed; +} + #ifdef CONFIG_PPC_MAPLE void udbg_maple_real_putc(unsigned char c) { @@ -112,7 +155,7 @@ void udbg_maple_real_putc(unsigned char c) } } -void udbg_init_maple_realmode(void) +void __init udbg_init_maple_realmode(void) { udbg_comport = (volatile struct NS16550 __iomem *)0xf40003f8; diff --git a/arch/powerpc/kernel/udbg_scc.c b/arch/powerpc/kernel/udbg_scc.c deleted file mode 100644 index 820c53551507..000000000000 --- a/arch/powerpc/kernel/udbg_scc.c +++ /dev/null @@ -1,135 +0,0 @@ -/* - * udbg for for zilog scc ports as found on Apple PowerMacs - * - * Copyright (C) 2001-2005 PPC 64 Team, IBM Corp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include <linux/config.h> -#include <linux/types.h> -#include <asm/udbg.h> -#include <asm/processor.h> -#include <asm/io.h> -#include <asm/prom.h> -#include <asm/pmac_feature.h> - -extern u8 real_readb(volatile u8 __iomem *addr); -extern void real_writeb(u8 data, volatile u8 __iomem *addr); - -#define SCC_TXRDY 4 -#define SCC_RXRDY 1 - -static volatile u8 __iomem *sccc; -static volatile u8 __iomem *sccd; - -static void udbg_scc_putc(unsigned char c) -{ - if (sccc) { - while ((in_8(sccc) & SCC_TXRDY) == 0) - ; - out_8(sccd, c); - if (c == '\n') - udbg_scc_putc('\r'); - } -} - -static int udbg_scc_getc_poll(void) -{ - if (sccc) { - if ((in_8(sccc) & SCC_RXRDY) != 0) - return in_8(sccd); - else - return -1; - } - return -1; -} - -static unsigned char udbg_scc_getc(void) -{ - if (sccc) { - while ((in_8(sccc) & SCC_RXRDY) == 0) - ; - return in_8(sccd); - } - return 0; -} - -static unsigned char scc_inittab[] = { - 13, 0, /* set baud rate divisor */ - 12, 0, - 14, 1, /* baud rate gen enable, src=rtxc */ - 11, 0x50, /* clocks = br gen */ - 5, 0xea, /* tx 8 bits, assert DTR & RTS */ - 4, 0x46, /* x16 clock, 1 stop */ - 3, 0xc1, /* rx enable, 8 bits */ -}; - -void udbg_init_scc(struct device_node *np) -{ - u32 *reg; - unsigned long addr; - int i, x; - - if (np == NULL) - np = of_find_node_by_name(NULL, "escc"); - if (np == NULL || np->parent == NULL) - return; - - udbg_printf("found SCC...\n"); - /* Get address within mac-io ASIC */ - reg = (u32 *)get_property(np, "reg", NULL); - if (reg == NULL) - return; - addr = reg[0]; - udbg_printf("local addr: %lx\n", addr); - /* Get address of mac-io PCI itself */ - reg = (u32 *)get_property(np->parent, "assigned-addresses", NULL); - if (reg == NULL) - return; - addr += reg[2]; - udbg_printf("final addr: %lx\n", addr); - - /* Setup for 57600 8N1 */ - addr += 0x20; - sccc = (volatile u8 * __iomem) ioremap(addr & PAGE_MASK, PAGE_SIZE) ; - sccc += addr & ~PAGE_MASK; - sccd = sccc + 0x10; - - udbg_printf("ioremap result sccc: %p\n", sccc); - mb(); - - for (i = 20000; i != 0; --i) - x = in_8(sccc); - out_8(sccc, 0x09); /* reset A or B side */ - out_8(sccc, 0xc0); - for (i = 0; i < sizeof(scc_inittab); ++i) - out_8(sccc, scc_inittab[i]); - - udbg_putc = udbg_scc_putc; - udbg_getc = udbg_scc_getc; - udbg_getc_poll = udbg_scc_getc_poll; - - udbg_puts("Hello World !\n"); -} - -static void udbg_real_scc_putc(unsigned char c) -{ - while ((real_readb(sccc) & SCC_TXRDY) == 0) - ; - real_writeb(c, sccd); - if (c == '\n') - udbg_real_scc_putc('\r'); -} - -void udbg_init_pmac_realmode(void) -{ - sccc = (volatile u8 __iomem *)0x80013020ul; - sccd = (volatile u8 __iomem *)0x80013030ul; - - udbg_putc = udbg_real_scc_putc; - udbg_getc = NULL; - udbg_getc_poll = NULL; -} diff --git a/arch/powerpc/kernel/vdso32/.gitignore b/arch/powerpc/kernel/vdso32/.gitignore new file mode 100644 index 000000000000..e45fba9d0ced --- /dev/null +++ b/arch/powerpc/kernel/vdso32/.gitignore @@ -0,0 +1 @@ +vdso32.lds diff --git a/arch/powerpc/kernel/vdso64/.gitignore b/arch/powerpc/kernel/vdso64/.gitignore new file mode 100644 index 000000000000..3fd18cf9fec2 --- /dev/null +++ b/arch/powerpc/kernel/vdso64/.gitignore @@ -0,0 +1 @@ +vdso64.lds diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 13c41495fe06..13c655ba2841 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -76,7 +76,7 @@ static void vio_bus_shutdown(struct device *dev) struct vio_dev *viodev = to_vio_dev(dev); struct vio_driver *viodrv = to_vio_driver(dev->driver); - if (viodrv->shutdown) + if (dev->driver && viodrv->shutdown) viodrv->shutdown(viodev); } @@ -91,9 +91,6 @@ int vio_register_driver(struct vio_driver *viodrv) /* fill in 'struct driver' fields */ viodrv->driver.bus = &vio_bus_type; - viodrv->driver.probe = vio_bus_probe; - viodrv->driver.remove = vio_bus_remove; - viodrv->driver.shutdown = vio_bus_shutdown; return driver_register(&viodrv->driver); } @@ -295,4 +292,7 @@ struct bus_type vio_bus_type = { .name = "vio", .uevent = vio_hotplug, .match = vio_bus_match, + .probe = vio_bus_probe, + .remove = vio_bus_remove, + .shutdown = vio_bus_shutdown, }; |