diff options
113 files changed, 3086 insertions, 2894 deletions
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index e560d102215a..63a027c9ada5 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -25,6 +25,10 @@ config SPARC select HAVE_DMA_ATTRS select HAVE_DMA_API_DEBUG select HAVE_ARCH_JUMP_LABEL + select HAVE_GENERIC_HARDIRQS + select GENERIC_HARDIRQS_NO_DEPRECATED + select GENERIC_IRQ_SHOW + select USE_GENERIC_SMP_HELPERS if SMP config SPARC32 def_bool !64BIT @@ -43,15 +47,12 @@ config SPARC64 select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD select HAVE_SYSCALL_TRACEPOINTS - select USE_GENERIC_SMP_HELPERS if SMP select RTC_DRV_CMOS select RTC_DRV_BQ4802 select RTC_DRV_SUN4V select RTC_DRV_STARFIRE select HAVE_PERF_EVENTS select PERF_USE_VMALLOC - select HAVE_GENERIC_HARDIRQS - select GENERIC_IRQ_SHOW select IRQ_PREFLOW_FASTEOI config ARCH_DEFCONFIG diff --git a/arch/sparc/include/asm/cpudata_32.h b/arch/sparc/include/asm/cpudata_32.h index 31d48a0e32c7..a4c5a938b936 100644 --- a/arch/sparc/include/asm/cpudata_32.h +++ b/arch/sparc/include/asm/cpudata_32.h @@ -16,6 +16,10 @@ typedef struct { unsigned long clock_tick; unsigned int multiplier; unsigned int counter; +#ifdef CONFIG_SMP + unsigned int irq_resched_count; + unsigned int irq_call_count; +#endif int prom_node; int mid; int next; @@ -23,5 +27,6 @@ typedef struct { DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data); #define cpu_data(__cpu) per_cpu(__cpu_data, (__cpu)) +#define local_cpu_data() __get_cpu_var(__cpu_data) #endif /* _SPARC_CPUDATA_H */ diff --git a/arch/sparc/include/asm/floppy_32.h b/arch/sparc/include/asm/floppy_32.h index 86666f70322e..482c79e2a416 100644 --- a/arch/sparc/include/asm/floppy_32.h +++ b/arch/sparc/include/asm/floppy_32.h @@ -281,28 +281,27 @@ static inline void sun_fd_enable_dma(void) pdma_areasize = pdma_size; } -/* Our low-level entry point in arch/sparc/kernel/entry.S */ -extern int sparc_floppy_request_irq(int irq, unsigned long flags, - irq_handler_t irq_handler); +extern int sparc_floppy_request_irq(unsigned int irq, + irq_handler_t irq_handler); static int sun_fd_request_irq(void) { static int once = 0; - int error; - if(!once) { + if (!once) { once = 1; - error = sparc_floppy_request_irq(FLOPPY_IRQ, - IRQF_DISABLED, - floppy_interrupt); - return ((error == 0) ? 0 : -1); - } else return 0; + return sparc_floppy_request_irq(FLOPPY_IRQ, floppy_interrupt); + } else { + return 0; + } } static struct linux_prom_registers fd_regs[2]; static int sun_floppy_init(void) { + struct platform_device *op; + struct device_node *dp; char state[128]; phandle tnode, fd_node; int num_regs; @@ -310,7 +309,6 @@ static int sun_floppy_init(void) use_virtual_dma = 1; - FLOPPY_IRQ = 11; /* Forget it if we aren't on a machine that could possibly * ever have a floppy drive. */ @@ -349,6 +347,26 @@ static int sun_floppy_init(void) sun_fdc = (struct sun_flpy_controller *) of_ioremap(&r, 0, fd_regs[0].reg_size, "floppy"); + /* Look up irq in platform_device. + * We try "SUNW,fdtwo" and "fd" + */ + for_each_node_by_name(dp, "SUNW,fdtwo") { + op = of_find_device_by_node(dp); + if (op) + break; + } + if (!op) { + for_each_node_by_name(dp, "fd") { + op = of_find_device_by_node(dp); + if (op) + break; + } + } + if (!op) + goto no_sun_fdc; + + FLOPPY_IRQ = op->archdata.irqs[0]; + /* Last minute sanity check... */ if(sun_fdc->status_82072 == 0xff) { sun_fdc = NULL; diff --git a/arch/sparc/include/asm/io.h b/arch/sparc/include/asm/io.h index a34b2994937a..f6902cf3cbe9 100644 --- a/arch/sparc/include/asm/io.h +++ b/arch/sparc/include/asm/io.h @@ -5,4 +5,17 @@ #else #include <asm/io_32.h> #endif + +/* + * Defines used for both SPARC32 and SPARC64 + */ + +/* Big endian versions of memory read/write routines */ +#define readb_be(__addr) __raw_readb(__addr) +#define readw_be(__addr) __raw_readw(__addr) +#define readl_be(__addr) __raw_readl(__addr) +#define writeb_be(__b, __addr) __raw_writeb(__b, __addr) +#define writel_be(__w, __addr) __raw_writel(__w, __addr) +#define writew_be(__l, __addr) __raw_writew(__l, __addr) + #endif diff --git a/arch/sparc/include/asm/irq_32.h b/arch/sparc/include/asm/irq_32.h index eced3e3ebd30..2ae3acaeb1b3 100644 --- a/arch/sparc/include/asm/irq_32.h +++ b/arch/sparc/include/asm/irq_32.h @@ -6,7 +6,11 @@ #ifndef _SPARC_IRQ_H #define _SPARC_IRQ_H -#define NR_IRQS 16 +/* Allocated number of logical irq numbers. + * sun4d boxes (ss2000e) should be OK with ~32. + * Be on the safe side and make room for 64 + */ +#define NR_IRQS 64 #include <linux/interrupt.h> diff --git a/arch/sparc/include/asm/leon.h b/arch/sparc/include/asm/leon.h index c04f96fb753c..6bdaf1e43d2a 100644 --- a/arch/sparc/include/asm/leon.h +++ b/arch/sparc/include/asm/leon.h @@ -52,29 +52,6 @@ #define LEON_DIAGF_VALID 0x2000 #define LEON_DIAGF_VALID_SHIFT 13 -/* - * Interrupt Sources - * - * The interrupt source numbers directly map to the trap type and to - * the bits used in the Interrupt Clear, Interrupt Force, Interrupt Mask, - * and the Interrupt Pending Registers. - */ -#define LEON_INTERRUPT_CORRECTABLE_MEMORY_ERROR 1 -#define LEON_INTERRUPT_UART_1_RX_TX 2 -#define LEON_INTERRUPT_UART_0_RX_TX 3 -#define LEON_INTERRUPT_EXTERNAL_0 4 -#define LEON_INTERRUPT_EXTERNAL_1 5 -#define LEON_INTERRUPT_EXTERNAL_2 6 -#define LEON_INTERRUPT_EXTERNAL_3 7 -#define LEON_INTERRUPT_TIMER1 8 -#define LEON_INTERRUPT_TIMER2 9 -#define LEON_INTERRUPT_EMPTY1 10 -#define LEON_INTERRUPT_EMPTY2 11 -#define LEON_INTERRUPT_OPEN_ETH 12 -#define LEON_INTERRUPT_EMPTY4 13 -#define LEON_INTERRUPT_EMPTY5 14 -#define LEON_INTERRUPT_EMPTY6 15 - /* irq masks */ #define LEON_HARD_INT(x) (1 << (x)) /* irq 0-15 */ #define LEON_IRQMASK_R 0x0000fffe /* bit 15- 1 of lregs.irqmask */ @@ -183,7 +160,6 @@ static inline void leon_srmmu_enabletlb(void) /* macro access for leon_readnobuffer_reg() */ #define LEON_BYPASSCACHE_LOAD_VA(x) leon_readnobuffer_reg((unsigned long)(x)) -extern void sparc_leon_eirq_register(int eirq); extern void leon_init(void); extern void leon_switch_mm(void); extern void leon_init_IRQ(void); @@ -239,8 +215,8 @@ static inline int sparc_leon3_cpuid(void) #endif /*!__ASSEMBLY__*/ #ifdef CONFIG_SMP -# define LEON3_IRQ_RESCHEDULE 13 -# define LEON3_IRQ_TICKER (leon_percpu_timer_dev[0].irq) +# define LEON3_IRQ_IPI_DEFAULT 13 +# define LEON3_IRQ_TICKER (leon3_ticker_irq) # define LEON3_IRQ_CROSS_CALL 15 #endif @@ -339,9 +315,9 @@ struct leon2_cacheregs { #include <linux/interrupt.h> struct device_node; -extern int sparc_leon_eirq_get(int eirq, int cpu); -extern irqreturn_t sparc_leon_eirq_isr(int dummy, void *dev_id); -extern void sparc_leon_eirq_register(int eirq); +extern unsigned int leon_build_device_irq(unsigned int real_irq, + irq_flow_handler_t flow_handler, + const char *name, int do_ack); extern void leon_clear_clock_irq(void); extern void leon_load_profile_irq(int cpu, unsigned int limit); extern void leon_init_timers(irq_handler_t counter_fn); @@ -358,6 +334,7 @@ extern void leon3_getCacheRegs(struct leon3_cacheregs *regs); extern int leon_flush_needed(void); extern void leon_switch_mm(void); extern int srmmu_swprobe_trace; +extern int leon3_ticker_irq; #ifdef CONFIG_SMP extern int leon_smp_nrcpus(void); @@ -366,17 +343,19 @@ extern void leon_smp_done(void); extern void leon_boot_cpus(void); extern int leon_boot_one_cpu(int i); void leon_init_smp(void); -extern void cpu_probe(void); extern void cpu_idle(void); extern void init_IRQ(void); extern void cpu_panic(void); extern int __leon_processor_id(void); void leon_enable_irq_cpu(unsigned int irq_nr, unsigned int cpu); +extern irqreturn_t leon_percpu_timer_interrupt(int irq, void *unused); -extern unsigned int real_irq_entry[], smpleon_ticker[]; +extern unsigned int real_irq_entry[]; +extern unsigned int smpleon_ipi[]; extern unsigned int patchme_maybe_smp_msg[]; extern unsigned int t_nmi[], linux_trap_ipi15_leon[]; extern unsigned int linux_trap_ipi15_sun4m[]; +extern int leon_ipi_irq; #endif /* CONFIG_SMP */ diff --git a/arch/sparc/include/asm/pcic.h b/arch/sparc/include/asm/pcic.h index f20ef562b265..7eb5d78f5211 100644 --- a/arch/sparc/include/asm/pcic.h +++ b/arch/sparc/include/asm/pcic.h @@ -29,11 +29,17 @@ struct linux_pcic { int pcic_imdim; }; -extern int pcic_probe(void); -/* Erm... MJ redefined pcibios_present() so that it does not work early. */ +#ifdef CONFIG_PCI extern int pcic_present(void); +extern int pcic_probe(void); +extern void pci_time_init(void); extern void sun4m_pci_init_IRQ(void); - +#else +static inline int pcic_present(void) { return 0; } +static inline int pcic_probe(void) { return 0; } +static inline void pci_time_init(void) {} +static inline void sun4m_pci_init_IRQ(void) {} +#endif #endif /* Size of PCI I/O space which we relocate. */ diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h index 303bd4dc8292..5b31a8e89823 100644 --- a/arch/sparc/include/asm/pgtable_32.h +++ b/arch/sparc/include/asm/pgtable_32.h @@ -8,6 +8,8 @@ * Copyright (C) 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) */ +#include <linux/const.h> + #ifndef __ASSEMBLY__ #include <asm-generic/4level-fixup.h> @@ -456,9 +458,9 @@ extern int io_remap_pfn_range(struct vm_area_struct *vma, #endif /* !(__ASSEMBLY__) */ -#define VMALLOC_START 0xfe600000 +#define VMALLOC_START _AC(0xfe600000,UL) /* XXX Alter this when I get around to fixing sun4c - Anton */ -#define VMALLOC_END 0xffc00000 +#define VMALLOC_END _AC(0xffc00000,UL) /* We provide our own get_unmapped_area to cope with VA holes for userland */ diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index f8dddb7045bb..b77128c80524 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -699,6 +699,9 @@ extern pmd_t swapper_low_pmd_dir[2048]; extern void paging_init(void); extern unsigned long find_ecache_flush_span(unsigned long size); +struct seq_file; +extern void mmu_info(struct seq_file *); + /* These do nothing with the way I have things setup. */ #define mmu_lockarea(vaddr, len) (vaddr) #define mmu_unlockarea(vaddr, len) do { } while(0) diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h index 2643c62f4ac0..64718ba26434 100644 --- a/arch/sparc/include/asm/setup.h +++ b/arch/sparc/include/asm/setup.h @@ -11,4 +11,16 @@ # define COMMAND_LINE_SIZE 256 #endif +#ifdef __KERNEL__ + +#ifdef CONFIG_SPARC32 +/* The CPU that was used for booting + * Only sun4d + leon may have boot_cpu_id != 0 + */ +extern unsigned char boot_cpu_id; +extern unsigned char boot_cpu_id4; +#endif + +#endif /* __KERNEL__ */ + #endif /* _SPARC_SETUP_H */ diff --git a/arch/sparc/include/asm/smp_32.h b/arch/sparc/include/asm/smp_32.h index d82d7f4c0a79..093f10843ff2 100644 --- a/arch/sparc/include/asm/smp_32.h +++ b/arch/sparc/include/asm/smp_32.h @@ -50,42 +50,38 @@ void smp_callin(void); void smp_boot_cpus(void); void smp_store_cpu_info(int); +void smp_resched_interrupt(void); +void smp_call_function_single_interrupt(void); +void smp_call_function_interrupt(void); + struct seq_file; void smp_bogo(struct seq_file *); void smp_info(struct seq_file *); BTFIXUPDEF_CALL(void, smp_cross_call, smpfunc_t, cpumask_t, unsigned long, unsigned long, unsigned long, unsigned long) BTFIXUPDEF_CALL(int, __hard_smp_processor_id, void) +BTFIXUPDEF_CALL(void, smp_ipi_resched, int); +BTFIXUPDEF_CALL(void, smp_ipi_single, int); +BTFIXUPDEF_CALL(void, smp_ipi_mask_one, int); BTFIXUPDEF_BLACKBOX(hard_smp_processor_id) BTFIXUPDEF_BLACKBOX(load_current) #define smp_cross_call(func,mask,arg1,arg2,arg3,arg4) BTFIXUP_CALL(smp_cross_call)(func,mask,arg1,arg2,arg3,arg4) -static inline void xc0(smpfunc_t func) { smp_cross_call(func, cpu_online_map, 0, 0, 0, 0); } +static inline void xc0(smpfunc_t func) { smp_cross_call(func, *cpu_online_mask, 0, 0, 0, 0); } static inline void xc1(smpfunc_t func, unsigned long arg1) -{ smp_cross_call(func, cpu_online_map, arg1, 0, 0, 0); } +{ smp_cross_call(func, *cpu_online_mask, arg1, 0, 0, 0); } static inline void xc2(smpfunc_t func, unsigned long arg1, unsigned long arg2) -{ smp_cross_call(func, cpu_online_map, arg1, arg2, 0, 0); } +{ smp_cross_call(func, *cpu_online_mask, arg1, arg2, 0, 0); } static inline void xc3(smpfunc_t func, unsigned long arg1, unsigned long arg2, unsigned long arg3) -{ smp_cross_call(func, cpu_online_map, arg1, arg2, arg3, 0); } +{ smp_cross_call(func, *cpu_online_mask, arg1, arg2, arg3, 0); } static inline void xc4(smpfunc_t func, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4) -{ smp_cross_call(func, cpu_online_map, arg1, arg2, arg3, arg4); } - -static inline int smp_call_function(void (*func)(void *info), void *info, int wait) -{ - xc1((smpfunc_t)func, (unsigned long)info); - return 0; -} +{ smp_cross_call(func, *cpu_online_mask, arg1, arg2, arg3, arg4); } -static inline int smp_call_function_single(int cpuid, void (*func) (void *info), - void *info, int wait) -{ - smp_cross_call((smpfunc_t)func, cpumask_of_cpu(cpuid), - (unsigned long) info, 0, 0, 0); - return 0; -} +extern void arch_send_call_function_single_ipi(int cpu); +extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); static inline int cpu_logical_map(int cpu) { @@ -135,6 +131,11 @@ static inline int hard_smp_processor_id(void) __asm__ __volatile__("lda [%g0] ASI_M_VIKING_TMP1, %0\n\t" "nop; nop" : "=&r" (cpuid)); + - leon + __asm__ __volatile__( "rd %asr17, %0\n\t" + "srl %0, 0x1c, %0\n\t" + "nop\n\t" : + "=&r" (cpuid)); See btfixup.h and btfixupprep.c to understand how a blackbox works. */ __asm__ __volatile__("sethi %%hi(___b_hard_smp_processor_id), %0\n\t" diff --git a/arch/sparc/include/asm/smp_64.h b/arch/sparc/include/asm/smp_64.h index f49e11cd4ded..20bca8950710 100644 --- a/arch/sparc/include/asm/smp_64.h +++ b/arch/sparc/include/asm/smp_64.h @@ -49,6 +49,10 @@ extern void cpu_play_dead(void); extern void smp_fetch_global_regs(void); +struct seq_file; +void smp_bogo(struct seq_file *); +void smp_info(struct seq_file *); + #ifdef CONFIG_HOTPLUG_CPU extern int __cpu_disable(void); extern void __cpu_die(unsigned int cpu); diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h index 7f9b9dba38a6..5f5b8bf3f50d 100644 --- a/arch/sparc/include/asm/spinlock_32.h +++ b/arch/sparc/include/asm/spinlock_32.h @@ -9,6 +9,7 @@ #ifndef __ASSEMBLY__ #include <asm/psr.h> +#include <asm/processor.h> /* for cpu_relax */ #define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0) diff --git a/arch/sparc/include/asm/system_32.h b/arch/sparc/include/asm/system_32.h index 890036b3689a..47a7e862474e 100644 --- a/arch/sparc/include/asm/system_32.h +++ b/arch/sparc/include/asm/system_32.h @@ -15,11 +15,6 @@ #include <linux/irqflags.h> -static inline unsigned int probe_irq_mask(unsigned long val) -{ - return 0; -} - /* * Sparc (general) CPU types */ diff --git a/arch/sparc/include/asm/system_64.h b/arch/sparc/include/asm/system_64.h index e3b65d8cf41b..3c96d3bb9f15 100644 --- a/arch/sparc/include/asm/system_64.h +++ b/arch/sparc/include/asm/system_64.h @@ -29,10 +29,6 @@ enum sparc_cpu { /* This cannot ever be a sun4c :) That's just history. */ #define ARCH_SUN4C 0 -extern const char *sparc_cpu_type; -extern const char *sparc_fpu_type; -extern const char *sparc_pmu_type; - extern char reboot_command[]; /* These are here in an effort to more fully work around Spitfire Errata diff --git a/arch/sparc/include/asm/winmacro.h b/arch/sparc/include/asm/winmacro.h index 5b0a06dc3bcb..a9be04b0d049 100644 --- a/arch/sparc/include/asm/winmacro.h +++ b/arch/sparc/include/asm/winmacro.h @@ -103,6 +103,7 @@ st %scratch, [%cur_reg + TI_W_SAVED]; #ifdef CONFIG_SMP +/* Results of LOAD_CURRENT() after BTFIXUP for SUN4M, SUN4D & LEON (comments) */ #define LOAD_CURRENT4M(dest_reg, idreg) \ rd %tbr, %idreg; \ sethi %hi(current_set), %dest_reg; \ @@ -118,6 +119,14 @@ or %dest_reg, %lo(C_LABEL(current_set)), %dest_reg; \ ld [%idreg + %dest_reg], %dest_reg; +#define LOAD_CURRENT_LEON(dest_reg, idreg) \ + rd %asr17, %idreg; \ + sethi %hi(current_set), %dest_reg; \ + srl %idreg, 0x1c, %idreg; \ + or %dest_reg, %lo(current_set), %dest_reg; \ + sll %idreg, 0x2, %idreg; \ + ld [%idreg + %dest_reg], %dest_reg; + /* Blackbox - take care with this... - check smp4m and smp4d before changing this. */ #define LOAD_CURRENT(dest_reg, idreg) \ sethi %hi(___b_load_current), %idreg; \ diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index 99aa4db6e9c2..9cff2709a96d 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -71,10 +71,6 @@ obj-$(CONFIG_SPARC64) += pcr.o obj-$(CONFIG_SPARC64) += nmi.o obj-$(CONFIG_SPARC64_SMP) += cpumap.o -# sparc32 do not use GENERIC_HARDIRQS but uses the generic devres implementation -obj-$(CONFIG_SPARC32) += devres.o -devres-y := ../../../kernel/irq/devres.o - obj-y += dma.o obj-$(CONFIG_SPARC32_PCI) += pcic.o diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c index 7925c54f4133..138dbbc8dc84 100644 --- a/arch/sparc/kernel/cpu.c +++ b/arch/sparc/kernel/cpu.c @@ -4,6 +4,7 @@ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) */ +#include <linux/seq_file.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> @@ -11,7 +12,9 @@ #include <linux/threads.h> #include <asm/spitfire.h> +#include <asm/pgtable.h> #include <asm/oplib.h> +#include <asm/setup.h> #include <asm/page.h> #include <asm/head.h> #include <asm/psr.h> @@ -23,6 +26,9 @@ DEFINE_PER_CPU(cpuinfo_sparc, __cpu_data) = { 0 }; EXPORT_PER_CPU_SYMBOL(__cpu_data); +int ncpus_probed; +unsigned int fsr_storage; + struct cpu_info { int psr_vers; const char *name; @@ -247,13 +253,12 @@ static const struct manufacturer_info __initconst manufacturer_info[] = { * machine type value into consideration too. I will fix this. */ -const char *sparc_cpu_type; -const char *sparc_fpu_type; +static const char *sparc_cpu_type; +static const char *sparc_fpu_type; const char *sparc_pmu_type; -unsigned int fsr_storage; -static void set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers) +static void __init set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers) { const struct manufacturer_info *manuf; int i; @@ -313,7 +318,123 @@ static void set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers) } #ifdef CONFIG_SPARC32 -void __cpuinit cpu_probe(void) +static int show_cpuinfo(struct seq_file *m, void *__unused) +{ + seq_printf(m, + "cpu\t\t: %s\n" + "fpu\t\t: %s\n" + "promlib\t\t: Version %d Revision %d\n" + "prom\t\t: %d.%d\n" + "type\t\t: %s\n" + "ncpus probed\t: %d\n" + "ncpus active\t: %d\n" +#ifndef CONFIG_SMP + "CPU0Bogo\t: %lu.%02lu\n" + "CPU0ClkTck\t: %ld\n" +#endif + , + sparc_cpu_type, + sparc_fpu_type , + romvec->pv_romvers, + prom_rev, + romvec->pv_printrev >> 16, + romvec->pv_printrev & 0xffff, + &cputypval[0], + ncpus_probed, + num_online_cpus() +#ifndef CONFIG_SMP + , cpu_data(0).udelay_val/(500000/HZ), + (cpu_data(0).udelay_val/(5000/HZ)) % 100, + cpu_data(0).clock_tick +#endif + ); + +#ifdef CONFIG_SMP + smp_bogo(m); +#endif + mmu_info(m); +#ifdef CONFIG_SMP + smp_info(m); +#endif + return 0; +} +#endif /* CONFIG_SPARC32 */ + +#ifdef CONFIG_SPARC64 +unsigned int dcache_parity_tl1_occurred; +unsigned int icache_parity_tl1_occurred; + + +static int show_cpuinfo(struct seq_file *m, void *__unused) +{ + seq_printf(m, + "cpu\t\t: %s\n" + "fpu\t\t: %s\n" + "pmu\t\t: %s\n" + "prom\t\t: %s\n" + "type\t\t: %s\n" + "ncpus probed\t: %d\n" + "ncpus active\t: %d\n" + "D$ parity tl1\t: %u\n" + "I$ parity tl1\t: %u\n" +#ifndef CONFIG_SMP + "Cpu0ClkTck\t: %016lx\n" +#endif + , + sparc_cpu_type, + sparc_fpu_type, + sparc_pmu_type, + prom_version, + ((tlb_type == hypervisor) ? + "sun4v" : + "sun4u"), + ncpus_probed, + num_online_cpus(), + dcache_parity_tl1_occurred, + icache_parity_tl1_occurred +#ifndef CONFIG_SMP + , cpu_data(0).clock_tick +#endif + ); +#ifdef CONFIG_SMP + smp_bogo(m); +#endif + mmu_info(m); +#ifdef CONFIG_SMP + smp_info(m); +#endif + return 0; +} +#endif /* CONFIG_SPARC64 */ + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + /* The pointer we are returning is arbitrary, + * it just has to be non-NULL and not IS_ERR + * in the success case. + */ + return *pos == 0 ? &c_start : NULL; +} + +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} + +static void c_stop(struct seq_file *m, void *v) +{ +} + +const struct seq_operations cpuinfo_op = { + .start =c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, +}; + +#ifdef CONFIG_SPARC32 +static int __init cpu_type_probe(void) { int psr_impl, psr_vers, fpu_vers; int psr; @@ -332,8 +453,12 @@ void __cpuinit cpu_probe(void) put_psr(psr); set_cpu_and_fpu(psr_impl, psr_vers, fpu_vers); + + return 0; } -#else +#endif /* CONFIG_SPARC32 */ + +#ifdef CONFIG_SPARC64 static void __init sun4v_cpu_probe(void) { switch (sun4v_chip_type) { @@ -374,6 +499,6 @@ static int __init cpu_type_probe(void) } return 0; } +#endif /* CONFIG_SPARC64 */ early_initcall(cpu_type_probe); -#endif diff --git a/arch/sparc/kernel/cpumap.c b/arch/sparc/kernel/cpumap.c index 8de64c8126bc..d91fd782743a 100644 --- a/arch/sparc/kernel/cpumap.c +++ b/arch/sparc/kernel/cpumap.c @@ -202,7 +202,7 @@ static struct cpuinfo_tree *build_cpuinfo_tree(void) new_tree->total_nodes = n; memcpy(&new_tree->level, tmp_level, sizeof(tmp_level)); - prev_cpu = cpu = first_cpu(cpu_online_map); + prev_cpu = cpu = cpumask_first(cpu_online_mask); /* Initialize all levels in the tree with the first CPU */ for (level = CPUINFO_LVL_PROC; level >= CPUINFO_LVL_ROOT; level--) { @@ -381,7 +381,7 @@ static int simple_map_to_cpu(unsigned int index) } /* Impossible, since num_online_cpus() <= num_possible_cpus() */ - return first_cpu(cpu_online_map); + return cpumask_first(cpu_online_mask); } static int _map_to_cpu(unsigned int index) diff --git a/arch/sparc/kernel/devices.c b/arch/sparc/kernel/devices.c index d2eddd6647cd..113c052c3043 100644 --- a/arch/sparc/kernel/devices.c +++ b/arch/sparc/kernel/devices.c @@ -20,7 +20,6 @@ #include <asm/system.h> #include <asm/cpudata.h> -extern void cpu_probe(void); extern void clock_stop_probe(void); /* tadpole.c */ extern void sun4c_probe_memerr_reg(void); @@ -115,7 +114,7 @@ int cpu_get_hwmid(phandle prom_node) void __init device_scan(void) { - prom_printf("Booting Linux...\n"); + printk(KERN_NOTICE "Booting Linux...\n"); #ifndef CONFIG_SMP { @@ -133,7 +132,6 @@ void __init device_scan(void) } #endif /* !CONFIG_SMP */ - cpu_probe(); { extern void auxio_probe(void); extern void auxio_power_probe(void); diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c index 3add4de8a1a9..dd1342c0a3be 100644 --- a/arch/sparc/kernel/ds.c +++ b/arch/sparc/kernel/ds.c @@ -497,7 +497,7 @@ static void dr_cpu_init_response(struct ds_data *resp, u64 req_num, tag->num_records = ncpus; i = 0; - for_each_cpu_mask(cpu, *mask) { + for_each_cpu(cpu, mask) { ent[i].cpu = cpu; ent[i].result = DR_CPU_RES_OK; ent[i].stat = default_stat; @@ -534,7 +534,7 @@ static int __cpuinit dr_cpu_configure(struct ds_info *dp, int resp_len, ncpus, cpu; unsigned long flags; - ncpus = cpus_weight(*mask); + ncpus = cpumask_weight(mask); resp_len = dr_cpu_size_response(ncpus); resp = kzalloc(resp_len, GFP_KERNEL); if (!resp) @@ -547,7 +547,7 @@ static int __cpuinit dr_cpu_configure(struct ds_info *dp, mdesc_populate_present_mask(mask); mdesc_fill_in_cpu_data(mask); - for_each_cpu_mask(cpu, *mask) { + for_each_cpu(cpu, mask) { int err; printk(KERN_INFO "ds-%llu: Starting cpu %d...\n", @@ -593,7 +593,7 @@ static int dr_cpu_unconfigure(struct ds_info *dp, int resp_len, ncpus, cpu; unsigned long flags; - ncpus = cpus_weight(*mask); + ncpus = cpumask_weight(mask); resp_len = dr_cpu_size_response(ncpus); resp = kzalloc(resp_len, GFP_KERNEL); if (!resp) @@ -603,7 +603,7 @@ static int dr_cpu_unconfigure(struct ds_info *dp, resp_len, ncpus, mask, DR_CPU_STAT_UNCONFIGURED); - for_each_cpu_mask(cpu, *mask) { + for_each_cpu(cpu, mask) { int err; printk(KERN_INFO "ds-%llu: Shutting down cpu %d...\n", @@ -649,13 +649,13 @@ static void __cpuinit dr_cpu_data(struct ds_info *dp, purge_dups(cpu_list, tag->num_records); - cpus_clear(mask); + cpumask_clear(&mask); for (i = 0; i < tag->num_records; i++) { if (cpu_list[i] == CPU_SENTINEL) continue; if (cpu_list[i] < nr_cpu_ids) - cpu_set(cpu_list[i], mask); + cpumask_set_cpu(cpu_list[i], &mask); } if (tag->type == DR_CPU_CONFIGURE) diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S index 6da784a5612b..8341963f4c84 100644 --- a/arch/sparc/kernel/entry.S +++ b/arch/sparc/kernel/entry.S @@ -269,19 +269,22 @@ smp4m_ticker: /* Here is where we check for possible SMP IPI passed to us * on some level other than 15 which is the NMI and only used * for cross calls. That has a separate entry point below. + * + * IPIs are sent on Level 12, 13 and 14. See IRQ_IPI_*. */ maybe_smp4m_msg: GET_PROCESSOR4M_ID(o3) sethi %hi(sun4m_irq_percpu), %l5 sll %o3, 2, %o3 or %l5, %lo(sun4m_irq_percpu), %o5 - sethi %hi(0x40000000), %o2 + sethi %hi(0x70000000), %o2 ! Check all soft-IRQs ld [%o5 + %o3], %o1 ld [%o1 + 0x00], %o3 ! sun4m_irq_percpu[cpu]->pending andcc %o3, %o2, %g0 be,a smp4m_ticker cmp %l7, 14 - st %o2, [%o1 + 0x04] ! sun4m_irq_percpu[cpu]->clear=0x40000000 + /* Soft-IRQ IPI */ + st %o2, [%o1 + 0x04] ! sun4m_irq_percpu[cpu]->clear=0x70000000 WRITE_PAUSE ld [%o1 + 0x00], %g0 ! sun4m_irq_percpu[cpu]->pending WRITE_PAUSE @@ -290,9 +293,27 @@ maybe_smp4m_msg: WRITE_PAUSE wr %l4, PSR_ET, %psr WRITE_PAUSE - call smp_reschedule_irq + sll %o2, 28, %o2 ! shift for simpler checks below +maybe_smp4m_msg_check_single: + andcc %o2, 0x1, %g0 + beq,a maybe_smp4m_msg_check_mask + andcc %o2, 0x2, %g0 + call smp_call_function_single_interrupt nop - + andcc %o2, 0x2, %g0 +maybe_smp4m_msg_check_mask: + beq,a maybe_smp4m_msg_check_resched + andcc %o2, 0x4, %g0 + call smp_call_function_interrupt + nop + andcc %o2, 0x4, %g0 +maybe_smp4m_msg_check_resched: + /* rescheduling is done in RESTORE_ALL regardless, but incr stats */ + beq,a maybe_smp4m_msg_out + nop + call smp_resched_interrupt + nop +maybe_smp4m_msg_out: RESTORE_ALL .align 4 @@ -401,18 +422,18 @@ linux_trap_ipi15_sun4d: 1: b,a 1b #ifdef CONFIG_SPARC_LEON - - .globl smpleon_ticker - /* SMP per-cpu ticker interrupts are handled specially. */ -smpleon_ticker: + .globl smpleon_ipi + .extern leon_ipi_interrupt + /* SMP per-cpu IPI interrupts are handled specially. */ +smpleon_ipi: SAVE_ALL or %l0, PSR_PIL, %g2 wr %g2, 0x0, %psr WRITE_PAUSE wr %g2, PSR_ET, %psr WRITE_PAUSE - call leon_percpu_timer_interrupt - add %sp, STACKFRAME_SZ, %o0 + call leonsmp_ipi_interrupt + add %sp, STACKFRAME_SZ, %o1 ! pt_regs wr %l0, PSR_ET, %psr WRITE_PAUSE RESTORE_ALL diff --git a/arch/sparc/kernel/head_32.S b/arch/sparc/kernel/head_32.S index 59423491cef8..587785759838 100644 --- a/arch/sparc/kernel/head_32.S +++ b/arch/sparc/kernel/head_32.S @@ -810,31 +810,25 @@ found_version: got_prop: #ifdef CONFIG_SPARC_LEON /* no cpu-type check is needed, it is a SPARC-LEON */ -#ifdef CONFIG_SMP - ba leon_smp_init - nop - .global leon_smp_init -leon_smp_init: - sethi %hi(boot_cpu_id), %g1 ! master always 0 - stb %g0, [%g1 + %lo(boot_cpu_id)] - sethi %hi(boot_cpu_id4), %g1 ! master always 0 - stb %g0, [%g1 + %lo(boot_cpu_id4)] + sethi %hi(boot_cpu_id), %g2 ! boot-cpu index - rd %asr17,%g1 - srl %g1,28,%g1 +#ifdef CONFIG_SMP + ldub [%g2 + %lo(boot_cpu_id)], %g1 + cmp %g1, 0xff ! unset means first CPU + bne leon_smp_cpu_startup ! continue only with master + nop +#endif + /* Get CPU-ID from most significant 4-bit of ASR17 */ + rd %asr17, %g1 + srl %g1, 28, %g1 - cmp %g0,%g1 - beq sun4c_continue_boot !continue with master - nop + /* Update boot_cpu_id only on boot cpu */ + stub %g1, [%g2 + %lo(boot_cpu_id)] - ba leon_smp_cpu_startup - nop -#else ba sun4c_continue_boot nop #endif -#endif set cputypval, %o2 ldub [%o2 + 0x4], %l1 @@ -893,9 +887,6 @@ sun4d_init: sta %g4, [%g0] ASI_M_VIKING_TMP1 sethi %hi(boot_cpu_id), %g5 stb %g4, [%g5 + %lo(boot_cpu_id)] - sll %g4, 2, %g4 - sethi %hi(boot_cpu_id4), %g5 - stb %g4, [%g5 + %lo(boot_cpu_id4)] #endif /* Fall through to sun4m_init */ @@ -1024,14 +1015,28 @@ sun4c_continue_boot: bl 1b add %o0, 0x1, %o0 + /* If boot_cpu_id has not been setup by machine specific + * init-code above we default it to zero. + */ + sethi %hi(boot_cpu_id), %g2 + ldub [%g2 + %lo(boot_cpu_id)], %g3 + cmp %g3, 0xff + bne 1f + nop + mov %g0, %g3 + stub %g3, [%g2 + %lo(boot_cpu_id)] + +1: /* boot_cpu_id set. calculate boot_cpu_id4 = boot_cpu_id*4 */ + sll %g3, 2, %g3 + sethi %hi(boot_cpu_id4), %g2 + stub %g3, [%g2 + %lo(boot_cpu_id4)] + /* Initialize the uwinmask value for init task just in case. * But first make current_set[boot_cpu_id] point to something useful. */ set init_thread_union, %g6 set current_set, %g2 #ifdef CONFIG_SMP - sethi %hi(boot_cpu_id4), %g3 - ldub [%g3 + %lo(boot_cpu_id4)], %g3 st %g6, [%g2] add %g2, %g3, %g2 #endif diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index c6ce9a6a4790..1c9c80a1a86a 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -50,10 +50,15 @@ #include <asm/io-unit.h> #include <asm/leon.h> +/* This function must make sure that caches and memory are coherent after DMA + * On LEON systems without cache snooping it flushes the entire D-CACHE. + */ #ifndef CONFIG_SPARC_LEON -#define mmu_inval_dma_area(p, l) /* Anton pulled it out for 2.4.0-xx */ +static inline void dma_make_coherent(unsigned long pa, unsigned long len) +{ +} #else -static inline void mmu_inval_dma_area(void *va, unsigned long len) +static inline void dma_make_coherent(unsigned long pa, unsigned long len) { if (!sparc_leon3_snooping_enabled()) leon_flush_dcache_all(); @@ -284,7 +289,6 @@ static void *sbus_alloc_coherent(struct device *dev, size_t len, printk("sbus_alloc_consistent: cannot occupy 0x%lx", len_total); goto err_nova; } - mmu_inval_dma_area((void *)va, len_total); // XXX The mmu_map_dma_area does this for us below, see comments. // sparc_mapiorange(0, virt_to_phys(va), res->start, len_total); @@ -336,7 +340,6 @@ static void sbus_free_coherent(struct device *dev, size_t n, void *p, release_resource(res); kfree(res); - /* mmu_inval_dma_area(va, n); */ /* it's consistent, isn't it */ pgv = virt_to_page(p); mmu_unmap_dma_area(dev, ba, n); @@ -463,7 +466,6 @@ static void *pci32_alloc_coherent(struct device *dev, size_t len, printk("pci_alloc_consistent: cannot occupy 0x%lx", len_total); goto err_nova; } - mmu_inval_dma_area(va, len_total); sparc_mapiorange(0, virt_to_phys(va), res->start, len_total); *pba = virt_to_phys(va); /* equals virt_to_bus (R.I.P.) for us. */ @@ -489,7 +491,6 @@ static void pci32_free_coherent(struct device *dev, size_t n, void *p, dma_addr_t ba) { struct resource *res; - void *pgp; if ((res = _sparc_find_resource(&_sparc_dvma, (unsigned long)p)) == NULL) { @@ -509,14 +510,12 @@ static void pci32_free_coherent(struct device *dev, size_t n, void *p, return; } - pgp = phys_to_virt(ba); /* bus_to_virt actually */ - mmu_inval_dma_area(pgp, n); + dma_make_coherent(ba, n); sparc_unmapiorange((unsigned long)p, n); release_resource(res); kfree(res); - - free_pages((unsigned long)pgp, get_order(n)); + free_pages((unsigned long)phys_to_virt(ba), get_order(n)); } /* @@ -535,7 +534,7 @@ static void pci32_unmap_page(struct device *dev, dma_addr_t ba, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { if (dir != PCI_DMA_TODEVICE) - mmu_inval_dma_area(phys_to_virt(ba), PAGE_ALIGN(size)); + dma_make_coherent(ba, PAGE_ALIGN(size)); } /* Map a set of buffers described by scatterlist in streaming @@ -562,8 +561,7 @@ static int pci32_map_sg(struct device *device, struct scatterlist *sgl, /* IIep is write-through, not flushing. */ for_each_sg(sgl, sg, nents, n) { - BUG_ON(page_address(sg_page(sg)) == NULL); - sg->dma_address = virt_to_phys(sg_virt(sg)); + sg->dma_address = sg_phys(sg); sg->dma_length = sg->length; } return nents; @@ -582,9 +580,7 @@ static void pci32_unmap_sg(struct device *dev, struct scatterlist *sgl, if (dir != PCI_DMA_TODEVICE) { for_each_sg(sgl, sg, nents, n) { - BUG_ON(page_address(sg_page(sg)) == NULL); - mmu_inval_dma_area(page_address(sg_page(sg)), - PAGE_ALIGN(sg->length)); + dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length)); } } } @@ -603,8 +599,7 @@ static void pci32_sync_single_for_cpu(struct device *dev, dma_addr_t ba, size_t size, enum dma_data_direction dir) { if (dir != PCI_DMA_TODEVICE) { - mmu_inval_dma_area(phys_to_virt(ba), - PAGE_ALIGN(size)); + dma_make_coherent(ba, PAGE_ALIGN(size)); } } @@ -612,8 +607,7 @@ static void pci32_sync_single_for_device(struct device *dev, dma_addr_t ba, size_t size, enum dma_data_direction dir) { if (dir != PCI_DMA_TODEVICE) { - mmu_inval_dma_area(phys_to_virt(ba), - PAGE_ALIGN(size)); + dma_make_coherent(ba, PAGE_ALIGN(size)); } } @@ -631,9 +625,7 @@ static void pci32_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, if (dir != PCI_DMA_TODEVICE) { for_each_sg(sgl, sg, nents, n) { - BUG_ON(page_address(sg_page(sg)) == NULL); - mmu_inval_dma_area(page_address(sg_page(sg)), - PAGE_ALIGN(sg->length)); + dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length)); } } } @@ -646,9 +638,7 @@ static void pci32_sync_sg_for_device(struct device *device, struct scatterlist * if (dir != PCI_DMA_TODEVICE) { for_each_sg(sgl, sg, nents, n) { - BUG_ON(page_address(sg_page(sg)) == NULL); - mmu_inval_dma_area(page_address(sg_page(sg)), - PAGE_ALIGN(sg->length)); + dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length)); } } } diff --git a/arch/sparc/kernel/irq.h b/arch/sparc/kernel/irq.h index 008453b798ec..100b9c204e78 100644 --- a/arch/sparc/kernel/irq.h +++ b/arch/sparc/kernel/irq.h @@ -2,6 +2,23 @@ #include <asm/btfixup.h> +struct irq_bucket { + struct irq_bucket *next; + unsigned int real_irq; + unsigned int irq; + unsigned int pil; +}; + +#define SUN4D_MAX_BOARD 10 +#define SUN4D_MAX_IRQ ((SUN4D_MAX_BOARD + 2) << 5) + +/* Map between the irq identifier used in hw to the + * irq_bucket. The map is sufficient large to hold + * the sun4d hw identifiers. + */ +extern struct irq_bucket *irq_map[SUN4D_MAX_IRQ]; + + /* sun4m specific type definitions */ /* This maps direct to CPU specific interrupt registers */ @@ -35,6 +52,10 @@ struct sparc_irq_config { }; extern struct sparc_irq_config sparc_irq_config; +unsigned int irq_alloc(unsigned int real_irq, unsigned int pil); +void irq_link(unsigned int irq); +void irq_unlink(unsigned int irq); +void handler_irq(unsigned int pil, struct pt_regs *regs); /* Dave Redman (djhr@tadpole.co.uk) * changed these to function pointers.. it saves cycles and will allow @@ -44,33 +65,9 @@ extern struct sparc_irq_config sparc_irq_config; * Changed these to btfixup entities... It saves cycles :) */ -BTFIXUPDEF_CALL(void, disable_irq, unsigned int) -BTFIXUPDEF_CALL(void, enable_irq, unsigned int) -BTFIXUPDEF_CALL(void, disable_pil_irq, unsigned int) -BTFIXUPDEF_CALL(void, enable_pil_irq, unsigned int) BTFIXUPDEF_CALL(void, clear_clock_irq, void) BTFIXUPDEF_CALL(void, load_profile_irq, int, unsigned int) -static inline void __disable_irq(unsigned int irq) -{ - BTFIXUP_CALL(disable_irq)(irq); -} - -static inline void __enable_irq(unsigned int irq) -{ - BTFIXUP_CALL(enable_irq)(irq); -} - -static inline void disable_pil_irq(unsigned int irq) -{ - BTFIXUP_CALL(disable_pil_irq)(irq); -} - -static inline void enable_pil_irq(unsigned int irq) -{ - BTFIXUP_CALL(enable_pil_irq)(irq); -} - static inline void clear_clock_irq(void) { BTFIXUP_CALL(clear_clock_irq)(); @@ -89,4 +86,10 @@ BTFIXUPDEF_CALL(void, set_irq_udt, int) #define set_cpu_int(cpu,level) BTFIXUP_CALL(set_cpu_int)(cpu,level) #define clear_cpu_int(cpu,level) BTFIXUP_CALL(clear_cpu_int)(cpu,level) #define set_irq_udt(cpu) BTFIXUP_CALL(set_irq_udt)(cpu) + +/* All SUN4D IPIs are sent on this IRQ, may be shared with hard IRQs */ +#define SUN4D_IPI_IRQ 14 + +extern void sun4d_ipi_interrupt(void); + #endif diff --git a/arch/sparc/kernel/irq_32.c b/arch/sparc/kernel/irq_32.c index 7c93df4099cb..9b89d842913c 100644 --- a/arch/sparc/kernel/irq_32.c +++ b/arch/sparc/kernel/irq_32.c @@ -15,6 +15,7 @@ #include <linux/seq_file.h> #include <asm/cacheflush.h> +#include <asm/cpudata.h> #include <asm/pcic.h> #include <asm/leon.h> @@ -101,284 +102,173 @@ EXPORT_SYMBOL(arch_local_irq_restore); * directed CPU interrupts using the existing enable/disable irq code * with tweaks. * + * Sun4d complicates things even further. IRQ numbers are arbitrary + * 32-bit values in that case. Since this is similar to sparc64, + * we adopt a virtual IRQ numbering scheme as is done there. + * Virutal interrupt numbers are allocated by build_irq(). So NR_IRQS + * just becomes a limit of how many interrupt sources we can handle in + * a single system. Even fully loaded SS2000 machines top off at + * about 32 interrupt sources or so, therefore a NR_IRQS value of 64 + * is more than enough. + * + * We keep a map of per-PIL enable interrupts. These get wired + * up via the irq_chip->startup() method which gets invoked by + * the generic IRQ layer during request_irq(). */ +/* Table of allocated irqs. Unused entries has irq == 0 */ +static struct irq_bucket irq_table[NR_IRQS]; +/* Protect access to irq_table */ +static DEFINE_SPINLOCK(irq_table_lock); -/* - * Dave Redman (djhr@tadpole.co.uk) - * - * There used to be extern calls and hard coded values here.. very sucky! - * instead, because some of the devices attach very early, I do something - * equally sucky but at least we'll never try to free statically allocated - * space or call kmalloc before kmalloc_init :(. - * - * In fact it's the timer10 that attaches first.. then timer14 - * then kmalloc_init is called.. then the tty interrupts attach. - * hmmm.... - * - */ -#define MAX_STATIC_ALLOC 4 -struct irqaction static_irqaction[MAX_STATIC_ALLOC]; -int static_irq_count; - -static struct { - struct irqaction *action; - int flags; -} sparc_irq[NR_IRQS]; -#define SPARC_IRQ_INPROGRESS 1 - -/* Used to protect the IRQ action lists */ -DEFINE_SPINLOCK(irq_action_lock); +/* Map between the irq identifier used in hw to the irq_bucket. */ +struct irq_bucket *irq_map[SUN4D_MAX_IRQ]; +/* Protect access to irq_map */ +static DEFINE_SPINLOCK(irq_map_lock); -int show_interrupts(struct seq_file *p, void *v) +/* Allocate a new irq from the irq_table */ +unsigned int irq_alloc(unsigned int real_irq, unsigned int pil) { - int i = *(loff_t *)v; - struct irqaction *action; unsigned long flags; -#ifdef CONFIG_SMP - int j; -#endif + unsigned int i; + + spin_lock_irqsave(&irq_table_lock, flags); + for (i = 1; i < NR_IRQS; i++) { + if (irq_table[i].real_irq == real_irq && irq_table[i].pil == pil) + goto found; + } - if (sparc_cpu_model == sun4d) - return show_sun4d_interrupts(p, v); + for (i = 1; i < NR_IRQS; i++) { + if (!irq_table[i].irq) + break; + } - spin_lock_irqsave(&irq_action_lock, flags); if (i < NR_IRQS) { - action = sparc_irq[i].action; - if (!action) - goto out_unlock; - seq_printf(p, "%3d: ", i); -#ifndef CONFIG_SMP - seq_printf(p, "%10u ", kstat_irqs(i)); -#else - for_each_online_cpu(j) { - seq_printf(p, "%10u ", - kstat_cpu(j).irqs[i]); - } -#endif - seq_printf(p, " %c %s", - (action->flags & IRQF_DISABLED) ? '+' : ' ', - action->name); - for (action = action->next; action; action = action->next) { - seq_printf(p, ",%s %s", - (action->flags & IRQF_DISABLED) ? " +" : "", - action->name); - } - seq_putc(p, '\n'); + irq_table[i].real_irq = real_irq; + irq_table[i].irq = i; + irq_table[i].pil = pil; + } else { + printk(KERN_ERR "IRQ: Out of virtual IRQs.\n"); + i = 0; } -out_unlock: - spin_unlock_irqrestore(&irq_action_lock, flags); - return 0; +found: + spin_unlock_irqrestore(&irq_table_lock, flags); + + return i; } -void free_irq(unsigned int irq, void *dev_id) +/* Based on a single pil handler_irq may need to call several + * interrupt handlers. Use irq_map as entry to irq_table, + * and let each entry in irq_table point to the next entry. + */ +void irq_link(unsigned int irq) { - struct irqaction *action; - struct irqaction **actionp; + struct irq_bucket *p; unsigned long flags; - unsigned int cpu_irq; - - if (sparc_cpu_model == sun4d) { - sun4d_free_irq(irq, dev_id); - return; - } - cpu_irq = irq & (NR_IRQS - 1); - if (cpu_irq > 14) { /* 14 irq levels on the sparc */ - printk(KERN_ERR "Trying to free bogus IRQ %d\n", irq); - return; - } + unsigned int pil; - spin_lock_irqsave(&irq_action_lock, flags); + BUG_ON(irq >= NR_IRQS); - actionp = &sparc_irq[cpu_irq].action; - action = *actionp; + spin_lock_irqsave(&irq_map_lock, flags); - if (!action->handler) { - printk(KERN_ERR "Trying to free free IRQ%d\n", irq); - goto out_unlock; - } - if (dev_id) { - for (; action; action = action->next) { - if (action->dev_id == dev_id) - break; - actionp = &action->next; - } - if (!action) { - printk(KERN_ERR "Trying to free free shared IRQ%d\n", - irq); - goto out_unlock; - } - } else if (action->flags & IRQF_SHARED) { - printk(KERN_ERR "Trying to free shared IRQ%d with NULL device ID\n", - irq); - goto out_unlock; - } - if (action->flags & SA_STATIC_ALLOC) { - /* - * This interrupt is marked as specially allocated - * so it is a bad idea to free it. - */ - printk(KERN_ERR "Attempt to free statically allocated IRQ%d (%s)\n", - irq, action->name); - goto out_unlock; - } - - *actionp = action->next; + p = &irq_table[irq]; + pil = p->pil; + BUG_ON(pil > SUN4D_MAX_IRQ); + p->next = irq_map[pil]; + irq_map[pil] = p; - spin_unlock_irqrestore(&irq_action_lock, flags); + spin_unlock_irqrestore(&irq_map_lock, flags); +} - synchronize_irq(irq); +void irq_unlink(unsigned int irq) +{ + struct irq_bucket *p, **pnext; + unsigned long flags; - spin_lock_irqsave(&irq_action_lock, flags); + BUG_ON(irq >= NR_IRQS); - kfree(action); + spin_lock_irqsave(&irq_map_lock, flags); - if (!sparc_irq[cpu_irq].action) - __disable_irq(irq); + p = &irq_table[irq]; + BUG_ON(p->pil > SUN4D_MAX_IRQ); + pnext = &irq_map[p->pil]; + while (*pnext != p) + pnext = &(*pnext)->next; + *pnext = p->next; -out_unlock: - spin_unlock_irqrestore(&irq_action_lock, flags); + spin_unlock_irqrestore(&irq_map_lock, flags); } -EXPORT_SYMBOL(free_irq); - -/* - * This is called when we want to synchronize with - * interrupts. We may for example tell a device to - * stop sending interrupts: but to make sure there - * are no interrupts that are executing on another - * CPU we need to call this function. - */ -#ifdef CONFIG_SMP -void synchronize_irq(unsigned int irq) -{ - unsigned int cpu_irq; - cpu_irq = irq & (NR_IRQS - 1); - while (sparc_irq[cpu_irq].flags & SPARC_IRQ_INPROGRESS) - cpu_relax(); -} -EXPORT_SYMBOL(synchronize_irq); -#endif /* SMP */ -void unexpected_irq(int irq, void *dev_id, struct pt_regs *regs) +/* /proc/interrupts printing */ +int arch_show_interrupts(struct seq_file *p, int prec) { - int i; - struct irqaction *action; - unsigned int cpu_irq; + int j; - cpu_irq = irq & (NR_IRQS - 1); - action = sparc_irq[cpu_irq].action; - - printk(KERN_ERR "IO device interrupt, irq = %d\n", irq); - printk(KERN_ERR "PC = %08lx NPC = %08lx FP=%08lx\n", regs->pc, - regs->npc, regs->u_regs[14]); - if (action) { - printk(KERN_ERR "Expecting: "); - for (i = 0; i < 16; i++) - if (action->handler) - printk(KERN_CONT "[%s:%d:0x%x] ", action->name, - i, (unsigned int)action->handler); - } - printk(KERN_ERR "AIEEE\n"); - panic("bogus interrupt received"); +#ifdef CONFIG_SMP + seq_printf(p, "RES: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_data(j).irq_resched_count); + seq_printf(p, " IPI rescheduling interrupts\n"); + seq_printf(p, "CAL: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_data(j).irq_call_count); + seq_printf(p, " IPI function call interrupts\n"); +#endif + seq_printf(p, "NMI: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_data(j).counter); + seq_printf(p, " Non-maskable interrupts\n"); + return 0; } -void handler_irq(int pil, struct pt_regs *regs) +void handler_irq(unsigned int pil, struct pt_regs *regs) { struct pt_regs *old_regs; - struct irqaction *action; - int cpu = smp_processor_id(); + struct irq_bucket *p; + BUG_ON(pil > 15); old_regs = set_irq_regs(regs); irq_enter(); - disable_pil_irq(pil); -#ifdef CONFIG_SMP - /* Only rotate on lower priority IRQs (scsi, ethernet, etc.). */ - if ((sparc_cpu_model==sun4m) && (pil < 10)) - smp4m_irq_rotate(cpu); -#endif - action = sparc_irq[pil].action; - sparc_irq[pil].flags |= SPARC_IRQ_INPROGRESS; - kstat_cpu(cpu).irqs[pil]++; - do { - if (!action || !action->handler) - unexpected_irq(pil, NULL, regs); - action->handler(pil, action->dev_id); - action = action->next; - } while (action); - sparc_irq[pil].flags &= ~SPARC_IRQ_INPROGRESS; - enable_pil_irq(pil); + + p = irq_map[pil]; + while (p) { + struct irq_bucket *next = p->next; + + generic_handle_irq(p->irq); + p = next; + } irq_exit(); set_irq_regs(old_regs); } #if defined(CONFIG_BLK_DEV_FD) || defined(CONFIG_BLK_DEV_FD_MODULE) +static unsigned int floppy_irq; -/* - * Fast IRQs on the Sparc can only have one routine attached to them, - * thus no sharing possible. - */ -static int request_fast_irq(unsigned int irq, - void (*handler)(void), - unsigned long irqflags, const char *devname) +int sparc_floppy_request_irq(unsigned int irq, irq_handler_t irq_handler) { - struct irqaction *action; - unsigned long flags; unsigned int cpu_irq; - int ret; + int err; + #if defined CONFIG_SMP && !defined CONFIG_SPARC_LEON struct tt_entry *trap_table; #endif - cpu_irq = irq & (NR_IRQS - 1); - if (cpu_irq > 14) { - ret = -EINVAL; - goto out; - } - if (!handler) { - ret = -EINVAL; - goto out; - } - spin_lock_irqsave(&irq_action_lock, flags); + err = request_irq(irq, irq_handler, 0, "floppy", NULL); + if (err) + return -1; - action = sparc_irq[cpu_irq].action; - if (action) { - if (action->flags & IRQF_SHARED) - panic("Trying to register fast irq when already shared.\n"); - if (irqflags & IRQF_SHARED) - panic("Trying to register fast irq as shared.\n"); + /* Save for later use in floppy interrupt handler */ + floppy_irq = irq; - /* Anyway, someone already owns it so cannot be made fast. */ - printk(KERN_ERR "request_fast_irq: Trying to register yet already owned.\n"); - ret = -EBUSY; - goto out_unlock; - } - - /* - * If this is flagged as statically allocated then we use our - * private struct which is never freed. - */ - if (irqflags & SA_STATIC_ALLOC) { - if (static_irq_count < MAX_STATIC_ALLOC) - action = &static_irqaction[static_irq_count++]; - else - printk(KERN_ERR "Fast IRQ%d (%s) SA_STATIC_ALLOC failed using kmalloc\n", - irq, devname); - } - - if (action == NULL) - action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC); - if (!action) { - ret = -ENOMEM; - goto out_unlock; - } + cpu_irq = (irq & (NR_IRQS - 1)); /* Dork with trap table if we get this far. */ #define INSTANTIATE(table) \ table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_one = SPARC_RD_PSR_L0; \ table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_two = \ - SPARC_BRANCH((unsigned long) handler, \ + SPARC_BRANCH((unsigned long) floppy_hardint, \ (unsigned long) &table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_two);\ table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_three = SPARC_RD_WIM_L3; \ table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_four = SPARC_NOP; @@ -399,22 +289,9 @@ static int request_fast_irq(unsigned int irq, * writing we have no CPU-neutral interface to fine-grained flushes. */ flush_cache_all(); - - action->flags = irqflags; - action->name = devname; - action->dev_id = NULL; - action->next = NULL; - - sparc_irq[cpu_irq].action = action; - - __enable_irq(irq); - - ret = 0; -out_unlock: - spin_unlock_irqrestore(&irq_action_lock, flags); -out: - return ret; + return 0; } +EXPORT_SYMBOL(sparc_floppy_request_irq); /* * These variables are used to access state from the assembler @@ -440,154 +317,23 @@ EXPORT_SYMBOL(pdma_base); unsigned long pdma_areasize; EXPORT_SYMBOL(pdma_areasize); -static irq_handler_t floppy_irq_handler; - +/* Use the generic irq support to call floppy_interrupt + * which was setup using request_irq() in sparc_floppy_request_irq(). + * We only have one floppy interrupt so we do not need to check + * for additional handlers being wired up by irq_link() + */ void sparc_floppy_irq(int irq, void *dev_id, struct pt_regs *regs) { struct pt_regs *old_regs; - int cpu = smp_processor_id(); old_regs = set_irq_regs(regs); - disable_pil_irq(irq); irq_enter(); - kstat_cpu(cpu).irqs[irq]++; - floppy_irq_handler(irq, dev_id); + generic_handle_irq(floppy_irq); irq_exit(); - enable_pil_irq(irq); set_irq_regs(old_regs); - /* - * XXX Eek, it's totally changed with preempt_count() and such - * if (softirq_pending(cpu)) - * do_softirq(); - */ -} - -int sparc_floppy_request_irq(int irq, unsigned long flags, - irq_handler_t irq_handler) -{ - floppy_irq_handler = irq_handler; - return request_fast_irq(irq, floppy_hardint, flags, "floppy"); } -EXPORT_SYMBOL(sparc_floppy_request_irq); - #endif -int request_irq(unsigned int irq, - irq_handler_t handler, - unsigned long irqflags, const char *devname, void *dev_id) -{ - struct irqaction *action, **actionp; - unsigned long flags; - unsigned int cpu_irq; - int ret; - - if (sparc_cpu_model == sun4d) - return sun4d_request_irq(irq, handler, irqflags, devname, dev_id); - - cpu_irq = irq & (NR_IRQS - 1); - if (cpu_irq > 14) { - ret = -EINVAL; - goto out; - } - if (!handler) { - ret = -EINVAL; - goto out; - } - - spin_lock_irqsave(&irq_action_lock, flags); - - actionp = &sparc_irq[cpu_irq].action; - action = *actionp; - if (action) { - if (!(action->flags & IRQF_SHARED) || !(irqflags & IRQF_SHARED)) { - ret = -EBUSY; - goto out_unlock; - } - if ((action->flags & IRQF_DISABLED) != (irqflags & IRQF_DISABLED)) { - printk(KERN_ERR "Attempt to mix fast and slow interrupts on IRQ%d denied\n", - irq); - ret = -EBUSY; - goto out_unlock; - } - for ( ; action; action = *actionp) - actionp = &action->next; - } - - /* If this is flagged as statically allocated then we use our - * private struct which is never freed. - */ - if (irqflags & SA_STATIC_ALLOC) { - if (static_irq_count < MAX_STATIC_ALLOC) - action = &static_irqaction[static_irq_count++]; - else - printk(KERN_ERR "Request for IRQ%d (%s) SA_STATIC_ALLOC failed using kmalloc\n", - irq, devname); - } - if (action == NULL) - action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC); - if (!action) { - ret = -ENOMEM; - goto out_unlock; - } - - action->handler = handler; - action->flags = irqflags; - action->name = devname; - action->next = NULL; - action->dev_id = dev_id; - - *actionp = action; - - __enable_irq(irq); - - ret = 0; -out_unlock: - spin_unlock_irqrestore(&irq_action_lock, flags); -out: - return ret; -} -EXPORT_SYMBOL(request_irq); - -void disable_irq_nosync(unsigned int irq) -{ - __disable_irq(irq); -} -EXPORT_SYMBOL(disable_irq_nosync); - -void disable_irq(unsigned int irq) -{ - __disable_irq(irq); -} -EXPORT_SYMBOL(disable_irq); - -void enable_irq(unsigned int irq) -{ - __enable_irq(irq); -} -EXPORT_SYMBOL(enable_irq); - -/* - * We really don't need these at all on the Sparc. We only have - * stubs here because they are exported to modules. - */ -unsigned long probe_irq_on(void) -{ - return 0; -} -EXPORT_SYMBOL(probe_irq_on); - -int probe_irq_off(unsigned long mask) -{ - return 0; -} -EXPORT_SYMBOL(probe_irq_off); - -static unsigned int build_device_irq(struct platform_device *op, - unsigned int real_irq) -{ - return real_irq; -} - /* djhr * This could probably be made indirect too and assigned in the CPU * bits of the code. That would be much nicer I think and would also @@ -598,8 +344,6 @@ static unsigned int build_device_irq(struct platform_device *op, void __init init_IRQ(void) { - sparc_irq_config.build_device_irq = build_device_irq; - switch (sparc_cpu_model) { case sun4c: case sun4: @@ -607,14 +351,11 @@ void __init init_IRQ(void) break; case sun4m: -#ifdef CONFIG_PCI pcic_probe(); - if (pcic_present()) { + if (pcic_present()) sun4m_pci_init_IRQ(); - break; - } -#endif - sun4m_init_IRQ(); + else + sun4m_init_IRQ(); break; case sun4d: @@ -632,9 +373,3 @@ void __init init_IRQ(void) btfixup(); } -#ifdef CONFIG_PROC_FS -void init_irq_proc(void) -{ - /* For now, nothing... */ -} -#endif /* CONFIG_PROC_FS */ diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index b1d275ce3435..4e78862d12fd 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -224,13 +224,13 @@ static int irq_choose_cpu(unsigned int irq, const struct cpumask *affinity) int cpuid; cpumask_copy(&mask, affinity); - if (cpus_equal(mask, cpu_online_map)) { + if (cpumask_equal(&mask, cpu_online_mask)) { cpuid = map_to_cpu(irq); } else { cpumask_t tmp; - cpus_and(tmp, cpu_online_map, mask); - cpuid = cpus_empty(tmp) ? map_to_cpu(irq) : first_cpu(tmp); + cpumask_and(&tmp, cpu_online_mask, &mask); + cpuid = cpumask_empty(&tmp) ? map_to_cpu(irq) : cpumask_first(&tmp); } return cpuid; diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h index 24ad449886be..6f6544cfa0ef 100644 --- a/arch/sparc/kernel/kernel.h +++ b/arch/sparc/kernel/kernel.h @@ -6,11 +6,9 @@ #include <asm/traps.h> /* cpu.c */ -extern const char *sparc_cpu_type; extern const char *sparc_pmu_type; -extern const char *sparc_fpu_type; - extern unsigned int fsr_storage; +extern int ncpus_probed; #ifdef CONFIG_SPARC32 /* cpu.c */ @@ -37,6 +35,7 @@ extern void sun4c_init_IRQ(void); extern unsigned int lvl14_resolution; extern void sun4m_init_IRQ(void); +extern void sun4m_unmask_profile_irq(void); extern void sun4m_clear_profile_irq(int cpu); /* sun4d_irq.c */ diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c index 2969f777fa11..2f538ac2e139 100644 --- a/arch/sparc/kernel/leon_kernel.c +++ b/arch/sparc/kernel/leon_kernel.c @@ -19,53 +19,70 @@ #include <asm/leon_amba.h> #include <asm/traps.h> #include <asm/cacheflush.h> +#include <asm/smp.h> +#include <asm/setup.h> #include "prom.h" #include "irq.h" struct leon3_irqctrl_regs_map *leon3_irqctrl_regs; /* interrupt controller base address */ struct leon3_gptimer_regs_map *leon3_gptimer_regs; /* timer controller base address */ -struct amba_apb_device leon_percpu_timer_dev[16]; int leondebug_irq_disable; int leon_debug_irqout; static int dummy_master_l10_counter; unsigned long amba_system_id; +static DEFINE_SPINLOCK(leon_irq_lock); unsigned long leon3_gptimer_irq; /* interrupt controller irq number */ unsigned long leon3_gptimer_idx; /* Timer Index (0..6) within Timer Core */ +int leon3_ticker_irq; /* Timer ticker IRQ */ unsigned int sparc_leon_eirq; -#define LEON_IMASK ((&leon3_irqctrl_regs->mask[0])) +#define LEON_IMASK(cpu) (&leon3_irqctrl_regs->mask[cpu]) +#define LEON_IACK (&leon3_irqctrl_regs->iclear) +#define LEON_DO_ACK_HW 1 -/* Return the IRQ of the pending IRQ on the extended IRQ controller */ -int sparc_leon_eirq_get(int eirq, int cpu) +/* Return the last ACKed IRQ by the Extended IRQ controller. It has already + * been (automatically) ACKed when the CPU takes the trap. + */ +static inline unsigned int leon_eirq_get(int cpu) { return LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->intid[cpu]) & 0x1f; } -irqreturn_t sparc_leon_eirq_isr(int dummy, void *dev_id) +/* Handle one or multiple IRQs from the extended interrupt controller */ +static void leon_handle_ext_irq(unsigned int irq, struct irq_desc *desc) { - printk(KERN_ERR "sparc_leon_eirq_isr: ERROR EXTENDED IRQ\n"); - return IRQ_HANDLED; + unsigned int eirq; + int cpu = sparc_leon3_cpuid(); + + eirq = leon_eirq_get(cpu); + if ((eirq & 0x10) && irq_map[eirq]->irq) /* bit4 tells if IRQ happened */ + generic_handle_irq(irq_map[eirq]->irq); } /* The extended IRQ controller has been found, this function registers it */ -void sparc_leon_eirq_register(int eirq) +void leon_eirq_setup(unsigned int eirq) { - int irq; + unsigned long mask, oldmask; + unsigned int veirq; - /* Register a "BAD" handler for this interrupt, it should never happen */ - irq = request_irq(eirq, sparc_leon_eirq_isr, - (IRQF_DISABLED | SA_STATIC_ALLOC), "extirq", NULL); - - if (irq) { - printk(KERN_ERR - "sparc_leon_eirq_register: unable to attach IRQ%d\n", - eirq); - } else { - sparc_leon_eirq = eirq; + if (eirq < 1 || eirq > 0xf) { + printk(KERN_ERR "LEON EXT IRQ NUMBER BAD: %d\n", eirq); + return; } + veirq = leon_build_device_irq(eirq, leon_handle_ext_irq, "extirq", 0); + + /* + * Unmask the Extended IRQ, the IRQs routed through the Ext-IRQ + * controller have a mask-bit of their own, so this is safe. + */ + irq_link(veirq); + mask = 1 << eirq; + oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(boot_cpu_id)); + LEON3_BYPASS_STORE_PA(LEON_IMASK(boot_cpu_id), (oldmask | mask)); + sparc_leon_eirq = eirq; } static inline unsigned long get_irqmask(unsigned int irq) @@ -83,35 +100,151 @@ static inline unsigned long get_irqmask(unsigned int irq) return mask; } -static void leon_enable_irq(unsigned int irq_nr) +#ifdef CONFIG_SMP +static int irq_choose_cpu(const struct cpumask *affinity) { - unsigned long mask, flags; - mask = get_irqmask(irq_nr); - local_irq_save(flags); - LEON3_BYPASS_STORE_PA(LEON_IMASK, - (LEON3_BYPASS_LOAD_PA(LEON_IMASK) | (mask))); - local_irq_restore(flags); + cpumask_t mask; + + cpus_and(mask, cpu_online_map, *affinity); + if (cpus_equal(mask, cpu_online_map) || cpus_empty(mask)) + return boot_cpu_id; + else + return first_cpu(mask); } +#else +#define irq_choose_cpu(affinity) boot_cpu_id +#endif -static void leon_disable_irq(unsigned int irq_nr) +static int leon_set_affinity(struct irq_data *data, const struct cpumask *dest, + bool force) { - unsigned long mask, flags; - mask = get_irqmask(irq_nr); - local_irq_save(flags); - LEON3_BYPASS_STORE_PA(LEON_IMASK, - (LEON3_BYPASS_LOAD_PA(LEON_IMASK) & ~(mask))); - local_irq_restore(flags); + unsigned long mask, oldmask, flags; + int oldcpu, newcpu; + + mask = (unsigned long)data->chip_data; + oldcpu = irq_choose_cpu(data->affinity); + newcpu = irq_choose_cpu(dest); + + if (oldcpu == newcpu) + goto out; + + /* unmask on old CPU first before enabling on the selected CPU */ + spin_lock_irqsave(&leon_irq_lock, flags); + oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(oldcpu)); + LEON3_BYPASS_STORE_PA(LEON_IMASK(oldcpu), (oldmask & ~mask)); + oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(newcpu)); + LEON3_BYPASS_STORE_PA(LEON_IMASK(newcpu), (oldmask | mask)); + spin_unlock_irqrestore(&leon_irq_lock, flags); +out: + return IRQ_SET_MASK_OK; +} + +static void leon_unmask_irq(struct irq_data *data) +{ + unsigned long mask, oldmask, flags; + int cpu; + + mask = (unsigned long)data->chip_data; + cpu = irq_choose_cpu(data->affinity); + spin_lock_irqsave(&leon_irq_lock, flags); + oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(cpu)); + LEON3_BYPASS_STORE_PA(LEON_IMASK(cpu), (oldmask | mask)); + spin_unlock_irqrestore(&leon_irq_lock, flags); +} + +static void leon_mask_irq(struct irq_data *data) +{ + unsigned long mask, oldmask, flags; + int cpu; + + mask = (unsigned long)data->chip_data; + cpu = irq_choose_cpu(data->affinity); + spin_lock_irqsave(&leon_irq_lock, flags); + oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(cpu)); + LEON3_BYPASS_STORE_PA(LEON_IMASK(cpu), (oldmask & ~mask)); + spin_unlock_irqrestore(&leon_irq_lock, flags); +} + +static unsigned int leon_startup_irq(struct irq_data *data) +{ + irq_link(data->irq); + leon_unmask_irq(data); + return 0; +} +static void leon_shutdown_irq(struct irq_data *data) +{ + leon_mask_irq(data); + irq_unlink(data->irq); +} + +/* Used by external level sensitive IRQ handlers on the LEON: ACK IRQ ctrl */ +static void leon_eoi_irq(struct irq_data *data) +{ + unsigned long mask = (unsigned long)data->chip_data; + + if (mask & LEON_DO_ACK_HW) + LEON3_BYPASS_STORE_PA(LEON_IACK, mask & ~LEON_DO_ACK_HW); +} + +static struct irq_chip leon_irq = { + .name = "leon", + .irq_startup = leon_startup_irq, + .irq_shutdown = leon_shutdown_irq, + .irq_mask = leon_mask_irq, + .irq_unmask = leon_unmask_irq, + .irq_eoi = leon_eoi_irq, + .irq_set_affinity = leon_set_affinity, +}; + +/* + * Build a LEON IRQ for the edge triggered LEON IRQ controller: + * Edge (normal) IRQ - handle_simple_irq, ack=DONT-CARE, never ack + * Level IRQ (PCI|Level-GPIO) - handle_fasteoi_irq, ack=1, ack after ISR + * Per-CPU Edge - handle_percpu_irq, ack=0 + */ +unsigned int leon_build_device_irq(unsigned int real_irq, + irq_flow_handler_t flow_handler, + const char *name, int do_ack) +{ + unsigned int irq; + unsigned long mask; + + irq = 0; + mask = get_irqmask(real_irq); + if (mask == 0) + goto out; + + irq = irq_alloc(real_irq, real_irq); + if (irq == 0) + goto out; + + if (do_ack) + mask |= LEON_DO_ACK_HW; + + irq_set_chip_and_handler_name(irq, &leon_irq, + flow_handler, name); + irq_set_chip_data(irq, (void *)mask); + +out: + return irq; +} + +static unsigned int _leon_build_device_irq(struct platform_device *op, + unsigned int real_irq) +{ + return leon_build_device_irq(real_irq, handle_simple_irq, "edge", 0); } void __init leon_init_timers(irq_handler_t counter_fn) { - int irq; + int irq, eirq; struct device_node *rootnp, *np, *nnp; struct property *pp; int len; - int cpu, icsel; + int icsel; int ampopts; + int err; leondebug_irq_disable = 0; leon_debug_irqout = 0; @@ -173,98 +306,85 @@ void __init leon_init_timers(irq_handler_t counter_fn) leon3_gptimer_irq = *(unsigned int *)pp->value; } while (0); - if (leon3_gptimer_regs && leon3_irqctrl_regs && leon3_gptimer_irq) { - LEON3_BYPASS_STORE_PA( - &leon3_gptimer_regs->e[leon3_gptimer_idx].val, 0); - LEON3_BYPASS_STORE_PA( - &leon3_gptimer_regs->e[leon3_gptimer_idx].rld, - (((1000000 / HZ) - 1))); - LEON3_BYPASS_STORE_PA( + if (!(leon3_gptimer_regs && leon3_irqctrl_regs && leon3_gptimer_irq)) + goto bad; + + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].val, 0); + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].rld, + (((1000000 / HZ) - 1))); + LEON3_BYPASS_STORE_PA( &leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl, 0); #ifdef CONFIG_SMP - leon_percpu_timer_dev[0].start = (int)leon3_gptimer_regs; - leon_percpu_timer_dev[0].irq = leon3_gptimer_irq + 1 + - leon3_gptimer_idx; - - if (!(LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->config) & - (1<<LEON3_GPTIMER_SEPIRQ))) { - prom_printf("irq timer not configured with separate irqs\n"); - BUG(); - } + leon3_ticker_irq = leon3_gptimer_irq + 1 + leon3_gptimer_idx; - LEON3_BYPASS_STORE_PA( - &leon3_gptimer_regs->e[leon3_gptimer_idx+1].val, 0); - LEON3_BYPASS_STORE_PA( - &leon3_gptimer_regs->e[leon3_gptimer_idx+1].rld, - (((1000000/HZ) - 1))); - LEON3_BYPASS_STORE_PA( - &leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl, 0); -# endif - - /* - * The IRQ controller may (if implemented) consist of multiple - * IRQ controllers, each mapped on a 4Kb boundary. - * Each CPU may be routed to different IRQCTRLs, however - * we assume that all CPUs (in SMP system) is routed to the - * same IRQ Controller, and for non-SMP only one IRQCTRL is - * accessed anyway. - * In AMP systems, Linux must run on CPU0 for the time being. - */ - cpu = sparc_leon3_cpuid(); - icsel = LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->icsel[cpu/8]); - icsel = (icsel >> ((7 - (cpu&0x7)) * 4)) & 0xf; - leon3_irqctrl_regs += icsel; - } else { - goto bad; + if (!(LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->config) & + (1<<LEON3_GPTIMER_SEPIRQ))) { + printk(KERN_ERR "timer not configured with separate irqs\n"); + BUG(); } - irq = request_irq(leon3_gptimer_irq+leon3_gptimer_idx, - counter_fn, - (IRQF_DISABLED | SA_STATIC_ALLOC), "timer", NULL); + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].val, + 0); + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].rld, + (((1000000/HZ) - 1))); + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl, + 0); +#endif - if (irq) { - printk(KERN_ERR "leon_time_init: unable to attach IRQ%d\n", - LEON_INTERRUPT_TIMER1); + /* + * The IRQ controller may (if implemented) consist of multiple + * IRQ controllers, each mapped on a 4Kb boundary. + * Each CPU may be routed to different IRQCTRLs, however + * we assume that all CPUs (in SMP system) is routed to the + * same IRQ Controller, and for non-SMP only one IRQCTRL is + * accessed anyway. + * In AMP systems, Linux must run on CPU0 for the time being. + */ + icsel = LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->icsel[boot_cpu_id/8]); + icsel = (icsel >> ((7 - (boot_cpu_id&0x7)) * 4)) & 0xf; + leon3_irqctrl_regs += icsel; + + /* Mask all IRQs on boot-cpu IRQ controller */ + LEON3_BYPASS_STORE_PA(&leon3_irqctrl_regs->mask[boot_cpu_id], 0); + + /* Probe extended IRQ controller */ + eirq = (LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->mpstatus) + >> 16) & 0xf; + if (eirq != 0) + leon_eirq_setup(eirq); + + irq = _leon_build_device_irq(NULL, leon3_gptimer_irq+leon3_gptimer_idx); + err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL); + if (err) { + printk(KERN_ERR "unable to attach timer IRQ%d\n", irq); prom_halt(); } -# ifdef CONFIG_SMP - { - unsigned long flags; - struct tt_entry *trap_table = &sparc_ttable[SP_TRAP_IRQ1 + (leon_percpu_timer_dev[0].irq - 1)]; - - /* For SMP we use the level 14 ticker, however the bootup code - * has copied the firmwares level 14 vector into boot cpu's - * trap table, we must fix this now or we get squashed. - */ - local_irq_save(flags); - - patchme_maybe_smp_msg[0] = 0x01000000; /* NOP out the branch */ - - /* Adjust so that we jump directly to smpleon_ticker */ - trap_table->inst_three += smpleon_ticker - real_irq_entry; + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl, + LEON3_GPTIMER_EN | + LEON3_GPTIMER_RL | + LEON3_GPTIMER_LD | + LEON3_GPTIMER_IRQEN); - local_flush_cache_all(); - local_irq_restore(flags); +#ifdef CONFIG_SMP + /* Install per-cpu IRQ handler for broadcasted ticker */ + irq = leon_build_device_irq(leon3_ticker_irq, handle_percpu_irq, + "per-cpu", 0); + err = request_irq(irq, leon_percpu_timer_interrupt, + IRQF_PERCPU | IRQF_TIMER, "ticker", + NULL); + if (err) { + printk(KERN_ERR "unable to attach ticker IRQ%d\n", irq); + prom_halt(); } -# endif - - if (leon3_gptimer_regs) { - LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl, - LEON3_GPTIMER_EN | - LEON3_GPTIMER_RL | - LEON3_GPTIMER_LD | LEON3_GPTIMER_IRQEN); -#ifdef CONFIG_SMP - LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl, - LEON3_GPTIMER_EN | - LEON3_GPTIMER_RL | - LEON3_GPTIMER_LD | - LEON3_GPTIMER_IRQEN); + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl, + LEON3_GPTIMER_EN | + LEON3_GPTIMER_RL | + LEON3_GPTIMER_LD | + LEON3_GPTIMER_IRQEN); #endif - - } return; bad: printk(KERN_ERR "No Timer/irqctrl found\n"); @@ -281,9 +401,6 @@ void leon_load_profile_irq(int cpu, unsigned int limit) BUG(); } - - - void __init leon_trans_init(struct device_node *dp) { if (strcmp(dp->type, "cpu") == 0 && strcmp(dp->name, "<NULL>") == 0) { @@ -337,22 +454,18 @@ void leon_enable_irq_cpu(unsigned int irq_nr, unsigned int cpu) { unsigned long mask, flags, *addr; mask = get_irqmask(irq_nr); - local_irq_save(flags); - addr = (unsigned long *)&(leon3_irqctrl_regs->mask[cpu]); - LEON3_BYPASS_STORE_PA(addr, (LEON3_BYPASS_LOAD_PA(addr) | (mask))); - local_irq_restore(flags); + spin_lock_irqsave(&leon_irq_lock, flags); + addr = (unsigned long *)LEON_IMASK(cpu); + LEON3_BYPASS_STORE_PA(addr, (LEON3_BYPASS_LOAD_PA(addr) | mask)); + spin_unlock_irqrestore(&leon_irq_lock, flags); } #endif void __init leon_init_IRQ(void) { - sparc_irq_config.init_timers = leon_init_timers; - - BTFIXUPSET_CALL(enable_irq, leon_enable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(disable_irq, leon_disable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(enable_pil_irq, leon_enable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(disable_pil_irq, leon_disable_irq, BTFIXUPCALL_NORM); + sparc_irq_config.init_timers = leon_init_timers; + sparc_irq_config.build_device_irq = _leon_build_device_irq; BTFIXUPSET_CALL(clear_clock_irq, leon_clear_clock_irq, BTFIXUPCALL_NORM); diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c index 8f5de4aa3c0a..fe8fb44c609c 100644 --- a/arch/sparc/kernel/leon_smp.c +++ b/arch/sparc/kernel/leon_smp.c @@ -14,6 +14,7 @@ #include <linux/smp.h> #include <linux/interrupt.h> #include <linux/kernel_stat.h> +#include <linux/of.h> #include <linux/init.h> #include <linux/spinlock.h> #include <linux/mm.h> @@ -29,6 +30,7 @@ #include <asm/ptrace.h> #include <asm/atomic.h> #include <asm/irq_regs.h> +#include <asm/traps.h> #include <asm/delay.h> #include <asm/irq.h> @@ -50,9 +52,12 @@ extern ctxd_t *srmmu_ctx_table_phys; static int smp_processors_ready; extern volatile unsigned long cpu_callin_map[NR_CPUS]; -extern unsigned char boot_cpu_id; extern cpumask_t smp_commenced_mask; void __init leon_configure_cache_smp(void); +static void leon_ipi_init(void); + +/* IRQ number of LEON IPIs */ +int leon_ipi_irq = LEON3_IRQ_IPI_DEFAULT; static inline unsigned long do_swap(volatile unsigned long *ptr, unsigned long val) @@ -94,8 +99,6 @@ void __cpuinit leon_callin(void) local_flush_cache_all(); local_flush_tlb_all(); - cpu_probe(); - /* Fix idle thread fields. */ __asm__ __volatile__("ld [%0], %%g6\n\t" : : "r"(¤t_set[cpuid]) : "memory" /* paranoid */); @@ -104,11 +107,11 @@ void __cpuinit leon_callin(void) atomic_inc(&init_mm.mm_count); current->active_mm = &init_mm; - while (!cpu_isset(cpuid, smp_commenced_mask)) + while (!cpumask_test_cpu(cpuid, &smp_commenced_mask)) mb(); local_irq_enable(); - cpu_set(cpuid, cpu_online_map); + set_cpu_online(cpuid, true); } /* @@ -179,13 +182,16 @@ void __init leon_boot_cpus(void) int nrcpu = leon_smp_nrcpus(); int me = smp_processor_id(); + /* Setup IPI */ + leon_ipi_init(); + printk(KERN_INFO "%d:(%d:%d) cpus mpirq at 0x%x\n", (unsigned int)me, (unsigned int)nrcpu, (unsigned int)NR_CPUS, (unsigned int)&(leon3_irqctrl_regs->mpstatus)); leon_enable_irq_cpu(LEON3_IRQ_CROSS_CALL, me); leon_enable_irq_cpu(LEON3_IRQ_TICKER, me); - leon_enable_irq_cpu(LEON3_IRQ_RESCHEDULE, me); + leon_enable_irq_cpu(leon_ipi_irq, me); leon_smp_setbroadcast(1 << LEON3_IRQ_TICKER); @@ -220,6 +226,10 @@ int __cpuinit leon_boot_one_cpu(int i) (unsigned int)&leon3_irqctrl_regs->mpstatus); local_flush_cache_all(); + /* Make sure all IRQs are of from the start for this new CPU */ + LEON_BYPASS_STORE_PA(&leon3_irqctrl_regs->mask[i], 0); + + /* Wake one CPU */ LEON_BYPASS_STORE_PA(&(leon3_irqctrl_regs->mpstatus), 1 << i); /* wheee... it's going... */ @@ -236,7 +246,7 @@ int __cpuinit leon_boot_one_cpu(int i) } else { leon_enable_irq_cpu(LEON3_IRQ_CROSS_CALL, i); leon_enable_irq_cpu(LEON3_IRQ_TICKER, i); - leon_enable_irq_cpu(LEON3_IRQ_RESCHEDULE, i); + leon_enable_irq_cpu(leon_ipi_irq, i); } local_flush_cache_all(); @@ -262,21 +272,21 @@ void __init leon_smp_done(void) local_flush_cache_all(); /* Free unneeded trap tables */ - if (!cpu_isset(1, cpu_present_map)) { + if (!cpu_present(1)) { ClearPageReserved(virt_to_page(&trapbase_cpu1)); init_page_count(virt_to_page(&trapbase_cpu1)); free_page((unsigned long)&trapbase_cpu1); totalram_pages++; num_physpages++; } - if (!cpu_isset(2, cpu_present_map)) { + if (!cpu_present(2)) { ClearPageReserved(virt_to_page(&trapbase_cpu2)); init_page_count(virt_to_page(&trapbase_cpu2)); free_page((unsigned long)&trapbase_cpu2); totalram_pages++; num_physpages++; } - if (!cpu_isset(3, cpu_present_map)) { + if (!cpu_present(3)) { ClearPageReserved(virt_to_page(&trapbase_cpu3)); init_page_count(virt_to_page(&trapbase_cpu3)); free_page((unsigned long)&trapbase_cpu3); @@ -292,6 +302,99 @@ void leon_irq_rotate(int cpu) { } +struct leon_ipi_work { + int single; + int msk; + int resched; +}; + +static DEFINE_PER_CPU_SHARED_ALIGNED(struct leon_ipi_work, leon_ipi_work); + +/* Initialize IPIs on the LEON, in order to save IRQ resources only one IRQ + * is used for all three types of IPIs. + */ +static void __init leon_ipi_init(void) +{ + int cpu, len; + struct leon_ipi_work *work; + struct property *pp; + struct device_node *rootnp; + struct tt_entry *trap_table; + unsigned long flags; + + /* Find IPI IRQ or stick with default value */ + rootnp = of_find_node_by_path("/ambapp0"); + if (rootnp) { + pp = of_find_property(rootnp, "ipi_num", &len); + if (pp && (*(int *)pp->value)) + leon_ipi_irq = *(int *)pp->value; + } + printk(KERN_INFO "leon: SMP IPIs at IRQ %d\n", leon_ipi_irq); + + /* Adjust so that we jump directly to smpleon_ipi */ + local_irq_save(flags); + trap_table = &sparc_ttable[SP_TRAP_IRQ1 + (leon_ipi_irq - 1)]; + trap_table->inst_three += smpleon_ipi - real_irq_entry; + local_flush_cache_all(); + local_irq_restore(flags); + + for_each_possible_cpu(cpu) { + work = &per_cpu(leon_ipi_work, cpu); + work->single = work->msk = work->resched = 0; + } +} + +static void leon_ipi_single(int cpu) +{ + struct leon_ipi_work *work = &per_cpu(leon_ipi_work, cpu); + + /* Mark work */ + work->single = 1; + + /* Generate IRQ on the CPU */ + set_cpu_int(cpu, leon_ipi_irq); +} + +static void leon_ipi_mask_one(int cpu) +{ + struct leon_ipi_work *work = &per_cpu(leon_ipi_work, cpu); + + /* Mark work */ + work->msk = 1; + + /* Generate IRQ on the CPU */ + set_cpu_int(cpu, leon_ipi_irq); +} + +static void leon_ipi_resched(int cpu) +{ + struct leon_ipi_work *work = &per_cpu(leon_ipi_work, cpu); + + /* Mark work */ + work->resched = 1; + + /* Generate IRQ on the CPU (any IRQ will cause resched) */ + set_cpu_int(cpu, leon_ipi_irq); +} + +void leonsmp_ipi_interrupt(void) +{ + struct leon_ipi_work *work = &__get_cpu_var(leon_ipi_work); + + if (work->single) { + work->single = 0; + smp_call_function_single_interrupt(); + } + if (work->msk) { + work->msk = 0; + smp_call_function_interrupt(); + } + if (work->resched) { + work->resched = 0; + smp_resched_interrupt(); + } +} + static struct smp_funcall { smpfunc_t func; unsigned long arg1; @@ -337,10 +440,10 @@ static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, { register int i; - cpu_clear(smp_processor_id(), mask); - cpus_and(mask, cpu_online_map, mask); + cpumask_clear_cpu(smp_processor_id(), &mask); + cpumask_and(&mask, cpu_online_mask, &mask); for (i = 0; i <= high; i++) { - if (cpu_isset(i, mask)) { + if (cpumask_test_cpu(i, &mask)) { ccall_info.processors_in[i] = 0; ccall_info.processors_out[i] = 0; set_cpu_int(i, LEON3_IRQ_CROSS_CALL); @@ -354,7 +457,7 @@ static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, i = 0; do { - if (!cpu_isset(i, mask)) + if (!cpumask_test_cpu(i, &mask)) continue; while (!ccall_info.processors_in[i]) @@ -363,7 +466,7 @@ static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, i = 0; do { - if (!cpu_isset(i, mask)) + if (!cpumask_test_cpu(i, &mask)) continue; while (!ccall_info.processors_out[i]) @@ -386,27 +489,23 @@ void leon_cross_call_irq(void) ccall_info.processors_out[i] = 1; } -void leon_percpu_timer_interrupt(struct pt_regs *regs) +irqreturn_t leon_percpu_timer_interrupt(int irq, void *unused) { - struct pt_regs *old_regs; int cpu = smp_processor_id(); - old_regs = set_irq_regs(regs); - leon_clear_profile_irq(cpu); profile_tick(CPU_PROFILING); if (!--prof_counter(cpu)) { - int user = user_mode(regs); + int user = user_mode(get_irq_regs()); - irq_enter(); update_process_times(user); - irq_exit(); prof_counter(cpu) = prof_multiplier(cpu); } - set_irq_regs(old_regs); + + return IRQ_HANDLED; } static void __init smp_setup_percpu_timer(void) @@ -449,6 +548,9 @@ void __init leon_init_smp(void) BTFIXUPSET_CALL(smp_cross_call, leon_cross_call, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(__hard_smp_processor_id, __leon_processor_id, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(smp_ipi_resched, leon_ipi_resched, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(smp_ipi_single, leon_ipi_single, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(smp_ipi_mask_one, leon_ipi_mask_one, BTFIXUPCALL_NORM); } #endif /* CONFIG_SPARC_LEON */ diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index 56db06432ce9..42f28c7420e1 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -768,7 +768,7 @@ static void * __cpuinit mdesc_iterate_over_cpus(void *(*func)(struct mdesc_handl cpuid, NR_CPUS); continue; } - if (!cpu_isset(cpuid, *mask)) + if (!cpumask_test_cpu(cpuid, mask)) continue; #endif diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c index 5c149689bb20..3bb2eace58cf 100644 --- a/arch/sparc/kernel/of_device_64.c +++ b/arch/sparc/kernel/of_device_64.c @@ -622,8 +622,9 @@ static unsigned int __init build_one_device_irq(struct platform_device *op, out: nid = of_node_to_nid(dp); if (nid != -1) { - cpumask_t numa_mask = *cpumask_of_node(nid); + cpumask_t numa_mask; + cpumask_copy(&numa_mask, cpumask_of_node(nid)); irq_set_affinity(irq, &numa_mask); } diff --git a/arch/sparc/kernel/pci_msi.c b/arch/sparc/kernel/pci_msi.c index 30982e9ab626..580651af73f2 100644 --- a/arch/sparc/kernel/pci_msi.c +++ b/arch/sparc/kernel/pci_msi.c @@ -284,8 +284,9 @@ static int bringup_one_msi_queue(struct pci_pbm_info *pbm, nid = pbm->numa_node; if (nid != -1) { - cpumask_t numa_mask = *cpumask_of_node(nid); + cpumask_t numa_mask; + cpumask_copy(&numa_mask, cpumask_of_node(nid)); irq_set_affinity(irq, &numa_mask); } err = request_irq(irq, sparc64_msiq_interrupt, 0, diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c index 2cdc131b50ac..948601a066ff 100644 --- a/arch/sparc/kernel/pcic.c +++ b/arch/sparc/kernel/pcic.c @@ -164,6 +164,9 @@ void __iomem *pcic_regs; volatile int pcic_speculative; volatile int pcic_trapped; +/* forward */ +unsigned int pcic_build_device_irq(struct platform_device *op, + unsigned int real_irq); #define CONFIG_CMD(bus, device_fn, where) (0x80000000 | (((unsigned int)bus) << 16) | (((unsigned int)device_fn) << 8) | (where & ~3)) @@ -523,6 +526,7 @@ static void pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node) { struct pcic_ca2irq *p; + unsigned int real_irq; int i, ivec; char namebuf[64]; @@ -551,26 +555,25 @@ pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node) i = p->pin; if (i >= 0 && i < 4) { ivec = readw(pcic->pcic_regs+PCI_INT_SELECT_LO); - dev->irq = ivec >> (i << 2) & 0xF; + real_irq = ivec >> (i << 2) & 0xF; } else if (i >= 4 && i < 8) { ivec = readw(pcic->pcic_regs+PCI_INT_SELECT_HI); - dev->irq = ivec >> ((i-4) << 2) & 0xF; + real_irq = ivec >> ((i-4) << 2) & 0xF; } else { /* Corrupted map */ printk("PCIC: BAD PIN %d\n", i); for (;;) {} } /* P3 */ /* printk("PCIC: device %s pin %d ivec 0x%x irq %x\n", namebuf, i, ivec, dev->irq); */ - /* - * dev->irq=0 means PROM did not bother to program the upper + /* real_irq means PROM did not bother to program the upper * half of PCIC. This happens on JS-E with PROM 3.11, for instance. */ - if (dev->irq == 0 || p->force) { + if (real_irq == 0 || p->force) { if (p->irq == 0 || p->irq >= 15) { /* Corrupted map */ printk("PCIC: BAD IRQ %d\n", p->irq); for (;;) {} } printk("PCIC: setting irq %d at pin %d for device %02x:%02x\n", p->irq, p->pin, dev->bus->number, dev->devfn); - dev->irq = p->irq; + real_irq = p->irq; i = p->pin; if (i >= 4) { @@ -584,7 +587,8 @@ pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node) ivec |= p->irq << (i << 2); writew(ivec, pcic->pcic_regs+PCI_INT_SELECT_LO); } - } + } + dev->irq = pcic_build_device_irq(NULL, real_irq); } /* @@ -729,6 +733,7 @@ void __init pci_time_init(void) struct linux_pcic *pcic = &pcic0; unsigned long v; int timer_irq, irq; + int err; do_arch_gettimeoffset = pci_gettimeoffset; @@ -740,9 +745,10 @@ void __init pci_time_init(void) timer_irq = PCI_COUNTER_IRQ_SYS(v); writel (PCI_COUNTER_IRQ_SET(timer_irq, 0), pcic->pcic_regs+PCI_COUNTER_IRQ); - irq = request_irq(timer_irq, pcic_timer_handler, - (IRQF_DISABLED | SA_STATIC_ALLOC), "timer", NULL); - if (irq) { + irq = pcic_build_device_irq(NULL, timer_irq); + err = request_irq(irq, pcic_timer_handler, + IRQF_TIMER, "timer", NULL); + if (err) { prom_printf("time_init: unable to attach IRQ%d\n", timer_irq); prom_halt(); } @@ -803,50 +809,73 @@ static inline unsigned long get_irqmask(int irq_nr) return 1 << irq_nr; } -static void pcic_disable_irq(unsigned int irq_nr) +static void pcic_mask_irq(struct irq_data *data) { unsigned long mask, flags; - mask = get_irqmask(irq_nr); + mask = (unsigned long)data->chip_data; local_irq_save(flags); writel(mask, pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_SET); local_irq_restore(flags); } -static void pcic_enable_irq(unsigned int irq_nr) +static void pcic_unmask_irq(struct irq_data *data) { unsigned long mask, flags; - mask = get_irqmask(irq_nr); + mask = (unsigned long)data->chip_data; local_irq_save(flags); writel(mask, pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_CLEAR); local_irq_restore(flags); } -static void pcic_load_profile_irq(int cpu, unsigned int limit) +static unsigned int pcic_startup_irq(struct irq_data *data) { - printk("PCIC: unimplemented code: FILE=%s LINE=%d", __FILE__, __LINE__); + irq_link(data->irq); + pcic_unmask_irq(data); + return 0; } -/* We assume the caller has disabled local interrupts when these are called, - * or else very bizarre behavior will result. - */ -static void pcic_disable_pil_irq(unsigned int pil) +static struct irq_chip pcic_irq = { + .name = "pcic", + .irq_startup = pcic_startup_irq, + .irq_mask = pcic_mask_irq, + .irq_unmask = pcic_unmask_irq, +}; + +unsigned int pcic_build_device_irq(struct platform_device *op, + unsigned int real_irq) { - writel(get_irqmask(pil), pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_SET); + unsigned int irq; + unsigned long mask; + + irq = 0; + mask = get_irqmask(real_irq); + if (mask == 0) + goto out; + + irq = irq_alloc(real_irq, real_irq); + if (irq == 0) + goto out; + + irq_set_chip_and_handler_name(irq, &pcic_irq, + handle_level_irq, "PCIC"); + irq_set_chip_data(irq, (void *)mask); + +out: + return irq; } -static void pcic_enable_pil_irq(unsigned int pil) + +static void pcic_load_profile_irq(int cpu, unsigned int limit) { - writel(get_irqmask(pil), pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_CLEAR); + printk("PCIC: unimplemented code: FILE=%s LINE=%d", __FILE__, __LINE__); } void __init sun4m_pci_init_IRQ(void) { - BTFIXUPSET_CALL(enable_irq, pcic_enable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(disable_irq, pcic_disable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(enable_pil_irq, pcic_enable_pil_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(disable_pil_irq, pcic_disable_pil_irq, BTFIXUPCALL_NORM); + sparc_irq_config.build_device_irq = pcic_build_device_irq; + BTFIXUPSET_CALL(clear_clock_irq, pcic_clear_clock_irq, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(load_profile_irq, pcic_load_profile_irq, BTFIXUPCALL_NORM); } diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index ee8426ede7c7..2cb0e1c001e2 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -26,6 +26,7 @@ #include <asm/nmi.h> #include <asm/pcr.h> +#include "kernel.h" #include "kstack.h" /* Sparc64 chips have two performance counters, 32-bits each, with diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index 17529298c50a..c8cc461ff75f 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -128,8 +128,16 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); /* endless idle loop with no priority at all */ while(1) { - while (!need_resched()) - cpu_relax(); +#ifdef CONFIG_SPARC_LEON + if (pm_idle) { + while (!need_resched()) + (*pm_idle)(); + } else +#endif + { + while (!need_resched()) + cpu_relax(); + } preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c index 05fb25330583..5ce3d15a99b0 100644 --- a/arch/sparc/kernel/prom_32.c +++ b/arch/sparc/kernel/prom_32.c @@ -326,7 +326,6 @@ void __init of_console_init(void) of_console_options = NULL; } - prom_printf(msg, of_console_path); printk(msg, of_console_path); } diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c index 7b8b76c9557f..3609bdee9ed2 100644 --- a/arch/sparc/kernel/setup_32.c +++ b/arch/sparc/kernel/setup_32.c @@ -103,16 +103,20 @@ static unsigned int boot_flags __initdata = 0; /* Exported for mm/init.c:paging_init. */ unsigned long cmdline_memory_size __initdata = 0; +/* which CPU booted us (0xff = not set) */ +unsigned char boot_cpu_id = 0xff; /* 0xff will make it into DATA section... */ +unsigned char boot_cpu_id4; /* boot_cpu_id << 2 */ + static void prom_console_write(struct console *con, const char *s, unsigned n) { prom_write(s, n); } -static struct console prom_debug_console = { - .name = "debug", +static struct console prom_early_console = { + .name = "earlyprom", .write = prom_console_write, - .flags = CON_PRINTBUFFER, + .flags = CON_PRINTBUFFER | CON_BOOT, .index = -1, }; @@ -133,8 +137,7 @@ static void __init process_switch(char c) prom_halt(); break; case 'p': - /* Use PROM debug console. */ - register_console(&prom_debug_console); + /* Just ignore, this behavior is now the default. */ break; default: printk("Unknown boot switch (-%c)\n", c); @@ -215,6 +218,10 @@ void __init setup_arch(char **cmdline_p) strcpy(boot_command_line, *cmdline_p); parse_early_param(); + boot_flags_init(*cmdline_p); + + register_console(&prom_early_console); + /* Set sparc_cpu_model */ sparc_cpu_model = sun_unknown; if (!strcmp(&cputypval[0], "sun4 ")) @@ -265,7 +272,6 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; #endif - boot_flags_init(*cmdline_p); idprom_init(); if (ARCH_SUN4C) @@ -311,75 +317,6 @@ void __init setup_arch(char **cmdline_p) smp_setup_cpu_possible_map(); } -static int ncpus_probed; - -static int show_cpuinfo(struct seq_file *m, void *__unused) -{ - seq_printf(m, - "cpu\t\t: %s\n" - "fpu\t\t: %s\n" - "promlib\t\t: Version %d Revision %d\n" - "prom\t\t: %d.%d\n" - "type\t\t: %s\n" - "ncpus probed\t: %d\n" - "ncpus active\t: %d\n" -#ifndef CONFIG_SMP - "CPU0Bogo\t: %lu.%02lu\n" - "CPU0ClkTck\t: %ld\n" -#endif - , - sparc_cpu_type, - sparc_fpu_type , - romvec->pv_romvers, - prom_rev, - romvec->pv_printrev >> 16, - romvec->pv_printrev & 0xffff, - &cputypval[0], - ncpus_probed, - num_online_cpus() -#ifndef CONFIG_SMP - , cpu_data(0).udelay_val/(500000/HZ), - (cpu_data(0).udelay_val/(5000/HZ)) % 100, - cpu_data(0).clock_tick -#endif - ); - -#ifdef CONFIG_SMP - smp_bogo(m); -#endif - mmu_info(m); -#ifdef CONFIG_SMP - smp_info(m); -#endif - return 0; -} - -static void *c_start(struct seq_file *m, loff_t *pos) -{ - /* The pointer we are returning is arbitrary, - * it just has to be non-NULL and not IS_ERR - * in the success case. - */ - return *pos == 0 ? &c_start : NULL; -} - -static void *c_next(struct seq_file *m, void *v, loff_t *pos) -{ - ++*pos; - return c_start(m, pos); -} - -static void c_stop(struct seq_file *m, void *v) -{ -} - -const struct seq_operations cpuinfo_op = { - .start =c_start, - .next = c_next, - .stop = c_stop, - .show = show_cpuinfo, -}; - extern int stop_a_enabled; void sun_do_break(void) diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 29bafe051bb1..f3b6850cc8db 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -339,84 +339,6 @@ void __init setup_arch(char **cmdline_p) paging_init(); } -/* BUFFER is PAGE_SIZE bytes long. */ - -extern void smp_info(struct seq_file *); -extern void smp_bogo(struct seq_file *); -extern void mmu_info(struct seq_file *); - -unsigned int dcache_parity_tl1_occurred; -unsigned int icache_parity_tl1_occurred; - -int ncpus_probed; - -static int show_cpuinfo(struct seq_file *m, void *__unused) -{ - seq_printf(m, - "cpu\t\t: %s\n" - "fpu\t\t: %s\n" - "pmu\t\t: %s\n" - "prom\t\t: %s\n" - "type\t\t: %s\n" - "ncpus probed\t: %d\n" - "ncpus active\t: %d\n" - "D$ parity tl1\t: %u\n" - "I$ parity tl1\t: %u\n" -#ifndef CONFIG_SMP - "Cpu0ClkTck\t: %016lx\n" -#endif - , - sparc_cpu_type, - sparc_fpu_type, - sparc_pmu_type, - prom_version, - ((tlb_type == hypervisor) ? - "sun4v" : - "sun4u"), - ncpus_probed, - num_online_cpus(), - dcache_parity_tl1_occurred, - icache_parity_tl1_occurred -#ifndef CONFIG_SMP - , cpu_data(0).clock_tick -#endif - ); -#ifdef CONFIG_SMP - smp_bogo(m); -#endif - mmu_info(m); -#ifdef CONFIG_SMP - smp_info(m); -#endif - return 0; -} - -static void *c_start(struct seq_file *m, loff_t *pos) -{ - /* The pointer we are returning is arbitrary, - * it just has to be non-NULL and not IS_ERR - * in the success case. - */ - return *pos == 0 ? &c_start : NULL; -} - -static void *c_next(struct seq_file *m, void *v, loff_t *pos) -{ - ++*pos; - return c_start(m, pos); -} - -static void c_stop(struct seq_file *m, void *v) -{ -} - -const struct seq_operations cpuinfo_op = { - .start =c_start, - .next = c_next, - .stop = c_stop, - .show = show_cpuinfo, -}; - extern int stop_a_enabled; void sun_do_break(void) diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c index 442286d83435..d5b3958be0b4 100644 --- a/arch/sparc/kernel/smp_32.c +++ b/arch/sparc/kernel/smp_32.c @@ -37,8 +37,6 @@ #include "irq.h" volatile unsigned long cpu_callin_map[NR_CPUS] __cpuinitdata = {0,}; -unsigned char boot_cpu_id = 0; -unsigned char boot_cpu_id4 = 0; /* boot_cpu_id << 2 */ cpumask_t smp_commenced_mask = CPU_MASK_NONE; @@ -130,14 +128,57 @@ struct linux_prom_registers smp_penguin_ctable __cpuinitdata = { 0 }; void smp_send_reschedule(int cpu) { /* - * XXX missing reschedule IPI, see scheduler_ipi() + * CPU model dependent way of implementing IPI generation targeting + * a single CPU. The trap handler needs only to do trap entry/return + * to call schedule. */ + BTFIXUP_CALL(smp_ipi_resched)(cpu); } void smp_send_stop(void) { } +void arch_send_call_function_single_ipi(int cpu) +{ + /* trigger one IPI single call on one CPU */ + BTFIXUP_CALL(smp_ipi_single)(cpu); +} + +void arch_send_call_function_ipi_mask(const struct cpumask *mask) +{ + int cpu; + + /* trigger IPI mask call on each CPU */ + for_each_cpu(cpu, mask) + BTFIXUP_CALL(smp_ipi_mask_one)(cpu); +} + +void smp_resched_interrupt(void) +{ + irq_enter(); + scheduler_ipi(); + local_cpu_data().irq_resched_count++; + irq_exit(); + /* re-schedule routine called by interrupt return code. */ +} + +void smp_call_function_single_interrupt(void) +{ + irq_enter(); + generic_smp_call_function_single_interrupt(); + local_cpu_data().irq_call_count++; + irq_exit(); +} + +void smp_call_function_interrupt(void) +{ + irq_enter(); + generic_smp_call_function_interrupt(); + local_cpu_data().irq_call_count++; + irq_exit(); +} + void smp_flush_cache_all(void) { xc0((smpfunc_t) BTFIXUP_CALL(local_flush_cache_all)); @@ -153,9 +194,10 @@ void smp_flush_tlb_all(void) void smp_flush_cache_mm(struct mm_struct *mm) { if(mm->context != NO_CONTEXT) { - cpumask_t cpu_mask = *mm_cpumask(mm); - cpu_clear(smp_processor_id(), cpu_mask); - if (!cpus_empty(cpu_mask)) + cpumask_t cpu_mask; + cpumask_copy(&cpu_mask, mm_cpumask(mm)); + cpumask_clear_cpu(smp_processor_id(), &cpu_mask); + if (!cpumask_empty(&cpu_mask)) xc1((smpfunc_t) BTFIXUP_CALL(local_flush_cache_mm), (unsigned long) mm); local_flush_cache_mm(mm); } @@ -164,9 +206,10 @@ void smp_flush_cache_mm(struct mm_struct *mm) void smp_flush_tlb_mm(struct mm_struct *mm) { if(mm->context != NO_CONTEXT) { - cpumask_t cpu_mask = *mm_cpumask(mm); - cpu_clear(smp_processor_id(), cpu_mask); - if (!cpus_empty(cpu_mask)) { + cpumask_t cpu_mask; + cpumask_copy(&cpu_mask, mm_cpumask(mm)); + cpumask_clear_cpu(smp_processor_id(), &cpu_mask); + if (!cpumask_empty(&cpu_mask)) { xc1((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_mm), (unsigned long) mm); if(atomic_read(&mm->mm_users) == 1 && current->active_mm == mm) cpumask_copy(mm_cpumask(mm), @@ -182,9 +225,10 @@ void smp_flush_cache_range(struct vm_area_struct *vma, unsigned long start, struct mm_struct *mm = vma->vm_mm; if (mm->context != NO_CONTEXT) { - cpumask_t cpu_mask = *mm_cpumask(mm); - cpu_clear(smp_processor_id(), cpu_mask); - if (!cpus_empty(cpu_mask)) + cpumask_t cpu_mask; + cpumask_copy(&cpu_mask, mm_cpumask(mm)); + cpumask_clear_cpu(smp_processor_id(), &cpu_mask); + if (!cpumask_empty(&cpu_mask)) xc3((smpfunc_t) BTFIXUP_CALL(local_flush_cache_range), (unsigned long) vma, start, end); local_flush_cache_range(vma, start, end); } @@ -196,9 +240,10 @@ void smp_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, struct mm_struct *mm = vma->vm_mm; if (mm->context != NO_CONTEXT) { - cpumask_t cpu_mask = *mm_cpumask(mm); - cpu_clear(smp_processor_id(), cpu_mask); - if (!cpus_empty(cpu_mask)) + cpumask_t cpu_mask; + cpumask_copy(&cpu_mask, mm_cpumask(mm)); + cpumask_clear_cpu(smp_processor_id(), &cpu_mask); + if (!cpumask_empty(&cpu_mask)) xc3((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_range), (unsigned long) vma, start, end); local_flush_tlb_range(vma, start, end); } @@ -209,9 +254,10 @@ void smp_flush_cache_page(struct vm_area_struct *vma, unsigned long page) struct mm_struct *mm = vma->vm_mm; if(mm->context != NO_CONTEXT) { - cpumask_t cpu_mask = *mm_cpumask(mm); - cpu_clear(smp_processor_id(), cpu_mask); - if (!cpus_empty(cpu_mask)) + cpumask_t cpu_mask; + cpumask_copy(&cpu_mask, mm_cpumask(mm)); + cpumask_clear_cpu(smp_processor_id(), &cpu_mask); + if (!cpumask_empty(&cpu_mask)) xc2((smpfunc_t) BTFIXUP_CALL(local_flush_cache_page), (unsigned long) vma, page); local_flush_cache_page(vma, page); } @@ -222,19 +268,15 @@ void smp_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) struct mm_struct *mm = vma->vm_mm; if(mm->context != NO_CONTEXT) { - cpumask_t cpu_mask = *mm_cpumask(mm); - cpu_clear(smp_processor_id(), cpu_mask); - if (!cpus_empty(cpu_mask)) + cpumask_t cpu_mask; + cpumask_copy(&cpu_mask, mm_cpumask(mm)); + cpumask_clear_cpu(smp_processor_id(), &cpu_mask); + if (!cpumask_empty(&cpu_mask)) xc2((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_page), (unsigned long) vma, page); local_flush_tlb_page(vma, page); } } -void smp_reschedule_irq(void) -{ - set_need_resched(); -} - void smp_flush_page_to_ram(unsigned long page) { /* Current theory is that those who call this are the one's @@ -251,9 +293,10 @@ void smp_flush_page_to_ram(unsigned long page) void smp_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr) { - cpumask_t cpu_mask = *mm_cpumask(mm); - cpu_clear(smp_processor_id(), cpu_mask); - if (!cpus_empty(cpu_mask)) + cpumask_t cpu_mask; + cpumask_copy(&cpu_mask, mm_cpumask(mm)); + cpumask_clear_cpu(smp_processor_id(), &cpu_mask); + if (!cpumask_empty(&cpu_mask)) xc2((smpfunc_t) BTFIXUP_CALL(local_flush_sig_insns), (unsigned long) mm, insn_addr); local_flush_sig_insns(mm, insn_addr); } @@ -407,7 +450,7 @@ int __cpuinit __cpu_up(unsigned int cpu) }; if (!ret) { - cpu_set(cpu, smp_commenced_mask); + cpumask_set_cpu(cpu, &smp_commenced_mask); while (!cpu_online(cpu)) mb(); } diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 9478da7fdb3e..99cb17251bb5 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -121,11 +121,11 @@ void __cpuinit smp_callin(void) /* inform the notifiers about the new cpu */ notify_cpu_starting(cpuid); - while (!cpu_isset(cpuid, smp_commenced_mask)) + while (!cpumask_test_cpu(cpuid, &smp_commenced_mask)) rmb(); ipi_call_lock_irq(); - cpu_set(cpuid, cpu_online_map); + set_cpu_online(cpuid, true); ipi_call_unlock_irq(); /* idle thread is expected to have preempt disabled */ @@ -785,7 +785,7 @@ static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask /* Send cross call to all processors mentioned in MASK_P * except self. Really, there are only two cases currently, - * "&cpu_online_map" and "&mm->cpu_vm_mask". + * "cpu_online_mask" and "mm_cpumask(mm)". */ static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, const cpumask_t *mask) { @@ -797,7 +797,7 @@ static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 d /* Send cross call to all processors except self. */ static void smp_cross_call(unsigned long *func, u32 ctx, u64 data1, u64 data2) { - smp_cross_call_masked(func, ctx, data1, data2, &cpu_online_map); + smp_cross_call_masked(func, ctx, data1, data2, cpu_online_mask); } extern unsigned long xcall_sync_tick; @@ -805,7 +805,7 @@ extern unsigned long xcall_sync_tick; static void smp_start_sync_tick_client(int cpu) { xcall_deliver((u64) &xcall_sync_tick, 0, 0, - &cpumask_of_cpu(cpu)); + cpumask_of(cpu)); } extern unsigned long xcall_call_function; @@ -820,7 +820,7 @@ extern unsigned long xcall_call_function_single; void arch_send_call_function_single_ipi(int cpu) { xcall_deliver((u64) &xcall_call_function_single, 0, 0, - &cpumask_of_cpu(cpu)); + cpumask_of(cpu)); } void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs) @@ -918,7 +918,7 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu) } if (data0) { xcall_deliver(data0, __pa(pg_addr), - (u64) pg_addr, &cpumask_of_cpu(cpu)); + (u64) pg_addr, cpumask_of(cpu)); #ifdef CONFIG_DEBUG_DCFLUSH atomic_inc(&dcpage_flushes_xcall); #endif @@ -954,7 +954,7 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page) } if (data0) { xcall_deliver(data0, __pa(pg_addr), - (u64) pg_addr, &cpu_online_map); + (u64) pg_addr, cpu_online_mask); #ifdef CONFIG_DEBUG_DCFLUSH atomic_inc(&dcpage_flushes_xcall); #endif @@ -1197,32 +1197,32 @@ void __devinit smp_fill_in_sib_core_maps(void) for_each_present_cpu(i) { unsigned int j; - cpus_clear(cpu_core_map[i]); + cpumask_clear(&cpu_core_map[i]); if (cpu_data(i).core_id == 0) { - cpu_set(i, cpu_core_map[i]); + cpumask_set_cpu(i, &cpu_core_map[i]); continue; } for_each_present_cpu(j) { if (cpu_data(i).core_id == cpu_data(j).core_id) - cpu_set(j, cpu_core_map[i]); + cpumask_set_cpu(j, &cpu_core_map[i]); } } for_each_present_cpu(i) { unsigned int j; - cpus_clear(per_cpu(cpu_sibling_map, i)); + cpumask_clear(&per_cpu(cpu_sibling_map, i)); if (cpu_data(i).proc_id == -1) { - cpu_set(i, per_cpu(cpu_sibling_map, i)); + cpumask_set_cpu(i, &per_cpu(cpu_sibling_map, i)); continue; } for_each_present_cpu(j) { if (cpu_data(i).proc_id == cpu_data(j).proc_id) - cpu_set(j, per_cpu(cpu_sibling_map, i)); + cpumask_set_cpu(j, &per_cpu(cpu_sibling_map, i)); } } } @@ -1232,10 +1232,10 @@ int __cpuinit __cpu_up(unsigned int cpu) int ret = smp_boot_one_cpu(cpu); if (!ret) { - cpu_set(cpu, smp_commenced_mask); - while (!cpu_isset(cpu, cpu_online_map)) + cpumask_set_cpu(cpu, &smp_commenced_mask); + while (!cpu_online(cpu)) mb(); - if (!cpu_isset(cpu, cpu_online_map)) { + if (!cpu_online(cpu)) { ret = -ENODEV; } else { /* On SUN4V, writes to %tick and %stick are @@ -1269,7 +1269,7 @@ void cpu_play_dead(void) tb->nonresum_mondo_pa, 0); } - cpu_clear(cpu, smp_commenced_mask); + cpumask_clear_cpu(cpu, &smp_commenced_mask); membar_safe("#Sync"); local_irq_disable(); @@ -1290,13 +1290,13 @@ int __cpu_disable(void) cpuinfo_sparc *c; int i; - for_each_cpu_mask(i, cpu_core_map[cpu]) - cpu_clear(cpu, cpu_core_map[i]); - cpus_clear(cpu_core_map[cpu]); + for_each_cpu(i, &cpu_core_map[cpu]) + cpumask_clear_cpu(cpu, &cpu_core_map[i]); + cpumask_clear(&cpu_core_map[cpu]); - for_each_cpu_mask(i, per_cpu(cpu_sibling_map, cpu)) - cpu_clear(cpu, per_cpu(cpu_sibling_map, i)); - cpus_clear(per_cpu(cpu_sibling_map, cpu)); + for_each_cpu(i, &per_cpu(cpu_sibling_map, cpu)) + cpumask_clear_cpu(cpu, &per_cpu(cpu_sibling_map, i)); + cpumask_clear(&per_cpu(cpu_sibling_map, cpu)); c = &cpu_data(cpu); @@ -1313,7 +1313,7 @@ int __cpu_disable(void) local_irq_disable(); ipi_call_lock(); - cpu_clear(cpu, cpu_online_map); + set_cpu_online(cpu, false); ipi_call_unlock(); cpu_map_rebuild(); @@ -1327,11 +1327,11 @@ void __cpu_die(unsigned int cpu) for (i = 0; i < 100; i++) { smp_rmb(); - if (!cpu_isset(cpu, smp_commenced_mask)) + if (!cpumask_test_cpu(cpu, &smp_commenced_mask)) break; msleep(100); } - if (cpu_isset(cpu, smp_commenced_mask)) { + if (cpumask_test_cpu(cpu, &smp_commenced_mask)) { printk(KERN_ERR "CPU %u didn't die...\n", cpu); } else { #if defined(CONFIG_SUN_LDOMS) @@ -1341,7 +1341,7 @@ void __cpu_die(unsigned int cpu) do { hv_err = sun4v_cpu_stop(cpu); if (hv_err == HV_EOK) { - cpu_clear(cpu, cpu_present_map); + set_cpu_present(cpu, false); break; } } while (--limit > 0); @@ -1362,7 +1362,7 @@ void __init smp_cpus_done(unsigned int max_cpus) void smp_send_reschedule(int cpu) { xcall_deliver((u64) &xcall_receive_signal, 0, 0, - &cpumask_of_cpu(cpu)); + cpumask_of(cpu)); } void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs) diff --git a/arch/sparc/kernel/sun4c_irq.c b/arch/sparc/kernel/sun4c_irq.c index 90eea38ad66f..f6bf25a2ff80 100644 --- a/arch/sparc/kernel/sun4c_irq.c +++ b/arch/sparc/kernel/sun4c_irq.c @@ -65,62 +65,94 @@ */ unsigned char __iomem *interrupt_enable; -static void sun4c_disable_irq(unsigned int irq_nr) +static void sun4c_mask_irq(struct irq_data *data) { - unsigned long flags; - unsigned char current_mask, new_mask; - - local_irq_save(flags); - irq_nr &= (NR_IRQS - 1); - current_mask = sbus_readb(interrupt_enable); - switch (irq_nr) { - case 1: - new_mask = ((current_mask) & (~(SUN4C_INT_E1))); - break; - case 8: - new_mask = ((current_mask) & (~(SUN4C_INT_E8))); - break; - case 10: - new_mask = ((current_mask) & (~(SUN4C_INT_E10))); - break; - case 14: - new_mask = ((current_mask) & (~(SUN4C_INT_E14))); - break; - default: + unsigned long mask = (unsigned long)data->chip_data; + + if (mask) { + unsigned long flags; + + local_irq_save(flags); + mask = sbus_readb(interrupt_enable) & ~mask; + sbus_writeb(mask, interrupt_enable); local_irq_restore(flags); - return; } - sbus_writeb(new_mask, interrupt_enable); - local_irq_restore(flags); } -static void sun4c_enable_irq(unsigned int irq_nr) +static void sun4c_unmask_irq(struct irq_data *data) { - unsigned long flags; - unsigned char current_mask, new_mask; - - local_irq_save(flags); - irq_nr &= (NR_IRQS - 1); - current_mask = sbus_readb(interrupt_enable); - switch (irq_nr) { - case 1: - new_mask = ((current_mask) | SUN4C_INT_E1); - break; - case 8: - new_mask = ((current_mask) | SUN4C_INT_E8); - break; - case 10: - new_mask = ((current_mask) | SUN4C_INT_E10); - break; - case 14: - new_mask = ((current_mask) | SUN4C_INT_E14); - break; - default: + unsigned long mask = (unsigned long)data->chip_data; + + if (mask) { + unsigned long flags; + + local_irq_save(flags); + mask = sbus_readb(interrupt_enable) | mask; + sbus_writeb(mask, interrupt_enable); local_irq_restore(flags); - return; } - sbus_writeb(new_mask, interrupt_enable); - local_irq_restore(flags); +} + +static unsigned int sun4c_startup_irq(struct irq_data *data) +{ + irq_link(data->irq); + sun4c_unmask_irq(data); + + return 0; +} + +static void sun4c_shutdown_irq(struct irq_data *data) +{ + sun4c_mask_irq(data); + irq_unlink(data->irq); +} + +static struct irq_chip sun4c_irq = { + .name = "sun4c", + .irq_startup = sun4c_startup_irq, + .irq_shutdown = sun4c_shutdown_irq, + .irq_mask = sun4c_mask_irq, + .irq_unmask = sun4c_unmask_irq, +}; + +static unsigned int sun4c_build_device_irq(struct platform_device *op, + unsigned int real_irq) +{ + unsigned int irq; + + if (real_irq >= 16) { + prom_printf("Bogus sun4c IRQ %u\n", real_irq); + prom_halt(); + } + + irq = irq_alloc(real_irq, real_irq); + if (irq) { + unsigned long mask = 0UL; + + switch (real_irq) { + case 1: + mask = SUN4C_INT_E1; + break; + case 8: + mask = SUN4C_INT_E8; + break; + case 10: + mask = SUN4C_INT_E10; + break; + case 14: + mask = SUN4C_INT_E14; + break; + default: + /* All the rest are either always enabled, + * or are for signalling software interrupts. + */ + break; + } + irq_set_chip_and_handler_name(irq, &sun4c_irq, + handle_level_irq, "level"); + irq_set_chip_data(irq, (void *)mask); + } + return irq; } struct sun4c_timer_info { @@ -144,8 +176,9 @@ static void sun4c_load_profile_irq(int cpu, unsigned int limit) static void __init sun4c_init_timers(irq_handler_t counter_fn) { - const struct linux_prom_irqs *irq; + const struct linux_prom_irqs *prom_irqs; struct device_node *dp; + unsigned int irq; const u32 *addr; int err; @@ -163,9 +196,9 @@ static void __init sun4c_init_timers(irq_handler_t counter_fn) sun4c_timers = (void __iomem *) (unsigned long) addr[0]; - irq = of_get_property(dp, "intr", NULL); + prom_irqs = of_get_property(dp, "intr", NULL); of_node_put(dp); - if (!irq) { + if (!prom_irqs) { prom_printf("sun4c_init_timers: No intr property\n"); prom_halt(); } @@ -178,15 +211,15 @@ static void __init sun4c_init_timers(irq_handler_t counter_fn) master_l10_counter = &sun4c_timers->l10_count; - err = request_irq(irq[0].pri, counter_fn, - (IRQF_DISABLED | SA_STATIC_ALLOC), - "timer", NULL); + irq = sun4c_build_device_irq(NULL, prom_irqs[0].pri); + err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL); if (err) { prom_printf("sun4c_init_timers: request_irq() fails with %d\n", err); prom_halt(); } - sun4c_disable_irq(irq[1].pri); + /* disable timer interrupt */ + sun4c_mask_irq(irq_get_irq_data(irq)); } #ifdef CONFIG_SMP @@ -215,14 +248,11 @@ void __init sun4c_init_IRQ(void) interrupt_enable = (void __iomem *) (unsigned long) addr[0]; - BTFIXUPSET_CALL(enable_irq, sun4c_enable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(disable_irq, sun4c_disable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(enable_pil_irq, sun4c_enable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(disable_pil_irq, sun4c_disable_irq, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(clear_clock_irq, sun4c_clear_clock_irq, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(load_profile_irq, sun4c_load_profile_irq, BTFIXUPCALL_NOP); - sparc_irq_config.init_timers = sun4c_init_timers; + sparc_irq_config.init_timers = sun4c_init_timers; + sparc_irq_config.build_device_irq = sun4c_build_device_irq; #ifdef CONFIG_SMP BTFIXUPSET_CALL(set_cpu_int, sun4c_nop, BTFIXUPCALL_NOP); diff --git a/arch/sparc/kernel/sun4d_irq.c b/arch/sparc/kernel/sun4d_irq.c index 77b4a8992710..a9ea60eb2c10 100644 --- a/arch/sparc/kernel/sun4d_irq.c +++ b/arch/sparc/kernel/sun4d_irq.c @@ -14,6 +14,7 @@ #include <asm/io.h> #include <asm/sbi.h> #include <asm/cacheflush.h> +#include <asm/setup.h> #include "kernel.h" #include "irq.h" @@ -22,22 +23,20 @@ * cpu local. CPU local interrupts cover the timer interrupts * and whatnot, and we encode those as normal PILs between * 0 and 15. - * - * SBUS interrupts are encoded integers including the board number - * (plus one), the SBUS level, and the SBUS slot number. Sun4D - * IRQ dispatch is done by: - * - * 1) Reading the BW local interrupt table in order to get the bus - * interrupt mask. - * - * This table is indexed by SBUS interrupt level which can be - * derived from the PIL we got interrupted on. - * - * 2) For each bus showing interrupt pending from #1, read the - * SBI interrupt state register. This will indicate which slots - * have interrupts pending for that SBUS interrupt level. + * SBUS interrupts are encodes as a combination of board, level and slot. */ +struct sun4d_handler_data { + unsigned int cpuid; /* target cpu */ + unsigned int real_irq; /* interrupt level */ +}; + + +static unsigned int sun4d_encode_irq(int board, int lvl, int slot) +{ + return (board + 1) << 5 | (lvl << 2) | slot; +} + struct sun4d_timer_regs { u32 l10_timer_limit; u32 l10_cur_countx; @@ -48,17 +47,12 @@ struct sun4d_timer_regs { static struct sun4d_timer_regs __iomem *sun4d_timers; -#define TIMER_IRQ 10 - -#define MAX_STATIC_ALLOC 4 -static unsigned char sbus_tid[32]; - -static struct irqaction *irq_action[NR_IRQS]; +#define SUN4D_TIMER_IRQ 10 -static struct sbus_action { - struct irqaction *action; - /* For SMP this needs to be extended */ -} *sbus_actions; +/* Specify which cpu handle interrupts from which board. + * Index is board - value is cpu. + */ +static unsigned char board_to_cpu[32]; static int pil_to_sbus[] = { 0, @@ -79,152 +73,81 @@ static int pil_to_sbus[] = { 0, }; -static int sbus_to_pil[] = { - 0, - 2, - 3, - 5, - 7, - 9, - 11, - 13, -}; - -static int nsbi; - /* Exported for sun4d_smp.c */ DEFINE_SPINLOCK(sun4d_imsk_lock); -int show_sun4d_interrupts(struct seq_file *p, void *v) +/* SBUS interrupts are encoded integers including the board number + * (plus one), the SBUS level, and the SBUS slot number. Sun4D + * IRQ dispatch is done by: + * + * 1) Reading the BW local interrupt table in order to get the bus + * interrupt mask. + * + * This table is indexed by SBUS interrupt level which can be + * derived from the PIL we got interrupted on. + * + * 2) For each bus showing interrupt pending from #1, read the + * SBI interrupt state register. This will indicate which slots + * have interrupts pending for that SBUS interrupt level. + * + * 3) Call the genreric IRQ support. + */ +static void sun4d_sbus_handler_irq(int sbusl) { - int i = *(loff_t *) v, j = 0, k = 0, sbusl; - struct irqaction *action; - unsigned long flags; -#ifdef CONFIG_SMP - int x; -#endif - - spin_lock_irqsave(&irq_action_lock, flags); - if (i < NR_IRQS) { - sbusl = pil_to_sbus[i]; - if (!sbusl) { - action = *(i + irq_action); - if (!action) - goto out_unlock; - } else { - for (j = 0; j < nsbi; j++) { - for (k = 0; k < 4; k++) - action = sbus_actions[(j << 5) + (sbusl << 2) + k].action; - if (action) - goto found_it; - } - goto out_unlock; - } -found_it: seq_printf(p, "%3d: ", i); -#ifndef CONFIG_SMP - seq_printf(p, "%10u ", kstat_irqs(i)); -#else - for_each_online_cpu(x) - seq_printf(p, "%10u ", - kstat_cpu(cpu_logical_map(x)).irqs[i]); -#endif - seq_printf(p, "%c %s", - (action->flags & IRQF_DISABLED) ? '+' : ' ', - action->name); - action = action->next; - for (;;) { - for (; action; action = action->next) { - seq_printf(p, ",%s %s", - (action->flags & IRQF_DISABLED) ? " +" : "", - action->name); - } - if (!sbusl) - break; - k++; - if (k < 4) { - action = sbus_actions[(j << 5) + (sbusl << 2) + k].action; - } else { - j++; - if (j == nsbi) - break; - k = 0; - action = sbus_actions[(j << 5) + (sbusl << 2)].action; + unsigned int bus_mask; + unsigned int sbino, slot; + unsigned int sbil; + + bus_mask = bw_get_intr_mask(sbusl) & 0x3ffff; + bw_clear_intr_mask(sbusl, bus_mask); + + sbil = (sbusl << 2); + /* Loop for each pending SBI */ + for (sbino = 0; bus_mask; sbino++) { + unsigned int idx, mask; + + bus_mask >>= 1; + if (!(bus_mask & 1)) + continue; + /* XXX This seems to ACK the irq twice. acquire_sbi() + * XXX uses swap, therefore this writes 0xf << sbil, + * XXX then later release_sbi() will write the individual + * XXX bits which were set again. + */ + mask = acquire_sbi(SBI2DEVID(sbino), 0xf << sbil); + mask &= (0xf << sbil); + + /* Loop for each pending SBI slot */ + idx = 0; + slot = (1 << sbil); + while (mask != 0) { + unsigned int pil; + struct irq_bucket *p; + + idx++; + slot <<= 1; + if (!(mask & slot)) + continue; + + mask &= ~slot; + pil = sun4d_encode_irq(sbino, sbil, idx); + + p = irq_map[pil]; + while (p) { + struct irq_bucket *next; + + next = p->next; + generic_handle_irq(p->irq); + p = next; } + release_sbi(SBI2DEVID(sbino), slot); } - seq_putc(p, '\n'); } -out_unlock: - spin_unlock_irqrestore(&irq_action_lock, flags); - return 0; -} - -void sun4d_free_irq(unsigned int irq, void *dev_id) -{ - struct irqaction *action, **actionp; - struct irqaction *tmp = NULL; - unsigned long flags; - - spin_lock_irqsave(&irq_action_lock, flags); - if (irq < 15) - actionp = irq + irq_action; - else - actionp = &(sbus_actions[irq - (1 << 5)].action); - action = *actionp; - if (!action) { - printk(KERN_ERR "Trying to free free IRQ%d\n", irq); - goto out_unlock; - } - if (dev_id) { - for (; action; action = action->next) { - if (action->dev_id == dev_id) - break; - tmp = action; - } - if (!action) { - printk(KERN_ERR "Trying to free free shared IRQ%d\n", - irq); - goto out_unlock; - } - } else if (action->flags & IRQF_SHARED) { - printk(KERN_ERR "Trying to free shared IRQ%d with NULL device ID\n", - irq); - goto out_unlock; - } - if (action->flags & SA_STATIC_ALLOC) { - /* - * This interrupt is marked as specially allocated - * so it is a bad idea to free it. - */ - printk(KERN_ERR "Attempt to free statically allocated IRQ%d (%s)\n", - irq, action->name); - goto out_unlock; - } - - if (tmp) - tmp->next = action->next; - else - *actionp = action->next; - - spin_unlock_irqrestore(&irq_action_lock, flags); - - synchronize_irq(irq); - - spin_lock_irqsave(&irq_action_lock, flags); - - kfree(action); - - if (!(*actionp)) - __disable_irq(irq); - -out_unlock: - spin_unlock_irqrestore(&irq_action_lock, flags); } void sun4d_handler_irq(int pil, struct pt_regs *regs) { struct pt_regs *old_regs; - struct irqaction *action; - int cpu = smp_processor_id(); /* SBUS IRQ level (1 - 7) */ int sbusl = pil_to_sbus[pil]; @@ -233,160 +156,96 @@ void sun4d_handler_irq(int pil, struct pt_regs *regs) cc_set_iclr(1 << pil); +#ifdef CONFIG_SMP + /* + * Check IPI data structures after IRQ has been cleared. Hard and Soft + * IRQ can happen at the same time, so both cases are always handled. + */ + if (pil == SUN4D_IPI_IRQ) + sun4d_ipi_interrupt(); +#endif + old_regs = set_irq_regs(regs); irq_enter(); - kstat_cpu(cpu).irqs[pil]++; - if (!sbusl) { - action = *(pil + irq_action); - if (!action) - unexpected_irq(pil, NULL, regs); - do { - action->handler(pil, action->dev_id); - action = action->next; - } while (action); + if (sbusl == 0) { + /* cpu interrupt */ + struct irq_bucket *p; + + p = irq_map[pil]; + while (p) { + struct irq_bucket *next; + + next = p->next; + generic_handle_irq(p->irq); + p = next; + } } else { - int bus_mask = bw_get_intr_mask(sbusl) & 0x3ffff; - int sbino; - struct sbus_action *actionp; - unsigned mask, slot; - int sbil = (sbusl << 2); - - bw_clear_intr_mask(sbusl, bus_mask); - - /* Loop for each pending SBI */ - for (sbino = 0; bus_mask; sbino++, bus_mask >>= 1) - if (bus_mask & 1) { - mask = acquire_sbi(SBI2DEVID(sbino), 0xf << sbil); - mask &= (0xf << sbil); - actionp = sbus_actions + (sbino << 5) + (sbil); - /* Loop for each pending SBI slot */ - for (slot = (1 << sbil); mask; slot <<= 1, actionp++) - if (mask & slot) { - mask &= ~slot; - action = actionp->action; - - if (!action) - unexpected_irq(pil, NULL, regs); - do { - action->handler(pil, action->dev_id); - action = action->next; - } while (action); - release_sbi(SBI2DEVID(sbino), slot); - } - } + /* SBUS interrupt */ + sun4d_sbus_handler_irq(sbusl); } irq_exit(); set_irq_regs(old_regs); } -int sun4d_request_irq(unsigned int irq, - irq_handler_t handler, - unsigned long irqflags, const char *devname, void *dev_id) + +static void sun4d_mask_irq(struct irq_data *data) { - struct irqaction *action, *tmp = NULL, **actionp; + struct sun4d_handler_data *handler_data = data->handler_data; + unsigned int real_irq; +#ifdef CONFIG_SMP + int cpuid = handler_data->cpuid; unsigned long flags; - int ret; - - if (irq > 14 && irq < (1 << 5)) { - ret = -EINVAL; - goto out; - } - - if (!handler) { - ret = -EINVAL; - goto out; - } - - spin_lock_irqsave(&irq_action_lock, flags); - - if (irq >= (1 << 5)) - actionp = &(sbus_actions[irq - (1 << 5)].action); - else - actionp = irq + irq_action; - action = *actionp; - - if (action) { - if ((action->flags & IRQF_SHARED) && (irqflags & IRQF_SHARED)) { - for (tmp = action; tmp->next; tmp = tmp->next) - /* find last entry - tmp used below */; - } else { - ret = -EBUSY; - goto out_unlock; - } - if ((action->flags & IRQF_DISABLED) ^ (irqflags & IRQF_DISABLED)) { - printk(KERN_ERR "Attempt to mix fast and slow interrupts on IRQ%d denied\n", - irq); - ret = -EBUSY; - goto out_unlock; - } - action = NULL; /* Or else! */ - } - - /* If this is flagged as statically allocated then we use our - * private struct which is never freed. - */ - if (irqflags & SA_STATIC_ALLOC) { - if (static_irq_count < MAX_STATIC_ALLOC) - action = &static_irqaction[static_irq_count++]; - else - printk(KERN_ERR "Request for IRQ%d (%s) SA_STATIC_ALLOC failed using kmalloc\n", - irq, devname); - } - - if (action == NULL) - action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC); - - if (!action) { - ret = -ENOMEM; - goto out_unlock; - } - - action->handler = handler; - action->flags = irqflags; - action->name = devname; - action->next = NULL; - action->dev_id = dev_id; - - if (tmp) - tmp->next = action; - else - *actionp = action; - - __enable_irq(irq); - - ret = 0; -out_unlock: - spin_unlock_irqrestore(&irq_action_lock, flags); -out: - return ret; +#endif + real_irq = handler_data->real_irq; +#ifdef CONFIG_SMP + spin_lock_irqsave(&sun4d_imsk_lock, flags); + cc_set_imsk_other(cpuid, cc_get_imsk_other(cpuid) | (1 << real_irq)); + spin_unlock_irqrestore(&sun4d_imsk_lock, flags); +#else + cc_set_imsk(cc_get_imsk() | (1 << real_irq)); +#endif } -static void sun4d_disable_irq(unsigned int irq) +static void sun4d_unmask_irq(struct irq_data *data) { - int tid = sbus_tid[(irq >> 5) - 1]; + struct sun4d_handler_data *handler_data = data->handler_data; + unsigned int real_irq; +#ifdef CONFIG_SMP + int cpuid = handler_data->cpuid; unsigned long flags; +#endif + real_irq = handler_data->real_irq; - if (irq < NR_IRQS) - return; - +#ifdef CONFIG_SMP spin_lock_irqsave(&sun4d_imsk_lock, flags); - cc_set_imsk_other(tid, cc_get_imsk_other(tid) | (1 << sbus_to_pil[(irq >> 2) & 7])); + cc_set_imsk_other(cpuid, cc_get_imsk_other(cpuid) | ~(1 << real_irq)); spin_unlock_irqrestore(&sun4d_imsk_lock, flags); +#else + cc_set_imsk(cc_get_imsk() | ~(1 << real_irq)); +#endif } -static void sun4d_enable_irq(unsigned int irq) +static unsigned int sun4d_startup_irq(struct irq_data *data) { - int tid = sbus_tid[(irq >> 5) - 1]; - unsigned long flags; - - if (irq < NR_IRQS) - return; + irq_link(data->irq); + sun4d_unmask_irq(data); + return 0; +} - spin_lock_irqsave(&sun4d_imsk_lock, flags); - cc_set_imsk_other(tid, cc_get_imsk_other(tid) & ~(1 << sbus_to_pil[(irq >> 2) & 7])); - spin_unlock_irqrestore(&sun4d_imsk_lock, flags); +static void sun4d_shutdown_irq(struct irq_data *data) +{ + sun4d_mask_irq(data); + irq_unlink(data->irq); } +struct irq_chip sun4d_irq = { + .name = "sun4d", + .irq_startup = sun4d_startup_irq, + .irq_shutdown = sun4d_shutdown_irq, + .irq_unmask = sun4d_unmask_irq, + .irq_mask = sun4d_mask_irq, +}; + #ifdef CONFIG_SMP static void sun4d_set_cpu_int(int cpu, int level) { @@ -413,7 +272,7 @@ void __init sun4d_distribute_irqs(void) for_each_node_by_name(dp, "sbi") { int devid = of_getintprop_default(dp, "device-id", 0); int board = of_getintprop_default(dp, "board#", 0); - sbus_tid[board] = cpuid; + board_to_cpu[board] = cpuid; set_sbi_tid(devid, cpuid << 3); } printk(KERN_ERR "All sbus IRQs directed to CPU%d\n", cpuid); @@ -443,15 +302,16 @@ static void __init sun4d_load_profile_irqs(void) unsigned int sun4d_build_device_irq(struct platform_device *op, unsigned int real_irq) { - static int pil_to_sbus[] = { - 0, 0, 1, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0, 0, - }; struct device_node *dp = op->dev.of_node; struct device_node *io_unit, *sbi = dp->parent; const struct linux_prom_registers *regs; + struct sun4d_handler_data *handler_data; + unsigned int pil; + unsigned int irq; int board, slot; int sbusl; + irq = 0; while (sbi) { if (!strcmp(sbi->name, "sbi")) break; @@ -484,7 +344,28 @@ unsigned int sun4d_build_device_irq(struct platform_device *op, sbusl = pil_to_sbus[real_irq]; if (sbusl) - return (((board + 1) << 5) + (sbusl << 2) + slot); + pil = sun4d_encode_irq(board, sbusl, slot); + else + pil = real_irq; + + irq = irq_alloc(real_irq, pil); + if (irq == 0) + goto err_out; + + handler_data = irq_get_handler_data(irq); + if (unlikely(handler_data)) + goto err_out; + + handler_data = kzalloc(sizeof(struct sun4d_handler_data), GFP_ATOMIC); + if (unlikely(!handler_data)) { + prom_printf("IRQ: kzalloc(sun4d_handler_data) failed.\n"); + prom_halt(); + } + handler_data->cpuid = board_to_cpu[board]; + handler_data->real_irq = real_irq; + irq_set_chip_and_handler_name(irq, &sun4d_irq, + handle_level_irq, "level"); + irq_set_handler_data(irq, handler_data); err_out: return real_irq; @@ -518,6 +399,7 @@ static void __init sun4d_init_timers(irq_handler_t counter_fn) { struct device_node *dp; struct resource res; + unsigned int irq; const u32 *reg; int err; @@ -552,9 +434,8 @@ static void __init sun4d_init_timers(irq_handler_t counter_fn) master_l10_counter = &sun4d_timers->l10_cur_count; - err = request_irq(TIMER_IRQ, counter_fn, - (IRQF_DISABLED | SA_STATIC_ALLOC), - "timer", NULL); + irq = sun4d_build_device_irq(NULL, SUN4D_TIMER_IRQ); + err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL); if (err) { prom_printf("sun4d_init_timers: request_irq() failed with %d\n", err); @@ -567,27 +448,16 @@ static void __init sun4d_init_timers(irq_handler_t counter_fn) void __init sun4d_init_sbi_irq(void) { struct device_node *dp; - int target_cpu = 0; + int target_cpu; -#ifdef CONFIG_SMP target_cpu = boot_cpu_id; -#endif - - nsbi = 0; - for_each_node_by_name(dp, "sbi") - nsbi++; - sbus_actions = kzalloc(nsbi * 8 * 4 * sizeof(struct sbus_action), GFP_ATOMIC); - if (!sbus_actions) { - prom_printf("SUN4D: Cannot allocate sbus_actions, halting.\n"); - prom_halt(); - } for_each_node_by_name(dp, "sbi") { int devid = of_getintprop_default(dp, "device-id", 0); int board = of_getintprop_default(dp, "board#", 0); unsigned int mask; set_sbi_tid(devid, target_cpu << 3); - sbus_tid[board] = target_cpu; + board_to_cpu[board] = target_cpu; /* Get rid of pending irqs from PROM */ mask = acquire_sbi(devid, 0xffffffff); @@ -603,12 +473,10 @@ void __init sun4d_init_IRQ(void) { local_irq_disable(); - BTFIXUPSET_CALL(enable_irq, sun4d_enable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(disable_irq, sun4d_disable_irq, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(clear_clock_irq, sun4d_clear_clock_irq, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(load_profile_irq, sun4d_load_profile_irq, BTFIXUPCALL_NORM); - sparc_irq_config.init_timers = sun4d_init_timers; + sparc_irq_config.init_timers = sun4d_init_timers; sparc_irq_config.build_device_irq = sun4d_build_device_irq; #ifdef CONFIG_SMP diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c index 475d50b96cd0..133387980b56 100644 --- a/arch/sparc/kernel/sun4d_smp.c +++ b/arch/sparc/kernel/sun4d_smp.c @@ -32,6 +32,7 @@ static inline unsigned long sun4d_swap(volatile unsigned long *ptr, unsigned lon return val; } +static void smp4d_ipi_init(void); static void smp_setup_percpu_timer(void); static unsigned char cpu_leds[32]; @@ -80,8 +81,6 @@ void __cpuinit smp4d_callin(void) local_flush_cache_all(); local_flush_tlb_all(); - cpu_probe(); - while ((unsigned long)current_set[cpuid] < PAGE_OFFSET) barrier(); @@ -105,7 +104,7 @@ void __cpuinit smp4d_callin(void) local_irq_enable(); /* We don't allow PIL 14 yet */ - while (!cpu_isset(cpuid, smp_commenced_mask)) + while (!cpumask_test_cpu(cpuid, &smp_commenced_mask)) barrier(); spin_lock_irqsave(&sun4d_imsk_lock, flags); @@ -120,6 +119,7 @@ void __cpuinit smp4d_callin(void) */ void __init smp4d_boot_cpus(void) { + smp4d_ipi_init(); if (boot_cpu_id) current_set[0] = NULL; smp_setup_percpu_timer(); @@ -191,6 +191,80 @@ void __init smp4d_smp_done(void) sun4d_distribute_irqs(); } +/* Memory structure giving interrupt handler information about IPI generated */ +struct sun4d_ipi_work { + int single; + int msk; + int resched; +}; + +static DEFINE_PER_CPU_SHARED_ALIGNED(struct sun4d_ipi_work, sun4d_ipi_work); + +/* Initialize IPIs on the SUN4D SMP machine */ +static void __init smp4d_ipi_init(void) +{ + int cpu; + struct sun4d_ipi_work *work; + + printk(KERN_INFO "smp4d: setup IPI at IRQ %d\n", SUN4D_IPI_IRQ); + + for_each_possible_cpu(cpu) { + work = &per_cpu(sun4d_ipi_work, cpu); + work->single = work->msk = work->resched = 0; + } +} + +void sun4d_ipi_interrupt(void) +{ + struct sun4d_ipi_work *work = &__get_cpu_var(sun4d_ipi_work); + + if (work->single) { + work->single = 0; + smp_call_function_single_interrupt(); + } + if (work->msk) { + work->msk = 0; + smp_call_function_interrupt(); + } + if (work->resched) { + work->resched = 0; + smp_resched_interrupt(); + } +} + +static void smp4d_ipi_single(int cpu) +{ + struct sun4d_ipi_work *work = &per_cpu(sun4d_ipi_work, cpu); + + /* Mark work */ + work->single = 1; + + /* Generate IRQ on the CPU */ + sun4d_send_ipi(cpu, SUN4D_IPI_IRQ); +} + +static void smp4d_ipi_mask_one(int cpu) +{ + struct sun4d_ipi_work *work = &per_cpu(sun4d_ipi_work, cpu); + + /* Mark work */ + work->msk = 1; + + /* Generate IRQ on the CPU */ + sun4d_send_ipi(cpu, SUN4D_IPI_IRQ); +} + +static void smp4d_ipi_resched(int cpu) +{ + struct sun4d_ipi_work *work = &per_cpu(sun4d_ipi_work, cpu); + + /* Mark work */ + work->resched = 1; + + /* Generate IRQ on the CPU (any IRQ will cause resched) */ + sun4d_send_ipi(cpu, SUN4D_IPI_IRQ); +} + static struct smp_funcall { smpfunc_t func; unsigned long arg1; @@ -239,10 +313,10 @@ static void smp4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, { register int i; - cpu_clear(smp_processor_id(), mask); - cpus_and(mask, cpu_online_map, mask); + cpumask_clear_cpu(smp_processor_id(), &mask); + cpumask_and(&mask, cpu_online_mask, &mask); for (i = 0; i <= high; i++) { - if (cpu_isset(i, mask)) { + if (cpumask_test_cpu(i, &mask)) { ccall_info.processors_in[i] = 0; ccall_info.processors_out[i] = 0; sun4d_send_ipi(i, IRQ_CROSS_CALL); @@ -255,7 +329,7 @@ static void smp4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, i = 0; do { - if (!cpu_isset(i, mask)) + if (!cpumask_test_cpu(i, &mask)) continue; while (!ccall_info.processors_in[i]) barrier(); @@ -263,7 +337,7 @@ static void smp4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, i = 0; do { - if (!cpu_isset(i, mask)) + if (!cpumask_test_cpu(i, &mask)) continue; while (!ccall_info.processors_out[i]) barrier(); @@ -356,6 +430,9 @@ void __init sun4d_init_smp(void) BTFIXUPSET_BLACKBOX(load_current, smp4d_blackbox_current); BTFIXUPSET_CALL(smp_cross_call, smp4d_cross_call, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(__hard_smp_processor_id, __smp4d_processor_id, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(smp_ipi_resched, smp4d_ipi_resched, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(smp_ipi_single, smp4d_ipi_single, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(smp_ipi_mask_one, smp4d_ipi_mask_one, BTFIXUPCALL_NORM); for (i = 0; i < NR_CPUS; i++) { ccall_info.processors_in[i] = 1; diff --git a/arch/sparc/kernel/sun4m_irq.c b/arch/sparc/kernel/sun4m_irq.c index 69df6257a32e..422c16dad1f6 100644 --- a/arch/sparc/kernel/sun4m_irq.c +++ b/arch/sparc/kernel/sun4m_irq.c @@ -100,6 +100,11 @@ struct sun4m_irq_percpu __iomem *sun4m_irq_percpu[SUN4M_NCPUS]; struct sun4m_irq_global __iomem *sun4m_irq_global; +struct sun4m_handler_data { + bool percpu; + long mask; +}; + /* Dave Redman (djhr@tadpole.co.uk) * The sun4m interrupt registers. */ @@ -142,9 +147,9 @@ struct sun4m_irq_global __iomem *sun4m_irq_global; #define OBP_INT_LEVEL_VME 0x40 #define SUN4M_TIMER_IRQ (OBP_INT_LEVEL_ONBOARD | 10) -#define SUM4M_PROFILE_IRQ (OBP_INT_LEVEL_ONBOARD | 14) +#define SUN4M_PROFILE_IRQ (OBP_INT_LEVEL_ONBOARD | 14) -static unsigned long irq_mask[0x50] = { +static unsigned long sun4m_imask[0x50] = { /* 0x00 - SMP */ 0, SUN4M_SOFT_INT(1), SUN4M_SOFT_INT(2), SUN4M_SOFT_INT(3), @@ -169,7 +174,7 @@ static unsigned long irq_mask[0x50] = { SUN4M_INT_VIDEO, SUN4M_INT_MODULE, SUN4M_INT_REALTIME, SUN4M_INT_FLOPPY, (SUN4M_INT_SERIAL | SUN4M_INT_KBDMS), - SUN4M_INT_AUDIO, 0, SUN4M_INT_MODULE_ERR, + SUN4M_INT_AUDIO, SUN4M_INT_E14, SUN4M_INT_MODULE_ERR, /* 0x30 - sbus */ 0, 0, SUN4M_INT_SBUS(0), SUN4M_INT_SBUS(1), 0, SUN4M_INT_SBUS(2), 0, SUN4M_INT_SBUS(3), @@ -182,105 +187,110 @@ static unsigned long irq_mask[0x50] = { 0, SUN4M_INT_VME(6), 0, 0 }; -static unsigned long sun4m_get_irqmask(unsigned int irq) +static void sun4m_mask_irq(struct irq_data *data) { - unsigned long mask; - - if (irq < 0x50) - mask = irq_mask[irq]; - else - mask = 0; + struct sun4m_handler_data *handler_data = data->handler_data; + int cpu = smp_processor_id(); - if (!mask) - printk(KERN_ERR "sun4m_get_irqmask: IRQ%d has no valid mask!\n", - irq); + if (handler_data->mask) { + unsigned long flags; - return mask; + local_irq_save(flags); + if (handler_data->percpu) { + sbus_writel(handler_data->mask, &sun4m_irq_percpu[cpu]->set); + } else { + sbus_writel(handler_data->mask, &sun4m_irq_global->mask_set); + } + local_irq_restore(flags); + } } -static void sun4m_disable_irq(unsigned int irq_nr) +static void sun4m_unmask_irq(struct irq_data *data) { - unsigned long mask, flags; + struct sun4m_handler_data *handler_data = data->handler_data; int cpu = smp_processor_id(); - mask = sun4m_get_irqmask(irq_nr); - local_irq_save(flags); - if (irq_nr > 15) - sbus_writel(mask, &sun4m_irq_global->mask_set); - else - sbus_writel(mask, &sun4m_irq_percpu[cpu]->set); - local_irq_restore(flags); -} - -static void sun4m_enable_irq(unsigned int irq_nr) -{ - unsigned long mask, flags; - int cpu = smp_processor_id(); + if (handler_data->mask) { + unsigned long flags; - /* Dreadful floppy hack. When we use 0x2b instead of - * 0x0b the system blows (it starts to whistle!). - * So we continue to use 0x0b. Fixme ASAP. --P3 - */ - if (irq_nr != 0x0b) { - mask = sun4m_get_irqmask(irq_nr); - local_irq_save(flags); - if (irq_nr > 15) - sbus_writel(mask, &sun4m_irq_global->mask_clear); - else - sbus_writel(mask, &sun4m_irq_percpu[cpu]->clear); - local_irq_restore(flags); - } else { local_irq_save(flags); - sbus_writel(SUN4M_INT_FLOPPY, &sun4m_irq_global->mask_clear); + if (handler_data->percpu) { + sbus_writel(handler_data->mask, &sun4m_irq_percpu[cpu]->clear); + } else { + sbus_writel(handler_data->mask, &sun4m_irq_global->mask_clear); + } local_irq_restore(flags); } } -static unsigned long cpu_pil_to_imask[16] = { -/*0*/ 0x00000000, -/*1*/ 0x00000000, -/*2*/ SUN4M_INT_SBUS(0) | SUN4M_INT_VME(0), -/*3*/ SUN4M_INT_SBUS(1) | SUN4M_INT_VME(1), -/*4*/ SUN4M_INT_SCSI, -/*5*/ SUN4M_INT_SBUS(2) | SUN4M_INT_VME(2), -/*6*/ SUN4M_INT_ETHERNET, -/*7*/ SUN4M_INT_SBUS(3) | SUN4M_INT_VME(3), -/*8*/ SUN4M_INT_VIDEO, -/*9*/ SUN4M_INT_SBUS(4) | SUN4M_INT_VME(4) | SUN4M_INT_MODULE_ERR, -/*10*/ SUN4M_INT_REALTIME, -/*11*/ SUN4M_INT_SBUS(5) | SUN4M_INT_VME(5) | SUN4M_INT_FLOPPY, -/*12*/ SUN4M_INT_SERIAL | SUN4M_INT_KBDMS, -/*13*/ SUN4M_INT_SBUS(6) | SUN4M_INT_VME(6) | SUN4M_INT_AUDIO, -/*14*/ SUN4M_INT_E14, -/*15*/ SUN4M_INT_ERROR, -}; +static unsigned int sun4m_startup_irq(struct irq_data *data) +{ + irq_link(data->irq); + sun4m_unmask_irq(data); + return 0; +} -/* We assume the caller has disabled local interrupts when these are called, - * or else very bizarre behavior will result. - */ -static void sun4m_disable_pil_irq(unsigned int pil) +static void sun4m_shutdown_irq(struct irq_data *data) { - sbus_writel(cpu_pil_to_imask[pil], &sun4m_irq_global->mask_set); + sun4m_mask_irq(data); + irq_unlink(data->irq); } -static void sun4m_enable_pil_irq(unsigned int pil) +static struct irq_chip sun4m_irq = { + .name = "sun4m", + .irq_startup = sun4m_startup_irq, + .irq_shutdown = sun4m_shutdown_irq, + .irq_mask = sun4m_mask_irq, + .irq_unmask = sun4m_unmask_irq, +}; + + +static unsigned int sun4m_build_device_irq(struct platform_device *op, + unsigned int real_irq) { - sbus_writel(cpu_pil_to_imask[pil], &sun4m_irq_global->mask_clear); + struct sun4m_handler_data *handler_data; + unsigned int irq; + unsigned int pil; + + if (real_irq >= OBP_INT_LEVEL_VME) { + prom_printf("Bogus sun4m IRQ %u\n", real_irq); + prom_halt(); + } + pil = (real_irq & 0xf); + irq = irq_alloc(real_irq, pil); + + if (irq == 0) + goto out; + + handler_data = irq_get_handler_data(irq); + if (unlikely(handler_data)) + goto out; + + handler_data = kzalloc(sizeof(struct sun4m_handler_data), GFP_ATOMIC); + if (unlikely(!handler_data)) { + prom_printf("IRQ: kzalloc(sun4m_handler_data) failed.\n"); + prom_halt(); + } + + handler_data->mask = sun4m_imask[real_irq]; + handler_data->percpu = real_irq < OBP_INT_LEVEL_ONBOARD; + irq_set_chip_and_handler_name(irq, &sun4m_irq, + handle_level_irq, "level"); + irq_set_handler_data(irq, handler_data); + +out: + return irq; } #ifdef CONFIG_SMP static void sun4m_send_ipi(int cpu, int level) { - unsigned long mask = sun4m_get_irqmask(level); - - sbus_writel(mask, &sun4m_irq_percpu[cpu]->set); + sbus_writel(SUN4M_SOFT_INT(level), &sun4m_irq_percpu[cpu]->set); } static void sun4m_clear_ipi(int cpu, int level) { - unsigned long mask = sun4m_get_irqmask(level); - - sbus_writel(mask, &sun4m_irq_percpu[cpu]->clear); + sbus_writel(SUN4M_SOFT_INT(level), &sun4m_irq_percpu[cpu]->clear); } static void sun4m_set_udt(int cpu) @@ -343,7 +353,15 @@ void sun4m_nmi(struct pt_regs *regs) prom_halt(); } -/* Exported for sun4m_smp.c */ +void sun4m_unmask_profile_irq(void) +{ + unsigned long flags; + + local_irq_save(flags); + sbus_writel(sun4m_imask[SUN4M_PROFILE_IRQ], &sun4m_irq_global->mask_clear); + local_irq_restore(flags); +} + void sun4m_clear_profile_irq(int cpu) { sbus_readl(&timers_percpu[cpu]->l14_limit); @@ -358,6 +376,7 @@ static void __init sun4m_init_timers(irq_handler_t counter_fn) { struct device_node *dp = of_find_node_by_name(NULL, "counter"); int i, err, len, num_cpu_timers; + unsigned int irq; const u32 *addr; if (!dp) { @@ -384,8 +403,9 @@ static void __init sun4m_init_timers(irq_handler_t counter_fn) master_l10_counter = &timers_global->l10_count; - err = request_irq(SUN4M_TIMER_IRQ, counter_fn, - (IRQF_DISABLED | SA_STATIC_ALLOC), "timer", NULL); + irq = sun4m_build_device_irq(NULL, SUN4M_TIMER_IRQ); + + err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL); if (err) { printk(KERN_ERR "sun4m_init_timers: Register IRQ error %d.\n", err); @@ -452,14 +472,11 @@ void __init sun4m_init_IRQ(void) if (num_cpu_iregs == 4) sbus_writel(0, &sun4m_irq_global->interrupt_target); - BTFIXUPSET_CALL(enable_irq, sun4m_enable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(disable_irq, sun4m_disable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(enable_pil_irq, sun4m_enable_pil_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(disable_pil_irq, sun4m_disable_pil_irq, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(clear_clock_irq, sun4m_clear_clock_irq, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(load_profile_irq, sun4m_load_profile_irq, BTFIXUPCALL_NORM); sparc_irq_config.init_timers = sun4m_init_timers; + sparc_irq_config.build_device_irq = sun4m_build_device_irq; #ifdef CONFIG_SMP BTFIXUPSET_CALL(set_cpu_int, sun4m_send_ipi, BTFIXUPCALL_NORM); diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c index 5cc7dc51de3d..594768686525 100644 --- a/arch/sparc/kernel/sun4m_smp.c +++ b/arch/sparc/kernel/sun4m_smp.c @@ -15,6 +15,9 @@ #include "irq.h" #include "kernel.h" +#define IRQ_IPI_SINGLE 12 +#define IRQ_IPI_MASK 13 +#define IRQ_IPI_RESCHED 14 #define IRQ_CROSS_CALL 15 static inline unsigned long @@ -26,6 +29,7 @@ swap_ulong(volatile unsigned long *ptr, unsigned long val) return val; } +static void smp4m_ipi_init(void); static void smp_setup_percpu_timer(void); void __cpuinit smp4m_callin(void) @@ -59,8 +63,6 @@ void __cpuinit smp4m_callin(void) local_flush_cache_all(); local_flush_tlb_all(); - cpu_probe(); - /* Fix idle thread fields. */ __asm__ __volatile__("ld [%0], %%g6\n\t" : : "r" (¤t_set[cpuid]) @@ -70,7 +72,7 @@ void __cpuinit smp4m_callin(void) atomic_inc(&init_mm.mm_count); current->active_mm = &init_mm; - while (!cpu_isset(cpuid, smp_commenced_mask)) + while (!cpumask_test_cpu(cpuid, &smp_commenced_mask)) mb(); local_irq_enable(); @@ -83,6 +85,7 @@ void __cpuinit smp4m_callin(void) */ void __init smp4m_boot_cpus(void) { + smp4m_ipi_init(); smp_setup_percpu_timer(); local_flush_cache_all(); } @@ -150,18 +153,25 @@ void __init smp4m_smp_done(void) /* Ok, they are spinning and ready to go. */ } -/* At each hardware IRQ, we get this called to forward IRQ reception - * to the next processor. The caller must disable the IRQ level being - * serviced globally so that there are no double interrupts received. - * - * XXX See sparc64 irq.c. - */ -void smp4m_irq_rotate(int cpu) + +/* Initialize IPIs on the SUN4M SMP machine */ +static void __init smp4m_ipi_init(void) +{ +} + +static void smp4m_ipi_resched(int cpu) +{ + set_cpu_int(cpu, IRQ_IPI_RESCHED); +} + +static void smp4m_ipi_single(int cpu) { - int next = cpu_data(cpu).next; + set_cpu_int(cpu, IRQ_IPI_SINGLE); +} - if (next != cpu) - set_irq_udt(next); +static void smp4m_ipi_mask_one(int cpu) +{ + set_cpu_int(cpu, IRQ_IPI_MASK); } static struct smp_funcall { @@ -199,10 +209,10 @@ static void smp4m_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, { register int i; - cpu_clear(smp_processor_id(), mask); - cpus_and(mask, cpu_online_map, mask); + cpumask_clear_cpu(smp_processor_id(), &mask); + cpumask_and(&mask, cpu_online_mask, &mask); for (i = 0; i < ncpus; i++) { - if (cpu_isset(i, mask)) { + if (cpumask_test_cpu(i, &mask)) { ccall_info.processors_in[i] = 0; ccall_info.processors_out[i] = 0; set_cpu_int(i, IRQ_CROSS_CALL); @@ -218,7 +228,7 @@ static void smp4m_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, i = 0; do { - if (!cpu_isset(i, mask)) + if (!cpumask_test_cpu(i, &mask)) continue; while (!ccall_info.processors_in[i]) barrier(); @@ -226,7 +236,7 @@ static void smp4m_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, i = 0; do { - if (!cpu_isset(i, mask)) + if (!cpumask_test_cpu(i, &mask)) continue; while (!ccall_info.processors_out[i]) barrier(); @@ -277,7 +287,7 @@ static void __cpuinit smp_setup_percpu_timer(void) load_profile_irq(cpu, lvl14_resolution); if (cpu == boot_cpu_id) - enable_pil_irq(14); + sun4m_unmask_profile_irq(); } static void __init smp4m_blackbox_id(unsigned *addr) @@ -306,4 +316,7 @@ void __init sun4m_init_smp(void) BTFIXUPSET_BLACKBOX(load_current, smp4m_blackbox_current); BTFIXUPSET_CALL(smp_cross_call, smp4m_cross_call, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(__hard_smp_processor_id, __smp4m_processor_id, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(smp_ipi_resched, smp4m_ipi_resched, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(smp_ipi_single, smp4m_ipi_single, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(smp_ipi_mask_one, smp4m_ipi_mask_one, BTFIXUPCALL_NORM); } diff --git a/arch/sparc/kernel/sysfs.c b/arch/sparc/kernel/sysfs.c index 1eb8b00aed75..7408201d7efb 100644 --- a/arch/sparc/kernel/sysfs.c +++ b/arch/sparc/kernel/sysfs.c @@ -103,9 +103,10 @@ static unsigned long run_on_cpu(unsigned long cpu, unsigned long (*func)(unsigned long), unsigned long arg) { - cpumask_t old_affinity = current->cpus_allowed; + cpumask_t old_affinity; unsigned long ret; + cpumask_copy(&old_affinity, tsk_cpus_allowed(current)); /* should return -EINVAL to userspace */ if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) return 0; diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c index 96046a4024c2..1060e0672a4b 100644 --- a/arch/sparc/kernel/time_32.c +++ b/arch/sparc/kernel/time_32.c @@ -228,14 +228,10 @@ static void __init sbus_time_init(void) void __init time_init(void) { -#ifdef CONFIG_PCI - extern void pci_time_init(void); - if (pcic_present()) { + if (pcic_present()) pci_time_init(); - return; - } -#endif - sbus_time_init(); + else + sbus_time_init(); } diff --git a/arch/sparc/kernel/us2e_cpufreq.c b/arch/sparc/kernel/us2e_cpufreq.c index 8f982b76c712..531d54fc9829 100644 --- a/arch/sparc/kernel/us2e_cpufreq.c +++ b/arch/sparc/kernel/us2e_cpufreq.c @@ -237,7 +237,7 @@ static unsigned int us2e_freq_get(unsigned int cpu) if (!cpu_online(cpu)) return 0; - cpus_allowed = current->cpus_allowed; + cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current)); set_cpus_allowed_ptr(current, cpumask_of(cpu)); clock_tick = sparc64_get_clock_tick(cpu) / 1000; @@ -258,7 +258,7 @@ static void us2e_set_cpu_divider_index(unsigned int cpu, unsigned int index) if (!cpu_online(cpu)) return; - cpus_allowed = current->cpus_allowed; + cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current)); set_cpus_allowed_ptr(current, cpumask_of(cpu)); new_freq = clock_tick = sparc64_get_clock_tick(cpu) / 1000; diff --git a/arch/sparc/kernel/us3_cpufreq.c b/arch/sparc/kernel/us3_cpufreq.c index f35d1e794548..9a8ceb700833 100644 --- a/arch/sparc/kernel/us3_cpufreq.c +++ b/arch/sparc/kernel/us3_cpufreq.c @@ -85,7 +85,7 @@ static unsigned int us3_freq_get(unsigned int cpu) if (!cpu_online(cpu)) return 0; - cpus_allowed = current->cpus_allowed; + cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current)); set_cpus_allowed_ptr(current, cpumask_of(cpu)); reg = read_safari_cfg(); @@ -105,7 +105,7 @@ static void us3_set_cpu_divider_index(unsigned int cpu, unsigned int index) if (!cpu_online(cpu)) return; - cpus_allowed = current->cpus_allowed; + cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current)); set_cpus_allowed_ptr(current, cpumask_of(cpu)); new_freq = sparc64_get_clock_tick(cpu) / 1000; diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 846d1c4374ea..7f01b8fce8bc 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -15,7 +15,6 @@ lib-$(CONFIG_SPARC32) += divdi3.o udivdi3.o lib-$(CONFIG_SPARC32) += copy_user.o locks.o lib-y += atomic_$(BITS).o lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o -lib-$(CONFIG_SPARC32) += rwsem_32.o lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o diff --git a/arch/sparc/lib/rwsem_32.S b/arch/sparc/lib/rwsem_32.S deleted file mode 100644 index 9675268e7fde..000000000000 --- a/arch/sparc/lib/rwsem_32.S +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Assembly part of rw semaphores. - * - * Copyright (C) 1999 Jakub Jelinek (jakub@redhat.com) - */ - -#include <asm/ptrace.h> -#include <asm/psr.h> - - .section .sched.text, "ax" - .align 4 - - .globl ___down_read -___down_read: - rd %psr, %g3 - nop - nop - nop - or %g3, PSR_PIL, %g7 - wr %g7, 0, %psr - nop - nop - nop -#ifdef CONFIG_SMP -1: ldstub [%g1 + 4], %g7 - tst %g7 - bne 1b - ld [%g1], %g7 - sub %g7, 1, %g7 - st %g7, [%g1] - stb %g0, [%g1 + 4] -#else - ld [%g1], %g7 - sub %g7, 1, %g7 - st %g7, [%g1] -#endif - wr %g3, 0, %psr - add %g7, 1, %g7 - nop - nop - subcc %g7, 1, %g7 - bneg 3f - nop -2: jmpl %o7, %g0 - mov %g4, %o7 -3: save %sp, -64, %sp - mov %g1, %l1 - mov %g4, %l4 - bcs 4f - mov %g5, %l5 - call down_read_failed - mov %l1, %o0 - mov %l1, %g1 - mov %l4, %g4 - ba ___down_read - restore %l5, %g0, %g5 -4: call down_read_failed_biased - mov %l1, %o0 - mov %l1, %g1 - mov %l4, %g4 - ba 2b - restore %l5, %g0, %g5 - - .globl ___down_write -___down_write: - rd %psr, %g3 - nop - nop - nop - or %g3, PSR_PIL, %g7 - wr %g7, 0, %psr - sethi %hi(0x01000000), %g2 - nop - nop -#ifdef CONFIG_SMP -1: ldstub [%g1 + 4], %g7 - tst %g7 - bne 1b - ld [%g1], %g7 - sub %g7, %g2, %g7 - st %g7, [%g1] - stb %g0, [%g1 + 4] -#else - ld [%g1], %g7 - sub %g7, %g2, %g7 - st %g7, [%g1] -#endif - wr %g3, 0, %psr - add %g7, %g2, %g7 - nop - nop - subcc %g7, %g2, %g7 - bne 3f - nop -2: jmpl %o7, %g0 - mov %g4, %o7 -3: save %sp, -64, %sp - mov %g1, %l1 - mov %g4, %l4 - bcs 4f - mov %g5, %l5 - call down_write_failed - mov %l1, %o0 - mov %l1, %g1 - mov %l4, %g4 - ba ___down_write - restore %l5, %g0, %g5 -4: call down_write_failed_biased - mov %l1, %o0 - mov %l1, %g1 - mov %l4, %g4 - ba 2b - restore %l5, %g0, %g5 - - .text - .globl ___up_read -___up_read: - rd %psr, %g3 - nop - nop - nop - or %g3, PSR_PIL, %g7 - wr %g7, 0, %psr - nop - nop - nop -#ifdef CONFIG_SMP -1: ldstub [%g1 + 4], %g7 - tst %g7 - bne 1b - ld [%g1], %g7 - add %g7, 1, %g7 - st %g7, [%g1] - stb %g0, [%g1 + 4] -#else - ld [%g1], %g7 - add %g7, 1, %g7 - st %g7, [%g1] -#endif - wr %g3, 0, %psr - nop - nop - nop - cmp %g7, 0 - be 3f - nop -2: jmpl %o7, %g0 - mov %g4, %o7 -3: save %sp, -64, %sp - mov %g1, %l1 - mov %g4, %l4 - mov %g5, %l5 - clr %o1 - call __rwsem_wake - mov %l1, %o0 - mov %l1, %g1 - mov %l4, %g4 - ba 2b - restore %l5, %g0, %g5 - - .globl ___up_write -___up_write: - rd %psr, %g3 - nop - nop - nop - or %g3, PSR_PIL, %g7 - wr %g7, 0, %psr - sethi %hi(0x01000000), %g2 - nop - nop -#ifdef CONFIG_SMP -1: ldstub [%g1 + 4], %g7 - tst %g7 - bne 1b - ld [%g1], %g7 - add %g7, %g2, %g7 - st %g7, [%g1] - stb %g0, [%g1 + 4] -#else - ld [%g1], %g7 - add %g7, %g2, %g7 - st %g7, [%g1] -#endif - wr %g3, 0, %psr - sub %g7, %g2, %g7 - nop - nop - addcc %g7, %g2, %g7 - bcs 3f - nop -2: jmpl %o7, %g0 - mov %g4, %o7 -3: save %sp, -64, %sp - mov %g1, %l1 - mov %g4, %l4 - mov %g5, %l5 - mov %g7, %o1 - call __rwsem_wake - mov %l1, %o0 - mov %l1, %g1 - mov %l4, %g4 - ba 2b - restore %l5, %g0, %g5 diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 2f6ae1d1fb6b..e10cd03fab80 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -862,7 +862,7 @@ static void init_node_masks_nonnuma(void) for (i = 0; i < NR_CPUS; i++) numa_cpu_lookup_table[i] = 0; - numa_cpumask_lookup_table[0] = CPU_MASK_ALL; + cpumask_setall(&numa_cpumask_lookup_table[0]); } #ifdef CONFIG_NEED_MULTIPLE_NODES @@ -1080,7 +1080,7 @@ static void __init numa_parse_mdesc_group_cpus(struct mdesc_handle *md, { u64 arc; - cpus_clear(*mask); + cpumask_clear(mask); mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_BACK) { u64 target = mdesc_arc_target(md, arc); @@ -1091,7 +1091,7 @@ static void __init numa_parse_mdesc_group_cpus(struct mdesc_handle *md, continue; id = mdesc_get_property(md, target, "id", NULL); if (*id < nr_cpu_ids) - cpu_set(*id, *mask); + cpumask_set_cpu(*id, mask); } } @@ -1153,13 +1153,13 @@ static int __init numa_parse_mdesc_group(struct mdesc_handle *md, u64 grp, numa_parse_mdesc_group_cpus(md, grp, &mask); - for_each_cpu_mask(cpu, mask) + for_each_cpu(cpu, &mask) numa_cpu_lookup_table[cpu] = index; - numa_cpumask_lookup_table[index] = mask; + cpumask_copy(&numa_cpumask_lookup_table[index], &mask); if (numa_debug) { printk(KERN_INFO "NUMA GROUP[%d]: cpus [ ", index); - for_each_cpu_mask(cpu, mask) + for_each_cpu(cpu, &mask) printk("%d ", cpu); printk("]\n"); } @@ -1218,7 +1218,7 @@ static int __init numa_parse_jbus(void) index = 0; for_each_present_cpu(cpu) { numa_cpu_lookup_table[cpu] = index; - numa_cpumask_lookup_table[index] = cpumask_of_cpu(cpu); + cpumask_copy(&numa_cpumask_lookup_table[index], cpumask_of(cpu)); node_masks[index].mask = ~((1UL << 36UL) - 1UL); node_masks[index].val = cpu << 36UL; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cbc70a27430c..c8b41623377f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -254,7 +254,7 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) } #endif -static int disable_smep __initdata; +static int disable_smep __cpuinitdata; static __init int setup_disable_smep(char *arg) { disable_smep = 1; @@ -262,7 +262,7 @@ static __init int setup_disable_smep(char *arg) } __setup("nosmep", setup_disable_smep); -static __init void setup_smep(struct cpuinfo_x86 *c) +static __cpuinit void setup_smep(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_SMEP)) { if (unlikely(disable_smep)) { diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index b2699bb2e530..d871b14ed5a1 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -42,6 +42,7 @@ #include <linux/genhd.h> #include <net/tcp.h> #include <linux/lru_cache.h> +#include <linux/prefetch.h> #ifdef __CHECKER__ # define __protected_by(x) __attribute__((require_context(x,1,999,"rdwr"))) diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index c9213ead4a26..a4d6cb0c0343 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -34,6 +34,7 @@ #include <linux/delay.h> #include <linux/dma-mapping.h> #include <linux/workqueue.h> +#include <linux/prefetch.h> #include <linux/i7300_idle.h> #include "dma.h" #include "registers.h" diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index effd140fc042..f4a51d4d0349 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -34,6 +34,7 @@ #include <linux/delay.h> #include <linux/dma-mapping.h> #include <linux/workqueue.h> +#include <linux/prefetch.h> #include <linux/i7300_idle.h> #include "dma.h" #include "dma_v2.h" diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index d0f499098479..d845dc4b7103 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -60,6 +60,7 @@ #include <linux/gfp.h> #include <linux/dmaengine.h> #include <linux/dma-mapping.h> +#include <linux/prefetch.h> #include "registers.h" #include "hw.h" #include "dma.h" diff --git a/drivers/ide/ide-acpi.c b/drivers/ide/ide-acpi.c index c26c11905ffe..2af8cb460a3b 100644 --- a/drivers/ide/ide-acpi.c +++ b/drivers/ide/ide-acpi.c @@ -416,21 +416,21 @@ void ide_acpi_get_timing(ide_hwif_t *hwif) out_obj = output.pointer; if (out_obj->type != ACPI_TYPE_BUFFER) { - kfree(output.pointer); DEBPRINT("Run _GTM: error: " "expected object type of ACPI_TYPE_BUFFER, " "got 0x%x\n", out_obj->type); + kfree(output.pointer); return; } if (!out_obj->buffer.length || !out_obj->buffer.pointer || out_obj->buffer.length != sizeof(struct GTM_buffer)) { - kfree(output.pointer); printk(KERN_ERR "%s: unexpected _GTM length (0x%x)[should be 0x%zx] or " "addr (0x%p)\n", __func__, out_obj->buffer.length, sizeof(struct GTM_buffer), out_obj->buffer.pointer); + kfree(output.pointer); return; } diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index 5a702d02c848..61fdf544fbd6 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -73,7 +73,7 @@ static int ide_floppy_callback(ide_drive_t *drive, int dsc) drive->failed_pc = NULL; if (pc->c[0] == GPCMD_READ_10 || pc->c[0] == GPCMD_WRITE_10 || - (rq && rq->cmd_type == REQ_TYPE_BLOCK_PC)) + rq->cmd_type == REQ_TYPE_BLOCK_PC) uptodate = 1; /* FIXME */ else if (pc->c[0] == GPCMD_REQUEST_SENSE) { diff --git a/drivers/ide/ide-scan-pci.c b/drivers/ide/ide-scan-pci.c index 0e79efff1deb..c3da53e7bb2b 100644 --- a/drivers/ide/ide-scan-pci.c +++ b/drivers/ide/ide-scan-pci.c @@ -88,7 +88,7 @@ static int __init ide_scan_pcibus(void) struct list_head *l, *n; pre_init = 0; - while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev))) + for_each_pci_dev(dev) ide_scan_pcidev(dev); /* diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c index ebcf8e470a97..1db7c4368dbf 100644 --- a/drivers/ide/pmac.c +++ b/drivers/ide/pmac.c @@ -1334,7 +1334,7 @@ out_free_pmif: static int pmac_ide_pci_suspend(struct pci_dev *pdev, pm_message_t mesg) { - pmac_ide_hwif_t *pmif = (pmac_ide_hwif_t *)pci_get_drvdata(pdev); + pmac_ide_hwif_t *pmif = pci_get_drvdata(pdev); int rc = 0; if (mesg.event != pdev->dev.power.power_state.event @@ -1350,7 +1350,7 @@ pmac_ide_pci_suspend(struct pci_dev *pdev, pm_message_t mesg) static int pmac_ide_pci_resume(struct pci_dev *pdev) { - pmac_ide_hwif_t *pmif = (pmac_ide_hwif_t *)pci_get_drvdata(pdev); + pmac_ide_hwif_t *pmif = pci_get_drvdata(pdev); int rc = 0; if (pdev->dev.power.power_state.event != PM_EVENT_ON) { diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c index dc85d777578e..0cfc455630d0 100644 --- a/drivers/infiniband/hw/amso1100/c2.c +++ b/drivers/infiniband/hw/amso1100/c2.c @@ -47,6 +47,7 @@ #include <linux/init.h> #include <linux/dma-mapping.h> #include <linux/slab.h> +#include <linux/prefetch.h> #include <asm/io.h> #include <asm/irq.h> diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 5c9362792f1d..70bd738b8b99 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -493,11 +493,11 @@ void bitmap_update_sb(struct bitmap *bitmap) spin_unlock_irqrestore(&bitmap->lock, flags); sb = kmap_atomic(bitmap->sb_page, KM_USER0); sb->events = cpu_to_le64(bitmap->mddev->events); - if (bitmap->mddev->events < bitmap->events_cleared) { + if (bitmap->mddev->events < bitmap->events_cleared) /* rocking back to read-only */ bitmap->events_cleared = bitmap->mddev->events; - sb->events_cleared = cpu_to_le64(bitmap->events_cleared); - } + sb->events_cleared = cpu_to_le64(bitmap->events_cleared); + sb->state = cpu_to_le32(bitmap->flags); /* Just in case these have been changed via sysfs: */ sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ); sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind); @@ -618,7 +618,7 @@ success: if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN) bitmap->flags |= BITMAP_HOSTENDIAN; bitmap->events_cleared = le64_to_cpu(sb->events_cleared); - if (sb->state & cpu_to_le32(BITMAP_STALE)) + if (bitmap->flags & BITMAP_STALE) bitmap->events_cleared = bitmap->mddev->events; err = 0; out: @@ -652,9 +652,11 @@ static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits, switch (op) { case MASK_SET: sb->state |= cpu_to_le32(bits); + bitmap->flags |= bits; break; case MASK_UNSET: sb->state &= cpu_to_le32(~bits); + bitmap->flags &= ~bits; break; default: BUG(); diff --git a/drivers/md/md.c b/drivers/md/md.c index 7d6f7f18a920..aa640a85bb21 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3324,7 +3324,7 @@ resync_start_store(mddev_t *mddev, const char *buf, size_t len) char *e; unsigned long long n = simple_strtoull(buf, &e, 10); - if (mddev->pers) + if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) return -EBUSY; if (cmd_match(buf, "none")) n = MaxSector; @@ -4347,13 +4347,19 @@ static int md_alloc(dev_t dev, char *name) disk->fops = &md_fops; disk->private_data = mddev; disk->queue = mddev->queue; + blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA); /* Allow extended partitions. This makes the * 'mdp' device redundant, but we can't really * remove it now. */ disk->flags |= GENHD_FL_EXT_DEVT; - add_disk(disk); mddev->gendisk = disk; + /* As soon as we call add_disk(), another thread could get + * through to md_open, so make sure it doesn't get too far + */ + mutex_lock(&mddev->open_mutex); + add_disk(disk); + error = kobject_init_and_add(&mddev->kobj, &md_ktype, &disk_to_dev(disk)->kobj, "%s", "md"); if (error) { @@ -4367,8 +4373,7 @@ static int md_alloc(dev_t dev, char *name) if (mddev->kobj.sd && sysfs_create_group(&mddev->kobj, &md_bitmap_group)) printk(KERN_DEBUG "pointless warning\n"); - - blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA); + mutex_unlock(&mddev->open_mutex); abort: mutex_unlock(&disks_mutex); if (!error && mddev->kobj.sd) { @@ -5211,6 +5216,16 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) } else super_types[mddev->major_version]. validate_super(mddev, rdev); + if ((info->state & (1<<MD_DISK_SYNC)) && + (!test_bit(In_sync, &rdev->flags) || + rdev->raid_disk != info->raid_disk)) { + /* This was a hot-add request, but events doesn't + * match, so reject it. + */ + export_rdev(rdev); + return -EINVAL; + } + if (test_bit(In_sync, &rdev->flags)) rdev->saved_raid_disk = rdev->raid_disk; else diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index c35890990985..3535c23af288 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -146,7 +146,7 @@ static void multipath_status (struct seq_file *seq, mddev_t *mddev) int i; seq_printf (seq, " [%d/%d] [", conf->raid_disks, - conf->working_disks); + conf->raid_disks - mddev->degraded); for (i = 0; i < conf->raid_disks; i++) seq_printf (seq, "%s", conf->multipaths[i].rdev && @@ -186,35 +186,36 @@ static int multipath_congested(void *data, int bits) static void multipath_error (mddev_t *mddev, mdk_rdev_t *rdev) { multipath_conf_t *conf = mddev->private; + char b[BDEVNAME_SIZE]; - if (conf->working_disks <= 1) { + if (conf->raid_disks - mddev->degraded <= 1) { /* * Uh oh, we can do nothing if this is our last path, but * first check if this is a queued request for a device * which has just failed. */ printk(KERN_ALERT - "multipath: only one IO path left and IO error.\n"); + "multipath: only one IO path left and IO error.\n"); /* leave it active... it's all we have */ - } else { - /* - * Mark disk as unusable - */ - if (!test_bit(Faulty, &rdev->flags)) { - char b[BDEVNAME_SIZE]; - clear_bit(In_sync, &rdev->flags); - set_bit(Faulty, &rdev->flags); - set_bit(MD_CHANGE_DEVS, &mddev->flags); - conf->working_disks--; - mddev->degraded++; - printk(KERN_ALERT "multipath: IO failure on %s," - " disabling IO path.\n" - "multipath: Operation continuing" - " on %d IO paths.\n", - bdevname (rdev->bdev,b), - conf->working_disks); - } + return; + } + /* + * Mark disk as unusable + */ + if (test_and_clear_bit(In_sync, &rdev->flags)) { + unsigned long flags; + spin_lock_irqsave(&conf->device_lock, flags); + mddev->degraded++; + spin_unlock_irqrestore(&conf->device_lock, flags); } + set_bit(Faulty, &rdev->flags); + set_bit(MD_CHANGE_DEVS, &mddev->flags); + printk(KERN_ALERT "multipath: IO failure on %s," + " disabling IO path.\n" + "multipath: Operation continuing" + " on %d IO paths.\n", + bdevname(rdev->bdev, b), + conf->raid_disks - mddev->degraded); } static void print_multipath_conf (multipath_conf_t *conf) @@ -227,7 +228,7 @@ static void print_multipath_conf (multipath_conf_t *conf) printk("(conf==NULL)\n"); return; } - printk(" --- wd:%d rd:%d\n", conf->working_disks, + printk(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded, conf->raid_disks); for (i = 0; i < conf->raid_disks; i++) { @@ -274,10 +275,11 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) PAGE_CACHE_SIZE - 1); } - conf->working_disks++; + spin_lock_irq(&conf->device_lock); mddev->degraded--; rdev->raid_disk = path; set_bit(In_sync, &rdev->flags); + spin_unlock_irq(&conf->device_lock); rcu_assign_pointer(p->rdev, rdev); err = 0; md_integrity_add_rdev(rdev, mddev); @@ -391,6 +393,7 @@ static int multipath_run (mddev_t *mddev) int disk_idx; struct multipath_info *disk; mdk_rdev_t *rdev; + int working_disks; if (md_check_no_bitmap(mddev)) return -EINVAL; @@ -424,7 +427,7 @@ static int multipath_run (mddev_t *mddev) goto out_free_conf; } - conf->working_disks = 0; + working_disks = 0; list_for_each_entry(rdev, &mddev->disks, same_set) { disk_idx = rdev->raid_disk; if (disk_idx < 0 || @@ -446,7 +449,7 @@ static int multipath_run (mddev_t *mddev) } if (!test_bit(Faulty, &rdev->flags)) - conf->working_disks++; + working_disks++; } conf->raid_disks = mddev->raid_disks; @@ -454,12 +457,12 @@ static int multipath_run (mddev_t *mddev) spin_lock_init(&conf->device_lock); INIT_LIST_HEAD(&conf->retry_list); - if (!conf->working_disks) { + if (!working_disks) { printk(KERN_ERR "multipath: no operational IO paths for %s\n", mdname(mddev)); goto out_free_conf; } - mddev->degraded = conf->raid_disks - conf->working_disks; + mddev->degraded = conf->raid_disks - working_disks; conf->pool = mempool_create_kmalloc_pool(NR_RESERVED_BUFS, sizeof(struct multipath_bh)); @@ -481,7 +484,8 @@ static int multipath_run (mddev_t *mddev) printk(KERN_INFO "multipath: array %s active with %d out of %d IO paths\n", - mdname(mddev), conf->working_disks, mddev->raid_disks); + mdname(mddev), conf->raid_disks - mddev->degraded, + mddev->raid_disks); /* * Ok, everything is just fine now */ diff --git a/drivers/md/multipath.h b/drivers/md/multipath.h index d1c2a8d78395..3c5a45eb5f8a 100644 --- a/drivers/md/multipath.h +++ b/drivers/md/multipath.h @@ -9,7 +9,6 @@ struct multipath_private_data { mddev_t *mddev; struct multipath_info *multipaths; int raid_disks; - int working_disks; spinlock_t device_lock; struct list_head retry_list; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 2b7a7ff401dc..5d096096f958 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -297,23 +297,24 @@ static void raid1_end_read_request(struct bio *bio, int error) rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev); } -static void r1_bio_write_done(r1bio_t *r1_bio, int vcnt, struct bio_vec *bv, - int behind) +static void r1_bio_write_done(r1bio_t *r1_bio) { if (atomic_dec_and_test(&r1_bio->remaining)) { /* it really is the end of this request */ if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { /* free extra copy of the data pages */ - int i = vcnt; + int i = r1_bio->behind_page_count; while (i--) - safe_put_page(bv[i].bv_page); + safe_put_page(r1_bio->behind_pages[i]); + kfree(r1_bio->behind_pages); + r1_bio->behind_pages = NULL; } /* clear the bitmap if all writes complete successfully */ bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, r1_bio->sectors, !test_bit(R1BIO_Degraded, &r1_bio->state), - behind); + test_bit(R1BIO_BehindIO, &r1_bio->state)); md_write_end(r1_bio->mddev); raid_end_bio_io(r1_bio); } @@ -386,7 +387,7 @@ static void raid1_end_write_request(struct bio *bio, int error) * Let's see if all mirrored write operations have finished * already. */ - r1_bio_write_done(r1_bio, bio->bi_vcnt, bio->bi_io_vec, behind); + r1_bio_write_done(r1_bio); if (to_put) bio_put(to_put); @@ -411,10 +412,10 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) { const sector_t this_sector = r1_bio->sector; const int sectors = r1_bio->sectors; - int new_disk = -1; int start_disk; + int best_disk; int i; - sector_t new_distance, current_distance; + sector_t best_dist; mdk_rdev_t *rdev; int choose_first; @@ -425,6 +426,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) * We take the first readable disk when above the resync window. */ retry: + best_disk = -1; + best_dist = MaxSector; if (conf->mddev->recovery_cp < MaxSector && (this_sector + sectors >= conf->next_resync)) { choose_first = 1; @@ -434,8 +437,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) start_disk = conf->last_used; } - /* make sure the disk is operational */ for (i = 0 ; i < conf->raid_disks ; i++) { + sector_t dist; int disk = start_disk + i; if (disk >= conf->raid_disks) disk -= conf->raid_disks; @@ -443,60 +446,43 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) rdev = rcu_dereference(conf->mirrors[disk].rdev); if (r1_bio->bios[disk] == IO_BLOCKED || rdev == NULL - || !test_bit(In_sync, &rdev->flags)) + || test_bit(Faulty, &rdev->flags)) continue; - - new_disk = disk; - if (!test_bit(WriteMostly, &rdev->flags)) - break; - } - - if (new_disk < 0 || choose_first) - goto rb_out; - - /* - * Don't change to another disk for sequential reads: - */ - if (conf->next_seq_sect == this_sector) - goto rb_out; - if (this_sector == conf->mirrors[new_disk].head_position) - goto rb_out; - - current_distance = abs(this_sector - - conf->mirrors[new_disk].head_position); - - /* look for a better disk - i.e. head is closer */ - start_disk = new_disk; - for (i = 1; i < conf->raid_disks; i++) { - int disk = start_disk + 1; - if (disk >= conf->raid_disks) - disk -= conf->raid_disks; - - rdev = rcu_dereference(conf->mirrors[disk].rdev); - if (r1_bio->bios[disk] == IO_BLOCKED - || rdev == NULL - || !test_bit(In_sync, &rdev->flags) - || test_bit(WriteMostly, &rdev->flags)) + if (!test_bit(In_sync, &rdev->flags) && + rdev->recovery_offset < this_sector + sectors) continue; - - if (!atomic_read(&rdev->nr_pending)) { - new_disk = disk; + if (test_bit(WriteMostly, &rdev->flags)) { + /* Don't balance among write-mostly, just + * use the first as a last resort */ + if (best_disk < 0) + best_disk = disk; + continue; + } + /* This is a reasonable device to use. It might + * even be best. + */ + dist = abs(this_sector - conf->mirrors[disk].head_position); + if (choose_first + /* Don't change to another disk for sequential reads */ + || conf->next_seq_sect == this_sector + || dist == 0 + /* If device is idle, use it */ + || atomic_read(&rdev->nr_pending) == 0) { + best_disk = disk; break; } - new_distance = abs(this_sector - conf->mirrors[disk].head_position); - if (new_distance < current_distance) { - current_distance = new_distance; - new_disk = disk; + if (dist < best_dist) { + best_dist = dist; + best_disk = disk; } } - rb_out: - if (new_disk >= 0) { - rdev = rcu_dereference(conf->mirrors[new_disk].rdev); + if (best_disk >= 0) { + rdev = rcu_dereference(conf->mirrors[best_disk].rdev); if (!rdev) goto retry; atomic_inc(&rdev->nr_pending); - if (!test_bit(In_sync, &rdev->flags)) { + if (test_bit(Faulty, &rdev->flags)) { /* cannot risk returning a device that failed * before we inc'ed nr_pending */ @@ -504,11 +490,11 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) goto retry; } conf->next_seq_sect = this_sector + sectors; - conf->last_used = new_disk; + conf->last_used = best_disk; } rcu_read_unlock(); - return new_disk; + return best_disk; } static int raid1_congested(void *data, int bits) @@ -675,37 +661,36 @@ static void unfreeze_array(conf_t *conf) /* duplicate the data pages for behind I/O - * We return a list of bio_vec rather than just page pointers - * as it makes freeing easier */ -static struct bio_vec *alloc_behind_pages(struct bio *bio) +static void alloc_behind_pages(struct bio *bio, r1bio_t *r1_bio) { int i; struct bio_vec *bvec; - struct bio_vec *pages = kzalloc(bio->bi_vcnt * sizeof(struct bio_vec), + struct page **pages = kzalloc(bio->bi_vcnt * sizeof(struct page*), GFP_NOIO); if (unlikely(!pages)) - goto do_sync_io; + return; bio_for_each_segment(bvec, bio, i) { - pages[i].bv_page = alloc_page(GFP_NOIO); - if (unlikely(!pages[i].bv_page)) + pages[i] = alloc_page(GFP_NOIO); + if (unlikely(!pages[i])) goto do_sync_io; - memcpy(kmap(pages[i].bv_page) + bvec->bv_offset, + memcpy(kmap(pages[i]) + bvec->bv_offset, kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len); - kunmap(pages[i].bv_page); + kunmap(pages[i]); kunmap(bvec->bv_page); } - - return pages; + r1_bio->behind_pages = pages; + r1_bio->behind_page_count = bio->bi_vcnt; + set_bit(R1BIO_BehindIO, &r1_bio->state); + return; do_sync_io: - if (pages) - for (i = 0; i < bio->bi_vcnt && pages[i].bv_page; i++) - put_page(pages[i].bv_page); + for (i = 0; i < bio->bi_vcnt; i++) + if (pages[i]) + put_page(pages[i]); kfree(pages); PRINTK("%dB behind alloc failed, doing sync I/O\n", bio->bi_size); - return NULL; } static int make_request(mddev_t *mddev, struct bio * bio) @@ -717,7 +702,6 @@ static int make_request(mddev_t *mddev, struct bio * bio) int i, targets = 0, disks; struct bitmap *bitmap; unsigned long flags; - struct bio_vec *behind_pages = NULL; const int rw = bio_data_dir(bio); const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA)); @@ -870,9 +854,8 @@ static int make_request(mddev_t *mddev, struct bio * bio) if (bitmap && (atomic_read(&bitmap->behind_writes) < mddev->bitmap_info.max_write_behind) && - !waitqueue_active(&bitmap->behind_wait) && - (behind_pages = alloc_behind_pages(bio)) != NULL) - set_bit(R1BIO_BehindIO, &r1_bio->state); + !waitqueue_active(&bitmap->behind_wait)) + alloc_behind_pages(bio, r1_bio); atomic_set(&r1_bio->remaining, 1); atomic_set(&r1_bio->behind_remaining, 0); @@ -893,7 +876,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) mbio->bi_rw = WRITE | do_flush_fua | do_sync; mbio->bi_private = r1_bio; - if (behind_pages) { + if (r1_bio->behind_pages) { struct bio_vec *bvec; int j; @@ -905,7 +888,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) * them all */ __bio_for_each_segment(bvec, mbio, j, 0) - bvec->bv_page = behind_pages[j].bv_page; + bvec->bv_page = r1_bio->behind_pages[j]; if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags)) atomic_inc(&r1_bio->behind_remaining); } @@ -915,8 +898,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) bio_list_add(&conf->pending_bio_list, mbio); spin_unlock_irqrestore(&conf->device_lock, flags); } - r1_bio_write_done(r1_bio, bio->bi_vcnt, behind_pages, behind_pages != NULL); - kfree(behind_pages); /* the behind pages are attached to the bios now */ + r1_bio_write_done(r1_bio); /* In case raid1d snuck in to freeze_array */ wake_up(&conf->wait_barrier); @@ -1196,194 +1178,210 @@ static void end_sync_write(struct bio *bio, int error) } } -static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) +static int fix_sync_read_error(r1bio_t *r1_bio) { + /* Try some synchronous reads of other devices to get + * good data, much like with normal read errors. Only + * read into the pages we already have so we don't + * need to re-issue the read request. + * We don't need to freeze the array, because being in an + * active sync request, there is no normal IO, and + * no overlapping syncs. + */ + mddev_t *mddev = r1_bio->mddev; conf_t *conf = mddev->private; - int i; - int disks = conf->raid_disks; - struct bio *bio, *wbio; - - bio = r1_bio->bios[r1_bio->read_disk]; + struct bio *bio = r1_bio->bios[r1_bio->read_disk]; + sector_t sect = r1_bio->sector; + int sectors = r1_bio->sectors; + int idx = 0; + while(sectors) { + int s = sectors; + int d = r1_bio->read_disk; + int success = 0; + mdk_rdev_t *rdev; + int start; - if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { - /* We have read all readable devices. If we haven't - * got the block, then there is no hope left. - * If we have, then we want to do a comparison - * and skip the write if everything is the same. - * If any blocks failed to read, then we need to - * attempt an over-write - */ - int primary; - if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) { - for (i=0; i<mddev->raid_disks; i++) - if (r1_bio->bios[i]->bi_end_io == end_sync_read) - md_error(mddev, conf->mirrors[i].rdev); + if (s > (PAGE_SIZE>>9)) + s = PAGE_SIZE >> 9; + do { + if (r1_bio->bios[d]->bi_end_io == end_sync_read) { + /* No rcu protection needed here devices + * can only be removed when no resync is + * active, and resync is currently active + */ + rdev = conf->mirrors[d].rdev; + if (sync_page_io(rdev, + sect, + s<<9, + bio->bi_io_vec[idx].bv_page, + READ, false)) { + success = 1; + break; + } + } + d++; + if (d == conf->raid_disks) + d = 0; + } while (!success && d != r1_bio->read_disk); - md_done_sync(mddev, r1_bio->sectors, 1); + if (!success) { + char b[BDEVNAME_SIZE]; + /* Cannot read from anywhere, array is toast */ + md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev); + printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error" + " for block %llu\n", + mdname(mddev), + bdevname(bio->bi_bdev, b), + (unsigned long long)r1_bio->sector); + md_done_sync(mddev, r1_bio->sectors, 0); put_buf(r1_bio); - return; + return 0; } - for (primary=0; primary<mddev->raid_disks; primary++) - if (r1_bio->bios[primary]->bi_end_io == end_sync_read && - test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) { - r1_bio->bios[primary]->bi_end_io = NULL; - rdev_dec_pending(conf->mirrors[primary].rdev, mddev); - break; - } - r1_bio->read_disk = primary; - for (i=0; i<mddev->raid_disks; i++) - if (r1_bio->bios[i]->bi_end_io == end_sync_read) { - int j; - int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9); - struct bio *pbio = r1_bio->bios[primary]; - struct bio *sbio = r1_bio->bios[i]; - - if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) { - for (j = vcnt; j-- ; ) { - struct page *p, *s; - p = pbio->bi_io_vec[j].bv_page; - s = sbio->bi_io_vec[j].bv_page; - if (memcmp(page_address(p), - page_address(s), - PAGE_SIZE)) - break; - } - } else - j = 0; - if (j >= 0) - mddev->resync_mismatches += r1_bio->sectors; - if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery) - && test_bit(BIO_UPTODATE, &sbio->bi_flags))) { - sbio->bi_end_io = NULL; - rdev_dec_pending(conf->mirrors[i].rdev, mddev); - } else { - /* fixup the bio for reuse */ - int size; - sbio->bi_vcnt = vcnt; - sbio->bi_size = r1_bio->sectors << 9; - sbio->bi_idx = 0; - sbio->bi_phys_segments = 0; - sbio->bi_flags &= ~(BIO_POOL_MASK - 1); - sbio->bi_flags |= 1 << BIO_UPTODATE; - sbio->bi_next = NULL; - sbio->bi_sector = r1_bio->sector + - conf->mirrors[i].rdev->data_offset; - sbio->bi_bdev = conf->mirrors[i].rdev->bdev; - size = sbio->bi_size; - for (j = 0; j < vcnt ; j++) { - struct bio_vec *bi; - bi = &sbio->bi_io_vec[j]; - bi->bv_offset = 0; - if (size > PAGE_SIZE) - bi->bv_len = PAGE_SIZE; - else - bi->bv_len = size; - size -= PAGE_SIZE; - memcpy(page_address(bi->bv_page), - page_address(pbio->bi_io_vec[j].bv_page), - PAGE_SIZE); - } - } - } + start = d; + /* write it back and re-read */ + while (d != r1_bio->read_disk) { + if (d == 0) + d = conf->raid_disks; + d--; + if (r1_bio->bios[d]->bi_end_io != end_sync_read) + continue; + rdev = conf->mirrors[d].rdev; + if (sync_page_io(rdev, + sect, + s<<9, + bio->bi_io_vec[idx].bv_page, + WRITE, false) == 0) { + r1_bio->bios[d]->bi_end_io = NULL; + rdev_dec_pending(rdev, mddev); + md_error(mddev, rdev); + } else + atomic_add(s, &rdev->corrected_errors); + } + d = start; + while (d != r1_bio->read_disk) { + if (d == 0) + d = conf->raid_disks; + d--; + if (r1_bio->bios[d]->bi_end_io != end_sync_read) + continue; + rdev = conf->mirrors[d].rdev; + if (sync_page_io(rdev, + sect, + s<<9, + bio->bi_io_vec[idx].bv_page, + READ, false) == 0) + md_error(mddev, rdev); + } + sectors -= s; + sect += s; + idx ++; } - if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) { - /* ouch - failed to read all of that. - * Try some synchronous reads of other devices to get - * good data, much like with normal read errors. Only - * read into the pages we already have so we don't - * need to re-issue the read request. - * We don't need to freeze the array, because being in an - * active sync request, there is no normal IO, and - * no overlapping syncs. - */ - sector_t sect = r1_bio->sector; - int sectors = r1_bio->sectors; - int idx = 0; - - while(sectors) { - int s = sectors; - int d = r1_bio->read_disk; - int success = 0; - mdk_rdev_t *rdev; - - if (s > (PAGE_SIZE>>9)) - s = PAGE_SIZE >> 9; - do { - if (r1_bio->bios[d]->bi_end_io == end_sync_read) { - /* No rcu protection needed here devices - * can only be removed when no resync is - * active, and resync is currently active - */ - rdev = conf->mirrors[d].rdev; - if (sync_page_io(rdev, - sect, - s<<9, - bio->bi_io_vec[idx].bv_page, - READ, false)) { - success = 1; - break; - } - } - d++; - if (d == conf->raid_disks) - d = 0; - } while (!success && d != r1_bio->read_disk); - - if (success) { - int start = d; - /* write it back and re-read */ - set_bit(R1BIO_Uptodate, &r1_bio->state); - while (d != r1_bio->read_disk) { - if (d == 0) - d = conf->raid_disks; - d--; - if (r1_bio->bios[d]->bi_end_io != end_sync_read) - continue; - rdev = conf->mirrors[d].rdev; - atomic_add(s, &rdev->corrected_errors); - if (sync_page_io(rdev, - sect, - s<<9, - bio->bi_io_vec[idx].bv_page, - WRITE, false) == 0) - md_error(mddev, rdev); - } - d = start; - while (d != r1_bio->read_disk) { - if (d == 0) - d = conf->raid_disks; - d--; - if (r1_bio->bios[d]->bi_end_io != end_sync_read) - continue; - rdev = conf->mirrors[d].rdev; - if (sync_page_io(rdev, - sect, - s<<9, - bio->bi_io_vec[idx].bv_page, - READ, false) == 0) - md_error(mddev, rdev); - } - } else { - char b[BDEVNAME_SIZE]; - /* Cannot read from anywhere, array is toast */ - md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev); - printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error" - " for block %llu\n", - mdname(mddev), - bdevname(bio->bi_bdev, b), - (unsigned long long)r1_bio->sector); - md_done_sync(mddev, r1_bio->sectors, 0); - put_buf(r1_bio); - return; + set_bit(R1BIO_Uptodate, &r1_bio->state); + set_bit(BIO_UPTODATE, &bio->bi_flags); + return 1; +} + +static int process_checks(r1bio_t *r1_bio) +{ + /* We have read all readable devices. If we haven't + * got the block, then there is no hope left. + * If we have, then we want to do a comparison + * and skip the write if everything is the same. + * If any blocks failed to read, then we need to + * attempt an over-write + */ + mddev_t *mddev = r1_bio->mddev; + conf_t *conf = mddev->private; + int primary; + int i; + + for (primary = 0; primary < conf->raid_disks; primary++) + if (r1_bio->bios[primary]->bi_end_io == end_sync_read && + test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) { + r1_bio->bios[primary]->bi_end_io = NULL; + rdev_dec_pending(conf->mirrors[primary].rdev, mddev); + break; + } + r1_bio->read_disk = primary; + for (i = 0; i < conf->raid_disks; i++) { + int j; + int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9); + struct bio *pbio = r1_bio->bios[primary]; + struct bio *sbio = r1_bio->bios[i]; + int size; + + if (r1_bio->bios[i]->bi_end_io != end_sync_read) + continue; + + if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) { + for (j = vcnt; j-- ; ) { + struct page *p, *s; + p = pbio->bi_io_vec[j].bv_page; + s = sbio->bi_io_vec[j].bv_page; + if (memcmp(page_address(p), + page_address(s), + PAGE_SIZE)) + break; } - sectors -= s; - sect += s; - idx ++; + } else + j = 0; + if (j >= 0) + mddev->resync_mismatches += r1_bio->sectors; + if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery) + && test_bit(BIO_UPTODATE, &sbio->bi_flags))) { + /* No need to write to this device. */ + sbio->bi_end_io = NULL; + rdev_dec_pending(conf->mirrors[i].rdev, mddev); + continue; + } + /* fixup the bio for reuse */ + sbio->bi_vcnt = vcnt; + sbio->bi_size = r1_bio->sectors << 9; + sbio->bi_idx = 0; + sbio->bi_phys_segments = 0; + sbio->bi_flags &= ~(BIO_POOL_MASK - 1); + sbio->bi_flags |= 1 << BIO_UPTODATE; + sbio->bi_next = NULL; + sbio->bi_sector = r1_bio->sector + + conf->mirrors[i].rdev->data_offset; + sbio->bi_bdev = conf->mirrors[i].rdev->bdev; + size = sbio->bi_size; + for (j = 0; j < vcnt ; j++) { + struct bio_vec *bi; + bi = &sbio->bi_io_vec[j]; + bi->bv_offset = 0; + if (size > PAGE_SIZE) + bi->bv_len = PAGE_SIZE; + else + bi->bv_len = size; + size -= PAGE_SIZE; + memcpy(page_address(bi->bv_page), + page_address(pbio->bi_io_vec[j].bv_page), + PAGE_SIZE); } } + return 0; +} +static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) +{ + conf_t *conf = mddev->private; + int i; + int disks = conf->raid_disks; + struct bio *bio, *wbio; + + bio = r1_bio->bios[r1_bio->read_disk]; + + if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) + /* ouch - failed to read all of that. */ + if (!fix_sync_read_error(r1_bio)) + return; + + if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) + if (process_checks(r1_bio) < 0) + return; /* * schedule writes */ @@ -2063,7 +2061,7 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors) set_capacity(mddev->gendisk, mddev->array_sectors); revalidate_disk(mddev->gendisk); if (sectors > mddev->dev_sectors && - mddev->recovery_cp == MaxSector) { + mddev->recovery_cp > mddev->dev_sectors) { mddev->recovery_cp = mddev->dev_sectors; set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); } diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index cbfdf1a6acd9..5fc4ca1af863 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h @@ -94,7 +94,9 @@ struct r1bio_s { int read_disk; struct list_head retry_list; - struct bitmap_update *bitmap_update; + /* Next two are only valid when R1BIO_BehindIO is set */ + struct page **behind_pages; + int behind_page_count; /* * if the IO is in WRITE direction, then multiple bios are used. * We choose the number when they are allocated. diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 8e9462626ec5..6e846688962f 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -271,9 +271,10 @@ static void raid10_end_read_request(struct bio *bio, int error) */ set_bit(R10BIO_Uptodate, &r10_bio->state); raid_end_bio_io(r10_bio); + rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev); } else { /* - * oops, read error: + * oops, read error - keep the refcount on the rdev */ char b[BDEVNAME_SIZE]; if (printk_ratelimit()) @@ -282,8 +283,6 @@ static void raid10_end_read_request(struct bio *bio, int error) bdevname(conf->mirrors[dev].rdev->bdev,b), (unsigned long long)r10_bio->sector); reschedule_retry(r10_bio); } - - rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev); } static void raid10_end_write_request(struct bio *bio, int error) @@ -488,13 +487,19 @@ static int raid10_mergeable_bvec(struct request_queue *q, static int read_balance(conf_t *conf, r10bio_t *r10_bio) { const sector_t this_sector = r10_bio->sector; - int disk, slot, nslot; + int disk, slot; const int sectors = r10_bio->sectors; - sector_t new_distance, current_distance; + sector_t new_distance, best_dist; mdk_rdev_t *rdev; + int do_balance; + int best_slot; raid10_find_phys(conf, r10_bio); rcu_read_lock(); +retry: + best_slot = -1; + best_dist = MaxSector; + do_balance = 1; /* * Check if we can balance. We can balance on the whole * device if no resync is going on (recovery is ok), or below @@ -502,86 +507,58 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio) * above the resync window. */ if (conf->mddev->recovery_cp < MaxSector - && (this_sector + sectors >= conf->next_resync)) { - /* make sure that disk is operational */ - slot = 0; - disk = r10_bio->devs[slot].devnum; - - while ((rdev = rcu_dereference(conf->mirrors[disk].rdev)) == NULL || - r10_bio->devs[slot].bio == IO_BLOCKED || - !test_bit(In_sync, &rdev->flags)) { - slot++; - if (slot == conf->copies) { - slot = 0; - disk = -1; - break; - } - disk = r10_bio->devs[slot].devnum; - } - goto rb_out; - } - + && (this_sector + sectors >= conf->next_resync)) + do_balance = 0; - /* make sure the disk is operational */ - slot = 0; - disk = r10_bio->devs[slot].devnum; - while ((rdev=rcu_dereference(conf->mirrors[disk].rdev)) == NULL || - r10_bio->devs[slot].bio == IO_BLOCKED || - !test_bit(In_sync, &rdev->flags)) { - slot ++; - if (slot == conf->copies) { - disk = -1; - goto rb_out; - } + for (slot = 0; slot < conf->copies ; slot++) { + if (r10_bio->devs[slot].bio == IO_BLOCKED) + continue; disk = r10_bio->devs[slot].devnum; - } - - - current_distance = abs(r10_bio->devs[slot].addr - - conf->mirrors[disk].head_position); - - /* Find the disk whose head is closest, - * or - for far > 1 - find the closest to partition beginning */ - - for (nslot = slot; nslot < conf->copies; nslot++) { - int ndisk = r10_bio->devs[nslot].devnum; - - - if ((rdev=rcu_dereference(conf->mirrors[ndisk].rdev)) == NULL || - r10_bio->devs[nslot].bio == IO_BLOCKED || - !test_bit(In_sync, &rdev->flags)) + rdev = rcu_dereference(conf->mirrors[disk].rdev); + if (rdev == NULL) continue; + if (!test_bit(In_sync, &rdev->flags)) + continue; + + if (!do_balance) + break; /* This optimisation is debatable, and completely destroys * sequential read speed for 'far copies' arrays. So only * keep it for 'near' arrays, and review those later. */ - if (conf->near_copies > 1 && !atomic_read(&rdev->nr_pending)) { - disk = ndisk; - slot = nslot; + if (conf->near_copies > 1 && !atomic_read(&rdev->nr_pending)) break; - } /* for far > 1 always use the lowest address */ if (conf->far_copies > 1) - new_distance = r10_bio->devs[nslot].addr; + new_distance = r10_bio->devs[slot].addr; else - new_distance = abs(r10_bio->devs[nslot].addr - - conf->mirrors[ndisk].head_position); - if (new_distance < current_distance) { - current_distance = new_distance; - disk = ndisk; - slot = nslot; + new_distance = abs(r10_bio->devs[slot].addr - + conf->mirrors[disk].head_position); + if (new_distance < best_dist) { + best_dist = new_distance; + best_slot = slot; } } + if (slot == conf->copies) + slot = best_slot; -rb_out: - r10_bio->read_slot = slot; -/* conf->next_seq_sect = this_sector + sectors;*/ - - if (disk >= 0 && (rdev=rcu_dereference(conf->mirrors[disk].rdev))!= NULL) - atomic_inc(&conf->mirrors[disk].rdev->nr_pending); - else + if (slot >= 0) { + disk = r10_bio->devs[slot].devnum; + rdev = rcu_dereference(conf->mirrors[disk].rdev); + if (!rdev) + goto retry; + atomic_inc(&rdev->nr_pending); + if (test_bit(Faulty, &rdev->flags)) { + /* Cannot risk returning a device that failed + * before we inc'ed nr_pending + */ + rdev_dec_pending(rdev, conf->mddev); + goto retry; + } + r10_bio->read_slot = slot; + } else disk = -1; rcu_read_unlock(); @@ -1460,40 +1437,33 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) int max_read_errors = atomic_read(&mddev->max_corr_read_errors); int d = r10_bio->devs[r10_bio->read_slot].devnum; - rcu_read_lock(); - rdev = rcu_dereference(conf->mirrors[d].rdev); - if (rdev) { /* If rdev is not NULL */ - char b[BDEVNAME_SIZE]; - int cur_read_error_count = 0; + /* still own a reference to this rdev, so it cannot + * have been cleared recently. + */ + rdev = conf->mirrors[d].rdev; - bdevname(rdev->bdev, b); + if (test_bit(Faulty, &rdev->flags)) + /* drive has already been failed, just ignore any + more fix_read_error() attempts */ + return; - if (test_bit(Faulty, &rdev->flags)) { - rcu_read_unlock(); - /* drive has already been failed, just ignore any - more fix_read_error() attempts */ - return; - } + check_decay_read_errors(mddev, rdev); + atomic_inc(&rdev->read_errors); + if (atomic_read(&rdev->read_errors) > max_read_errors) { + char b[BDEVNAME_SIZE]; + bdevname(rdev->bdev, b); - check_decay_read_errors(mddev, rdev); - atomic_inc(&rdev->read_errors); - cur_read_error_count = atomic_read(&rdev->read_errors); - if (cur_read_error_count > max_read_errors) { - rcu_read_unlock(); - printk(KERN_NOTICE - "md/raid10:%s: %s: Raid device exceeded " - "read_error threshold " - "[cur %d:max %d]\n", - mdname(mddev), - b, cur_read_error_count, max_read_errors); - printk(KERN_NOTICE - "md/raid10:%s: %s: Failing raid " - "device\n", mdname(mddev), b); - md_error(mddev, conf->mirrors[d].rdev); - return; - } + printk(KERN_NOTICE + "md/raid10:%s: %s: Raid device exceeded " + "read_error threshold [cur %d:max %d]\n", + mdname(mddev), b, + atomic_read(&rdev->read_errors), max_read_errors); + printk(KERN_NOTICE + "md/raid10:%s: %s: Failing raid device\n", + mdname(mddev), b); + md_error(mddev, conf->mirrors[d].rdev); + return; } - rcu_read_unlock(); while(sectors) { int s = sectors; @@ -1562,8 +1532,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) "write failed" " (%d sectors at %llu on %s)\n", mdname(mddev), s, - (unsigned long long)(sect+ - rdev->data_offset), + (unsigned long long)( + sect + rdev->data_offset), bdevname(rdev->bdev, b)); printk(KERN_NOTICE "md/raid10:%s: %s: failing " "drive\n", @@ -1599,8 +1569,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) "corrected sectors" " (%d sectors at %llu on %s)\n", mdname(mddev), s, - (unsigned long long)(sect+ - rdev->data_offset), + (unsigned long long)( + sect + rdev->data_offset), bdevname(rdev->bdev, b)); printk(KERN_NOTICE "md/raid10:%s: %s: failing drive\n", mdname(mddev), @@ -1612,8 +1582,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) "md/raid10:%s: read error corrected" " (%d sectors at %llu on %s)\n", mdname(mddev), s, - (unsigned long long)(sect+ - rdev->data_offset), + (unsigned long long)( + sect + rdev->data_offset), bdevname(rdev->bdev, b)); } @@ -1663,7 +1633,8 @@ static void raid10d(mddev_t *mddev) else if (test_bit(R10BIO_IsRecover, &r10_bio->state)) recovery_request_write(mddev, r10_bio); else { - int mirror; + int slot = r10_bio->read_slot; + int mirror = r10_bio->devs[slot].devnum; /* we got a read error. Maybe the drive is bad. Maybe just * the block and we can fix it. * We freeze all other IO, and try reading the block from @@ -1677,9 +1648,10 @@ static void raid10d(mddev_t *mddev) fix_read_error(conf, mddev, r10_bio); unfreeze_array(conf); } + rdev_dec_pending(conf->mirrors[mirror].rdev, mddev); - bio = r10_bio->devs[r10_bio->read_slot].bio; - r10_bio->devs[r10_bio->read_slot].bio = + bio = r10_bio->devs[slot].bio; + r10_bio->devs[slot].bio = mddev->ro ? IO_BLOCKED : NULL; mirror = read_balance(conf, r10_bio); if (mirror == -1) { @@ -1693,6 +1665,7 @@ static void raid10d(mddev_t *mddev) } else { const unsigned long do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC); bio_put(bio); + slot = r10_bio->read_slot; rdev = conf->mirrors[mirror].rdev; if (printk_ratelimit()) printk(KERN_ERR "md/raid10:%s: %s: redirecting sector %llu to" @@ -1702,8 +1675,8 @@ static void raid10d(mddev_t *mddev) (unsigned long long)r10_bio->sector); bio = bio_clone_mddev(r10_bio->master_bio, GFP_NOIO, mddev); - r10_bio->devs[r10_bio->read_slot].bio = bio; - bio->bi_sector = r10_bio->devs[r10_bio->read_slot].addr + r10_bio->devs[slot].bio = bio; + bio->bi_sector = r10_bio->devs[slot].addr + rdev->data_offset; bio->bi_bdev = rdev->bdev; bio->bi_rw = READ | do_sync; @@ -1763,13 +1736,13 @@ static int init_resync(conf_t *conf) * */ -static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster) +static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, + int *skipped, int go_faster) { conf_t *conf = mddev->private; r10bio_t *r10_bio; struct bio *biolist = NULL, *bio; sector_t max_sector, nr_sectors; - int disk; int i; int max_sync; sector_t sync_blocks; @@ -1858,108 +1831,114 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i int j, k; r10_bio = NULL; - for (i=0 ; i<conf->raid_disks; i++) - if (conf->mirrors[i].rdev && - !test_bit(In_sync, &conf->mirrors[i].rdev->flags)) { - int still_degraded = 0; - /* want to reconstruct this device */ - r10bio_t *rb2 = r10_bio; - sector_t sect = raid10_find_virt(conf, sector_nr, i); - int must_sync; - /* Unless we are doing a full sync, we only need - * to recover the block if it is set in the bitmap - */ - must_sync = bitmap_start_sync(mddev->bitmap, sect, - &sync_blocks, 1); - if (sync_blocks < max_sync) - max_sync = sync_blocks; - if (!must_sync && - !conf->fullsync) { - /* yep, skip the sync_blocks here, but don't assume - * that there will never be anything to do here - */ - chunks_skipped = -1; - continue; - } + for (i=0 ; i<conf->raid_disks; i++) { + int still_degraded; + r10bio_t *rb2; + sector_t sect; + int must_sync; - r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO); - raise_barrier(conf, rb2 != NULL); - atomic_set(&r10_bio->remaining, 0); + if (conf->mirrors[i].rdev == NULL || + test_bit(In_sync, &conf->mirrors[i].rdev->flags)) + continue; - r10_bio->master_bio = (struct bio*)rb2; - if (rb2) - atomic_inc(&rb2->remaining); - r10_bio->mddev = mddev; - set_bit(R10BIO_IsRecover, &r10_bio->state); - r10_bio->sector = sect; + still_degraded = 0; + /* want to reconstruct this device */ + rb2 = r10_bio; + sect = raid10_find_virt(conf, sector_nr, i); + /* Unless we are doing a full sync, we only need + * to recover the block if it is set in the bitmap + */ + must_sync = bitmap_start_sync(mddev->bitmap, sect, + &sync_blocks, 1); + if (sync_blocks < max_sync) + max_sync = sync_blocks; + if (!must_sync && + !conf->fullsync) { + /* yep, skip the sync_blocks here, but don't assume + * that there will never be anything to do here + */ + chunks_skipped = -1; + continue; + } - raid10_find_phys(conf, r10_bio); + r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO); + raise_barrier(conf, rb2 != NULL); + atomic_set(&r10_bio->remaining, 0); - /* Need to check if the array will still be - * degraded - */ - for (j=0; j<conf->raid_disks; j++) - if (conf->mirrors[j].rdev == NULL || - test_bit(Faulty, &conf->mirrors[j].rdev->flags)) { - still_degraded = 1; - break; - } - - must_sync = bitmap_start_sync(mddev->bitmap, sect, - &sync_blocks, still_degraded); - - for (j=0; j<conf->copies;j++) { - int d = r10_bio->devs[j].devnum; - if (conf->mirrors[d].rdev && - test_bit(In_sync, &conf->mirrors[d].rdev->flags)) { - /* This is where we read from */ - bio = r10_bio->devs[0].bio; - bio->bi_next = biolist; - biolist = bio; - bio->bi_private = r10_bio; - bio->bi_end_io = end_sync_read; - bio->bi_rw = READ; - bio->bi_sector = r10_bio->devs[j].addr + - conf->mirrors[d].rdev->data_offset; - bio->bi_bdev = conf->mirrors[d].rdev->bdev; - atomic_inc(&conf->mirrors[d].rdev->nr_pending); - atomic_inc(&r10_bio->remaining); - /* and we write to 'i' */ - - for (k=0; k<conf->copies; k++) - if (r10_bio->devs[k].devnum == i) - break; - BUG_ON(k == conf->copies); - bio = r10_bio->devs[1].bio; - bio->bi_next = biolist; - biolist = bio; - bio->bi_private = r10_bio; - bio->bi_end_io = end_sync_write; - bio->bi_rw = WRITE; - bio->bi_sector = r10_bio->devs[k].addr + - conf->mirrors[i].rdev->data_offset; - bio->bi_bdev = conf->mirrors[i].rdev->bdev; - - r10_bio->devs[0].devnum = d; - r10_bio->devs[1].devnum = i; + r10_bio->master_bio = (struct bio*)rb2; + if (rb2) + atomic_inc(&rb2->remaining); + r10_bio->mddev = mddev; + set_bit(R10BIO_IsRecover, &r10_bio->state); + r10_bio->sector = sect; - break; - } - } - if (j == conf->copies) { - /* Cannot recover, so abort the recovery */ - put_buf(r10_bio); - if (rb2) - atomic_dec(&rb2->remaining); - r10_bio = rb2; - if (!test_and_set_bit(MD_RECOVERY_INTR, - &mddev->recovery)) - printk(KERN_INFO "md/raid10:%s: insufficient " - "working devices for recovery.\n", - mdname(mddev)); + raid10_find_phys(conf, r10_bio); + + /* Need to check if the array will still be + * degraded + */ + for (j=0; j<conf->raid_disks; j++) + if (conf->mirrors[j].rdev == NULL || + test_bit(Faulty, &conf->mirrors[j].rdev->flags)) { + still_degraded = 1; break; } + + must_sync = bitmap_start_sync(mddev->bitmap, sect, + &sync_blocks, still_degraded); + + for (j=0; j<conf->copies;j++) { + int d = r10_bio->devs[j].devnum; + if (!conf->mirrors[d].rdev || + !test_bit(In_sync, &conf->mirrors[d].rdev->flags)) + continue; + /* This is where we read from */ + bio = r10_bio->devs[0].bio; + bio->bi_next = biolist; + biolist = bio; + bio->bi_private = r10_bio; + bio->bi_end_io = end_sync_read; + bio->bi_rw = READ; + bio->bi_sector = r10_bio->devs[j].addr + + conf->mirrors[d].rdev->data_offset; + bio->bi_bdev = conf->mirrors[d].rdev->bdev; + atomic_inc(&conf->mirrors[d].rdev->nr_pending); + atomic_inc(&r10_bio->remaining); + /* and we write to 'i' */ + + for (k=0; k<conf->copies; k++) + if (r10_bio->devs[k].devnum == i) + break; + BUG_ON(k == conf->copies); + bio = r10_bio->devs[1].bio; + bio->bi_next = biolist; + biolist = bio; + bio->bi_private = r10_bio; + bio->bi_end_io = end_sync_write; + bio->bi_rw = WRITE; + bio->bi_sector = r10_bio->devs[k].addr + + conf->mirrors[i].rdev->data_offset; + bio->bi_bdev = conf->mirrors[i].rdev->bdev; + + r10_bio->devs[0].devnum = d; + r10_bio->devs[1].devnum = i; + + break; + } + if (j == conf->copies) { + /* Cannot recover, so abort the recovery */ + put_buf(r10_bio); + if (rb2) + atomic_dec(&rb2->remaining); + r10_bio = rb2; + if (!test_and_set_bit(MD_RECOVERY_INTR, + &mddev->recovery)) + printk(KERN_INFO "md/raid10:%s: insufficient " + "working devices for recovery.\n", + mdname(mddev)); + break; } + } if (biolist == NULL) { while (r10_bio) { r10bio_t *rb2 = r10_bio; @@ -1977,7 +1956,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, mddev->degraded) && - !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { + !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, + &mddev->recovery)) { /* We can skip this block */ *skipped = 1; return sync_blocks + sectors_skipped; @@ -2022,7 +2002,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i for (i=0; i<conf->copies; i++) { int d = r10_bio->devs[i].devnum; if (r10_bio->devs[i].bio->bi_end_io) - rdev_dec_pending(conf->mirrors[d].rdev, mddev); + rdev_dec_pending(conf->mirrors[d].rdev, + mddev); } put_buf(r10_bio); biolist = NULL; @@ -2047,26 +2028,27 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i do { struct page *page; int len = PAGE_SIZE; - disk = 0; if (sector_nr + (len>>9) > max_sector) len = (max_sector - sector_nr) << 9; if (len == 0) break; for (bio= biolist ; bio ; bio=bio->bi_next) { + struct bio *bio2; page = bio->bi_io_vec[bio->bi_vcnt].bv_page; - if (bio_add_page(bio, page, len, 0) == 0) { - /* stop here */ - struct bio *bio2; - bio->bi_io_vec[bio->bi_vcnt].bv_page = page; - for (bio2 = biolist; bio2 && bio2 != bio; bio2 = bio2->bi_next) { - /* remove last page from this bio */ - bio2->bi_vcnt--; - bio2->bi_size -= len; - bio2->bi_flags &= ~(1<< BIO_SEG_VALID); - } - goto bio_full; + if (bio_add_page(bio, page, len, 0)) + continue; + + /* stop here */ + bio->bi_io_vec[bio->bi_vcnt].bv_page = page; + for (bio2 = biolist; + bio2 && bio2 != bio; + bio2 = bio2->bi_next) { + /* remove last page from this bio */ + bio2->bi_vcnt--; + bio2->bi_size -= len; + bio2->bi_flags &= ~(1<< BIO_SEG_VALID); } - disk = i; + goto bio_full; } nr_sectors += len>>9; sector_nr += len>>9; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 49bf5f891435..34dd54539f7b 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1700,27 +1700,25 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) raid5_conf_t *conf = mddev->private; pr_debug("raid456: error called\n"); - if (!test_bit(Faulty, &rdev->flags)) { - set_bit(MD_CHANGE_DEVS, &mddev->flags); - if (test_and_clear_bit(In_sync, &rdev->flags)) { - unsigned long flags; - spin_lock_irqsave(&conf->device_lock, flags); - mddev->degraded++; - spin_unlock_irqrestore(&conf->device_lock, flags); - /* - * if recovery was running, make sure it aborts. - */ - set_bit(MD_RECOVERY_INTR, &mddev->recovery); - } - set_bit(Faulty, &rdev->flags); - printk(KERN_ALERT - "md/raid:%s: Disk failure on %s, disabling device.\n" - "md/raid:%s: Operation continuing on %d devices.\n", - mdname(mddev), - bdevname(rdev->bdev, b), - mdname(mddev), - conf->raid_disks - mddev->degraded); + if (test_and_clear_bit(In_sync, &rdev->flags)) { + unsigned long flags; + spin_lock_irqsave(&conf->device_lock, flags); + mddev->degraded++; + spin_unlock_irqrestore(&conf->device_lock, flags); + /* + * if recovery was running, make sure it aborts. + */ + set_bit(MD_RECOVERY_INTR, &mddev->recovery); } + set_bit(Faulty, &rdev->flags); + set_bit(MD_CHANGE_DEVS, &mddev->flags); + printk(KERN_ALERT + "md/raid:%s: Disk failure on %s, disabling device.\n" + "md/raid:%s: Operation continuing on %d devices.\n", + mdname(mddev), + bdevname(rdev->bdev, b), + mdname(mddev), + conf->raid_disks - mddev->degraded); } /* @@ -5391,7 +5389,8 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors) return -EINVAL; set_capacity(mddev->gendisk, mddev->array_sectors); revalidate_disk(mddev->gendisk); - if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) { + if (sectors > mddev->dev_sectors && + mddev->recovery_cp > mddev->dev_sectors) { mddev->recovery_cp = mddev->dev_sectors; set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); } diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index 0ae93529bfc1..18fccf913635 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c @@ -45,9 +45,9 @@ #include <linux/interrupt.h> #include <linux/if_ether.h> #include <linux/aer.h> +#include <linux/prefetch.h> #ifdef CONFIG_IGB_DCA #include <linux/dca.h> -#include <linux/prefetch.h> #endif #include "igb.h" diff --git a/fs/compat.c b/fs/compat.c index 72fe6cda9108..0ea00832de23 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1306,241 +1306,6 @@ compat_sys_openat(unsigned int dfd, const char __user *filename, int flags, int return do_sys_open(dfd, filename, flags, mode); } -/* - * compat_count() counts the number of arguments/envelopes. It is basically - * a copy of count() from fs/exec.c, except that it works with 32 bit argv - * and envp pointers. - */ -static int compat_count(compat_uptr_t __user *argv, int max) -{ - int i = 0; - - if (argv != NULL) { - for (;;) { - compat_uptr_t p; - - if (get_user(p, argv)) - return -EFAULT; - if (!p) - break; - argv++; - if (i++ >= max) - return -E2BIG; - - if (fatal_signal_pending(current)) - return -ERESTARTNOHAND; - cond_resched(); - } - } - return i; -} - -/* - * compat_copy_strings() is basically a copy of copy_strings() from fs/exec.c - * except that it works with 32 bit argv and envp pointers. - */ -static int compat_copy_strings(int argc, compat_uptr_t __user *argv, - struct linux_binprm *bprm) -{ - struct page *kmapped_page = NULL; - char *kaddr = NULL; - unsigned long kpos = 0; - int ret; - - while (argc-- > 0) { - compat_uptr_t str; - int len; - unsigned long pos; - - if (get_user(str, argv+argc) || - !(len = strnlen_user(compat_ptr(str), MAX_ARG_STRLEN))) { - ret = -EFAULT; - goto out; - } - - if (len > MAX_ARG_STRLEN) { - ret = -E2BIG; - goto out; - } - - /* We're going to work our way backwords. */ - pos = bprm->p; - str += len; - bprm->p -= len; - - while (len > 0) { - int offset, bytes_to_copy; - - if (fatal_signal_pending(current)) { - ret = -ERESTARTNOHAND; - goto out; - } - cond_resched(); - - offset = pos % PAGE_SIZE; - if (offset == 0) - offset = PAGE_SIZE; - - bytes_to_copy = offset; - if (bytes_to_copy > len) - bytes_to_copy = len; - - offset -= bytes_to_copy; - pos -= bytes_to_copy; - str -= bytes_to_copy; - len -= bytes_to_copy; - - if (!kmapped_page || kpos != (pos & PAGE_MASK)) { - struct page *page; - - page = get_arg_page(bprm, pos, 1); - if (!page) { - ret = -E2BIG; - goto out; - } - - if (kmapped_page) { - flush_kernel_dcache_page(kmapped_page); - kunmap(kmapped_page); - put_page(kmapped_page); - } - kmapped_page = page; - kaddr = kmap(kmapped_page); - kpos = pos & PAGE_MASK; - flush_cache_page(bprm->vma, kpos, - page_to_pfn(kmapped_page)); - } - if (copy_from_user(kaddr+offset, compat_ptr(str), - bytes_to_copy)) { - ret = -EFAULT; - goto out; - } - } - } - ret = 0; -out: - if (kmapped_page) { - flush_kernel_dcache_page(kmapped_page); - kunmap(kmapped_page); - put_page(kmapped_page); - } - return ret; -} - -/* - * compat_do_execve() is mostly a copy of do_execve(), with the exception - * that it processes 32 bit argv and envp pointers. - */ -int compat_do_execve(char * filename, - compat_uptr_t __user *argv, - compat_uptr_t __user *envp, - struct pt_regs * regs) -{ - struct linux_binprm *bprm; - struct file *file; - struct files_struct *displaced; - bool clear_in_exec; - int retval; - - retval = unshare_files(&displaced); - if (retval) - goto out_ret; - - retval = -ENOMEM; - bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); - if (!bprm) - goto out_files; - - retval = prepare_bprm_creds(bprm); - if (retval) - goto out_free; - - retval = check_unsafe_exec(bprm); - if (retval < 0) - goto out_free; - clear_in_exec = retval; - current->in_execve = 1; - - file = open_exec(filename); - retval = PTR_ERR(file); - if (IS_ERR(file)) - goto out_unmark; - - sched_exec(); - - bprm->file = file; - bprm->filename = filename; - bprm->interp = filename; - - retval = bprm_mm_init(bprm); - if (retval) - goto out_file; - - bprm->argc = compat_count(argv, MAX_ARG_STRINGS); - if ((retval = bprm->argc) < 0) - goto out; - - bprm->envc = compat_count(envp, MAX_ARG_STRINGS); - if ((retval = bprm->envc) < 0) - goto out; - - retval = prepare_binprm(bprm); - if (retval < 0) - goto out; - - retval = copy_strings_kernel(1, &bprm->filename, bprm); - if (retval < 0) - goto out; - - bprm->exec = bprm->p; - retval = compat_copy_strings(bprm->envc, envp, bprm); - if (retval < 0) - goto out; - - retval = compat_copy_strings(bprm->argc, argv, bprm); - if (retval < 0) - goto out; - - retval = search_binary_handler(bprm, regs); - if (retval < 0) - goto out; - - /* execve succeeded */ - current->fs->in_exec = 0; - current->in_execve = 0; - acct_update_integrals(current); - free_bprm(bprm); - if (displaced) - put_files_struct(displaced); - return retval; - -out: - if (bprm->mm) { - acct_arg_size(bprm, 0); - mmput(bprm->mm); - } - -out_file: - if (bprm->file) { - allow_write_access(bprm->file); - fput(bprm->file); - } - -out_unmark: - if (clear_in_exec) - current->fs->in_exec = 0; - current->in_execve = 0; - -out_free: - free_bprm(bprm); - -out_files: - if (displaced) - reset_files_struct(displaced); -out_ret: - return retval; -} - #define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t)) static int poll_select_copy_remaining(struct timespec *end_time, void __user *p, diff --git a/fs/exec.c b/fs/exec.c index 8328beb9016f..c016896dcbb2 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -55,6 +55,7 @@ #include <linux/fs_struct.h> #include <linux/pipe_fs_i.h> #include <linux/oom.h> +#include <linux/compat.h> #include <asm/uaccess.h> #include <asm/mmu_context.h> @@ -166,8 +167,13 @@ out: } #ifdef CONFIG_MMU - -void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) +/* + * The nascent bprm->mm is not visible until exec_mmap() but it can + * use a lot of memory, account these pages in current->mm temporary + * for oom_badness()->get_mm_rss(). Once exec succeeds or fails, we + * change the counter back via acct_arg_size(0). + */ +static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) { struct mm_struct *mm = current->mm; long diff = (long)(pages - bprm->vma_pages); @@ -186,7 +192,7 @@ void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) #endif } -struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, +static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, int write) { struct page *page; @@ -305,11 +311,11 @@ static bool valid_arg_len(struct linux_binprm *bprm, long len) #else -void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) +static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) { } -struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, +static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, int write) { struct page *page; @@ -398,22 +404,56 @@ err: return err; } +struct user_arg_ptr { +#ifdef CONFIG_COMPAT + bool is_compat; +#endif + union { + const char __user *const __user *native; +#ifdef CONFIG_COMPAT + compat_uptr_t __user *compat; +#endif + } ptr; +}; + +static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr) +{ + const char __user *native; + +#ifdef CONFIG_COMPAT + if (unlikely(argv.is_compat)) { + compat_uptr_t compat; + + if (get_user(compat, argv.ptr.compat + nr)) + return ERR_PTR(-EFAULT); + + return compat_ptr(compat); + } +#endif + + if (get_user(native, argv.ptr.native + nr)) + return ERR_PTR(-EFAULT); + + return native; +} + /* * count() counts the number of strings in array ARGV. */ -static int count(const char __user * const __user * argv, int max) +static int count(struct user_arg_ptr argv, int max) { int i = 0; - if (argv != NULL) { + if (argv.ptr.native != NULL) { for (;;) { - const char __user * p; + const char __user *p = get_user_arg_ptr(argv, i); - if (get_user(p, argv)) - return -EFAULT; if (!p) break; - argv++; + + if (IS_ERR(p)) + return -EFAULT; + if (i++ >= max) return -E2BIG; @@ -430,7 +470,7 @@ static int count(const char __user * const __user * argv, int max) * processes's memory to the new process's stack. The call to get_user_pages() * ensures the destination page is created and not swapped out. */ -static int copy_strings(int argc, const char __user *const __user *argv, +static int copy_strings(int argc, struct user_arg_ptr argv, struct linux_binprm *bprm) { struct page *kmapped_page = NULL; @@ -443,16 +483,18 @@ static int copy_strings(int argc, const char __user *const __user *argv, int len; unsigned long pos; - if (get_user(str, argv+argc) || - !(len = strnlen_user(str, MAX_ARG_STRLEN))) { - ret = -EFAULT; + ret = -EFAULT; + str = get_user_arg_ptr(argv, argc); + if (IS_ERR(str)) goto out; - } - if (!valid_arg_len(bprm, len)) { - ret = -E2BIG; + len = strnlen_user(str, MAX_ARG_STRLEN); + if (!len) + goto out; + + ret = -E2BIG; + if (!valid_arg_len(bprm, len)) goto out; - } /* We're going to work our way backwords. */ pos = bprm->p; @@ -519,14 +561,19 @@ out: /* * Like copy_strings, but get argv and its values from kernel memory. */ -int copy_strings_kernel(int argc, const char *const *argv, +int copy_strings_kernel(int argc, const char *const *__argv, struct linux_binprm *bprm) { int r; mm_segment_t oldfs = get_fs(); + struct user_arg_ptr argv = { + .ptr.native = (const char __user *const __user *)__argv, + }; + set_fs(KERNEL_DS); - r = copy_strings(argc, (const char __user *const __user *)argv, bprm); + r = copy_strings(argc, argv, bprm); set_fs(oldfs); + return r; } EXPORT_SYMBOL(copy_strings_kernel); @@ -1379,10 +1426,10 @@ EXPORT_SYMBOL(search_binary_handler); /* * sys_execve() executes a new program. */ -int do_execve(const char * filename, - const char __user *const __user *argv, - const char __user *const __user *envp, - struct pt_regs * regs) +static int do_execve_common(const char *filename, + struct user_arg_ptr argv, + struct user_arg_ptr envp, + struct pt_regs *regs) { struct linux_binprm *bprm; struct file *file; @@ -1489,6 +1536,34 @@ out_ret: return retval; } +int do_execve(const char *filename, + const char __user *const __user *__argv, + const char __user *const __user *__envp, + struct pt_regs *regs) +{ + struct user_arg_ptr argv = { .ptr.native = __argv }; + struct user_arg_ptr envp = { .ptr.native = __envp }; + return do_execve_common(filename, argv, envp, regs); +} + +#ifdef CONFIG_COMPAT +int compat_do_execve(char *filename, + compat_uptr_t __user *__argv, + compat_uptr_t __user *__envp, + struct pt_regs *regs) +{ + struct user_arg_ptr argv = { + .is_compat = true, + .ptr.compat = __argv, + }; + struct user_arg_ptr envp = { + .is_compat = true, + .ptr.compat = __envp, + }; + return do_execve_common(filename, argv, envp, regs); +} +#endif + void set_binfmt(struct linux_binfmt *new) { struct mm_struct *mm = current->mm; diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 74add2ddcc3f..e65493a8ac00 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -780,6 +780,8 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, metadata = (height != ip->i_height - 1); if (metadata) revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; + else if (ip->i_depth) + revokes = sdp->sd_inptrs; if (ip != GFS2_I(sdp->sd_rindex)) error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh); diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index cec26c00b50d..903115f2bb34 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -228,6 +228,27 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp) return ret; } +static void gfs2_ail1_wait(struct gfs2_sbd *sdp) +{ + struct gfs2_ail *ai; + struct gfs2_bufdata *bd; + struct buffer_head *bh; + + spin_lock(&sdp->sd_ail_lock); + list_for_each_entry_reverse(ai, &sdp->sd_ail1_list, ai_list) { + list_for_each_entry(bd, &ai->ai_ail1_list, bd_ail_st_list) { + bh = bd->bd_bh; + if (!buffer_locked(bh)) + continue; + get_bh(bh); + spin_unlock(&sdp->sd_ail_lock); + wait_on_buffer(bh); + brelse(bh); + return; + } + } + spin_unlock(&sdp->sd_ail_lock); +} /** * gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced @@ -878,9 +899,9 @@ void gfs2_meta_syncfs(struct gfs2_sbd *sdp) gfs2_log_flush(sdp, NULL); for (;;) { gfs2_ail1_start(sdp); + gfs2_ail1_wait(sdp); if (gfs2_ail1_empty(sdp)) break; - msleep(10); } } @@ -920,12 +941,14 @@ int gfs2_logd(void *data) if (gfs2_ail_flush_reqd(sdp)) { gfs2_ail1_start(sdp); - io_schedule(); + gfs2_ail1_wait(sdp); gfs2_ail1_empty(sdp); gfs2_log_flush(sdp, NULL); } - wake_up(&sdp->sd_log_waitq); + if (!gfs2_ail_flush_reqd(sdp)) + wake_up(&sdp->sd_log_waitq); + t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; if (freezing(current)) refrigerator(); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 7273ad3c85ba..9b780df3fd54 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1629,6 +1629,10 @@ void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); gfs2_trans_add_rg(rgd); + + /* Directories keep their data in the metadata address space */ + if (ip->i_depth) + gfs2_meta_wipe(ip, bstart, blen); } /** diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index f7684483785e..eed4d7b26249 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c @@ -489,8 +489,8 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode, void nilfs_palloc_commit_alloc_entry(struct inode *inode, struct nilfs_palloc_req *req) { - nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh); - nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh); + mark_buffer_dirty(req->pr_bitmap_bh); + mark_buffer_dirty(req->pr_desc_bh); nilfs_mdt_mark_dirty(inode); brelse(req->pr_bitmap_bh); @@ -527,8 +527,8 @@ void nilfs_palloc_commit_free_entry(struct inode *inode, kunmap(req->pr_bitmap_bh->b_page); kunmap(req->pr_desc_bh->b_page); - nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh); - nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh); + mark_buffer_dirty(req->pr_desc_bh); + mark_buffer_dirty(req->pr_bitmap_bh); nilfs_mdt_mark_dirty(inode); brelse(req->pr_bitmap_bh); @@ -683,8 +683,8 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) kunmap(bitmap_bh->b_page); kunmap(desc_bh->b_page); - nilfs_mdt_mark_buffer_dirty(desc_bh); - nilfs_mdt_mark_buffer_dirty(bitmap_bh); + mark_buffer_dirty(desc_bh); + mark_buffer_dirty(bitmap_bh); nilfs_mdt_mark_dirty(inode); brelse(bitmap_bh); diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index 4723f04e9b12..aadbd0b5e3e8 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c @@ -34,7 +34,9 @@ struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap) { - return NILFS_I_NILFS(bmap->b_inode)->ns_dat; + struct the_nilfs *nilfs = bmap->b_inode->i_sb->s_fs_info; + + return nilfs->ns_dat; } static int nilfs_bmap_convert_error(struct nilfs_bmap *bmap, diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index 609cd223eea8..a35ae35e6932 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -34,12 +34,6 @@ #include "page.h" #include "btnode.h" -void nilfs_btnode_cache_init(struct address_space *btnc, - struct backing_dev_info *bdi) -{ - nilfs_mapping_init(btnc, bdi); -} - void nilfs_btnode_cache_clear(struct address_space *btnc) { invalidate_mapping_pages(btnc, 0, -1); @@ -62,7 +56,7 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) BUG(); } memset(bh->b_data, 0, 1 << inode->i_blkbits); - bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; + bh->b_bdev = inode->i_sb->s_bdev; bh->b_blocknr = blocknr; set_buffer_mapped(bh); set_buffer_uptodate(bh); @@ -94,10 +88,11 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, if (pblocknr == 0) { pblocknr = blocknr; if (inode->i_ino != NILFS_DAT_INO) { - struct inode *dat = NILFS_I_NILFS(inode)->ns_dat; + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; /* blocknr is a virtual block number */ - err = nilfs_dat_translate(dat, blocknr, &pblocknr); + err = nilfs_dat_translate(nilfs->ns_dat, blocknr, + &pblocknr); if (unlikely(err)) { brelse(bh); goto out_locked; @@ -120,7 +115,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, goto found; } set_buffer_mapped(bh); - bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; + bh->b_bdev = inode->i_sb->s_bdev; bh->b_blocknr = pblocknr; /* set block address for read */ bh->b_end_io = end_buffer_read_sync; get_bh(bh); @@ -259,7 +254,7 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc, "invalid oldkey %lld (newkey=%lld)", (unsigned long long)oldkey, (unsigned long long)newkey); - nilfs_btnode_mark_dirty(obh); + mark_buffer_dirty(obh); spin_lock_irq(&btnc->tree_lock); radix_tree_delete(&btnc->page_tree, oldkey); @@ -271,7 +266,7 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc, unlock_page(opage); } else { nilfs_copy_buffer(nbh, obh); - nilfs_btnode_mark_dirty(nbh); + mark_buffer_dirty(nbh); nbh->b_blocknr = newkey; ctxt->bh = nbh; diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h index 1b8ebd888c28..3a4dd2d8d3fc 100644 --- a/fs/nilfs2/btnode.h +++ b/fs/nilfs2/btnode.h @@ -37,7 +37,6 @@ struct nilfs_btnode_chkey_ctxt { struct buffer_head *newbh; }; -void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *); void nilfs_btnode_cache_clear(struct address_space *); struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr); @@ -51,7 +50,4 @@ void nilfs_btnode_commit_change_key(struct address_space *, void nilfs_btnode_abort_change_key(struct address_space *, struct nilfs_btnode_chkey_ctxt *); -#define nilfs_btnode_mark_dirty(bh) nilfs_mark_buffer_dirty(bh) - - #endif /* _NILFS_BTNODE_H */ diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index d451ae0e0bf3..7eafe468a29c 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -714,7 +714,7 @@ static void nilfs_btree_promote_key(struct nilfs_bmap *btree, nilfs_btree_get_nonroot_node(path, level), path[level].bp_index, key); if (!buffer_dirty(path[level].bp_bh)) - nilfs_btnode_mark_dirty(path[level].bp_bh); + mark_buffer_dirty(path[level].bp_bh); } while ((path[level].bp_index == 0) && (++level < nilfs_btree_height(btree) - 1)); } @@ -739,7 +739,7 @@ static void nilfs_btree_do_insert(struct nilfs_bmap *btree, nilfs_btree_node_insert(node, path[level].bp_index, *keyp, *ptrp, ncblk); if (!buffer_dirty(path[level].bp_bh)) - nilfs_btnode_mark_dirty(path[level].bp_bh); + mark_buffer_dirty(path[level].bp_bh); if (path[level].bp_index == 0) nilfs_btree_promote_key(btree, path, level + 1, @@ -777,9 +777,9 @@ static void nilfs_btree_carry_left(struct nilfs_bmap *btree, nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) - nilfs_btnode_mark_dirty(path[level].bp_bh); + mark_buffer_dirty(path[level].bp_bh); if (!buffer_dirty(path[level].bp_sib_bh)) - nilfs_btnode_mark_dirty(path[level].bp_sib_bh); + mark_buffer_dirty(path[level].bp_sib_bh); nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(node, 0)); @@ -823,9 +823,9 @@ static void nilfs_btree_carry_right(struct nilfs_bmap *btree, nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) - nilfs_btnode_mark_dirty(path[level].bp_bh); + mark_buffer_dirty(path[level].bp_bh); if (!buffer_dirty(path[level].bp_sib_bh)) - nilfs_btnode_mark_dirty(path[level].bp_sib_bh); + mark_buffer_dirty(path[level].bp_sib_bh); path[level + 1].bp_index++; nilfs_btree_promote_key(btree, path, level + 1, @@ -870,9 +870,9 @@ static void nilfs_btree_split(struct nilfs_bmap *btree, nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) - nilfs_btnode_mark_dirty(path[level].bp_bh); + mark_buffer_dirty(path[level].bp_bh); if (!buffer_dirty(path[level].bp_sib_bh)) - nilfs_btnode_mark_dirty(path[level].bp_sib_bh); + mark_buffer_dirty(path[level].bp_sib_bh); newkey = nilfs_btree_node_get_key(right, 0); newptr = path[level].bp_newreq.bpr_ptr; @@ -919,7 +919,7 @@ static void nilfs_btree_grow(struct nilfs_bmap *btree, nilfs_btree_node_set_level(root, level + 1); if (!buffer_dirty(path[level].bp_sib_bh)) - nilfs_btnode_mark_dirty(path[level].bp_sib_bh); + mark_buffer_dirty(path[level].bp_sib_bh); path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; @@ -1194,7 +1194,7 @@ static void nilfs_btree_do_delete(struct nilfs_bmap *btree, nilfs_btree_node_delete(node, path[level].bp_index, keyp, ptrp, ncblk); if (!buffer_dirty(path[level].bp_bh)) - nilfs_btnode_mark_dirty(path[level].bp_bh); + mark_buffer_dirty(path[level].bp_bh); if (path[level].bp_index == 0) nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(node, 0)); @@ -1226,9 +1226,9 @@ static void nilfs_btree_borrow_left(struct nilfs_bmap *btree, nilfs_btree_node_move_right(left, node, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) - nilfs_btnode_mark_dirty(path[level].bp_bh); + mark_buffer_dirty(path[level].bp_bh); if (!buffer_dirty(path[level].bp_sib_bh)) - nilfs_btnode_mark_dirty(path[level].bp_sib_bh); + mark_buffer_dirty(path[level].bp_sib_bh); nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(node, 0)); @@ -1258,9 +1258,9 @@ static void nilfs_btree_borrow_right(struct nilfs_bmap *btree, nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) - nilfs_btnode_mark_dirty(path[level].bp_bh); + mark_buffer_dirty(path[level].bp_bh); if (!buffer_dirty(path[level].bp_sib_bh)) - nilfs_btnode_mark_dirty(path[level].bp_sib_bh); + mark_buffer_dirty(path[level].bp_sib_bh); path[level + 1].bp_index++; nilfs_btree_promote_key(btree, path, level + 1, @@ -1289,7 +1289,7 @@ static void nilfs_btree_concat_left(struct nilfs_bmap *btree, nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_sib_bh)) - nilfs_btnode_mark_dirty(path[level].bp_sib_bh); + mark_buffer_dirty(path[level].bp_sib_bh); nilfs_btnode_delete(path[level].bp_bh); path[level].bp_bh = path[level].bp_sib_bh; @@ -1315,7 +1315,7 @@ static void nilfs_btree_concat_right(struct nilfs_bmap *btree, nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) - nilfs_btnode_mark_dirty(path[level].bp_bh); + mark_buffer_dirty(path[level].bp_bh); nilfs_btnode_delete(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; @@ -1709,7 +1709,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree, nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs); nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk); if (!buffer_dirty(bh)) - nilfs_btnode_mark_dirty(bh); + mark_buffer_dirty(bh); if (!nilfs_bmap_dirty(btree)) nilfs_bmap_set_dirty(btree); @@ -1787,7 +1787,7 @@ static int nilfs_btree_propagate_p(struct nilfs_bmap *btree, { while ((++level < nilfs_btree_height(btree) - 1) && !buffer_dirty(path[level].bp_bh)) - nilfs_btnode_mark_dirty(path[level].bp_bh); + mark_buffer_dirty(path[level].bp_bh); return 0; } @@ -2229,7 +2229,7 @@ static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level) } if (!buffer_dirty(bh)) - nilfs_btnode_mark_dirty(bh); + mark_buffer_dirty(bh); brelse(bh); if (!nilfs_bmap_dirty(btree)) nilfs_bmap_set_dirty(btree); diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index 5ff15a8a1024..c9b342c8b503 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -216,14 +216,14 @@ int nilfs_cpfile_get_checkpoint(struct inode *cpfile, if (!nilfs_cpfile_is_in_first(cpfile, cno)) nilfs_cpfile_block_add_valid_checkpoints(cpfile, cp_bh, kaddr, 1); - nilfs_mdt_mark_buffer_dirty(cp_bh); + mark_buffer_dirty(cp_bh); kaddr = kmap_atomic(header_bh->b_page, KM_USER0); header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); le64_add_cpu(&header->ch_ncheckpoints, 1); kunmap_atomic(kaddr, KM_USER0); - nilfs_mdt_mark_buffer_dirty(header_bh); + mark_buffer_dirty(header_bh); nilfs_mdt_mark_dirty(cpfile); } @@ -326,7 +326,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, } if (nicps > 0) { tnicps += nicps; - nilfs_mdt_mark_buffer_dirty(cp_bh); + mark_buffer_dirty(cp_bh); nilfs_mdt_mark_dirty(cpfile); if (!nilfs_cpfile_is_in_first(cpfile, cno)) { count = @@ -358,7 +358,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps); - nilfs_mdt_mark_buffer_dirty(header_bh); + mark_buffer_dirty(header_bh); nilfs_mdt_mark_dirty(cpfile); kunmap_atomic(kaddr, KM_USER0); } @@ -671,10 +671,10 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno) le64_add_cpu(&header->ch_nsnapshots, 1); kunmap_atomic(kaddr, KM_USER0); - nilfs_mdt_mark_buffer_dirty(prev_bh); - nilfs_mdt_mark_buffer_dirty(curr_bh); - nilfs_mdt_mark_buffer_dirty(cp_bh); - nilfs_mdt_mark_buffer_dirty(header_bh); + mark_buffer_dirty(prev_bh); + mark_buffer_dirty(curr_bh); + mark_buffer_dirty(cp_bh); + mark_buffer_dirty(header_bh); nilfs_mdt_mark_dirty(cpfile); brelse(prev_bh); @@ -774,10 +774,10 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno) le64_add_cpu(&header->ch_nsnapshots, -1); kunmap_atomic(kaddr, KM_USER0); - nilfs_mdt_mark_buffer_dirty(next_bh); - nilfs_mdt_mark_buffer_dirty(prev_bh); - nilfs_mdt_mark_buffer_dirty(cp_bh); - nilfs_mdt_mark_buffer_dirty(header_bh); + mark_buffer_dirty(next_bh); + mark_buffer_dirty(prev_bh); + mark_buffer_dirty(cp_bh); + mark_buffer_dirty(header_bh); nilfs_mdt_mark_dirty(cpfile); brelse(prev_bh); diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 59e5fe742f7b..fcc2f869af16 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -54,7 +54,7 @@ static int nilfs_dat_prepare_entry(struct inode *dat, static void nilfs_dat_commit_entry(struct inode *dat, struct nilfs_palloc_req *req) { - nilfs_mdt_mark_buffer_dirty(req->pr_entry_bh); + mark_buffer_dirty(req->pr_entry_bh); nilfs_mdt_mark_dirty(dat); brelse(req->pr_entry_bh); } @@ -361,7 +361,7 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) entry->de_blocknr = cpu_to_le64(blocknr); kunmap_atomic(kaddr, KM_USER0); - nilfs_mdt_mark_buffer_dirty(entry_bh); + mark_buffer_dirty(entry_bh); nilfs_mdt_mark_dirty(dat); brelse(entry_bh); diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 397e73258631..d7eeca62febd 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -111,7 +111,6 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) nilfs_transaction_commit(inode->i_sb); mapped: - SetPageChecked(page); wait_on_page_writeback(page); return VM_FAULT_LOCKED; } diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 1c2a3e23f8b2..08a07a218d26 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -48,9 +48,6 @@ #include "dat.h" #include "ifile.h" -static const struct address_space_operations def_gcinode_aops = { -}; - /* * nilfs_gccache_submit_read_data() - add data buffer and submit read request * @inode - gc inode @@ -87,9 +84,9 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, goto out; if (pbn == 0) { - struct inode *dat_inode = NILFS_I_NILFS(inode)->ns_dat; - /* use original dat, not gc dat. */ - err = nilfs_dat_translate(dat_inode, vbn, &pbn); + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; + + err = nilfs_dat_translate(nilfs->ns_dat, vbn, &pbn); if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */ brelse(bh); goto failed; @@ -103,7 +100,7 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, } if (!buffer_mapped(bh)) { - bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; + bh->b_bdev = inode->i_sb->s_bdev; set_buffer_mapped(bh); } bh->b_blocknr = pbn; @@ -160,15 +157,11 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh) if (buffer_dirty(bh)) return -EEXIST; - if (buffer_nilfs_node(bh)) { - if (nilfs_btree_broken_node_block(bh)) { - clear_buffer_uptodate(bh); - return -EIO; - } - nilfs_btnode_mark_dirty(bh); - } else { - nilfs_mark_buffer_dirty(bh); + if (buffer_nilfs_node(bh) && nilfs_btree_broken_node_block(bh)) { + clear_buffer_uptodate(bh); + return -EIO; } + mark_buffer_dirty(bh); return 0; } @@ -178,7 +171,7 @@ int nilfs_init_gcinode(struct inode *inode) inode->i_mode = S_IFREG; mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); - inode->i_mapping->a_ops = &def_gcinode_aops; + inode->i_mapping->a_ops = &empty_aops; inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi; ii->i_flags = 0; diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c index bfc73d3a30ed..684d76300a80 100644 --- a/fs/nilfs2/ifile.c +++ b/fs/nilfs2/ifile.c @@ -80,7 +80,7 @@ int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino, return ret; } nilfs_palloc_commit_alloc_entry(ifile, &req); - nilfs_mdt_mark_buffer_dirty(req.pr_entry_bh); + mark_buffer_dirty(req.pr_entry_bh); nilfs_mdt_mark_dirty(ifile); *out_ino = (ino_t)req.pr_entry_nr; *out_bh = req.pr_entry_bh; @@ -128,7 +128,7 @@ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino) raw_inode->i_flags = 0; kunmap_atomic(kaddr, KM_USER0); - nilfs_mdt_mark_buffer_dirty(req.pr_entry_bh); + mark_buffer_dirty(req.pr_entry_bh); brelse(req.pr_entry_bh); nilfs_palloc_commit_free_entry(ifile, &req); diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index c0aa27490c02..587f18432832 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -74,14 +74,14 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, struct buffer_head *bh_result, int create) { struct nilfs_inode_info *ii = NILFS_I(inode); + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; __u64 blknum = 0; int err = 0, ret; - struct inode *dat = NILFS_I_NILFS(inode)->ns_dat; unsigned maxblocks = bh_result->b_size >> inode->i_blkbits; - down_read(&NILFS_MDT(dat)->mi_sem); + down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks); - up_read(&NILFS_MDT(dat)->mi_sem); + up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); if (ret >= 0) { /* found */ map_bh(bh_result, inode->i_sb, blknum); if (ret > 0) @@ -596,6 +596,16 @@ void nilfs_write_inode_common(struct inode *inode, raw_inode->i_flags = cpu_to_le32(ii->i_flags); raw_inode->i_generation = cpu_to_le32(inode->i_generation); + if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) { + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; + + /* zero-fill unused portion in the case of super root block */ + raw_inode->i_xattr = 0; + raw_inode->i_pad = 0; + memset((void *)raw_inode + sizeof(*raw_inode), 0, + nilfs->ns_inode_size - sizeof(*raw_inode)); + } + if (has_bmap) nilfs_bmap_write(ii->i_bmap, raw_inode); else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) @@ -872,8 +882,7 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty) return -EINVAL; /* NILFS_I_DIRTY may remain for freeing inode */ } - list_del(&ii->i_dirty); - list_add_tail(&ii->i_dirty, &nilfs->ns_dirty_files); + list_move_tail(&ii->i_dirty, &nilfs->ns_dirty_files); set_bit(NILFS_I_QUEUED, &ii->i_state); } spin_unlock(&nilfs->ns_inode_lock); @@ -892,7 +901,7 @@ int nilfs_mark_inode_dirty(struct inode *inode) return err; } nilfs_update_inode(inode, ibh); - nilfs_mdt_mark_buffer_dirty(ibh); + mark_buffer_dirty(ibh); nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile); brelse(ibh); return 0; @@ -931,7 +940,7 @@ void nilfs_dirty_inode(struct inode *inode) int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len) { - struct the_nilfs *nilfs = NILFS_I_NILFS(inode); + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; __u64 logical = 0, phys = 0, size = 0; __u32 flags = 0; loff_t isize; diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index f2469ba6246b..41d6743d303c 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -698,6 +698,63 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, return 0; } +static int nilfs_ioctl_resize(struct inode *inode, struct file *filp, + void __user *argp) +{ + __u64 newsize; + int ret = -EPERM; + + if (!capable(CAP_SYS_ADMIN)) + goto out; + + ret = mnt_want_write(filp->f_path.mnt); + if (ret) + goto out; + + ret = -EFAULT; + if (copy_from_user(&newsize, argp, sizeof(newsize))) + goto out_drop_write; + + ret = nilfs_resize_fs(inode->i_sb, newsize); + +out_drop_write: + mnt_drop_write(filp->f_path.mnt); +out: + return ret; +} + +static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp) +{ + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; + __u64 range[2]; + __u64 minseg, maxseg; + unsigned long segbytes; + int ret = -EPERM; + + if (!capable(CAP_SYS_ADMIN)) + goto out; + + ret = -EFAULT; + if (copy_from_user(range, argp, sizeof(__u64[2]))) + goto out; + + ret = -ERANGE; + if (range[1] > i_size_read(inode->i_sb->s_bdev->bd_inode)) + goto out; + + segbytes = nilfs->ns_blocks_per_segment * nilfs->ns_blocksize; + + minseg = range[0] + segbytes - 1; + do_div(minseg, segbytes); + maxseg = NILFS_SB2_OFFSET_BYTES(range[1]); + do_div(maxseg, segbytes); + maxseg--; + + ret = nilfs_sufile_set_alloc_range(nilfs->ns_sufile, minseg, maxseg); +out: + return ret; +} + static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp, size_t membsz, @@ -763,6 +820,10 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return nilfs_ioctl_clean_segments(inode, filp, cmd, argp); case NILFS_IOCTL_SYNC: return nilfs_ioctl_sync(inode, filp, cmd, argp); + case NILFS_IOCTL_RESIZE: + return nilfs_ioctl_resize(inode, filp, argp); + case NILFS_IOCTL_SET_ALLOC_RANGE: + return nilfs_ioctl_set_alloc_range(inode, argp); default: return -ENOTTY; } diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index a649b05f7069..800e8d78a83b 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -66,7 +66,7 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block, kunmap_atomic(kaddr, KM_USER0); set_buffer_uptodate(bh); - nilfs_mark_buffer_dirty(bh); + mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(inode); return 0; } @@ -355,7 +355,7 @@ int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block) err = nilfs_mdt_read_block(inode, block, 0, &bh); if (unlikely(err)) return err; - nilfs_mark_buffer_dirty(bh); + mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(inode); brelse(bh); return 0; @@ -450,9 +450,9 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode, INIT_LIST_HEAD(&shadow->frozen_buffers); address_space_init_once(&shadow->frozen_data); - nilfs_mapping_init(&shadow->frozen_data, bdi); + nilfs_mapping_init(&shadow->frozen_data, inode, bdi); address_space_init_once(&shadow->frozen_btnodes); - nilfs_mapping_init(&shadow->frozen_btnodes, bdi); + nilfs_mapping_init(&shadow->frozen_btnodes, inode, bdi); mi->mi_shadow = shadow; return 0; } diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h index ed68563ec708..ab20a4baa50f 100644 --- a/fs/nilfs2/mdt.h +++ b/fs/nilfs2/mdt.h @@ -64,11 +64,6 @@ static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode) return inode->i_private; } -static inline struct the_nilfs *NILFS_I_NILFS(struct inode *inode) -{ - return inode->i_sb->s_fs_info; -} - /* Default GFP flags using highmem */ #define NILFS_MDT_GFP (__GFP_WAIT | __GFP_IO | __GFP_HIGHMEM) @@ -93,8 +88,6 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh); struct buffer_head *nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh); -#define nilfs_mdt_mark_buffer_dirty(bh) nilfs_mark_buffer_dirty(bh) - static inline void nilfs_mdt_mark_dirty(struct inode *inode) { if (!test_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state)) @@ -108,7 +101,7 @@ static inline void nilfs_mdt_clear_dirty(struct inode *inode) static inline __u64 nilfs_mdt_cno(struct inode *inode) { - return NILFS_I_NILFS(inode)->ns_cno; + return ((struct the_nilfs *)inode->i_sb->s_fs_info)->ns_cno; } #define nilfs_mdt_bgl_lock(inode, bg) \ diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index a8dd344303cb..a9c6a531f80c 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -80,12 +80,6 @@ static inline struct inode *NILFS_BTNC_I(struct address_space *btnc) return &ii->vfs_inode; } -static inline struct inode *NILFS_AS_I(struct address_space *mapping) -{ - return (mapping->host) ? : - container_of(mapping, struct inode, i_data); -} - /* * Dynamic state flags of NILFS on-memory inode (i_state) */ @@ -298,6 +292,7 @@ struct nilfs_super_block **nilfs_prepare_super(struct super_block *sb, int flip); int nilfs_commit_super(struct super_block *sb, int flag); int nilfs_cleanup_super(struct super_block *sb); +int nilfs_resize_fs(struct super_block *sb, __u64 newsize); int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt, struct nilfs_root **root); int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno); diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 1168059c7efd..65221a04c6f0 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -37,8 +37,7 @@ #define NILFS_BUFFER_INHERENT_BITS \ ((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \ - (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Allocated) | \ - (1UL << BH_NILFS_Checked)) + (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Checked)) static struct buffer_head * __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index, @@ -59,19 +58,6 @@ __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index, return bh; } -/* - * Since the page cache of B-tree node pages or data page cache of pseudo - * inodes does not have a valid mapping->host pointer, calling - * mark_buffer_dirty() for their buffers causes a NULL pointer dereference; - * it calls __mark_inode_dirty(NULL) through __set_page_dirty(). - * To avoid this problem, the old style mark_buffer_dirty() is used instead. - */ -void nilfs_mark_buffer_dirty(struct buffer_head *bh) -{ - if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh)) - __set_page_dirty_nobuffers(bh->b_page); -} - struct buffer_head *nilfs_grab_buffer(struct inode *inode, struct address_space *mapping, unsigned long blkoff, @@ -183,7 +169,7 @@ int nilfs_page_buffers_clean(struct page *page) void nilfs_page_bug(struct page *page) { struct address_space *m; - unsigned long ino = 0; + unsigned long ino; if (unlikely(!page)) { printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n"); @@ -191,11 +177,8 @@ void nilfs_page_bug(struct page *page) } m = page->mapping; - if (m) { - struct inode *inode = NILFS_AS_I(m); - if (inode != NULL) - ino = inode->i_ino; - } + ino = m ? m->host->i_ino : 0; + printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx " "mapping=%p ino=%lu\n", page, atomic_read(&page->_count), @@ -217,56 +200,6 @@ void nilfs_page_bug(struct page *page) } /** - * nilfs_alloc_private_page - allocate a private page with buffer heads - * - * Return Value: On success, a pointer to the allocated page is returned. - * On error, NULL is returned. - */ -struct page *nilfs_alloc_private_page(struct block_device *bdev, int size, - unsigned long state) -{ - struct buffer_head *bh, *head, *tail; - struct page *page; - - page = alloc_page(GFP_NOFS); /* page_count of the returned page is 1 */ - if (unlikely(!page)) - return NULL; - - lock_page(page); - head = alloc_page_buffers(page, size, 0); - if (unlikely(!head)) { - unlock_page(page); - __free_page(page); - return NULL; - } - - bh = head; - do { - bh->b_state = (1UL << BH_NILFS_Allocated) | state; - tail = bh; - bh->b_bdev = bdev; - bh = bh->b_this_page; - } while (bh); - - tail->b_this_page = head; - attach_page_buffers(page, head); - - return page; -} - -void nilfs_free_private_page(struct page *page) -{ - BUG_ON(!PageLocked(page)); - BUG_ON(page->mapping); - - if (page_has_buffers(page) && !try_to_free_buffers(page)) - NILFS_PAGE_BUG(page, "failed to free page"); - - unlock_page(page); - __free_page(page); -} - -/** * nilfs_copy_page -- copy the page with buffers * @dst: destination page * @src: source page @@ -492,10 +425,10 @@ unsigned nilfs_page_count_clean_buffers(struct page *page, return nc; } -void nilfs_mapping_init(struct address_space *mapping, +void nilfs_mapping_init(struct address_space *mapping, struct inode *inode, struct backing_dev_info *bdi) { - mapping->host = NULL; + mapping->host = inode; mapping->flags = 0; mapping_set_gfp_mask(mapping, GFP_NOFS); mapping->assoc_mapping = NULL; diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h index f06b79ad7493..fb7de71605a0 100644 --- a/fs/nilfs2/page.h +++ b/fs/nilfs2/page.h @@ -38,14 +38,12 @@ enum { BH_NILFS_Redirected, }; -BUFFER_FNS(NILFS_Allocated, nilfs_allocated) /* nilfs private buffers */ BUFFER_FNS(NILFS_Node, nilfs_node) /* nilfs node buffers */ BUFFER_FNS(NILFS_Volatile, nilfs_volatile) BUFFER_FNS(NILFS_Checked, nilfs_checked) /* buffer is verified */ BUFFER_FNS(NILFS_Redirected, nilfs_redirected) /* redirected to a copy */ -void nilfs_mark_buffer_dirty(struct buffer_head *bh); int __nilfs_clear_page_dirty(struct page *); struct buffer_head *nilfs_grab_buffer(struct inode *, struct address_space *, @@ -54,14 +52,11 @@ void nilfs_forget_buffer(struct buffer_head *); void nilfs_copy_buffer(struct buffer_head *, struct buffer_head *); int nilfs_page_buffers_clean(struct page *); void nilfs_page_bug(struct page *); -struct page *nilfs_alloc_private_page(struct block_device *, int, - unsigned long); -void nilfs_free_private_page(struct page *); int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); void nilfs_copy_back_pages(struct address_space *, struct address_space *); void nilfs_clear_dirty_pages(struct address_space *); -void nilfs_mapping_init(struct address_space *mapping, +void nilfs_mapping_init(struct address_space *mapping, struct inode *inode, struct backing_dev_info *bdi); unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned); unsigned long nilfs_find_uncommitted_extent(struct inode *inode, diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index ba4a64518f38..a604ac0331b2 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -387,9 +387,9 @@ static int nilfs_scan_dsync_log(struct the_nilfs *nilfs, sector_t start_blocknr, static void dispose_recovery_list(struct list_head *head) { while (!list_empty(head)) { - struct nilfs_recovery_block *rb - = list_entry(head->next, - struct nilfs_recovery_block, list); + struct nilfs_recovery_block *rb; + + rb = list_first_entry(head, struct nilfs_recovery_block, list); list_del(&rb->list); kfree(rb); } @@ -416,9 +416,9 @@ static int nilfs_segment_list_add(struct list_head *head, __u64 segnum) void nilfs_dispose_segment_list(struct list_head *head) { while (!list_empty(head)) { - struct nilfs_segment_entry *ent - = list_entry(head->next, - struct nilfs_segment_entry, list); + struct nilfs_segment_entry *ent; + + ent = list_first_entry(head, struct nilfs_segment_entry, list); list_del(&ent->list); kfree(ent); } diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 2853ff20f85a..850a7c0228fb 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -239,12 +239,15 @@ nilfs_segbuf_fill_in_super_root_crc(struct nilfs_segment_buffer *segbuf, u32 seed) { struct nilfs_super_root *raw_sr; + struct the_nilfs *nilfs = segbuf->sb_super->s_fs_info; + unsigned srsize; u32 crc; raw_sr = (struct nilfs_super_root *)segbuf->sb_super_root->b_data; + srsize = NILFS_SR_BYTES(nilfs->ns_inode_size); crc = crc32_le(seed, (unsigned char *)raw_sr + sizeof(raw_sr->sr_sum), - NILFS_SR_BYTES - sizeof(raw_sr->sr_sum)); + srsize - sizeof(raw_sr->sr_sum)); raw_sr->sr_sum = cpu_to_le32(crc); } @@ -254,18 +257,6 @@ static void nilfs_release_buffers(struct list_head *list) list_for_each_entry_safe(bh, n, list, b_assoc_buffers) { list_del_init(&bh->b_assoc_buffers); - if (buffer_nilfs_allocated(bh)) { - struct page *clone_page = bh->b_page; - - /* remove clone page */ - brelse(bh); - page_cache_release(clone_page); /* for each bh */ - if (page_count(clone_page) <= 2) { - lock_page(clone_page); - nilfs_free_private_page(clone_page); - } - continue; - } brelse(bh); } } diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index afe4f2183454..141646e88fb5 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -655,13 +655,10 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode, if (unlikely(page->index > last)) break; - if (mapping->host) { - lock_page(page); - if (!page_has_buffers(page)) - create_empty_buffers(page, - 1 << inode->i_blkbits, 0); - unlock_page(page); - } + lock_page(page); + if (!page_has_buffers(page)) + create_empty_buffers(page, 1 << inode->i_blkbits, 0); + unlock_page(page); bh = head = page_buffers(page); do { @@ -809,7 +806,7 @@ static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci) /* The following code is duplicated with cpfile. But, it is needed to collect the checkpoint even if it was not newly created */ - nilfs_mdt_mark_buffer_dirty(bh_cp); + mark_buffer_dirty(bh_cp); nilfs_mdt_mark_dirty(nilfs->ns_cpfile); nilfs_cpfile_put_checkpoint( nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); @@ -889,12 +886,14 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci, { struct buffer_head *bh_sr; struct nilfs_super_root *raw_sr; - unsigned isz = nilfs->ns_inode_size; + unsigned isz, srsz; bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root; raw_sr = (struct nilfs_super_root *)bh_sr->b_data; + isz = nilfs->ns_inode_size; + srsz = NILFS_SR_BYTES(isz); - raw_sr->sr_bytes = cpu_to_le16(NILFS_SR_BYTES); + raw_sr->sr_bytes = cpu_to_le16(srsz); raw_sr->sr_nongc_ctime = cpu_to_le64(nilfs_doing_gc() ? nilfs->ns_nongc_ctime : sci->sc_seg_ctime); @@ -906,6 +905,7 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci, NILFS_SR_CPFILE_OFFSET(isz), 1); nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr + NILFS_SR_SUFILE_OFFSET(isz), 1); + memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz); } static void nilfs_redirty_inodes(struct list_head *head) @@ -954,8 +954,8 @@ static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci, dispose_buffers: while (!list_empty(listp)) { - bh = list_entry(listp->next, struct buffer_head, - b_assoc_buffers); + bh = list_first_entry(listp, struct buffer_head, + b_assoc_buffers); list_del_init(&bh->b_assoc_buffers); brelse(bh); } @@ -1500,10 +1500,7 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci, nblocks = le32_to_cpu(finfo->fi_nblocks); ndatablk = le32_to_cpu(finfo->fi_ndatablk); - if (buffer_nilfs_node(bh)) - inode = NILFS_BTNC_I(bh->b_page->mapping); - else - inode = NILFS_AS_I(bh->b_page->mapping); + inode = bh->b_page->mapping->host; if (mode == SC_LSEG_DSYNC) sc_op = &nilfs_sc_dsync_ops; @@ -1556,83 +1553,24 @@ static int nilfs_segctor_assign(struct nilfs_sc_info *sci, int mode) return 0; } -static int -nilfs_copy_replace_page_buffers(struct page *page, struct list_head *out) -{ - struct page *clone_page; - struct buffer_head *bh, *head, *bh2; - void *kaddr; - - bh = head = page_buffers(page); - - clone_page = nilfs_alloc_private_page(bh->b_bdev, bh->b_size, 0); - if (unlikely(!clone_page)) - return -ENOMEM; - - bh2 = page_buffers(clone_page); - kaddr = kmap_atomic(page, KM_USER0); - do { - if (list_empty(&bh->b_assoc_buffers)) - continue; - get_bh(bh2); - page_cache_get(clone_page); /* for each bh */ - memcpy(bh2->b_data, kaddr + bh_offset(bh), bh2->b_size); - bh2->b_blocknr = bh->b_blocknr; - list_replace(&bh->b_assoc_buffers, &bh2->b_assoc_buffers); - list_add_tail(&bh->b_assoc_buffers, out); - } while (bh = bh->b_this_page, bh2 = bh2->b_this_page, bh != head); - kunmap_atomic(kaddr, KM_USER0); - - if (!TestSetPageWriteback(clone_page)) - account_page_writeback(clone_page); - unlock_page(clone_page); - - return 0; -} - -static int nilfs_test_page_to_be_frozen(struct page *page) -{ - struct address_space *mapping = page->mapping; - - if (!mapping || !mapping->host || S_ISDIR(mapping->host->i_mode)) - return 0; - - if (page_mapped(page)) { - ClearPageChecked(page); - return 1; - } - return PageChecked(page); -} - -static int nilfs_begin_page_io(struct page *page, struct list_head *out) +static void nilfs_begin_page_io(struct page *page) { if (!page || PageWriteback(page)) /* For split b-tree node pages, this function may be called twice. We ignore the 2nd or later calls by this check. */ - return 0; + return; lock_page(page); clear_page_dirty_for_io(page); set_page_writeback(page); unlock_page(page); - - if (nilfs_test_page_to_be_frozen(page)) { - int err = nilfs_copy_replace_page_buffers(page, out); - if (unlikely(err)) - return err; - } - return 0; } -static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci, - struct page **failed_page) +static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) { struct nilfs_segment_buffer *segbuf; struct page *bd_page = NULL, *fs_page = NULL; - struct list_head *list = &sci->sc_copied_buffers; - int err; - *failed_page = NULL; list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { struct buffer_head *bh; @@ -1662,11 +1600,7 @@ static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci, break; } if (bh->b_page != fs_page) { - err = nilfs_begin_page_io(fs_page, list); - if (unlikely(err)) { - *failed_page = fs_page; - goto out; - } + nilfs_begin_page_io(fs_page); fs_page = bh->b_page; } } @@ -1677,11 +1611,7 @@ static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci, set_page_writeback(bd_page); unlock_page(bd_page); } - err = nilfs_begin_page_io(fs_page, list); - if (unlikely(err)) - *failed_page = fs_page; - out: - return err; + nilfs_begin_page_io(fs_page); } static int nilfs_segctor_write(struct nilfs_sc_info *sci, @@ -1694,24 +1624,6 @@ static int nilfs_segctor_write(struct nilfs_sc_info *sci, return ret; } -static void __nilfs_end_page_io(struct page *page, int err) -{ - if (!err) { - if (!nilfs_page_buffers_clean(page)) - __set_page_dirty_nobuffers(page); - ClearPageError(page); - } else { - __set_page_dirty_nobuffers(page); - SetPageError(page); - } - - if (buffer_nilfs_allocated(page_buffers(page))) { - if (TestClearPageWriteback(page)) - dec_zone_page_state(page, NR_WRITEBACK); - } else - end_page_writeback(page); -} - static void nilfs_end_page_io(struct page *page, int err) { if (!page) @@ -1738,40 +1650,19 @@ static void nilfs_end_page_io(struct page *page, int err) return; } - __nilfs_end_page_io(page, err); -} - -static void nilfs_clear_copied_buffers(struct list_head *list, int err) -{ - struct buffer_head *bh, *head; - struct page *page; - - while (!list_empty(list)) { - bh = list_entry(list->next, struct buffer_head, - b_assoc_buffers); - page = bh->b_page; - page_cache_get(page); - head = bh = page_buffers(page); - do { - if (!list_empty(&bh->b_assoc_buffers)) { - list_del_init(&bh->b_assoc_buffers); - if (!err) { - set_buffer_uptodate(bh); - clear_buffer_dirty(bh); - clear_buffer_delay(bh); - clear_buffer_nilfs_volatile(bh); - } - brelse(bh); /* for b_assoc_buffers */ - } - } while ((bh = bh->b_this_page) != head); - - __nilfs_end_page_io(page, err); - page_cache_release(page); + if (!err) { + if (!nilfs_page_buffers_clean(page)) + __set_page_dirty_nobuffers(page); + ClearPageError(page); + } else { + __set_page_dirty_nobuffers(page); + SetPageError(page); } + + end_page_writeback(page); } -static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page, - int err) +static void nilfs_abort_logs(struct list_head *logs, int err) { struct nilfs_segment_buffer *segbuf; struct page *bd_page = NULL, *fs_page = NULL; @@ -1801,8 +1692,6 @@ static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page, } if (bh->b_page != fs_page) { nilfs_end_page_io(fs_page, err); - if (fs_page && fs_page == failed_page) - return; fs_page = bh->b_page; } } @@ -1821,12 +1710,11 @@ static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci, list_splice_tail_init(&sci->sc_write_logs, &logs); ret = nilfs_wait_on_logs(&logs); - nilfs_abort_logs(&logs, NULL, ret ? : err); + nilfs_abort_logs(&logs, ret ? : err); list_splice_tail_init(&sci->sc_segbufs, &logs); nilfs_cancel_segusage(&logs, nilfs->ns_sufile); nilfs_free_incomplete_logs(&logs, nilfs); - nilfs_clear_copied_buffers(&sci->sc_copied_buffers, err); if (sci->sc_stage.flags & NILFS_CF_SUFREED) { ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile, @@ -1920,8 +1808,6 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) nilfs_end_page_io(fs_page, 0); - nilfs_clear_copied_buffers(&sci->sc_copied_buffers, 0); - nilfs_drop_collected_inodes(&sci->sc_dirty_files); if (nilfs_doing_gc()) @@ -1979,7 +1865,7 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci, "failed to get inode block.\n"); return err; } - nilfs_mdt_mark_buffer_dirty(ibh); + mark_buffer_dirty(ibh); nilfs_mdt_mark_dirty(ifile); spin_lock(&nilfs->ns_inode_lock); if (likely(!ii->i_bh)) @@ -1991,8 +1877,7 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci, clear_bit(NILFS_I_QUEUED, &ii->i_state); set_bit(NILFS_I_BUSY, &ii->i_state); - list_del(&ii->i_dirty); - list_add_tail(&ii->i_dirty, &sci->sc_dirty_files); + list_move_tail(&ii->i_dirty, &sci->sc_dirty_files); } spin_unlock(&nilfs->ns_inode_lock); @@ -2014,8 +1899,7 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci, clear_bit(NILFS_I_BUSY, &ii->i_state); brelse(ii->i_bh); ii->i_bh = NULL; - list_del(&ii->i_dirty); - list_add_tail(&ii->i_dirty, &ti->ti_garbage); + list_move_tail(&ii->i_dirty, &ti->ti_garbage); } spin_unlock(&nilfs->ns_inode_lock); } @@ -2026,7 +1910,6 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci, static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) { struct the_nilfs *nilfs = sci->sc_super->s_fs_info; - struct page *failed_page; int err; sci->sc_stage.scnt = NILFS_ST_INIT; @@ -2081,11 +1964,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) nilfs_segctor_update_segusage(sci, nilfs->ns_sufile); /* Write partial segments */ - err = nilfs_segctor_prepare_write(sci, &failed_page); - if (err) { - nilfs_abort_logs(&sci->sc_segbufs, failed_page, err); - goto failed_to_write; - } + nilfs_segctor_prepare_write(sci); nilfs_add_checksums_on_logs(&sci->sc_segbufs, nilfs->ns_crc_seed); @@ -2687,7 +2566,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb, INIT_LIST_HEAD(&sci->sc_segbufs); INIT_LIST_HEAD(&sci->sc_write_logs); INIT_LIST_HEAD(&sci->sc_gc_inodes); - INIT_LIST_HEAD(&sci->sc_copied_buffers); init_timer(&sci->sc_timer); sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; @@ -2741,8 +2619,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) if (flag || !nilfs_segctor_confirm(sci)) nilfs_segctor_write_out(sci); - WARN_ON(!list_empty(&sci->sc_copied_buffers)); - if (!list_empty(&sci->sc_dirty_files)) { nilfs_warning(sci->sc_super, __func__, "dirty file(s) after the final construction\n"); diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 6c02a86745fb..38a1d0013314 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -92,7 +92,6 @@ struct nilfs_segsum_pointer { * @sc_nblk_inc: Block count of current generation * @sc_dirty_files: List of files to be written * @sc_gc_inodes: List of GC inodes having blocks to be written - * @sc_copied_buffers: List of copied buffers (buffer heads) to freeze data * @sc_freesegs: array of segment numbers to be freed * @sc_nfreesegs: number of segments on @sc_freesegs * @sc_dsync_inode: inode whose data pages are written for a sync operation @@ -136,7 +135,6 @@ struct nilfs_sc_info { struct list_head sc_dirty_files; struct list_head sc_gc_inodes; - struct list_head sc_copied_buffers; __u64 *sc_freesegs; size_t sc_nfreesegs; diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 1d6f488ccae8..0a0aba617d8a 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -33,7 +33,9 @@ struct nilfs_sufile_info { struct nilfs_mdt_info mi; - unsigned long ncleansegs; + unsigned long ncleansegs;/* number of clean segments */ + __u64 allocmin; /* lower limit of allocatable segment range */ + __u64 allocmax; /* upper limit of allocatable segment range */ }; static inline struct nilfs_sufile_info *NILFS_SUI(struct inode *sufile) @@ -96,6 +98,13 @@ nilfs_sufile_get_segment_usage_block(struct inode *sufile, __u64 segnum, create, NULL, bhp); } +static int nilfs_sufile_delete_segment_usage_block(struct inode *sufile, + __u64 segnum) +{ + return nilfs_mdt_delete_block(sufile, + nilfs_sufile_get_blkoff(sufile, segnum)); +} + static void nilfs_sufile_mod_counter(struct buffer_head *header_bh, u64 ncleanadd, u64 ndirtyadd) { @@ -108,7 +117,7 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh, le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd); kunmap_atomic(kaddr, KM_USER0); - nilfs_mdt_mark_buffer_dirty(header_bh); + mark_buffer_dirty(header_bh); } /** @@ -248,6 +257,35 @@ int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create, } /** + * nilfs_sufile_set_alloc_range - limit range of segment to be allocated + * @sufile: inode of segment usage file + * @start: minimum segment number of allocatable region (inclusive) + * @end: maximum segment number of allocatable region (inclusive) + * + * Return Value: On success, 0 is returned. On error, one of the + * following negative error codes is returned. + * + * %-ERANGE - invalid segment region + */ +int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end) +{ + struct nilfs_sufile_info *sui = NILFS_SUI(sufile); + __u64 nsegs; + int ret = -ERANGE; + + down_write(&NILFS_MDT(sufile)->mi_sem); + nsegs = nilfs_sufile_get_nsegments(sufile); + + if (start <= end && end < nsegs) { + sui->allocmin = start; + sui->allocmax = end; + ret = 0; + } + up_write(&NILFS_MDT(sufile)->mi_sem); + return ret; +} + +/** * nilfs_sufile_alloc - allocate a segment * @sufile: inode of segment usage file * @segnump: pointer to segment number @@ -269,11 +307,12 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) struct buffer_head *header_bh, *su_bh; struct nilfs_sufile_header *header; struct nilfs_segment_usage *su; + struct nilfs_sufile_info *sui = NILFS_SUI(sufile); size_t susz = NILFS_MDT(sufile)->mi_entry_size; __u64 segnum, maxsegnum, last_alloc; void *kaddr; - unsigned long nsegments, ncleansegs, nsus; - int ret, i, j; + unsigned long nsegments, ncleansegs, nsus, cnt; + int ret, j; down_write(&NILFS_MDT(sufile)->mi_sem); @@ -287,13 +326,31 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) kunmap_atomic(kaddr, KM_USER0); nsegments = nilfs_sufile_get_nsegments(sufile); + maxsegnum = sui->allocmax; segnum = last_alloc + 1; - maxsegnum = nsegments - 1; - for (i = 0; i < nsegments; i += nsus) { - if (segnum >= nsegments) { - /* wrap around */ - segnum = 0; - maxsegnum = last_alloc; + if (segnum < sui->allocmin || segnum > sui->allocmax) + segnum = sui->allocmin; + + for (cnt = 0; cnt < nsegments; cnt += nsus) { + if (segnum > maxsegnum) { + if (cnt < sui->allocmax - sui->allocmin + 1) { + /* + * wrap around in the limited region. + * if allocation started from + * sui->allocmin, this never happens. + */ + segnum = sui->allocmin; + maxsegnum = last_alloc; + } else if (segnum > sui->allocmin && + sui->allocmax + 1 < nsegments) { + segnum = sui->allocmax + 1; + maxsegnum = nsegments - 1; + } else if (sui->allocmin > 0) { + segnum = 0; + maxsegnum = sui->allocmin - 1; + } else { + break; /* never happens */ + } } ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1, &su_bh); @@ -319,9 +376,9 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) header->sh_last_alloc = cpu_to_le64(segnum); kunmap_atomic(kaddr, KM_USER0); - NILFS_SUI(sufile)->ncleansegs--; - nilfs_mdt_mark_buffer_dirty(header_bh); - nilfs_mdt_mark_buffer_dirty(su_bh); + sui->ncleansegs--; + mark_buffer_dirty(header_bh); + mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); brelse(su_bh); *segnump = segnum; @@ -364,7 +421,7 @@ void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum, nilfs_sufile_mod_counter(header_bh, -1, 1); NILFS_SUI(sufile)->ncleansegs--; - nilfs_mdt_mark_buffer_dirty(su_bh); + mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); } @@ -395,7 +452,7 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum, nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1); NILFS_SUI(sufile)->ncleansegs -= clean; - nilfs_mdt_mark_buffer_dirty(su_bh); + mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); } @@ -421,7 +478,7 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, sudirty = nilfs_segment_usage_dirty(su); nilfs_segment_usage_set_clean(su); kunmap_atomic(kaddr, KM_USER0); - nilfs_mdt_mark_buffer_dirty(su_bh); + mark_buffer_dirty(su_bh); nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0); NILFS_SUI(sufile)->ncleansegs++; @@ -441,7 +498,7 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum) ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); if (!ret) { - nilfs_mdt_mark_buffer_dirty(bh); + mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(sufile); brelse(bh); } @@ -476,7 +533,7 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, su->su_nblocks = cpu_to_le32(nblocks); kunmap_atomic(kaddr, KM_USER0); - nilfs_mdt_mark_buffer_dirty(bh); + mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(sufile); brelse(bh); @@ -505,7 +562,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) { struct buffer_head *header_bh; struct nilfs_sufile_header *header; - struct the_nilfs *nilfs = NILFS_I_NILFS(sufile); + struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; void *kaddr; int ret; @@ -555,11 +612,183 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum, nilfs_sufile_mod_counter(header_bh, -1, 0); NILFS_SUI(sufile)->ncleansegs--; } - nilfs_mdt_mark_buffer_dirty(su_bh); + mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); } /** + * nilfs_sufile_truncate_range - truncate range of segment array + * @sufile: inode of segment usage file + * @start: start segment number (inclusive) + * @end: end segment number (inclusive) + * + * Return Value: On success, 0 is returned. On error, one of the + * following negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EINVAL - Invalid number of segments specified + * + * %-EBUSY - Dirty or active segments are present in the range + */ +static int nilfs_sufile_truncate_range(struct inode *sufile, + __u64 start, __u64 end) +{ + struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; + struct buffer_head *header_bh; + struct buffer_head *su_bh; + struct nilfs_segment_usage *su, *su2; + size_t susz = NILFS_MDT(sufile)->mi_entry_size; + unsigned long segusages_per_block; + unsigned long nsegs, ncleaned; + __u64 segnum; + void *kaddr; + ssize_t n, nc; + int ret; + int j; + + nsegs = nilfs_sufile_get_nsegments(sufile); + + ret = -EINVAL; + if (start > end || start >= nsegs) + goto out; + + ret = nilfs_sufile_get_header_block(sufile, &header_bh); + if (ret < 0) + goto out; + + segusages_per_block = nilfs_sufile_segment_usages_per_block(sufile); + ncleaned = 0; + + for (segnum = start; segnum <= end; segnum += n) { + n = min_t(unsigned long, + segusages_per_block - + nilfs_sufile_get_offset(sufile, segnum), + end - segnum + 1); + ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, + &su_bh); + if (ret < 0) { + if (ret != -ENOENT) + goto out_header; + /* hole */ + continue; + } + kaddr = kmap_atomic(su_bh->b_page, KM_USER0); + su = nilfs_sufile_block_get_segment_usage( + sufile, segnum, su_bh, kaddr); + su2 = su; + for (j = 0; j < n; j++, su = (void *)su + susz) { + if ((le32_to_cpu(su->su_flags) & + ~(1UL << NILFS_SEGMENT_USAGE_ERROR)) || + nilfs_segment_is_active(nilfs, segnum + j)) { + ret = -EBUSY; + kunmap_atomic(kaddr, KM_USER0); + brelse(su_bh); + goto out_header; + } + } + nc = 0; + for (su = su2, j = 0; j < n; j++, su = (void *)su + susz) { + if (nilfs_segment_usage_error(su)) { + nilfs_segment_usage_set_clean(su); + nc++; + } + } + kunmap_atomic(kaddr, KM_USER0); + if (nc > 0) { + mark_buffer_dirty(su_bh); + ncleaned += nc; + } + brelse(su_bh); + + if (n == segusages_per_block) { + /* make hole */ + nilfs_sufile_delete_segment_usage_block(sufile, segnum); + } + } + ret = 0; + +out_header: + if (ncleaned > 0) { + NILFS_SUI(sufile)->ncleansegs += ncleaned; + nilfs_sufile_mod_counter(header_bh, ncleaned, 0); + nilfs_mdt_mark_dirty(sufile); + } + brelse(header_bh); +out: + return ret; +} + +/** + * nilfs_sufile_resize - resize segment array + * @sufile: inode of segment usage file + * @newnsegs: new number of segments + * + * Return Value: On success, 0 is returned. On error, one of the + * following negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-ENOSPC - Enough free space is not left for shrinking + * + * %-EBUSY - Dirty or active segments exist in the region to be truncated + */ +int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs) +{ + struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; + struct buffer_head *header_bh; + struct nilfs_sufile_header *header; + struct nilfs_sufile_info *sui = NILFS_SUI(sufile); + void *kaddr; + unsigned long nsegs, nrsvsegs; + int ret = 0; + + down_write(&NILFS_MDT(sufile)->mi_sem); + + nsegs = nilfs_sufile_get_nsegments(sufile); + if (nsegs == newnsegs) + goto out; + + ret = -ENOSPC; + nrsvsegs = nilfs_nrsvsegs(nilfs, newnsegs); + if (newnsegs < nsegs && nsegs - newnsegs + nrsvsegs > sui->ncleansegs) + goto out; + + ret = nilfs_sufile_get_header_block(sufile, &header_bh); + if (ret < 0) + goto out; + + if (newnsegs > nsegs) { + sui->ncleansegs += newnsegs - nsegs; + } else /* newnsegs < nsegs */ { + ret = nilfs_sufile_truncate_range(sufile, newnsegs, nsegs - 1); + if (ret < 0) + goto out_header; + + sui->ncleansegs -= nsegs - newnsegs; + } + + kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + header = kaddr + bh_offset(header_bh); + header->sh_ncleansegs = cpu_to_le64(sui->ncleansegs); + kunmap_atomic(kaddr, KM_USER0); + + mark_buffer_dirty(header_bh); + nilfs_mdt_mark_dirty(sufile); + nilfs_set_nsegments(nilfs, newnsegs); + +out_header: + brelse(header_bh); +out: + up_write(&NILFS_MDT(sufile)->mi_sem); + return ret; +} + +/** * nilfs_sufile_get_suinfo - * @sufile: inode of segment usage file * @segnum: segment number to start looking @@ -583,7 +812,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, struct nilfs_segment_usage *su; struct nilfs_suinfo *si = buf; size_t susz = NILFS_MDT(sufile)->mi_entry_size; - struct the_nilfs *nilfs = NILFS_I_NILFS(sufile); + struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; void *kaddr; unsigned long nsegs, segusages_per_block; ssize_t n; @@ -679,6 +908,9 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize, kunmap_atomic(kaddr, KM_USER0); brelse(header_bh); + sui->allocmax = nilfs_sufile_get_nsegments(sufile) - 1; + sui->allocmin = 0; + unlock_new_inode(sufile); out: *inodep = sufile; diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index a943fbacb45b..e84bc5b51fc1 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h @@ -31,11 +31,12 @@ static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile) { - return NILFS_I_NILFS(sufile)->ns_nsegments; + return ((struct the_nilfs *)sufile->i_sb->s_fs_info)->ns_nsegments; } unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile); +int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end); int nilfs_sufile_alloc(struct inode *, __u64 *); int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum); int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, @@ -61,6 +62,7 @@ void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *, void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *, struct buffer_head *); +int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs); int nilfs_sufile_read(struct super_block *sb, size_t susize, struct nilfs_inode *raw_inode, struct inode **inodep); diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 062cca065195..8351c44a7320 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -56,6 +56,7 @@ #include "btnode.h" #include "page.h" #include "cpfile.h" +#include "sufile.h" /* nilfs_sufile_resize(), nilfs_sufile_set_alloc_range() */ #include "ifile.h" #include "dat.h" #include "segment.h" @@ -165,7 +166,7 @@ struct inode *nilfs_alloc_inode(struct super_block *sb) ii->i_state = 0; ii->i_cno = 0; ii->vfs_inode.i_version = 1; - nilfs_btnode_cache_init(&ii->i_btnode_cache, sb->s_bdi); + nilfs_mapping_init(&ii->i_btnode_cache, &ii->vfs_inode, sb->s_bdi); return &ii->vfs_inode; } @@ -347,6 +348,134 @@ int nilfs_cleanup_super(struct super_block *sb) return ret; } +/** + * nilfs_move_2nd_super - relocate secondary super block + * @sb: super block instance + * @sb2off: new offset of the secondary super block (in bytes) + */ +static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off) +{ + struct the_nilfs *nilfs = sb->s_fs_info; + struct buffer_head *nsbh; + struct nilfs_super_block *nsbp; + sector_t blocknr, newblocknr; + unsigned long offset; + int sb2i = -1; /* array index of the secondary superblock */ + int ret = 0; + + /* nilfs->ns_sem must be locked by the caller. */ + if (nilfs->ns_sbh[1] && + nilfs->ns_sbh[1]->b_blocknr > nilfs->ns_first_data_block) { + sb2i = 1; + blocknr = nilfs->ns_sbh[1]->b_blocknr; + } else if (nilfs->ns_sbh[0]->b_blocknr > nilfs->ns_first_data_block) { + sb2i = 0; + blocknr = nilfs->ns_sbh[0]->b_blocknr; + } + if (sb2i >= 0 && (u64)blocknr << nilfs->ns_blocksize_bits == sb2off) + goto out; /* super block location is unchanged */ + + /* Get new super block buffer */ + newblocknr = sb2off >> nilfs->ns_blocksize_bits; + offset = sb2off & (nilfs->ns_blocksize - 1); + nsbh = sb_getblk(sb, newblocknr); + if (!nsbh) { + printk(KERN_WARNING + "NILFS warning: unable to move secondary superblock " + "to block %llu\n", (unsigned long long)newblocknr); + ret = -EIO; + goto out; + } + nsbp = (void *)nsbh->b_data + offset; + memset(nsbp, 0, nilfs->ns_blocksize); + + if (sb2i >= 0) { + memcpy(nsbp, nilfs->ns_sbp[sb2i], nilfs->ns_sbsize); + brelse(nilfs->ns_sbh[sb2i]); + nilfs->ns_sbh[sb2i] = nsbh; + nilfs->ns_sbp[sb2i] = nsbp; + } else if (nilfs->ns_sbh[0]->b_blocknr < nilfs->ns_first_data_block) { + /* secondary super block will be restored to index 1 */ + nilfs->ns_sbh[1] = nsbh; + nilfs->ns_sbp[1] = nsbp; + } else { + brelse(nsbh); + } +out: + return ret; +} + +/** + * nilfs_resize_fs - resize the filesystem + * @sb: super block instance + * @newsize: new size of the filesystem (in bytes) + */ +int nilfs_resize_fs(struct super_block *sb, __u64 newsize) +{ + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_super_block **sbp; + __u64 devsize, newnsegs; + loff_t sb2off; + int ret; + + ret = -ERANGE; + devsize = i_size_read(sb->s_bdev->bd_inode); + if (newsize > devsize) + goto out; + + /* + * Write lock is required to protect some functions depending + * on the number of segments, the number of reserved segments, + * and so forth. + */ + down_write(&nilfs->ns_segctor_sem); + + sb2off = NILFS_SB2_OFFSET_BYTES(newsize); + newnsegs = sb2off >> nilfs->ns_blocksize_bits; + do_div(newnsegs, nilfs->ns_blocks_per_segment); + + ret = nilfs_sufile_resize(nilfs->ns_sufile, newnsegs); + up_write(&nilfs->ns_segctor_sem); + if (ret < 0) + goto out; + + ret = nilfs_construct_segment(sb); + if (ret < 0) + goto out; + + down_write(&nilfs->ns_sem); + nilfs_move_2nd_super(sb, sb2off); + ret = -EIO; + sbp = nilfs_prepare_super(sb, 0); + if (likely(sbp)) { + nilfs_set_log_cursor(sbp[0], nilfs); + /* + * Drop NILFS_RESIZE_FS flag for compatibility with + * mount-time resize which may be implemented in a + * future release. + */ + sbp[0]->s_state = cpu_to_le16(le16_to_cpu(sbp[0]->s_state) & + ~NILFS_RESIZE_FS); + sbp[0]->s_dev_size = cpu_to_le64(newsize); + sbp[0]->s_nsegments = cpu_to_le64(nilfs->ns_nsegments); + if (sbp[1]) + memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); + ret = nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL); + } + up_write(&nilfs->ns_sem); + + /* + * Reset the range of allocatable segments last. This order + * is important in the case of expansion because the secondary + * superblock must be protected from log write until migration + * completes. + */ + if (!ret) + nilfs_sufile_set_alloc_range(nilfs->ns_sufile, 0, newnsegs - 1); +out: + return ret; +} + static void nilfs_put_super(struct super_block *sb) { struct the_nilfs *nilfs = sb->s_fs_info; diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index d2acd1a651f3..d32714094375 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -363,6 +363,24 @@ static unsigned long long nilfs_max_size(unsigned int blkbits) return res; } +/** + * nilfs_nrsvsegs - calculate the number of reserved segments + * @nilfs: nilfs object + * @nsegs: total number of segments + */ +unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs) +{ + return max_t(unsigned long, NILFS_MIN_NRSVSEGS, + DIV_ROUND_UP(nsegs * nilfs->ns_r_segments_percentage, + 100)); +} + +void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs) +{ + nilfs->ns_nsegments = nsegs; + nilfs->ns_nrsvsegs = nilfs_nrsvsegs(nilfs, nsegs); +} + static int nilfs_store_disk_layout(struct the_nilfs *nilfs, struct nilfs_super_block *sbp) { @@ -389,13 +407,9 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs, } nilfs->ns_first_data_block = le64_to_cpu(sbp->s_first_data_block); - nilfs->ns_nsegments = le64_to_cpu(sbp->s_nsegments); nilfs->ns_r_segments_percentage = le32_to_cpu(sbp->s_r_segments_percentage); - nilfs->ns_nrsvsegs = - max_t(unsigned long, NILFS_MIN_NRSVSEGS, - DIV_ROUND_UP(nilfs->ns_nsegments * - nilfs->ns_r_segments_percentage, 100)); + nilfs_set_nsegments(nilfs, le64_to_cpu(sbp->s_nsegments)); nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed); return 0; } diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index f4968145c2a3..9992b11312ff 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -268,6 +268,8 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev); void destroy_nilfs(struct the_nilfs *nilfs); int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data); int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb); +unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs); +void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs); int nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t); int nilfs_count_free_blocks(struct the_nilfs *, sector_t *); struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno); diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index c3d6512eded1..8845613fd7e3 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -60,10 +60,6 @@ struct linux_binprm { unsigned long loader, exec; }; -extern void acct_arg_size(struct linux_binprm *bprm, unsigned long pages); -extern struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, - int write); - #define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0 #define BINPRM_FLAGS_ENFORCE_NONDUMP (1 << BINPRM_FLAGS_ENFORCE_NONDUMP_BIT) diff --git a/include/linux/ide.h b/include/linux/ide.h index 072fe8c93e6f..42557851b12e 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -18,13 +18,13 @@ #include <linux/pci.h> #include <linux/completion.h> #include <linux/pm.h> +#include <linux/mutex.h> #ifdef CONFIG_BLK_DEV_IDEACPI #include <acpi/acpi.h> #endif #include <asm/byteorder.h> #include <asm/system.h> #include <asm/io.h> -#include <asm/mutex.h> /* for request_sense */ #include <linux/cdrom.h> diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index 8768c469e93e..7454ad7451b4 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -107,7 +107,7 @@ struct nilfs_super_root { #define NILFS_SR_DAT_OFFSET(inode_size) NILFS_SR_MDT_OFFSET(inode_size, 0) #define NILFS_SR_CPFILE_OFFSET(inode_size) NILFS_SR_MDT_OFFSET(inode_size, 1) #define NILFS_SR_SUFILE_OFFSET(inode_size) NILFS_SR_MDT_OFFSET(inode_size, 2) -#define NILFS_SR_BYTES (sizeof(struct nilfs_super_root)) +#define NILFS_SR_BYTES(inode_size) NILFS_SR_MDT_OFFSET(inode_size, 3) /* * Maximal mount counts @@ -845,5 +845,7 @@ struct nilfs_bdesc { _IOR(NILFS_IOCTL_IDENT, 0x8A, __u64) #define NILFS_IOCTL_RESIZE \ _IOW(NILFS_IOCTL_IDENT, 0x8B, __u64) +#define NILFS_IOCTL_SET_ALLOC_RANGE \ + _IOW(NILFS_IOCTL_IDENT, 0x8C, __u64[2]) #endif /* _LINUX_NILFS_FS_H */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index aeaad97e6815..e8b78ce14474 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1782,7 +1782,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) #define skb_queue_walk(queue, skb) \ for (skb = (queue)->next; \ - (skb != (struct sk_buff *)(queue)); \ + skb != (struct sk_buff *)(queue); \ skb = skb->next) #define skb_queue_walk_safe(queue, skb, tmp) \ @@ -1791,7 +1791,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) skb = tmp, tmp = skb->next) #define skb_queue_walk_from(queue, skb) \ - for (; (skb != (struct sk_buff *)(queue)); \ + for (; skb != (struct sk_buff *)(queue); \ skb = skb->next) #define skb_queue_walk_from_safe(queue, skb, tmp) \ @@ -1801,7 +1801,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) #define skb_queue_reverse_walk(queue, skb) \ for (skb = (queue)->prev; \ - (skb != (struct sk_buff *)(queue)); \ + skb != (struct sk_buff *)(queue); \ skb = skb->prev) #define skb_queue_reverse_walk_safe(queue, skb, tmp) \ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8c7189c3f6ed..e6d6a66a8f71 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -538,7 +538,7 @@ struct ieee80211_tx_info { }; /** - * ieee80211_sched_scan_ies - scheduled scan IEs + * struct ieee80211_sched_scan_ies - scheduled scan IEs * * This structure is used to pass the appropriate IEs to be used in scheduled * scans for all bands. It contains both the IEs passed from the userspace @@ -2278,6 +2278,7 @@ static inline int ieee80211_sta_ps_transition_ni(struct ieee80211_sta *sta, /** * ieee80211_sta_set_tim - set the TIM bit for a sleeping station + * @sta: &struct ieee80211_sta pointer for the sleeping station * * If a driver buffers frames for a powersave station instead of passing * them back to mac80211 for retransmission, the station needs to be told diff --git a/init/Kconfig b/init/Kconfig index 4986ecc49e65..c8b172efaa65 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -827,11 +827,6 @@ config SCHED_AUTOGROUP desktop applications. Task group autogeneration is currently based upon task session. -config SCHED_TTWU_QUEUE - bool - depends on !SPARC32 - default y - config MM_OWNER bool @@ -908,7 +903,6 @@ endif config CC_OPTIMIZE_FOR_SIZE bool "Optimize for size" - default y help Enabling this option will pass "-Os" instead of "-O2" to gcc resulting in a smaller kernel. diff --git a/kernel/sched.c b/kernel/sched.c index c62acf45d3b9..0516af415085 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2564,7 +2564,7 @@ static void ttwu_queue(struct task_struct *p, int cpu) { struct rq *rq = cpu_rq(cpu); -#if defined(CONFIG_SMP) && defined(CONFIG_SCHED_TTWU_QUEUE) +#if defined(CONFIG_SMP) if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) { ttwu_queue_remote(p, cpu); return; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index dd5a3206423e..58c25ea5a5c1 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -67,12 +67,12 @@ #include <linux/if_arp.h> #include <linux/proc_fs.h> #include <linux/rcupdate.h> -#include <linux/prefetch.h> #include <linux/skbuff.h> #include <linux/netlink.h> #include <linux/init.h> #include <linux/list.h> #include <linux/slab.h> +#include <linux/prefetch.h> #include <net/net_namespace.h> #include <net/ip.h> #include <net/protocol.h> diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 8ce792ea08e9..1fd29b2daa92 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -36,6 +36,7 @@ $default{"REBOOT_ON_SUCCESS"} = 1; $default{"POWEROFF_ON_SUCCESS"} = 0; $default{"BUILD_OPTIONS"} = ""; $default{"BISECT_SLEEP_TIME"} = 60; # sleep time between bisects +$default{"PATCHCHECK_SLEEP_TIME"} = 60; # sleep time between patch checks $default{"CLEAR_LOG"} = 0; $default{"BISECT_MANUAL"} = 0; $default{"BISECT_SKIP"} = 1; @@ -96,6 +97,7 @@ my $monitor_pid; my $monitor_cnt = 0; my $sleep_time; my $bisect_sleep_time; +my $patchcheck_sleep_time; my $store_failures; my $timeout; my $booted_timeout; @@ -112,6 +114,7 @@ my $successes = 0; my %entered_configs; my %config_help; +my %variable; $config_help{"MACHINE"} = << "EOF" The machine hostname that you will test. @@ -260,6 +263,39 @@ sub get_ktest_configs { } } +sub process_variables { + my ($value) = @_; + my $retval = ""; + + # We want to check for '\', and it is just easier + # to check the previous characet of '$' and not need + # to worry if '$' is the first character. By adding + # a space to $value, we can just check [^\\]\$ and + # it will still work. + $value = " $value"; + + while ($value =~ /(.*?[^\\])\$\{(.*?)\}(.*)/) { + my $begin = $1; + my $var = $2; + my $end = $3; + # append beginning of value to retval + $retval = "$retval$begin"; + if (defined($variable{$var})) { + $retval = "$retval$variable{$var}"; + } else { + # put back the origin piece. + $retval = "$retval\$\{$var\}"; + } + $value = $end; + } + $retval = "$retval$value"; + + # remove the space added in the beginning + $retval =~ s/ //; + + return "$retval" +} + sub set_value { my ($lvalue, $rvalue) = @_; @@ -269,10 +305,22 @@ sub set_value { if ($rvalue =~ /^\s*$/) { delete $opt{$lvalue}; } else { + $rvalue = process_variables($rvalue); $opt{$lvalue} = $rvalue; } } +sub set_variable { + my ($lvalue, $rvalue) = @_; + + if ($rvalue =~ /^\s*$/) { + delete $variable{$lvalue}; + } else { + $rvalue = process_variables($rvalue); + $variable{$lvalue} = $rvalue; + } +} + sub read_config { my ($config) = @_; @@ -385,6 +433,22 @@ sub read_config { $repeats{$val} = $repeat; } } + } elsif (/^\s*([A-Z_\[\]\d]+)\s*:=\s*(.*?)\s*$/) { + next if ($skip); + + my $lvalue = $1; + my $rvalue = $2; + + # process config variables. + # Config variables are only active while reading the + # config and can be defined anywhere. They also ignore + # TEST_START and DEFAULTS, but are skipped if they are in + # on of these sections that have SKIP defined. + # The save variable can be + # defined multiple times and the new one simply overrides + # the prevous one. + set_variable($lvalue, $rvalue); + } else { die "$name: $.: Garbage found in config\n$_"; } @@ -838,6 +902,7 @@ sub monitor { if ($stop_test_after > 0 && !$booted && !$bug) { if (time - $monitor_start > $stop_test_after) { + doprint "STOP_TEST_AFTER ($stop_test_after seconds) timed out\n"; $done = 1; } } @@ -907,7 +972,7 @@ sub install { return if (!defined($post_install)); my $cp_post_install = $post_install; - $cp_post_install = s/\$KERNEL_VERSION/$version/g; + $cp_post_install =~ s/\$KERNEL_VERSION/$version/g; run_command "$cp_post_install" or dodie "Failed to run post install"; } @@ -1247,14 +1312,14 @@ sub run_bisect_test { if ($failed) { $result = 0; - - # reboot the box to a good kernel - if ($type ne "build") { - bisect_reboot; - } } else { $result = 1; } + + # reboot the box to a kernel we can ssh to + if ($type ne "build") { + bisect_reboot; + } $in_bisect = 0; return $result; @@ -1763,6 +1828,14 @@ sub config_bisect { success $i; } +sub patchcheck_reboot { + doprint "Reboot and sleep $patchcheck_sleep_time seconds\n"; + reboot; + start_monitor; + wait_for_monitor $patchcheck_sleep_time; + end_monitor; +} + sub patchcheck { my ($i) = @_; @@ -1854,6 +1927,8 @@ sub patchcheck { end_monitor; return 0 if ($failed); + patchcheck_reboot; + } $in_patchcheck = 0; success $i; @@ -1944,7 +2019,7 @@ for (my $i = 0, my $repeat = 1; $i <= $opt{"NUM_TESTS"}; $i += $repeat) { } } -sub set_test_option { +sub __set_test_option { my ($name, $i) = @_; my $option = "$name\[$i\]"; @@ -1970,6 +2045,72 @@ sub set_test_option { return undef; } +sub eval_option { + my ($option, $i) = @_; + + # Add space to evaluate the character before $ + $option = " $option"; + my $retval = ""; + + while ($option =~ /(.*?[^\\])\$\{(.*?)\}(.*)/) { + my $start = $1; + my $var = $2; + my $end = $3; + + # Append beginning of line + $retval = "$retval$start"; + + # If the iteration option OPT[$i] exists, then use that. + # otherwise see if the default OPT (without [$i]) exists. + + my $o = "$var\[$i\]"; + + if (defined($opt{$o})) { + $o = $opt{$o}; + $retval = "$retval$o"; + } elsif (defined($opt{$var})) { + $o = $opt{$var}; + $retval = "$retval$o"; + } else { + $retval = "$retval\$\{$var\}"; + } + + $option = $end; + } + + $retval = "$retval$option"; + + $retval =~ s/^ //; + + return $retval; +} + +sub set_test_option { + my ($name, $i) = @_; + + my $option = __set_test_option($name, $i); + return $option if (!defined($option)); + + my $prev = ""; + + # Since an option can evaluate to another option, + # keep iterating until we do not evaluate any more + # options. + my $r = 0; + while ($prev ne $option) { + # Check for recursive evaluations. + # 100 deep should be more than enough. + if ($r++ > 100) { + die "Over 100 evaluations accurred with $name\n" . + "Check for recursive variables\n"; + } + $prev = $option; + $option = eval_option($option, $i); + } + + return $option; +} + # First we need to do is the builds for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { @@ -2003,6 +2144,7 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { $poweroff_after_halt = set_test_option("POWEROFF_AFTER_HALT", $i); $sleep_time = set_test_option("SLEEP_TIME", $i); $bisect_sleep_time = set_test_option("BISECT_SLEEP_TIME", $i); + $patchcheck_sleep_time = set_test_option("PATCHCHECK_SLEEP_TIME", $i); $bisect_manual = set_test_option("BISECT_MANUAL", $i); $bisect_skip = set_test_option("BISECT_SKIP", $i); $store_failures = set_test_option("STORE_FAILURES", $i); diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf index 4c5d6bd74a02..48cbcc80602a 100644 --- a/tools/testing/ktest/sample.conf +++ b/tools/testing/ktest/sample.conf @@ -73,6 +73,95 @@ # ktest will fail to execute, and no tests will run. # +#### Config variables #### +# +# This config file can also contain "config variables". +# These are assigned with ":=" instead of the ktest option +# assigment "=". +# +# The difference between ktest options and config variables +# is that config variables can be used multiple times, +# where each instance will override the previous instance. +# And that they only live at time of processing this config. +# +# The advantage to config variables are that they can be used +# by any option or any other config variables to define thing +# that you may use over and over again in the options. +# +# For example: +# +# USER := root +# TARGET := mybox +# TEST_CASE := ssh ${USER}@${TARGET} /path/to/my/test +# +# TEST_START +# MIN_CONFIG = config1 +# TEST = ${TEST_CASE} +# +# TEST_START +# MIN_CONFIG = config2 +# TEST = ${TEST_CASE} +# +# TEST_CASE := ssh ${USER}@${TARGET} /path/to/my/test2 +# +# TEST_START +# MIN_CONFIG = config1 +# TEST = ${TEST_CASE} +# +# TEST_START +# MIN_CONFIG = config2 +# TEST = ${TEST_CASE} +# +# TEST_DIR := /home/me/test +# +# BUILD_DIR = ${TEST_DIR}/linux.git +# OUTPUT_DIR = ${TEST_DIR}/test +# +# Note, the config variables are evaluated immediately, thus +# updating TARGET after TEST_CASE has been assigned does nothing +# to TEST_CASE. +# +# As shown in the example, to evaluate a config variable, you +# use the ${X} convention. Simple $X will not work. +# +# If the config variable does not exist, the ${X} will not +# be evaluated. Thus: +# +# MAKE_CMD = PATH=/mypath:${PATH} make +# +# If PATH is not a config variable, then the ${PATH} in +# the MAKE_CMD option will be evaluated by the shell when +# the MAKE_CMD option is passed into shell processing. + +#### Using options in other options #### +# +# Options that are defined in the config file may also be used +# by other options. All options are evaulated at time of +# use (except that config variables are evaluated at config +# processing time). +# +# If an ktest option is used within another option, instead of +# typing it again in that option you can simply use the option +# just like you can config variables. +# +# MACHINE = mybox +# +# TEST = ssh root@${MACHINE} /path/to/test +# +# The option will be used per test case. Thus: +# +# TEST_TYPE = test +# TEST = ssh root@{MACHINE} +# +# TEST_START +# MACHINE = box1 +# +# TEST_START +# MACHINE = box2 +# +# For both test cases, MACHINE will be evaluated at the time +# of the test case. The first test will run ssh root@box1 +# and the second will run ssh root@box2. #### Mandatory Default Options #### @@ -366,6 +455,10 @@ # (default 60) #BISECT_SLEEP_TIME = 60 +# The time in between patch checks to sleep (in seconds) +# (default 60) +#PATCHCHECK_SLEEP_TIME = 60 + # Reboot the target box on error (default 0) #REBOOT_ON_ERROR = 0 |