diff options
33 files changed, 1039 insertions, 246 deletions
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index a344980287a5..fe451348ae57 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -31,6 +31,7 @@ config RISCV select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_DMA_CONTIGUOUS + select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_GENERIC_DMA_COHERENT select HAVE_PERF_EVENTS select IRQ_DOMAIN @@ -108,10 +109,12 @@ config ARCH_RV32I select GENERIC_LIB_ASHRDI3 select GENERIC_LIB_LSHRDI3 select GENERIC_LIB_UCMPDI2 + select GENERIC_LIB_UMODDI3 config ARCH_RV64I bool "RV64I" select 64BIT + select ARCH_SUPPORTS_INT128 if GCC_VERSION >= 50000 select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FTRACE_MCOUNT_RECORD @@ -208,14 +211,61 @@ config RISCV_BASE_PMU endmenu +config FPU + bool "FPU support" + default y + help + Say N here if you want to disable all floating-point related procedure + in the kernel. + + If you don't know what to do here, say Y. + endmenu -menu "Kernel type" +menu "Kernel features" source "kernel/Kconfig.hz" endmenu +menu "Boot options" + +config CMDLINE_BOOL + bool "Built-in kernel command line" + help + For most platforms, it is firmware or second stage bootloader + that by default specifies the kernel command line options. + However, it might be necessary or advantageous to either override + the default kernel command line or add a few extra options to it. + For such cases, this option allows hardcoding command line options + directly into the kernel. + + For that, choose 'Y' here and fill in the extra boot parameters + in CONFIG_CMDLINE. + + The built-in options will be concatenated to the default command + line if CMDLINE_FORCE is set to 'N'. Otherwise, the default + command line will be ignored and replaced by the built-in string. + +config CMDLINE + string "Built-in kernel command string" + depends on CMDLINE_BOOL + default "" + help + Supply command-line options at build time by entering them here. + +config CMDLINE_FORCE + bool "Built-in command line overrides bootloader arguments" + depends on CMDLINE_BOOL + help + Set this option to 'Y' to have the kernel ignore the bootloader + or firmware command line. Instead, the built-in command line + will be used exclusively. + + If you don't know what to do here, say N. + +endmenu + menu "Bus support" config PCI diff --git a/arch/riscv/Kconfig.debug b/arch/riscv/Kconfig.debug index 3224ff6ecf6e..c5a72f17c469 100644 --- a/arch/riscv/Kconfig.debug +++ b/arch/riscv/Kconfig.debug @@ -1,37 +1,2 @@ - -config CMDLINE_BOOL - bool "Built-in kernel command line" - help - For most platforms, it is firmware or second stage bootloader - that by default specifies the kernel command line options. - However, it might be necessary or advantageous to either override - the default kernel command line or add a few extra options to it. - For such cases, this option allows hardcoding command line options - directly into the kernel. - - For that, choose 'Y' here and fill in the extra boot parameters - in CONFIG_CMDLINE. - - The built-in options will be concatenated to the default command - line if CMDLINE_FORCE is set to 'N'. Otherwise, the default - command line will be ignored and replaced by the built-in string. - -config CMDLINE - string "Built-in kernel command string" - depends on CMDLINE_BOOL - default "" - help - Supply command-line options at build time by entering them here. - -config CMDLINE_FORCE - bool "Built-in command line overrides bootloader arguments" - depends on CMDLINE_BOOL - help - Set this option to 'Y' to have the kernel ignore the bootloader - or firmware command line. Instead, the built-in command line - will be used exclusively. - - If you don't know what to do here, say N. - config EARLY_PRINTK def_bool y diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 61ec42405ec9..d10146197533 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -25,10 +25,7 @@ ifeq ($(CONFIG_ARCH_RV64I),y) KBUILD_CFLAGS += -mabi=lp64 KBUILD_AFLAGS += -mabi=lp64 - - KBUILD_CFLAGS += $(call cc-ifversion, -ge, 0500, -DCONFIG_ARCH_SUPPORTS_INT128) - KBUILD_MARCH = rv64im KBUILD_LDFLAGS += -melf64lriscv else BITS := 32 @@ -36,22 +33,20 @@ else KBUILD_CFLAGS += -mabi=ilp32 KBUILD_AFLAGS += -mabi=ilp32 - KBUILD_MARCH = rv32im KBUILD_LDFLAGS += -melf32lriscv endif KBUILD_CFLAGS += -Wall -ifeq ($(CONFIG_RISCV_ISA_A),y) - KBUILD_ARCH_A = a -endif -ifeq ($(CONFIG_RISCV_ISA_C),y) - KBUILD_ARCH_C = c -endif - -KBUILD_AFLAGS += -march=$(KBUILD_MARCH)$(KBUILD_ARCH_A)fd$(KBUILD_ARCH_C) +# ISA string setting +riscv-march-$(CONFIG_ARCH_RV32I) := rv32im +riscv-march-$(CONFIG_ARCH_RV64I) := rv64im +riscv-march-$(CONFIG_RISCV_ISA_A) := $(riscv-march-y)a +riscv-march-$(CONFIG_FPU) := $(riscv-march-y)fd +riscv-march-$(CONFIG_RISCV_ISA_C) := $(riscv-march-y)c +KBUILD_CFLAGS += -march=$(subst fd,,$(riscv-march-y)) +KBUILD_AFLAGS += -march=$(riscv-march-y) -KBUILD_CFLAGS += -march=$(KBUILD_MARCH)$(KBUILD_ARCH_A)$(KBUILD_ARCH_C) KBUILD_CFLAGS += -mno-save-restore KBUILD_CFLAGS += -DCONFIG_PAGE_OFFSET=$(CONFIG_PAGE_OFFSET) diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild index efdbe311e936..6a646d9ea780 100644 --- a/arch/riscv/include/asm/Kbuild +++ b/arch/riscv/include/asm/Kbuild @@ -13,7 +13,6 @@ generic-y += errno.h generic-y += exec.h generic-y += fb.h generic-y += fcntl.h -generic-y += futex.h generic-y += hardirq.h generic-y += hash.h generic-y += hw_irq.h diff --git a/arch/riscv/include/asm/futex.h b/arch/riscv/include/asm/futex.h new file mode 100644 index 000000000000..3b19eba1bc8e --- /dev/null +++ b/arch/riscv/include/asm/futex.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2006 Ralf Baechle (ralf@linux-mips.org) + * Copyright (c) 2018 Jim Wilson (jimw@sifive.com) + */ + +#ifndef _ASM_FUTEX_H +#define _ASM_FUTEX_H + +#ifndef CONFIG_RISCV_ISA_A +/* + * Use the generic interrupt disabling versions if the A extension + * is not supported. + */ +#ifdef CONFIG_SMP +#error "Can't support generic futex calls without A extension on SMP" +#endif +#include <asm-generic/futex.h> + +#else /* CONFIG_RISCV_ISA_A */ + +#include <linux/futex.h> +#include <linux/uaccess.h> +#include <linux/errno.h> +#include <asm/asm.h> + +#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ +{ \ + uintptr_t tmp; \ + __enable_user_access(); \ + __asm__ __volatile__ ( \ + "1: " insn " \n" \ + "2: \n" \ + " .section .fixup,\"ax\" \n" \ + " .balign 4 \n" \ + "3: li %[r],%[e] \n" \ + " jump 2b,%[t] \n" \ + " .previous \n" \ + " .section __ex_table,\"a\" \n" \ + " .balign " RISCV_SZPTR " \n" \ + " " RISCV_PTR " 1b, 3b \n" \ + " .previous \n" \ + : [r] "+r" (ret), [ov] "=&r" (oldval), \ + [u] "+m" (*uaddr), [t] "=&r" (tmp) \ + : [op] "Jr" (oparg), [e] "i" (-EFAULT) \ + : "memory"); \ + __disable_user_access(); \ +} + +static inline int +arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) +{ + int oldval = 0, ret = 0; + + pagefault_disable(); + + switch (op) { + case FUTEX_OP_SET: + __futex_atomic_op("amoswap.w.aqrl %[ov],%z[op],%[u]", + ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ADD: + __futex_atomic_op("amoadd.w.aqrl %[ov],%z[op],%[u]", + ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_OR: + __futex_atomic_op("amoor.w.aqrl %[ov],%z[op],%[u]", + ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ANDN: + __futex_atomic_op("amoand.w.aqrl %[ov],%z[op],%[u]", + ret, oldval, uaddr, ~oparg); + break; + case FUTEX_OP_XOR: + __futex_atomic_op("amoxor.w.aqrl %[ov],%z[op],%[u]", + ret, oldval, uaddr, oparg); + break; + default: + ret = -ENOSYS; + } + + pagefault_enable(); + + if (!ret) + *oval = oldval; + + return ret; +} + +static inline int +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) +{ + int ret = 0; + u32 val; + uintptr_t tmp; + + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) + return -EFAULT; + + __enable_user_access(); + __asm__ __volatile__ ( + "1: lr.w.aqrl %[v],%[u] \n" + " bne %[v],%z[ov],3f \n" + "2: sc.w.aqrl %[t],%z[nv],%[u] \n" + " bnez %[t],1b \n" + "3: \n" + " .section .fixup,\"ax\" \n" + " .balign 4 \n" + "4: li %[r],%[e] \n" + " jump 3b,%[t] \n" + " .previous \n" + " .section __ex_table,\"a\" \n" + " .balign " RISCV_SZPTR " \n" + " " RISCV_PTR " 1b, 4b \n" + " " RISCV_PTR " 2b, 4b \n" + " .previous \n" + : [r] "+r" (ret), [v] "=&r" (val), [u] "+m" (*uaddr), [t] "=&r" (tmp) + : [ov] "Jr" (oldval), [nv] "Jr" (newval), [e] "i" (-EFAULT) + : "memory"); + __disable_user_access(); + + *uval = val; + return ret; +} + +#endif /* CONFIG_RISCV_ISA_A */ +#endif /* _ASM_FUTEX_H */ diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 3fe4af8147d2..50de774d827a 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -88,7 +88,7 @@ static inline void wait_for_interrupt(void) } struct device_node; -extern int riscv_of_processor_hart(struct device_node *node); +int riscv_of_processor_hartid(struct device_node *node); extern void riscv_fill_hwcap(void); diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h index 36016845461d..41aa73b476f4 100644 --- a/arch/riscv/include/asm/smp.h +++ b/arch/riscv/include/asm/smp.h @@ -14,16 +14,24 @@ #ifndef _ASM_RISCV_SMP_H #define _ASM_RISCV_SMP_H -/* This both needs asm-offsets.h and is used when generating it. */ -#ifndef GENERATING_ASM_OFFSETS -#include <asm/asm-offsets.h> -#endif - #include <linux/cpumask.h> #include <linux/irqreturn.h> +#include <linux/thread_info.h> + +#define INVALID_HARTID ULONG_MAX +/* + * Mapping between linux logical cpu index and hartid. + */ +extern unsigned long __cpuid_to_hartid_map[NR_CPUS]; +#define cpuid_to_hartid_map(cpu) __cpuid_to_hartid_map[cpu] + +struct seq_file; #ifdef CONFIG_SMP +/* print IPI stats */ +void show_ipi_stats(struct seq_file *p, int prec); + /* SMP initialization hook for setup_arch */ void __init setup_smp(void); @@ -33,14 +41,31 @@ void arch_send_call_function_ipi_mask(struct cpumask *mask); /* Hook for the generic smp_call_function_single() routine. */ void arch_send_call_function_single_ipi(int cpu); +int riscv_hartid_to_cpuid(int hartid); +void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out); + /* - * This is particularly ugly: it appears we can't actually get the definition - * of task_struct here, but we need access to the CPU this task is running on. - * Instead of using C we're using asm-offsets.h to get the current processor - * ID. + * Obtains the hart ID of the currently executing task. This relies on + * THREAD_INFO_IN_TASK, but we define that unconditionally. */ -#define raw_smp_processor_id() (*((int*)((char*)get_current() + TASK_TI_CPU))) +#define raw_smp_processor_id() (current_thread_info()->cpu) -#endif /* CONFIG_SMP */ +#else + +static inline void show_ipi_stats(struct seq_file *p, int prec) +{ +} +static inline int riscv_hartid_to_cpuid(int hartid) +{ + return 0; +} + +static inline void riscv_cpuid_to_hartid_mask(const struct cpumask *in, + struct cpumask *out) +{ + cpumask_set_cpu(cpuid_to_hartid_map(0), out); +} + +#endif /* CONFIG_SMP */ #endif /* _ASM_RISCV_SMP_H */ diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h index dd6b05bff75b..733559083f24 100644 --- a/arch/riscv/include/asm/switch_to.h +++ b/arch/riscv/include/asm/switch_to.h @@ -18,6 +18,7 @@ #include <asm/ptrace.h> #include <asm/csr.h> +#ifdef CONFIG_FPU extern void __fstate_save(struct task_struct *save_to); extern void __fstate_restore(struct task_struct *restore_from); @@ -55,6 +56,14 @@ static inline void __switch_to_aux(struct task_struct *prev, fstate_restore(next, task_pt_regs(next)); } +extern bool has_fpu; +#else +#define has_fpu false +#define fstate_save(task, regs) do { } while (0) +#define fstate_restore(task, regs) do { } while (0) +#define __switch_to_aux(__prev, __next) do { } while (0) +#endif + extern struct task_struct *__switch_to(struct task_struct *, struct task_struct *); @@ -62,7 +71,8 @@ extern struct task_struct *__switch_to(struct task_struct *, do { \ struct task_struct *__prev = (prev); \ struct task_struct *__next = (next); \ - __switch_to_aux(__prev, __next); \ + if (has_fpu) \ + __switch_to_aux(__prev, __next); \ ((last) = __switch_to(__prev, __next)); \ } while (0) diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 85c2d8bae957..54fee0cadb1e 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -16,6 +16,7 @@ #define _ASM_RISCV_TLBFLUSH_H #include <linux/mm_types.h> +#include <asm/smp.h> /* * Flush entire local TLB. 'sfence.vma' implicitly fences with the instruction @@ -49,13 +50,22 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, #include <asm/sbi.h> +static inline void remote_sfence_vma(struct cpumask *cmask, unsigned long start, + unsigned long size) +{ + struct cpumask hmask; + + cpumask_clear(&hmask); + riscv_cpuid_to_hartid_mask(cmask, &hmask); + sbi_remote_sfence_vma(hmask.bits, start, size); +} + #define flush_tlb_all() sbi_remote_sfence_vma(NULL, 0, -1) #define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, 0) #define flush_tlb_range(vma, start, end) \ - sbi_remote_sfence_vma(mm_cpumask((vma)->vm_mm)->bits, \ - start, (end) - (start)) + remote_sfence_vma(mm_cpumask((vma)->vm_mm), start, (end) - (start)) #define flush_tlb_mm(mm) \ - sbi_remote_sfence_vma(mm_cpumask(mm)->bits, 0, -1) + remote_sfence_vma(mm_cpumask(mm), 0, -1) #endif /* CONFIG_SMP */ diff --git a/arch/riscv/include/uapi/asm/elf.h b/arch/riscv/include/uapi/asm/elf.h index 1e0dfc36aab9..644a00ce6e2e 100644 --- a/arch/riscv/include/uapi/asm/elf.h +++ b/arch/riscv/include/uapi/asm/elf.h @@ -19,7 +19,10 @@ typedef unsigned long elf_greg_t; typedef struct user_regs_struct elf_gregset_t; #define ELF_NGREG (sizeof(elf_gregset_t) / sizeof(elf_greg_t)) +/* We don't support f without d, or q. */ +typedef __u64 elf_fpreg_t; typedef union __riscv_fp_state elf_fpregset_t; +#define ELF_NFPREG (sizeof(struct __riscv_d_ext_state) / sizeof(elf_fpreg_t)) #if __riscv_xlen == 64 #define ELF_RISCV_R_SYM(r_info) ELF64_R_SYM(r_info) diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index e1274fc03af4..f13f7f276639 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -31,6 +31,7 @@ obj-y += vdso/ CFLAGS_setup.o := -mcmodel=medany +obj-$(CONFIG_FPU) += fpu.o obj-$(CONFIG_SMP) += smpboot.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_MODULES) += module.o diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c index 0bc86e5f8f3f..cb35ffd8ec6b 100644 --- a/arch/riscv/kernel/cacheinfo.c +++ b/arch/riscv/kernel/cacheinfo.c @@ -22,13 +22,6 @@ static void ci_leaf_init(struct cacheinfo *this_leaf, { this_leaf->level = level; this_leaf->type = type; - /* not a sector cache */ - this_leaf->physical_line_partition = 1; - /* TODO: Add to DTS */ - this_leaf->attributes = - CACHE_WRITE_BACK - | CACHE_READ_ALLOCATE - | CACHE_WRITE_ALLOCATE; } static int __init_cache_level(unsigned int cpu) diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index ca6c81e54e37..3a5a2ee31547 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -14,9 +14,13 @@ #include <linux/init.h> #include <linux/seq_file.h> #include <linux/of.h> +#include <asm/smp.h> -/* Return -1 if not a valid hart */ -int riscv_of_processor_hart(struct device_node *node) +/* + * Returns the hart ID of the given device tree node, or -1 if the device tree + * node isn't a RISC-V hart. + */ +int riscv_of_processor_hartid(struct device_node *node) { const char *isa, *status; u32 hart; @@ -58,6 +62,64 @@ int riscv_of_processor_hart(struct device_node *node) #ifdef CONFIG_PROC_FS +static void print_isa(struct seq_file *f, const char *orig_isa) +{ + static const char *ext = "mafdc"; + const char *isa = orig_isa; + const char *e; + + /* + * Linux doesn't support rv32e or rv128i, and we only support booting + * kernels on harts with the same ISA that the kernel is compiled for. + */ +#if defined(CONFIG_32BIT) + if (strncmp(isa, "rv32i", 5) != 0) + return; +#elif defined(CONFIG_64BIT) + if (strncmp(isa, "rv64i", 5) != 0) + return; +#endif + + /* Print the base ISA, as we already know it's legal. */ + seq_puts(f, "isa\t\t: "); + seq_write(f, isa, 5); + isa += 5; + + /* + * Check the rest of the ISA string for valid extensions, printing those + * we find. RISC-V ISA strings define an order, so we only print the + * extension bits when they're in order. + */ + for (e = ext; *e != '\0'; ++e) { + if (isa[0] == e[0]) { + seq_write(f, isa, 1); + isa++; + } + } + seq_puts(f, "\n"); + + /* + * If we were given an unsupported ISA in the device tree then print + * a bit of info describing what went wrong. + */ + if (isa[0] != '\0') + pr_info("unsupported ISA \"%s\" in device tree", orig_isa); +} + +static void print_mmu(struct seq_file *f, const char *mmu_type) +{ +#if defined(CONFIG_32BIT) + if (strcmp(mmu_type, "riscv,sv32") != 0) + return; +#elif defined(CONFIG_64BIT) + if (strcmp(mmu_type, "riscv,sv39") != 0 && + strcmp(mmu_type, "riscv,sv48") != 0) + return; +#endif + + seq_printf(f, "mmu\t\t: %s\n", mmu_type+6); +} + static void *c_start(struct seq_file *m, loff_t *pos) { *pos = cpumask_next(*pos - 1, cpu_online_mask); @@ -78,21 +140,20 @@ static void c_stop(struct seq_file *m, void *v) static int c_show(struct seq_file *m, void *v) { - unsigned long hart_id = (unsigned long)v - 1; - struct device_node *node = of_get_cpu_node(hart_id, NULL); + unsigned long cpu_id = (unsigned long)v - 1; + struct device_node *node = of_get_cpu_node(cpuid_to_hartid_map(cpu_id), + NULL); const char *compat, *isa, *mmu; - seq_printf(m, "hart\t: %lu\n", hart_id); - if (!of_property_read_string(node, "riscv,isa", &isa) - && isa[0] == 'r' - && isa[1] == 'v') - seq_printf(m, "isa\t: %s\n", isa); - if (!of_property_read_string(node, "mmu-type", &mmu) - && !strncmp(mmu, "riscv,", 6)) - seq_printf(m, "mmu\t: %s\n", mmu+6); + seq_printf(m, "processor\t: %lu\n", cpu_id); + seq_printf(m, "hart\t\t: %lu\n", cpuid_to_hartid_map(cpu_id)); + if (!of_property_read_string(node, "riscv,isa", &isa)) + print_isa(m, isa); + if (!of_property_read_string(node, "mmu-type", &mmu)) + print_mmu(m, mmu); if (!of_property_read_string(node, "compatible", &compat) && strcmp(compat, "riscv")) - seq_printf(m, "uarch\t: %s\n", compat); + seq_printf(m, "uarch\t\t: %s\n", compat); seq_puts(m, "\n"); return 0; diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 17011a870044..5493f3228704 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -22,6 +22,9 @@ #include <asm/hwcap.h> unsigned long elf_hwcap __read_mostly; +#ifdef CONFIG_FPU +bool has_fpu __read_mostly; +#endif void riscv_fill_hwcap(void) { @@ -57,5 +60,17 @@ void riscv_fill_hwcap(void) for (i = 0; i < strlen(isa); ++i) elf_hwcap |= isa2hwcap[(unsigned char)(isa[i])]; + /* We don't support systems with F but without D, so mask those out + * here. */ + if ((elf_hwcap & COMPAT_HWCAP_ISA_F) && !(elf_hwcap & COMPAT_HWCAP_ISA_D)) { + pr_info("This kernel does not support systems with F but not D"); + elf_hwcap &= ~COMPAT_HWCAP_ISA_F; + } + pr_info("elf_hwcap is 0x%lx", elf_hwcap); + +#ifdef CONFIG_FPU + if (elf_hwcap & (COMPAT_HWCAP_ISA_F | COMPAT_HWCAP_ISA_D)) + has_fpu = true; +#endif } diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index fa2c08e3c05e..13d4826ab2a1 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -168,7 +168,6 @@ ENTRY(handle_exception) /* Handle interrupts */ move a0, sp /* pt_regs */ - move a1, s4 /* scause */ tail do_IRQ 1: /* Exceptions run with interrupts enabled */ @@ -357,93 +356,6 @@ ENTRY(__switch_to) ret ENDPROC(__switch_to) -ENTRY(__fstate_save) - li a2, TASK_THREAD_F0 - add a0, a0, a2 - li t1, SR_FS - csrs sstatus, t1 - frcsr t0 - fsd f0, TASK_THREAD_F0_F0(a0) - fsd f1, TASK_THREAD_F1_F0(a0) - fsd f2, TASK_THREAD_F2_F0(a0) - fsd f3, TASK_THREAD_F3_F0(a0) - fsd f4, TASK_THREAD_F4_F0(a0) - fsd f5, TASK_THREAD_F5_F0(a0) - fsd f6, TASK_THREAD_F6_F0(a0) - fsd f7, TASK_THREAD_F7_F0(a0) - fsd f8, TASK_THREAD_F8_F0(a0) - fsd f9, TASK_THREAD_F9_F0(a0) - fsd f10, TASK_THREAD_F10_F0(a0) - fsd f11, TASK_THREAD_F11_F0(a0) - fsd f12, TASK_THREAD_F12_F0(a0) - fsd f13, TASK_THREAD_F13_F0(a0) - fsd f14, TASK_THREAD_F14_F0(a0) - fsd f15, TASK_THREAD_F15_F0(a0) - fsd f16, TASK_THREAD_F16_F0(a0) - fsd f17, TASK_THREAD_F17_F0(a0) - fsd f18, TASK_THREAD_F18_F0(a0) - fsd f19, TASK_THREAD_F19_F0(a0) - fsd f20, TASK_THREAD_F20_F0(a0) - fsd f21, TASK_THREAD_F21_F0(a0) - fsd f22, TASK_THREAD_F22_F0(a0) - fsd f23, TASK_THREAD_F23_F0(a0) - fsd f24, TASK_THREAD_F24_F0(a0) - fsd f25, TASK_THREAD_F25_F0(a0) - fsd f26, TASK_THREAD_F26_F0(a0) - fsd f27, TASK_THREAD_F27_F0(a0) - fsd f28, TASK_THREAD_F28_F0(a0) - fsd f29, TASK_THREAD_F29_F0(a0) - fsd f30, TASK_THREAD_F30_F0(a0) - fsd f31, TASK_THREAD_F31_F0(a0) - sw t0, TASK_THREAD_FCSR_F0(a0) - csrc sstatus, t1 - ret -ENDPROC(__fstate_save) - -ENTRY(__fstate_restore) - li a2, TASK_THREAD_F0 - add a0, a0, a2 - li t1, SR_FS - lw t0, TASK_THREAD_FCSR_F0(a0) - csrs sstatus, t1 - fld f0, TASK_THREAD_F0_F0(a0) - fld f1, TASK_THREAD_F1_F0(a0) - fld f2, TASK_THREAD_F2_F0(a0) - fld f3, TASK_THREAD_F3_F0(a0) - fld f4, TASK_THREAD_F4_F0(a0) - fld f5, TASK_THREAD_F5_F0(a0) - fld f6, TASK_THREAD_F6_F0(a0) - fld f7, TASK_THREAD_F7_F0(a0) - fld f8, TASK_THREAD_F8_F0(a0) - fld f9, TASK_THREAD_F9_F0(a0) - fld f10, TASK_THREAD_F10_F0(a0) - fld f11, TASK_THREAD_F11_F0(a0) - fld f12, TASK_THREAD_F12_F0(a0) - fld f13, TASK_THREAD_F13_F0(a0) - fld f14, TASK_THREAD_F14_F0(a0) - fld f15, TASK_THREAD_F15_F0(a0) - fld f16, TASK_THREAD_F16_F0(a0) - fld f17, TASK_THREAD_F17_F0(a0) - fld f18, TASK_THREAD_F18_F0(a0) - fld f19, TASK_THREAD_F19_F0(a0) - fld f20, TASK_THREAD_F20_F0(a0) - fld f21, TASK_THREAD_F21_F0(a0) - fld f22, TASK_THREAD_F22_F0(a0) - fld f23, TASK_THREAD_F23_F0(a0) - fld f24, TASK_THREAD_F24_F0(a0) - fld f25, TASK_THREAD_F25_F0(a0) - fld f26, TASK_THREAD_F26_F0(a0) - fld f27, TASK_THREAD_F27_F0(a0) - fld f28, TASK_THREAD_F28_F0(a0) - fld f29, TASK_THREAD_F29_F0(a0) - fld f30, TASK_THREAD_F30_F0(a0) - fld f31, TASK_THREAD_F31_F0(a0) - fscsr t0 - csrc sstatus, t1 - ret -ENDPROC(__fstate_restore) - - .section ".rodata" /* Exception vector table */ ENTRY(excp_vect_table) diff --git a/arch/riscv/kernel/fpu.S b/arch/riscv/kernel/fpu.S new file mode 100644 index 000000000000..1defb0618aff --- /dev/null +++ b/arch/riscv/kernel/fpu.S @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2012 Regents of the University of California + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/linkage.h> + +#include <asm/asm.h> +#include <asm/csr.h> +#include <asm/asm-offsets.h> + +ENTRY(__fstate_save) + li a2, TASK_THREAD_F0 + add a0, a0, a2 + li t1, SR_FS + csrs sstatus, t1 + frcsr t0 + fsd f0, TASK_THREAD_F0_F0(a0) + fsd f1, TASK_THREAD_F1_F0(a0) + fsd f2, TASK_THREAD_F2_F0(a0) + fsd f3, TASK_THREAD_F3_F0(a0) + fsd f4, TASK_THREAD_F4_F0(a0) + fsd f5, TASK_THREAD_F5_F0(a0) + fsd f6, TASK_THREAD_F6_F0(a0) + fsd f7, TASK_THREAD_F7_F0(a0) + fsd f8, TASK_THREAD_F8_F0(a0) + fsd f9, TASK_THREAD_F9_F0(a0) + fsd f10, TASK_THREAD_F10_F0(a0) + fsd f11, TASK_THREAD_F11_F0(a0) + fsd f12, TASK_THREAD_F12_F0(a0) + fsd f13, TASK_THREAD_F13_F0(a0) + fsd f14, TASK_THREAD_F14_F0(a0) + fsd f15, TASK_THREAD_F15_F0(a0) + fsd f16, TASK_THREAD_F16_F0(a0) + fsd f17, TASK_THREAD_F17_F0(a0) + fsd f18, TASK_THREAD_F18_F0(a0) + fsd f19, TASK_THREAD_F19_F0(a0) + fsd f20, TASK_THREAD_F20_F0(a0) + fsd f21, TASK_THREAD_F21_F0(a0) + fsd f22, TASK_THREAD_F22_F0(a0) + fsd f23, TASK_THREAD_F23_F0(a0) + fsd f24, TASK_THREAD_F24_F0(a0) + fsd f25, TASK_THREAD_F25_F0(a0) + fsd f26, TASK_THREAD_F26_F0(a0) + fsd f27, TASK_THREAD_F27_F0(a0) + fsd f28, TASK_THREAD_F28_F0(a0) + fsd f29, TASK_THREAD_F29_F0(a0) + fsd f30, TASK_THREAD_F30_F0(a0) + fsd f31, TASK_THREAD_F31_F0(a0) + sw t0, TASK_THREAD_FCSR_F0(a0) + csrc sstatus, t1 + ret +ENDPROC(__fstate_save) + +ENTRY(__fstate_restore) + li a2, TASK_THREAD_F0 + add a0, a0, a2 + li t1, SR_FS + lw t0, TASK_THREAD_FCSR_F0(a0) + csrs sstatus, t1 + fld f0, TASK_THREAD_F0_F0(a0) + fld f1, TASK_THREAD_F1_F0(a0) + fld f2, TASK_THREAD_F2_F0(a0) + fld f3, TASK_THREAD_F3_F0(a0) + fld f4, TASK_THREAD_F4_F0(a0) + fld f5, TASK_THREAD_F5_F0(a0) + fld f6, TASK_THREAD_F6_F0(a0) + fld f7, TASK_THREAD_F7_F0(a0) + fld f8, TASK_THREAD_F8_F0(a0) + fld f9, TASK_THREAD_F9_F0(a0) + fld f10, TASK_THREAD_F10_F0(a0) + fld f11, TASK_THREAD_F11_F0(a0) + fld f12, TASK_THREAD_F12_F0(a0) + fld f13, TASK_THREAD_F13_F0(a0) + fld f14, TASK_THREAD_F14_F0(a0) + fld f15, TASK_THREAD_F15_F0(a0) + fld f16, TASK_THREAD_F16_F0(a0) + fld f17, TASK_THREAD_F17_F0(a0) + fld f18, TASK_THREAD_F18_F0(a0) + fld f19, TASK_THREAD_F19_F0(a0) + fld f20, TASK_THREAD_F20_F0(a0) + fld f21, TASK_THREAD_F21_F0(a0) + fld f22, TASK_THREAD_F22_F0(a0) + fld f23, TASK_THREAD_F23_F0(a0) + fld f24, TASK_THREAD_F24_F0(a0) + fld f25, TASK_THREAD_F25_F0(a0) + fld f26, TASK_THREAD_F26_F0(a0) + fld f27, TASK_THREAD_F27_F0(a0) + fld f28, TASK_THREAD_F28_F0(a0) + fld f29, TASK_THREAD_F29_F0(a0) + fld f30, TASK_THREAD_F30_F0(a0) + fld f31, TASK_THREAD_F31_F0(a0) + fscsr t0 + csrc sstatus, t1 + ret +ENDPROC(__fstate_restore) diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index c4d2c63f9a29..711190d473d4 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -47,6 +47,8 @@ ENTRY(_start) /* Save hart ID and DTB physical address */ mv s0, a0 mv s1, a1 + la a2, boot_cpu_hartid + REG_S a0, (a2) /* Initialize page tables and relocate to virtual addresses */ la sp, init_thread_union + THREAD_SIZE @@ -55,7 +57,7 @@ ENTRY(_start) /* Restore C environment */ la tp, init_task - sw s0, TASK_TI_CPU(tp) + sw zero, TASK_TI_CPU(tp) la sp, init_thread_union li a0, ASM_THREAD_SIZE diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c index 0cfac48a1272..48e6b7db83a1 100644 --- a/arch/riscv/kernel/irq.c +++ b/arch/riscv/kernel/irq.c @@ -8,6 +8,8 @@ #include <linux/interrupt.h> #include <linux/irqchip.h> #include <linux/irqdomain.h> +#include <linux/seq_file.h> +#include <asm/smp.h> /* * Possible interrupt causes: @@ -24,12 +26,18 @@ */ #define INTERRUPT_CAUSE_FLAG (1UL << (__riscv_xlen - 1)) -asmlinkage void __irq_entry do_IRQ(struct pt_regs *regs, unsigned long cause) +int arch_show_interrupts(struct seq_file *p, int prec) +{ + show_ipi_stats(p, prec); + return 0; +} + +asmlinkage void __irq_entry do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); irq_enter(); - switch (cause & ~INTERRUPT_CAUSE_FLAG) { + switch (regs->scause & ~INTERRUPT_CAUSE_FLAG) { case INTERRUPT_CAUSE_TIMER: riscv_timer_interrupt(); break; diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S index 5721624886a1..8a5593ff9ff3 100644 --- a/arch/riscv/kernel/mcount.S +++ b/arch/riscv/kernel/mcount.S @@ -75,7 +75,6 @@ ENTRY(return_to_handler) RESTORE_RET_ABI_STATE jalr a1 ENDPROC(return_to_handler) -EXPORT_SYMBOL(return_to_handler) #endif #ifndef CONFIG_DYNAMIC_FTRACE diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index d7c6ca7c95ae..bef19993ea92 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -76,7 +76,9 @@ void show_regs(struct pt_regs *regs) void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) { - regs->sstatus = SR_SPIE /* User mode, irqs on */ | SR_FS_INITIAL; + regs->sstatus = SR_SPIE; + if (has_fpu) + regs->sstatus |= SR_FS_INITIAL; regs->sepc = pc; regs->sp = sp; set_fs(USER_DS); @@ -84,12 +86,14 @@ void start_thread(struct pt_regs *regs, unsigned long pc, void flush_thread(void) { +#ifdef CONFIG_FPU /* * Reset FPU context * frm: round to nearest, ties to even (IEEE default) * fflags: accrued exceptions cleared */ memset(¤t->thread.fstate, 0, sizeof(current->thread.fstate)); +#endif } int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index 9f82a7e34c64..60f1e02eed36 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -28,6 +28,9 @@ enum riscv_regset { REGSET_X, +#ifdef CONFIG_FPU + REGSET_F, +#endif }; static int riscv_gpr_get(struct task_struct *target, @@ -54,6 +57,45 @@ static int riscv_gpr_set(struct task_struct *target, return ret; } +#ifdef CONFIG_FPU +static int riscv_fpr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + struct __riscv_d_ext_state *fstate = &target->thread.fstate; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, fstate, 0, + offsetof(struct __riscv_d_ext_state, fcsr)); + if (!ret) { + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, fstate, 0, + offsetof(struct __riscv_d_ext_state, fcsr) + + sizeof(fstate->fcsr)); + } + + return ret; +} + +static int riscv_fpr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + struct __riscv_d_ext_state *fstate = &target->thread.fstate; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, fstate, 0, + offsetof(struct __riscv_d_ext_state, fcsr)); + if (!ret) { + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, fstate, 0, + offsetof(struct __riscv_d_ext_state, fcsr) + + sizeof(fstate->fcsr)); + } + + return ret; +} +#endif static const struct user_regset riscv_user_regset[] = { [REGSET_X] = { @@ -64,6 +106,16 @@ static const struct user_regset riscv_user_regset[] = { .get = &riscv_gpr_get, .set = &riscv_gpr_set, }, +#ifdef CONFIG_FPU + [REGSET_F] = { + .core_note_type = NT_PRFPREG, + .n = ELF_NFPREG, + .size = sizeof(elf_fpreg_t), + .align = sizeof(elf_fpreg_t), + .get = &riscv_fpr_get, + .set = &riscv_fpr_set, + }, +#endif }; static const struct user_regset_view riscv_user_native_view = { diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index b2d26d9d8489..2c290e6aaa6e 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -81,6 +81,16 @@ EXPORT_SYMBOL(empty_zero_page); /* The lucky hart to first increment this variable will boot the other cores */ atomic_t hart_lottery; +unsigned long boot_cpu_hartid; + +unsigned long __cpuid_to_hartid_map[NR_CPUS] = { + [0 ... NR_CPUS-1] = INVALID_HARTID +}; + +void __init smp_setup_processor_id(void) +{ + cpuid_to_hartid_map(0) = boot_cpu_hartid; +} #ifdef CONFIG_BLK_DEV_INITRD static void __init setup_initrd(void) @@ -227,7 +237,10 @@ void __init setup_arch(char **cmdline_p) setup_bootmem(); paging_init(); unflatten_device_tree(); + +#ifdef CONFIG_SWIOTLB swiotlb_init(1); +#endif #ifdef CONFIG_SMP setup_smp(); diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 718d0c984ef0..f9b5e7e352ef 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -37,45 +37,69 @@ struct rt_sigframe { struct ucontext uc; }; -static long restore_d_state(struct pt_regs *regs, - struct __riscv_d_ext_state __user *state) +#ifdef CONFIG_FPU +static long restore_fp_state(struct pt_regs *regs, + union __riscv_fp_state *sc_fpregs) { long err; + struct __riscv_d_ext_state __user *state = &sc_fpregs->d; + size_t i; + err = __copy_from_user(¤t->thread.fstate, state, sizeof(*state)); - if (likely(!err)) - fstate_restore(current, regs); + if (unlikely(err)) + return err; + + fstate_restore(current, regs); + + /* We support no other extension state at this time. */ + for (i = 0; i < ARRAY_SIZE(sc_fpregs->q.reserved); i++) { + u32 value; + + err = __get_user(value, &sc_fpregs->q.reserved[i]); + if (unlikely(err)) + break; + if (value != 0) + return -EINVAL; + } + return err; } -static long save_d_state(struct pt_regs *regs, - struct __riscv_d_ext_state __user *state) +static long save_fp_state(struct pt_regs *regs, + union __riscv_fp_state *sc_fpregs) { + long err; + struct __riscv_d_ext_state __user *state = &sc_fpregs->d; + size_t i; + fstate_save(current, regs); - return __copy_to_user(state, ¤t->thread.fstate, sizeof(*state)); + err = __copy_to_user(state, ¤t->thread.fstate, sizeof(*state)); + if (unlikely(err)) + return err; + + /* We support no other extension state at this time. */ + for (i = 0; i < ARRAY_SIZE(sc_fpregs->q.reserved); i++) { + err = __put_user(0, &sc_fpregs->q.reserved[i]); + if (unlikely(err)) + break; + } + + return err; } +#else +#define save_fp_state(task, regs) (0) +#define restore_fp_state(task, regs) (0) +#endif static long restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) { long err; - size_t i; /* sc_regs is structured the same as the start of pt_regs */ err = __copy_from_user(regs, &sc->sc_regs, sizeof(sc->sc_regs)); - if (unlikely(err)) - return err; /* Restore the floating-point state. */ - err = restore_d_state(regs, &sc->sc_fpregs.d); - if (unlikely(err)) - return err; - /* We support no other extension state at this time. */ - for (i = 0; i < ARRAY_SIZE(sc->sc_fpregs.q.reserved); i++) { - u32 value; - err = __get_user(value, &sc->sc_fpregs.q.reserved[i]); - if (unlikely(err)) - break; - if (value != 0) - return -EINVAL; - } + if (has_fpu) + err |= restore_fp_state(regs, &sc->sc_fpregs); return err; } @@ -124,14 +148,11 @@ static long setup_sigcontext(struct rt_sigframe __user *frame, { struct sigcontext __user *sc = &frame->uc.uc_mcontext; long err; - size_t i; /* sc_regs is structured the same as the start of pt_regs */ err = __copy_to_user(&sc->sc_regs, regs, sizeof(sc->sc_regs)); /* Save the floating-point state. */ - err |= save_d_state(regs, &sc->sc_fpregs.d); - /* We support no other extension state at this time. */ - for (i = 0; i < ARRAY_SIZE(sc->sc_fpregs.q.reserved); i++) - err |= __put_user(0, &sc->sc_fpregs.q.reserved[i]); + if (has_fpu) + err |= save_fp_state(regs, &sc->sc_fpregs); return err; } diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index 906fe21ea21b..57b1383e5ef7 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -22,23 +22,44 @@ #include <linux/interrupt.h> #include <linux/smp.h> #include <linux/sched.h> +#include <linux/seq_file.h> #include <asm/sbi.h> #include <asm/tlbflush.h> #include <asm/cacheflush.h> -/* A collection of single bit ipi messages. */ -static struct { - unsigned long bits ____cacheline_aligned; -} ipi_data[NR_CPUS] __cacheline_aligned; - enum ipi_message_type { IPI_RESCHEDULE, IPI_CALL_FUNC, IPI_MAX }; +/* A collection of single bit ipi messages. */ +static struct { + unsigned long stats[IPI_MAX] ____cacheline_aligned; + unsigned long bits ____cacheline_aligned; +} ipi_data[NR_CPUS] __cacheline_aligned; + +int riscv_hartid_to_cpuid(int hartid) +{ + int i = -1; + + for (i = 0; i < NR_CPUS; i++) + if (cpuid_to_hartid_map(i) == hartid) + return i; + pr_err("Couldn't find cpu id for hartid [%d]\n", hartid); + BUG(); + return i; +} + +void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out) +{ + int cpu; + + for_each_cpu(cpu, in) + cpumask_set_cpu(cpuid_to_hartid_map(cpu), out); +} /* Unsupported */ int setup_profiling_timer(unsigned int multiplier) { @@ -48,6 +69,7 @@ int setup_profiling_timer(unsigned int multiplier) void riscv_software_interrupt(void) { unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits; + unsigned long *stats = ipi_data[smp_processor_id()].stats; /* Clear pending IPI */ csr_clear(sip, SIE_SSIE); @@ -62,11 +84,15 @@ void riscv_software_interrupt(void) if (ops == 0) return; - if (ops & (1 << IPI_RESCHEDULE)) + if (ops & (1 << IPI_RESCHEDULE)) { + stats[IPI_RESCHEDULE]++; scheduler_ipi(); + } - if (ops & (1 << IPI_CALL_FUNC)) + if (ops & (1 << IPI_CALL_FUNC)) { + stats[IPI_CALL_FUNC]++; generic_smp_call_function_interrupt(); + } BUG_ON((ops >> IPI_MAX) != 0); @@ -78,14 +104,36 @@ void riscv_software_interrupt(void) static void send_ipi_message(const struct cpumask *to_whom, enum ipi_message_type operation) { - int i; + int cpuid, hartid; + struct cpumask hartid_mask; + cpumask_clear(&hartid_mask); mb(); - for_each_cpu(i, to_whom) - set_bit(operation, &ipi_data[i].bits); - + for_each_cpu(cpuid, to_whom) { + set_bit(operation, &ipi_data[cpuid].bits); + hartid = cpuid_to_hartid_map(cpuid); + cpumask_set_cpu(hartid, &hartid_mask); + } mb(); - sbi_send_ipi(cpumask_bits(to_whom)); + sbi_send_ipi(cpumask_bits(&hartid_mask)); +} + +static const char * const ipi_names[] = { + [IPI_RESCHEDULE] = "Rescheduling interrupts", + [IPI_CALL_FUNC] = "Function call interrupts", +}; + +void show_ipi_stats(struct seq_file *p, int prec) +{ + unsigned int cpu, i; + + for (i = 0; i < IPI_MAX; i++) { + seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i, + prec >= 4 ? " " : ""); + for_each_online_cpu(cpu) + seq_printf(p, "%10lu ", ipi_data[cpu].stats[i]); + seq_printf(p, " %s\n", ipi_names[i]); + } } void arch_send_call_function_ipi_mask(struct cpumask *mask) @@ -127,7 +175,7 @@ void smp_send_reschedule(int cpu) void flush_icache_mm(struct mm_struct *mm, bool local) { unsigned int cpu; - cpumask_t others, *mask; + cpumask_t others, hmask, *mask; preempt_disable(); @@ -145,9 +193,11 @@ void flush_icache_mm(struct mm_struct *mm, bool local) */ cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu)); local |= cpumask_empty(&others); - if (mm != current->active_mm || !local) - sbi_remote_fence_i(others.bits); - else { + if (mm != current->active_mm || !local) { + cpumask_clear(&hmask); + riscv_cpuid_to_hartid_mask(&others, &hmask); + sbi_remote_fence_i(hmask.bits); + } else { /* * It's assumed that at least one strongly ordered operation is * performed on this hart between setting a hart's cpumask bit diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 56abab6a9812..18cda0e8cf94 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -30,6 +30,7 @@ #include <linux/irq.h> #include <linux/of.h> #include <linux/sched/task_stack.h> +#include <linux/sched/mm.h> #include <asm/irq.h> #include <asm/mmu_context.h> #include <asm/tlbflush.h> @@ -50,25 +51,33 @@ void __init smp_prepare_cpus(unsigned int max_cpus) void __init setup_smp(void) { struct device_node *dn = NULL; - int hart, im_okay_therefore_i_am = 0; + int hart; + bool found_boot_cpu = false; + int cpuid = 1; while ((dn = of_find_node_by_type(dn, "cpu"))) { - hart = riscv_of_processor_hart(dn); - if (hart >= 0) { - set_cpu_possible(hart, true); - set_cpu_present(hart, true); - if (hart == smp_processor_id()) { - BUG_ON(im_okay_therefore_i_am); - im_okay_therefore_i_am = 1; - } + hart = riscv_of_processor_hartid(dn); + if (hart < 0) + continue; + + if (hart == cpuid_to_hartid_map(0)) { + BUG_ON(found_boot_cpu); + found_boot_cpu = 1; + continue; } + + cpuid_to_hartid_map(cpuid) = hart; + set_cpu_possible(cpuid, true); + set_cpu_present(cpuid, true); + cpuid++; } - BUG_ON(!im_okay_therefore_i_am); + BUG_ON(!found_boot_cpu); } int __cpu_up(unsigned int cpu, struct task_struct *tidle) { + int hartid = cpuid_to_hartid_map(cpu); tidle->thread_info.cpu = cpu; /* @@ -79,8 +88,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) * the spinning harts that they can continue the boot process. */ smp_mb(); - __cpu_up_stack_pointer[cpu] = task_stack_page(tidle) + THREAD_SIZE; - __cpu_up_task_pointer[cpu] = tidle; + WRITE_ONCE(__cpu_up_stack_pointer[hartid], + task_stack_page(tidle) + THREAD_SIZE); + WRITE_ONCE(__cpu_up_task_pointer[hartid], tidle); while (!cpu_online(cpu)) cpu_relax(); @@ -100,14 +110,22 @@ asmlinkage void __init smp_callin(void) struct mm_struct *mm = &init_mm; /* All kernel threads share the same mm context. */ - atomic_inc(&mm->mm_count); + mmgrab(mm); current->active_mm = mm; trap_init(); notify_cpu_starting(smp_processor_id()); set_cpu_online(smp_processor_id(), 1); + /* + * Remote TLB flushes are ignored while the CPU is offline, so emit + * a local TLB flush right now just in case. + */ local_flush_tlb_all(); - local_irq_enable(); + /* + * Disable preemption before enabling interrupts, so we don't try to + * schedule a CPU that hasn't actually started yet. + */ preempt_disable(); + local_irq_enable(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 445ec84f9a47..5739bd05d289 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -2,6 +2,7 @@ lib-y += delay.o lib-y += memcpy.o lib-y += memset.o lib-y += uaccess.o -lib-y += tishift.o + +lib-(CONFIG_64BIT) += tishift.o lib-$(CONFIG_32BIT) += udivdi3.o diff --git a/arch/riscv/mm/ioremap.c b/arch/riscv/mm/ioremap.c index 70ef2724cdf6..bd2f2db557cc 100644 --- a/arch/riscv/mm/ioremap.c +++ b/arch/riscv/mm/ioremap.c @@ -42,7 +42,7 @@ static void __iomem *__ioremap_caller(phys_addr_t addr, size_t size, /* Page-align mappings */ offset = addr & (~PAGE_MASK); - addr &= PAGE_MASK; + addr -= offset; size = PAGE_ALIGN(size + offset); area = get_vm_area_caller(size, VM_IOREMAP, caller); diff --git a/drivers/clocksource/riscv_timer.c b/drivers/clocksource/riscv_timer.c index 4e8b347e43e2..084e97dc10ed 100644 --- a/drivers/clocksource/riscv_timer.c +++ b/drivers/clocksource/riscv_timer.c @@ -8,6 +8,7 @@ #include <linux/cpu.h> #include <linux/delay.h> #include <linux/irq.h> +#include <asm/smp.h> #include <asm/sbi.h> /* @@ -84,13 +85,16 @@ void riscv_timer_interrupt(void) static int __init riscv_timer_init_dt(struct device_node *n) { - int cpu_id = riscv_of_processor_hart(n), error; + int cpuid, hartid, error; struct clocksource *cs; - if (cpu_id != smp_processor_id()) + hartid = riscv_of_processor_hartid(n); + cpuid = riscv_hartid_to_cpuid(hartid); + + if (cpuid != smp_processor_id()) return 0; - cs = per_cpu_ptr(&riscv_clocksource, cpu_id); + cs = per_cpu_ptr(&riscv_clocksource, cpuid); clocksource_register_hz(cs, riscv_timebase); error = cpuhp_setup_state(CPUHP_AP_RISCV_TIMER_STARTING, @@ -98,7 +102,7 @@ static int __init riscv_timer_init_dt(struct device_node *n) riscv_timer_starting_cpu, riscv_timer_dying_cpu); if (error) pr_err("RISCV timer register failed [%d] for cpu = [%d]\n", - error, cpu_id); + error, cpuid); return error; } diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index 532e9d68c704..357e9daf94ae 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -15,6 +15,7 @@ #include <linux/of_irq.h> #include <linux/platform_device.h> #include <linux/spinlock.h> +#include <asm/smp.h> /* * This driver implements a version of the RISC-V PLIC with the actual layout @@ -176,7 +177,7 @@ static int plic_find_hart_id(struct device_node *node) { for (; node; node = node->parent) { if (of_device_is_compatible(node, "riscv")) - return riscv_of_processor_hart(node); + return riscv_of_processor_hartid(node); } return -1; @@ -218,7 +219,7 @@ static int __init plic_init(struct device_node *node, struct of_phandle_args parent; struct plic_handler *handler; irq_hw_number_t hwirq; - int cpu; + int cpu, hartid; if (of_irq_parse_one(node, i, &parent)) { pr_err("failed to parse parent for context %d.\n", i); @@ -229,12 +230,13 @@ static int __init plic_init(struct device_node *node, if (parent.args[0] == -1) continue; - cpu = plic_find_hart_id(parent.np); - if (cpu < 0) { + hartid = plic_find_hart_id(parent.np); + if (hartid < 0) { pr_warn("failed to parse hart ID for context %d.\n", i); continue; } + cpu = riscv_hartid_to_cpuid(hartid); handler = per_cpu_ptr(&plic_handlers, cpu); handler->present = true; handler->ctxid = i; diff --git a/lib/Kconfig b/lib/Kconfig index a3928d4438b5..d82f20609939 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -621,3 +621,6 @@ config GENERIC_LIB_CMPDI2 config GENERIC_LIB_UCMPDI2 bool + +config GENERIC_LIB_UMODDI3 + bool diff --git a/lib/Makefile b/lib/Makefile index 423876446810..56a8d9c23ef3 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -270,3 +270,4 @@ obj-$(CONFIG_GENERIC_LIB_LSHRDI3) += lshrdi3.o obj-$(CONFIG_GENERIC_LIB_MULDI3) += muldi3.o obj-$(CONFIG_GENERIC_LIB_CMPDI2) += cmpdi2.o obj-$(CONFIG_GENERIC_LIB_UCMPDI2) += ucmpdi2.o +obj-$(CONFIG_GENERIC_LIB_UMODDI3) += umoddi3.o udivmoddi4.o diff --git a/lib/udivmoddi4.c b/lib/udivmoddi4.c new file mode 100644 index 000000000000..c08bc8a5f1cf --- /dev/null +++ b/lib/udivmoddi4.c @@ -0,0 +1,310 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see the file COPYING, or write + * to the Free Software Foundation, Inc. + */ + +#include <linux/libgcc.h> + +#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz(X)) + +#define W_TYPE_SIZE 32 + +#define __ll_B ((unsigned long) 1 << (W_TYPE_SIZE / 2)) +#define __ll_lowpart(t) ((unsigned long) (t) & (__ll_B - 1)) +#define __ll_highpart(t) ((unsigned long) (t) >> (W_TYPE_SIZE / 2)) + +/* If we still don't have umul_ppmm, define it using plain C. */ +#if !defined(umul_ppmm) +#define umul_ppmm(w1, w0, u, v) \ + do { \ + unsigned long __x0, __x1, __x2, __x3; \ + unsigned short __ul, __vl, __uh, __vh; \ + \ + __ul = __ll_lowpart(u); \ + __uh = __ll_highpart(u); \ + __vl = __ll_lowpart(v); \ + __vh = __ll_highpart(v); \ + \ + __x0 = (unsigned long) __ul * __vl; \ + __x1 = (unsigned long) __ul * __vh; \ + __x2 = (unsigned long) __uh * __vl; \ + __x3 = (unsigned long) __uh * __vh; \ + \ + __x1 += __ll_highpart(__x0); \ + __x1 += __x2; \ + if (__x1 < __x2) \ + __x3 += __ll_B; \ + \ + (w1) = __x3 + __ll_highpart(__x1); \ + (w0) = __ll_lowpart(__x1) * __ll_B + __ll_lowpart(__x0);\ + } while (0) +#endif + +#if !defined(sub_ddmmss) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + do { \ + unsigned long __x; \ + __x = (al) - (bl); \ + (sh) = (ah) - (bh) - (__x > (al)); \ + (sl) = __x; \ + } while (0) +#endif + +/* Define this unconditionally, so it can be used for debugging. */ +#define __udiv_qrnnd_c(q, r, n1, n0, d) \ + do { \ + unsigned long __d1, __d0, __q1, __q0; \ + unsigned long __r1, __r0, __m; \ + __d1 = __ll_highpart(d); \ + __d0 = __ll_lowpart(d); \ + \ + __r1 = (n1) % __d1; \ + __q1 = (n1) / __d1; \ + __m = (unsigned long) __q1 * __d0; \ + __r1 = __r1 * __ll_B | __ll_highpart(n0); \ + if (__r1 < __m) { \ + __q1--, __r1 += (d); \ + if (__r1 >= (d)) \ + if (__r1 < __m) \ + __q1--, __r1 += (d); \ + } \ + __r1 -= __m; \ + \ + __r0 = __r1 % __d1; \ + __q0 = __r1 / __d1; \ + __m = (unsigned long) __q0 * __d0; \ + __r0 = __r0 * __ll_B | __ll_lowpart(n0); \ + if (__r0 < __m) { \ + __q0--, __r0 += (d); \ + if (__r0 >= (d)) \ + if (__r0 < __m) \ + __q0--, __r0 += (d); \ + } \ + __r0 -= __m; \ + \ + (q) = (unsigned long) __q1 * __ll_B | __q0; \ + (r) = __r0; \ + } while (0) + +/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */ +#if !defined(udiv_qrnnd) +#define UDIV_NEEDS_NORMALIZATION 1 +#define udiv_qrnnd __udiv_qrnnd_c +#endif + +unsigned long long __udivmoddi4(unsigned long long u, unsigned long long v, + unsigned long long *rp) +{ + const DWunion nn = {.ll = u }; + const DWunion dd = {.ll = v }; + DWunion rr, ww; + unsigned long d0, d1, n0, n1, n2; + unsigned long q0 = 0, q1 = 0; + unsigned long b, bm; + + d0 = dd.s.low; + d1 = dd.s.high; + n0 = nn.s.low; + n1 = nn.s.high; + +#if !UDIV_NEEDS_NORMALIZATION + + if (d1 == 0) { + if (d0 > n1) { + /* 0q = nn / 0D */ + + udiv_qrnnd(q0, n0, n1, n0, d0); + q1 = 0; + + /* Remainder in n0. */ + } else { + /* qq = NN / 0d */ + + if (d0 == 0) + /* Divide intentionally by zero. */ + d0 = 1 / d0; + + udiv_qrnnd(q1, n1, 0, n1, d0); + udiv_qrnnd(q0, n0, n1, n0, d0); + + /* Remainder in n0. */ + } + + if (rp != 0) { + rr.s.low = n0; + rr.s.high = 0; + *rp = rr.ll; + } + +#else /* UDIV_NEEDS_NORMALIZATION */ + + if (d1 == 0) { + if (d0 > n1) { + /* 0q = nn / 0D */ + + count_leading_zeros(bm, d0); + + if (bm != 0) { + /* + * Normalize, i.e. make the most significant bit + * of the denominator set. + */ + + d0 = d0 << bm; + n1 = (n1 << bm) | (n0 >> (W_TYPE_SIZE - bm)); + n0 = n0 << bm; + } + + udiv_qrnnd(q0, n0, n1, n0, d0); + q1 = 0; + + /* Remainder in n0 >> bm. */ + } else { + /* qq = NN / 0d */ + + if (d0 == 0) + /* Divide intentionally by zero. */ + d0 = 1 / d0; + + count_leading_zeros(bm, d0); + + if (bm == 0) { + /* + * From (n1 >= d0) /\ (the most significant bit + * of d0 is set), conclude (the most significant + * bit of n1 is set) /\ (theleading quotient + * digit q1 = 1). + * + * This special case is necessary, not an + * optimization. (Shifts counts of W_TYPE_SIZE + * are undefined.) + */ + + n1 -= d0; + q1 = 1; + } else { + /* Normalize. */ + + b = W_TYPE_SIZE - bm; + + d0 = d0 << bm; + n2 = n1 >> b; + n1 = (n1 << bm) | (n0 >> b); + n0 = n0 << bm; + + udiv_qrnnd(q1, n1, n2, n1, d0); + } + + /* n1 != d0... */ + + udiv_qrnnd(q0, n0, n1, n0, d0); + + /* Remainder in n0 >> bm. */ + } + + if (rp != 0) { + rr.s.low = n0 >> bm; + rr.s.high = 0; + *rp = rr.ll; + } + +#endif /* UDIV_NEEDS_NORMALIZATION */ + + } else { + if (d1 > n1) { + /* 00 = nn / DD */ + + q0 = 0; + q1 = 0; + + /* Remainder in n1n0. */ + if (rp != 0) { + rr.s.low = n0; + rr.s.high = n1; + *rp = rr.ll; + } + } else { + /* 0q = NN / dd */ + + count_leading_zeros(bm, d1); + if (bm == 0) { + /* + * From (n1 >= d1) /\ (the most significant bit + * of d1 is set), conclude (the most significant + * bit of n1 is set) /\ (the quotient digit q0 = + * 0 or 1). + * + * This special case is necessary, not an + * optimization. + */ + + /* + * The condition on the next line takes + * advantage of that n1 >= d1 (true due to + * program flow). + */ + if (n1 > d1 || n0 >= d0) { + q0 = 1; + sub_ddmmss(n1, n0, n1, n0, d1, d0); + } else { + q0 = 0; + } + + q1 = 0; + + if (rp != 0) { + rr.s.low = n0; + rr.s.high = n1; + *rp = rr.ll; + } + } else { + unsigned long m1, m0; + /* Normalize. */ + + b = W_TYPE_SIZE - bm; + + d1 = (d1 << bm) | (d0 >> b); + d0 = d0 << bm; + n2 = n1 >> b; + n1 = (n1 << bm) | (n0 >> b); + n0 = n0 << bm; + + udiv_qrnnd(q0, n1, n2, n1, d1); + umul_ppmm(m1, m0, q0, d0); + + if (m1 > n1 || (m1 == n1 && m0 > n0)) { + q0--; + sub_ddmmss(m1, m0, m1, m0, d1, d0); + } + + q1 = 0; + + /* Remainder in (n1n0 - m1m0) >> bm. */ + if (rp != 0) { + sub_ddmmss(n1, n0, n1, n0, m1, m0); + rr.s.low = (n1 << b) | (n0 >> bm); + rr.s.high = n1 >> bm; + *rp = rr.ll; + } + } + } + } + + ww.s.low = q0; + ww.s.high = q1; + + return ww.ll; +} diff --git a/lib/umoddi3.c b/lib/umoddi3.c new file mode 100644 index 000000000000..d7bbf0f85197 --- /dev/null +++ b/lib/umoddi3.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see the file COPYING, or write + * to the Free Software Foundation, Inc. + */ + +#include <linux/module.h> +#include <linux/libgcc.h> + +extern unsigned long long __udivmoddi4(unsigned long long u, + unsigned long long v, + unsigned long long *rp); + +unsigned long long __umoddi3(unsigned long long u, unsigned long long v) +{ + unsigned long long w; + (void)__udivmoddi4(u, v, &w); + return w; +} +EXPORT_SYMBOL(__umoddi3); |