diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-12-29 20:37:03 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-12-29 20:37:03 +0300 |
commit | 889bb74302e5aba85d987b4093344150984d7cda (patch) | |
tree | a81f49ee3b866e13a623e77090bbc153210d0091 /arch/nds32/include | |
parent | 903b77c631673eeec9e9114e9524171cdf9a2646 (diff) | |
parent | e2f3f8b4a497d26bdcd55a53246ec2e613ae0fd4 (diff) | |
download | linux-889bb74302e5aba85d987b4093344150984d7cda.tar.xz |
Merge tag 'nds32-for-linus-4.21' of git://git.kernel.org/pub/scm/linux/kernel/git/greentime/linux
Pull nds32 updates from Greentime Hu:
- Perf support
- Power management support
- FPU support
- Hardware prefetcher support
- Build error fixed
- Performance enhancement
* tag 'nds32-for-linus-4.21' of git://git.kernel.org/pub/scm/linux/kernel/git/greentime/linux:
nds32: support hardware prefetcher
nds32: Fix the items of hwcap_str ordering issue.
math-emu/soft-fp.h: (_FP_ROUND_ZERO) cast 0 to void to fix warning
math-emu/op-2.h: Use statement expressions to prevent negative constant shift
nds32: support denormalized result through FP emulator
nds32: Support FP emulation
nds32: nds32 FPU port
nds32: Remove duplicated include from pm.c
nds32: Power management for nds32
nds32: Add document for NDS32 PMU.
nds32: Add perf call-graph support.
nds32: Perf porting
nds32: Fix bug in bitfield.h
nds32: Fix gcc 8.0 compiler option incompatible.
nds32: Fill all TLB entries with kernel image mapping
nds32: Remove the redundant assignment
Diffstat (limited to 'arch/nds32/include')
-rw-r--r-- | arch/nds32/include/asm/Kbuild | 1 | ||||
-rw-r--r-- | arch/nds32/include/asm/bitfield.h | 25 | ||||
-rw-r--r-- | arch/nds32/include/asm/elf.h | 11 | ||||
-rw-r--r-- | arch/nds32/include/asm/fpu.h | 126 | ||||
-rw-r--r-- | arch/nds32/include/asm/fpuemu.h | 32 | ||||
-rw-r--r-- | arch/nds32/include/asm/nds32_fpu_inst.h | 109 | ||||
-rw-r--r-- | arch/nds32/include/asm/perf_event.h | 16 | ||||
-rw-r--r-- | arch/nds32/include/asm/pmu.h | 386 | ||||
-rw-r--r-- | arch/nds32/include/asm/processor.h | 7 | ||||
-rw-r--r-- | arch/nds32/include/asm/sfp-machine.h | 158 | ||||
-rw-r--r-- | arch/nds32/include/asm/stacktrace.h | 39 | ||||
-rw-r--r-- | arch/nds32/include/asm/suspend.h | 11 | ||||
-rw-r--r-- | arch/nds32/include/asm/syscalls.h | 1 | ||||
-rw-r--r-- | arch/nds32/include/uapi/asm/auxvec.h | 7 | ||||
-rw-r--r-- | arch/nds32/include/uapi/asm/sigcontext.h | 14 | ||||
-rw-r--r-- | arch/nds32/include/uapi/asm/udftrap.h | 13 | ||||
-rw-r--r-- | arch/nds32/include/uapi/asm/unistd.h | 2 |
17 files changed, 956 insertions, 2 deletions
diff --git a/arch/nds32/include/asm/Kbuild b/arch/nds32/include/asm/Kbuild index dbc4e5422550..f81b633d5379 100644 --- a/arch/nds32/include/asm/Kbuild +++ b/arch/nds32/include/asm/Kbuild @@ -36,6 +36,7 @@ generic-y += kprobes.h generic-y += kvm_para.h generic-y += limits.h generic-y += local.h +generic-y += local64.h generic-y += mm-arch-hooks.h generic-y += mman.h generic-y += parport.h diff --git a/arch/nds32/include/asm/bitfield.h b/arch/nds32/include/asm/bitfield.h index 8e84fc385b94..7414fcbbab4e 100644 --- a/arch/nds32/include/asm/bitfield.h +++ b/arch/nds32/include/asm/bitfield.h @@ -251,6 +251,11 @@ #define ITYPE_mskSTYPE ( 0xF << ITYPE_offSTYPE ) #define ITYPE_mskCPID ( 0x3 << ITYPE_offCPID ) +/* Additional definitions of ITYPE register for FPU */ +#define FPU_DISABLE_EXCEPTION (0x1 << ITYPE_offSTYPE) +#define FPU_EXCEPTION (0x2 << ITYPE_offSTYPE) +#define FPU_CPID 0 /* FPU Co-Processor ID is 0 */ + #define NDS32_VECTOR_mskNONEXCEPTION 0x78 #define NDS32_VECTOR_offEXCEPTION 8 #define NDS32_VECTOR_offINTERRUPT 9 @@ -692,8 +697,8 @@ #define PFM_CTL_offKU1 13 /* Enable user mode event counting for PFMC1 */ #define PFM_CTL_offKU2 14 /* Enable user mode event counting for PFMC2 */ #define PFM_CTL_offSEL0 15 /* The event selection for PFMC0 */ -#define PFM_CTL_offSEL1 21 /* The event selection for PFMC1 */ -#define PFM_CTL_offSEL2 27 /* The event selection for PFMC2 */ +#define PFM_CTL_offSEL1 16 /* The event selection for PFMC1 */ +#define PFM_CTL_offSEL2 22 /* The event selection for PFMC2 */ /* bit 28:31 reserved */ #define PFM_CTL_mskEN0 ( 0x01 << PFM_CTL_offEN0 ) @@ -735,14 +740,20 @@ #define N13MISC_CTL_offRTP 1 /* Disable Return Target Predictor */ #define N13MISC_CTL_offPTEPF 2 /* Disable HPTWK L2 PTE pefetch */ #define N13MISC_CTL_offSP_SHADOW_EN 4 /* Enable shadow stack pointers */ +#define MISC_CTL_offHWPRE 11 /* Enable HardWare PREFETCH */ /* bit 6, 9:31 reserved */ #define N13MISC_CTL_makBTB ( 0x1 << N13MISC_CTL_offBTB ) #define N13MISC_CTL_makRTP ( 0x1 << N13MISC_CTL_offRTP ) #define N13MISC_CTL_makPTEPF ( 0x1 << N13MISC_CTL_offPTEPF ) #define N13MISC_CTL_makSP_SHADOW_EN ( 0x1 << N13MISC_CTL_offSP_SHADOW_EN ) +#define MISC_CTL_makHWPRE_EN ( 0x1 << MISC_CTL_offHWPRE ) +#ifdef CONFIG_HW_PRE +#define MISC_init (N13MISC_CTL_makBTB|N13MISC_CTL_makRTP|N13MISC_CTL_makSP_SHADOW_EN|MISC_CTL_makHWPRE_EN) +#else #define MISC_init (N13MISC_CTL_makBTB|N13MISC_CTL_makRTP|N13MISC_CTL_makSP_SHADOW_EN) +#endif /****************************************************************************** * PRUSR_ACC_CTL (Privileged Resource User Access Control Registers) @@ -926,6 +937,7 @@ #define FPCSR_mskDNIT ( 0x1 << FPCSR_offDNIT ) #define FPCSR_mskRIT ( 0x1 << FPCSR_offRIT ) #define FPCSR_mskALL (FPCSR_mskIVO | FPCSR_mskDBZ | FPCSR_mskOVF | FPCSR_mskUDF | FPCSR_mskIEX) +#define FPCSR_mskALLE_NO_UDFE (FPCSR_mskIVOE | FPCSR_mskDBZE | FPCSR_mskOVFE | FPCSR_mskIEXE) #define FPCSR_mskALLE (FPCSR_mskIVOE | FPCSR_mskDBZE | FPCSR_mskOVFE | FPCSR_mskUDFE | FPCSR_mskIEXE) #define FPCSR_mskALLT (FPCSR_mskIVOT | FPCSR_mskDBZT | FPCSR_mskOVFT | FPCSR_mskUDFT | FPCSR_mskIEXT |FPCSR_mskDNIT | FPCSR_mskRIT) @@ -946,6 +958,15 @@ #define FPCFG_mskIMVER ( 0x1F << FPCFG_offIMVER ) #define FPCFG_mskAVER ( 0x1F << FPCFG_offAVER ) +/* 8 Single precision or 4 double precision registers are available */ +#define SP8_DP4_reg 0 +/* 16 Single precision or 8 double precision registers are available */ +#define SP16_DP8_reg 1 +/* 32 Single precision or 16 double precision registers are available */ +#define SP32_DP16_reg 2 +/* 32 Single precision or 32 double precision registers are available */ +#define SP32_DP32_reg 3 + /****************************************************************************** * fucpr: FUCOP_CTL (FPU and Coprocessor Enable Control Register) *****************************************************************************/ diff --git a/arch/nds32/include/asm/elf.h b/arch/nds32/include/asm/elf.h index f5f9cf7e0544..95f3ea253e4c 100644 --- a/arch/nds32/include/asm/elf.h +++ b/arch/nds32/include/asm/elf.h @@ -9,6 +9,7 @@ */ #include <asm/ptrace.h> +#include <asm/fpu.h> typedef unsigned long elf_greg_t; typedef unsigned long elf_freg_t[3]; @@ -159,8 +160,18 @@ struct elf32_hdr; #endif + +#if IS_ENABLED(CONFIG_FPU) +#define FPU_AUX_ENT NEW_AUX_ENT(AT_FPUCW, FPCSR_INIT) +#else +#define FPU_AUX_ENT NEW_AUX_ENT(AT_IGNORE, 0) +#endif + #define ARCH_DLINFO \ do { \ + /* Optional FPU initialization */ \ + FPU_AUX_ENT; \ + \ NEW_AUX_ENT(AT_SYSINFO_EHDR, \ (elf_addr_t)current->mm->context.vdso); \ } while (0) diff --git a/arch/nds32/include/asm/fpu.h b/arch/nds32/include/asm/fpu.h new file mode 100644 index 000000000000..019f1bcfc5ee --- /dev/null +++ b/arch/nds32/include/asm/fpu.h @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2005-2018 Andes Technology Corporation */ + +#ifndef __ASM_NDS32_FPU_H +#define __ASM_NDS32_FPU_H + +#if IS_ENABLED(CONFIG_FPU) +#ifndef __ASSEMBLY__ +#include <linux/sched/task_stack.h> +#include <linux/preempt.h> +#include <asm/ptrace.h> + +extern bool has_fpu; + +extern void save_fpu(struct task_struct *__tsk); +extern void load_fpu(const struct fpu_struct *fpregs); +extern bool do_fpu_exception(unsigned int subtype, struct pt_regs *regs); +extern int do_fpuemu(struct pt_regs *regs, struct fpu_struct *fpu); + +#define test_tsk_fpu(regs) (regs->fucop_ctl & FUCOP_CTL_mskCP0EN) + +/* + * Initially load the FPU with signalling NANS. This bit pattern + * has the property that no matter whether considered as single or as + * double precision, it still represents a signalling NAN. + */ + +#define sNAN64 0xFFFFFFFFFFFFFFFFULL +#define sNAN32 0xFFFFFFFFUL + +#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC) +/* + * Denormalized number is unsupported by nds32 FPU. Hence the operation + * is treated as underflow cases when the final result is a denormalized + * number. To enhance precision, underflow exception trap should be + * enabled by default and kerenl will re-execute it by fpu emulator + * when getting underflow exception. + */ +#define FPCSR_INIT FPCSR_mskUDFE +#else +#define FPCSR_INIT 0x0UL +#endif + +extern const struct fpu_struct init_fpuregs; + +static inline void disable_ptreg_fpu(struct pt_regs *regs) +{ + regs->fucop_ctl &= ~FUCOP_CTL_mskCP0EN; +} + +static inline void enable_ptreg_fpu(struct pt_regs *regs) +{ + regs->fucop_ctl |= FUCOP_CTL_mskCP0EN; +} + +static inline void enable_fpu(void) +{ + unsigned long fucop_ctl; + + fucop_ctl = __nds32__mfsr(NDS32_SR_FUCOP_CTL) | FUCOP_CTL_mskCP0EN; + __nds32__mtsr(fucop_ctl, NDS32_SR_FUCOP_CTL); + __nds32__isb(); +} + +static inline void disable_fpu(void) +{ + unsigned long fucop_ctl; + + fucop_ctl = __nds32__mfsr(NDS32_SR_FUCOP_CTL) & ~FUCOP_CTL_mskCP0EN; + __nds32__mtsr(fucop_ctl, NDS32_SR_FUCOP_CTL); + __nds32__isb(); +} + +static inline void lose_fpu(void) +{ + preempt_disable(); +#if IS_ENABLED(CONFIG_LAZY_FPU) + if (last_task_used_math == current) { + last_task_used_math = NULL; +#else + if (test_tsk_fpu(task_pt_regs(current))) { +#endif + save_fpu(current); + } + disable_ptreg_fpu(task_pt_regs(current)); + preempt_enable(); +} + +static inline void own_fpu(void) +{ + preempt_disable(); +#if IS_ENABLED(CONFIG_LAZY_FPU) + if (last_task_used_math != current) { + if (last_task_used_math != NULL) + save_fpu(last_task_used_math); + load_fpu(¤t->thread.fpu); + last_task_used_math = current; + } +#else + if (!test_tsk_fpu(task_pt_regs(current))) { + load_fpu(¤t->thread.fpu); + } +#endif + enable_ptreg_fpu(task_pt_regs(current)); + preempt_enable(); +} + +#if !IS_ENABLED(CONFIG_LAZY_FPU) +static inline void unlazy_fpu(struct task_struct *tsk) +{ + preempt_disable(); + if (test_tsk_fpu(task_pt_regs(tsk))) + save_fpu(tsk); + preempt_enable(); +} +#endif /* !CONFIG_LAZY_FPU */ +static inline void clear_fpu(struct pt_regs *regs) +{ + preempt_disable(); + if (test_tsk_fpu(regs)) + disable_ptreg_fpu(regs); + preempt_enable(); +} +#endif /* CONFIG_FPU */ +#endif /* __ASSEMBLY__ */ +#endif /* __ASM_NDS32_FPU_H */ diff --git a/arch/nds32/include/asm/fpuemu.h b/arch/nds32/include/asm/fpuemu.h new file mode 100644 index 000000000000..c4bd0c7faa75 --- /dev/null +++ b/arch/nds32/include/asm/fpuemu.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2005-2018 Andes Technology Corporation */ + +#ifndef __ARCH_NDS32_FPUEMU_H +#define __ARCH_NDS32_FPUEMU_H + +/* + * single precision + */ + +void fadds(void *ft, void *fa, void *fb); +void fsubs(void *ft, void *fa, void *fb); +void fmuls(void *ft, void *fa, void *fb); +void fdivs(void *ft, void *fa, void *fb); +void fs2d(void *ft, void *fa); +void fsqrts(void *ft, void *fa); +void fnegs(void *ft, void *fa); +int fcmps(void *ft, void *fa, void *fb, int cop); + +/* + * double precision + */ +void faddd(void *ft, void *fa, void *fb); +void fsubd(void *ft, void *fa, void *fb); +void fmuld(void *ft, void *fa, void *fb); +void fdivd(void *ft, void *fa, void *fb); +void fsqrtd(void *ft, void *fa); +void fd2s(void *ft, void *fa); +void fnegd(void *ft, void *fa); +int fcmpd(void *ft, void *fa, void *fb, int cop); + +#endif /* __ARCH_NDS32_FPUEMU_H */ diff --git a/arch/nds32/include/asm/nds32_fpu_inst.h b/arch/nds32/include/asm/nds32_fpu_inst.h new file mode 100644 index 000000000000..1e4b86a90a48 --- /dev/null +++ b/arch/nds32/include/asm/nds32_fpu_inst.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2005-2018 Andes Technology Corporation */ + +#ifndef __NDS32_FPU_INST_H +#define __NDS32_FPU_INST_H + +#define cop0_op 0x35 + +/* + * COP0 field of opcodes. + */ +#define fs1_op 0x0 +#define fs2_op 0x4 +#define fd1_op 0x8 +#define fd2_op 0xc + +/* + * FS1 opcode. + */ +enum fs1 { + fadds_op, fsubs_op, fcpynss_op, fcpyss_op, + fmadds_op, fmsubs_op, fcmovns_op, fcmovzs_op, + fnmadds_op, fnmsubs_op, + fmuls_op = 0xc, fdivs_op, + fs1_f2op_op = 0xf +}; + +/* + * FS1/F2OP opcode. + */ +enum fs1_f2 { + fs2d_op, fsqrts_op, + fui2s_op = 0x8, fsi2s_op = 0xc, + fs2ui_op = 0x10, fs2ui_z_op = 0x14, + fs2si_op = 0x18, fs2si_z_op = 0x1c +}; + +/* + * FS2 opcode. + */ +enum fs2 { + fcmpeqs_op, fcmpeqs_e_op, fcmplts_op, fcmplts_e_op, + fcmples_op, fcmples_e_op, fcmpuns_op, fcmpuns_e_op +}; + +/* + * FD1 opcode. + */ +enum fd1 { + faddd_op, fsubd_op, fcpynsd_op, fcpysd_op, + fmaddd_op, fmsubd_op, fcmovnd_op, fcmovzd_op, + fnmaddd_op, fnmsubd_op, + fmuld_op = 0xc, fdivd_op, fd1_f2op_op = 0xf +}; + +/* + * FD1/F2OP opcode. + */ +enum fd1_f2 { + fd2s_op, fsqrtd_op, + fui2d_op = 0x8, fsi2d_op = 0xc, + fd2ui_op = 0x10, fd2ui_z_op = 0x14, + fd2si_op = 0x18, fd2si_z_op = 0x1c +}; + +/* + * FD2 opcode. + */ +enum fd2 { + fcmpeqd_op, fcmpeqd_e_op, fcmpltd_op, fcmpltd_e_op, + fcmpled_op, fcmpled_e_op, fcmpund_op, fcmpund_e_op +}; + +#define NDS32Insn(x) x + +#define I_OPCODE_off 25 +#define NDS32Insn_OPCODE(x) (NDS32Insn(x) >> I_OPCODE_off) + +#define I_OPCODE_offRt 20 +#define I_OPCODE_mskRt (0x1fUL << I_OPCODE_offRt) +#define NDS32Insn_OPCODE_Rt(x) \ + ((NDS32Insn(x) & I_OPCODE_mskRt) >> I_OPCODE_offRt) + +#define I_OPCODE_offRa 15 +#define I_OPCODE_mskRa (0x1fUL << I_OPCODE_offRa) +#define NDS32Insn_OPCODE_Ra(x) \ + ((NDS32Insn(x) & I_OPCODE_mskRa) >> I_OPCODE_offRa) + +#define I_OPCODE_offRb 10 +#define I_OPCODE_mskRb (0x1fUL << I_OPCODE_offRb) +#define NDS32Insn_OPCODE_Rb(x) \ + ((NDS32Insn(x) & I_OPCODE_mskRb) >> I_OPCODE_offRb) + +#define I_OPCODE_offbit1014 10 +#define I_OPCODE_mskbit1014 (0x1fUL << I_OPCODE_offbit1014) +#define NDS32Insn_OPCODE_BIT1014(x) \ + ((NDS32Insn(x) & I_OPCODE_mskbit1014) >> I_OPCODE_offbit1014) + +#define I_OPCODE_offbit69 6 +#define I_OPCODE_mskbit69 (0xfUL << I_OPCODE_offbit69) +#define NDS32Insn_OPCODE_BIT69(x) \ + ((NDS32Insn(x) & I_OPCODE_mskbit69) >> I_OPCODE_offbit69) + +#define I_OPCODE_offCOP0 0 +#define I_OPCODE_mskCOP0 (0x3fUL << I_OPCODE_offCOP0) +#define NDS32Insn_OPCODE_COP0(x) \ + ((NDS32Insn(x) & I_OPCODE_mskCOP0) >> I_OPCODE_offCOP0) + +#endif /* __NDS32_FPU_INST_H */ diff --git a/arch/nds32/include/asm/perf_event.h b/arch/nds32/include/asm/perf_event.h new file mode 100644 index 000000000000..fcdff02acc14 --- /dev/null +++ b/arch/nds32/include/asm/perf_event.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2008-2018 Andes Technology Corporation */ + +#ifndef __ASM_PERF_EVENT_H +#define __ASM_PERF_EVENT_H + +/* + * This file is request by Perf, + * please refer to tools/perf/design.txt for more details + */ +struct pt_regs; +unsigned long perf_instruction_pointer(struct pt_regs *regs); +unsigned long perf_misc_flags(struct pt_regs *regs); +#define perf_misc_flags(regs) perf_misc_flags(regs) + +#endif diff --git a/arch/nds32/include/asm/pmu.h b/arch/nds32/include/asm/pmu.h new file mode 100644 index 000000000000..e1ac0b0b8bcf --- /dev/null +++ b/arch/nds32/include/asm/pmu.h @@ -0,0 +1,386 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2008-2018 Andes Technology Corporation */ + +#ifndef __ASM_PMU_H +#define __ASM_PMU_H + +#include <linux/interrupt.h> +#include <linux/perf_event.h> +#include <asm/unistd.h> +#include <asm/bitfield.h> + +/* Has special meaning for perf core implementation */ +#define HW_OP_UNSUPPORTED 0x0 +#define C(_x) PERF_COUNT_HW_CACHE_##_x +#define CACHE_OP_UNSUPPORTED 0x0 + +/* Enough for both software and hardware defined events */ +#define SOFTWARE_EVENT_MASK 0xFF + +#define PFM_OFFSET_MAGIC_0 2 /* DO NOT START FROM 0 */ +#define PFM_OFFSET_MAGIC_1 (PFM_OFFSET_MAGIC_0 + 36) +#define PFM_OFFSET_MAGIC_2 (PFM_OFFSET_MAGIC_1 + 36) + +enum { PFMC0, PFMC1, PFMC2, MAX_COUNTERS }; + +u32 PFM_CTL_OVF[3] = { PFM_CTL_mskOVF0, PFM_CTL_mskOVF1, + PFM_CTL_mskOVF2 }; +u32 PFM_CTL_EN[3] = { PFM_CTL_mskEN0, PFM_CTL_mskEN1, + PFM_CTL_mskEN2 }; +u32 PFM_CTL_OFFSEL[3] = { PFM_CTL_offSEL0, PFM_CTL_offSEL1, + PFM_CTL_offSEL2 }; +u32 PFM_CTL_IE[3] = { PFM_CTL_mskIE0, PFM_CTL_mskIE1, PFM_CTL_mskIE2 }; +u32 PFM_CTL_KS[3] = { PFM_CTL_mskKS0, PFM_CTL_mskKS1, PFM_CTL_mskKS2 }; +u32 PFM_CTL_KU[3] = { PFM_CTL_mskKU0, PFM_CTL_mskKU1, PFM_CTL_mskKU2 }; +u32 PFM_CTL_SEL[3] = { PFM_CTL_mskSEL0, PFM_CTL_mskSEL1, PFM_CTL_mskSEL2 }; +/* + * Perf Events' indices + */ +#define NDS32_IDX_CYCLE_COUNTER 0 +#define NDS32_IDX_COUNTER0 1 +#define NDS32_IDX_COUNTER1 2 + +/* The events for a given PMU register set. */ +struct pmu_hw_events { + /* + * The events that are active on the PMU for the given index. + */ + struct perf_event *events[MAX_COUNTERS]; + + /* + * A 1 bit for an index indicates that the counter is being used for + * an event. A 0 means that the counter can be used. + */ + unsigned long used_mask[BITS_TO_LONGS(MAX_COUNTERS)]; + + /* + * Hardware lock to serialize accesses to PMU registers. Needed for the + * read/modify/write sequences. + */ + raw_spinlock_t pmu_lock; +}; + +struct nds32_pmu { + struct pmu pmu; + cpumask_t active_irqs; + char *name; + irqreturn_t (*handle_irq)(int irq_num, void *dev); + void (*enable)(struct perf_event *event); + void (*disable)(struct perf_event *event); + int (*get_event_idx)(struct pmu_hw_events *hw_events, + struct perf_event *event); + int (*set_event_filter)(struct hw_perf_event *evt, + struct perf_event_attr *attr); + u32 (*read_counter)(struct perf_event *event); + void (*write_counter)(struct perf_event *event, u32 val); + void (*start)(struct nds32_pmu *nds32_pmu); + void (*stop)(struct nds32_pmu *nds32_pmu); + void (*reset)(void *data); + int (*request_irq)(struct nds32_pmu *nds32_pmu, irq_handler_t handler); + void (*free_irq)(struct nds32_pmu *nds32_pmu); + int (*map_event)(struct perf_event *event); + int num_events; + atomic_t active_events; + u64 max_period; + struct platform_device *plat_device; + struct pmu_hw_events *(*get_hw_events)(void); +}; + +#define to_nds32_pmu(p) (container_of(p, struct nds32_pmu, pmu)) + +int nds32_pmu_register(struct nds32_pmu *nds32_pmu, int type); + +u64 nds32_pmu_event_update(struct perf_event *event); + +int nds32_pmu_event_set_period(struct perf_event *event); + +/* + * Common NDS32 SPAv3 event types + * + * Note: An implementation may not be able to count all of these events + * but the encodings are considered to be `reserved' in the case that + * they are not available. + * + * SEL_TOTAL_CYCLES will add an offset is due to ZERO is defined as + * NOT_SUPPORTED EVENT mapping in generic perf code. + * You will need to deal it in the event writing implementation. + */ +enum spav3_counter_0_perf_types { + SPAV3_0_SEL_BASE = -1 + PFM_OFFSET_MAGIC_0, /* counting symbol */ + SPAV3_0_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_0, + SPAV3_0_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_0, + SPAV3_0_SEL_LAST /* counting symbol */ +}; + +enum spav3_counter_1_perf_types { + SPAV3_1_SEL_BASE = -1 + PFM_OFFSET_MAGIC_1, /* counting symbol */ + SPAV3_1_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_CONDITIONAL_BRANCH = 2 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_TAKEN_CONDITIONAL_BRANCH = 3 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_PREFETCH_INSTRUCTION = 4 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_RET_INST = 5 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_JR_INST = 6 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_JAL_JRAL_INST = 7 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_NOP_INST = 8 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_SCW_INST = 9 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_ISB_DSB_INST = 10 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_CCTL_INST = 11 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_TAKEN_INTERRUPTS = 12 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_LOADS_COMPLETED = 13 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_UITLB_ACCESS = 14 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_UDTLB_ACCESS = 15 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_MTLB_ACCESS = 16 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_CODE_CACHE_ACCESS = 17 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_DATA_DEPENDENCY_STALL_CYCLES = 18 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_DATA_CACHE_MISS_STALL_CYCLES = 19 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_DATA_CACHE_ACCESS = 20 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_DATA_CACHE_MISS = 21 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_LOAD_DATA_CACHE_ACCESS = 22 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_STORE_DATA_CACHE_ACCESS = 23 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_ILM_ACCESS = 24 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_LSU_BIU_CYCLES = 25 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_HPTWK_BIU_CYCLES = 26 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_DMA_BIU_CYCLES = 27 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_CODE_CACHE_FILL_BIU_CYCLES = 28 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_LEGAL_UNALIGN_DCACHE_ACCESS = 29 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_PUSH25 = 30 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_SYSCALLS_INST = 31 + PFM_OFFSET_MAGIC_1, + SPAV3_1_SEL_LAST /* counting symbol */ +}; + +enum spav3_counter_2_perf_types { + SPAV3_2_SEL_BASE = -1 + PFM_OFFSET_MAGIC_2, /* counting symbol */ + SPAV3_2_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_2, + SPAV3_2_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_2, + SPAV3_2_SEL_CONDITIONAL_BRANCH_MISPREDICT = 2 + PFM_OFFSET_MAGIC_2, + SPAV3_2_SEL_TAKEN_CONDITIONAL_BRANCH_MISPREDICT = + 3 + PFM_OFFSET_MAGIC_2, + SPAV3_2_SEL_PREFETCH_INSTRUCTION_CACHE_HIT = 4 + PFM_OFFSET_MAGIC_2, + SPAV3_1_SEL_RET_MISPREDICT = 5 + PFM_OFFSET_MAGIC_2, + SPAV3_1_SEL_IMMEDIATE_J_INST = 6 + PFM_OFFSET_MAGIC_2, + SPAV3_1_SEL_MULTIPLY_INST = 7 + PFM_OFFSET_MAGIC_2, + SPAV3_1_SEL_16_BIT_INST = 8 + PFM_OFFSET_MAGIC_2, + SPAV3_1_SEL_FAILED_SCW_INST = 9 + PFM_OFFSET_MAGIC_2, + SPAV3_1_SEL_LD_AFTER_ST_CONFLICT_REPLAYS = 10 + PFM_OFFSET_MAGIC_2, + SPAV3_1_SEL_TAKEN_EXCEPTIONS = 12 + PFM_OFFSET_MAGIC_2, + SPAV3_1_SEL_STORES_COMPLETED = 13 + PFM_OFFSET_MAGIC_2, + SPAV3_2_SEL_UITLB_MISS = 14 + PFM_OFFSET_MAGIC_2, + SPAV3_2_SEL_UDTLB_MISS = 15 + PFM_OFFSET_MAGIC_2, + SPAV3_2_SEL_MTLB_MISS = 16 + PFM_OFFSET_MAGIC_2, + SPAV3_2_SEL_CODE_CACHE_MISS = 17 + PFM_OFFSET_MAGIC_2, + SPAV3_1_SEL_EMPTY_INST_QUEUE_STALL_CYCLES = 18 + PFM_OFFSET_MAGIC_2, + SPAV3_1_SEL_DATA_WRITE_BACK = 19 + PFM_OFFSET_MAGIC_2, + SPAV3_2_SEL_DATA_CACHE_MISS = 21 + PFM_OFFSET_MAGIC_2, + SPAV3_2_SEL_LOAD_DATA_CACHE_MISS = 22 + PFM_OFFSET_MAGIC_2, + SPAV3_2_SEL_STORE_DATA_CACHE_MISS = 23 + PFM_OFFSET_MAGIC_2, + SPAV3_1_SEL_DLM_ACCESS = 24 + PFM_OFFSET_MAGIC_2, + SPAV3_1_SEL_LSU_BIU_REQUEST = 25 + PFM_OFFSET_MAGIC_2, + SPAV3_1_SEL_HPTWK_BIU_REQUEST = 26 + PFM_OFFSET_MAGIC_2, + SPAV3_1_SEL_DMA_BIU_REQUEST = 27 + PFM_OFFSET_MAGIC_2, + SPAV3_1_SEL_CODE_CACHE_FILL_BIU_REQUEST = 28 + PFM_OFFSET_MAGIC_2, + SPAV3_1_SEL_EXTERNAL_EVENTS = 29 + PFM_OFFSET_MAGIC_2, + SPAV3_1_SEL_POP25 = 30 + PFM_OFFSET_MAGIC_2, + SPAV3_2_SEL_LAST /* counting symbol */ +}; + +/* Get converted event counter index */ +static inline int get_converted_event_idx(unsigned long event) +{ + int idx; + + if ((event) > SPAV3_0_SEL_BASE && event < SPAV3_0_SEL_LAST) { + idx = 0; + } else if ((event) > SPAV3_1_SEL_BASE && event < SPAV3_1_SEL_LAST) { + idx = 1; + } else if ((event) > SPAV3_2_SEL_BASE && event < SPAV3_2_SEL_LAST) { + idx = 2; + } else { + pr_err("GET_CONVERTED_EVENT_IDX PFM counter range error\n"); + return -EPERM; + } + + return idx; +} + +/* Get converted hardware event number */ +static inline u32 get_converted_evet_hw_num(u32 event) +{ + if (event > SPAV3_0_SEL_BASE && event < SPAV3_0_SEL_LAST) + event -= PFM_OFFSET_MAGIC_0; + else if (event > SPAV3_1_SEL_BASE && event < SPAV3_1_SEL_LAST) + event -= PFM_OFFSET_MAGIC_1; + else if (event > SPAV3_2_SEL_BASE && event < SPAV3_2_SEL_LAST) + event -= PFM_OFFSET_MAGIC_2; + else if (event != 0) + pr_err("GET_CONVERTED_EVENT_HW_NUM PFM counter range error\n"); + + return event; +} + +/* + * NDS32 HW events mapping + * + * The hardware events that we support. We do support cache operations but + * we have harvard caches and no way to combine instruction and data + * accesses/misses in hardware. + */ +static const unsigned int nds32_pfm_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = SPAV3_0_SEL_TOTAL_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = SPAV3_1_SEL_COMPLETED_INSTRUCTION, + [PERF_COUNT_HW_CACHE_REFERENCES] = SPAV3_1_SEL_DATA_CACHE_ACCESS, + [PERF_COUNT_HW_CACHE_MISSES] = SPAV3_2_SEL_DATA_CACHE_MISS, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_REF_CPU_CYCLES] = HW_OP_UNSUPPORTED +}; + +static const unsigned int nds32_pfm_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = + SPAV3_1_SEL_LOAD_DATA_CACHE_ACCESS, + [C(RESULT_MISS)] = + SPAV3_2_SEL_LOAD_DATA_CACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = + SPAV3_1_SEL_STORE_DATA_CACHE_ACCESS, + [C(RESULT_MISS)] = + SPAV3_2_SEL_STORE_DATA_CACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = + CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = + CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = + SPAV3_1_SEL_CODE_CACHE_ACCESS, + [C(RESULT_MISS)] = + SPAV3_2_SEL_CODE_CACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = + SPAV3_1_SEL_CODE_CACHE_ACCESS, + [C(RESULT_MISS)] = + SPAV3_2_SEL_CODE_CACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = + CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + /* TODO: L2CC */ + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = + CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + /* NDS32 PMU does not support TLB read/write hit/miss, + * However, it can count access/miss, which mixed with read and write. + * Therefore, only READ counter will use it. + * We do as possible as we can. + */ + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = + SPAV3_1_SEL_UDTLB_ACCESS, + [C(RESULT_MISS)] = + SPAV3_2_SEL_UDTLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = + CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = + CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = + SPAV3_1_SEL_UITLB_ACCESS, + [C(RESULT_MISS)] = + SPAV3_2_SEL_UITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = + CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = + CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = + CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = + CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { /* What is BPU? */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = + CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = + CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = + CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = + CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = + CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = + CACHE_OP_UNSUPPORTED, + }, + }, + [C(NODE)] = { /* What is NODE? */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = + CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = + CACHE_OP_UNSUPPORTED, + }, + }, +}; + +int nds32_pmu_map_event(struct perf_event *event, + const unsigned int (*event_map)[PERF_COUNT_HW_MAX], + const unsigned int (*cache_map)[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX], u32 raw_event_mask); + +#endif /* __ASM_PMU_H */ diff --git a/arch/nds32/include/asm/processor.h b/arch/nds32/include/asm/processor.h index c2660f566bac..72024f8bc129 100644 --- a/arch/nds32/include/asm/processor.h +++ b/arch/nds32/include/asm/processor.h @@ -35,6 +35,8 @@ struct thread_struct { unsigned long address; unsigned long trap_no; unsigned long error_code; + + struct fpu_struct fpu; }; #define INIT_THREAD { } @@ -72,6 +74,11 @@ struct task_struct; /* Free all resources held by a thread. */ #define release_thread(thread) do { } while(0) +#if IS_ENABLED(CONFIG_FPU) +#if !IS_ENABLED(CONFIG_UNLAZU_FPU) +extern struct task_struct *last_task_used_math; +#endif +#endif /* Prepare to copy thread state - unlazy all lazy status */ #define prepare_to_copy(tsk) do { } while (0) diff --git a/arch/nds32/include/asm/sfp-machine.h b/arch/nds32/include/asm/sfp-machine.h new file mode 100644 index 000000000000..b1a5caa332b5 --- /dev/null +++ b/arch/nds32/include/asm/sfp-machine.h @@ -0,0 +1,158 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2005-2018 Andes Technology Corporation */ + +#include <asm/bitfield.h> + +#define _FP_W_TYPE_SIZE 32 +#define _FP_W_TYPE unsigned long +#define _FP_WS_TYPE signed long +#define _FP_I_TYPE long + +#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) +#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) +#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) + +#define _FP_MUL_MEAT_S(R, X, Y) \ + _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S, R, X, Y, umul_ppmm) +#define _FP_MUL_MEAT_D(R, X, Y) \ + _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D, R, X, Y, umul_ppmm) +#define _FP_MUL_MEAT_Q(R, X, Y) \ + _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q, R, X, Y, umul_ppmm) + +#define _FP_MUL_MEAT_DW_S(R, X, Y) \ + _FP_MUL_MEAT_DW_1_wide(_FP_WFRACBITS_S, R, X, Y, umul_ppmm) +#define _FP_MUL_MEAT_DW_D(R, X, Y) \ + _FP_MUL_MEAT_DW_2_wide(_FP_WFRACBITS_D, R, X, Y, umul_ppmm) + +#define _FP_DIV_MEAT_S(R, X, Y) _FP_DIV_MEAT_1_udiv_norm(S, R, X, Y) +#define _FP_DIV_MEAT_D(R, X, Y) _FP_DIV_MEAT_2_udiv(D, R, X, Y) + +#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) +#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1 +#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1 +#define _FP_NANSIGN_S 0 +#define _FP_NANSIGN_D 0 +#define _FP_NANSIGN_Q 0 + +#define _FP_KEEPNANFRACP 1 +#define _FP_QNANNEGATEDP 0 + +#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ +do { \ + if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \ + && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) { \ + R##_s = Y##_s; \ + _FP_FRAC_COPY_##wc(R, Y); \ + } else { \ + R##_s = X##_s; \ + _FP_FRAC_COPY_##wc(R, X); \ + } \ + R##_c = FP_CLS_NAN; \ +} while (0) + +#define __FPU_FPCSR (current->thread.fpu.fpcsr) + +/* Obtain the current rounding mode. */ +#define FP_ROUNDMODE \ +({ \ + __FPU_FPCSR & FPCSR_mskRM; \ +}) + +#define FP_RND_NEAREST 0 +#define FP_RND_PINF 1 +#define FP_RND_MINF 2 +#define FP_RND_ZERO 3 + +#define FP_EX_INVALID FPCSR_mskIVO +#define FP_EX_DIVZERO FPCSR_mskDBZ +#define FP_EX_OVERFLOW FPCSR_mskOVF +#define FP_EX_UNDERFLOW FPCSR_mskUDF +#define FP_EX_INEXACT FPCSR_mskIEX + +#define SF_CEQ 2 +#define SF_CLT 1 +#define SF_CGT 3 +#define SF_CUN 4 + +#include <asm/byteorder.h> + +#ifdef __BIG_ENDIAN__ +#define __BYTE_ORDER __BIG_ENDIAN +#define __LITTLE_ENDIAN 0 +#else +#define __BYTE_ORDER __LITTLE_ENDIAN +#define __BIG_ENDIAN 0 +#endif + +#define abort() do { } while (0) +#define umul_ppmm(w1, w0, u, v) \ +do { \ + UWtype __x0, __x1, __x2, __x3; \ + UHWtype __ul, __vl, __uh, __vh; \ + \ + __ul = __ll_lowpart(u); \ + __uh = __ll_highpart(u); \ + __vl = __ll_lowpart(v); \ + __vh = __ll_highpart(v); \ + \ + __x0 = (UWtype) __ul * __vl; \ + __x1 = (UWtype) __ul * __vh; \ + __x2 = (UWtype) __uh * __vl; \ + __x3 = (UWtype) __uh * __vh; \ + \ + __x1 += __ll_highpart(__x0); \ + __x1 += __x2; \ + if (__x1 < __x2) \ + __x3 += __ll_B; \ + \ + (w1) = __x3 + __ll_highpart(__x1); \ + (w0) = __ll_lowpart(__x1) * __ll_B + __ll_lowpart(__x0); \ +} while (0) + +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ +do { \ + UWtype __x; \ + __x = (al) + (bl); \ + (sh) = (ah) + (bh) + (__x < (al)); \ + (sl) = __x; \ +} while (0) + +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ +do { \ + UWtype __x; \ + __x = (al) - (bl); \ + (sh) = (ah) - (bh) - (__x > (al)); \ + (sl) = __x; \ +} while (0) + +#define udiv_qrnnd(q, r, n1, n0, d) \ +do { \ + UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \ + __d1 = __ll_highpart(d); \ + __d0 = __ll_lowpart(d); \ + \ + __r1 = (n1) % __d1; \ + __q1 = (n1) / __d1; \ + __m = (UWtype) __q1 * __d0; \ + __r1 = __r1 * __ll_B | __ll_highpart(n0); \ + if (__r1 < __m) { \ + __q1--, __r1 += (d); \ + if (__r1 >= (d)) \ + if (__r1 < __m) \ + __q1--, __r1 += (d); \ + } \ + __r1 -= __m; \ + __r0 = __r1 % __d1; \ + __q0 = __r1 / __d1; \ + __m = (UWtype) __q0 * __d0; \ + __r0 = __r0 * __ll_B | __ll_lowpart(n0); \ + if (__r0 < __m) { \ + __q0--, __r0 += (d); \ + if (__r0 >= (d)) \ + if (__r0 < __m) \ + __q0--, __r0 += (d); \ + } \ + __r0 -= __m; \ + (q) = (UWtype) __q1 * __ll_B | __q0; \ + (r) = __r0; \ +} while (0) diff --git a/arch/nds32/include/asm/stacktrace.h b/arch/nds32/include/asm/stacktrace.h new file mode 100644 index 000000000000..6bf7c777bda4 --- /dev/null +++ b/arch/nds32/include/asm/stacktrace.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2008-2018 Andes Technology Corporation */ + +#ifndef __ASM_STACKTRACE_H +#define __ASM_STACKTRACE_H + +/* Kernel callchain */ +struct stackframe { + unsigned long fp; + unsigned long sp; + unsigned long lp; +}; + +/* + * struct frame_tail: User callchain + * IMPORTANT: + * This struct is used for call-stack walking, + * the order and types matters. + * Do not use array, it only stores sizeof(pointer) + * + * The details can refer to arch/arm/kernel/perf_event.c + */ +struct frame_tail { + unsigned long stack_fp; + unsigned long stack_lp; +}; + +/* For User callchain with optimize for size */ +struct frame_tail_opt_size { + unsigned long stack_r6; + unsigned long stack_fp; + unsigned long stack_gp; + unsigned long stack_lp; +}; + +extern void +get_real_ret_addr(unsigned long *addr, struct task_struct *tsk, int *graph); + +#endif /* __ASM_STACKTRACE_H */ diff --git a/arch/nds32/include/asm/suspend.h b/arch/nds32/include/asm/suspend.h new file mode 100644 index 000000000000..6ed2418af1ac --- /dev/null +++ b/arch/nds32/include/asm/suspend.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// Copyright (C) 2008-2017 Andes Technology Corporation + +#ifndef __ASM_NDS32_SUSPEND_H +#define __ASM_NDS32_SUSPEND_H + +extern void suspend2ram(void); +extern void cpu_resume(void); +extern unsigned long wake_mask; + +#endif diff --git a/arch/nds32/include/asm/syscalls.h b/arch/nds32/include/asm/syscalls.h index 78778ecff60c..da32101b455d 100644 --- a/arch/nds32/include/asm/syscalls.h +++ b/arch/nds32/include/asm/syscalls.h @@ -7,6 +7,7 @@ asmlinkage long sys_cacheflush(unsigned long addr, unsigned long len, unsigned int op); asmlinkage long sys_fadvise64_64_wrapper(int fd, int advice, loff_t offset, loff_t len); asmlinkage long sys_rt_sigreturn_wrapper(void); +asmlinkage long sys_udftrap(int option); #include <asm-generic/syscalls.h> diff --git a/arch/nds32/include/uapi/asm/auxvec.h b/arch/nds32/include/uapi/asm/auxvec.h index 56043ce4972f..2d3213f5e595 100644 --- a/arch/nds32/include/uapi/asm/auxvec.h +++ b/arch/nds32/include/uapi/asm/auxvec.h @@ -4,6 +4,13 @@ #ifndef __ASM_AUXVEC_H #define __ASM_AUXVEC_H +/* + * This entry gives some information about the FPU initialization + * performed by the kernel. + */ +#define AT_FPUCW 18 /* Used FPU control word. */ + + /* VDSO location */ #define AT_SYSINFO_EHDR 33 diff --git a/arch/nds32/include/uapi/asm/sigcontext.h b/arch/nds32/include/uapi/asm/sigcontext.h index 00567b237b0c..58afc416473e 100644 --- a/arch/nds32/include/uapi/asm/sigcontext.h +++ b/arch/nds32/include/uapi/asm/sigcontext.h @@ -9,6 +9,19 @@ * before the signal handler was invoked. Note: only add new entries * to the end of the structure. */ +struct fpu_struct { + unsigned long long fd_regs[32]; + unsigned long fpcsr; + /* + * UDF_trap is used to recognize whether underflow trap is enabled + * or not. When UDF_trap == 1, this process will be traped and then + * get a SIGFPE signal when encountering an underflow exception. + * UDF_trap is only modified through setfputrap syscall. Therefore, + * UDF_trap needn't be saved or loaded to context in each context + * switch. + */ + unsigned long UDF_trap; +}; struct zol_struct { unsigned long nds32_lc; /* $LC */ @@ -54,6 +67,7 @@ struct sigcontext { unsigned long fault_address; unsigned long used_math_flag; /* FPU Registers */ + struct fpu_struct fpu; struct zol_struct zol; }; diff --git a/arch/nds32/include/uapi/asm/udftrap.h b/arch/nds32/include/uapi/asm/udftrap.h new file mode 100644 index 000000000000..433f79d679c0 --- /dev/null +++ b/arch/nds32/include/uapi/asm/udftrap.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2005-2018 Andes Technology Corporation */ +#ifndef _ASM_SETFPUTRAP +#define _ASM_SETFPUTRAP + +/* + * Options for setfputrap system call + */ +#define DISABLE_UDFTRAP 0 /* disable underflow exception trap */ +#define ENABLE_UDFTRAP 1 /* enable undeflos exception trap */ +#define GET_UDFTRAP 2 /* only get undeflos exception trap status */ + +#endif /* _ASM_CACHECTL */ diff --git a/arch/nds32/include/uapi/asm/unistd.h b/arch/nds32/include/uapi/asm/unistd.h index 603e826e0449..c2c3a3e34083 100644 --- a/arch/nds32/include/uapi/asm/unistd.h +++ b/arch/nds32/include/uapi/asm/unistd.h @@ -9,4 +9,6 @@ /* Additional NDS32 specific syscalls. */ #define __NR_cacheflush (__NR_arch_specific_syscall) +#define __NR_udftrap (__NR_arch_specific_syscall + 1) __SYSCALL(__NR_cacheflush, sys_cacheflush) +__SYSCALL(__NR_udftrap, sys_udftrap) |