1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
|
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 1994 Linus Torvalds
*
* Pentium III FXSR, SSE support
* General FPU state handling cleanups
* Gareth Hughes <gareth@valinux.com>, May 2000
* x86-64 work by Andi Kleen 2002
*/
#ifndef _ASM_X86_FPU_INTERNAL_H
#define _ASM_X86_FPU_INTERNAL_H
#include <linux/compat.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <asm/user.h>
#include <asm/fpu/api.h>
#include <asm/fpu/xstate.h>
#include <asm/cpufeature.h>
#include <asm/trace/fpu.h>
/*
* High level FPU state handling functions:
*/
extern void fpu__prepare_read(struct fpu *fpu);
extern void fpu__prepare_write(struct fpu *fpu);
extern void fpu__save(struct fpu *fpu);
extern int fpu__restore_sig(void __user *buf, int ia32_frame);
extern void fpu__drop(struct fpu *fpu);
extern int fpu__copy(struct task_struct *dst, struct task_struct *src);
extern void fpu__clear_user_states(struct fpu *fpu);
extern void fpu__clear_all(struct fpu *fpu);
extern int fpu__exception_code(struct fpu *fpu, int trap_nr);
extern int dump_fpu(struct pt_regs *ptregs, struct user_i387_struct *fpstate);
/*
* Boot time FPU initialization functions:
*/
extern void fpu__init_cpu(void);
extern void fpu__init_system_xstate(void);
extern void fpu__init_cpu_xstate(void);
extern void fpu__init_system(struct cpuinfo_x86 *c);
extern void fpu__init_check_bugs(void);
extern void fpu__resume_cpu(void);
extern u64 fpu__get_supported_xfeatures_mask(void);
/*
* Debugging facility:
*/
#ifdef CONFIG_X86_DEBUG_FPU
# define WARN_ON_FPU(x) WARN_ON_ONCE(x)
#else
# define WARN_ON_FPU(x) ({ (void)(x); 0; })
#endif
/*
* FPU related CPU feature flag helper routines:
*/
static __always_inline __pure bool use_xsaveopt(void)
{
return static_cpu_has(X86_FEATURE_XSAVEOPT);
}
static __always_inline __pure bool use_xsave(void)
{
return static_cpu_has(X86_FEATURE_XSAVE);
}
static __always_inline __pure bool use_fxsr(void)
{
return static_cpu_has(X86_FEATURE_FXSR);
}
/*
* fpstate handling functions:
*/
extern union fpregs_state init_fpstate;
extern void fpstate_init(union fpregs_state *state);
#ifdef CONFIG_MATH_EMULATION
extern void fpstate_init_soft(struct swregs_state *soft);
#else
static inline void fpstate_init_soft(struct swregs_state *soft) {}
#endif
static inline void fpstate_init_xstate(struct xregs_state *xsave)
{
/*
* XRSTORS requires these bits set in xcomp_bv, or it will
* trigger #GP:
*/
xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask_all;
}
static inline void fpstate_init_fxstate(struct fxregs_state *fx)
{
fx->cwd = 0x37f;
fx->mxcsr = MXCSR_DEFAULT;
}
extern void fpstate_sanitize_xstate(struct fpu *fpu);
#define user_insn(insn, output, input...) \
({ \
int err; \
\
might_fault(); \
\
asm volatile(ASM_STAC "\n" \
"1:" #insn "\n\t" \
"2: " ASM_CLAC "\n" \
".section .fixup,\"ax\"\n" \
"3: movl $-1,%[err]\n" \
" jmp 2b\n" \
".previous\n" \
_ASM_EXTABLE(1b, 3b) \
: [err] "=r" (err), output \
: "0"(0), input); \
err; \
})
#define kernel_insn_err(insn, output, input...) \
({ \
int err; \
asm volatile("1:" #insn "\n\t" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3: movl $-1,%[err]\n" \
" jmp 2b\n" \
".previous\n" \
_ASM_EXTABLE(1b, 3b) \
: [err] "=r" (err), output \
: "0"(0), input); \
err; \
})
#define kernel_insn(insn, output, input...) \
asm volatile("1:" #insn "\n\t" \
"2:\n" \
_ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore) \
: output : input)
static inline int copy_fregs_to_user(struct fregs_state __user *fx)
{
return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx));
}
static inline int copy_fxregs_to_user(struct fxregs_state __user *fx)
{
if (IS_ENABLED(CONFIG_X86_32))
return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
else
return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));
}
static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
{
if (IS_ENABLED(CONFIG_X86_32))
kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
else
kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
}
static inline int copy_kernel_to_fxregs_err(struct fxregs_state *fx)
{
if (IS_ENABLED(CONFIG_X86_32))
return kernel_insn_err(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
else
return kernel_insn_err(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
}
static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
{
if (IS_ENABLED(CONFIG_X86_32))
return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
else
return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
}
static inline void copy_kernel_to_fregs(struct fregs_state *fx)
{
kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
}
static inline int copy_kernel_to_fregs_err(struct fregs_state *fx)
{
return kernel_insn_err(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
}
static inline int copy_user_to_fregs(struct fregs_state __user *fx)
{
return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
}
static inline void copy_fxregs_to_kernel(struct fpu *fpu)
{
if (IS_ENABLED(CONFIG_X86_32))
asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave));
else
asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave));
}
/* These macros all use (%edi)/(%rdi) as the single memory argument. */
#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27"
#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37"
#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f"
#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f"
#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f"
#define XSTATE_OP(op, st, lmask, hmask, err) \
asm volatile("1:" op "\n\t" \
"xor %[err], %[err]\n" \
"2:\n\t" \
".pushsection .fixup,\"ax\"\n\t" \
"3: movl $-2,%[err]\n\t" \
"jmp 2b\n\t" \
".popsection\n\t" \
_ASM_EXTABLE(1b, 3b) \
: [err] "=r" (err) \
: "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
: "memory")
/*
* If XSAVES is enabled, it replaces XSAVEOPT because it supports a compact
* format and supervisor states in addition to modified optimization in
* XSAVEOPT.
*
* Otherwise, if XSAVEOPT is enabled, XSAVEOPT replaces XSAVE because XSAVEOPT
* supports modified optimization which is not supported by XSAVE.
*
* We use XSAVE as a fallback.
*
* The 661 label is defined in the ALTERNATIVE* macros as the address of the
* original instruction which gets replaced. We need to use it here as the
* address of the instruction where we might get an exception at.
*/
#define XSTATE_XSAVE(st, lmask, hmask, err) \
asm volatile(ALTERNATIVE_2(XSAVE, \
XSAVEOPT, X86_FEATURE_XSAVEOPT, \
XSAVES, X86_FEATURE_XSAVES) \
"\n" \
"xor %[err], %[err]\n" \
"3:\n" \
".pushsection .fixup,\"ax\"\n" \
"4: movl $-2, %[err]\n" \
"jmp 3b\n" \
".popsection\n" \
_ASM_EXTABLE(661b, 4b) \
: [err] "=r" (err) \
: "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
: "memory")
/*
* Use XRSTORS to restore context if it is enabled. XRSTORS supports compact
* XSAVE area format.
*/
#define XSTATE_XRESTORE(st, lmask, hmask) \
asm volatile(ALTERNATIVE(XRSTOR, \
XRSTORS, X86_FEATURE_XSAVES) \
"\n" \
"3:\n" \
_ASM_EXTABLE_HANDLE(661b, 3b, ex_handler_fprestore)\
: \
: "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
: "memory")
/*
* This function is called only during boot time when x86 caps are not set
* up and alternative can not be used yet.
*/
static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
{
u64 mask = -1;
u32 lmask = mask;
u32 hmask = mask >> 32;
int err;
WARN_ON(system_state != SYSTEM_BOOTING);
if (boot_cpu_has(X86_FEATURE_XSAVES))
XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
else
XSTATE_OP(XSAVE, xstate, lmask, hmask, err);
/* We should never fault when copying to a kernel buffer: */
WARN_ON_FPU(err);
}
/*
* This function is called only during boot time when x86 caps are not set
* up and alternative can not be used yet.
*/
static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
{
u64 mask = -1;
u32 lmask = mask;
u32 hmask = mask >> 32;
int err;
WARN_ON(system_state != SYSTEM_BOOTING);
if (boot_cpu_has(X86_FEATURE_XSAVES))
XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
else
XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
/*
* We should never fault when copying from a kernel buffer, and the FPU
* state we set at boot time should be valid.
*/
WARN_ON_FPU(err);
}
/*
* Save processor xstate to xsave area.
*/
static inline void copy_xregs_to_kernel(struct xregs_state *xstate)
{
u64 mask = -1;
u32 lmask = mask;
u32 hmask = mask >> 32;
int err;
WARN_ON_FPU(!alternatives_patched);
XSTATE_XSAVE(xstate, lmask, hmask, err);
/* We should never fault when copying to a kernel buffer: */
WARN_ON_FPU(err);
}
/*
* Restore processor xstate from xsave area.
*/
static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask)
{
u32 lmask = mask;
u32 hmask = mask >> 32;
XSTATE_XRESTORE(xstate, lmask, hmask);
}
/*
* Save xstate to user space xsave area.
*
* We don't use modified optimization because xrstor/xrstors might track
* a different application.
*
* We don't use compacted format xsave area for
* backward compatibility for old applications which don't understand
* compacted format of xsave area.
*/
static inline int copy_xregs_to_user(struct xregs_state __user *buf)
{
int err;
/*
* Clear the xsave header first, so that reserved fields are
* initialized to zero.
*/
err = __clear_user(&buf->header, sizeof(buf->header));
if (unlikely(err))
return -EFAULT;
stac();
XSTATE_OP(XSAVE, buf, -1, -1, err);
clac();
return err;
}
/*
* Restore xstate from user space xsave area.
*/
static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask)
{
struct xregs_state *xstate = ((__force struct xregs_state *)buf);
u32 lmask = mask;
u32 hmask = mask >> 32;
int err;
stac();
XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
clac();
return err;
}
/*
* Restore xstate from kernel space xsave area, return an error code instead of
* an exception.
*/
static inline int copy_kernel_to_xregs_err(struct xregs_state *xstate, u64 mask)
{
u32 lmask = mask;
u32 hmask = mask >> 32;
int err;
if (static_cpu_has(X86_FEATURE_XSAVES))
XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
else
XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
return err;
}
/*
* These must be called with preempt disabled. Returns
* 'true' if the FPU state is still intact and we can
* keep registers active.
*
* The legacy FNSAVE instruction cleared all FPU state
* unconditionally, so registers are essentially destroyed.
* Modern FPU state can be kept in registers, if there are
* no pending FP exceptions.
*/
static inline int copy_fpregs_to_fpstate(struct fpu *fpu)
{
if (likely(use_xsave())) {
copy_xregs_to_kernel(&fpu->state.xsave);
/*
* AVX512 state is tracked here because its use is
* known to slow the max clock speed of the core.
*/
if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512)
fpu->avx512_timestamp = jiffies;
return 1;
}
if (likely(use_fxsr())) {
copy_fxregs_to_kernel(fpu);
return 1;
}
/*
* Legacy FPU register saving, FNSAVE always clears FPU registers,
* so we have to mark them inactive:
*/
asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave));
return 0;
}
static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask)
{
if (use_xsave()) {
copy_kernel_to_xregs(&fpstate->xsave, mask);
} else {
if (use_fxsr())
copy_kernel_to_fxregs(&fpstate->fxsave);
else
copy_kernel_to_fregs(&fpstate->fsave);
}
}
static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate)
{
/*
* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is
* pending. Clear the x87 state here by setting it to fixed values.
* "m" is a random variable that should be in L1.
*/
if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) {
asm volatile(
"fnclex\n\t"
"emms\n\t"
"fildl %P[addr]" /* set F?P to defined value */
: : [addr] "m" (fpstate));
}
__copy_kernel_to_fpregs(fpstate, -1);
}
extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size);
/*
* FPU context switch related helper methods:
*/
DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
/*
* The in-register FPU state for an FPU context on a CPU is assumed to be
* valid if the fpu->last_cpu matches the CPU, and the fpu_fpregs_owner_ctx
* matches the FPU.
*
* If the FPU register state is valid, the kernel can skip restoring the
* FPU state from memory.
*
* Any code that clobbers the FPU registers or updates the in-memory
* FPU state for a task MUST let the rest of the kernel know that the
* FPU registers are no longer valid for this task.
*
* Either one of these invalidation functions is enough. Invalidate
* a resource you control: CPU if using the CPU for something else
* (with preemption disabled), FPU for the current task, or a task that
* is prevented from running by the current task.
*/
static inline void __cpu_invalidate_fpregs_state(void)
{
__this_cpu_write(fpu_fpregs_owner_ctx, NULL);
}
static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu)
{
fpu->last_cpu = -1;
}
static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
{
return fpu == this_cpu_read(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
}
/*
* These generally need preemption protection to work,
* do try to avoid using these on their own:
*/
static inline void fpregs_deactivate(struct fpu *fpu)
{
this_cpu_write(fpu_fpregs_owner_ctx, NULL);
trace_x86_fpu_regs_deactivated(fpu);
}
static inline void fpregs_activate(struct fpu *fpu)
{
this_cpu_write(fpu_fpregs_owner_ctx, fpu);
trace_x86_fpu_regs_activated(fpu);
}
/*
* Internal helper, do not use directly. Use switch_fpu_return() instead.
*/
static inline void __fpregs_load_activate(void)
{
struct fpu *fpu = ¤t->thread.fpu;
int cpu = smp_processor_id();
if (WARN_ON_ONCE(current->flags & PF_KTHREAD))
return;
if (!fpregs_state_valid(fpu, cpu)) {
copy_kernel_to_fpregs(&fpu->state);
fpregs_activate(fpu);
fpu->last_cpu = cpu;
}
clear_thread_flag(TIF_NEED_FPU_LOAD);
}
/*
* FPU state switching for scheduling.
*
* This is a two-stage process:
*
* - switch_fpu_prepare() saves the old state.
* This is done within the context of the old process.
*
* - switch_fpu_finish() sets TIF_NEED_FPU_LOAD; the floating point state
* will get loaded on return to userspace, or when the kernel needs it.
*
* If TIF_NEED_FPU_LOAD is cleared then the CPU's FPU registers
* are saved in the current thread's FPU register state.
*
* If TIF_NEED_FPU_LOAD is set then CPU's FPU registers may not
* hold current()'s FPU registers. It is required to load the
* registers before returning to userland or using the content
* otherwise.
*
* The FPU context is only stored/restored for a user task and
* PF_KTHREAD is used to distinguish between kernel and user threads.
*/
static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
{
if (static_cpu_has(X86_FEATURE_FPU) && !(current->flags & PF_KTHREAD)) {
if (!copy_fpregs_to_fpstate(old_fpu))
old_fpu->last_cpu = -1;
else
old_fpu->last_cpu = cpu;
/* But leave fpu_fpregs_owner_ctx! */
trace_x86_fpu_regs_deactivated(old_fpu);
}
}
/*
* Misc helper functions:
*/
/*
* Load PKRU from the FPU context if available. Delay loading of the
* complete FPU state until the return to userland.
*/
static inline void switch_fpu_finish(struct fpu *new_fpu)
{
u32 pkru_val = init_pkru_value;
struct pkru_state *pk;
if (!static_cpu_has(X86_FEATURE_FPU))
return;
set_thread_flag(TIF_NEED_FPU_LOAD);
if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
return;
/*
* PKRU state is switched eagerly because it needs to be valid before we
* return to userland e.g. for a copy_to_user() operation.
*/
if (current->mm) {
pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU);
if (pk)
pkru_val = pk->pkru;
}
__write_pkru(pkru_val);
}
/*
* MXCSR and XCR definitions:
*/
static inline void ldmxcsr(u32 mxcsr)
{
asm volatile("ldmxcsr %0" :: "m" (mxcsr));
}
extern unsigned int mxcsr_feature_mask;
#define XCR_XFEATURE_ENABLED_MASK 0x00000000
static inline u64 xgetbv(u32 index)
{
u32 eax, edx;
asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
: "=a" (eax), "=d" (edx)
: "c" (index));
return eax + ((u64)edx << 32);
}
static inline void xsetbv(u32 index, u64 value)
{
u32 eax = value;
u32 edx = value >> 32;
asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */
: : "a" (eax), "d" (edx), "c" (index));
}
#endif /* _ASM_X86_FPU_INTERNAL_H */
|