1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
|
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2021 Google LLC
* Author: Fuad Tabba <tabba@google.com>
*/
#include <linux/irqchip/arm-gic-v3.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
#include <hyp/adjust_pc.h>
#include <nvhe/fixed_config.h>
#include "../../sys_regs.h"
/*
 * Copies of the host's CPU features registers holding sanitized values at hyp.
 *
 * Each variable backs the get_pvm_id_aa64*() helper of the same register
 * below, which masks/limits it to build the protected VM's view. Nothing in
 * the code visible here writes them; they are non-static, so presumably they
 * are populated from outside this file -- TODO confirm against the caller.
 */
u64 id_aa64pfr0_el1_sys_val;
u64 id_aa64pfr1_el1_sys_val;
u64 id_aa64isar0_el1_sys_val;
u64 id_aa64isar1_el1_sys_val;
u64 id_aa64isar2_el1_sys_val;
u64 id_aa64mmfr0_el1_sys_val;
u64 id_aa64mmfr1_el1_sys_val;
u64 id_aa64mmfr2_el1_sys_val;
/*
 * Inject an unknown/undefined exception to an AArch64 guest while most of its
 * sysregs are live.
 *
 * The statement order matters: the vcpu's pc/cpsr are first refreshed from
 * ELR_EL2/SPSR_EL2, then the pending-exception flags are set and
 * __kvm_adjust_pc() is called, and only afterwards are the hardware
 * registers written back from the vcpu state.
 */
static void inject_undef64(struct kvm_vcpu *vcpu)
{
/* Syndrome reported to the guest: exception class "unknown". */
u64 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
/* Snapshot the guest's current PC and PSTATE from the EL2 return state. */
*vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
*vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR);
/* Request a synchronous exception targeting AArch64 EL1. */
vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 |
KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
KVM_ARM64_PENDING_EXCEPTION);
/*
 * NOTE(review): presumably this consumes the pending-exception flags and
 * rewrites the vcpu's pc/cpsr to the exception entry state -- confirm
 * against hyp/adjust_pc.h.
 */
__kvm_adjust_pc(vcpu);
/* Publish the syndrome and the faulting PC to the guest's EL1 registers. */
write_sysreg_el1(esr, SYS_ESR);
write_sysreg_el1(read_sysreg_el2(SYS_ELR), SYS_ELR);
/* Load the (possibly updated) vcpu pc/cpsr as the new EL2 return state. */
write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR);
write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR);
}
/*
 * Clamp the feature fields of an ID register to per-field upper limits.
 *
 * @sys_reg_val:     value of the feature register as supported by the system.
 * @restrict_fields: per-field maximum values, laid out like the register.
 *
 * Both values are walked one ARM64_FEATURE_FIELD_BITS-wide field at a time,
 * starting at bit 0, and the smaller of the two field values is kept. The walk
 * stops once either remaining value has no bits set, so a field whose limit is
 * 0b0000 contributes nothing to the returned value.
 *
 * Note: Only valid for fields holding unsigned values.
 */
static u64 get_restricted_features_unsigned(u64 sys_reg_val,
					    u64 restrict_fields)
{
	u64 field_mask = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
	u64 result = 0UL;

	/*
	 * The Arm Architecture Reference Manual defines larger field values as
	 * indicating more functionality, so taking the unsigned minimum of the
	 * system's value and the limit yields the most capable level that both
	 * permit.
	 */
	for (; sys_reg_val && restrict_fields;
	     field_mask <<= ARM64_FEATURE_FIELD_BITS) {
		u64 sys_field = sys_reg_val & field_mask;
		u64 limit_field = restrict_fields & field_mask;

		result |= min(sys_field, limit_field);

		/* Drop the field just handled from both inputs. */
		sys_reg_val &= ~field_mask;
		restrict_fields &= ~field_mask;
	}

	return result;
}
/*
 * The get_pvm_id_aa64*() helpers compute the value of a feature id register as
 * seen by a protected VM, derived from the allowed features, the system's
 * features, and what KVM supports.
 */

/*
 * ID_AA64PFR0_EL1: allowed fields pass through, restricted fields are capped,
 * and CSV2/CSV3 are taken from the VM's own configuration.
 */
static u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu)
{
	const struct kvm *kvm = (const struct kvm *)kern_hyp_va(vcpu->kvm);
	u64 passthrough = id_aa64pfr0_el1_sys_val & PVM_ID_AA64PFR0_ALLOW;
	u64 capped;
	u64 csv2;
	u64 csv3;

	capped = get_restricted_features_unsigned(id_aa64pfr0_el1_sys_val,
						  PVM_ID_AA64PFR0_RESTRICT_UNSIGNED);

	/* Spectre and Meltdown mitigation in KVM */
	csv2 = FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2),
			  (u64)kvm->arch.pfr0_csv2);
	csv3 = FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3),
			  (u64)kvm->arch.pfr0_csv3);

	return passthrough | capped | csv2 | csv3;
}
/*
 * ID_AA64PFR1_EL1: allowed fields pass through; the MTE field is additionally
 * hidden when the VM does not have MTE.
 */
static u64 get_pvm_id_aa64pfr1(const struct kvm_vcpu *vcpu)
{
	const struct kvm *kvm = (const struct kvm *)kern_hyp_va(vcpu->kvm);
	u64 visible = id_aa64pfr1_el1_sys_val & PVM_ID_AA64PFR1_ALLOW;

	if (kvm_has_mte(kvm))
		return visible;

	return visible & ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE);
}
/* ID_AA64ZFR0_EL1: always reads as zero for protected VMs. */
static u64 get_pvm_id_aa64zfr0(const struct kvm_vcpu *vcpu)
{
	/*
	 * Scalable Vectors are not supported, so hyp keeps no sanitized copy
	 * of this register. Fail the build if a field is ever allowed without
	 * this helper being updated.
	 */
	BUILD_BUG_ON(PVM_ID_AA64ZFR0_ALLOW != 0ULL);

	return 0;
}
/* ID_AA64DFR0_EL1: always reads as zero for protected VMs. */
static u64 get_pvm_id_aa64dfr0(const struct kvm_vcpu *vcpu)
{
	/*
	 * Debug (including breakpoints and watchpoints) is not supported, so
	 * pKVM keeps no sanitized copy of this register. Fail the build if a
	 * field is ever allowed without this helper being updated.
	 */
	BUILD_BUG_ON(PVM_ID_AA64DFR0_ALLOW != 0ULL);

	return 0;
}
/* ID_AA64DFR1_EL1: always reads as zero for protected VMs. */
static u64 get_pvm_id_aa64dfr1(const struct kvm_vcpu *vcpu)
{
	/*
	 * Debug is not supported, so hyp keeps no sanitized copy of this
	 * register. Fail the build if a field is ever allowed without this
	 * helper being updated.
	 */
	BUILD_BUG_ON(PVM_ID_AA64DFR1_ALLOW != 0ULL);

	return 0;
}
/* ID_AA64AFR0_EL1: always reads as zero for protected VMs. */
static u64 get_pvm_id_aa64afr0(const struct kvm_vcpu *vcpu)
{
	/*
	 * Implementation defined features are not supported, so hyp keeps no
	 * sanitized copy of this register. Fail the build if a field is ever
	 * allowed without this helper being updated.
	 */
	BUILD_BUG_ON(PVM_ID_AA64AFR0_ALLOW != 0ULL);

	return 0;
}
/* ID_AA64AFR1_EL1: always reads as zero for protected VMs. */
static u64 get_pvm_id_aa64afr1(const struct kvm_vcpu *vcpu)
{
	/*
	 * Implementation defined features are not supported, so hyp keeps no
	 * sanitized copy of this register. Fail the build if a field is ever
	 * allowed without this helper being updated.
	 */
	BUILD_BUG_ON(PVM_ID_AA64AFR1_ALLOW != 0ULL);

	return 0;
}
/* ID_AA64ISAR0_EL1: the system's value limited to the allowed fields. */
static u64 get_pvm_id_aa64isar0(const struct kvm_vcpu *vcpu)
{
	u64 visible = id_aa64isar0_el1_sys_val;

	visible &= PVM_ID_AA64ISAR0_ALLOW;

	return visible;
}
/*
 * ID_AA64ISAR1_EL1: allowed fields pass through; the APA, API, GPA and GPI
 * fields are additionally hidden when the vcpu does not have ptrauth.
 */
static u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu)
{
	const u64 ptrauth_fields = ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) |
				   ARM64_FEATURE_MASK(ID_AA64ISAR1_API) |
				   ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) |
				   ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI);
	u64 visible = id_aa64isar1_el1_sys_val & PVM_ID_AA64ISAR1_ALLOW;

	if (!vcpu_has_ptrauth(vcpu))
		visible &= ~ptrauth_fields;

	return visible;
}
/*
 * ID_AA64ISAR2_EL1: allowed fields pass through; the APA3 and GPA3 fields are
 * additionally hidden when the vcpu does not have ptrauth.
 */
static u64 get_pvm_id_aa64isar2(const struct kvm_vcpu *vcpu)
{
	const u64 ptrauth_fields = ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) |
				   ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3);
	u64 visible = id_aa64isar2_el1_sys_val & PVM_ID_AA64ISAR2_ALLOW;

	if (!vcpu_has_ptrauth(vcpu))
		visible &= ~ptrauth_fields;

	return visible;
}
/*
 * ID_AA64MMFR0_EL1: allowed fields pass through, combined with the restricted
 * fields capped at their limits.
 */
static u64 get_pvm_id_aa64mmfr0(const struct kvm_vcpu *vcpu)
{
	u64 passthrough = id_aa64mmfr0_el1_sys_val & PVM_ID_AA64MMFR0_ALLOW;
	u64 capped;

	capped = get_restricted_features_unsigned(id_aa64mmfr0_el1_sys_val,
						  PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED);

	return passthrough | capped;
}
/* ID_AA64MMFR1_EL1: the system's value limited to the allowed fields. */
static u64 get_pvm_id_aa64mmfr1(const struct kvm_vcpu *vcpu)
{
	u64 visible = id_aa64mmfr1_el1_sys_val;

	visible &= PVM_ID_AA64MMFR1_ALLOW;

	return visible;
}
/* ID_AA64MMFR2_EL1: the system's value limited to the allowed fields. */
static u64 get_pvm_id_aa64mmfr2(const struct kvm_vcpu *vcpu)
{
	u64 visible = id_aa64mmfr2_el1_sys_val;

	visible &= PVM_ID_AA64MMFR2_ALLOW;

	return visible;
}
/*
 * Read a sanitized cpufeature ID register by its encoding.
 *
 * @vcpu: vcpu whose view of the register is requested.
 * @id:   system register encoding (one of the SYS_ID_AA64*_EL1 values).
 *
 * Returns the protected VM's view of the register. An encoding that is not
 * handled triggers a WARN and yields 0.
 */
u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id)
{
	u64 val = 0;

	switch (id) {
	case SYS_ID_AA64PFR0_EL1:
		val = get_pvm_id_aa64pfr0(vcpu);
		break;
	case SYS_ID_AA64PFR1_EL1:
		val = get_pvm_id_aa64pfr1(vcpu);
		break;
	case SYS_ID_AA64ZFR0_EL1:
		val = get_pvm_id_aa64zfr0(vcpu);
		break;
	case SYS_ID_AA64DFR0_EL1:
		val = get_pvm_id_aa64dfr0(vcpu);
		break;
	case SYS_ID_AA64DFR1_EL1:
		val = get_pvm_id_aa64dfr1(vcpu);
		break;
	case SYS_ID_AA64AFR0_EL1:
		val = get_pvm_id_aa64afr0(vcpu);
		break;
	case SYS_ID_AA64AFR1_EL1:
		val = get_pvm_id_aa64afr1(vcpu);
		break;
	case SYS_ID_AA64ISAR0_EL1:
		val = get_pvm_id_aa64isar0(vcpu);
		break;
	case SYS_ID_AA64ISAR1_EL1:
		val = get_pvm_id_aa64isar1(vcpu);
		break;
	case SYS_ID_AA64ISAR2_EL1:
		val = get_pvm_id_aa64isar2(vcpu);
		break;
	case SYS_ID_AA64MMFR0_EL1:
		val = get_pvm_id_aa64mmfr0(vcpu);
		break;
	case SYS_ID_AA64MMFR1_EL1:
		val = get_pvm_id_aa64mmfr1(vcpu);
		break;
	case SYS_ID_AA64MMFR2_EL1:
		val = get_pvm_id_aa64mmfr2(vcpu);
		break;
	default:
		/*
		 * Not expected: every register routed here through
		 * pvm_sys_reg_descs[] is meant to have a case above.
		 */
		WARN_ON(1);
		break;
	}

	return val;
}
static u64 read_id_reg(const struct kvm_vcpu *vcpu,
struct sys_reg_desc const *r)
{
return pvm_read_id_reg(vcpu, reg_to_encoding(r));
}
/*
 * Handler for Read-As-Zero/Write-Ignored sysregs.
 *
 * Reads return 0 in p->regval; writes leave @p untouched. Always returns true.
 */
static bool pvm_access_raz_wi(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
			      const struct sys_reg_desc *r)
{
	if (p->is_write)
		return true;

	p->regval = 0;
	return true;
}
/*
 * Accessor for AArch32 feature id registers.
 *
 * According to the spec, the value of these registers is "unknown" when
 * AArch32 isn't supported. Reads are therefore handled as RAZ; writes inject
 * an undefined exception and return false.
 */
static bool pvm_access_id_aarch32(struct kvm_vcpu *vcpu,
				  struct sys_reg_params *p,
				  const struct sys_reg_desc *r)
{
	/*
	 * AArch32 guests are not supported, so pKVM keeps no sanitized copy of
	 * the AArch32 feature id registers. Fail the build if the EL1 field
	 * limit is ever raised above 64-bit only.
	 */
	BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1),
		     PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) > ID_AA64PFR0_ELx_64BIT_ONLY);

	if (!p->is_write)
		return pvm_access_raz_wi(vcpu, p, r);

	inject_undef64(vcpu);
	return false;
}
/*
 * Accessor for AArch64 feature id registers.
 *
 * A read stores the protected VM's view of the register in p->regval and
 * returns true. A write injects an undefined exception into the guest and
 * returns false.
 */
static bool pvm_access_id_aarch64(struct kvm_vcpu *vcpu,
				  struct sys_reg_params *p,
				  const struct sys_reg_desc *r)
{
	if (!p->is_write) {
		p->regval = read_id_reg(vcpu, r);
		return true;
	}

	inject_undef64(vcpu);
	return false;
}
/*
 * Accessor for ICC_SRE_EL1.
 *
 * Reads return a fixed value with the SRE, DFB and DIB bits set; writes are
 * ignored. Always returns true.
 */
static bool pvm_gic_read_sre(struct kvm_vcpu *vcpu,
			     struct sys_reg_params *p,
			     const struct sys_reg_desc *r)
{
	if (p->is_write)
		return true;

	/* pVMs only support GICv3. 'nuf said. */
	p->regval = ICC_SRE_EL1_SRE | ICC_SRE_EL1_DFB | ICC_SRE_EL1_DIB;
	return true;
}
/*
 * Mark the specified system register as an AArch32 feature id register:
 * accesses are routed to pvm_access_id_aarch32().
 */
#define AARCH32(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch32 }
/*
 * Mark the specified system register as an AArch64 feature id register:
 * accesses are routed to pvm_access_id_aarch64().
 */
#define AARCH64(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch64 }
/*
 * Mark the specified system register as Read-As-Zero/Write-Ignored:
 * accesses are routed to pvm_access_raz_wi().
 */
#define RAZ_WI(REG) { SYS_DESC(REG), .access = pvm_access_raz_wi }
/*
 * Mark the specified system register as not being handled in hyp. The NULL
 * .access pointer is what kvm_handle_pvm_sysreg() checks to leave the exit to
 * the host.
 */
#define HOST_HANDLED(REG) { SYS_DESC(REG), .access = NULL }
/*
* Architected system registers.
* Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
*
* NOTE: Anything not explicitly listed here is *restricted by default*, i.e.,
* it will lead to injecting an exception into the guest.
*/
static const struct sys_reg_desc pvm_sys_reg_descs[] = {
/* Cache maintenance by set/way operations are restricted. */
/* Debug and Trace Registers are restricted. */
/* AArch64 mappings of the AArch32 ID registers */
/* CRm=1 */
AARCH32(SYS_ID_PFR0_EL1),
AARCH32(SYS_ID_PFR1_EL1),
AARCH32(SYS_ID_DFR0_EL1),
AARCH32(SYS_ID_AFR0_EL1),
AARCH32(SYS_ID_MMFR0_EL1),
AARCH32(SYS_ID_MMFR1_EL1),
AARCH32(SYS_ID_MMFR2_EL1),
AARCH32(SYS_ID_MMFR3_EL1),
/* CRm=2 */
AARCH32(SYS_ID_ISAR0_EL1),
AARCH32(SYS_ID_ISAR1_EL1),
AARCH32(SYS_ID_ISAR2_EL1),
AARCH32(SYS_ID_ISAR3_EL1),
AARCH32(SYS_ID_ISAR4_EL1),
AARCH32(SYS_ID_ISAR5_EL1),
AARCH32(SYS_ID_MMFR4_EL1),
AARCH32(SYS_ID_ISAR6_EL1),
/* CRm=3 */
AARCH32(SYS_MVFR0_EL1),
AARCH32(SYS_MVFR1_EL1),
AARCH32(SYS_MVFR2_EL1),
AARCH32(SYS_ID_PFR2_EL1),
AARCH32(SYS_ID_DFR1_EL1),
AARCH32(SYS_ID_MMFR5_EL1),
/* AArch64 ID registers */
/* CRm=4 */
AARCH64(SYS_ID_AA64PFR0_EL1),
AARCH64(SYS_ID_AA64PFR1_EL1),
AARCH64(SYS_ID_AA64ZFR0_EL1),
AARCH64(SYS_ID_AA64DFR0_EL1),
AARCH64(SYS_ID_AA64DFR1_EL1),
AARCH64(SYS_ID_AA64AFR0_EL1),
AARCH64(SYS_ID_AA64AFR1_EL1),
AARCH64(SYS_ID_AA64ISAR0_EL1),
AARCH64(SYS_ID_AA64ISAR1_EL1),
AARCH64(SYS_ID_AA64MMFR0_EL1),
AARCH64(SYS_ID_AA64MMFR1_EL1),
AARCH64(SYS_ID_AA64MMFR2_EL1),
/* Scalable Vector Registers are restricted. */
RAZ_WI(SYS_ERRIDR_EL1),
RAZ_WI(SYS_ERRSELR_EL1),
RAZ_WI(SYS_ERXFR_EL1),
RAZ_WI(SYS_ERXCTLR_EL1),
RAZ_WI(SYS_ERXSTATUS_EL1),
RAZ_WI(SYS_ERXADDR_EL1),
RAZ_WI(SYS_ERXMISC0_EL1),
RAZ_WI(SYS_ERXMISC1_EL1),
/* Performance Monitoring Registers are restricted. */
/* Limited Ordering Regions Registers are restricted. */
HOST_HANDLED(SYS_ICC_SGI1R_EL1),
HOST_HANDLED(SYS_ICC_ASGI1R_EL1),
HOST_HANDLED(SYS_ICC_SGI0R_EL1),
{ SYS_DESC(SYS_ICC_SRE_EL1), .access = pvm_gic_read_sre, },
HOST_HANDLED(SYS_CCSIDR_EL1),
HOST_HANDLED(SYS_CLIDR_EL1),
HOST_HANDLED(SYS_CSSELR_EL1),
HOST_HANDLED(SYS_CTR_EL0),
/* Performance Monitoring Registers are restricted. */
/* Activity Monitoring Registers are restricted. */
HOST_HANDLED(SYS_CNTP_TVAL_EL0),
HOST_HANDLED(SYS_CNTP_CTL_EL0),
HOST_HANDLED(SYS_CNTP_CVAL_EL0),
/* Performance Monitoring Registers are restricted. */
};
/*
* Checks that the sysreg table is unique and in-order.
*
* Returns 0 if the table is consistent, or 1 otherwise.
*/
int kvm_check_pvm_sysreg_table(void)
{
unsigned int i;
for (i = 1; i < ARRAY_SIZE(pvm_sys_reg_descs); i++) {
if (cmp_sys_reg(&pvm_sys_reg_descs[i-1], &pvm_sys_reg_descs[i]) >= 0)
return 1;
}
return 0;
}
/*
 * Handler for protected VM MSR, MRS or System instruction execution.
 *
 * The access is decoded from the vcpu's ESR and looked up in
 * pvm_sys_reg_descs[]:
 *  - no entry: the register is restricted, inject an undefined exception;
 *  - entry with a NULL .access: leave the exit to the host;
 *  - otherwise: run the entry's accessor at hyp.
 *
 * Returns true if the hypervisor has handled the exit, and control should go
 * back to the guest, or false if it hasn't, to be handled by the host.
 */
bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
{
	const struct sys_reg_desc *r;
	struct sys_reg_params params;
	unsigned long esr = kvm_vcpu_get_esr(vcpu);
	int Rt = kvm_vcpu_sys_get_rt(vcpu);

	params = esr_sys64_to_params(esr);
	/* Seed regval with the guest's Rt so that writes carry their value. */
	params.regval = vcpu_get_reg(vcpu, Rt);

	r = find_reg(&params, pvm_sys_reg_descs, ARRAY_SIZE(pvm_sys_reg_descs));

	/* Undefined (RESTRICTED). */
	if (r == NULL) {
		inject_undef64(vcpu);
		return true;
	}

	/* Handled by the host (HOST_HANDLED) */
	if (r->access == NULL)
		return false;

	/* Handled by hyp: skip instruction if instructed to do so. */
	if (r->access(vcpu, &params, r))
		__kvm_skip_instr(vcpu);

	/* For reads, hand the (possibly updated) value back through Rt. */
	if (!params.is_write)
		vcpu_set_reg(vcpu, Rt, params.regval);

	return true;
}
/*
 * Handler for protected VM restricted exceptions.
 *
 * The guest receives an undefined exception. @exit_code is not used.
 *
 * Always returns true: the hypervisor has handled the exit and control should
 * go back to the guest.
 */
bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code)
{
	inject_undef64(vcpu);

	return true;
}
|