1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
|
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_MSHYPER_H
#define _ASM_X86_MSHYPER_H
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/nmi.h>
#include <asm/io.h>
#include <asm/hyperv-tlfs.h>
#include <asm/nospec-branch.h>
#define VP_INVAL U32_MAX
struct ms_hyperv_info {
u32 features;
u32 misc_features;
u32 hints;
u32 nested_features;
u32 max_vp_index;
u32 max_lp_index;
};
extern struct ms_hyperv_info ms_hyperv;
/*
* Generate the guest ID.
*/
static inline __u64 generate_guest_id(__u64 d_info1, __u64 kernel_version,
__u64 d_info2)
{
__u64 guest_id = 0;
guest_id = (((__u64)HV_LINUX_VENDOR_ID) << 48);
guest_id |= (d_info1 << 48);
guest_id |= (kernel_version << 16);
guest_id |= d_info2;
return guest_id;
}
/* Free the message slot and signal end-of-message if required */
static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
{
/*
* On crash we're reading some other CPU's message page and we need
* to be careful: this other CPU may already had cleared the header
* and the host may already had delivered some other message there.
* In case we blindly write msg->header.message_type we're going
* to lose it. We can still lose a message of the same type but
* we count on the fact that there can only be one
* CHANNELMSG_UNLOAD_RESPONSE and we don't care about other messages
* on crash.
*/
if (cmpxchg(&msg->header.message_type, old_msg_type,
HVMSG_NONE) != old_msg_type)
return;
/*
* Make sure the write to MessageType (ie set to
* HVMSG_NONE) happens before we read the
* MessagePending and EOMing. Otherwise, the EOMing
* will not deliver any more messages since there is
* no empty slot
*/
mb();
if (msg->header.message_flags.msg_pending) {
/*
* This will cause message queue rescan to
* possibly deliver another msg from the
* hypervisor
*/
wrmsrl(HV_X64_MSR_EOM, 0);
}
}
#define hv_init_timer(timer, tick) \
wrmsrl(HV_X64_MSR_STIMER0_COUNT + (2*timer), tick)
#define hv_init_timer_config(timer, val) \
wrmsrl(HV_X64_MSR_STIMER0_CONFIG + (2*timer), val)
#define hv_get_simp(val) rdmsrl(HV_X64_MSR_SIMP, val)
#define hv_set_simp(val) wrmsrl(HV_X64_MSR_SIMP, val)
#define hv_get_siefp(val) rdmsrl(HV_X64_MSR_SIEFP, val)
#define hv_set_siefp(val) wrmsrl(HV_X64_MSR_SIEFP, val)
#define hv_get_synic_state(val) rdmsrl(HV_X64_MSR_SCONTROL, val)
#define hv_set_synic_state(val) wrmsrl(HV_X64_MSR_SCONTROL, val)
#define hv_get_vp_index(index) rdmsrl(HV_X64_MSR_VP_INDEX, index)
#define hv_get_synint_state(int_num, val) \
rdmsrl(HV_X64_MSR_SINT0 + int_num, val)
#define hv_set_synint_state(int_num, val) \
wrmsrl(HV_X64_MSR_SINT0 + int_num, val)
#define hv_get_crash_ctl(val) \
rdmsrl(HV_X64_MSR_CRASH_CTL, val)
void hyperv_callback_vector(void);
void hyperv_reenlightenment_vector(void);
#ifdef CONFIG_TRACING
#define trace_hyperv_callback_vector hyperv_callback_vector
#endif
void hyperv_vector_handler(struct pt_regs *regs);
void hv_setup_vmbus_irq(void (*handler)(void));
void hv_remove_vmbus_irq(void);
void hv_setup_kexec_handler(void (*handler)(void));
void hv_remove_kexec_handler(void);
void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs));
void hv_remove_crash_handler(void);
/*
* Routines for stimer0 Direct Mode handling.
* On x86/x64, there are no percpu actions to take.
*/
void hv_stimer0_vector_handler(struct pt_regs *regs);
void hv_stimer0_callback_vector(void);
int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void));
void hv_remove_stimer0_irq(int irq);
static inline void hv_enable_stimer0_percpu_irq(int irq) {}
static inline void hv_disable_stimer0_percpu_irq(int irq) {}
#if IS_ENABLED(CONFIG_HYPERV)
extern struct clocksource *hyperv_cs;
extern void *hv_hypercall_pg;
extern void __percpu **hyperv_pcpu_input_arg;
static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
{
u64 input_address = input ? virt_to_phys(input) : 0;
u64 output_address = output ? virt_to_phys(output) : 0;
u64 hv_status;
#ifdef CONFIG_X86_64
if (!hv_hypercall_pg)
return U64_MAX;
__asm__ __volatile__("mov %4, %%r8\n"
CALL_NOSPEC
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
"+c" (control), "+d" (input_address)
: "r" (output_address),
THUNK_TARGET(hv_hypercall_pg)
: "cc", "memory", "r8", "r9", "r10", "r11");
#else
u32 input_address_hi = upper_32_bits(input_address);
u32 input_address_lo = lower_32_bits(input_address);
u32 output_address_hi = upper_32_bits(output_address);
u32 output_address_lo = lower_32_bits(output_address);
if (!hv_hypercall_pg)
return U64_MAX;
__asm__ __volatile__(CALL_NOSPEC
: "=A" (hv_status),
"+c" (input_address_lo), ASM_CALL_CONSTRAINT
: "A" (control),
"b" (input_address_hi),
"D"(output_address_hi), "S"(output_address_lo),
THUNK_TARGET(hv_hypercall_pg)
: "cc", "memory");
#endif /* !x86_64 */
return hv_status;
}
/* Fast hypercall with 8 bytes of input and no output */
static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
{
u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
#ifdef CONFIG_X86_64
{
__asm__ __volatile__(CALL_NOSPEC
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
"+c" (control), "+d" (input1)
: THUNK_TARGET(hv_hypercall_pg)
: "cc", "r8", "r9", "r10", "r11");
}
#else
{
u32 input1_hi = upper_32_bits(input1);
u32 input1_lo = lower_32_bits(input1);
__asm__ __volatile__ (CALL_NOSPEC
: "=A"(hv_status),
"+c"(input1_lo),
ASM_CALL_CONSTRAINT
: "A" (control),
"b" (input1_hi),
THUNK_TARGET(hv_hypercall_pg)
: "cc", "edi", "esi");
}
#endif
return hv_status;
}
/* Fast hypercall with 16 bytes of input */
static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
{
u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
#ifdef CONFIG_X86_64
{
__asm__ __volatile__("mov %4, %%r8\n"
CALL_NOSPEC
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
"+c" (control), "+d" (input1)
: "r" (input2),
THUNK_TARGET(hv_hypercall_pg)
: "cc", "r8", "r9", "r10", "r11");
}
#else
{
u32 input1_hi = upper_32_bits(input1);
u32 input1_lo = lower_32_bits(input1);
u32 input2_hi = upper_32_bits(input2);
u32 input2_lo = lower_32_bits(input2);
__asm__ __volatile__ (CALL_NOSPEC
: "=A"(hv_status),
"+c"(input1_lo), ASM_CALL_CONSTRAINT
: "A" (control), "b" (input1_hi),
"D"(input2_hi), "S"(input2_lo),
THUNK_TARGET(hv_hypercall_pg)
: "cc");
}
#endif
return hv_status;
}
/*
* Rep hypercalls. Callers of this functions are supposed to ensure that
* rep_count and varhead_size comply with Hyper-V hypercall definition.
*/
static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
void *input, void *output)
{
u64 control = code;
u64 status;
u16 rep_comp;
control |= (u64)varhead_size << HV_HYPERCALL_VARHEAD_OFFSET;
control |= (u64)rep_count << HV_HYPERCALL_REP_COMP_OFFSET;
do {
status = hv_do_hypercall(control, input, output);
if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS)
return status;
/* Bits 32-43 of status have 'Reps completed' data. */
rep_comp = (status & HV_HYPERCALL_REP_COMP_MASK) >>
HV_HYPERCALL_REP_COMP_OFFSET;
control &= ~HV_HYPERCALL_REP_START_MASK;
control |= (u64)rep_comp << HV_HYPERCALL_REP_START_OFFSET;
touch_nmi_watchdog();
} while (rep_comp < rep_count);
return status;
}
/*
* Hypervisor's notion of virtual processor ID is different from
* Linux' notion of CPU ID. This information can only be retrieved
* in the context of the calling CPU. Setup a map for easy access
* to this information.
*/
extern u32 *hv_vp_index;
extern u32 hv_max_vp_index;
extern struct hv_vp_assist_page **hv_vp_assist_page;
static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
{
if (!hv_vp_assist_page)
return NULL;
return hv_vp_assist_page[cpu];
}
/**
* hv_cpu_number_to_vp_number() - Map CPU to VP.
* @cpu_number: CPU number in Linux terms
*
* This function returns the mapping between the Linux processor
* number and the hypervisor's virtual processor number, useful
* in making hypercalls and such that talk about specific
* processors.
*
* Return: Virtual processor number in Hyper-V terms
*/
static inline int hv_cpu_number_to_vp_number(int cpu_number)
{
return hv_vp_index[cpu_number];
}
static inline int cpumask_to_vpset(struct hv_vpset *vpset,
const struct cpumask *cpus)
{
int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;
/* valid_bank_mask can represent up to 64 banks */
if (hv_max_vp_index / 64 >= 64)
return 0;
/*
* Clear all banks up to the maximum possible bank as hv_tlb_flush_ex
* structs are not cleared between calls, we risk flushing unneeded
* vCPUs otherwise.
*/
for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++)
vpset->bank_contents[vcpu_bank] = 0;
/*
* Some banks may end up being empty but this is acceptable.
*/
for_each_cpu(cpu, cpus) {
vcpu = hv_cpu_number_to_vp_number(cpu);
if (vcpu == VP_INVAL)
return -1;
vcpu_bank = vcpu / 64;
vcpu_offset = vcpu % 64;
__set_bit(vcpu_offset, (unsigned long *)
&vpset->bank_contents[vcpu_bank]);
if (vcpu_bank >= nr_bank)
nr_bank = vcpu_bank + 1;
}
vpset->valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0);
return nr_bank;
}
void __init hyperv_init(void);
void hyperv_setup_mmu_ops(void);
void hyperv_report_panic(struct pt_regs *regs, long err);
void hyperv_report_panic_msg(phys_addr_t pa, size_t size);
bool hv_is_hyperv_initialized(void);
void hyperv_cleanup(void);
void hyperv_reenlightenment_intr(struct pt_regs *regs);
void set_hv_tscchange_cb(void (*cb)(void));
void clear_hv_tscchange_cb(void);
void hyperv_stop_tsc_emulation(void);
int hyperv_flush_guest_mapping(u64 as);
#ifdef CONFIG_X86_64
void hv_apic_init(void);
void __init hv_init_spinlocks(void);
bool hv_vcpu_is_preempted(int vcpu);
#else
static inline void hv_apic_init(void) {}
#endif
#else /* CONFIG_HYPERV */
static inline void hyperv_init(void) {}
static inline bool hv_is_hyperv_initialized(void) { return false; }
static inline void hyperv_cleanup(void) {}
static inline void hyperv_setup_mmu_ops(void) {}
static inline void set_hv_tscchange_cb(void (*cb)(void)) {}
static inline void clear_hv_tscchange_cb(void) {}
static inline void hyperv_stop_tsc_emulation(void) {};
static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
{
return NULL;
}
static inline int hyperv_flush_guest_mapping(u64 as) { return -1; }
#endif /* CONFIG_HYPERV */
#ifdef CONFIG_HYPERV_TSCPAGE
struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
u64 *cur_tsc)
{
u64 scale, offset;
u32 sequence;
/*
* The protocol for reading Hyper-V TSC page is specified in Hypervisor
* Top-Level Functional Specification ver. 3.0 and above. To get the
* reference time we must do the following:
* - READ ReferenceTscSequence
* A special '0' value indicates the time source is unreliable and we
* need to use something else. The currently published specification
* versions (up to 4.0b) contain a mistake and wrongly claim '-1'
* instead of '0' as the special value, see commit c35b82ef0294.
* - ReferenceTime =
* ((RDTSC() * ReferenceTscScale) >> 64) + ReferenceTscOffset
* - READ ReferenceTscSequence again. In case its value has changed
* since our first reading we need to discard ReferenceTime and repeat
* the whole sequence as the hypervisor was updating the page in
* between.
*/
do {
sequence = READ_ONCE(tsc_pg->tsc_sequence);
if (!sequence)
return U64_MAX;
/*
* Make sure we read sequence before we read other values from
* TSC page.
*/
smp_rmb();
scale = READ_ONCE(tsc_pg->tsc_scale);
offset = READ_ONCE(tsc_pg->tsc_offset);
*cur_tsc = rdtsc_ordered();
/*
* Make sure we read sequence after we read all other values
* from TSC page.
*/
smp_rmb();
} while (READ_ONCE(tsc_pg->tsc_sequence) != sequence);
return mul_u64_u64_shr(*cur_tsc, scale, 64) + offset;
}
static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
{
u64 cur_tsc;
return hv_read_tsc_page_tsc(tsc_pg, &cur_tsc);
}
#else
static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
{
return NULL;
}
static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
u64 *cur_tsc)
{
BUG();
return U64_MAX;
}
#endif
#endif
|