1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
|
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2024 Google LLC
*/
#include <linux/arch_topology.h>
#include <linux/cpufreq.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of_address.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
/*
* CPU0..CPUn
* +-------------+-------------------------------+--------+-------+
* | Register | Description | Offset | Len |
* +-------------+-------------------------------+--------+-------+
* | cur_perf | read this register to get | 0x0 | 0x4 |
* | | the current perf (integer val | | |
* | | representing perf relative to | | |
* | | max performance) | | |
* | | that vCPU is running at | | |
* +-------------+-------------------------------+--------+-------+
* | set_perf | write to this register to set | 0x4 | 0x4 |
* | | perf value of the vCPU | | |
* +-------------+-------------------------------+--------+-------+
* | perftbl_len | number of entries in perf | 0x8 | 0x4 |
* | | table. A single entry in the | | |
* | | perf table denotes no table | | |
* | | and the entry contains | | |
* | | the maximum perf value | | |
* | | that this vCPU supports. | | |
* | | The guest can request any | | |
* | | value between 1 and max perf | | |
* | | when perftbls are not used. | | |
* +---------------------------------------------+--------+-------+
* | perftbl_sel | write to this register to | 0xc | 0x4 |
* | | select perf table entry to | | |
* | | read from | | |
* +---------------------------------------------+--------+-------+
* | perftbl_rd | read this register to get | 0x10 | 0x4 |
* | | perf value of the selected | | |
* | | entry based on perftbl_sel | | |
* +---------------------------------------------+--------+-------+
* | perf_domain | performance domain number | 0x14 | 0x4 |
* | | that this vCPU belongs to. | | |
* | | vCPUs sharing the same perf | | |
* | | domain number are part of the | | |
* | | same performance domain. | | |
* +-------------+-------------------------------+--------+-------+
*/
#define REG_CUR_PERF_STATE_OFFSET 0x0
#define REG_SET_PERF_STATE_OFFSET 0x4
#define REG_PERFTBL_LEN_OFFSET 0x8
#define REG_PERFTBL_SEL_OFFSET 0xc
#define REG_PERFTBL_RD_OFFSET 0x10
#define REG_PERF_DOMAIN_OFFSET 0x14
#define PER_CPU_OFFSET 0x1000
#define PERFTBL_MAX_ENTRIES 64U
static void __iomem *base;
static DEFINE_PER_CPU(u32, perftbl_num_entries);
static void virt_scale_freq_tick(void)
{
int cpu = smp_processor_id();
u32 max_freq = (u32)cpufreq_get_hw_max_freq(cpu);
u64 cur_freq;
unsigned long scale;
cur_freq = (u64)readl_relaxed(base + cpu * PER_CPU_OFFSET
+ REG_CUR_PERF_STATE_OFFSET);
cur_freq <<= SCHED_CAPACITY_SHIFT;
scale = (unsigned long)div_u64(cur_freq, max_freq);
scale = min(scale, SCHED_CAPACITY_SCALE);
this_cpu_write(arch_freq_scale, scale);
}
static struct scale_freq_data virt_sfd = {
.source = SCALE_FREQ_SOURCE_VIRT,
.set_freq_scale = virt_scale_freq_tick,
};
static unsigned int virt_cpufreq_set_perf(struct cpufreq_policy *policy,
unsigned int target_freq)
{
writel_relaxed(target_freq,
base + policy->cpu * PER_CPU_OFFSET + REG_SET_PERF_STATE_OFFSET);
return 0;
}
static unsigned int virt_cpufreq_fast_switch(struct cpufreq_policy *policy,
unsigned int target_freq)
{
virt_cpufreq_set_perf(policy, target_freq);
return target_freq;
}
static u32 virt_cpufreq_get_perftbl_entry(int cpu, u32 idx)
{
writel_relaxed(idx, base + cpu * PER_CPU_OFFSET +
REG_PERFTBL_SEL_OFFSET);
return readl_relaxed(base + cpu * PER_CPU_OFFSET +
REG_PERFTBL_RD_OFFSET);
}
static int virt_cpufreq_target(struct cpufreq_policy *policy,
unsigned int target_freq,
unsigned int relation)
{
struct cpufreq_freqs freqs;
int ret = 0;
freqs.old = policy->cur;
freqs.new = target_freq;
cpufreq_freq_transition_begin(policy, &freqs);
ret = virt_cpufreq_set_perf(policy, target_freq);
cpufreq_freq_transition_end(policy, &freqs, ret != 0);
return ret;
}
static int virt_cpufreq_get_sharing_cpus(struct cpufreq_policy *policy)
{
u32 cur_perf_domain, perf_domain;
struct device *cpu_dev;
int cpu;
cur_perf_domain = readl_relaxed(base + policy->cpu *
PER_CPU_OFFSET + REG_PERF_DOMAIN_OFFSET);
for_each_possible_cpu(cpu) {
cpu_dev = get_cpu_device(cpu);
if (!cpu_dev)
continue;
perf_domain = readl_relaxed(base + cpu *
PER_CPU_OFFSET + REG_PERF_DOMAIN_OFFSET);
if (perf_domain == cur_perf_domain)
cpumask_set_cpu(cpu, policy->cpus);
}
return 0;
}
static int virt_cpufreq_get_freq_info(struct cpufreq_policy *policy)
{
struct cpufreq_frequency_table *table;
u32 num_perftbl_entries, idx;
num_perftbl_entries = per_cpu(perftbl_num_entries, policy->cpu);
if (num_perftbl_entries == 1) {
policy->cpuinfo.min_freq = 1;
policy->cpuinfo.max_freq = virt_cpufreq_get_perftbl_entry(policy->cpu, 0);
policy->min = policy->cpuinfo.min_freq;
policy->max = policy->cpuinfo.max_freq;
policy->cur = policy->max;
return 0;
}
table = kcalloc(num_perftbl_entries + 1, sizeof(*table), GFP_KERNEL);
if (!table)
return -ENOMEM;
for (idx = 0; idx < num_perftbl_entries; idx++)
table[idx].frequency = virt_cpufreq_get_perftbl_entry(policy->cpu, idx);
table[idx].frequency = CPUFREQ_TABLE_END;
policy->freq_table = table;
return 0;
}
static int virt_cpufreq_cpu_init(struct cpufreq_policy *policy)
{
struct device *cpu_dev;
int ret;
cpu_dev = get_cpu_device(policy->cpu);
if (!cpu_dev)
return -ENODEV;
ret = virt_cpufreq_get_freq_info(policy);
if (ret) {
dev_warn(cpu_dev, "failed to get cpufreq info\n");
return ret;
}
ret = virt_cpufreq_get_sharing_cpus(policy);
if (ret) {
dev_warn(cpu_dev, "failed to get sharing cpumask\n");
return ret;
}
/*
* To simplify and improve latency of handling frequency requests on
* the host side, this ensures that the vCPU thread triggering the MMIO
* abort is the same thread whose performance constraints (Ex. uclamp
* settings) need to be updated. This simplifies the VMM (Virtual
* Machine Manager) having to find the correct vCPU thread and/or
* facing permission issues when configuring other threads.
*/
policy->dvfs_possible_from_any_cpu = false;
policy->fast_switch_possible = true;
/*
* Using the default SCALE_FREQ_SOURCE_CPUFREQ is insufficient since
* the actual physical CPU frequency may not match requested frequency
* from the vCPU thread due to frequency update latencies or other
* inputs to the physical CPU frequency selection. This additional FIE
* source allows for more accurate freq_scale updates and only takes
* effect if another FIE source such as AMUs have not been registered.
*/
topology_set_scale_freq_source(&virt_sfd, policy->cpus);
return 0;
}
static void virt_cpufreq_cpu_exit(struct cpufreq_policy *policy)
{
topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_VIRT, policy->related_cpus);
kfree(policy->freq_table);
}
static int virt_cpufreq_online(struct cpufreq_policy *policy)
{
/* Nothing to restore. */
return 0;
}
static int virt_cpufreq_offline(struct cpufreq_policy *policy)
{
/* Dummy offline() to avoid exit() being called and freeing resources. */
return 0;
}
static int virt_cpufreq_verify_policy(struct cpufreq_policy_data *policy)
{
if (policy->freq_table)
return cpufreq_frequency_table_verify(policy, policy->freq_table);
cpufreq_verify_within_cpu_limits(policy);
return 0;
}
static struct cpufreq_driver cpufreq_virt_driver = {
.name = "virt-cpufreq",
.init = virt_cpufreq_cpu_init,
.exit = virt_cpufreq_cpu_exit,
.online = virt_cpufreq_online,
.offline = virt_cpufreq_offline,
.verify = virt_cpufreq_verify_policy,
.target = virt_cpufreq_target,
.fast_switch = virt_cpufreq_fast_switch,
.attr = cpufreq_generic_attr,
};
static int virt_cpufreq_driver_probe(struct platform_device *pdev)
{
u32 num_perftbl_entries;
int ret, cpu;
base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(base))
return PTR_ERR(base);
for_each_possible_cpu(cpu) {
num_perftbl_entries = readl_relaxed(base + cpu * PER_CPU_OFFSET +
REG_PERFTBL_LEN_OFFSET);
if (!num_perftbl_entries || num_perftbl_entries > PERFTBL_MAX_ENTRIES)
return -ENODEV;
per_cpu(perftbl_num_entries, cpu) = num_perftbl_entries;
}
ret = cpufreq_register_driver(&cpufreq_virt_driver);
if (ret) {
dev_err(&pdev->dev, "Virtual CPUFreq driver failed to register: %d\n", ret);
return ret;
}
dev_dbg(&pdev->dev, "Virtual CPUFreq driver initialized\n");
return 0;
}
static void virt_cpufreq_driver_remove(struct platform_device *pdev)
{
cpufreq_unregister_driver(&cpufreq_virt_driver);
}
static const struct of_device_id virt_cpufreq_match[] = {
{ .compatible = "qemu,virtual-cpufreq", .data = NULL},
{}
};
MODULE_DEVICE_TABLE(of, virt_cpufreq_match);
static struct platform_driver virt_cpufreq_driver = {
.probe = virt_cpufreq_driver_probe,
.remove = virt_cpufreq_driver_remove,
.driver = {
.name = "virt-cpufreq",
.of_match_table = virt_cpufreq_match,
},
};
static int __init virt_cpufreq_init(void)
{
return platform_driver_register(&virt_cpufreq_driver);
}
postcore_initcall(virt_cpufreq_init);
static void __exit virt_cpufreq_exit(void)
{
platform_driver_unregister(&virt_cpufreq_driver);
}
module_exit(virt_cpufreq_exit);
MODULE_DESCRIPTION("Virtual cpufreq driver");
MODULE_LICENSE("GPL");
|