Commit | Line | Data |
---|---|---|
9bdcb44e RW |
1 | /* |
2 | * CPUFreq governor based on scheduler-provided CPU utilization data. | |
3 | * | |
4 | * Copyright (C) 2016, Intel Corporation | |
5 | * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License version 2 as | |
9 | * published by the Free Software Foundation. | |
10 | */ | |
11 | ||
60f05e86 VK |
12 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
13 | ||
9bdcb44e | 14 | #include <linux/cpufreq.h> |
9bdcb44e RW |
15 | #include <linux/slab.h> |
16 | #include <trace/events/power.h> | |
17 | ||
18 | #include "sched.h" | |
19 | ||
/*
 * Governor tunables exposed through sysfs.  One instance may be shared by
 * several policies (see global_tunables) or belong to a single policy.
 */
struct sugov_tunables {
	/* Attribute set backing the sysfs directory; its policy_list links users. */
	struct gov_attr_set	attr_set;
	/* Minimum interval between two frequency updates, in microseconds. */
	unsigned int		rate_limit_us;
};
24 | ||
/* Per-cpufreq-policy governor state. */
struct sugov_policy {
	/* The cpufreq policy this governor instance drives. */
	struct cpufreq_policy	*policy;

	/* Tunables currently in effect for this policy. */
	struct sugov_tunables	*tunables;
	/* Node linking this policy into the tunables' attr_set policy list. */
	struct list_head	tunables_hook;

	raw_spinlock_t		update_lock;  /* For shared policies */
	/* 'time' value passed to the most recent sugov_update_commit(). */
	u64 last_freq_update_time;
	/* rate_limit_us converted to nanoseconds. */
	s64 freq_update_delay_ns;
	/* Last frequency requested; UINT_MAX means "unknown, force an update". */
	unsigned int next_freq;

	/* The next fields are only needed if fast switch cannot be used. */
	/* Queued from the scheduler hook; its handler schedules 'work'. */
	struct irq_work		irq_work;
	/* Process-context work that calls __cpufreq_driver_target(). */
	struct work_struct	work;
	/* Held around the driver call in sugov_work() and in sugov_limits(). */
	struct mutex		work_lock;
	/* Set when irq_work is queued, cleared at the end of sugov_work(). */
	bool			work_in_progress;

	/* Set by sugov_limits() to force the next update to go through. */
	bool need_freq_update;
};
44 | ||
/* Per-CPU governor state; one instance per CPU lives in the sugov_cpu per-CPU variable. */
struct sugov_cpu {
	/* Hook registered with cpufreq_add_update_util_hook() for this CPU. */
	struct update_util_data update_util;
	/* Policy-wide state this CPU belongs to (assigned in sugov_start()). */
	struct sugov_policy *sg_policy;

	/* Last raw (not yet driver-resolved) frequency computed by get_next_freq(). */
	unsigned int cached_raw_freq;

	/* The fields below are only needed when sharing a policy. */
	/* Utilization and capacity recorded by the last sugov_update_shared(). */
	unsigned long util;
	unsigned long max;
	/* 'time' of that last update. */
	u64 last_update;
	/* SCHED_CPUFREQ_* flags passed with that last update. */
	unsigned int flags;
};
57 | ||
/* Per-CPU governor state, indexed with per_cpu(sugov_cpu, cpu). */
static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);
59 | ||
60 | /************************ Governor internals ***********************/ | |
61 | ||
62 | static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) | |
63 | { | |
64 | s64 delta_ns; | |
65 | ||
66 | if (sg_policy->work_in_progress) | |
67 | return false; | |
68 | ||
69 | if (unlikely(sg_policy->need_freq_update)) { | |
70 | sg_policy->need_freq_update = false; | |
71 | /* | |
72 | * This happens when limits change, so forget the previous | |
73 | * next_freq value and force an update. | |
74 | */ | |
75 | sg_policy->next_freq = UINT_MAX; | |
76 | return true; | |
77 | } | |
78 | ||
79 | delta_ns = time - sg_policy->last_freq_update_time; | |
80 | return delta_ns >= sg_policy->freq_update_delay_ns; | |
81 | } | |
82 | ||
83 | static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, | |
84 | unsigned int next_freq) | |
85 | { | |
86 | struct cpufreq_policy *policy = sg_policy->policy; | |
87 | ||
88 | sg_policy->last_freq_update_time = time; | |
89 | ||
90 | if (policy->fast_switch_enabled) { | |
91 | if (sg_policy->next_freq == next_freq) { | |
92 | trace_cpu_frequency(policy->cur, smp_processor_id()); | |
93 | return; | |
94 | } | |
95 | sg_policy->next_freq = next_freq; | |
96 | next_freq = cpufreq_driver_fast_switch(policy, next_freq); | |
97 | if (next_freq == CPUFREQ_ENTRY_INVALID) | |
98 | return; | |
99 | ||
100 | policy->cur = next_freq; | |
101 | trace_cpu_frequency(next_freq, smp_processor_id()); | |
102 | } else if (sg_policy->next_freq != next_freq) { | |
103 | sg_policy->next_freq = next_freq; | |
104 | sg_policy->work_in_progress = true; | |
105 | irq_work_queue(&sg_policy->irq_work); | |
106 | } | |
107 | } | |
108 | ||
109 | /** | |
110 | * get_next_freq - Compute a new frequency for a given cpufreq policy. | |
5cbea469 | 111 | * @sg_cpu: schedutil cpu object to compute the new frequency for. |
9bdcb44e RW |
112 | * @util: Current CPU utilization. |
113 | * @max: CPU capacity. | |
114 | * | |
115 | * If the utilization is frequency-invariant, choose the new frequency to be | |
116 | * proportional to it, that is | |
117 | * | |
118 | * next_freq = C * max_freq * util / max | |
119 | * | |
120 | * Otherwise, approximate the would-be frequency-invariant utilization by | |
121 | * util_raw * (curr_freq / max_freq) which leads to | |
122 | * | |
123 | * next_freq = C * curr_freq * util_raw / max | |
124 | * | |
125 | * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8. | |
5cbea469 SM |
126 | * |
127 | * The lowest driver-supported frequency which is equal or greater than the raw | |
128 | * next_freq (as calculated above) is returned, subject to policy min/max and | |
129 | * cpufreq driver limitations. | |
9bdcb44e | 130 | */ |
5cbea469 SM |
131 | static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util, |
132 | unsigned long max) | |
9bdcb44e | 133 | { |
5cbea469 SM |
134 | struct sugov_policy *sg_policy = sg_cpu->sg_policy; |
135 | struct cpufreq_policy *policy = sg_policy->policy; | |
9bdcb44e RW |
136 | unsigned int freq = arch_scale_freq_invariant() ? |
137 | policy->cpuinfo.max_freq : policy->cur; | |
138 | ||
5cbea469 SM |
139 | freq = (freq + (freq >> 2)) * util / max; |
140 | ||
141 | if (freq == sg_cpu->cached_raw_freq && sg_policy->next_freq != UINT_MAX) | |
142 | return sg_policy->next_freq; | |
143 | sg_cpu->cached_raw_freq = freq; | |
144 | return cpufreq_driver_resolve_freq(policy, freq); | |
9bdcb44e RW |
145 | } |
146 | ||
58919e83 RW |
147 | static void sugov_get_util(unsigned long *util, unsigned long *max) |
148 | { | |
149 | struct rq *rq = this_rq(); | |
8314bc83 SM |
150 | unsigned long cfs_max; |
151 | ||
152 | cfs_max = arch_scale_cpu_capacity(NULL, smp_processor_id()); | |
58919e83 RW |
153 | |
154 | *util = min(rq->cfs.avg.util_avg, cfs_max); | |
155 | *max = cfs_max; | |
156 | } | |
157 | ||
9bdcb44e | 158 | static void sugov_update_single(struct update_util_data *hook, u64 time, |
58919e83 | 159 | unsigned int flags) |
9bdcb44e RW |
160 | { |
161 | struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); | |
162 | struct sugov_policy *sg_policy = sg_cpu->sg_policy; | |
163 | struct cpufreq_policy *policy = sg_policy->policy; | |
58919e83 | 164 | unsigned long util, max; |
9bdcb44e RW |
165 | unsigned int next_f; |
166 | ||
167 | if (!sugov_should_update_freq(sg_policy, time)) | |
168 | return; | |
169 | ||
58919e83 RW |
170 | if (flags & SCHED_CPUFREQ_RT_DL) { |
171 | next_f = policy->cpuinfo.max_freq; | |
172 | } else { | |
173 | sugov_get_util(&util, &max); | |
174 | next_f = get_next_freq(sg_cpu, util, max); | |
175 | } | |
9bdcb44e RW |
176 | sugov_update_commit(sg_policy, time, next_f); |
177 | } | |
178 | ||
5cbea469 | 179 | static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, |
58919e83 RW |
180 | unsigned long util, unsigned long max, |
181 | unsigned int flags) | |
9bdcb44e | 182 | { |
5cbea469 | 183 | struct sugov_policy *sg_policy = sg_cpu->sg_policy; |
9bdcb44e RW |
184 | struct cpufreq_policy *policy = sg_policy->policy; |
185 | unsigned int max_f = policy->cpuinfo.max_freq; | |
186 | u64 last_freq_update_time = sg_policy->last_freq_update_time; | |
187 | unsigned int j; | |
188 | ||
58919e83 | 189 | if (flags & SCHED_CPUFREQ_RT_DL) |
9bdcb44e RW |
190 | return max_f; |
191 | ||
192 | for_each_cpu(j, policy->cpus) { | |
193 | struct sugov_cpu *j_sg_cpu; | |
194 | unsigned long j_util, j_max; | |
195 | s64 delta_ns; | |
196 | ||
197 | if (j == smp_processor_id()) | |
198 | continue; | |
199 | ||
200 | j_sg_cpu = &per_cpu(sugov_cpu, j); | |
201 | /* | |
202 | * If the CPU utilization was last updated before the previous | |
203 | * frequency update and the time elapsed between the last update | |
204 | * of the CPU utilization and the last frequency update is long | |
205 | * enough, don't take the CPU into account as it probably is | |
206 | * idle now. | |
207 | */ | |
208 | delta_ns = last_freq_update_time - j_sg_cpu->last_update; | |
209 | if (delta_ns > TICK_NSEC) | |
210 | continue; | |
211 | ||
58919e83 | 212 | if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL) |
9bdcb44e RW |
213 | return max_f; |
214 | ||
58919e83 | 215 | j_util = j_sg_cpu->util; |
9bdcb44e RW |
216 | j_max = j_sg_cpu->max; |
217 | if (j_util * max > j_max * util) { | |
218 | util = j_util; | |
219 | max = j_max; | |
220 | } | |
221 | } | |
222 | ||
5cbea469 | 223 | return get_next_freq(sg_cpu, util, max); |
9bdcb44e RW |
224 | } |
225 | ||
226 | static void sugov_update_shared(struct update_util_data *hook, u64 time, | |
58919e83 | 227 | unsigned int flags) |
9bdcb44e RW |
228 | { |
229 | struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); | |
230 | struct sugov_policy *sg_policy = sg_cpu->sg_policy; | |
58919e83 | 231 | unsigned long util, max; |
9bdcb44e RW |
232 | unsigned int next_f; |
233 | ||
58919e83 RW |
234 | sugov_get_util(&util, &max); |
235 | ||
9bdcb44e RW |
236 | raw_spin_lock(&sg_policy->update_lock); |
237 | ||
238 | sg_cpu->util = util; | |
239 | sg_cpu->max = max; | |
58919e83 | 240 | sg_cpu->flags = flags; |
9bdcb44e RW |
241 | sg_cpu->last_update = time; |
242 | ||
243 | if (sugov_should_update_freq(sg_policy, time)) { | |
58919e83 | 244 | next_f = sugov_next_freq_shared(sg_cpu, util, max, flags); |
9bdcb44e RW |
245 | sugov_update_commit(sg_policy, time, next_f); |
246 | } | |
247 | ||
248 | raw_spin_unlock(&sg_policy->update_lock); | |
249 | } | |
250 | ||
251 | static void sugov_work(struct work_struct *work) | |
252 | { | |
253 | struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work); | |
254 | ||
255 | mutex_lock(&sg_policy->work_lock); | |
256 | __cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq, | |
257 | CPUFREQ_RELATION_L); | |
258 | mutex_unlock(&sg_policy->work_lock); | |
259 | ||
260 | sg_policy->work_in_progress = false; | |
261 | } | |
262 | ||
263 | static void sugov_irq_work(struct irq_work *irq_work) | |
264 | { | |
265 | struct sugov_policy *sg_policy; | |
266 | ||
267 | sg_policy = container_of(irq_work, struct sugov_policy, irq_work); | |
268 | schedule_work_on(smp_processor_id(), &sg_policy->work); | |
269 | } | |
270 | ||
271 | /************************** sysfs interface ************************/ | |
272 | ||
/*
 * Tunables shared by all policies when governor tunables are not per-policy
 * (i.e. !have_governor_per_policy()); NULL when no such set exists.
 */
static struct sugov_tunables *global_tunables;
/* Taken by sugov_init() and sugov_exit() around tunables setup/teardown. */
static DEFINE_MUTEX(global_tunables_lock);
275 | ||
276 | static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set) | |
277 | { | |
278 | return container_of(attr_set, struct sugov_tunables, attr_set); | |
279 | } | |
280 | ||
281 | static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf) | |
282 | { | |
283 | struct sugov_tunables *tunables = to_sugov_tunables(attr_set); | |
284 | ||
285 | return sprintf(buf, "%u\n", tunables->rate_limit_us); | |
286 | } | |
287 | ||
288 | static ssize_t rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, | |
289 | size_t count) | |
290 | { | |
291 | struct sugov_tunables *tunables = to_sugov_tunables(attr_set); | |
292 | struct sugov_policy *sg_policy; | |
293 | unsigned int rate_limit_us; | |
294 | ||
295 | if (kstrtouint(buf, 10, &rate_limit_us)) | |
296 | return -EINVAL; | |
297 | ||
298 | tunables->rate_limit_us = rate_limit_us; | |
299 | ||
300 | list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook) | |
301 | sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC; | |
302 | ||
303 | return count; | |
304 | } | |
305 | ||
/* Read-write "rate_limit_us" attribute, backed by rate_limit_us_show/_store. */
static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);

/* NULL-terminated list of attributes created for every tunables kobject. */
static struct attribute *sugov_attributes[] = {
	&rate_limit_us.attr,
	NULL
};

/* kobject type used for the tunables directory in sysfs. */
static struct kobj_type sugov_tunables_ktype = {
	.default_attrs = sugov_attributes,
	.sysfs_ops = &governor_sysfs_ops,
};
317 | ||
318 | /********************** cpufreq governor interface *********************/ | |
319 | ||
/* Forward declaration: sugov_init() needs the governor name before the definition. */
static struct cpufreq_governor schedutil_gov;
321 | ||
322 | static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy) | |
323 | { | |
324 | struct sugov_policy *sg_policy; | |
325 | ||
326 | sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL); | |
327 | if (!sg_policy) | |
328 | return NULL; | |
329 | ||
330 | sg_policy->policy = policy; | |
331 | init_irq_work(&sg_policy->irq_work, sugov_irq_work); | |
332 | INIT_WORK(&sg_policy->work, sugov_work); | |
333 | mutex_init(&sg_policy->work_lock); | |
334 | raw_spin_lock_init(&sg_policy->update_lock); | |
335 | return sg_policy; | |
336 | } | |
337 | ||
/* Release governor state obtained from sugov_policy_alloc(). */
static void sugov_policy_free(struct sugov_policy *sg_policy)
{
	/* Destroy the mutex before the memory that holds it goes away. */
	mutex_destroy(&sg_policy->work_lock);
	kfree(sg_policy);
}
343 | ||
344 | static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy) | |
345 | { | |
346 | struct sugov_tunables *tunables; | |
347 | ||
348 | tunables = kzalloc(sizeof(*tunables), GFP_KERNEL); | |
349 | if (tunables) { | |
350 | gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook); | |
351 | if (!have_governor_per_policy()) | |
352 | global_tunables = tunables; | |
353 | } | |
354 | return tunables; | |
355 | } | |
356 | ||
/*
 * Free @tunables.  When tunables are not per-policy, also clear
 * global_tunables so the next sugov_init() allocates a fresh set.
 */
static void sugov_tunables_free(struct sugov_tunables *tunables)
{
	if (!have_governor_per_policy())
		global_tunables = NULL;

	kfree(tunables);
}
364 | ||
/*
 * Governor ->init callback: set up governor state and tunables for @policy.
 *
 * If a global tunables object already exists the policy is attached to it;
 * otherwise a new tunables object is allocated, given a default rate limit
 * and registered in sysfs.  On success fast switching is enabled for the
 * policy.
 *
 * Returns 0 on success, -EBUSY if the policy already has governor data,
 * -ENOMEM on allocation failure, -EINVAL if global tunables exist although
 * tunables are per-policy, or the error from kobject_init_and_add().
 */
static int sugov_init(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy;
	struct sugov_tunables *tunables;
	unsigned int lat;
	int ret = 0;

	/* State should be equivalent to EXIT */
	if (policy->governor_data)
		return -EBUSY;

	sg_policy = sugov_policy_alloc(policy);
	if (!sg_policy)
		return -ENOMEM;

	mutex_lock(&global_tunables_lock);

	if (global_tunables) {
		/* Global tunables must not exist when tunables are per-policy. */
		if (WARN_ON(have_governor_per_policy())) {
			ret = -EINVAL;
			goto free_sg_policy;
		}
		policy->governor_data = sg_policy;
		sg_policy->tunables = global_tunables;

		/* Attach this policy to the existing attribute set. */
		gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook);
		goto out;
	}

	tunables = sugov_tunables_alloc(sg_policy);
	if (!tunables) {
		ret = -ENOMEM;
		goto free_sg_policy;
	}

	/*
	 * Default rate limit: LATENCY_MULTIPLIER times the transition latency
	 * in microseconds, or just LATENCY_MULTIPLIER when that latency
	 * rounds down to zero.
	 */
	tunables->rate_limit_us = LATENCY_MULTIPLIER;
	lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
	if (lat)
		tunables->rate_limit_us *= lat;

	policy->governor_data = sg_policy;
	sg_policy->tunables = tunables;

	ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype,
				   get_governor_parent_kobj(policy), "%s",
				   schedutil_gov.name);
	if (ret)
		goto fail;

 out:
	mutex_unlock(&global_tunables_lock);

	cpufreq_enable_fast_switch(policy);
	return 0;

 fail:
	/*
	 * NOTE(review): the kobject API documentation asks for kobject_put()
	 * after a failed kobject_init_and_add(); this path frees the tunables
	 * directly instead.  Confirm whether the kobject's name is leaked here.
	 */
	policy->governor_data = NULL;
	sugov_tunables_free(tunables);

 free_sg_policy:
	mutex_unlock(&global_tunables_lock);

	sugov_policy_free(sg_policy);
	pr_err("initialization failed (error %d)\n", ret);
	return ret;
}
431 | ||
e788892b | 432 | static void sugov_exit(struct cpufreq_policy *policy) |
9bdcb44e RW |
433 | { |
434 | struct sugov_policy *sg_policy = policy->governor_data; | |
435 | struct sugov_tunables *tunables = sg_policy->tunables; | |
436 | unsigned int count; | |
437 | ||
6c9d9c81 RW |
438 | cpufreq_disable_fast_switch(policy); |
439 | ||
9bdcb44e RW |
440 | mutex_lock(&global_tunables_lock); |
441 | ||
442 | count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook); | |
443 | policy->governor_data = NULL; | |
444 | if (!count) | |
445 | sugov_tunables_free(tunables); | |
446 | ||
447 | mutex_unlock(&global_tunables_lock); | |
448 | ||
449 | sugov_policy_free(sg_policy); | |
9bdcb44e RW |
450 | } |
451 | ||
452 | static int sugov_start(struct cpufreq_policy *policy) | |
453 | { | |
454 | struct sugov_policy *sg_policy = policy->governor_data; | |
455 | unsigned int cpu; | |
456 | ||
457 | sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC; | |
458 | sg_policy->last_freq_update_time = 0; | |
459 | sg_policy->next_freq = UINT_MAX; | |
460 | sg_policy->work_in_progress = false; | |
461 | sg_policy->need_freq_update = false; | |
462 | ||
463 | for_each_cpu(cpu, policy->cpus) { | |
464 | struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu); | |
465 | ||
466 | sg_cpu->sg_policy = sg_policy; | |
467 | if (policy_is_shared(policy)) { | |
58919e83 | 468 | sg_cpu->util = 0; |
9bdcb44e | 469 | sg_cpu->max = 0; |
58919e83 | 470 | sg_cpu->flags = SCHED_CPUFREQ_RT; |
9bdcb44e | 471 | sg_cpu->last_update = 0; |
5cbea469 | 472 | sg_cpu->cached_raw_freq = 0; |
9bdcb44e RW |
473 | cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, |
474 | sugov_update_shared); | |
475 | } else { | |
476 | cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, | |
477 | sugov_update_single); | |
478 | } | |
479 | } | |
480 | return 0; | |
481 | } | |
482 | ||
/*
 * Governor ->stop callback: remove the utilization update hooks from all CPUs
 * of @policy and wait for everything they may have started.
 */
static void sugov_stop(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	unsigned int cpu;

	for_each_cpu(cpu, policy->cpus)
		cpufreq_remove_update_util_hook(cpu);

	/* Presumably waits for hook invocations already in flight — confirm. */
	synchronize_sched();

	/* Then flush the slow path: first the irq_work, then the work it queues. */
	irq_work_sync(&sg_policy->irq_work);
	cancel_work_sync(&sg_policy->work);
}
496 | ||
/*
 * Governor ->limits callback: react to a change of the policy limits.
 *
 * Without fast switching the limits are applied immediately under work_lock,
 * the same lock sugov_work() holds around its driver call.  In all cases the
 * next utilization update is forced to re-evaluate the frequency.
 */
static void sugov_limits(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;

	if (!policy->fast_switch_enabled) {
		mutex_lock(&sg_policy->work_lock);
		cpufreq_policy_apply_limits(policy);
		mutex_unlock(&sg_policy->work_lock);
	}

	/* Consumed by sugov_should_update_freq(), which bypasses the rate limit. */
	sg_policy->need_freq_update = true;
}
509 | ||
/* The "schedutil" governor descriptor registered with the cpufreq core. */
static struct cpufreq_governor schedutil_gov = {
	.name = "schedutil",
	.owner = THIS_MODULE,
	.init = sugov_init,
	.exit = sugov_exit,
	.start = sugov_start,
	.stop = sugov_stop,
	.limits = sugov_limits,
};
519 | ||
9bdcb44e RW |
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
/* Return schedutil as the default governor when the kernel is configured so. */
struct cpufreq_governor *cpufreq_default_governor(void)
{
	return &schedutil_gov;
}
#endif
58919e83 RW |
526 | |
/* Register the schedutil governor with the cpufreq core at fs_initcall time. */
static int __init sugov_register(void)
{
	return cpufreq_register_governor(&schedutil_gov);
}
fs_initcall(sugov_register);