6bc2f50cc1d9774d4e3e962770e25bee0338834f
[deliverable/linux.git] / drivers / cpufreq / cpufreq_governor.c
1 /*
2 * drivers/cpufreq/cpufreq_governor.c
3 *
4 * CPUFREQ governors common code
5 *
6 * Copyright (C) 2001 Russell King
7 * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
8 * (C) 2003 Jun Nakajima <jun.nakajima@intel.com>
9 * (C) 2009 Alexander Clouter <alex@digriz.org.uk>
10 * (c) 2012 Viresh Kumar <viresh.kumar@linaro.org>
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License version 2 as
14 * published by the Free Software Foundation.
15 */
16
17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
18
19 #include <linux/export.h>
20 #include <linux/kernel_stat.h>
21 #include <linux/slab.h>
22
23 #include "cpufreq_governor.h"
24
25 static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data)
26 {
27 if (have_governor_per_policy())
28 return dbs_data->cdata->attr_group_gov_pol;
29 else
30 return dbs_data->cdata->attr_group_gov_sys;
31 }
32
33 void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
34 {
35 struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
36 struct od_dbs_tuners *od_tuners = dbs_data->tuners;
37 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
38 struct cpufreq_policy *policy = cdbs->shared->policy;
39 unsigned int sampling_rate;
40 unsigned int max_load = 0;
41 unsigned int ignore_nice;
42 unsigned int j;
43
44 if (dbs_data->cdata->governor == GOV_ONDEMAND) {
45 struct od_cpu_dbs_info_s *od_dbs_info =
46 dbs_data->cdata->get_cpu_dbs_info_s(cpu);
47
48 /*
49 * Sometimes, the ondemand governor uses an additional
50 * multiplier to give long delays. So apply this multiplier to
51 * the 'sampling_rate', so as to keep the wake-up-from-idle
52 * detection logic a bit conservative.
53 */
54 sampling_rate = od_tuners->sampling_rate;
55 sampling_rate *= od_dbs_info->rate_mult;
56
57 ignore_nice = od_tuners->ignore_nice_load;
58 } else {
59 sampling_rate = cs_tuners->sampling_rate;
60 ignore_nice = cs_tuners->ignore_nice_load;
61 }
62
63 /* Get Absolute Load */
64 for_each_cpu(j, policy->cpus) {
65 struct cpu_dbs_info *j_cdbs;
66 u64 cur_wall_time, cur_idle_time;
67 unsigned int idle_time, wall_time;
68 unsigned int load;
69 int io_busy = 0;
70
71 j_cdbs = dbs_data->cdata->get_cpu_cdbs(j);
72
73 /*
74 * For the purpose of ondemand, waiting for disk IO is
75 * an indication that you're performance critical, and
76 * not that the system is actually idle. So do not add
77 * the iowait time to the cpu idle time.
78 */
79 if (dbs_data->cdata->governor == GOV_ONDEMAND)
80 io_busy = od_tuners->io_is_busy;
81 cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy);
82
83 wall_time = (unsigned int)
84 (cur_wall_time - j_cdbs->prev_cpu_wall);
85 j_cdbs->prev_cpu_wall = cur_wall_time;
86
87 if (cur_idle_time < j_cdbs->prev_cpu_idle)
88 cur_idle_time = j_cdbs->prev_cpu_idle;
89
90 idle_time = (unsigned int)
91 (cur_idle_time - j_cdbs->prev_cpu_idle);
92 j_cdbs->prev_cpu_idle = cur_idle_time;
93
94 if (ignore_nice) {
95 u64 cur_nice;
96 unsigned long cur_nice_jiffies;
97
98 cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
99 cdbs->prev_cpu_nice;
100 /*
101 * Assumption: nice time between sampling periods will
102 * be less than 2^32 jiffies for 32 bit sys
103 */
104 cur_nice_jiffies = (unsigned long)
105 cputime64_to_jiffies64(cur_nice);
106
107 cdbs->prev_cpu_nice =
108 kcpustat_cpu(j).cpustat[CPUTIME_NICE];
109 idle_time += jiffies_to_usecs(cur_nice_jiffies);
110 }
111
112 if (unlikely(!wall_time || wall_time < idle_time))
113 continue;
114
115 /*
116 * If the CPU had gone completely idle, and a task just woke up
117 * on this CPU now, it would be unfair to calculate 'load' the
118 * usual way for this elapsed time-window, because it will show
119 * near-zero load, irrespective of how CPU intensive that task
120 * actually is. This is undesirable for latency-sensitive bursty
121 * workloads.
122 *
123 * To avoid this, we reuse the 'load' from the previous
124 * time-window and give this task a chance to start with a
125 * reasonably high CPU frequency. (However, we shouldn't over-do
126 * this copy, lest we get stuck at a high load (high frequency)
127 * for too long, even when the current system load has actually
128 * dropped down. So we perform the copy only once, upon the
129 * first wake-up from idle.)
130 *
131 * Detecting this situation is easy: the governor's utilization
132 * update handler would not have run during CPU-idle periods.
133 * Hence, an unusually large 'wall_time' (as compared to the
134 * sampling rate) indicates this scenario.
135 *
136 * prev_load can be zero in two cases and we must recalculate it
137 * for both cases:
138 * - during long idle intervals
139 * - explicitly set to zero
140 */
141 if (unlikely(wall_time > (2 * sampling_rate) &&
142 j_cdbs->prev_load)) {
143 load = j_cdbs->prev_load;
144
145 /*
146 * Perform a destructive copy, to ensure that we copy
147 * the previous load only once, upon the first wake-up
148 * from idle.
149 */
150 j_cdbs->prev_load = 0;
151 } else {
152 load = 100 * (wall_time - idle_time) / wall_time;
153 j_cdbs->prev_load = load;
154 }
155
156 if (load > max_load)
157 max_load = load;
158 }
159
160 dbs_data->cdata->gov_check_cpu(cpu, max_load);
161 }
162 EXPORT_SYMBOL_GPL(dbs_check_cpu);
163
164 void gov_set_update_util(struct cpu_common_dbs_info *shared,
165 unsigned int delay_us)
166 {
167 struct cpufreq_policy *policy = shared->policy;
168 struct dbs_data *dbs_data = policy->governor_data;
169 int cpu;
170
171 gov_update_sample_delay(shared, delay_us);
172 shared->last_sample_time = 0;
173
174 for_each_cpu(cpu, policy->cpus) {
175 struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
176
177 cpufreq_set_update_util_data(cpu, &cdbs->update_util);
178 }
179 }
180 EXPORT_SYMBOL_GPL(gov_set_update_util);
181
182 static inline void gov_clear_update_util(struct cpufreq_policy *policy)
183 {
184 int i;
185
186 for_each_cpu(i, policy->cpus)
187 cpufreq_set_update_util_data(i, NULL);
188
189 synchronize_rcu();
190 }
191
192 static void gov_cancel_work(struct cpu_common_dbs_info *shared)
193 {
194 /* Tell dbs_update_util_handler() to skip queuing up work items. */
195 atomic_inc(&shared->skip_work);
196 /*
197 * If dbs_update_util_handler() is already running, it may not notice
198 * the incremented skip_work, so wait for it to complete to prevent its
199 * work item from being queued up after the cancel_work_sync() below.
200 */
201 gov_clear_update_util(shared->policy);
202 irq_work_sync(&shared->irq_work);
203 cancel_work_sync(&shared->work);
204 atomic_set(&shared->skip_work, 0);
205 }
206
207 static void dbs_work_handler(struct work_struct *work)
208 {
209 struct cpu_common_dbs_info *shared = container_of(work, struct
210 cpu_common_dbs_info, work);
211 struct cpufreq_policy *policy;
212 struct dbs_data *dbs_data;
213 unsigned int delay;
214
215 policy = shared->policy;
216 dbs_data = policy->governor_data;
217
218 /*
219 * Make sure cpufreq_governor_limits() isn't evaluating load or the
220 * ondemand governor isn't updating the sampling rate in parallel.
221 */
222 mutex_lock(&shared->timer_mutex);
223 delay = dbs_data->cdata->gov_dbs_timer(policy);
224 shared->sample_delay_ns = jiffies_to_nsecs(delay);
225 mutex_unlock(&shared->timer_mutex);
226
227 /*
228 * If the atomic operation below is reordered with respect to the
229 * sample delay modification, the utilization update handler may end
230 * up using a stale sample delay value.
231 */
232 smp_mb__before_atomic();
233 atomic_dec(&shared->skip_work);
234 }
235
236 static void dbs_irq_work(struct irq_work *irq_work)
237 {
238 struct cpu_common_dbs_info *shared;
239
240 shared = container_of(irq_work, struct cpu_common_dbs_info, irq_work);
241 schedule_work(&shared->work);
242 }
243
244 static inline void gov_queue_irq_work(struct cpu_common_dbs_info *shared)
245 {
246 #ifdef CONFIG_SMP
247 irq_work_queue_on(&shared->irq_work, smp_processor_id());
248 #else
249 irq_work_queue(&shared->irq_work);
250 #endif
251 }
252
253 static void dbs_update_util_handler(struct update_util_data *data, u64 time,
254 unsigned long util, unsigned long max)
255 {
256 struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info, update_util);
257 struct cpu_common_dbs_info *shared = cdbs->shared;
258
259 /*
260 * The work may not be allowed to be queued up right now.
261 * Possible reasons:
262 * - Work has already been queued up or is in progress.
263 * - The governor is being stopped.
264 * - It is too early (too little time from the previous sample).
265 */
266 if (atomic_inc_return(&shared->skip_work) == 1) {
267 u64 delta_ns;
268
269 delta_ns = time - shared->last_sample_time;
270 if ((s64)delta_ns >= shared->sample_delay_ns) {
271 shared->last_sample_time = time;
272 gov_queue_irq_work(shared);
273 return;
274 }
275 }
276 atomic_dec(&shared->skip_work);
277 }
278
279 static void set_sampling_rate(struct dbs_data *dbs_data,
280 unsigned int sampling_rate)
281 {
282 if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
283 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
284 cs_tuners->sampling_rate = sampling_rate;
285 } else {
286 struct od_dbs_tuners *od_tuners = dbs_data->tuners;
287 od_tuners->sampling_rate = sampling_rate;
288 }
289 }
290
291 static int alloc_common_dbs_info(struct cpufreq_policy *policy,
292 struct common_dbs_data *cdata)
293 {
294 struct cpu_common_dbs_info *shared;
295 int j;
296
297 /* Allocate memory for the common information for policy->cpus */
298 shared = kzalloc(sizeof(*shared), GFP_KERNEL);
299 if (!shared)
300 return -ENOMEM;
301
302 /* Set shared for all CPUs, online+offline */
303 for_each_cpu(j, policy->related_cpus)
304 cdata->get_cpu_cdbs(j)->shared = shared;
305
306 mutex_init(&shared->timer_mutex);
307 atomic_set(&shared->skip_work, 0);
308 init_irq_work(&shared->irq_work, dbs_irq_work);
309 INIT_WORK(&shared->work, dbs_work_handler);
310 return 0;
311 }
312
313 static void free_common_dbs_info(struct cpufreq_policy *policy,
314 struct common_dbs_data *cdata)
315 {
316 struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu);
317 struct cpu_common_dbs_info *shared = cdbs->shared;
318 int j;
319
320 mutex_destroy(&shared->timer_mutex);
321
322 for_each_cpu(j, policy->cpus)
323 cdata->get_cpu_cdbs(j)->shared = NULL;
324
325 kfree(shared);
326 }
327
328 static int cpufreq_governor_init(struct cpufreq_policy *policy,
329 struct dbs_data *dbs_data,
330 struct common_dbs_data *cdata)
331 {
332 unsigned int latency;
333 int ret;
334
335 /* State should be equivalent to EXIT */
336 if (policy->governor_data)
337 return -EBUSY;
338
339 if (dbs_data) {
340 if (WARN_ON(have_governor_per_policy()))
341 return -EINVAL;
342
343 ret = alloc_common_dbs_info(policy, cdata);
344 if (ret)
345 return ret;
346
347 dbs_data->usage_count++;
348 policy->governor_data = dbs_data;
349 return 0;
350 }
351
352 dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL);
353 if (!dbs_data)
354 return -ENOMEM;
355
356 ret = alloc_common_dbs_info(policy, cdata);
357 if (ret)
358 goto free_dbs_data;
359
360 dbs_data->cdata = cdata;
361 dbs_data->usage_count = 1;
362
363 ret = cdata->init(dbs_data, !policy->governor->initialized);
364 if (ret)
365 goto free_common_dbs_info;
366
367 /* policy latency is in ns. Convert it to us first */
368 latency = policy->cpuinfo.transition_latency / 1000;
369 if (latency == 0)
370 latency = 1;
371
372 /* Bring kernel and HW constraints together */
373 dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate,
374 MIN_LATENCY_MULTIPLIER * latency);
375 set_sampling_rate(dbs_data, max(dbs_data->min_sampling_rate,
376 latency * LATENCY_MULTIPLIER));
377
378 if (!have_governor_per_policy())
379 cdata->gdbs_data = dbs_data;
380
381 policy->governor_data = dbs_data;
382
383 ret = sysfs_create_group(get_governor_parent_kobj(policy),
384 get_sysfs_attr(dbs_data));
385 if (ret)
386 goto reset_gdbs_data;
387
388 return 0;
389
390 reset_gdbs_data:
391 policy->governor_data = NULL;
392
393 if (!have_governor_per_policy())
394 cdata->gdbs_data = NULL;
395 cdata->exit(dbs_data, !policy->governor->initialized);
396 free_common_dbs_info:
397 free_common_dbs_info(policy, cdata);
398 free_dbs_data:
399 kfree(dbs_data);
400 return ret;
401 }
402
403 static int cpufreq_governor_exit(struct cpufreq_policy *policy,
404 struct dbs_data *dbs_data)
405 {
406 struct common_dbs_data *cdata = dbs_data->cdata;
407 struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu);
408
409 /* State should be equivalent to INIT */
410 if (!cdbs->shared || cdbs->shared->policy)
411 return -EBUSY;
412
413 if (!--dbs_data->usage_count) {
414 sysfs_remove_group(get_governor_parent_kobj(policy),
415 get_sysfs_attr(dbs_data));
416
417 policy->governor_data = NULL;
418
419 if (!have_governor_per_policy())
420 cdata->gdbs_data = NULL;
421
422 cdata->exit(dbs_data, policy->governor->initialized == 1);
423 kfree(dbs_data);
424 } else {
425 policy->governor_data = NULL;
426 }
427
428 free_common_dbs_info(policy, cdata);
429 return 0;
430 }
431
432 static int cpufreq_governor_start(struct cpufreq_policy *policy,
433 struct dbs_data *dbs_data)
434 {
435 struct common_dbs_data *cdata = dbs_data->cdata;
436 unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu;
437 struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu);
438 struct cpu_common_dbs_info *shared = cdbs->shared;
439 int io_busy = 0;
440
441 if (!policy->cur)
442 return -EINVAL;
443
444 /* State should be equivalent to INIT */
445 if (!shared || shared->policy)
446 return -EBUSY;
447
448 if (cdata->governor == GOV_CONSERVATIVE) {
449 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
450
451 sampling_rate = cs_tuners->sampling_rate;
452 ignore_nice = cs_tuners->ignore_nice_load;
453 } else {
454 struct od_dbs_tuners *od_tuners = dbs_data->tuners;
455
456 sampling_rate = od_tuners->sampling_rate;
457 ignore_nice = od_tuners->ignore_nice_load;
458 io_busy = od_tuners->io_is_busy;
459 }
460
461 for_each_cpu(j, policy->cpus) {
462 struct cpu_dbs_info *j_cdbs = cdata->get_cpu_cdbs(j);
463 unsigned int prev_load;
464
465 j_cdbs->prev_cpu_idle =
466 get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy);
467
468 prev_load = (unsigned int)(j_cdbs->prev_cpu_wall -
469 j_cdbs->prev_cpu_idle);
470 j_cdbs->prev_load = 100 * prev_load /
471 (unsigned int)j_cdbs->prev_cpu_wall;
472
473 if (ignore_nice)
474 j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
475
476 j_cdbs->update_util.func = dbs_update_util_handler;
477 }
478 shared->policy = policy;
479
480 if (cdata->governor == GOV_CONSERVATIVE) {
481 struct cs_cpu_dbs_info_s *cs_dbs_info =
482 cdata->get_cpu_dbs_info_s(cpu);
483
484 cs_dbs_info->down_skip = 0;
485 cs_dbs_info->requested_freq = policy->cur;
486 } else {
487 struct od_ops *od_ops = cdata->gov_ops;
488 struct od_cpu_dbs_info_s *od_dbs_info = cdata->get_cpu_dbs_info_s(cpu);
489
490 od_dbs_info->rate_mult = 1;
491 od_dbs_info->sample_type = OD_NORMAL_SAMPLE;
492 od_ops->powersave_bias_init_cpu(cpu);
493 }
494
495 gov_set_update_util(shared, sampling_rate);
496 return 0;
497 }
498
499 static int cpufreq_governor_stop(struct cpufreq_policy *policy,
500 struct dbs_data *dbs_data)
501 {
502 struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(policy->cpu);
503 struct cpu_common_dbs_info *shared = cdbs->shared;
504
505 /* State should be equivalent to START */
506 if (!shared || !shared->policy)
507 return -EBUSY;
508
509 gov_cancel_work(shared);
510 shared->policy = NULL;
511
512 return 0;
513 }
514
515 static int cpufreq_governor_limits(struct cpufreq_policy *policy,
516 struct dbs_data *dbs_data)
517 {
518 struct common_dbs_data *cdata = dbs_data->cdata;
519 unsigned int cpu = policy->cpu;
520 struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu);
521
522 /* State should be equivalent to START */
523 if (!cdbs->shared || !cdbs->shared->policy)
524 return -EBUSY;
525
526 mutex_lock(&cdbs->shared->timer_mutex);
527 if (policy->max < cdbs->shared->policy->cur)
528 __cpufreq_driver_target(cdbs->shared->policy, policy->max,
529 CPUFREQ_RELATION_H);
530 else if (policy->min > cdbs->shared->policy->cur)
531 __cpufreq_driver_target(cdbs->shared->policy, policy->min,
532 CPUFREQ_RELATION_L);
533 dbs_check_cpu(dbs_data, cpu);
534 mutex_unlock(&cdbs->shared->timer_mutex);
535
536 return 0;
537 }
538
539 int cpufreq_governor_dbs(struct cpufreq_policy *policy,
540 struct common_dbs_data *cdata, unsigned int event)
541 {
542 struct dbs_data *dbs_data;
543 int ret;
544
545 /* Lock governor to block concurrent initialization of governor */
546 mutex_lock(&cdata->mutex);
547
548 if (have_governor_per_policy())
549 dbs_data = policy->governor_data;
550 else
551 dbs_data = cdata->gdbs_data;
552
553 if (!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT)) {
554 ret = -EINVAL;
555 goto unlock;
556 }
557
558 switch (event) {
559 case CPUFREQ_GOV_POLICY_INIT:
560 ret = cpufreq_governor_init(policy, dbs_data, cdata);
561 break;
562 case CPUFREQ_GOV_POLICY_EXIT:
563 ret = cpufreq_governor_exit(policy, dbs_data);
564 break;
565 case CPUFREQ_GOV_START:
566 ret = cpufreq_governor_start(policy, dbs_data);
567 break;
568 case CPUFREQ_GOV_STOP:
569 ret = cpufreq_governor_stop(policy, dbs_data);
570 break;
571 case CPUFREQ_GOV_LIMITS:
572 ret = cpufreq_governor_limits(policy, dbs_data);
573 break;
574 default:
575 ret = -EINVAL;
576 }
577
578 unlock:
579 mutex_unlock(&cdata->mutex);
580
581 return ret;
582 }
583 EXPORT_SYMBOL_GPL(cpufreq_governor_dbs);
This page took 0.041535 seconds and 4 git commands to generate.