drivers/cpufreq/cpufreq_governor.c

   1 /*
   2  * drivers/cpufreq/cpufreq_governor.c
   3  *
   4  * CPUFREQ governors common code
   5  *
   6  * Copyright    (C) 2001 Russell King
   7  *              (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
   8  *              (C) 2003 Jun Nakajima <jun.nakajima@intel.com>
   9  *              (C) 2009 Alexander Clouter <alex@digriz.org.uk>
  10  *              (c) 2012 Viresh Kumar <viresh.kumar@linaro.org>
  11  *
  12  * This program is free software; you can redistribute it and/or modify
  13  * it under the terms of the GNU General Public License version 2 as
  14  * published by the Free Software Foundation.
  15  */
  16
  17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  18
  19 #include <linux/export.h>
  20 #include <linux/kernel_stat.h>
  21 #include <linux/slab.h>
  22
  23 #include "cpufreq_governor.h"
  24
  25 static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data)
  26 {
  27         if (have_governor_per_policy())
  28                 return dbs_data->cdata->attr_group_gov_pol;
  29         else
  30                 return dbs_data->cdata->attr_group_gov_sys;
  31 }
  32
  33 void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
  34 {
  35         struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
  36         struct od_dbs_tuners *od_tuners = dbs_data->tuners;
  37         struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
  38         struct cpufreq_policy *policy = cdbs->shared->policy;
  39         unsigned int sampling_rate;
  40         unsigned int max_load = 0;
  41         unsigned int ignore_nice;
  42         unsigned int j;
  43
  44         if (dbs_data->cdata->governor == GOV_ONDEMAND) {
  45                 struct od_cpu_dbs_info_s *od_dbs_info =
  46                                 dbs_data->cdata->get_cpu_dbs_info_s(cpu);
  47
  48                 /*
  49                  * Sometimes, the ondemand governor uses an additional
  50                  * multiplier to give long delays. So apply this multiplier to
  51                  * the 'sampling_rate', so as to keep the wake-up-from-idle
  52                  * detection logic a bit conservative.
  53                  */
  54                 sampling_rate = od_tuners->sampling_rate;
  55                 sampling_rate *= od_dbs_info->rate_mult;
  56
  57                 ignore_nice = od_tuners->ignore_nice_load;
  58         } else {
  59                 sampling_rate = cs_tuners->sampling_rate;
  60                 ignore_nice = cs_tuners->ignore_nice_load;
  61         }
  62
  63         /* Get Absolute Load */
  64         for_each_cpu(j, policy->cpus) {
  65                 struct cpu_dbs_info *j_cdbs;
  66                 u64 cur_wall_time, cur_idle_time;
  67                 unsigned int idle_time, wall_time;
  68                 unsigned int load;
  69                 int io_busy = 0;
  70
  71                 j_cdbs = dbs_data->cdata->get_cpu_cdbs(j);
  72
  73                 /*
  74                  * For the purpose of ondemand, waiting for disk IO is
  75                  * an indication that you're performance critical, and
  76                  * not that the system is actually idle. So do not add
  77                  * the iowait time to the cpu idle time.
  78                  */
  79                 if (dbs_data->cdata->governor == GOV_ONDEMAND)
  80                         io_busy = od_tuners->io_is_busy;
  81                 cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy);
  82
  83                 wall_time = (unsigned int)
  84                         (cur_wall_time - j_cdbs->prev_cpu_wall);
  85                 j_cdbs->prev_cpu_wall = cur_wall_time;
  86
  87                 if (cur_idle_time < j_cdbs->prev_cpu_idle)
  88                         cur_idle_time = j_cdbs->prev_cpu_idle;
  89
  90                 idle_time = (unsigned int)
  91                         (cur_idle_time - j_cdbs->prev_cpu_idle);
  92                 j_cdbs->prev_cpu_idle = cur_idle_time;
  93
  94                 if (ignore_nice) {
  95                         u64 cur_nice;
  96                         unsigned long cur_nice_jiffies;
  97
  98                         cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
  99                                          cdbs->prev_cpu_nice;
 100                         /*
 101                          * Assumption: nice time between sampling periods will
 102                          * be less than 2^32 jiffies for 32 bit sys
 103                          */
 104                         cur_nice_jiffies = (unsigned long)
 105                                         cputime64_to_jiffies64(cur_nice);
 106
 107                         cdbs->prev_cpu_nice =
 108                                 kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 109                         idle_time += jiffies_to_usecs(cur_nice_jiffies);
 110                 }
 111
 112                 if (unlikely(!wall_time || wall_time < idle_time))
 113                         continue;
 114
 115                 /*
 116                  * If the CPU had gone completely idle, and a task just woke up
 117                  * on this CPU now, it would be unfair to calculate 'load' the
 118                  * usual way for this elapsed time-window, because it will show
 119                  * near-zero load, irrespective of how CPU intensive that task
 120                  * actually is. This is undesirable for latency-sensitive bursty
 121                  * workloads.
 122                  *
 123                  * To avoid this, we reuse the 'load' from the previous
 124                  * time-window and give this task a chance to start with a
 125                  * reasonably high CPU frequency. (However, we shouldn't over-do
 126                  * this copy, lest we get stuck at a high load (high frequency)
 127                  * for too long, even when the current system load has actually
 128                  * dropped down. So we perform the copy only once, upon the
 129                  * first wake-up from idle.)
 130                  *
 131                  * Detecting this situation is easy: the governor's utilization
 132                  * update handler would not have run during CPU-idle periods.
 133                  * Hence, an unusually large 'wall_time' (as compared to the
 134                  * sampling rate) indicates this scenario.
 135                  *
 136                  * prev_load can be zero in two cases and we must recalculate it
 137                  * for both cases:
 138                  * - during long idle intervals
 139                  * - explicitly set to zero
 140                  */
 141                 if (unlikely(wall_time > (2 * sampling_rate) &&
 142                              j_cdbs->prev_load)) {
 143                         load = j_cdbs->prev_load;
 144
 145                         /*
 146                          * Perform a destructive copy, to ensure that we copy
 147                          * the previous load only once, upon the first wake-up
 148                          * from idle.
 149                          */
 150                         j_cdbs->prev_load = 0;
 151                 } else {
 152                         load = 100 * (wall_time - idle_time) / wall_time;
 153                         j_cdbs->prev_load = load;
 154                 }
 155
 156                 if (load > max_load)
 157                         max_load = load;
 158         }
 159
 160         dbs_data->cdata->gov_check_cpu(cpu, max_load);
 161 }
 162 EXPORT_SYMBOL_GPL(dbs_check_cpu);
 163
 164 void gov_set_update_util(struct cpu_common_dbs_info *shared,
 165                          unsigned int delay_us)
 166 {
 167         struct cpufreq_policy *policy = shared->policy;
 168         struct dbs_data *dbs_data = policy->governor_data;
 169         int cpu;
 170
 171         gov_update_sample_delay(shared, delay_us);
 172         shared->last_sample_time = 0;
 173
 174         for_each_cpu(cpu, policy->cpus) {
 175                 struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
 176
 177                 cpufreq_set_update_util_data(cpu, &cdbs->update_util);
 178         }
 179 }
 180 EXPORT_SYMBOL_GPL(gov_set_update_util);
 181
 182 static inline void gov_clear_update_util(struct cpufreq_policy *policy)
 183 {
 184         int i;
 185
 186         for_each_cpu(i, policy->cpus)
 187                 cpufreq_set_update_util_data(i, NULL);
 188
 189         synchronize_rcu();
 190 }
 191
 192 static void gov_cancel_work(struct cpu_common_dbs_info *shared)
 193 {
 194         /* Tell dbs_update_util_handler() to skip queuing up work items. */
 195         atomic_inc(&shared->skip_work);
 196         /*
 197          * If dbs_update_util_handler() is already running, it may not notice
 198          * the incremented skip_work, so wait for it to complete to prevent its
 199          * work item from being queued up after the cancel_work_sync() below.
 200          */
 201         gov_clear_update_util(shared->policy);
 202         irq_work_sync(&shared->irq_work);
 203         cancel_work_sync(&shared->work);
 204         atomic_set(&shared->skip_work, 0);
 205 }
 206
 207 static void dbs_work_handler(struct work_struct *work)
 208 {
 209         struct cpu_common_dbs_info *shared = container_of(work, struct
 210                                         cpu_common_dbs_info, work);
 211         struct cpufreq_policy *policy;
 212         struct dbs_data *dbs_data;
 213         unsigned int delay;
 214
 215         policy = shared->policy;
 216         dbs_data = policy->governor_data;
 217
 218         /*
 219          * Make sure cpufreq_governor_limits() isn't evaluating load or the
 220          * ondemand governor isn't updating the sampling rate in parallel.
 221          */
 222         mutex_lock(&shared->timer_mutex);
 223         delay = dbs_data->cdata->gov_dbs_timer(policy);
 224         shared->sample_delay_ns = jiffies_to_nsecs(delay);
 225         mutex_unlock(&shared->timer_mutex);
 226
 227         /*
 228          * If the atomic operation below is reordered with respect to the
 229          * sample delay modification, the utilization update handler may end
 230          * up using a stale sample delay value.
 231          */
 232         smp_mb__before_atomic();
 233         atomic_dec(&shared->skip_work);
 234 }
 235
 236 static void dbs_irq_work(struct irq_work *irq_work)
 237 {
 238         struct cpu_common_dbs_info *shared;
 239
 240         shared = container_of(irq_work, struct cpu_common_dbs_info, irq_work);
 241         schedule_work(&shared->work);
 242 }
 243
 244 static inline void gov_queue_irq_work(struct cpu_common_dbs_info *shared)
 245 {
 246 #ifdef CONFIG_SMP
 247         irq_work_queue_on(&shared->irq_work, smp_processor_id());
 248 #else
 249         irq_work_queue(&shared->irq_work);
 250 #endif
 251 }
 252
 253 static void dbs_update_util_handler(struct update_util_data *data, u64 time,
 254                                     unsigned long util, unsigned long max)
 255 {
 256         struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info, update_util);
 257         struct cpu_common_dbs_info *shared = cdbs->shared;
 258
 259         /*
 260          * The work may not be allowed to be queued up right now.
 261          * Possible reasons:
 262          * - Work has already been queued up or is in progress.
 263          * - The governor is being stopped.
 264          * - It is too early (too little time from the previous sample).
 265          */
 266         if (atomic_inc_return(&shared->skip_work) == 1) {
 267                 u64 delta_ns;
 268
 269                 delta_ns = time - shared->last_sample_time;
 270                 if ((s64)delta_ns >= shared->sample_delay_ns) {
 271                         shared->last_sample_time = time;
 272                         gov_queue_irq_work(shared);
 273                         return;
 274                 }
 275         }
 276         atomic_dec(&shared->skip_work);
 277 }
 278
 279 static void set_sampling_rate(struct dbs_data *dbs_data,
 280                 unsigned int sampling_rate)
 281 {
 282         if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
 283                 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
 284                 cs_tuners->sampling_rate = sampling_rate;
 285         } else {
 286                 struct od_dbs_tuners *od_tuners = dbs_data->tuners;
 287                 od_tuners->sampling_rate = sampling_rate;
 288         }
 289 }
 290
 291 static int alloc_common_dbs_info(struct cpufreq_policy *policy,
 292                                  struct common_dbs_data *cdata)
 293 {
 294         struct cpu_common_dbs_info *shared;
 295         int j;
 296
 297         /* Allocate memory for the common information for policy->cpus */
 298         shared = kzalloc(sizeof(*shared), GFP_KERNEL);
 299         if (!shared)
 300                 return -ENOMEM;
 301
 302         /* Set shared for all CPUs, online+offline */
 303         for_each_cpu(j, policy->related_cpus)
 304                 cdata->get_cpu_cdbs(j)->shared = shared;
 305
 306         mutex_init(&shared->timer_mutex);
 307         atomic_set(&shared->skip_work, 0);
 308         init_irq_work(&shared->irq_work, dbs_irq_work);
 309         INIT_WORK(&shared->work, dbs_work_handler);
 310         return 0;
 311 }
 312
 313 static void free_common_dbs_info(struct cpufreq_policy *policy,
 314                                  struct common_dbs_data *cdata)
 315 {
 316         struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu);
 317         struct cpu_common_dbs_info *shared = cdbs->shared;
 318         int j;
 319
 320         mutex_destroy(&shared->timer_mutex);
 321
 322         for_each_cpu(j, policy->cpus)
 323                 cdata->get_cpu_cdbs(j)->shared = NULL;
 324
 325         kfree(shared);
 326 }
 327
 328 static int cpufreq_governor_init(struct cpufreq_policy *policy,
 329                                  struct dbs_data *dbs_data,
 330                                  struct common_dbs_data *cdata)
 331 {
 332         unsigned int latency;
 333         int ret;
 334
 335         /* State should be equivalent to EXIT */
 336         if (policy->governor_data)
 337                 return -EBUSY;
 338
 339         if (dbs_data) {
 340                 if (WARN_ON(have_governor_per_policy()))
 341                         return -EINVAL;
 342
 343                 ret = alloc_common_dbs_info(policy, cdata);
 344                 if (ret)
 345                         return ret;
 346
 347                 dbs_data->usage_count++;
 348                 policy->governor_data = dbs_data;
 349                 return 0;
 350         }
 351
 352         dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL);
 353         if (!dbs_data)
 354                 return -ENOMEM;
 355
 356         ret = alloc_common_dbs_info(policy, cdata);
 357         if (ret)
 358                 goto free_dbs_data;
 359
 360         dbs_data->cdata = cdata;
 361         dbs_data->usage_count = 1;
 362
 363         ret = cdata->init(dbs_data, !policy->governor->initialized);
 364         if (ret)
 365                 goto free_common_dbs_info;
 366
 367         /* policy latency is in ns. Convert it to us first */
 368         latency = policy->cpuinfo.transition_latency / 1000;
 369         if (latency == 0)
 370                 latency = 1;
 371
 372         /* Bring kernel and HW constraints together */
 373         dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate,
 374                                           MIN_LATENCY_MULTIPLIER * latency);
 375         set_sampling_rate(dbs_data, max(dbs_data->min_sampling_rate,
 376                                         latency * LATENCY_MULTIPLIER));
 377
 378         if (!have_governor_per_policy())
 379                 cdata->gdbs_data = dbs_data;
 380
 381         policy->governor_data = dbs_data;
 382
 383         ret = sysfs_create_group(get_governor_parent_kobj(policy),
 384                                  get_sysfs_attr(dbs_data));
 385         if (ret)
 386                 goto reset_gdbs_data;
 387
 388         return 0;
 389
 390 reset_gdbs_data:
 391         policy->governor_data = NULL;
 392
 393         if (!have_governor_per_policy())
 394                 cdata->gdbs_data = NULL;
 395         cdata->exit(dbs_data, !policy->governor->initialized);
 396 free_common_dbs_info:
 397         free_common_dbs_info(policy, cdata);
 398 free_dbs_data:
 399         kfree(dbs_data);
 400         return ret;
 401 }
 402
 403 static int cpufreq_governor_exit(struct cpufreq_policy *policy,
 404                                  struct dbs_data *dbs_data)
 405 {
 406         struct common_dbs_data *cdata = dbs_data->cdata;
 407         struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu);
 408
 409         /* State should be equivalent to INIT */
 410         if (!cdbs->shared || cdbs->shared->policy)
 411                 return -EBUSY;
 412
 413         if (!--dbs_data->usage_count) {
 414                 sysfs_remove_group(get_governor_parent_kobj(policy),
 415                                    get_sysfs_attr(dbs_data));
 416
 417                 policy->governor_data = NULL;
 418
 419                 if (!have_governor_per_policy())
 420                         cdata->gdbs_data = NULL;
 421
 422                 cdata->exit(dbs_data, policy->governor->initialized == 1);
 423                 kfree(dbs_data);
 424         } else {
 425                 policy->governor_data = NULL;
 426         }
 427
 428         free_common_dbs_info(policy, cdata);
 429         return 0;
 430 }
 431
 432 static int cpufreq_governor_start(struct cpufreq_policy *policy,
 433                                   struct dbs_data *dbs_data)
 434 {
 435         struct common_dbs_data *cdata = dbs_data->cdata;
 436         unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu;
 437         struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu);
 438         struct cpu_common_dbs_info *shared = cdbs->shared;
 439         int io_busy = 0;
 440
 441         if (!policy->cur)
 442                 return -EINVAL;
 443
 444         /* State should be equivalent to INIT */
 445         if (!shared || shared->policy)
 446                 return -EBUSY;
 447
 448         if (cdata->governor == GOV_CONSERVATIVE) {
 449                 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
 450
 451                 sampling_rate = cs_tuners->sampling_rate;
 452                 ignore_nice = cs_tuners->ignore_nice_load;
 453         } else {
 454                 struct od_dbs_tuners *od_tuners = dbs_data->tuners;
 455
 456                 sampling_rate = od_tuners->sampling_rate;
 457                 ignore_nice = od_tuners->ignore_nice_load;
 458                 io_busy = od_tuners->io_is_busy;
 459         }
 460
 461         for_each_cpu(j, policy->cpus) {
 462                 struct cpu_dbs_info *j_cdbs = cdata->get_cpu_cdbs(j);
 463                 unsigned int prev_load;
 464
 465                 j_cdbs->prev_cpu_idle =
 466                         get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy);
 467
 468                 prev_load = (unsigned int)(j_cdbs->prev_cpu_wall -
 469                                             j_cdbs->prev_cpu_idle);
 470                 j_cdbs->prev_load = 100 * prev_load /
 471                                     (unsigned int)j_cdbs->prev_cpu_wall;
 472
 473                 if (ignore_nice)
 474                         j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 475
 476                 j_cdbs->update_util.func = dbs_update_util_handler;
 477         }
 478         shared->policy = policy;
 479
 480         if (cdata->governor == GOV_CONSERVATIVE) {
 481                 struct cs_cpu_dbs_info_s *cs_dbs_info =
 482                         cdata->get_cpu_dbs_info_s(cpu);
 483
 484                 cs_dbs_info->down_skip = 0;
 485                 cs_dbs_info->requested_freq = policy->cur;
 486         } else {
 487                 struct od_ops *od_ops = cdata->gov_ops;
 488                 struct od_cpu_dbs_info_s *od_dbs_info = cdata->get_cpu_dbs_info_s(cpu);
 489
 490                 od_dbs_info->rate_mult = 1;
 491                 od_dbs_info->sample_type = OD_NORMAL_SAMPLE;
 492                 od_ops->powersave_bias_init_cpu(cpu);
 493         }
 494
 495         gov_set_update_util(shared, sampling_rate);
 496         return 0;
 497 }
 498
 499 static int cpufreq_governor_stop(struct cpufreq_policy *policy,
 500                                  struct dbs_data *dbs_data)
 501 {
 502         struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(policy->cpu);
 503         struct cpu_common_dbs_info *shared = cdbs->shared;
 504
 505         /* State should be equivalent to START */
 506         if (!shared || !shared->policy)
 507                 return -EBUSY;
 508
 509         gov_cancel_work(shared);
 510         shared->policy = NULL;
 511
 512         return 0;
 513 }
 514
 515 static int cpufreq_governor_limits(struct cpufreq_policy *policy,
 516                                    struct dbs_data *dbs_data)
 517 {
 518         struct common_dbs_data *cdata = dbs_data->cdata;
 519         unsigned int cpu = policy->cpu;
 520         struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu);
 521
 522         /* State should be equivalent to START */
 523         if (!cdbs->shared || !cdbs->shared->policy)
 524                 return -EBUSY;
 525
 526         mutex_lock(&cdbs->shared->timer_mutex);
 527         if (policy->max < cdbs->shared->policy->cur)
 528                 __cpufreq_driver_target(cdbs->shared->policy, policy->max,
 529                                         CPUFREQ_RELATION_H);
 530         else if (policy->min > cdbs->shared->policy->cur)
 531                 __cpufreq_driver_target(cdbs->shared->policy, policy->min,
 532                                         CPUFREQ_RELATION_L);
 533         dbs_check_cpu(dbs_data, cpu);
 534         mutex_unlock(&cdbs->shared->timer_mutex);
 535
 536         return 0;
 537 }
 538
 539 int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 540                          struct common_dbs_data *cdata, unsigned int event)
 541 {
 542         struct dbs_data *dbs_data;
 543         int ret;
 544
 545         /* Lock governor to block concurrent initialization of governor */
 546         mutex_lock(&cdata->mutex);
 547
 548         if (have_governor_per_policy())
 549                 dbs_data = policy->governor_data;
 550         else
 551                 dbs_data = cdata->gdbs_data;
 552
 553         if (!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT)) {
 554                 ret = -EINVAL;
 555                 goto unlock;
 556         }
 557
 558         switch (event) {
 559         case CPUFREQ_GOV_POLICY_INIT:
 560                 ret = cpufreq_governor_init(policy, dbs_data, cdata);
 561                 break;
 562         case CPUFREQ_GOV_POLICY_EXIT:
 563                 ret = cpufreq_governor_exit(policy, dbs_data);
 564                 break;
 565         case CPUFREQ_GOV_START:
 566                 ret = cpufreq_governor_start(policy, dbs_data);
 567                 break;
 568         case CPUFREQ_GOV_STOP:
 569                 ret = cpufreq_governor_stop(policy, dbs_data);
 570                 break;
 571         case CPUFREQ_GOV_LIMITS:
 572                 ret = cpufreq_governor_limits(policy, dbs_data);
 573                 break;
 574         default:
 575                 ret = -EINVAL;
 576         }
 577
 578 unlock:
 579         mutex_unlock(&cdata->mutex);
 580
 581         return ret;
 582 }
 583 EXPORT_SYMBOL_GPL(cpufreq_governor_dbs);