intel_pstate: Fix intel_pstate_get()
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index cd83d477e32d412394da574e8e02adb6dd7be832..b230ebaae66cb7ee0def1d228ff33c778343f344 100644
@@ -64,6 +64,25 @@ static inline int ceiling_fp(int32_t x)
        return ret;
 }
 
+/**
+ * struct sample -     Store performance sample
+ * @core_pct_busy:     Ratio of APERF/MPERF in percent, which is actual
+ *                     performance during last sample period
+ * @busy_scaled:       Scaled busy value which is used to calculate next
+ *                     P state. This can differ from core_pct_busy to
+ *                     account for CPU idle periods
+ * @aperf:             Difference of actual performance frequency clock count
+ *                     read from APERF MSR between last and current sample
+ * @mperf:             Difference of maximum performance frequency clock count
+ *                     read from MPERF MSR between last and current sample
+ * @tsc:               Difference of time stamp counter between last and
+ *                     current sample
+ * @freq:              Effective frequency calculated from APERF/MPERF
+ * @time:              Current time from scheduler
+ *
+ * This structure is used in the cpudata structure to store performance sample
+ * data for choosing the next P state.
+ */
 struct sample {
        int32_t core_pct_busy;
        int32_t busy_scaled;
@@ -71,9 +90,23 @@ struct sample {
        u64 mperf;
        u64 tsc;
        int freq;
-       ktime_t time;
+       u64 time;
 };
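
As a minimal sketch of the busy calculation documented above (see
intel_pstate_calc_busy() later in this diff), assuming the driver's 8-bit
fixed-point convention (FRAC_BITS is 8 in this file) and hypothetical MSR
deltas:

    #include <stdint.h>
    #include <stdio.h>

    #define FRAC_BITS 8
    #define int_tofp(x) ((int64_t)(x) << FRAC_BITS)
    #define fp_toint(x) ((x) >> FRAC_BITS)

    int main(void)
    {
        /* Hypothetical APERF/MPERF deltas for one sample period. */
        uint64_t aperf = 1800000;   /* actual cycles */
        uint64_t mperf = 2400000;   /* reference cycles */

        /* core_pct_busy = APERF/MPERF * 100, kept in 24.8 fixed point. */
        int64_t core_pct = int_tofp(aperf) * int_tofp(100) /
                           int_tofp(mperf);

        printf("core_pct_busy = %lld%%\n", (long long)fp_toint(core_pct));
        return 0;
    }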
 
+/**
+ * struct pstate_data - Store P state data
+ * @current_pstate:    Current requested P state
+ * @min_pstate:                Min P state possible for this platform
+ * @max_pstate:                Max P state possible for this platform
+ * @max_pstate_physical: Maximum physical P state for a processor.
+ *                     This can be higher than max_pstate, which may be
+ *                     limited by platform thermal design power limits
+ * @scaling:           Scaling factor to convert frequency to cpufreq
+ *                     frequency units
+ * @turbo_pstate:      Max Turbo P state possible for this platform
+ *
+ * Stores the per CPU model P state limits and current P state.
+ */
 struct pstate_data {
        int     current_pstate;
        int     min_pstate;
@@ -83,6 +116,19 @@ struct pstate_data {
        int     turbo_pstate;
 };
 
+/**
+ * struct vid_data -   Stores voltage information data
+ * @min:               VID data for this platform corresponding to
+ *                     the lowest P state
+ * @max:               VID data corresponding to the highest P state
+ * @turbo:             VID data for the turbo P state
+ * @ratio:             Ratio of (vid max - vid min) /
+ *                     (max P state - min P state)
+ *
+ * Stores the voltage data for DVFS (Dynamic Voltage and Frequency Scaling).
+ * This data is used on Atom platforms, where in addition to the target P
+ * state, the voltage data needs to be specified to select the next P state.
+ */
 struct vid_data {
        int min;
        int max;
@@ -90,6 +136,18 @@ struct vid_data {
        int32_t ratio;
 };
 
+/**
+ * struct _pid -       Stores PID data
+ * @setpoint:          Target set point for busyness or performance
+ * @integral:          Storage for accumulated error values
+ * @p_gain:            PID proportional gain
+ * @i_gain:            PID integral gain
+ * @d_gain:            PID derivative gain
+ * @deadband:          PID deadband
+ * @last_err:          Last error storage for integral part of PID calculation
+ *
+ * Stores PID coefficients and last error for PID controller.
+ */
 struct _pid {
        int setpoint;
        int32_t integral;
@@ -100,16 +158,33 @@ struct _pid {
        int32_t last_err;
 };
 
+/**
+ * struct cpudata -    Per CPU instance data storage
+ * @cpu:               CPU number for this instance data
+ * @update_util:       CPUFreq utilization update callback information
+ * @pstate:            Stores P state limits for this CPU
+ * @vid:               Stores VID limits for this CPU
+ * @pid:               Stores PID parameters for this CPU
+ * @last_sample_time:  Last sample time
+ * @prev_aperf:                Last APERF value read from APERF MSR
+ * @prev_mperf:                Last MPERF value read from MPERF MSR
+ * @prev_tsc:          Last timestamp counter (TSC) value
+ * @prev_cummulative_iowait: Cumulative IO wait time recorded at the last
+ *                     sample, used to compute the IO wait delta
+ * @sample:            Storage for last sample data
+ *
+ * This structure stores per CPU instance data for all CPUs.
+ */
 struct cpudata {
        int cpu;
 
-       struct timer_list timer;
+       struct update_util_data update_util;
 
        struct pstate_data pstate;
        struct vid_data vid;
        struct _pid pid;
 
-       ktime_t last_sample_time;
+       u64     last_sample_time;
        u64     prev_aperf;
        u64     prev_mperf;
        u64     prev_tsc;
@@ -118,8 +193,22 @@ struct cpudata {
 };
 
 static struct cpudata **all_cpu_data;
+
+/**
+ * struct pstate_adjust_policy - Stores static PID configuration data
+ * @sample_rate_ms:    PID calculation sample rate in ms
+ * @sample_rate_ns:    Sample rate in ns, derived from @sample_rate_ms
+ * @deadband:          PID deadband
+ * @setpoint:          PID Setpoint
+ * @p_gain_pct:                PID proportional gain
+ * @i_gain_pct:                PID integral gain
+ * @d_gain_pct:                PID derivative gain
+ *
+ * Stores per CPU model static PID configuration data.
+ */
 struct pstate_adjust_policy {
        int sample_rate_ms;
+       s64 sample_rate_ns;
        int deadband;
        int setpoint;
        int p_gain_pct;
@@ -127,17 +216,36 @@ struct pstate_adjust_policy {
        int i_gain_pct;
 };
 
+/**
+ * struct pstate_funcs - Per CPU model specific callbacks
+ * @get_max:           Callback to get maximum non turbo effective P state
+ * @get_max_physical:  Callback to get maximum non turbo physical P state
+ * @get_min:           Callback to get minimum P state
+ * @get_turbo:         Callback to get turbo P state
+ * @get_scaling:       Callback to get frequency scaling factor
+ * @get_val:           Callback to convert P state to actual MSR write value
+ * @get_vid:           Callback to get VID data for Atom platforms
+ * @get_target_pstate: Callback to calculate the next P state to use
+ *
+ * Core and Atom CPU models have different ways to get P state limits. This
+ * structure is used to store those callbacks.
+ */
 struct pstate_funcs {
        int (*get_max)(void);
        int (*get_max_physical)(void);
        int (*get_min)(void);
        int (*get_turbo)(void);
        int (*get_scaling)(void);
-       void (*set)(struct cpudata*, int pstate);
+       u64 (*get_val)(struct cpudata*, int pstate);
        void (*get_vid)(struct cpudata *);
        int32_t (*get_target_pstate)(struct cpudata *);
 };
 
+/**
+ * struct cpu_defaults - Per CPU model default config data
+ * @pid_policy:        PID config data
+ * @funcs:             Callback function data
+ */
 struct cpu_defaults {
        struct pstate_adjust_policy pid_policy;
        struct pstate_funcs funcs;
@@ -150,6 +258,34 @@ static struct pstate_adjust_policy pid_params;
 static struct pstate_funcs pstate_funcs;
 static int hwp_active;
 
+
+/**
+ * struct perf_limits - Store user and policy limits
+ * @no_turbo:          User requested turbo state from intel_pstate sysfs
+ * @turbo_disabled:    Platform turbo status, either read from
+ *                     MSR_IA32_MISC_ENABLE or inferred when the maximum
+ *                     available P state matches the maximum turbo P state
+ * @max_perf_pct:      Effective maximum performance limit in percentage; this
+ *                     is the minimum of the limit enforced by the cpufreq
+ *                     policy and the user limit set via intel_pstate sysfs
+ * @min_perf_pct:      Effective minimum performance limit in percentage; this
+ *                     is the maximum of the limit enforced by the cpufreq
+ *                     policy and the user limit set via intel_pstate sysfs
+ * @max_perf:          Scaled value between 0 and 255 for max_perf_pct,
+ *                     used to limit the max P state
+ * @min_perf:          Scaled value between 0 and 255 for min_perf_pct,
+ *                     used to limit the min P state
+ * @max_policy_pct:    The maximum performance in percentage enforced by
+ *                     the cpufreq setpolicy interface
+ * @max_sysfs_pct:     The maximum performance in percentage enforced by
+ *                     the intel_pstate sysfs interface
+ * @min_policy_pct:    The minimum performance in percentage enforced by
+ *                     the cpufreq setpolicy interface
+ * @min_sysfs_pct:     The minimum performance in percentage enforced by
+ *                     the intel_pstate sysfs interface
+ *
+ * Storage for user and policy defined limits.
+ */
 struct perf_limits {
        int no_turbo;
        int turbo_disabled;
@@ -197,8 +333,8 @@ static struct perf_limits *limits = &powersave_limits;
 
 static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
                             int deadband, int integral) {
-       pid->setpoint = setpoint;
-       pid->deadband  = deadband;
+       pid->setpoint = int_tofp(setpoint);
+       pid->deadband  = int_tofp(deadband);
        pid->integral  = int_tofp(integral);
        pid->last_err  = int_tofp(setpoint) - int_tofp(busy);
 }
@@ -224,9 +360,9 @@ static signed int pid_calc(struct _pid *pid, int32_t busy)
        int32_t pterm, dterm, fp_error;
        int32_t integral_limit;
 
-       fp_error = int_tofp(pid->setpoint) - busy;
+       fp_error = pid->setpoint - busy;
 
-       if (abs(fp_error) <= int_tofp(pid->deadband))
+       if (abs(fp_error) <= pid->deadband)
                return 0;
 
        pterm = mul_fp(pid->p_gain, fp_error);
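
The change above stores setpoint and deadband in fixed point once, at
pid_reset() time, so pid_calc() no longer converts them on every invocation.
A small sketch of the proportional-term path under the same fixed-point
convention; the gains and inputs are hypothetical:

    #include <stdint.h>
    #include <stdlib.h>

    #define FRAC_BITS 8
    #define int_tofp(x) ((int32_t)(x) << FRAC_BITS)
    #define mul_fp(x, y) ((int32_t)(((int64_t)(x) * (y)) >> FRAC_BITS))

    /* P-term only; setpoint/deadband arrive pre-converted, as in the diff. */
    static int32_t pid_p_term(int32_t setpoint, int32_t deadband,
                              int32_t p_gain, int32_t busy)
    {
        int32_t fp_error = setpoint - busy;   /* both already fixed point */

        if (abs(fp_error) <= deadband)
            return 0;                         /* inside the deadband */
        return mul_fp(p_gain, fp_error);
    }

    int main(void)
    {
        /* setpoint 97%, deadband 5%, p_gain 0.2, busy 80% (hypothetical) */
        return pid_p_term(int_tofp(97), int_tofp(5),
                          int_tofp(20) / 100, int_tofp(80)) > 0 ? 0 : 1;
    }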
@@ -286,7 +422,7 @@ static inline void update_turbo_state(void)
                 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
 }
 
-static void intel_pstate_hwp_set(void)
+static void intel_pstate_hwp_set(const struct cpumask *cpumask)
 {
        int min, hw_min, max, hw_max, cpu, range, adj_range;
        u64 value, cap;
@@ -296,9 +432,7 @@ static void intel_pstate_hwp_set(void)
        hw_max = HWP_HIGHEST_PERF(cap);
        range = hw_max - hw_min;
 
-       get_online_cpus();
-
-       for_each_online_cpu(cpu) {
+       for_each_cpu(cpu, cpumask) {
                rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
                adj_range = limits->min_perf_pct * range / 100;
                min = hw_min + adj_range;
@@ -317,7 +451,20 @@ static void intel_pstate_hwp_set(void)
                value |= HWP_MAX_PERF(max);
                wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
        }
+}
 
+static int intel_pstate_hwp_set_policy(struct cpufreq_policy *policy)
+{
+       if (hwp_active)
+               intel_pstate_hwp_set(policy->cpus);
+
+       return 0;
+}
+
+static void intel_pstate_hwp_set_online_cpus(void)
+{
+       get_online_cpus();
+       intel_pstate_hwp_set(cpu_online_mask);
        put_online_cpus();
 }
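
intel_pstate_hwp_set() maps the percentage limits onto the hardware's
capability range read from MSR_HWP_CAPABILITIES. A self-contained sketch of
that scaling with hypothetical register values; the max side, elided in the
hunk above, is assumed to mirror the min side:

    #include <stdio.h>

    int main(void)
    {
        /* Hypothetical HWP capability bounds and user limits. */
        int hw_min = 8, hw_max = 40;
        int min_perf_pct = 25, max_perf_pct = 100;
        int range = hw_max - hw_min;

        /* Percent of the range, offset from the hardware minimum. */
        int min = hw_min + min_perf_pct * range / 100;
        int max = hw_min + max_perf_pct * range / 100;
        if (max < min)      /* assumed clamp; the hunk elides this part */
            max = min;

        printf("HWP_REQUEST min=%d max=%d\n", min, max);
        return 0;
    }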
 
@@ -439,7 +586,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
        limits->no_turbo = clamp_t(int, input, 0, 1);
 
        if (hwp_active)
-               intel_pstate_hwp_set();
+               intel_pstate_hwp_set_online_cpus();
 
        return count;
 }
@@ -465,7 +612,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
                                  int_tofp(100));
 
        if (hwp_active)
-               intel_pstate_hwp_set();
+               intel_pstate_hwp_set_online_cpus();
        return count;
 }
 
@@ -490,7 +637,7 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
                                  int_tofp(100));
 
        if (hwp_active)
-               intel_pstate_hwp_set();
+               intel_pstate_hwp_set_online_cpus();
        return count;
 }
 
@@ -531,6 +678,9 @@ static void __init intel_pstate_sysfs_expose_params(void)
 
 static void intel_pstate_hwp_enable(struct cpudata *cpudata)
 {
+       /* First disable HWP notification interrupts as we don't process them */
+       wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
+
        wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
 }
 
@@ -558,7 +708,7 @@ static int atom_get_turbo_pstate(void)
        return value & 0x7F;
 }
 
-static void atom_set_pstate(struct cpudata *cpudata, int pstate)
+static u64 atom_get_val(struct cpudata *cpudata, int pstate)
 {
        u64 val;
        int32_t vid_fp;
@@ -578,9 +728,7 @@ static void atom_set_pstate(struct cpudata *cpudata, int pstate)
        if (pstate > cpudata->pstate.max_pstate)
                vid = cpudata->vid.turbo;
 
-       val |= vid;
-
-       wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
+       return val | vid;
 }
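
The elided middle of atom_get_val() linearly interpolates a VID between
vid.min and vid.max using vid.ratio (see struct vid_data above). A hedged
sketch of that interpolation; the exact rounding (the driver has a
ceiling_fp() helper, visible in the first hunk) may differ:

    #include <stdint.h>

    #define FRAC_BITS 8
    #define int_tofp(x) ((int32_t)(x) << FRAC_BITS)
    #define fp_toint(x) ((x) >> FRAC_BITS)
    #define mul_fp(x, y) ((int32_t)(((int64_t)(x) * (y)) >> FRAC_BITS))

    /* VID for a P state, interpolated upward from the lowest P state. */
    static uint32_t vid_for_pstate(int pstate, int min_pstate,
                                   int32_t vid_min, int32_t ratio)
    {
        int32_t vid_fp = vid_min +
                         mul_fp(ratio, int_tofp(pstate - min_pstate));
        return fp_toint(vid_fp);
    }

    int main(void)
    {
        /* Hypothetical: min P state 6, VID min 0x20, ratio 0.5. */
        return vid_for_pstate(10, 6, int_tofp(0x20),
                              int_tofp(1) / 2) ? 0 : 1;
    }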
 
 static int silvermont_get_scaling(void)
@@ -673,6 +821,11 @@ static int core_get_max_pstate(void)
                        if (err)
                                goto skip_tar;
 
+                       /* For level 1 and 2, bits[23:16] contain the ratio */
+                       if (tdp_ctrl)
+                               tdp_ratio >>= 16;
+
+                       tdp_ratio &= 0xff; /* ratios are only 8 bits long */
                        if (tdp_ratio - 1 == tar) {
                                max_pstate = tar;
                                pr_debug("max_pstate=TAC %x\n", max_pstate);
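
The added lines fix core_get_max_pstate() for configurable-TDP parts: per
the new comment, the level-1/level-2 config-TDP registers keep the ratio in
bits [23:16], while the nominal register keeps it in the low byte. A sketch
of the extraction:

    #include <stdint.h>

    /* Ratio position depends on which config-TDP register was read:
     * tdp_ctrl != 0 selects level 1 or 2, where the ratio sits in
     * bits [23:16]; otherwise it is already in the low bits. */
    static uint8_t tdp_ratio_from_msr(uint64_t tdp_ratio, uint64_t tdp_ctrl)
    {
        if (tdp_ctrl)
            tdp_ratio >>= 16;
        return tdp_ratio & 0xff;    /* ratios are only 8 bits long */
    }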
@@ -704,7 +857,7 @@ static inline int core_get_scaling(void)
        return 100000;
 }
 
-static void core_set_pstate(struct cpudata *cpudata, int pstate)
+static u64 core_get_val(struct cpudata *cpudata, int pstate)
 {
        u64 val;
 
@@ -712,7 +865,7 @@ static void core_set_pstate(struct cpudata *cpudata, int pstate)
        if (limits->no_turbo && !limits->turbo_disabled)
                val |= (u64)1 << 32;
 
-       wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
+       return val;
 }
 
 static int knl_get_turbo_pstate(void)
@@ -743,7 +896,7 @@ static struct cpu_defaults core_params = {
                .get_min = core_get_min_pstate,
                .get_turbo = core_get_turbo_pstate,
                .get_scaling = core_get_scaling,
-               .set = core_set_pstate,
+               .get_val = core_get_val,
                .get_target_pstate = get_target_pstate_use_performance,
        },
 };
@@ -762,7 +915,7 @@ static struct cpu_defaults silvermont_params = {
                .get_max_physical = atom_get_max_pstate,
                .get_min = atom_get_min_pstate,
                .get_turbo = atom_get_turbo_pstate,
-               .set = atom_set_pstate,
+               .get_val = atom_get_val,
                .get_scaling = silvermont_get_scaling,
                .get_vid = atom_get_vid,
                .get_target_pstate = get_target_pstate_use_cpu_load,
@@ -783,7 +936,7 @@ static struct cpu_defaults airmont_params = {
                .get_max_physical = atom_get_max_pstate,
                .get_min = atom_get_min_pstate,
                .get_turbo = atom_get_turbo_pstate,
-               .set = atom_set_pstate,
+               .get_val = atom_get_val,
                .get_scaling = airmont_get_scaling,
                .get_vid = atom_get_vid,
                .get_target_pstate = get_target_pstate_use_cpu_load,
@@ -805,7 +958,7 @@ static struct cpu_defaults knl_params = {
                .get_min = core_get_min_pstate,
                .get_turbo = knl_get_turbo_pstate,
                .get_scaling = core_get_scaling,
-               .set = core_set_pstate,
+               .get_val = core_get_val,
                .get_target_pstate = get_target_pstate_use_performance,
        },
 };
@@ -824,33 +977,32 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
         * policy, or by cpu specific default values determined through
         * experimentation.
         */
-       max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits->max_perf));
+       max_perf_adj = fp_toint(max_perf * limits->max_perf);
        *max = clamp_t(int, max_perf_adj,
                        cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
 
-       min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits->min_perf));
+       min_perf = fp_toint(max_perf * limits->min_perf);
        *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
 }
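
The simplification above works because limits->max_perf and limits->min_perf
are already fixed-point fractions, so multiplying the integer max_perf by
them directly and converting once is equivalent to the old
convert-multiply-convert sequence: (int_tofp(a) * f) >> FRAC_BITS equals
a * f exactly, so no precision is lost. A quick check, assuming FRAC_BITS 8:

    #include <assert.h>
    #include <stdint.h>

    #define FRAC_BITS 8
    #define int_tofp(x) ((int64_t)(x) << FRAC_BITS)
    #define fp_toint(x) ((x) >> FRAC_BITS)
    #define mul_fp(x, y) (((int64_t)(x) * (y)) >> FRAC_BITS)

    int main(void)
    {
        int max_perf = 24;                      /* hypothetical P state */
        int64_t frac = int_tofp(1) * 80 / 100;  /* 80% as a fp fraction */

        /* Old form: convert, fixed-point multiply, convert back. */
        int old_val = fp_toint(mul_fp(int_tofp(max_perf), frac));
        /* New form: integer times fixed-point fraction, one conversion. */
        int new_val = fp_toint(max_perf * frac);

        assert(old_val == new_val);
        return 0;
    }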
 
-static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate, bool force)
+static inline void intel_pstate_record_pstate(struct cpudata *cpu, int pstate)
 {
-       int max_perf, min_perf;
-
-       if (force) {
-               update_turbo_state();
-
-               intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
-
-               pstate = clamp_t(int, pstate, min_perf, max_perf);
-
-               if (pstate == cpu->pstate.current_pstate)
-                       return;
-       }
        trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
-
        cpu->pstate.current_pstate = pstate;
+}
+
+static void intel_pstate_set_min_pstate(struct cpudata *cpu)
+{
+       int pstate = cpu->pstate.min_pstate;
 
-       pstate_funcs.set(cpu, pstate);
+       intel_pstate_record_pstate(cpu, pstate);
+       /*
+        * Generally, there is no guarantee that this code will always run on
+        * the CPU being updated, so force the register update to run on the
+        * right CPU.
+        */
+       wrmsrl_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL,
+                     pstate_funcs.get_val(cpu, pstate));
 }
 
 static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
@@ -863,7 +1015,8 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
 
        if (pstate_funcs.get_vid)
                pstate_funcs.get_vid(cpu);
-       intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false);
+
+       intel_pstate_set_min_pstate(cpu);
 }
 
 static inline void intel_pstate_calc_busy(struct cpudata *cpu)
@@ -874,16 +1027,10 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu)
        core_pct = int_tofp(sample->aperf) * int_tofp(100);
        core_pct = div64_u64(core_pct, int_tofp(sample->mperf));
 
-       sample->freq = fp_toint(
-               mul_fp(int_tofp(
-                       cpu->pstate.max_pstate_physical *
-                       cpu->pstate.scaling / 100),
-                       core_pct));
-
        sample->core_pct_busy = (int32_t)core_pct;
 }
 
-static inline void intel_pstate_sample(struct cpudata *cpu)
+static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time)
 {
        u64 aperf, mperf;
        unsigned long flags;
@@ -893,14 +1040,14 @@ static inline void intel_pstate_sample(struct cpudata *cpu)
        rdmsrl(MSR_IA32_APERF, aperf);
        rdmsrl(MSR_IA32_MPERF, mperf);
        tsc = rdtsc();
-       if ((cpu->prev_mperf == mperf) || (cpu->prev_tsc == tsc)) {
+       if (cpu->prev_mperf == mperf || cpu->prev_tsc == tsc) {
                local_irq_restore(flags);
-               return;
+               return false;
        }
        local_irq_restore(flags);
 
        cpu->last_sample_time = cpu->sample.time;
-       cpu->sample.time = ktime_get();
+       cpu->sample.time = time;
        cpu->sample.aperf = aperf;
        cpu->sample.mperf = mperf;
        cpu->sample.tsc =  tsc;
@@ -908,27 +1055,24 @@ static inline void intel_pstate_sample(struct cpudata *cpu)
        cpu->sample.mperf -= cpu->prev_mperf;
        cpu->sample.tsc -= cpu->prev_tsc;
 
-       intel_pstate_calc_busy(cpu);
-
        cpu->prev_aperf = aperf;
        cpu->prev_mperf = mperf;
        cpu->prev_tsc = tsc;
+       /*
+        * First time this function is invoked in a given cycle, all of the
+        * previous sample data fields are equal to zero or stale and they must
+        * be populated with meaningful numbers for things to work, so assume
+        * that sample.time will always be reset before setting the utilization
+        * update hook and make the caller skip the sample then.
+        */
+       return !!cpu->last_sample_time;
 }
 
-static inline void intel_hwp_set_sample_time(struct cpudata *cpu)
-{
-       int delay;
-
-       delay = msecs_to_jiffies(50);
-       mod_timer_pinned(&cpu->timer, jiffies + delay);
-}
-
-static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
+static inline int32_t get_avg_frequency(struct cpudata *cpu)
 {
-       int delay;
-
-       delay = msecs_to_jiffies(pid_params.sample_rate_ms);
-       mod_timer_pinned(&cpu->timer, jiffies + delay);
+       return fp_toint(mul_fp(cpu->sample.core_pct_busy,
+                              int_tofp(cpu->pstate.max_pstate_physical *
+                                               cpu->pstate.scaling / 100)));
 }
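
With sample->freq gone, the effective frequency is derived on demand from
the busy ratio; this is what the intel_pstate_get() fix in the commit title
switches to. A numeric sketch with hypothetical values:

    #include <stdint.h>
    #include <stdio.h>

    #define FRAC_BITS 8
    #define int_tofp(x) ((int64_t)(x) << FRAC_BITS)
    #define fp_toint(x) ((x) >> FRAC_BITS)
    #define mul_fp(x, y) (((int64_t)(x) * (y)) >> FRAC_BITS)

    int main(void)
    {
        /* Hypothetical: 75% busy, max physical ratio 32, core scaling
         * of 100000 kHz per ratio unit (core_get_scaling() above). */
        int64_t core_pct_busy = int_tofp(75);
        int max_pstate_physical = 32, scaling = 100000;

        int freq = fp_toint(mul_fp(core_pct_busy,
                                   int_tofp(max_pstate_physical *
                                            scaling / 100)));
        printf("avg frequency = %d kHz\n", freq);   /* 2400000 kHz */
        return 0;
    }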
 
 static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
@@ -954,7 +1098,6 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
        mperf = cpu->sample.mperf + delta_iowait_mperf;
        cpu->prev_cummulative_iowait = cummulative_iowait;
 
-
        /*
         * The load can be estimated as the ratio of the mperf counter
         * running at a constant frequency during active periods
@@ -970,8 +1113,7 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
 static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
 {
        int32_t core_busy, max_pstate, current_pstate, sample_ratio;
-       s64 duration_us;
-       u32 sample_time;
+       u64 duration_ns;
 
        /*
         * core_busy is the ratio of actual performance to max
@@ -990,25 +1132,41 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
        core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
 
        /*
-        * Since we have a deferred timer, it will not fire unless
-        * we are in C0.  So, determine if the actual elapsed time
-        * is significantly greater (3x) than our sample interval.  If it
-        * is, then we were idle for a long enough period of time
-        * to adjust our busyness.
+        * Since our utilization update callback will not run unless we are
+        * in C0, check if the actual elapsed time is significantly greater (3x)
+        * than our sample interval.  If it is, then we were idle for a long
+        * enough period of time to adjust our busyness.
         */
-       sample_time = pid_params.sample_rate_ms  * USEC_PER_MSEC;
-       duration_us = ktime_us_delta(cpu->sample.time,
-                                    cpu->last_sample_time);
-       if (duration_us > sample_time * 3) {
-               sample_ratio = div_fp(int_tofp(sample_time),
-                                     int_tofp(duration_us));
+       duration_ns = cpu->sample.time - cpu->last_sample_time;
+       if ((s64)duration_ns > pid_params.sample_rate_ns * 3) {
+               sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns),
+                                     int_tofp(duration_ns));
                core_busy = mul_fp(core_busy, sample_ratio);
+       } else {
+               sample_ratio = div_fp(100 * cpu->sample.mperf, cpu->sample.tsc);
+               if (sample_ratio < int_tofp(1))
+                       core_busy = 0;
        }
 
        cpu->sample.busy_scaled = core_busy;
        return cpu->pstate.current_pstate - pid_calc(&cpu->pid, core_busy);
 }
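
The else branch is new: even when the callback runs on schedule, an
mperf/tsc ratio below 1% means the CPU was almost entirely idle, so
core_busy is zeroed to let the P state drop. The long-gap branch instead
scales busyness by sample_rate/duration. A sketch of that scaling, assuming
div_fp(x, y) computes (x << FRAC_BITS) / y as the calls above suggest:

    #include <stdint.h>
    #include <stdio.h>

    #define FRAC_BITS 8
    #define int_tofp(x) ((int64_t)(x) << FRAC_BITS)
    #define mul_fp(x, y) (((int64_t)(x) * (y)) >> FRAC_BITS)

    static int32_t div_fp(int64_t x, int64_t y)
    {
        return (int32_t)((x << FRAC_BITS) / y);  /* assumed helper shape */
    }

    int main(void)
    {
        int64_t sample_rate_ns = 10 * 1000 * 1000;  /* 10 ms */
        int64_t duration_ns = 80 * 1000 * 1000;     /* woke after 80 ms */
        int64_t core_busy = int_tofp(60);           /* 60% before scaling */

        if (duration_ns > sample_rate_ns * 3) {
            /* Idle gap: scale busyness down by sample_rate/duration. */
            int32_t ratio = div_fp(sample_rate_ns, duration_ns);
            core_busy = mul_fp(core_busy, ratio);
        }
        printf("scaled core_busy = %lld/256\n", (long long)core_busy);
        return 0;
    }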
 
+static inline void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
+{
+       int max_perf, min_perf;
+
+       update_turbo_state();
+
+       intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
+       pstate = clamp_t(int, pstate, min_perf, max_perf);
+       if (pstate == cpu->pstate.current_pstate)
+               return;
+
+       intel_pstate_record_pstate(cpu, pstate);
+       wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate));
+}
+
 static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
 {
        int from, target_pstate;
@@ -1018,7 +1176,7 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
 
        target_pstate = pstate_funcs.get_target_pstate(cpu);
 
-       intel_pstate_set_pstate(cpu, target_pstate, true);
+       intel_pstate_update_pstate(cpu, target_pstate);
 
        sample = &cpu->sample;
        trace_pstate_sample(fp_toint(sample->core_pct_busy),
@@ -1028,26 +1186,24 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
                sample->mperf,
                sample->aperf,
                sample->tsc,
-               sample->freq);
-}
-
-static void intel_hwp_timer_func(unsigned long __data)
-{
-       struct cpudata *cpu = (struct cpudata *) __data;
-
-       intel_pstate_sample(cpu);
-       intel_hwp_set_sample_time(cpu);
+               get_avg_frequency(cpu));
 }
 
-static void intel_pstate_timer_func(unsigned long __data)
+static void intel_pstate_update_util(struct update_util_data *data, u64 time,
+                                    unsigned long util, unsigned long max)
 {
-       struct cpudata *cpu = (struct cpudata *) __data;
+       struct cpudata *cpu = container_of(data, struct cpudata, update_util);
+       u64 delta_ns = time - cpu->sample.time;
 
-       intel_pstate_sample(cpu);
+       if ((s64)delta_ns >= pid_params.sample_rate_ns) {
+               bool sample_taken = intel_pstate_sample(cpu, time);
 
-       intel_pstate_adjust_busy_pstate(cpu);
-
-       intel_pstate_set_sample_time(cpu);
+               if (sample_taken) {
+                       intel_pstate_calc_busy(cpu);
+                       if (!hwp_active)
+                               intel_pstate_adjust_busy_pstate(cpu);
+               }
+       }
 }
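
The timer functions are gone: the driver now piggybacks on the scheduler's
utilization-update hook, rate-limits itself by comparing timestamps, and
uses container_of() to recover the per-CPU state from the embedded callback
struct. A plain C sketch of that pattern (kernel types and the extra
util/max arguments are omitted):

    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    struct update_util_data {
        void (*func)(struct update_util_data *data, unsigned long long time);
    };

    struct cpudata {
        int cpu;
        struct update_util_data update_util;  /* embedded, as in the diff */
        unsigned long long sample_time;
    };

    static void update_util_cb(struct update_util_data *data,
                               unsigned long long time)
    {
        /* Recover the enclosing per-CPU structure from the member. */
        struct cpudata *cpu = container_of(data, struct cpudata,
                                           update_util);

        if (time - cpu->sample_time >= 10000000ULL) {  /* 10 ms gate */
            cpu->sample_time = time;
            printf("cpu %d: take a sample\n", cpu->cpu);
        }
    }

    int main(void)
    {
        struct cpudata cd = { .cpu = 0, .sample_time = 0 };

        cd.update_util.func = update_util_cb;
        cd.update_util.func(&cd.update_util, 20000000ULL);
        return 0;
    }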
 
 #define ICPU(model, policy) \
@@ -1095,24 +1251,17 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
 
        cpu->cpu = cpunum;
 
-       if (hwp_active)
+       if (hwp_active) {
                intel_pstate_hwp_enable(cpu);
+               pid_params.sample_rate_ms = 50;
+               pid_params.sample_rate_ns = 50 * NSEC_PER_MSEC;
+       }
 
        intel_pstate_get_cpu_pstates(cpu);
 
-       init_timer_deferrable(&cpu->timer);
-       cpu->timer.data = (unsigned long)cpu;
-       cpu->timer.expires = jiffies + HZ/100;
-
-       if (!hwp_active)
-               cpu->timer.function = intel_pstate_timer_func;
-       else
-               cpu->timer.function = intel_hwp_timer_func;
-
        intel_pstate_busy_pid_reset(cpu);
-       intel_pstate_sample(cpu);
 
-       add_timer_on(&cpu->timer, cpunum);
+       cpu->update_util.func = intel_pstate_update_util;
 
        pr_debug("intel_pstate: controlling: cpu %d\n", cpunum);
 
@@ -1128,7 +1277,36 @@ static unsigned int intel_pstate_get(unsigned int cpu_num)
        if (!cpu)
                return 0;
        sample = &cpu->sample;
-       return sample->freq;
+       return get_avg_frequency(cpu);
+}
+
+static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
+{
+       struct cpudata *cpu = all_cpu_data[cpu_num];
+
+       /* Prevent intel_pstate_update_util() from using stale data. */
+       cpu->sample.time = 0;
+       cpufreq_set_update_util_data(cpu_num, &cpu->update_util);
+}
+
+static void intel_pstate_clear_update_util_hook(unsigned int cpu)
+{
+       cpufreq_set_update_util_data(cpu, NULL);
+       synchronize_sched();
+}
+
+static void intel_pstate_set_performance_limits(struct perf_limits *limits)
+{
+       limits->no_turbo = 0;
+       limits->turbo_disabled = 0;
+       limits->max_perf_pct = 100;
+       limits->max_perf = int_tofp(1);
+       limits->min_perf_pct = 100;
+       limits->min_perf = int_tofp(1);
+       limits->max_policy_pct = 100;
+       limits->max_sysfs_pct = 100;
+       limits->min_policy_pct = 0;
+       limits->min_sysfs_pct = 0;
 }
 
 static int intel_pstate_set_policy(struct cpufreq_policy *policy)
@@ -1136,17 +1314,20 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
        if (!policy->cpuinfo.max_freq)
                return -ENODEV;
 
-       if (policy->policy == CPUFREQ_POLICY_PERFORMANCE &&
-           policy->max >= policy->cpuinfo.max_freq) {
-               pr_debug("intel_pstate: set performance\n");
+       intel_pstate_clear_update_util_hook(policy->cpu);
+
+       if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
                limits = &performance_limits;
-               if (hwp_active)
-                       intel_pstate_hwp_set();
-               return 0;
+               if (policy->max >= policy->cpuinfo.max_freq) {
+                       pr_debug("intel_pstate: set performance\n");
+                       intel_pstate_set_performance_limits(limits);
+                       goto out;
+               }
+       } else {
+               pr_debug("intel_pstate: set powersave\n");
+               limits = &powersave_limits;
        }
 
-       pr_debug("intel_pstate: set powersave\n");
-       limits = &powersave_limits;
        limits->min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
        limits->min_policy_pct = clamp_t(int, limits->min_policy_pct, 0 , 100);
        limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100,
@@ -1172,8 +1353,10 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
        limits->max_perf = div_fp(int_tofp(limits->max_perf_pct),
                                  int_tofp(100));
 
-       if (hwp_active)
-               intel_pstate_hwp_set();
+ out:
+       intel_pstate_set_update_util_hook(policy->cpu);
+
+       intel_pstate_hwp_set_policy(policy);
 
        return 0;
 }
@@ -1196,11 +1379,12 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
 
        pr_debug("intel_pstate: CPU %d exiting\n", cpu_num);
 
-       del_timer_sync(&all_cpu_data[cpu_num]->timer);
+       intel_pstate_clear_update_util_hook(cpu_num);
+
        if (hwp_active)
                return;
 
-       intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false);
+       intel_pstate_set_min_pstate(cpu);
 }
 
 static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
@@ -1236,6 +1420,7 @@ static struct cpufreq_driver intel_pstate_driver = {
        .flags          = CPUFREQ_CONST_LOOPS,
        .verify         = intel_pstate_verify_policy,
        .setpolicy      = intel_pstate_set_policy,
+       .resume         = intel_pstate_hwp_set_policy,
        .get            = intel_pstate_get,
        .init           = intel_pstate_cpu_init,
        .stop_cpu       = intel_pstate_stop_cpu,
@@ -1260,6 +1445,7 @@ static int intel_pstate_msrs_not_valid(void)
 static void copy_pid_params(struct pstate_adjust_policy *policy)
 {
        pid_params.sample_rate_ms = policy->sample_rate_ms;
+       pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC;
        pid_params.p_gain_pct = policy->p_gain_pct;
        pid_params.i_gain_pct = policy->i_gain_pct;
        pid_params.d_gain_pct = policy->d_gain_pct;
@@ -1274,7 +1460,7 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs)
        pstate_funcs.get_min   = funcs->get_min;
        pstate_funcs.get_turbo = funcs->get_turbo;
        pstate_funcs.get_scaling = funcs->get_scaling;
-       pstate_funcs.set       = funcs->set;
+       pstate_funcs.get_val   = funcs->get_val;
        pstate_funcs.get_vid   = funcs->get_vid;
        pstate_funcs.get_target_pstate = funcs->get_target_pstate;
 
@@ -1397,6 +1583,11 @@ static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; }
 static inline bool intel_pstate_has_acpi_ppc(void) { return false; }
 #endif /* CONFIG_ACPI */
 
+static const struct x86_cpu_id hwp_support_ids[] __initconst = {
+       { X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_HWP },
+       {}
+};
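
HWP detection now uses a CPUID match table instead of static_cpu_has_safe().
A generic sketch of how such a zero-terminated table is walked; this is an
illustration with hypothetical values, not the kernel's x86_match_cpu()
implementation:

    #include <stddef.h>

    struct cpu_id {
        unsigned vendor, family, model, feature;
    };

    #define MODEL_ANY 0xffffu   /* wildcard, mirroring X86_MODEL_ANY */

    static const struct cpu_id *match_cpu(const struct cpu_id *table,
                                          const struct cpu_id *cpu)
    {
        /* The table ends with an all-zero sentinel entry. */
        for (; table->vendor | table->family | table->model |
               table->feature; table++) {
            if (table->vendor == cpu->vendor &&
                table->family == cpu->family &&
                (table->model == MODEL_ANY ||
                 table->model == cpu->model) &&
                table->feature == cpu->feature)   /* simplified check */
                return table;
        }
        return NULL;
    }

    int main(void)
    {
        static const struct cpu_id table[] = {
            { 0, 6, MODEL_ANY, 7 },   /* hypothetical feature id */
            { 0, 0, 0, 0 },
        };
        struct cpu_id cpu = { 0, 6, 0x5e, 7 };
        return match_cpu(table, &cpu) ? 0 : 1;
    }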
+
 static int __init intel_pstate_init(void)
 {
        int cpu, rc = 0;
@@ -1406,17 +1597,16 @@ static int __init intel_pstate_init(void)
        if (no_load)
                return -ENODEV;
 
+       if (x86_match_cpu(hwp_support_ids) && !no_hwp) {
+               copy_cpu_funcs(&core_params.funcs);
+               hwp_active++;
+               goto hwp_cpu_matched;
+       }
+
        id = x86_match_cpu(intel_pstate_cpu_ids);
        if (!id)
                return -ENODEV;
 
-       /*
-        * The Intel pstate driver will be ignored if the platform
-        * firmware has its own power management modes.
-        */
-       if (intel_pstate_platform_pwr_mgmt_exists())
-               return -ENODEV;
-
        cpu_def = (struct cpu_defaults *)id->driver_data;
 
        copy_pid_params(&cpu_def->pid_policy);
@@ -1425,17 +1615,20 @@ static int __init intel_pstate_init(void)
        if (intel_pstate_msrs_not_valid())
                return -ENODEV;
 
+hwp_cpu_matched:
+       /*
+        * The Intel pstate driver will be ignored if the platform
+        * firmware has its own power management modes.
+        */
+       if (intel_pstate_platform_pwr_mgmt_exists())
+               return -ENODEV;
+
        pr_info("Intel P-state driver initializing.\n");
 
        all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
        if (!all_cpu_data)
                return -ENOMEM;
 
-       if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp) {
-               pr_info("intel_pstate: HWP enabled\n");
-               hwp_active++;
-       }
-
        if (!hwp_active && hwp_only)
                goto out;
 
@@ -1446,12 +1639,15 @@ static int __init intel_pstate_init(void)
        intel_pstate_debug_expose_params();
        intel_pstate_sysfs_expose_params();
 
+       if (hwp_active)
+               pr_info("intel_pstate: HWP enabled\n");
+
        return rc;
 out:
        get_online_cpus();
        for_each_online_cpu(cpu) {
                if (all_cpu_data[cpu]) {
-                       del_timer_sync(&all_cpu_data[cpu]->timer);
+                       intel_pstate_clear_update_util_hook(cpu);
                        kfree(all_cpu_data[cpu]);
                }
        }