Commit | Line | Data |
---|---|---|
8b45b72b | 1 | /* |
1da177e4 | 2 | * @file op_model_ppro.h |
b9917028 | 3 | * Family 6 perfmon and architectural perfmon MSR operations |
1da177e4 LT |
4 | * |
5 | * @remark Copyright 2002 OProfile authors | |
b9917028 | 6 | * @remark Copyright 2008 Intel Corporation |
1da177e4 LT |
7 | * @remark Read the file COPYING |
8 | * | |
9 | * @author John Levon | |
10 | * @author Philippe Elie | |
11 | * @author Graydon Hoare | |
b9917028 | 12 | * @author Andi Kleen |
1da177e4 LT |
13 | */ |
14 | ||
15 | #include <linux/oprofile.h> | |
b9917028 | 16 | #include <linux/slab.h> |
1da177e4 LT |
17 | #include <asm/ptrace.h> |
18 | #include <asm/msr.h> | |
19 | #include <asm/apic.h> | |
3e4ff115 | 20 | #include <asm/nmi.h> |
b9917028 | 21 | #include <asm/intel_arch_perfmon.h> |
8b45b72b | 22 | |
1da177e4 LT |
23 | #include "op_x86_model.h" |
24 | #include "op_counter.h" | |
25 | ||
/*
 * Counter geometry.  Defaults describe classic P6 (two 32-bit counters);
 * arch_perfmon_setup_counters()/ppro_setup_ctrs() widen these at runtime
 * from CPUID leaf 0xA on architectural-perfmon CPUs.
 */
static int num_counters = 2;
static int counter_width = 32;

/* Counter MSR helpers; .addr == 0 marks a counter that was not reserved. */
#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
#define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0)
/* Counters count up from -reset_value; overflow == sign bit (width-1) clear. */
#define CTR_OVERFLOWED(n) (!((n) & (1U<<(counter_width-1))))

/* Event-select (control) MSR helpers. */
#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
#define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
#define CTRL_WRITE(l, h, msrs, c) do {wrmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
/* Bit fields of the P6 EVNTSEL MSR (low word). */
#define CTRL_SET_ACTIVE(n) (n |= (1<<22))	/* EN: start counting */
#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))	/* EN: stop counting */
#define CTRL_CLEAR(x) (x &= (1<<21))		/* wipe all but reserved bit 21 */
#define CTRL_SET_ENABLE(val) (val |= 1<<20)	/* INT: interrupt on overflow */
#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16))	/* count in ring 3 */
#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17))	/* count in ring 0 */
#define CTRL_SET_UM(val, m) (val |= (m << 8))	/* unit mask */
#define CTRL_SET_EVENT(val, e) (val |= e)	/* event selector */

/* Per-counter reload values; allocated lazily in ppro_setup_ctrs(). */
static u64 *reset_value;
8b45b72b | 46 | |
1da177e4 LT |
47 | static void ppro_fill_in_addresses(struct op_msrs * const msrs) |
48 | { | |
cb9c448c DZ |
49 | int i; |
50 | ||
b9917028 | 51 | for (i = 0; i < num_counters; i++) { |
cb9c448c DZ |
52 | if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) |
53 | msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; | |
54 | else | |
55 | msrs->counters[i].addr = 0; | |
56 | } | |
8b45b72b | 57 | |
b9917028 | 58 | for (i = 0; i < num_counters; i++) { |
cb9c448c DZ |
59 | if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) |
60 | msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; | |
61 | else | |
62 | msrs->controls[i].addr = 0; | |
63 | } | |
1da177e4 LT |
64 | } |
65 | ||
66 | ||
67 | static void ppro_setup_ctrs(struct op_msrs const * const msrs) | |
68 | { | |
69 | unsigned int low, high; | |
70 | int i; | |
71 | ||
b9917028 AK |
72 | if (!reset_value) { |
73 | reset_value = kmalloc(sizeof(unsigned) * num_counters, | |
74 | GFP_ATOMIC); | |
75 | if (!reset_value) | |
76 | return; | |
77 | } | |
78 | ||
79 | if (cpu_has_arch_perfmon) { | |
80 | union cpuid10_eax eax; | |
81 | eax.full = cpuid_eax(0xa); | |
82 | if (counter_width < eax.split.bit_width) | |
83 | counter_width = eax.split.bit_width; | |
84 | } | |
85 | ||
1da177e4 | 86 | /* clear all counters */ |
b9917028 | 87 | for (i = 0 ; i < num_counters; ++i) { |
8b45b72b | 88 | if (unlikely(!CTRL_IS_RESERVED(msrs, i))) |
cb9c448c | 89 | continue; |
1da177e4 LT |
90 | CTRL_READ(low, high, msrs, i); |
91 | CTRL_CLEAR(low); | |
92 | CTRL_WRITE(low, high, msrs, i); | |
93 | } | |
8b45b72b | 94 | |
1da177e4 | 95 | /* avoid a false detection of ctr overflows in NMI handler */ |
b9917028 | 96 | for (i = 0; i < num_counters; ++i) { |
8b45b72b | 97 | if (unlikely(!CTR_IS_RESERVED(msrs, i))) |
cb9c448c | 98 | continue; |
b9917028 | 99 | wrmsrl(msrs->counters[i].addr, -1LL); |
1da177e4 LT |
100 | } |
101 | ||
102 | /* enable active counters */ | |
b9917028 | 103 | for (i = 0; i < num_counters; ++i) { |
8b45b72b | 104 | if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { |
1da177e4 LT |
105 | reset_value[i] = counter_config[i].count; |
106 | ||
b9917028 | 107 | wrmsrl(msrs->counters[i].addr, -reset_value[i]); |
1da177e4 LT |
108 | |
109 | CTRL_READ(low, high, msrs, i); | |
110 | CTRL_CLEAR(low); | |
111 | CTRL_SET_ENABLE(low); | |
112 | CTRL_SET_USR(low, counter_config[i].user); | |
113 | CTRL_SET_KERN(low, counter_config[i].kernel); | |
114 | CTRL_SET_UM(low, counter_config[i].unit_mask); | |
115 | CTRL_SET_EVENT(low, counter_config[i].event); | |
116 | CTRL_WRITE(low, high, msrs, i); | |
cb9c448c DZ |
117 | } else { |
118 | reset_value[i] = 0; | |
1da177e4 LT |
119 | } |
120 | } | |
121 | } | |
122 | ||
8b45b72b | 123 | |
1da177e4 LT |
124 | static int ppro_check_ctrs(struct pt_regs * const regs, |
125 | struct op_msrs const * const msrs) | |
126 | { | |
127 | unsigned int low, high; | |
128 | int i; | |
8b45b72b | 129 | |
b9917028 | 130 | for (i = 0 ; i < num_counters; ++i) { |
cb9c448c DZ |
131 | if (!reset_value[i]) |
132 | continue; | |
1da177e4 LT |
133 | CTR_READ(low, high, msrs, i); |
134 | if (CTR_OVERFLOWED(low)) { | |
135 | oprofile_add_sample(regs, i); | |
b9917028 | 136 | wrmsrl(msrs->counters[i].addr, -reset_value[i]); |
1da177e4 LT |
137 | } |
138 | } | |
139 | ||
140 | /* Only P6 based Pentium M need to re-unmask the apic vector but it | |
141 | * doesn't hurt other P6 variant */ | |
142 | apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); | |
143 | ||
144 | /* We can't work out if we really handled an interrupt. We | |
145 | * might have caught a *second* counter just after overflowing | |
146 | * the interrupt for this counter then arrives | |
147 | * and we don't find a counter that's overflowed, so we | |
148 | * would return 0 and get dazed + confused. Instead we always | |
149 | * assume we found an overflow. This sucks. | |
150 | */ | |
151 | return 1; | |
152 | } | |
153 | ||
8b45b72b | 154 | |
1da177e4 LT |
155 | static void ppro_start(struct op_msrs const * const msrs) |
156 | { | |
8b45b72b | 157 | unsigned int low, high; |
6b77df08 | 158 | int i; |
cb9c448c | 159 | |
b9917028 | 160 | for (i = 0; i < num_counters; ++i) { |
6b77df08 AS |
161 | if (reset_value[i]) { |
162 | CTRL_READ(low, high, msrs, i); | |
163 | CTRL_SET_ACTIVE(low); | |
164 | CTRL_WRITE(low, high, msrs, i); | |
165 | } | |
cb9c448c | 166 | } |
1da177e4 LT |
167 | } |
168 | ||
169 | ||
170 | static void ppro_stop(struct op_msrs const * const msrs) | |
171 | { | |
8b45b72b | 172 | unsigned int low, high; |
6b77df08 | 173 | int i; |
cb9c448c | 174 | |
b9917028 | 175 | for (i = 0; i < num_counters; ++i) { |
6b77df08 AS |
176 | if (!reset_value[i]) |
177 | continue; | |
178 | CTRL_READ(low, high, msrs, i); | |
cb9c448c | 179 | CTRL_SET_INACTIVE(low); |
6b77df08 | 180 | CTRL_WRITE(low, high, msrs, i); |
cb9c448c DZ |
181 | } |
182 | } | |
183 | ||
184 | static void ppro_shutdown(struct op_msrs const * const msrs) | |
185 | { | |
186 | int i; | |
187 | ||
b9917028 | 188 | for (i = 0 ; i < num_counters ; ++i) { |
8b45b72b | 189 | if (CTR_IS_RESERVED(msrs, i)) |
cb9c448c DZ |
190 | release_perfctr_nmi(MSR_P6_PERFCTR0 + i); |
191 | } | |
b9917028 | 192 | for (i = 0 ; i < num_counters ; ++i) { |
8b45b72b | 193 | if (CTRL_IS_RESERVED(msrs, i)) |
cb9c448c DZ |
194 | release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); |
195 | } | |
b9917028 AK |
196 | if (reset_value) { |
197 | kfree(reset_value); | |
198 | reset_value = NULL; | |
199 | } | |
1da177e4 LT |
200 | } |
201 | ||
202 | ||
/* Classic P6-family model: two fixed-width counters by default. */
struct op_x86_model_spec op_ppro_spec = {
	.num_counters = 2,	/* can be overridden */
	.num_controls = 2,	/* ditto */
	.fill_in_addresses = &ppro_fill_in_addresses,
	.setup_ctrs = &ppro_setup_ctrs,
	.check_ctrs = &ppro_check_ctrs,
	.start = &ppro_start,
	.stop = &ppro_stop,
	.shutdown = &ppro_shutdown
};
213 | ||
214 | /* | |
215 | * Architectural performance monitoring. | |
216 | * | |
217 | * Newer Intel CPUs (Core1+) have support for architectural | |
218 | * events described in CPUID 0xA. See the IA32 SDM Vol3b.18 for details. | |
219 | * The advantage of this is that it can be done without knowing about | |
220 | * the specific CPU. | |
221 | */ | |
222 | ||
223 | void arch_perfmon_setup_counters(void) | |
224 | { | |
225 | union cpuid10_eax eax; | |
226 | ||
227 | eax.full = cpuid_eax(0xa); | |
228 | ||
229 | /* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */ | |
230 | if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 && | |
231 | current_cpu_data.x86_model == 15) { | |
232 | eax.split.version_id = 2; | |
233 | eax.split.num_counters = 2; | |
234 | eax.split.bit_width = 40; | |
235 | } | |
236 | ||
237 | num_counters = eax.split.num_counters; | |
238 | ||
239 | op_arch_perfmon_spec.num_counters = num_counters; | |
240 | op_arch_perfmon_spec.num_controls = num_counters; | |
59512900 AK |
241 | op_ppro_spec.num_counters = num_counters; |
242 | op_ppro_spec.num_controls = num_counters; | |
b9917028 AK |
243 | } |
244 | ||
/* Architectural perfmon model: shares all handlers with the P6 model. */
struct op_x86_model_spec op_arch_perfmon_spec = {
	/* num_counters/num_controls filled in at runtime */
	.fill_in_addresses = &ppro_fill_in_addresses,
	/* user space does the cpuid check for available events */
	.setup_ctrs = &ppro_setup_ctrs,
	.check_ctrs = &ppro_check_ctrs,
	.start = &ppro_start,
	.stop = &ppro_stop,
	.shutdown = &ppro_shutdown
};