perf_counter: Optimize sched in/out of counters
arch/x86/kernel/cpu/perf_counter.c
/*
 * Performance counter x86 architecture code
 *
 * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
 * Copyright (C) 2009 Jaswinder Singh Rajput
 * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
 * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 *
 * For licencing details see kernel-base/COPYING
 */

#include <linux/perf_counter.h>
#include <linux/capability.h>
#include <linux/notifier.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/module.h>
#include <linux/kdebug.h>
#include <linux/sched.h>
#include <linux/uaccess.h>

#include <asm/apic.h>
#include <asm/stacktrace.h>
#include <asm/nmi.h>

static u64 perf_counter_mask __read_mostly;

struct cpu_hw_counters {
	struct perf_counter	*counters[X86_PMC_IDX_MAX];
	unsigned long		used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	unsigned long		interrupts;
	int			enabled;
};

/*
 * struct x86_pmu - generic x86 pmu
 */
struct x86_pmu {
	const char	*name;
	int		version;
	int		(*handle_irq)(struct pt_regs *, int);
	void		(*disable_all)(void);
	void		(*enable_all)(void);
	void		(*enable)(struct hw_perf_counter *, int);
	void		(*disable)(struct hw_perf_counter *, int);
	unsigned	eventsel;
	unsigned	perfctr;
	u64		(*event_map)(int);
	u64		(*raw_event)(u64);
	int		max_events;
	int		num_counters;
	int		num_counters_fixed;
	int		counter_bits;
	u64		counter_mask;
	u64		max_period;
	u64		intel_ctrl;
};

static struct x86_pmu x86_pmu __read_mostly;

static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
	.enabled = 1,
};

/*
 * Intel PerfMon v3. Used on Core2 and later.
 */
static const u64 intel_perfmon_event_map[] =
{
	[PERF_COUNT_CPU_CYCLES]			= 0x003c,
	[PERF_COUNT_INSTRUCTIONS]		= 0x00c0,
	[PERF_COUNT_CACHE_REFERENCES]		= 0x4f2e,
	[PERF_COUNT_CACHE_MISSES]		= 0x412e,
	[PERF_COUNT_BRANCH_INSTRUCTIONS]	= 0x00c4,
	[PERF_COUNT_BRANCH_MISSES]		= 0x00c5,
	[PERF_COUNT_BUS_CYCLES]			= 0x013c,
};

static u64 intel_pmu_event_map(int event)
{
	return intel_perfmon_event_map[event];
}

static u64 intel_pmu_raw_event(u64 event)
{
#define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
#define CORE_EVNTSEL_UNIT_MASK		0x0000FF00ULL
#define CORE_EVNTSEL_COUNTER_MASK	0xFF000000ULL

#define CORE_EVNTSEL_MASK		\
	(CORE_EVNTSEL_EVENT_MASK |	\
	 CORE_EVNTSEL_UNIT_MASK  |	\
	 CORE_EVNTSEL_COUNTER_MASK)

	return event & CORE_EVNTSEL_MASK;
}

/*
 * AMD Performance Monitor K7 and later.
 */
static const u64 amd_perfmon_event_map[] =
{
	[PERF_COUNT_CPU_CYCLES]			= 0x0076,
	[PERF_COUNT_INSTRUCTIONS]		= 0x00c0,
	[PERF_COUNT_CACHE_REFERENCES]		= 0x0080,
	[PERF_COUNT_CACHE_MISSES]		= 0x0081,
	[PERF_COUNT_BRANCH_INSTRUCTIONS]	= 0x00c4,
	[PERF_COUNT_BRANCH_MISSES]		= 0x00c5,
};

static u64 amd_pmu_event_map(int event)
{
	return amd_perfmon_event_map[event];
}

static u64 amd_pmu_raw_event(u64 event)
{
#define K7_EVNTSEL_EVENT_MASK	0x7000000FFULL
#define K7_EVNTSEL_UNIT_MASK	0x00000FF00ULL
#define K7_EVNTSEL_COUNTER_MASK	0x0FF000000ULL

#define K7_EVNTSEL_MASK			\
	(K7_EVNTSEL_EVENT_MASK |	\
	 K7_EVNTSEL_UNIT_MASK  |	\
	 K7_EVNTSEL_COUNTER_MASK)

	return event & K7_EVNTSEL_MASK;
}

/*
 * Propagate counter elapsed time into the generic counter.
 * Can only be executed on the CPU where the counter is active.
 * Returns the delta events processed.
 */
static u64
x86_perf_counter_update(struct perf_counter *counter,
			struct hw_perf_counter *hwc, int idx)
{
	int shift = 64 - x86_pmu.counter_bits;
	u64 prev_raw_count, new_raw_count;
	s64 delta;

	/*
	 * Careful: an NMI might modify the previous counter value.
	 *
	 * Our tactic to handle this is to first atomically read and
	 * exchange a new raw count - then add that new-prev delta
	 * count to the generic counter atomically:
	 */
again:
	prev_raw_count = atomic64_read(&hwc->prev_count);
	rdmsrl(hwc->counter_base + idx, new_raw_count);

	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
					new_raw_count) != prev_raw_count)
		goto again;

	/*
	 * Now we have the new raw value and have updated the prev
	 * timestamp already. We can now calculate the elapsed delta
	 * (counter-)time and add that to the generic counter.
	 *
	 * Careful, not all hw sign-extends above the physical width
	 * of the count.
	 */
	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	atomic64_add(delta, &counter->count);
	atomic64_sub(delta, &hwc->period_left);

	return new_raw_count;
}

static atomic_t active_counters;
static DEFINE_MUTEX(pmc_reserve_mutex);

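/*
 * Reserve the perfctr and event-select MSR ranges (and quiesce the lapic
 * NMI watchdog) so nothing else programs them while counters are in use:
 */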
static bool reserve_pmc_hardware(void)
{
	int i;

	if (nmi_watchdog == NMI_LOCAL_APIC)
		disable_lapic_nmi_watchdog();

	for (i = 0; i < x86_pmu.num_counters; i++) {
		if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
			goto perfctr_fail;
	}

	for (i = 0; i < x86_pmu.num_counters; i++) {
		if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
			goto eventsel_fail;
	}

	return true;

eventsel_fail:
	for (i--; i >= 0; i--)
		release_evntsel_nmi(x86_pmu.eventsel + i);

	i = x86_pmu.num_counters;

perfctr_fail:
	for (i--; i >= 0; i--)
		release_perfctr_nmi(x86_pmu.perfctr + i);

	if (nmi_watchdog == NMI_LOCAL_APIC)
		enable_lapic_nmi_watchdog();

	return false;
}

static void release_pmc_hardware(void)
{
	int i;

	for (i = 0; i < x86_pmu.num_counters; i++) {
		release_perfctr_nmi(x86_pmu.perfctr + i);
		release_evntsel_nmi(x86_pmu.eventsel + i);
	}

	if (nmi_watchdog == NMI_LOCAL_APIC)
		enable_lapic_nmi_watchdog();
}

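/*
 * Drop our reference; the last counter to go away releases the PMC
 * hardware and lets the lapic NMI watchdog come back:
 */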
static void hw_perf_counter_destroy(struct perf_counter *counter)
{
	if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) {
		release_pmc_hardware();
		mutex_unlock(&pmc_reserve_mutex);
	}
}

static inline int x86_pmu_initialized(void)
{
	return x86_pmu.handle_irq != NULL;
}

/*
 * Setup the hardware configuration for a given hw_event_type
 */
static int __hw_perf_counter_init(struct perf_counter *counter)
{
	struct perf_counter_hw_event *hw_event = &counter->hw_event;
	struct hw_perf_counter *hwc = &counter->hw;
	int err;

	if (!x86_pmu_initialized())
		return -ENODEV;

	err = 0;
	if (!atomic_inc_not_zero(&active_counters)) {
		mutex_lock(&pmc_reserve_mutex);
		if (atomic_read(&active_counters) == 0 && !reserve_pmc_hardware())
			err = -EBUSY;
		else
			atomic_inc(&active_counters);
		mutex_unlock(&pmc_reserve_mutex);
	}
	if (err)
		return err;

	/*
	 * Generate PMC IRQs:
	 * (keep 'enabled' bit clear for now)
	 */
	hwc->config = ARCH_PERFMON_EVENTSEL_INT;

	/*
	 * Count user and OS events unless requested not to.
	 */
	if (!hw_event->exclude_user)
		hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
	if (!hw_event->exclude_kernel)
		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;

	/*
	 * If privileged enough, allow NMI events:
	 */
	hwc->nmi = 0;
	if (hw_event->nmi) {
		if (sysctl_perf_counter_priv && !capable(CAP_SYS_ADMIN))
			return -EACCES;
		hwc->nmi = 1;
	}
	perf_counters_lapic_init(hwc->nmi);

	if (!hwc->irq_period)
		hwc->irq_period = x86_pmu.max_period;

	atomic64_set(&hwc->period_left,
			min(x86_pmu.max_period, hwc->irq_period));

	/*
	 * Raw event type provides the config in the event structure
	 */
	if (perf_event_raw(hw_event)) {
		hwc->config |= x86_pmu.raw_event(perf_event_config(hw_event));
	} else {
		if (perf_event_id(hw_event) >= x86_pmu.max_events)
			return -EINVAL;
		/*
		 * The generic map:
		 */
		hwc->config |= x86_pmu.event_map(perf_event_id(hw_event));
	}

	counter->destroy = hw_perf_counter_destroy;

	return 0;
}

static void intel_pmu_disable_all(void)
{
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
}

static void amd_pmu_disable_all(void)
{
	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
	int idx;

	if (!cpuc->enabled)
		return;

	cpuc->enabled = 0;
	/*
	 * ensure we write the disable before we start disabling the
	 * counters proper, so that amd_pmu_enable_counter() does the
	 * right thing.
	 */
	barrier();

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		u64 val;

		if (!test_bit(idx, cpuc->active_mask))
			continue;
		rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
		if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
			continue;
		val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
		wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
	}
}

void hw_perf_disable(void)
{
	if (!x86_pmu_initialized())
		return;
	return x86_pmu.disable_all();
}

static void intel_pmu_enable_all(void)
{
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
}

static void amd_pmu_enable_all(void)
{
	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
	int idx;

	if (cpuc->enabled)
		return;

	cpuc->enabled = 1;
	barrier();

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		u64 val;

		if (!test_bit(idx, cpuc->active_mask))
			continue;
		rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
		if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
			continue;
		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
		wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
	}
}

void hw_perf_enable(void)
{
	if (!x86_pmu_initialized())
		return;
	x86_pmu.enable_all();
}

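/*
 * Read and acknowledge the Intel global overflow status bits:
 */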
static inline u64 intel_pmu_get_status(void)
{
	u64 status;

	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);

	return status;
}

static inline void intel_pmu_ack_status(u64 ack)
{
	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
}

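/*
 * Write hwc->config into a generic counter's event-select MSR; the enable
 * variant also sets ARCH_PERFMON_EVENTSEL0_ENABLE, the disable variant
 * writes the config with the enable bit clear:
 */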
static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
{
	int err;
	err = checking_wrmsrl(hwc->config_base + idx,
			      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
}

static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
{
	int err;
	err = checking_wrmsrl(hwc->config_base + idx,
			      hwc->config);
}

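/*
 * Fixed-purpose counters are controlled via 4-bit fields in
 * MSR_ARCH_PERFMON_FIXED_CTR_CTRL; clearing the field disables the counter:
 */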
static inline void
intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx)
{
	int idx = __idx - X86_PMC_IDX_FIXED;
	u64 ctrl_val, mask;
	int err;

	mask = 0xfULL << (idx * 4);

	rdmsrl(hwc->config_base, ctrl_val);
	ctrl_val &= ~mask;
	err = checking_wrmsrl(hwc->config_base, ctrl_val);
}

static inline void
intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
{
	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
		intel_pmu_disable_fixed(hwc, idx);
		return;
	}

	x86_pmu_disable_counter(hwc, idx);
}

static inline void
amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
{
	x86_pmu_disable_counter(hwc, idx);
}

static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);

/*
 * Set the next IRQ period, based on the hwc->period_left value.
 * To be called with the counter disabled in hw:
 */
static void
x86_perf_counter_set_period(struct perf_counter *counter,
			     struct hw_perf_counter *hwc, int idx)
{
	s64 left = atomic64_read(&hwc->period_left);
	s64 period = min(x86_pmu.max_period, hwc->irq_period);
	int err;

	/*
	 * If we are way outside a reasonable range then just skip forward:
	 */
	if (unlikely(left <= -period)) {
		left = period;
		atomic64_set(&hwc->period_left, left);
	}

	if (unlikely(left <= 0)) {
		left += period;
		atomic64_set(&hwc->period_left, left);
	}
	/*
	 * Quirk: certain CPUs don't like it if just 1 event is left:
	 */
	if (unlikely(left < 2))
		left = 2;

	per_cpu(prev_left[idx], smp_processor_id()) = left;

	/*
	 * The hw counter starts counting from this counter offset,
	 * mark it to be able to extract future deltas:
	 */
	atomic64_set(&hwc->prev_count, (u64)-left);

	err = checking_wrmsrl(hwc->counter_base + idx,
			     (u64)(-left) & x86_pmu.counter_mask);
}

static inline void
intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx)
{
	int idx = __idx - X86_PMC_IDX_FIXED;
	u64 ctrl_val, bits, mask;
	int err;

	/*
	 * Enable IRQ generation (0x8),
	 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
	 * if requested:
	 */
	bits = 0x8ULL;
	if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
		bits |= 0x2;
	if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
		bits |= 0x1;
	bits <<= (idx * 4);
	mask = 0xfULL << (idx * 4);

	rdmsrl(hwc->config_base, ctrl_val);
	ctrl_val &= ~mask;
	ctrl_val |= bits;
	err = checking_wrmsrl(hwc->config_base, ctrl_val);
}

static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
{
	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
		intel_pmu_enable_fixed(hwc, idx);
		return;
	}

	x86_pmu_enable_counter(hwc, idx);
}

static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
{
	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);

	if (cpuc->enabled)
		x86_pmu_enable_counter(hwc, idx);
	else
		x86_pmu_disable_counter(hwc, idx);
}

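/*
 * Map a counter config onto one of the Intel fixed-purpose counters, or
 * return -1 if it has to use a generic PMC:
 */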
static int
fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
{
	unsigned int event;

	if (!x86_pmu.num_counters_fixed)
		return -1;

	if (unlikely(hwc->nmi))
		return -1;

	event = hwc->config & ARCH_PERFMON_EVENT_MASK;

	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_INSTRUCTIONS)))
		return X86_PMC_IDX_FIXED_INSTRUCTIONS;
	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_CPU_CYCLES)))
		return X86_PMC_IDX_FIXED_CPU_CYCLES;
	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_BUS_CYCLES)))
		return X86_PMC_IDX_FIXED_BUS_CYCLES;

	return -1;
}

/*
 * Find a PMC slot for the freshly enabled / scheduled in counter:
 */
static int x86_pmu_enable(struct perf_counter *counter)
{
	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
	struct hw_perf_counter *hwc = &counter->hw;
	int idx;

	idx = fixed_mode_idx(counter, hwc);
	if (idx >= 0) {
		/*
		 * Try to get the fixed counter, if that is already taken
		 * then try to get a generic counter:
		 */
		if (test_and_set_bit(idx, cpuc->used_mask))
			goto try_generic;

		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
		/*
		 * We set it so that counter_base + idx in wrmsr/rdmsr maps to
		 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
		 */
		hwc->counter_base =
			MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
		hwc->idx = idx;
	} else {
		idx = hwc->idx;
		/* Try to get the previous generic counter again */
		if (test_and_set_bit(idx, cpuc->used_mask)) {
try_generic:
			idx = find_first_zero_bit(cpuc->used_mask,
						  x86_pmu.num_counters);
			if (idx == x86_pmu.num_counters)
				return -EAGAIN;

			set_bit(idx, cpuc->used_mask);
			hwc->idx = idx;
		}
		hwc->config_base  = x86_pmu.eventsel;
		hwc->counter_base = x86_pmu.perfctr;
	}

	x86_pmu.disable(hwc, idx);

	cpuc->counters[idx] = counter;
	set_bit(idx, cpuc->active_mask);

	x86_perf_counter_set_period(counter, hwc, idx);
	x86_pmu.enable(hwc, idx);

	return 0;
}

void perf_counter_print_debug(void)
{
	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
	struct cpu_hw_counters *cpuc;
	unsigned long flags;
	int cpu, idx;

	if (!x86_pmu.num_counters)
		return;

	local_irq_save(flags);

	cpu = smp_processor_id();
	cpuc = &per_cpu(cpu_hw_counters, cpu);

	if (x86_pmu.version >= 2) {
		rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
		rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
		rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);

		pr_info("\n");
		pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
		pr_info("CPU#%d: status:     %016llx\n", cpu, status);
		pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
		pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
	}
	pr_info("CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used_mask);

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
		rdmsrl(x86_pmu.perfctr  + idx, pmc_count);

		prev_left = per_cpu(prev_left[idx], cpu);

		pr_info("CPU#%d:   gen-PMC%d ctrl:  %016llx\n",
			cpu, idx, pmc_ctrl);
		pr_info("CPU#%d:   gen-PMC%d count: %016llx\n",
			cpu, idx, pmc_count);
		pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
			cpu, idx, prev_left);
	}
	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);

		pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
			cpu, idx, pmc_count);
	}
	local_irq_restore(flags);
}

static void x86_pmu_disable(struct perf_counter *counter)
{
	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
	struct hw_perf_counter *hwc = &counter->hw;
	int idx = hwc->idx;

	/*
	 * Must be done before we disable, otherwise the nmi handler
	 * could reenable again:
	 */
	clear_bit(idx, cpuc->active_mask);
	x86_pmu.disable(hwc, idx);

	/*
	 * Make sure the cleared pointer becomes visible before we
	 * (potentially) free the counter:
	 */
	barrier();

	/*
	 * Drain the remaining delta count out of a counter
	 * that we are disabling:
	 */
	x86_perf_counter_update(counter, hwc, idx);
	cpuc->counters[idx] = NULL;
	clear_bit(idx, cpuc->used_mask);
}

/*
 * Save and restart an expired counter. Called by NMI contexts,
 * so it has to be careful about preempting normal counter ops:
 */
static void intel_pmu_save_and_restart(struct perf_counter *counter)
{
	struct hw_perf_counter *hwc = &counter->hw;
	int idx = hwc->idx;

	x86_perf_counter_update(counter, hwc, idx);
	x86_perf_counter_set_period(counter, hwc, idx);

	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
		intel_pmu_enable_counter(hwc, idx);
}

/*
 * Maximum interrupt frequency of 100KHz per CPU
 */
#define PERFMON_MAX_INTERRUPTS (100000/HZ)

/*
 * This handler is triggered by the local APIC, so the APIC IRQ handling
 * rules apply:
 */
static int intel_pmu_handle_irq(struct pt_regs *regs, int nmi)
{
	struct cpu_hw_counters *cpuc;
	struct cpu_hw_counters;
	int bit, cpu, loops;
	u64 ack, status;

	cpu = smp_processor_id();
	cpuc = &per_cpu(cpu_hw_counters, cpu);

	perf_disable();
	status = intel_pmu_get_status();
	if (!status) {
		perf_enable();
		return 0;
	}

	loops = 0;
again:
	if (++loops > 100) {
		WARN_ONCE(1, "perfcounters: irq loop stuck!\n");
		return 1;
	}

	inc_irq_stat(apic_perf_irqs);
	ack = status;
	for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
		struct perf_counter *counter = cpuc->counters[bit];

		clear_bit(bit, (unsigned long *) &status);
		if (!test_bit(bit, cpuc->active_mask))
			continue;

		intel_pmu_save_and_restart(counter);
		if (perf_counter_overflow(counter, nmi, regs, 0))
			intel_pmu_disable_counter(&counter->hw, bit);
	}

	intel_pmu_ack_status(ack);

	/*
	 * Repeat if there is more work to be done:
	 */
	status = intel_pmu_get_status();
	if (status)
		goto again;

	if (++cpuc->interrupts != PERFMON_MAX_INTERRUPTS)
		perf_enable();

	return 1;
}

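/*
 * AMD has no global overflow status register, so poll every active counter
 * and treat a value with the top bit clear as an overflow:
 */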
static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi)
{
	int cpu, idx, throttle = 0, handled = 0;
	struct cpu_hw_counters *cpuc;
	struct perf_counter *counter;
	struct hw_perf_counter *hwc;
	u64 val;

	cpu = smp_processor_id();
	cpuc = &per_cpu(cpu_hw_counters, cpu);

	if (++cpuc->interrupts == PERFMON_MAX_INTERRUPTS) {
		throttle = 1;
		__perf_disable();
		cpuc->enabled = 0;
		barrier();
	}

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		int disable = 0;

		if (!test_bit(idx, cpuc->active_mask))
			continue;

		counter = cpuc->counters[idx];
		hwc = &counter->hw;

		if (counter->hw_event.nmi != nmi)
			goto next;

		val = x86_perf_counter_update(counter, hwc, idx);
		if (val & (1ULL << (x86_pmu.counter_bits - 1)))
			goto next;

		/* counter overflow */
		x86_perf_counter_set_period(counter, hwc, idx);
		handled = 1;
		inc_irq_stat(apic_perf_irqs);
		disable = perf_counter_overflow(counter, nmi, regs, 0);

next:
		if (disable || throttle)
			amd_pmu_disable_counter(hwc, idx);
	}

	return handled;
}

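/*
 * Reset the per-CPU interrupt count; if the throttle limit was hit,
 * re-enable the PMU as well:
 */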
void perf_counter_unthrottle(void)
{
	struct cpu_hw_counters *cpuc;

	if (!x86_pmu_initialized())
		return;

	cpuc = &__get_cpu_var(cpu_hw_counters);
	if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) {
		/*
		 * Clear them before re-enabling irqs/NMIs again:
		 */
		cpuc->interrupts = 0;
		perf_enable();
	} else {
		cpuc->interrupts = 0;
	}
}

void smp_perf_counter_interrupt(struct pt_regs *regs)
{
	irq_enter();
	apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
	ack_APIC_irq();
	x86_pmu.handle_irq(regs, 0);
	irq_exit();
}

void smp_perf_pending_interrupt(struct pt_regs *regs)
{
	irq_enter();
	ack_APIC_irq();
	inc_irq_stat(apic_pending_irqs);
	perf_counter_do_pending();
	irq_exit();
}

void set_perf_counter_pending(void)
{
	apic->send_IPI_self(LOCAL_PENDING_VECTOR);
}

void perf_counters_lapic_init(int nmi)
{
	u32 apic_val;

	if (!x86_pmu_initialized())
		return;

	/*
	 * Enable the performance counter vector in the APIC LVT:
	 */
	apic_val = apic_read(APIC_LVTERR);

	apic_write(APIC_LVTERR, apic_val | APIC_LVT_MASKED);
	if (nmi)
		apic_write(APIC_LVTPC, APIC_DM_NMI);
	else
		apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
	apic_write(APIC_LVTERR, apic_val);
}

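/*
 * NMI die-notifier entry point: hand performance counter NMIs to the
 * active PMU's interrupt handler:
 */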
static int __kprobes
perf_counter_nmi_handler(struct notifier_block *self,
			 unsigned long cmd, void *__args)
{
	struct die_args *args = __args;
	struct pt_regs *regs;

	if (!atomic_read(&active_counters))
		return NOTIFY_DONE;

	switch (cmd) {
	case DIE_NMI:
	case DIE_NMI_IPI:
		break;

	default:
		return NOTIFY_DONE;
	}

	regs = args->regs;

	apic_write(APIC_LVTPC, APIC_DM_NMI);
	/*
	 * Can't rely on the handled return value to say it was our NMI, two
	 * counters could trigger 'simultaneously' raising two back-to-back NMIs.
	 *
	 * If the first NMI handles both, the latter will be empty and daze
	 * the CPU.
	 */
	x86_pmu.handle_irq(regs, 1);

	return NOTIFY_STOP;
}

static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
	.notifier_call		= perf_counter_nmi_handler,
	.next			= NULL,
	.priority		= 1
};

static struct x86_pmu intel_pmu = {
	.name			= "Intel",
	.handle_irq		= intel_pmu_handle_irq,
	.disable_all		= intel_pmu_disable_all,
	.enable_all		= intel_pmu_enable_all,
	.enable			= intel_pmu_enable_counter,
	.disable		= intel_pmu_disable_counter,
	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
	.event_map		= intel_pmu_event_map,
	.raw_event		= intel_pmu_raw_event,
	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
	/*
	 * Intel PMCs cannot be accessed sanely above 32 bit width,
	 * so we install an artificial 1<<31 period regardless of
	 * the generic counter period:
	 */
	.max_period		= (1ULL << 31) - 1,
};

static struct x86_pmu amd_pmu = {
	.name			= "AMD",
	.handle_irq		= amd_pmu_handle_irq,
	.disable_all		= amd_pmu_disable_all,
	.enable_all		= amd_pmu_enable_all,
	.enable			= amd_pmu_enable_counter,
	.disable		= amd_pmu_disable_counter,
	.eventsel		= MSR_K7_EVNTSEL0,
	.perfctr		= MSR_K7_PERFCTR0,
	.event_map		= amd_pmu_event_map,
	.raw_event		= amd_pmu_raw_event,
	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
	.num_counters		= 4,
	.counter_bits		= 48,
	.counter_mask		= (1ULL << 48) - 1,
	/* use highest bit to detect overflow */
	.max_period		= (1ULL << 47) - 1,
};

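/*
 * Probe for architectural perfmon (version 2+) via CPUID leaf 10 and fill
 * in the Intel PMU parameters:
 */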
static int intel_pmu_init(void)
{
	union cpuid10_edx edx;
	union cpuid10_eax eax;
	unsigned int unused;
	unsigned int ebx;
	int version;

	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
		return -ENODEV;

	/*
	 * Check whether the Architectural PerfMon supports
	 * Branch Misses Retired Event or not.
	 */
	cpuid(10, &eax.full, &ebx, &unused, &edx.full);
	if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
		return -ENODEV;

	version = eax.split.version_id;
	if (version < 2)
		return -ENODEV;

	x86_pmu = intel_pmu;
	x86_pmu.version = version;
	x86_pmu.num_counters = eax.split.num_counters;

	/*
	 * Quirk: v2 perfmon does not report fixed-purpose counters, so
	 * assume at least 3 counters:
	 */
	x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);

	x86_pmu.counter_bits = eax.split.bit_width;
	x86_pmu.counter_mask = (1ULL << eax.split.bit_width) - 1;

	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);

	return 0;
}

static int amd_pmu_init(void)
{
	x86_pmu = amd_pmu;
	return 0;
}

void __init init_hw_perf_counters(void)
{
	int err;

	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
		err = intel_pmu_init();
		break;
	case X86_VENDOR_AMD:
		err = amd_pmu_init();
		break;
	default:
		return;
	}
	if (err != 0)
		return;

	pr_info("%s Performance Monitoring support detected.\n", x86_pmu.name);
	pr_info("... version:         %d\n", x86_pmu.version);
	pr_info("... bit width:       %d\n", x86_pmu.counter_bits);

	pr_info("... num counters:    %d\n", x86_pmu.num_counters);
	if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
		x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
		WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
		     x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
	}
	perf_counter_mask = (1 << x86_pmu.num_counters) - 1;
	perf_max_counters = x86_pmu.num_counters;

	pr_info("... value mask:      %016Lx\n", x86_pmu.counter_mask);
	pr_info("... max period:      %016Lx\n", x86_pmu.max_period);

	if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
		x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
		WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
		     x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
	}
	pr_info("... fixed counters:  %d\n", x86_pmu.num_counters_fixed);

	perf_counter_mask |=
		((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;

	pr_info("... counter mask:    %016Lx\n", perf_counter_mask);

	perf_counters_lapic_init(1);
	register_die_notifier(&perf_counter_nmi_notifier);
}

static inline void x86_pmu_read(struct perf_counter *counter)
{
	x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
}

static const struct pmu pmu = {
	.enable		= x86_pmu_enable,
	.disable	= x86_pmu_disable,
	.read		= x86_pmu_read,
};

const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
{
	int err;

	err = __hw_perf_counter_init(counter);
	if (err)
		return ERR_PTR(err);

	return &pmu;
}

/*
 * callchain support
 */

static inline
void callchain_store(struct perf_callchain_entry *entry, unsigned long ip)
{
	if (entry->nr < MAX_STACK_DEPTH)
		entry->ip[entry->nr++] = ip;
}

static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry);
static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry);


static void
backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
{
	/* Ignore warnings */
}

static void backtrace_warning(void *data, char *msg)
{
	/* Ignore warnings */
}

static int backtrace_stack(void *data, char *name)
{
	/* Don't bother with IRQ stacks for now */
	return -1;
}

static void backtrace_address(void *data, unsigned long addr, int reliable)
{
	struct perf_callchain_entry *entry = data;

	if (reliable)
		callchain_store(entry, addr);
}

static const struct stacktrace_ops backtrace_ops = {
	.warning		= backtrace_warning,
	.warning_symbol		= backtrace_warning_symbol,
	.stack			= backtrace_stack,
	.address		= backtrace_address,
};

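/*
 * Record the kernel-side callchain by walking the stack with dump_trace()
 * and the backtrace_ops callbacks above:
 */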
static void
perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
	unsigned long bp;
	char *stack;
	int nr = entry->nr;

	callchain_store(entry, instruction_pointer(regs));

	stack = ((char *)regs + sizeof(struct pt_regs));
#ifdef CONFIG_FRAME_POINTER
	bp = frame_pointer(regs);
#else
	bp = 0;
#endif

	dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry);

	entry->kernel = entry->nr - nr;
}


struct stack_frame {
	const void __user	*next_fp;
	unsigned long		return_address;
};

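/*
 * Best-effort copy of one user stack frame; returns 0 when the frame
 * cannot be read without faulting:
 */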
static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
{
	int ret;

	if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
		return 0;

	ret = 1;
	pagefault_disable();
	if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
		ret = 0;
	pagefault_enable();

	return ret;
}

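/*
 * Walk the user stack by following saved frame pointers, recording each
 * return address into the callchain entry:
 */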
static void
perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
	struct stack_frame frame;
	const void __user *fp;
	int nr = entry->nr;

	regs = (struct pt_regs *)current->thread.sp0 - 1;
	fp   = (void __user *)regs->bp;

	callchain_store(entry, regs->ip);

	while (entry->nr < MAX_STACK_DEPTH) {
		frame.next_fp	     = NULL;
		frame.return_address = 0;

		if (!copy_stack_frame(fp, &frame))
			break;

		if ((unsigned long)fp < user_stack_pointer(regs))
			break;

		callchain_store(entry, frame.return_address);
		fp = frame.next_fp;
	}

	entry->user = entry->nr - nr;
}

static void
perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
	int is_user;

	if (!regs)
		return;

	is_user = user_mode(regs);

	if (!current || current->pid == 0)
		return;

	if (is_user && current->state != TASK_RUNNING)
		return;

	if (!is_user)
		perf_callchain_kernel(regs, entry);

	if (current->mm)
		perf_callchain_user(regs, entry);
}

struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
{
	struct perf_callchain_entry *entry;

	if (in_nmi())
		entry = &__get_cpu_var(nmi_entry);
	else
		entry = &__get_cpu_var(irq_entry);

	entry->nr = 0;
	entry->hv = 0;
	entry->kernel = 0;
	entry->user = 0;

	perf_do_callchain(regs, entry);

	return entry;
}