perf_counter: use list_move_tail()
arch/x86/kernel/cpu/perf_counter.c
241771ef
IM
1/*
2 * Performance counter x86 architecture code
3 *
4 * Copyright(C) 2008 Thomas Gleixner <tglx@linutronix.de>
5 * Copyright(C) 2008 Red Hat, Inc., Ingo Molnar
b56a3802 6 * Copyright(C) 2009 Jaswinder Singh Rajput
241771ef
IM
7 *
8 * For licencing details see kernel-base/COPYING
9 */
10
11#include <linux/perf_counter.h>
12#include <linux/capability.h>
13#include <linux/notifier.h>
14#include <linux/hardirq.h>
15#include <linux/kprobes.h>
4ac13294 16#include <linux/module.h>
241771ef
IM
17#include <linux/kdebug.h>
18#include <linux/sched.h>
19
241771ef
IM
20#include <asm/apic.h>
21
22static bool perf_counters_initialized __read_mostly;
23
24/*
25 * Number of (generic) HW counters:
26 */
862a1a5f
IM
27static int nr_counters_generic __read_mostly;
28static u64 perf_counter_mask __read_mostly;
2f18d1e8 29static u64 counter_value_mask __read_mostly;
b0f3f28e 30static int counter_value_bits __read_mostly;
241771ef 31
862a1a5f 32static int nr_counters_fixed __read_mostly;
703e937c 33
241771ef 34struct cpu_hw_counters {
862a1a5f
IM
35 struct perf_counter *counters[X86_PMC_IDX_MAX];
36 unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
4b39fd96 37 unsigned long interrupts;
b0f3f28e 38 u64 throttle_ctrl;
184fe4ab 39 unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
b0f3f28e 40 int enabled;
241771ef
IM
41};
42
43/*
b56a3802 44 * struct pmc_x86_ops - performance counter x86 ops
241771ef 45 */
b56a3802 46struct pmc_x86_ops {
169e41eb 47 u64 (*save_disable_all)(void);
b0f3f28e
PZ
48 void (*restore_all)(u64);
49 u64 (*get_status)(u64);
50 void (*ack_status)(u64);
51 void (*enable)(int, u64);
52 void (*disable)(int, u64);
169e41eb
JSR
53 unsigned eventsel;
54 unsigned perfctr;
b0f3f28e
PZ
55 u64 (*event_map)(int);
56 u64 (*raw_event)(u64);
169e41eb 57 int max_events;
b56a3802
JSR
58};
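/*
 * pmc_ops is set once at init time (see pmc_intel_init()/pmc_amd_init()
 * below) and abstracts the vendor differences: the Intel callbacks drive
 * the architectural PerfMon global control/status MSRs, while the AMD
 * callbacks operate on the individual K7-style EVNTSEL/PERFCTR MSRs.
 */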
59
60static struct pmc_x86_ops *pmc_ops;
61
b0f3f28e
PZ
62static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
63 .enabled = 1,
64};
241771ef 65
b56a3802
JSR
66/*
67 * Intel PerfMon v3. Used on Core2 and later.
68 */
b0f3f28e 69static const u64 intel_perfmon_event_map[] =
241771ef 70{
f650a672 71 [PERF_COUNT_CPU_CYCLES] = 0x003c,
241771ef
IM
72 [PERF_COUNT_INSTRUCTIONS] = 0x00c0,
73 [PERF_COUNT_CACHE_REFERENCES] = 0x4f2e,
74 [PERF_COUNT_CACHE_MISSES] = 0x412e,
75 [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x00c4,
76 [PERF_COUNT_BRANCH_MISSES] = 0x00c5,
f650a672 77 [PERF_COUNT_BUS_CYCLES] = 0x013c,
241771ef
IM
78};
79
b0f3f28e 80static u64 pmc_intel_event_map(int event)
b56a3802
JSR
81{
82 return intel_perfmon_event_map[event];
83}
241771ef 84
b0f3f28e
PZ
85static u64 pmc_intel_raw_event(u64 event)
86{
87#define CORE_EVNTSEL_EVENT_MASK 0x000000FF
88#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00
89#define CORE_EVNTSEL_COUNTER_MASK 0xFF000000
90
91#define CORE_EVNTSEL_MASK \
92 (CORE_EVNTSEL_EVENT_MASK | \
93 CORE_EVNTSEL_UNIT_MASK | \
94 CORE_EVNTSEL_COUNTER_MASK)
95
96 return event & CORE_EVNTSEL_MASK;
97}
98
f87ad35d
JSR
99/*
100 * AMD Performance Monitor K7 and later.
101 */
b0f3f28e 102static const u64 amd_perfmon_event_map[] =
f87ad35d
JSR
103{
104 [PERF_COUNT_CPU_CYCLES] = 0x0076,
105 [PERF_COUNT_INSTRUCTIONS] = 0x00c0,
106 [PERF_COUNT_CACHE_REFERENCES] = 0x0080,
107 [PERF_COUNT_CACHE_MISSES] = 0x0081,
108 [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x00c4,
109 [PERF_COUNT_BRANCH_MISSES] = 0x00c5,
110};
111
b0f3f28e 112static u64 pmc_amd_event_map(int event)
f87ad35d
JSR
113{
114 return amd_perfmon_event_map[event];
115}
116
b0f3f28e
PZ
117static u64 pmc_amd_raw_event(u64 event)
118{
119#define K7_EVNTSEL_EVENT_MASK 0x7000000FF
120#define K7_EVNTSEL_UNIT_MASK 0x00000FF00
121#define K7_EVNTSEL_COUNTER_MASK 0x0FF000000
122
123#define K7_EVNTSEL_MASK \
124 (K7_EVNTSEL_EVENT_MASK | \
125 K7_EVNTSEL_UNIT_MASK | \
126 K7_EVNTSEL_COUNTER_MASK)
127
128 return event & K7_EVNTSEL_MASK;
129}
130
ee06094f
IM
131/*
132 * Propagate counter elapsed time into the generic counter.
133 * Can only be executed on the CPU where the counter is active.
134 * The elapsed delta is folded into the generic counter's count.
135 */
136static void
137x86_perf_counter_update(struct perf_counter *counter,
138 struct hw_perf_counter *hwc, int idx)
139{
140 u64 prev_raw_count, new_raw_count, delta;
141
ee06094f
IM
142 /*
143 * Careful: an NMI might modify the previous counter value.
144 *
145 * Our tactic to handle this is to first atomically read and
146 * exchange a new raw count - then add that new-prev delta
147 * count to the generic counter atomically:
148 */
149again:
150 prev_raw_count = atomic64_read(&hwc->prev_count);
151 rdmsrl(hwc->counter_base + idx, new_raw_count);
152
153 if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
154 new_raw_count) != prev_raw_count)
155 goto again;
156
157 /*
158 * Now we have the new raw value and have updated the prev
159 * timestamp already. We can now calculate the elapsed delta
160 * (counter-)time and add that to the generic counter.
161 *
162 * Careful, not all hw sign-extends above the physical width
163 * of the count, so we do that by clipping the delta to 32 bits:
164 */
165 delta = (u64)(u32)((s32)new_raw_count - (s32)prev_raw_count);
ee06094f
IM
166
167 atomic64_add(delta, &counter->count);
168 atomic64_sub(delta, &hwc->period_left);
169}
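/*
 * Worked example of the 32-bit clipping above: if prev_raw_count is
 * 0xffffff00 and the hardware wrapped to new_raw_count 0x00000010, the
 * signed 32-bit subtraction yields 0x110 (272), which is the number of
 * events added to the generic counter.
 */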
170
241771ef
IM
171/*
172 * Setup the hardware configuration for a given hw_event_type
173 */
621a01ea 174static int __hw_perf_counter_init(struct perf_counter *counter)
241771ef 175{
9f66a381 176 struct perf_counter_hw_event *hw_event = &counter->hw_event;
241771ef
IM
177 struct hw_perf_counter *hwc = &counter->hw;
178
179 if (unlikely(!perf_counters_initialized))
180 return -EINVAL;
181
182 /*
0475f9ea 183 * Generate PMC IRQs:
241771ef
IM
184 * (keep 'enabled' bit clear for now)
185 */
0475f9ea 186 hwc->config = ARCH_PERFMON_EVENTSEL_INT;
241771ef
IM
187
188 /*
0475f9ea 189 * Count user and OS events unless requested not to.
241771ef 190 */
0475f9ea
PM
191 if (!hw_event->exclude_user)
192 hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
193 if (!hw_event->exclude_kernel)
241771ef 194 hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
0475f9ea
PM
195
196 /*
197 * If privileged enough, allow NMI events:
198 */
199 hwc->nmi = 0;
200 if (capable(CAP_SYS_ADMIN) && hw_event->nmi)
201 hwc->nmi = 1;
241771ef 202
9f66a381 203 hwc->irq_period = hw_event->irq_period;
241771ef
IM
204 /*
205 * Intel PMCs cannot be accessed sanely above 32 bit width,
206 * so we install an artificial 1<<31 period regardless of
207 * the generic counter period:
208 */
f87ad35d
JSR
209 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
210 if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF)
211 hwc->irq_period = 0x7FFFFFFF;
241771ef 212
ee06094f 213 atomic64_set(&hwc->period_left, hwc->irq_period);
241771ef
IM
214
215 /*
dfa7c899 216 * Raw event types provide the config in the event structure
241771ef 217 */
9f66a381 218 if (hw_event->raw) {
b0f3f28e 219 hwc->config |= pmc_ops->raw_event(hw_event->type);
241771ef 220 } else {
b56a3802 221 if (hw_event->type >= pmc_ops->max_events)
241771ef
IM
222 return -EINVAL;
223 /*
224 * The generic map:
225 */
b56a3802 226 hwc->config |= pmc_ops->event_map(hw_event->type);
241771ef 227 }
241771ef
IM
228 counter->wakeup_pending = 0;
229
230 return 0;
231}
232
b56a3802 233static u64 pmc_intel_save_disable_all(void)
4ac13294
TG
234{
235 u64 ctrl;
236
237 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
862a1a5f 238 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
2b9ff0db 239
4ac13294 240 return ctrl;
241771ef 241}
b56a3802 242
f87ad35d
JSR
243static u64 pmc_amd_save_disable_all(void)
244{
b0f3f28e
PZ
245 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
246 int enabled, idx;
247
248 enabled = cpuc->enabled;
249 cpuc->enabled = 0;
250 barrier();
f87ad35d
JSR
251
252 for (idx = 0; idx < nr_counters_generic; idx++) {
b0f3f28e
PZ
253 u64 val;
254
f87ad35d 255 rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
b0f3f28e
PZ
256 if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) {
257 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
258 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
259 }
f87ad35d
JSR
260 }
261
b0f3f28e 262 return enabled;
f87ad35d
JSR
263}
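/*
 * Unlike the Intel variant there is no global-enable MSR to save here:
 * the returned "ctrl" value is just the previous cpuc->enabled flag, and
 * disabling is done by clearing the ENABLE bit in every EVNTSEL MSR.
 * cpuc->enabled is cleared (with a barrier) before the loop, and
 * pmc_amd_enable() only sets ARCH_PERFMON_EVENTSEL0_ENABLE while
 * cpuc->enabled is set.
 */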
264
b56a3802
JSR
265u64 hw_perf_save_disable(void)
266{
267 if (unlikely(!perf_counters_initialized))
268 return 0;
269
270 return pmc_ops->save_disable_all();
271}
b0f3f28e
PZ
272/*
273 * Exported because of ACPI idle
274 */
01b2838c 275EXPORT_SYMBOL_GPL(hw_perf_save_disable);
241771ef 276
b56a3802
JSR
277static void pmc_intel_restore_all(u64 ctrl)
278{
279 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
280}
281
f87ad35d
JSR
282static void pmc_amd_restore_all(u64 ctrl)
283{
b0f3f28e 284 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
f87ad35d
JSR
285 int idx;
286
b0f3f28e
PZ
287 cpuc->enabled = ctrl;
288 barrier();
289 if (!ctrl)
290 return;
291
f87ad35d 292 for (idx = 0; idx < nr_counters_generic; idx++) {
184fe4ab 293 if (test_bit(idx, cpuc->active_mask)) {
b0f3f28e
PZ
294 u64 val;
295
f87ad35d
JSR
296 rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
297 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
298 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
299 }
300 }
301}
302
ee06094f
IM
303void hw_perf_restore(u64 ctrl)
304{
2b9ff0db
IM
305 if (unlikely(!perf_counters_initialized))
306 return;
307
b56a3802 308 pmc_ops->restore_all(ctrl);
ee06094f 309}
b0f3f28e
PZ
310/*
311 * Exported because of ACPI idle
312 */
ee06094f
IM
313EXPORT_SYMBOL_GPL(hw_perf_restore);
314
b0f3f28e
PZ
315static u64 pmc_intel_get_status(u64 mask)
316{
317 u64 status;
318
319 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
320
321 return status;
322}
323
324static u64 pmc_amd_get_status(u64 mask)
325{
326 u64 status = 0;
327 int idx;
328
329 for (idx = 0; idx < nr_counters_generic; idx++) {
330 s64 val;
331
332 if (!(mask & (1 << idx)))
333 continue;
334
335 rdmsrl(MSR_K7_PERFCTR0 + idx, val);
336 val <<= (64 - counter_value_bits);
337 if (val >= 0)
338 status |= (1 << idx);
339 }
340
341 return status;
342}
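/*
 * The counters are programmed with a negative offset (see
 * __hw_perf_counter_set_period()), so a counter that has not yet reached
 * its period still has its top implemented bit (bit 47) set.  Shifting by
 * (64 - counter_value_bits) moves that bit into the sign position: a
 * non-negative value therefore means the counter wrapped past zero, i.e.
 * it overflowed, and its bit is set in the emulated status word.
 */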
343
344static u64 hw_perf_get_status(u64 mask)
345{
346 if (unlikely(!perf_counters_initialized))
347 return 0;
348
349 return pmc_ops->get_status(mask);
350}
351
352static void pmc_intel_ack_status(u64 ack)
353{
354 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
355}
356
357static void pmc_amd_ack_status(u64 ack)
358{
359}
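/*
 * Nothing to acknowledge on AMD: there is no equivalent of the
 * MSR_CORE_PERF_GLOBAL_OVF_CTRL write above, the overflow state is simply
 * reconstructed from the counter values in pmc_amd_get_status().
 */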
360
361static void hw_perf_ack_status(u64 ack)
362{
363 if (unlikely(!perf_counters_initialized))
364 return;
365
366 pmc_ops->ack_status(ack);
367}
368
369static void pmc_intel_enable(int idx, u64 config)
370{
371 wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx,
372 config | ARCH_PERFMON_EVENTSEL0_ENABLE);
373}
374
375static void pmc_amd_enable(int idx, u64 config)
376{
377 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
378
184fe4ab 379 set_bit(idx, cpuc->active_mask);
b0f3f28e
PZ
380 if (cpuc->enabled)
381 config |= ARCH_PERFMON_EVENTSEL0_ENABLE;
382
383 wrmsrl(MSR_K7_EVNTSEL0 + idx, config);
384}
385
386static void hw_perf_enable(int idx, u64 config)
387{
388 if (unlikely(!perf_counters_initialized))
389 return;
390
391 pmc_ops->enable(idx, config);
392}
393
394static void pmc_intel_disable(int idx, u64 config)
395{
396 wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, config);
397}
398
399static void pmc_amd_disable(int idx, u64 config)
400{
401 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
402
184fe4ab 403 clear_bit(idx, cpuc->active_mask);
b0f3f28e
PZ
404 wrmsrl(MSR_K7_EVNTSEL0 + idx, config);
405
406}
407
408static void hw_perf_disable(int idx, u64 config)
409{
410 if (unlikely(!perf_counters_initialized))
411 return;
412
413 pmc_ops->disable(idx, config);
414}
415
2f18d1e8
IM
416static inline void
417__pmc_fixed_disable(struct perf_counter *counter,
418 struct hw_perf_counter *hwc, unsigned int __idx)
419{
420 int idx = __idx - X86_PMC_IDX_FIXED;
421 u64 ctrl_val, mask;
422 int err;
423
424 mask = 0xfULL << (idx * 4);
425
426 rdmsrl(hwc->config_base, ctrl_val);
427 ctrl_val &= ~mask;
428 err = checking_wrmsrl(hwc->config_base, ctrl_val);
429}
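/*
 * Each fixed-function counter owns a 4-bit field in
 * MSR_ARCH_PERFMON_FIXED_CTR_CTRL, hence the idx * 4 shift: disabling
 * clears the whole nibble, while __pmc_fixed_enable() below re-populates
 * it with the PMI (0x8), ring-3 (0x2) and ring-0 (0x1) bits.
 */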
430
7e2ae347 431static inline void
eb2b8618 432__pmc_generic_disable(struct perf_counter *counter,
ee06094f 433 struct hw_perf_counter *hwc, unsigned int idx)
7e2ae347 434{
2f18d1e8 435 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
2b583d8b
JSR
436 __pmc_fixed_disable(counter, hwc, idx);
437 else
b0f3f28e 438 hw_perf_disable(idx, hwc->config);
7e2ae347
IM
439}
440
2f18d1e8 441static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);
241771ef 442
ee06094f
IM
443/*
444 * Set the next IRQ period, based on the hwc->period_left value.
445 * To be called with the counter disabled in hw:
446 */
447static void
448__hw_perf_counter_set_period(struct perf_counter *counter,
449 struct hw_perf_counter *hwc, int idx)
241771ef 450{
2f18d1e8 451 s64 left = atomic64_read(&hwc->period_left);
595258aa 452 s64 period = hwc->irq_period;
2f18d1e8 453 int err;
ee06094f 454
ee06094f
IM
455 /*
456 * If we are way outside a reasonable range then just skip forward:
457 */
458 if (unlikely(left <= -period)) {
459 left = period;
460 atomic64_set(&hwc->period_left, left);
461 }
462
463 if (unlikely(left <= 0)) {
464 left += period;
465 atomic64_set(&hwc->period_left, left);
466 }
241771ef 467
ee06094f
IM
468 per_cpu(prev_left[idx], smp_processor_id()) = left;
469
470 /*
471 * The hw counter starts counting from this counter offset,
472 * mark it to be able to extract future deltas:
473 */
2f18d1e8 474 atomic64_set(&hwc->prev_count, (u64)-left);
ee06094f 475
2f18d1e8
IM
476 err = checking_wrmsrl(hwc->counter_base + idx,
477 (u64)(-left) & counter_value_mask);
478}
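/*
 * Example: with hwc->irq_period == 100000 and no leftover, both
 * prev_count and the hardware counter are set to (u64)-100000 (masked to
 * counter_value_mask), so the counter overflows and raises its interrupt
 * after another 100000 events.
 */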
479
480static inline void
481__pmc_fixed_enable(struct perf_counter *counter,
482 struct hw_perf_counter *hwc, unsigned int __idx)
483{
484 int idx = __idx - X86_PMC_IDX_FIXED;
485 u64 ctrl_val, bits, mask;
486 int err;
487
488 /*
0475f9ea
PM
489 * Enable IRQ generation (0x8),
490 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
491 * if requested:
2f18d1e8 492 */
0475f9ea
PM
493 bits = 0x8ULL;
494 if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
495 bits |= 0x2;
2f18d1e8
IM
496 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
497 bits |= 0x1;
498 bits <<= (idx * 4);
499 mask = 0xfULL << (idx * 4);
500
501 rdmsrl(hwc->config_base, ctrl_val);
502 ctrl_val &= ~mask;
503 ctrl_val |= bits;
504 err = checking_wrmsrl(hwc->config_base, ctrl_val);
7e2ae347
IM
505}
506
ee06094f 507static void
eb2b8618 508__pmc_generic_enable(struct perf_counter *counter,
ee06094f 509 struct hw_perf_counter *hwc, int idx)
7e2ae347 510{
2f18d1e8 511 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
2b583d8b
JSR
512 __pmc_fixed_enable(counter, hwc, idx);
513 else
b0f3f28e 514 hw_perf_enable(idx, hwc->config);
241771ef
IM
515}
516
2f18d1e8
IM
517static int
518fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
862a1a5f 519{
2f18d1e8
IM
520 unsigned int event;
521
f87ad35d
JSR
522 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
523 return -1;
524
2f18d1e8
IM
525 if (unlikely(hwc->nmi))
526 return -1;
527
528 event = hwc->config & ARCH_PERFMON_EVENT_MASK;
529
b56a3802 530 if (unlikely(event == pmc_ops->event_map(PERF_COUNT_INSTRUCTIONS)))
2f18d1e8 531 return X86_PMC_IDX_FIXED_INSTRUCTIONS;
b56a3802 532 if (unlikely(event == pmc_ops->event_map(PERF_COUNT_CPU_CYCLES)))
2f18d1e8 533 return X86_PMC_IDX_FIXED_CPU_CYCLES;
b56a3802 534 if (unlikely(event == pmc_ops->event_map(PERF_COUNT_BUS_CYCLES)))
2f18d1e8
IM
535 return X86_PMC_IDX_FIXED_BUS_CYCLES;
536
862a1a5f
IM
537 return -1;
538}
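/*
 * Only the three generic events that have fixed-purpose counters
 * (instructions, CPU cycles, bus cycles) can be scheduled there, and only
 * for non-NMI counters on Intel CPUs; every other case returns -1 and
 * falls back to a generic counter in pmc_generic_enable().
 */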
539
ee06094f
IM
540/*
541 * Find a PMC slot for the freshly enabled / scheduled in counter:
542 */
95cdd2e7 543static int pmc_generic_enable(struct perf_counter *counter)
241771ef
IM
544{
545 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
546 struct hw_perf_counter *hwc = &counter->hw;
2f18d1e8 547 int idx;
241771ef 548
2f18d1e8
IM
549 idx = fixed_mode_idx(counter, hwc);
550 if (idx >= 0) {
551 /*
552 * Try to get the fixed counter, if that is already taken
553 * then try to get a generic counter:
554 */
555 if (test_and_set_bit(idx, cpuc->used))
556 goto try_generic;
0dff86aa 557
2f18d1e8
IM
558 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
559 /*
560 * We set it so that counter_base + idx in wrmsr/rdmsr maps to
561 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
562 */
563 hwc->counter_base =
564 MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
241771ef 565 hwc->idx = idx;
2f18d1e8
IM
566 } else {
567 idx = hwc->idx;
568 /* Try to get the previous generic counter again */
569 if (test_and_set_bit(idx, cpuc->used)) {
570try_generic:
571 idx = find_first_zero_bit(cpuc->used, nr_counters_generic);
572 if (idx == nr_counters_generic)
573 return -EAGAIN;
574
575 set_bit(idx, cpuc->used);
576 hwc->idx = idx;
577 }
b56a3802
JSR
578 hwc->config_base = pmc_ops->eventsel;
579 hwc->counter_base = pmc_ops->perfctr;
241771ef
IM
580 }
581
582 perf_counters_lapic_init(hwc->nmi);
583
eb2b8618 584 __pmc_generic_disable(counter, hwc, idx);
241771ef 585
862a1a5f 586 cpuc->counters[idx] = counter;
2f18d1e8
IM
587 /*
588 * Make it visible before enabling the hw:
589 */
590 smp_wmb();
7e2ae347 591
ee06094f 592 __hw_perf_counter_set_period(counter, hwc, idx);
eb2b8618 593 __pmc_generic_enable(counter, hwc, idx);
95cdd2e7
IM
594
595 return 0;
241771ef
IM
596}
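/*
 * Placement order used above: a matching fixed-purpose counter if
 * fixed_mode_idx() found one, otherwise the generic slot this counter
 * used last time (hwc->idx), otherwise the first free generic slot;
 * -EAGAIN is returned when all generic counters are busy.
 */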
597
598void perf_counter_print_debug(void)
599{
2f18d1e8 600 u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
0dff86aa 601 struct cpu_hw_counters *cpuc;
1e125676
IM
602 int cpu, idx;
603
862a1a5f 604 if (!nr_counters_generic)
1e125676 605 return;
241771ef
IM
606
607 local_irq_disable();
608
609 cpu = smp_processor_id();
0dff86aa 610 cpuc = &per_cpu(cpu_hw_counters, cpu);
241771ef 611
f87ad35d 612 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
a1ef58f4
JSR
613 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
614 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
615 rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
616 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
617
618 pr_info("\n");
619 pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
620 pr_info("CPU#%d: status: %016llx\n", cpu, status);
621 pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
622 pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
f87ad35d 623 }
a1ef58f4 624 pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used);
241771ef 625
862a1a5f 626 for (idx = 0; idx < nr_counters_generic; idx++) {
b56a3802
JSR
627 rdmsrl(pmc_ops->eventsel + idx, pmc_ctrl);
628 rdmsrl(pmc_ops->perfctr + idx, pmc_count);
241771ef 629
ee06094f 630 prev_left = per_cpu(prev_left[idx], cpu);
241771ef 631
a1ef58f4 632 pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n",
241771ef 633 cpu, idx, pmc_ctrl);
a1ef58f4 634 pr_info("CPU#%d: gen-PMC%d count: %016llx\n",
241771ef 635 cpu, idx, pmc_count);
a1ef58f4 636 pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
ee06094f 637 cpu, idx, prev_left);
241771ef 638 }
2f18d1e8
IM
639 for (idx = 0; idx < nr_counters_fixed; idx++) {
640 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
641
a1ef58f4 642 pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
2f18d1e8
IM
643 cpu, idx, pmc_count);
644 }
241771ef
IM
645 local_irq_enable();
646}
647
eb2b8618 648static void pmc_generic_disable(struct perf_counter *counter)
241771ef
IM
649{
650 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
651 struct hw_perf_counter *hwc = &counter->hw;
652 unsigned int idx = hwc->idx;
653
eb2b8618 654 __pmc_generic_disable(counter, hwc, idx);
241771ef
IM
655
656 clear_bit(idx, cpuc->used);
862a1a5f 657 cpuc->counters[idx] = NULL;
2f18d1e8
IM
658 /*
659 * Make sure the cleared pointer becomes visible before we
660 * (potentially) free the counter:
661 */
662 smp_wmb();
241771ef 663
ee06094f
IM
664 /*
665 * Drain the remaining delta count out of a counter
666 * that we are disabling:
667 */
668 x86_perf_counter_update(counter, hwc, idx);
241771ef
IM
669}
670
671static void perf_store_irq_data(struct perf_counter *counter, u64 data)
672{
673 struct perf_data *irqdata = counter->irqdata;
674
675 if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
676 irqdata->overrun++;
677 } else {
678 u64 *p = (u64 *) &irqdata->data[irqdata->len];
679
680 *p = data;
681 irqdata->len += sizeof(u64);
682 }
683}
684
7e2ae347 685/*
ee06094f
IM
686 * Save and restart an expired counter. Called by NMI contexts,
687 * so it has to be careful about preempting normal counter ops:
7e2ae347 688 */
241771ef
IM
689static void perf_save_and_restart(struct perf_counter *counter)
690{
691 struct hw_perf_counter *hwc = &counter->hw;
692 int idx = hwc->idx;
241771ef 693
ee06094f
IM
694 x86_perf_counter_update(counter, hwc, idx);
695 __hw_perf_counter_set_period(counter, hwc, idx);
7e2ae347 696
2f18d1e8 697 if (counter->state == PERF_COUNTER_STATE_ACTIVE)
eb2b8618 698 __pmc_generic_enable(counter, hwc, idx);
241771ef
IM
699}
700
701static void
04289bb9 702perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
241771ef 703{
04289bb9 704 struct perf_counter *counter, *group_leader = sibling->group_leader;
241771ef 705
04289bb9 706 /*
ee06094f 707 * Store sibling timestamps (if any):
04289bb9
IM
708 */
709 list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
2f18d1e8 710
ee06094f 711 x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
04289bb9 712 perf_store_irq_data(sibling, counter->hw_event.type);
ee06094f 713 perf_store_irq_data(sibling, atomic64_read(&counter->count));
241771ef
IM
714 }
715}
716
4b39fd96
MG
717/*
718 * Maximum interrupt frequency of 100KHz per CPU
719 */
169e41eb 720#define PERFMON_MAX_INTERRUPTS (100000/HZ)
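/*
 * The limit works out to 100000/HZ interrupts per unthrottle period
 * (100 with HZ=1000): the handler below stops restoring the PMU once
 * cpuc->interrupts exceeds it, and perf_counter_unthrottle() re-enables
 * the PMU and resets the count.
 */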
4b39fd96 721
241771ef
IM
722/*
723 * This handler is triggered by the local APIC, so the APIC IRQ handling
724 * rules apply:
725 */
b0f3f28e 726static int __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
241771ef
IM
727{
728 int bit, cpu = smp_processor_id();
4b39fd96 729 u64 ack, status;
1b023a96 730 struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu);
b0f3f28e 731 int ret = 0;
43874d23 732
b0f3f28e 733 cpuc->throttle_ctrl = hw_perf_save_disable();
241771ef 734
b0f3f28e 735 status = hw_perf_get_status(cpuc->throttle_ctrl);
87b9cf46
IM
736 if (!status)
737 goto out;
738
b0f3f28e 739 ret = 1;
241771ef 740again:
d278c484 741 inc_irq_stat(apic_perf_irqs);
241771ef 742 ack = status;
2f18d1e8 743 for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
862a1a5f 744 struct perf_counter *counter = cpuc->counters[bit];
241771ef
IM
745
746 clear_bit(bit, (unsigned long *) &status);
747 if (!counter)
748 continue;
749
750 perf_save_and_restart(counter);
751
9f66a381 752 switch (counter->hw_event.record_type) {
241771ef
IM
753 case PERF_RECORD_SIMPLE:
754 continue;
755 case PERF_RECORD_IRQ:
756 perf_store_irq_data(counter, instruction_pointer(regs));
757 break;
758 case PERF_RECORD_GROUP:
241771ef
IM
759 perf_handle_group(counter, &status, &ack);
760 break;
761 }
762 /*
763 * From NMI context we cannot call into the scheduler to
eb2b8618 764 * do a task wakeup - but we mark these counters as
241771ef
IM
765 * wakeup_pending and initiate a wakeup callback:
766 */
767 if (nmi) {
768 counter->wakeup_pending = 1;
769 set_tsk_thread_flag(current, TIF_PERF_COUNTERS);
770 } else {
771 wake_up(&counter->waitq);
772 }
773 }
774
b0f3f28e 775 hw_perf_ack_status(ack);
241771ef
IM
776
777 /*
778 * Repeat if there is more work to be done:
779 */
b0f3f28e 780 status = hw_perf_get_status(cpuc->throttle_ctrl);
241771ef
IM
781 if (status)
782 goto again;
87b9cf46 783out:
241771ef 784 /*
1b023a96 785 * Restore - do not reenable when global enable is off or throttled:
241771ef 786 */
4b39fd96 787 if (++cpuc->interrupts < PERFMON_MAX_INTERRUPTS)
b0f3f28e
PZ
788 hw_perf_restore(cpuc->throttle_ctrl);
789
790 return ret;
1b023a96
MG
791}
792
793void perf_counter_unthrottle(void)
794{
795 struct cpu_hw_counters *cpuc;
796
797 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
798 return;
799
800 if (unlikely(!perf_counters_initialized))
801 return;
802
b0f3f28e 803 cpuc = &__get_cpu_var(cpu_hw_counters);
4b39fd96 804 if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) {
1b023a96 805 if (printk_ratelimit())
4b39fd96 806 printk(KERN_WARNING "PERFMON: max interrupts exceeded!\n");
b0f3f28e 807 hw_perf_restore(cpuc->throttle_ctrl);
1b023a96 808 }
4b39fd96 809 cpuc->interrupts = 0;
241771ef
IM
810}
811
812void smp_perf_counter_interrupt(struct pt_regs *regs)
813{
814 irq_enter();
241771ef 815 apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
b0f3f28e 816 ack_APIC_irq();
241771ef 817 __smp_perf_counter_interrupt(regs, 0);
241771ef
IM
818 irq_exit();
819}
820
821/*
822 * This handler is triggered by NMI contexts:
823 */
824void perf_counter_notify(struct pt_regs *regs)
825{
826 struct cpu_hw_counters *cpuc;
827 unsigned long flags;
828 int bit, cpu;
829
830 local_irq_save(flags);
831 cpu = smp_processor_id();
832 cpuc = &per_cpu(cpu_hw_counters, cpu);
833
862a1a5f
IM
834 for_each_bit(bit, cpuc->used, X86_PMC_IDX_MAX) {
835 struct perf_counter *counter = cpuc->counters[bit];
241771ef
IM
836
837 if (!counter)
838 continue;
839
840 if (counter->wakeup_pending) {
841 counter->wakeup_pending = 0;
842 wake_up(&counter->waitq);
843 }
844 }
845
846 local_irq_restore(flags);
847}
848
3415dd91 849void perf_counters_lapic_init(int nmi)
241771ef
IM
850{
851 u32 apic_val;
852
853 if (!perf_counters_initialized)
854 return;
855 /*
856 * Enable the performance counter vector in the APIC LVT:
857 */
858 apic_val = apic_read(APIC_LVTERR);
859
860 apic_write(APIC_LVTERR, apic_val | APIC_LVT_MASKED);
861 if (nmi)
862 apic_write(APIC_LVTPC, APIC_DM_NMI);
863 else
864 apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
865 apic_write(APIC_LVTERR, apic_val);
866}
867
868static int __kprobes
869perf_counter_nmi_handler(struct notifier_block *self,
870 unsigned long cmd, void *__args)
871{
872 struct die_args *args = __args;
873 struct pt_regs *regs;
b0f3f28e
PZ
874 int ret;
875
876 switch (cmd) {
877 case DIE_NMI:
878 case DIE_NMI_IPI:
879 break;
241771ef 880
b0f3f28e 881 default:
241771ef 882 return NOTIFY_DONE;
b0f3f28e 883 }
241771ef
IM
884
885 regs = args->regs;
886
887 apic_write(APIC_LVTPC, APIC_DM_NMI);
b0f3f28e 888 ret = __smp_perf_counter_interrupt(regs, 1);
241771ef 889
b0f3f28e 890 return ret ? NOTIFY_STOP : NOTIFY_OK;
241771ef
IM
891}
892
893static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
5b75af0a
MG
894 .notifier_call = perf_counter_nmi_handler,
895 .next = NULL,
896 .priority = 1
241771ef
IM
897};
898
b56a3802
JSR
899static struct pmc_x86_ops pmc_intel_ops = {
900 .save_disable_all = pmc_intel_save_disable_all,
901 .restore_all = pmc_intel_restore_all,
b0f3f28e
PZ
902 .get_status = pmc_intel_get_status,
903 .ack_status = pmc_intel_ack_status,
904 .enable = pmc_intel_enable,
905 .disable = pmc_intel_disable,
b56a3802
JSR
906 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
907 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
908 .event_map = pmc_intel_event_map,
b0f3f28e 909 .raw_event = pmc_intel_raw_event,
b56a3802
JSR
910 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
911};
912
f87ad35d
JSR
913static struct pmc_x86_ops pmc_amd_ops = {
914 .save_disable_all = pmc_amd_save_disable_all,
915 .restore_all = pmc_amd_restore_all,
b0f3f28e
PZ
916 .get_status = pmc_amd_get_status,
917 .ack_status = pmc_amd_ack_status,
918 .enable = pmc_amd_enable,
919 .disable = pmc_amd_disable,
f87ad35d
JSR
920 .eventsel = MSR_K7_EVNTSEL0,
921 .perfctr = MSR_K7_PERFCTR0,
922 .event_map = pmc_amd_event_map,
b0f3f28e 923 .raw_event = pmc_amd_raw_event,
f87ad35d
JSR
924 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
925};
926
b56a3802 927static struct pmc_x86_ops *pmc_intel_init(void)
241771ef
IM
928{
929 union cpuid10_eax eax;
241771ef 930 unsigned int ebx;
703e937c
IM
931 unsigned int unused;
932 union cpuid10_edx edx;
241771ef 933
241771ef
IM
934 /*
935 * Check whether the Architectural PerfMon supports
936 * Branch Misses Retired Event or not.
937 */
703e937c 938 cpuid(10, &eax.full, &ebx, &unused, &edx.full);
241771ef 939 if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
b56a3802 940 return NULL;
241771ef 941
a1ef58f4
JSR
942 pr_info("Intel Performance Monitoring support detected.\n");
943 pr_info("... version: %d\n", eax.split.version_id);
944 pr_info("... bit width: %d\n", eax.split.bit_width);
945 pr_info("... mask length: %d\n", eax.split.mask_length);
b56a3802 946
862a1a5f 947 nr_counters_generic = eax.split.num_counters;
b56a3802
JSR
948 nr_counters_fixed = edx.split.num_counters_fixed;
949 counter_value_mask = (1ULL << eax.split.bit_width) - 1;
950
951 return &pmc_intel_ops;
952}
953
f87ad35d
JSR
954static struct pmc_x86_ops *pmc_amd_init(void)
955{
956 nr_counters_generic = 4;
957 nr_counters_fixed = 0;
b5e8acf6
PZ
958 counter_value_mask = 0x0000FFFFFFFFFFFFULL;
959 counter_value_bits = 48;
f87ad35d 960
a1ef58f4 961 pr_info("AMD Performance Monitoring support detected.\n");
f87ad35d
JSR
962
963 return &pmc_amd_ops;
964}
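/*
 * The AMD numbers are hard-coded for K7 and later cores (4 generic
 * counters, 48 bits wide, no fixed counters); there is no enumeration
 * here analogous to the CPUID leaf 10 query in pmc_intel_init().
 */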
965
b56a3802
JSR
966void __init init_hw_perf_counters(void)
967{
968 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
969 return;
970
971 switch (boot_cpu_data.x86_vendor) {
972 case X86_VENDOR_INTEL:
973 pmc_ops = pmc_intel_init();
974 break;
f87ad35d
JSR
975 case X86_VENDOR_AMD:
976 pmc_ops = pmc_amd_init();
977 break;
b56a3802
JSR
978 }
979 if (!pmc_ops)
980 return;
981
a1ef58f4 982 pr_info("... num counters: %d\n", nr_counters_generic);
862a1a5f
IM
983 if (nr_counters_generic > X86_PMC_MAX_GENERIC) {
984 nr_counters_generic = X86_PMC_MAX_GENERIC;
241771ef 985 WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
862a1a5f 986 nr_counters_generic, X86_PMC_MAX_GENERIC);
241771ef 987 }
862a1a5f
IM
988 perf_counter_mask = (1 << nr_counters_generic) - 1;
989 perf_max_counters = nr_counters_generic;
241771ef 990
a1ef58f4 991 pr_info("... value mask: %016Lx\n", counter_value_mask);
2f18d1e8 992
862a1a5f
IM
993 if (nr_counters_fixed > X86_PMC_MAX_FIXED) {
994 nr_counters_fixed = X86_PMC_MAX_FIXED;
703e937c 995 WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
862a1a5f 996 nr_counters_fixed, X86_PMC_MAX_FIXED);
703e937c 997 }
a1ef58f4 998 pr_info("... fixed counters: %d\n", nr_counters_fixed);
862a1a5f
IM
999
1000 perf_counter_mask |= ((1LL << nr_counters_fixed)-1) << X86_PMC_IDX_FIXED;
241771ef 1001
a1ef58f4 1002 pr_info("... counter mask: %016Lx\n", perf_counter_mask);
75f224cf
IM
1003 perf_counters_initialized = true;
1004
241771ef
IM
1005 perf_counters_lapic_init(0);
1006 register_die_notifier(&perf_counter_nmi_notifier);
241771ef 1007}
621a01ea 1008
eb2b8618 1009static void pmc_generic_read(struct perf_counter *counter)
ee06094f
IM
1010{
1011 x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
1012}
1013
5c92d124 1014static const struct hw_perf_counter_ops x86_perf_counter_ops = {
7671581f
IM
1015 .enable = pmc_generic_enable,
1016 .disable = pmc_generic_disable,
1017 .read = pmc_generic_read,
621a01ea
IM
1018};
1019
5c92d124
IM
1020const struct hw_perf_counter_ops *
1021hw_perf_counter_init(struct perf_counter *counter)
621a01ea
IM
1022{
1023 int err;
1024
1025 err = __hw_perf_counter_init(counter);
1026 if (err)
1027 return NULL;
1028
1029 return &x86_perf_counter_ops;
1030}