Commit | Line | Data |
---|---|---|
9bd46da4 MF |
1 | /* |
2 | * Xtensa Performance Monitor Module driver | |
3 | * See Tensilica Debug User's Guide for PMU registers documentation. | |
4 | * | |
5 | * Copyright (C) 2015 Cadence Design Systems Inc. | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License version 2 as | |
9 | * published by the Free Software Foundation. | |
10 | */ | |
11 | ||
12 | #include <linux/interrupt.h> | |
13 | #include <linux/irqdomain.h> | |
14 | #include <linux/module.h> | |
15 | #include <linux/of.h> | |
16 | #include <linux/perf_event.h> | |
17 | #include <linux/platform_device.h> | |
18 | ||
19 | #include <asm/processor.h> | |
20 | #include <asm/stacktrace.h> | |
21 | ||
/* Global control/status for all perf counters */
#define XTENSA_PMU_PMG 0x1000
/* Perf counter values */
#define XTENSA_PMU_PM(i) (0x1080 + (i) * 4)
/* Perf counter control registers */
#define XTENSA_PMU_PMCTRL(i) (0x1100 + (i) * 4)
/* Perf counter status registers */
#define XTENSA_PMU_PMSTAT(i) (0x1180 + (i) * 4)

/* PMG: global enable bit for the whole counter block */
#define XTENSA_PMU_PMG_PMEN 0x1

/* Counters are 32 bits wide; used to mask counter deltas */
#define XTENSA_PMU_COUNTER_MASK 0xffffffffULL
/*
 * Largest period programmed into a counter: keeping the MSB clear
 * leaves room for the counter to run up to overflow (see how
 * xtensa_perf_event_set_period() programs -left into the counter).
 */
#define XTENSA_PMU_COUNTER_MAX 0x7fffffff

/* PMCTRL bit fields (see Tensilica Debug User's Guide for details) */
#define XTENSA_PMU_PMCTRL_INTEN 0x00000001	/* interrupt enable */
#define XTENSA_PMU_PMCTRL_KRNLCNT 0x00000008	/* kernel-mode counting; exact
						 * semantics per the debug guide */
#define XTENSA_PMU_PMCTRL_TRACELEVEL 0x000000f0	/* trace-level field; always set
						 * to all-ones by XTENSA_PMU_MASK */
#define XTENSA_PMU_PMCTRL_SELECT_SHIFT 8
#define XTENSA_PMU_PMCTRL_SELECT 0x00001f00	/* event group select */
#define XTENSA_PMU_PMCTRL_MASK_SHIFT 16
#define XTENSA_PMU_PMCTRL_MASK 0xffff0000	/* event subtype mask within group */

/* Compose a PMCTRL value from an event select and subtype mask */
#define XTENSA_PMU_MASK(select, mask) \
	(((select) << XTENSA_PMU_PMCTRL_SELECT_SHIFT) | \
	 ((mask) << XTENSA_PMU_PMCTRL_MASK_SHIFT) | \
	 XTENSA_PMU_PMCTRL_TRACELEVEL | \
	 XTENSA_PMU_PMCTRL_INTEN)

/* PMSTAT bits: counter overflowed / overflow interrupt asserted */
#define XTENSA_PMU_PMSTAT_OVFL 0x00000001
#define XTENSA_PMU_PMSTAT_INTASRT 0x00000010
52 | ||
/* Per-CPU bookkeeping of events scheduled onto hardware counters */
struct xtensa_pmu_events {
	/* Array of events currently on this core */
	struct perf_event *event[XCHAL_NUM_PERF_COUNTERS];
	/* Bitmap of used hardware counters */
	unsigned long used_mask[BITS_TO_LONGS(XCHAL_NUM_PERF_COUNTERS)];
};
static DEFINE_PER_CPU(struct xtensa_pmu_events, xtensa_pmu_events);
60 | ||
/*
 * PMCTRL select/mask values implementing the generic hardware events.
 * A zero entry means the event is not supported (see the validation in
 * xtensa_pmu_event_init()).
 */
static const u32 xtensa_hw_ctl[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = XTENSA_PMU_MASK(0, 0x1),
	[PERF_COUNT_HW_INSTRUCTIONS] = XTENSA_PMU_MASK(2, 0xffff),
	[PERF_COUNT_HW_CACHE_REFERENCES] = XTENSA_PMU_MASK(10, 0x1),
	[PERF_COUNT_HW_CACHE_MISSES] = XTENSA_PMU_MASK(12, 0x1),
	/* Taken and non-taken branches + taken loop ends */
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XTENSA_PMU_MASK(2, 0x490),
	/* Instruction-related + other global stall cycles */
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = XTENSA_PMU_MASK(4, 0x1ff),
	/* Data-related global stall cycles */
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = XTENSA_PMU_MASK(3, 0x1ff),
};
73 | ||
/* Shorthand for the generic cache-event enum constants */
#define C(_x) PERF_COUNT_HW_CACHE_##_x

/*
 * PMCTRL select/mask values for the generic cache events, indexed by
 * [cache][op][result].  A zero entry means the combination is not
 * supported (rejected by xtensa_pmu_cache_event()).
 */
static const u32 xtensa_cache_ctl[][C(OP_MAX)][C(RESULT_MAX)] = {
	[C(L1D)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = XTENSA_PMU_MASK(10, 0x1),
			[C(RESULT_MISS)] = XTENSA_PMU_MASK(10, 0x2),
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = XTENSA_PMU_MASK(11, 0x1),
			[C(RESULT_MISS)] = XTENSA_PMU_MASK(11, 0x2),
		},
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = XTENSA_PMU_MASK(8, 0x1),
			[C(RESULT_MISS)] = XTENSA_PMU_MASK(8, 0x2),
		},
	},
	[C(DTLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = XTENSA_PMU_MASK(9, 0x1),
			[C(RESULT_MISS)] = XTENSA_PMU_MASK(9, 0x8),
		},
	},
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = XTENSA_PMU_MASK(7, 0x1),
			[C(RESULT_MISS)] = XTENSA_PMU_MASK(7, 0x8),
		},
	},
};
106 | ||
107 | static int xtensa_pmu_cache_event(u64 config) | |
108 | { | |
109 | unsigned int cache_type, cache_op, cache_result; | |
110 | int ret; | |
111 | ||
112 | cache_type = (config >> 0) & 0xff; | |
113 | cache_op = (config >> 8) & 0xff; | |
114 | cache_result = (config >> 16) & 0xff; | |
115 | ||
116 | if (cache_type >= ARRAY_SIZE(xtensa_cache_ctl) || | |
117 | cache_op >= C(OP_MAX) || | |
118 | cache_result >= C(RESULT_MAX)) | |
119 | return -EINVAL; | |
120 | ||
121 | ret = xtensa_cache_ctl[cache_type][cache_op][cache_result]; | |
122 | ||
123 | if (ret == 0) | |
124 | return -EINVAL; | |
125 | ||
126 | return ret; | |
127 | } | |
128 | ||
129 | static inline uint32_t xtensa_pmu_read_counter(int idx) | |
130 | { | |
131 | return get_er(XTENSA_PMU_PM(idx)); | |
132 | } | |
133 | ||
134 | static inline void xtensa_pmu_write_counter(int idx, uint32_t v) | |
135 | { | |
136 | set_er(v, XTENSA_PMU_PM(idx)); | |
137 | } | |
138 | ||
/*
 * Fold the difference between the current raw counter value and the
 * last snapshot (hwc->prev_count) into the event's 64-bit count.
 *
 * The cmpxchg loop makes the snapshot update safe against a concurrent
 * update of prev_count (e.g. from the overflow interrupt handler).
 */
static void xtensa_perf_event_update(struct perf_event *event,
				     struct hw_perf_event *hwc, int idx)
{
	uint64_t prev_raw_count, new_raw_count;
	int64_t delta;

	do {
		prev_raw_count = local64_read(&hwc->prev_count);
		/*
		 * NOTE(review): reads event->hw.idx rather than the idx
		 * argument; every caller passes hwc == &event->hw with a
		 * matching idx, so the two agree — confirm before reusing
		 * this helper with a different hwc/idx pairing.
		 */
		new_raw_count = xtensa_pmu_read_counter(event->hw.idx);
	} while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
				 new_raw_count) != prev_raw_count);

	/* Counters are 32 bits wide: the mask handles wraparound. */
	delta = (new_raw_count - prev_raw_count) & XTENSA_PMU_COUNTER_MASK;

	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);
}
156 | ||
/*
 * Program hardware counter @idx for the next period.
 *
 * The counter is loaded with -left so that it overflows (raising the
 * profiling interrupt) after counting 'left' more events.  Returns true
 * when a sampling period boundary was crossed, i.e. the caller should
 * emit a sample.
 */
static bool xtensa_perf_event_set_period(struct perf_event *event,
					 struct hw_perf_event *hwc, int idx)
{
	bool rc = false;
	s64 left;

	if (!is_sampling_event(event)) {
		/* Counting-only event: just use the longest period. */
		left = XTENSA_PMU_COUNTER_MAX;
	} else {
		s64 period = hwc->sample_period;

		left = local64_read(&hwc->period_left);
		if (left <= -period) {
			/* Ran far past the period boundary: restart. */
			left = period;
			local64_set(&hwc->period_left, left);
			hwc->last_period = period;
			rc = true;
		} else if (left <= 0) {
			/* Period elapsed: carry the overshoot forward. */
			left += period;
			local64_set(&hwc->period_left, left);
			hwc->last_period = period;
			rc = true;
		}
		/* Clamp so the value fits the 32-bit counter range. */
		if (left > XTENSA_PMU_COUNTER_MAX)
			left = XTENSA_PMU_COUNTER_MAX;
	}

	/* Counter counts up from -left towards overflow at zero. */
	local64_set(&hwc->prev_count, -left);
	xtensa_pmu_write_counter(idx, -left);
	perf_event_update_userpage(event);

	return rc;
}
190 | ||
191 | static void xtensa_pmu_enable(struct pmu *pmu) | |
192 | { | |
193 | set_er(get_er(XTENSA_PMU_PMG) | XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG); | |
194 | } | |
195 | ||
196 | static void xtensa_pmu_disable(struct pmu *pmu) | |
197 | { | |
198 | set_er(get_er(XTENSA_PMU_PMG) & ~XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG); | |
199 | } | |
200 | ||
201 | static int xtensa_pmu_event_init(struct perf_event *event) | |
202 | { | |
203 | int ret; | |
204 | ||
205 | switch (event->attr.type) { | |
206 | case PERF_TYPE_HARDWARE: | |
207 | if (event->attr.config >= ARRAY_SIZE(xtensa_hw_ctl) || | |
208 | xtensa_hw_ctl[event->attr.config] == 0) | |
209 | return -EINVAL; | |
210 | event->hw.config = xtensa_hw_ctl[event->attr.config]; | |
211 | return 0; | |
212 | ||
213 | case PERF_TYPE_HW_CACHE: | |
214 | ret = xtensa_pmu_cache_event(event->attr.config); | |
215 | if (ret < 0) | |
216 | return ret; | |
217 | event->hw.config = ret; | |
218 | return 0; | |
219 | ||
220 | case PERF_TYPE_RAW: | |
221 | /* Not 'previous counter' select */ | |
222 | if ((event->attr.config & XTENSA_PMU_PMCTRL_SELECT) == | |
223 | (1 << XTENSA_PMU_PMCTRL_SELECT_SHIFT)) | |
224 | return -EINVAL; | |
225 | event->hw.config = (event->attr.config & | |
226 | (XTENSA_PMU_PMCTRL_KRNLCNT | | |
227 | XTENSA_PMU_PMCTRL_TRACELEVEL | | |
228 | XTENSA_PMU_PMCTRL_SELECT | | |
229 | XTENSA_PMU_PMCTRL_MASK)) | | |
230 | XTENSA_PMU_PMCTRL_INTEN; | |
231 | return 0; | |
232 | ||
233 | default: | |
234 | return -ENOENT; | |
235 | } | |
236 | } | |
237 | ||
/*
 * Starts/Stops a counter present on the PMU. The PMI handler
 * should stop the counter when perf_event_overflow() returns
 * !0. ->start() will be used to continue.
 */
static void xtensa_pmu_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	/* ->add() must have assigned a hardware counter first. */
	if (WARN_ON_ONCE(idx == -1))
		return;

	if (flags & PERF_EF_RELOAD) {
		/* Only reload a counter whose count has been folded in. */
		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
		xtensa_perf_event_set_period(event, hwc, idx);
	}

	hwc->state = 0;

	/* Writing the control value starts the hardware counter. */
	set_er(hwc->config, XTENSA_PMU_PMCTRL(idx));
}
260 | ||
/*
 * Stop the hardware counter behind @event; with PERF_EF_UPDATE also
 * fold the final counter delta into the event count.
 */
static void xtensa_pmu_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	if (!(hwc->state & PERF_HES_STOPPED)) {
		/* Clearing the control register stops the counter. */
		set_er(0, XTENSA_PMU_PMCTRL(idx));
		/*
		 * Write the status value back to clear the pending
		 * overflow/interrupt bits (presumably write-to-clear —
		 * see the Tensilica debug guide).
		 */
		set_er(get_er(XTENSA_PMU_PMSTAT(idx)),
		       XTENSA_PMU_PMSTAT(idx));
		hwc->state |= PERF_HES_STOPPED;
	}

	if ((flags & PERF_EF_UPDATE) &&
	    !(event->hw.state & PERF_HES_UPTODATE)) {
		/* Fold the final counter delta into the event count. */
		xtensa_perf_event_update(event, &event->hw, idx);
		event->hw.state |= PERF_HES_UPTODATE;
	}
}
279 | ||
/*
 * Adds/Removes a counter to/from the PMU, can be done inside
 * a transaction, see the ->*_txn() methods.
 */
static int xtensa_pmu_add(struct perf_event *event, int flags)
{
	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	/*
	 * Prefer the counter this event used last; if that is taken,
	 * grab the first free one.
	 */
	if (__test_and_set_bit(idx, ev->used_mask)) {
		idx = find_first_zero_bit(ev->used_mask,
					  XCHAL_NUM_PERF_COUNTERS);
		if (idx == XCHAL_NUM_PERF_COUNTERS)
			return -EAGAIN;	/* all hardware counters busy */

		__set_bit(idx, ev->used_mask);
		hwc->idx = idx;
	}
	ev->event[idx] = event;

	/* Not counting yet; ->start() clears these as appropriate. */
	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	if (flags & PERF_EF_START)
		xtensa_pmu_start(event, PERF_EF_RELOAD);

	perf_event_update_userpage(event);
	return 0;
}
309 | ||
310 | static void xtensa_pmu_del(struct perf_event *event, int flags) | |
311 | { | |
312 | struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events); | |
313 | ||
314 | xtensa_pmu_stop(event, PERF_EF_UPDATE); | |
315 | __clear_bit(event->hw.idx, ev->used_mask); | |
316 | perf_event_update_userpage(event); | |
317 | } | |
318 | ||
319 | static void xtensa_pmu_read(struct perf_event *event) | |
320 | { | |
321 | xtensa_perf_event_update(event, &event->hw, event->hw.idx); | |
322 | } | |
323 | ||
324 | static int callchain_trace(struct stackframe *frame, void *data) | |
325 | { | |
cfbcf468 | 326 | struct perf_callchain_entry_ctx *entry = data; |
9bd46da4 MF |
327 | |
328 | perf_callchain_store(entry, frame->pc); | |
329 | return 0; | |
330 | } | |
331 | ||
cfbcf468 | 332 | void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, |
9bd46da4 MF |
333 | struct pt_regs *regs) |
334 | { | |
cfbcf468 | 335 | xtensa_backtrace_kernel(regs, entry->max_stack, |
9bd46da4 MF |
336 | callchain_trace, NULL, entry); |
337 | } | |
338 | ||
cfbcf468 | 339 | void perf_callchain_user(struct perf_callchain_entry_ctx *entry, |
9bd46da4 MF |
340 | struct pt_regs *regs) |
341 | { | |
cfbcf468 | 342 | xtensa_backtrace_user(regs, entry->max_stack, |
9bd46da4 MF |
343 | callchain_trace, entry); |
344 | } | |
345 | ||
346 | void perf_event_print_debug(void) | |
347 | { | |
348 | unsigned long flags; | |
349 | unsigned i; | |
350 | ||
351 | local_irq_save(flags); | |
352 | pr_info("CPU#%d: PMG: 0x%08lx\n", smp_processor_id(), | |
353 | get_er(XTENSA_PMU_PMG)); | |
354 | for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i) | |
355 | pr_info("PM%d: 0x%08lx, PMCTRL%d: 0x%08lx, PMSTAT%d: 0x%08lx\n", | |
356 | i, get_er(XTENSA_PMU_PM(i)), | |
357 | i, get_er(XTENSA_PMU_PMCTRL(i)), | |
358 | i, get_er(XTENSA_PMU_PMSTAT(i))); | |
359 | local_irq_restore(flags); | |
360 | } | |
361 | ||
/*
 * Profiling interrupt handler: for every active counter that has
 * overflowed, fold its count, reprogram the next period and, when a
 * sampling period boundary was crossed, emit a sample.
 */
irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id)
{
	irqreturn_t rc = IRQ_NONE;
	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
	unsigned i;

	/* Walk only the counters currently claimed by events. */
	for (i = find_first_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS);
	     i < XCHAL_NUM_PERF_COUNTERS;
	     i = find_next_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS, i + 1)) {
		uint32_t v = get_er(XTENSA_PMU_PMSTAT(i));
		struct perf_event *event = ev->event[i];
		struct hw_perf_event *hwc = &event->hw;
		u64 last_period;

		if (!(v & XTENSA_PMU_PMSTAT_OVFL))
			continue;

		/* Acknowledge: write the status value back to clear it. */
		set_er(v, XTENSA_PMU_PMSTAT(i));
		xtensa_perf_event_update(event, hwc, i);
		/* Snapshot before set_period may change last_period. */
		last_period = hwc->last_period;
		if (xtensa_perf_event_set_period(event, hwc, i)) {
			struct perf_sample_data data;
			struct pt_regs *regs = get_irq_regs();

			perf_sample_data_init(&data, 0, last_period);
			/* Non-zero return means throttle: stop counting. */
			if (perf_event_overflow(event, &data, regs))
				xtensa_pmu_stop(event, 0);
		}

		rc = IRQ_HANDLED;
	}
	return rc;
}
395 | ||
/* perf core callbacks for the Xtensa PMU */
static struct pmu xtensa_pmu = {
	.pmu_enable = xtensa_pmu_enable,
	.pmu_disable = xtensa_pmu_disable,
	.event_init = xtensa_pmu_event_init,
	.add = xtensa_pmu_add,
	.del = xtensa_pmu_del,
	.start = xtensa_pmu_start,
	.stop = xtensa_pmu_stop,
	.read = xtensa_pmu_read,
};
406 | ||
407 | static void xtensa_pmu_setup(void) | |
408 | { | |
409 | unsigned i; | |
410 | ||
411 | set_er(0, XTENSA_PMU_PMG); | |
412 | for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i) { | |
413 | set_er(0, XTENSA_PMU_PMCTRL(i)); | |
414 | set_er(get_er(XTENSA_PMU_PMSTAT(i)), XTENSA_PMU_PMSTAT(i)); | |
415 | } | |
416 | } | |
417 | ||
418 | static int xtensa_pmu_notifier(struct notifier_block *self, | |
419 | unsigned long action, void *data) | |
420 | { | |
421 | switch (action & ~CPU_TASKS_FROZEN) { | |
422 | case CPU_STARTING: | |
423 | xtensa_pmu_setup(); | |
424 | break; | |
425 | ||
426 | default: | |
427 | break; | |
428 | } | |
429 | ||
430 | return NOTIFY_OK; | |
431 | } | |
432 | ||
433 | static int __init xtensa_pmu_init(void) | |
434 | { | |
435 | int ret; | |
436 | int irq = irq_create_mapping(NULL, XCHAL_PROFILING_INTERRUPT); | |
437 | ||
438 | perf_cpu_notifier(xtensa_pmu_notifier); | |
38fef73c MF |
439 | #if XTENSA_FAKE_NMI |
440 | enable_irq(irq); | |
441 | #else | |
9bd46da4 MF |
442 | ret = request_irq(irq, xtensa_pmu_irq_handler, IRQF_PERCPU, |
443 | "pmu", NULL); | |
444 | if (ret < 0) | |
445 | return ret; | |
38fef73c | 446 | #endif |
9bd46da4 MF |
447 | |
448 | ret = perf_pmu_register(&xtensa_pmu, "cpu", PERF_TYPE_RAW); | |
449 | if (ret) | |
450 | free_irq(irq, NULL); | |
451 | ||
452 | return ret; | |
453 | } | |
454 | early_initcall(xtensa_pmu_init); |