Commit | Line | Data |
---|---|---|
0bcbf2e3 MP |
1 | /* |
2 | * Copyright(C) 2015 Linaro Limited. All rights reserved. | |
3 | * Author: Mathieu Poirier <mathieu.poirier@linaro.org> | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or modify it | |
6 | * under the terms of the GNU General Public License version 2 as published by | |
7 | * the Free Software Foundation. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, but WITHOUT | |
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
12 | * more details. | |
13 | * | |
14 | * You should have received a copy of the GNU General Public License along with | |
15 | * this program. If not, see <http://www.gnu.org/licenses/>. | |
16 | */ | |
17 | ||
18 | #include <linux/coresight.h> | |
19 | #include <linux/coresight-pmu.h> | |
20 | #include <linux/cpumask.h> | |
21 | #include <linux/device.h> | |
22 | #include <linux/list.h> | |
23 | #include <linux/mm.h> | |
ca48fa22 | 24 | #include <linux/init.h> |
0bcbf2e3 MP |
25 | #include <linux/perf_event.h> |
26 | #include <linux/slab.h> | |
27 | #include <linux/types.h> | |
28 | #include <linux/workqueue.h> | |
29 | ||
ca878b14 | 30 | #include "coresight-etm-perf.h" |
0bcbf2e3 MP |
31 | #include "coresight-priv.h" |
32 | ||
/* The one PMU instance this driver registers with the perf core. */
static struct pmu etm_pmu;
/* Set once perf_pmu_register() has succeeded; gates etm_perf_symlink(). */
static bool etm_perf_up;
35 | ||
/**
 * struct etm_event_data - Coresight specifics associated to an event
 * @work:	Handle to free allocated memory outside IRQ context.
 * @mask:	Hold the CPU(s) this event was set for.
 * @snk_config:	The sink configuration, as returned by the sink's
 *		alloc_buffer() operation.
 * @path:	An array of paths, indexed by CPU number, each entry a
 *		source-to-sink device list built by coresight_build_path().
 */
struct etm_event_data {
	struct work_struct work;
	cpumask_t mask;
	void *snk_config;
	struct list_head **path;
};
49 | ||
/* Per-CPU AUX ring buffer handle, claimed in etm_event_start(). */
static DEFINE_PER_CPU(struct perf_output_handle, ctx_handle);
/* Per-CPU trace source, registered/unregistered via etm_perf_symlink(). */
static DEFINE_PER_CPU(struct coresight_device *, csdev_src);
52 | ||
/* ETMv3.5/PTM's ETMCR is 'config' */
PMU_FORMAT_ATTR(cycacc, "config:" __stringify(ETM_OPT_CYCACC));
PMU_FORMAT_ATTR(timestamp, "config:" __stringify(ETM_OPT_TS));

/* Event config bits exposed under .../format/ in sysfs. */
static struct attribute *etm_config_formats_attr[] = {
	&format_attr_cycacc.attr,
	&format_attr_timestamp.attr,
	NULL,
};

static struct attribute_group etm_pmu_format_group = {
	.name = "format",
	.attrs = etm_config_formats_attr,
};

/* All sysfs attribute groups handed to the perf core at registration. */
static const struct attribute_group *etm_pmu_attr_groups[] = {
	&etm_pmu_format_group,
	NULL,
};
72 | ||
73 | static void etm_event_read(struct perf_event *event) {} | |
74 | ||
ca878b14 | 75 | static int etm_addr_filters_alloc(struct perf_event *event) |
0bcbf2e3 | 76 | { |
ca878b14 MP |
77 | struct etm_filters *filters; |
78 | int node = event->cpu == -1 ? -1 : cpu_to_node(event->cpu); | |
79 | ||
80 | filters = kzalloc_node(sizeof(struct etm_filters), GFP_KERNEL, node); | |
81 | if (!filters) | |
82 | return -ENOMEM; | |
83 | ||
84 | if (event->parent) | |
85 | memcpy(filters, event->parent->hw.addr_filters, | |
86 | sizeof(*filters)); | |
87 | ||
88 | event->hw.addr_filters = filters; | |
0bcbf2e3 MP |
89 | |
90 | return 0; | |
91 | } | |
92 | ||
ca878b14 MP |
93 | static void etm_event_destroy(struct perf_event *event) |
94 | { | |
95 | kfree(event->hw.addr_filters); | |
96 | event->hw.addr_filters = NULL; | |
97 | } | |
98 | ||
99 | static int etm_event_init(struct perf_event *event) | |
100 | { | |
101 | int ret = 0; | |
102 | ||
103 | if (event->attr.type != etm_pmu.type) { | |
104 | ret = -ENOENT; | |
105 | goto out; | |
106 | } | |
107 | ||
108 | ret = etm_addr_filters_alloc(event); | |
109 | if (ret) | |
110 | goto out; | |
111 | ||
112 | event->destroy = etm_event_destroy; | |
113 | out: | |
114 | return ret; | |
115 | } | |
116 | ||
/*
 * Worker that tears down a session's resources: the shared sink buffer,
 * every per-CPU path, and the event data itself.  Runs from the workqueue
 * (scheduled by etm_free_aux()) so the freeing happens outside IRQ context.
 */
static void free_event_data(struct work_struct *work)
{
	int cpu;
	cpumask_t *mask;
	struct etm_event_data *event_data;
	struct coresight_device *sink;

	event_data = container_of(work, struct etm_event_data, work);
	mask = &event_data->mask;
	/*
	 * First deal with the sink configuration. See comment in
	 * etm_setup_aux() about why we take the first available path.
	 */
	if (event_data->snk_config) {
		cpu = cpumask_first(mask);
		sink = coresight_get_sink(event_data->path[cpu]);
		if (sink_ops(sink)->free_buffer)
			sink_ops(sink)->free_buffer(event_data->snk_config);
	}

	/* Paths may be NULL or an ERR_PTR if etm_setup_aux() bailed early. */
	for_each_cpu(cpu, mask) {
		if (!(IS_ERR_OR_NULL(event_data->path[cpu])))
			coresight_release_path(event_data->path[cpu]);
	}

	kfree(event_data->path);
	kfree(event_data);
}
145 | ||
146 | static void *alloc_event_data(int cpu) | |
147 | { | |
148 | int size; | |
149 | cpumask_t *mask; | |
150 | struct etm_event_data *event_data; | |
151 | ||
152 | /* First get memory for the session's data */ | |
153 | event_data = kzalloc(sizeof(struct etm_event_data), GFP_KERNEL); | |
154 | if (!event_data) | |
155 | return NULL; | |
156 | ||
157 | /* Make sure nothing disappears under us */ | |
158 | get_online_cpus(); | |
159 | size = num_online_cpus(); | |
160 | ||
161 | mask = &event_data->mask; | |
162 | if (cpu != -1) | |
163 | cpumask_set_cpu(cpu, mask); | |
164 | else | |
165 | cpumask_copy(mask, cpu_online_mask); | |
166 | put_online_cpus(); | |
167 | ||
168 | /* | |
169 | * Each CPU has a single path between source and destination. As such | |
170 | * allocate an array using CPU numbers as indexes. That way a path | |
171 | * for any CPU can easily be accessed at any given time. We proceed | |
172 | * the same way for sessions involving a single CPU. The cost of | |
173 | * unused memory when dealing with single CPU trace scenarios is small | |
174 | * compared to the cost of searching through an optimized array. | |
175 | */ | |
176 | event_data->path = kcalloc(size, | |
177 | sizeof(struct list_head *), GFP_KERNEL); | |
178 | if (!event_data->path) { | |
179 | kfree(event_data); | |
180 | return NULL; | |
181 | } | |
182 | ||
183 | return event_data; | |
184 | } | |
185 | ||
/*
 * Perf core free_aux callback.  May be invoked from IRQ context, so defer
 * the actual teardown (which frees memory and releases paths) to a
 * workqueue via free_event_data().
 */
static void etm_free_aux(void *data)
{
	struct etm_event_data *event_data = data;

	schedule_work(&event_data->work);
}
192 | ||
/*
 * Perf core setup_aux callback: build a source-to-sink path for every CPU
 * in the session and allocate the sink's buffer configuration for the
 * given AUX pages.  Returns the session's etm_event_data, or NULL on
 * failure (any partially built state is torn down via etm_free_aux()).
 */
static void *etm_setup_aux(int event_cpu, void **pages,
			   int nr_pages, bool overwrite)
{
	int cpu;
	cpumask_t *mask;
	struct coresight_device *sink;
	struct etm_event_data *event_data = NULL;

	event_data = alloc_event_data(event_cpu);
	if (!event_data)
		return NULL;

	INIT_WORK(&event_data->work, free_event_data);

	mask = &event_data->mask;

	/* Setup the path for each CPU in a trace session */
	for_each_cpu(cpu, mask) {
		struct coresight_device *csdev;

		/* No registered source for this CPU - can't trace it. */
		csdev = per_cpu(csdev_src, cpu);
		if (!csdev)
			goto err;

		/*
		 * Building a path doesn't enable it, it simply builds a
		 * list of devices from source to sink that can be
		 * referenced later when the path is actually needed.
		 */
		event_data->path[cpu] = coresight_build_path(csdev);
		if (IS_ERR(event_data->path[cpu]))
			goto err;
	}

	/*
	 * In theory nothing prevent tracers in a trace session from being
	 * associated with different sinks, nor having a sink per tracer. But
	 * until we have HW with this kind of topology and a way to convey
	 * sink assignement from the perf cmd line we need to assume tracers
	 * in a trace session are using the same sink. Therefore pick the sink
	 * found at the end of the first available path.
	 */
	cpu = cpumask_first(mask);
	/* Grab the sink at the end of the path */
	sink = coresight_get_sink(event_data->path[cpu]);
	if (!sink)
		goto err;

	if (!sink_ops(sink)->alloc_buffer)
		goto err;

	/* Get the AUX specific data from the sink buffer */
	event_data->snk_config =
			sink_ops(sink)->alloc_buffer(sink, cpu, pages,
						     nr_pages, overwrite);
	if (!event_data->snk_config)
		goto err;

out:
	return event_data;

err:
	/* Defer cleanup of whatever was built so far to the workqueue. */
	etm_free_aux(event_data);
	event_data = NULL;
	goto out;
}
259 | ||
/*
 * Perf core start callback: claim the AUX buffer, configure the sink,
 * enable the path and finally the tracer on the current CPU.  On any
 * failure the event is marked PERF_HES_STOPPED so the core sees it as
 * dead; a claimed AUX handle is released with a zero-size output.
 */
static void etm_event_start(struct perf_event *event, int flags)
{
	int cpu = smp_processor_id();
	struct etm_event_data *event_data;
	struct perf_output_handle *handle = this_cpu_ptr(&ctx_handle);
	struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu);

	/* No trace source registered for this CPU. */
	if (!csdev)
		goto fail;

	/*
	 * Deal with the ring buffer API and get a handle on the
	 * session's information.
	 */
	event_data = perf_aux_output_begin(handle, event);
	if (!event_data)
		goto fail;

	/* We need a sink, no need to continue without one */
	sink = coresight_get_sink(event_data->path[cpu]);
	if (WARN_ON_ONCE(!sink || !sink_ops(sink)->set_buffer))
		goto fail_end_stop;

	/* Configure the sink */
	if (sink_ops(sink)->set_buffer(sink, handle,
				       event_data->snk_config))
		goto fail_end_stop;

	/* Nothing will happen without a path */
	if (coresight_enable_path(event_data->path[cpu], CS_MODE_PERF))
		goto fail_end_stop;

	/* Tell the perf core the event is alive */
	event->hw.state = 0;

	/* Finally enable the tracer */
	if (source_ops(csdev)->enable(csdev, event, CS_MODE_PERF))
		goto fail_end_stop;

out:
	return;

fail_end_stop:
	/* Release the AUX handle claimed above; report truncation. */
	perf_aux_output_end(handle, 0, true);
fail:
	event->hw.state = PERF_HES_STOPPED;
	goto out;
}
308 | ||
/*
 * Perf core stop callback: disable the tracer, and when PERF_EF_UPDATE is
 * set, harvest the trace data from the sink into the AUX buffer before
 * releasing the handle.  The path is disabled last so its components
 * become available to other sessions.
 */
static void etm_event_stop(struct perf_event *event, int mode)
{
	bool lost;
	int cpu = smp_processor_id();
	unsigned long size;
	struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu);
	struct perf_output_handle *handle = this_cpu_ptr(&ctx_handle);
	struct etm_event_data *event_data = perf_get_aux(handle);

	/* Already stopped (or never successfully started) - nothing to do. */
	if (event->hw.state == PERF_HES_STOPPED)
		return;

	if (!csdev)
		return;

	sink = coresight_get_sink(event_data->path[cpu]);
	if (!sink)
		return;

	/* stop tracer */
	source_ops(csdev)->disable(csdev, event);

	/* tell the core */
	event->hw.state = PERF_HES_STOPPED;

	if (mode & PERF_EF_UPDATE) {
		/* The handle must belong to the event being stopped. */
		if (WARN_ON_ONCE(handle->event != event))
			return;

		/* update trace information */
		if (!sink_ops(sink)->update_buffer)
			return;

		sink_ops(sink)->update_buffer(sink, handle,
					      event_data->snk_config);

		if (!sink_ops(sink)->reset_buffer)
			return;

		size = sink_ops(sink)->reset_buffer(sink, handle,
						    event_data->snk_config,
						    &lost);

		perf_aux_output_end(handle, size, lost);
	}

	/* Disabling the path make its elements available to other sessions */
	coresight_disable_path(event_data->path[cpu]);
}
358 | ||
359 | static int etm_event_add(struct perf_event *event, int mode) | |
360 | { | |
361 | int ret = 0; | |
362 | struct hw_perf_event *hwc = &event->hw; | |
363 | ||
364 | if (mode & PERF_EF_START) { | |
365 | etm_event_start(event, 0); | |
366 | if (hwc->state & PERF_HES_STOPPED) | |
367 | ret = -EINVAL; | |
368 | } else { | |
369 | hwc->state = PERF_HES_STOPPED; | |
370 | } | |
371 | ||
372 | return ret; | |
373 | } | |
374 | ||
/* Perf core del callback: stop tracing and flush what was collected. */
static void etm_event_del(struct perf_event *event, int mode)
{
	etm_event_stop(event, PERF_EF_UPDATE);
}
379 | ||
ca878b14 MP |
380 | static int etm_addr_filters_validate(struct list_head *filters) |
381 | { | |
382 | bool range = false, address = false; | |
383 | int index = 0; | |
384 | struct perf_addr_filter *filter; | |
385 | ||
386 | list_for_each_entry(filter, filters, entry) { | |
387 | /* | |
388 | * No need to go further if there's no more | |
389 | * room for filters. | |
390 | */ | |
391 | if (++index > ETM_ADDR_CMP_MAX) | |
392 | return -EOPNOTSUPP; | |
393 | ||
394 | /* | |
395 | * As taken from the struct perf_addr_filter documentation: | |
396 | * @range: 1: range, 0: address | |
397 | * | |
398 | * At this time we don't allow range and start/stop filtering | |
399 | * to cohabitate, they have to be mutually exclusive. | |
400 | */ | |
401 | if ((filter->range == 1) && address) | |
402 | return -EOPNOTSUPP; | |
403 | ||
404 | if ((filter->range == 0) && range) | |
405 | return -EOPNOTSUPP; | |
406 | ||
407 | /* | |
408 | * For range filtering, the second address in the address | |
409 | * range comparator needs to be higher than the first. | |
410 | * Invalid otherwise. | |
411 | */ | |
412 | if (filter->range && filter->size == 0) | |
413 | return -EINVAL; | |
414 | ||
415 | /* | |
416 | * Everything checks out with this filter, record what we've | |
417 | * received before moving on to the next one. | |
418 | */ | |
419 | if (filter->range) | |
420 | range = true; | |
421 | else | |
422 | address = true; | |
423 | } | |
424 | ||
425 | return 0; | |
426 | } | |
427 | ||
/*
 * Perf core addr_filters_sync callback: translate the event's generic
 * perf address filters into the hardware-facing etm_filter entries.
 * The walk relies on @i staying in step with both the filter list and
 * the event's addr_filters_offs array - do not reorder.
 */
static void etm_addr_filters_sync(struct perf_event *event)
{
	struct perf_addr_filters_head *head = perf_event_addr_filters(event);
	unsigned long start, stop, *offs = event->addr_filters_offs;
	struct etm_filters *filters = event->hw.addr_filters;
	struct etm_filter *etm_filter;
	struct perf_addr_filter *filter;
	int i = 0;

	list_for_each_entry(filter, &head->list, entry) {
		/* Apply the per-filter runtime offset (e.g. mmap base). */
		start = filter->offset + offs[i];
		stop = start + filter->size;
		etm_filter = &filters->etm_filter[i];

		if (filter->range == 1) {
			/* Trace only within [start, stop). */
			etm_filter->start_addr = start;
			etm_filter->stop_addr = stop;
			etm_filter->type = ETM_ADDR_TYPE_RANGE;
		} else {
			/* Single address: start-tracing or stop-tracing. */
			if (filter->filter == 1) {
				etm_filter->start_addr = start;
				etm_filter->type = ETM_ADDR_TYPE_START;
			} else {
				etm_filter->stop_addr = stop;
				etm_filter->type = ETM_ADDR_TYPE_STOP;
			}
		}
		i++;
	}

	filters->nr_filters = i;
}
460 | ||
0bcbf2e3 MP |
461 | int etm_perf_symlink(struct coresight_device *csdev, bool link) |
462 | { | |
463 | char entry[sizeof("cpu9999999")]; | |
464 | int ret = 0, cpu = source_ops(csdev)->cpu_id(csdev); | |
465 | struct device *pmu_dev = etm_pmu.dev; | |
466 | struct device *cs_dev = &csdev->dev; | |
467 | ||
468 | sprintf(entry, "cpu%d", cpu); | |
469 | ||
470 | if (!etm_perf_up) | |
471 | return -EPROBE_DEFER; | |
472 | ||
473 | if (link) { | |
474 | ret = sysfs_create_link(&pmu_dev->kobj, &cs_dev->kobj, entry); | |
475 | if (ret) | |
476 | return ret; | |
477 | per_cpu(csdev_src, cpu) = csdev; | |
478 | } else { | |
479 | sysfs_remove_link(&pmu_dev->kobj, entry); | |
480 | per_cpu(csdev_src, cpu) = NULL; | |
481 | } | |
482 | ||
483 | return 0; | |
484 | } | |
485 | ||
486 | static int __init etm_perf_init(void) | |
487 | { | |
488 | int ret; | |
489 | ||
ca878b14 MP |
490 | etm_pmu.capabilities = PERF_PMU_CAP_EXCLUSIVE; |
491 | ||
492 | etm_pmu.attr_groups = etm_pmu_attr_groups; | |
493 | etm_pmu.task_ctx_nr = perf_sw_context; | |
494 | etm_pmu.read = etm_event_read; | |
495 | etm_pmu.event_init = etm_event_init; | |
496 | etm_pmu.setup_aux = etm_setup_aux; | |
497 | etm_pmu.free_aux = etm_free_aux; | |
498 | etm_pmu.start = etm_event_start; | |
499 | etm_pmu.stop = etm_event_stop; | |
500 | etm_pmu.add = etm_event_add; | |
501 | etm_pmu.del = etm_event_del; | |
502 | etm_pmu.addr_filters_sync = etm_addr_filters_sync; | |
503 | etm_pmu.addr_filters_validate = etm_addr_filters_validate; | |
504 | etm_pmu.nr_addr_filters = ETM_ADDR_CMP_MAX; | |
0bcbf2e3 MP |
505 | |
506 | ret = perf_pmu_register(&etm_pmu, CORESIGHT_ETM_PMU_NAME, -1); | |
507 | if (ret == 0) | |
508 | etm_perf_up = true; | |
509 | ||
510 | return ret; | |
511 | } | |
ca48fa22 | 512 | device_initcall(etm_perf_init); |