/*
 * Intel(R) Processor Trace PMU driver for perf
 * Copyright (c) 2013-2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * Intel PT is specified in the Intel Architecture Instruction Set Extensions
 * Programming Reference:
 * http://software.intel.com/en-us/intel-isa-extensions
 */
21 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
23 #include <linux/types.h>
24 #include <linux/slab.h>
25 #include <linux/device.h>
27 #include <asm/perf_event.h>
31 #include "perf_event.h"
34 static DEFINE_PER_CPU(struct pt
, pt_ctx
);
36 static struct pt_pmu pt_pmu
;
/*
 * Capabilities of Intel PT hardware, such as number of address bits or
 * supported output schemes, are cached and exported to userspace as "caps"
 * attribute group of the pt pmu device
 * (/sys/bus/event_source/devices/intel_pt/caps/) so that userspace can store
 * relevant bits together with intel_pt traces.
 *
 * These are necessary for both trace decoding (payloads_lip, contains address
 * width encoded in IP-related packets), and event configuration (bitmasks with
 * permitted values for certain bit fields).
 */
56 #define PT_CAP(_n, _l, _r, _m) \
57 [PT_CAP_ ## _n] = { .name = __stringify(_n), .leaf = _l, \
58 .reg = _r, .mask = _m }
60 static struct pt_cap_desc
{
66 PT_CAP(max_subleaf
, 0, CR_EAX
, 0xffffffff),
67 PT_CAP(cr3_filtering
, 0, CR_EBX
, BIT(0)),
68 PT_CAP(topa_output
, 0, CR_ECX
, BIT(0)),
69 PT_CAP(topa_multiple_entries
, 0, CR_ECX
, BIT(1)),
70 PT_CAP(payloads_lip
, 0, CR_ECX
, BIT(31)),
73 static u32
pt_cap_get(enum pt_capabilities cap
)
75 struct pt_cap_desc
*cd
= &pt_caps
[cap
];
76 u32 c
= pt_pmu
.caps
[cd
->leaf
* 4 + cd
->reg
];
77 unsigned int shift
= __ffs(cd
->mask
);
79 return (c
& cd
->mask
) >> shift
;
82 static ssize_t
pt_cap_show(struct device
*cdev
,
83 struct device_attribute
*attr
,
86 struct dev_ext_attribute
*ea
=
87 container_of(attr
, struct dev_ext_attribute
, attr
);
88 enum pt_capabilities cap
= (long)ea
->var
;
90 return snprintf(buf
, PAGE_SIZE
, "%x\n", pt_cap_get(cap
));
93 static struct attribute_group pt_cap_group
= {
97 PMU_FORMAT_ATTR(tsc
, "config:10" );
98 PMU_FORMAT_ATTR(noretcomp
, "config:11" );
100 static struct attribute
*pt_formats_attr
[] = {
101 &format_attr_tsc
.attr
,
102 &format_attr_noretcomp
.attr
,
106 static struct attribute_group pt_format_group
= {
108 .attrs
= pt_formats_attr
,
111 static const struct attribute_group
*pt_attr_groups
[] = {
117 static int __init
pt_pmu_hw_init(void)
119 struct dev_ext_attribute
*de_attrs
;
120 struct attribute
**attrs
;
127 if (!test_cpu_cap(&boot_cpu_data
, X86_FEATURE_INTEL_PT
))
130 for (i
= 0; i
< PT_CPUID_LEAVES
; i
++) {
132 &pt_pmu
.caps
[CR_EAX
+ i
*4],
133 &pt_pmu
.caps
[CR_EBX
+ i
*4],
134 &pt_pmu
.caps
[CR_ECX
+ i
*4],
135 &pt_pmu
.caps
[CR_EDX
+ i
*4]);
139 size
= sizeof(struct attribute
*) * (ARRAY_SIZE(pt_caps
)+1);
140 attrs
= kzalloc(size
, GFP_KERNEL
);
144 size
= sizeof(struct dev_ext_attribute
) * (ARRAY_SIZE(pt_caps
)+1);
145 de_attrs
= kzalloc(size
, GFP_KERNEL
);
149 for (i
= 0; i
< ARRAY_SIZE(pt_caps
); i
++) {
150 struct dev_ext_attribute
*de_attr
= de_attrs
+ i
;
152 de_attr
->attr
.attr
.name
= pt_caps
[i
].name
;
154 sysfs_attr_init(&de_attr
->attr
.attr
);
156 de_attr
->attr
.attr
.mode
= S_IRUGO
;
157 de_attr
->attr
.show
= pt_cap_show
;
158 de_attr
->var
= (void *)i
;
160 attrs
[i
] = &de_attr
->attr
.attr
;
163 pt_cap_group
.attrs
= attrs
;
173 #define PT_CONFIG_MASK (RTIT_CTL_TSC_EN | RTIT_CTL_DISRETC)
175 static bool pt_event_valid(struct perf_event
*event
)
177 u64 config
= event
->attr
.config
;
179 if ((config
& PT_CONFIG_MASK
) != config
)
186 * PT configuration helpers
187 * These all are cpu affine and operate on a local PT
190 static void pt_config(struct perf_event
*event
)
194 if (!event
->hw
.itrace_started
) {
195 event
->hw
.itrace_started
= 1;
196 wrmsrl(MSR_IA32_RTIT_STATUS
, 0);
199 reg
= RTIT_CTL_TOPA
| RTIT_CTL_BRANCH_EN
| RTIT_CTL_TRACEEN
;
201 if (!event
->attr
.exclude_kernel
)
203 if (!event
->attr
.exclude_user
)
206 reg
|= (event
->attr
.config
& PT_CONFIG_MASK
);
208 wrmsrl(MSR_IA32_RTIT_CTL
, reg
);
211 static void pt_config_start(bool start
)
215 rdmsrl(MSR_IA32_RTIT_CTL
, ctl
);
217 ctl
|= RTIT_CTL_TRACEEN
;
219 ctl
&= ~RTIT_CTL_TRACEEN
;
220 wrmsrl(MSR_IA32_RTIT_CTL
, ctl
);
223 * A wrmsr that disables trace generation serializes other PT
224 * registers and causes all data packets to be written to memory,
225 * but a fence is required for the data to become globally visible.
227 * The below WMB, separating data store and aux_head store matches
228 * the consumer's RMB that separates aux_head load and data load.
234 static void pt_config_buffer(void *buf
, unsigned int topa_idx
,
235 unsigned int output_off
)
239 wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE
, virt_to_phys(buf
));
241 reg
= 0x7f | ((u64
)topa_idx
<< 7) | ((u64
)output_off
<< 32);
243 wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK
, reg
);
247 * Keep ToPA table-related metadata on the same page as the actual table,
248 * taking up a few words from the top
251 #define TENTS_PER_PAGE (((PAGE_SIZE - 40) / sizeof(struct topa_entry)) - 1)
254 * struct topa - page-sized ToPA table with metadata at the top
255 * @table: actual ToPA table entries, as understood by PT hardware
256 * @list: linkage to struct pt_buffer's list of tables
257 * @phys: physical address of this page
258 * @offset: offset of the first entry in this table in the buffer
259 * @size: total size of all entries in this table
260 * @last: index of the last initialized entry in this table
263 struct topa_entry table
[TENTS_PER_PAGE
];
264 struct list_head list
;
271 /* make -1 stand for the last table entry */
272 #define TOPA_ENTRY(t, i) ((i) == -1 ? &(t)->table[(t)->last] : &(t)->table[(i)])
275 * topa_alloc() - allocate page-sized ToPA table
276 * @cpu: CPU on which to allocate.
277 * @gfp: Allocation flags.
279 * Return: On success, return the pointer to ToPA table page.
281 static struct topa
*topa_alloc(int cpu
, gfp_t gfp
)
283 int node
= cpu_to_node(cpu
);
287 p
= alloc_pages_node(node
, gfp
| __GFP_ZERO
, 0);
291 topa
= page_address(p
);
293 topa
->phys
= page_to_phys(p
);
296 * In case of singe-entry ToPA, always put the self-referencing END
297 * link as the 2nd entry in the table
299 if (!pt_cap_get(PT_CAP_topa_multiple_entries
)) {
300 TOPA_ENTRY(topa
, 1)->base
= topa
->phys
>> TOPA_SHIFT
;
301 TOPA_ENTRY(topa
, 1)->end
= 1;
308 * topa_free() - free a page-sized ToPA table
309 * @topa: Table to deallocate.
311 static void topa_free(struct topa
*topa
)
313 free_page((unsigned long)topa
);
317 * topa_insert_table() - insert a ToPA table into a buffer
318 * @buf: PT buffer that's being extended.
319 * @topa: New topa table to be inserted.
321 * If it's the first table in this buffer, set up buffer's pointers
322 * accordingly; otherwise, add a END=1 link entry to @topa to the current
323 * "last" table and adjust the last table pointer to @topa.
325 static void topa_insert_table(struct pt_buffer
*buf
, struct topa
*topa
)
327 struct topa
*last
= buf
->last
;
329 list_add_tail(&topa
->list
, &buf
->tables
);
332 buf
->first
= buf
->last
= buf
->cur
= topa
;
336 topa
->offset
= last
->offset
+ last
->size
;
339 if (!pt_cap_get(PT_CAP_topa_multiple_entries
))
342 BUG_ON(last
->last
!= TENTS_PER_PAGE
- 1);
344 TOPA_ENTRY(last
, -1)->base
= topa
->phys
>> TOPA_SHIFT
;
345 TOPA_ENTRY(last
, -1)->end
= 1;
349 * topa_table_full() - check if a ToPA table is filled up
352 static bool topa_table_full(struct topa
*topa
)
354 /* single-entry ToPA is a special case */
355 if (!pt_cap_get(PT_CAP_topa_multiple_entries
))
358 return topa
->last
== TENTS_PER_PAGE
- 1;
362 * topa_insert_pages() - create a list of ToPA tables
363 * @buf: PT buffer being initialized.
364 * @gfp: Allocation flags.
366 * This initializes a list of ToPA tables with entries from
367 * the data_pages provided by rb_alloc_aux().
369 * Return: 0 on success or error code.
371 static int topa_insert_pages(struct pt_buffer
*buf
, gfp_t gfp
)
373 struct topa
*topa
= buf
->last
;
377 p
= virt_to_page(buf
->data_pages
[buf
->nr_pages
]);
379 order
= page_private(p
);
381 if (topa_table_full(topa
)) {
382 topa
= topa_alloc(buf
->cpu
, gfp
);
386 topa_insert_table(buf
, topa
);
389 TOPA_ENTRY(topa
, -1)->base
= page_to_phys(p
) >> TOPA_SHIFT
;
390 TOPA_ENTRY(topa
, -1)->size
= order
;
391 if (!buf
->snapshot
&& !pt_cap_get(PT_CAP_topa_multiple_entries
)) {
392 TOPA_ENTRY(topa
, -1)->intr
= 1;
393 TOPA_ENTRY(topa
, -1)->stop
= 1;
397 topa
->size
+= sizes(order
);
399 buf
->nr_pages
+= 1ul << order
;
405 * pt_topa_dump() - print ToPA tables and their entries
408 static void pt_topa_dump(struct pt_buffer
*buf
)
412 list_for_each_entry(topa
, &buf
->tables
, list
) {
415 pr_debug("# table @%p (%016Lx), off %llx size %zx\n", topa
->table
,
416 topa
->phys
, topa
->offset
, topa
->size
);
417 for (i
= 0; i
< TENTS_PER_PAGE
; i
++) {
418 pr_debug("# entry @%p (%lx sz %u %c%c%c) raw=%16llx\n",
420 (unsigned long)topa
->table
[i
].base
<< TOPA_SHIFT
,
421 sizes(topa
->table
[i
].size
),
422 topa
->table
[i
].end
? 'E' : ' ',
423 topa
->table
[i
].intr
? 'I' : ' ',
424 topa
->table
[i
].stop
? 'S' : ' ',
425 *(u64
*)&topa
->table
[i
]);
426 if ((pt_cap_get(PT_CAP_topa_multiple_entries
) &&
427 topa
->table
[i
].stop
) ||
435 * pt_buffer_advance() - advance to the next output region
438 * Advance the current pointers in the buffer to the next ToPA entry.
440 static void pt_buffer_advance(struct pt_buffer
*buf
)
445 if (buf
->cur_idx
== buf
->cur
->last
) {
446 if (buf
->cur
== buf
->last
)
447 buf
->cur
= buf
->first
;
449 buf
->cur
= list_entry(buf
->cur
->list
.next
, struct topa
,
456 * pt_update_head() - calculate current offsets and sizes
457 * @pt: Per-cpu pt context.
459 * Update buffer's current write pointer position and data size.
461 static void pt_update_head(struct pt
*pt
)
463 struct pt_buffer
*buf
= perf_get_aux(&pt
->handle
);
464 u64 topa_idx
, base
, old
;
466 /* offset of the first region in this table from the beginning of buf */
467 base
= buf
->cur
->offset
+ buf
->output_off
;
469 /* offset of the current output region within this table */
470 for (topa_idx
= 0; topa_idx
< buf
->cur_idx
; topa_idx
++)
471 base
+= sizes(buf
->cur
->table
[topa_idx
].size
);
474 local_set(&buf
->data_size
, base
);
476 old
= (local64_xchg(&buf
->head
, base
) &
477 ((buf
->nr_pages
<< PAGE_SHIFT
) - 1));
479 base
+= buf
->nr_pages
<< PAGE_SHIFT
;
481 local_add(base
- old
, &buf
->data_size
);
486 * pt_buffer_region() - obtain current output region's address
489 static void *pt_buffer_region(struct pt_buffer
*buf
)
491 return phys_to_virt(buf
->cur
->table
[buf
->cur_idx
].base
<< TOPA_SHIFT
);
495 * pt_buffer_region_size() - obtain current output region's size
498 static size_t pt_buffer_region_size(struct pt_buffer
*buf
)
500 return sizes(buf
->cur
->table
[buf
->cur_idx
].size
);
504 * pt_handle_status() - take care of possible status conditions
505 * @pt: Per-cpu pt context.
507 static void pt_handle_status(struct pt
*pt
)
509 struct pt_buffer
*buf
= perf_get_aux(&pt
->handle
);
513 rdmsrl(MSR_IA32_RTIT_STATUS
, status
);
515 if (status
& RTIT_STATUS_ERROR
) {
516 pr_err_ratelimited("ToPA ERROR encountered, trying to recover\n");
518 status
&= ~RTIT_STATUS_ERROR
;
521 if (status
& RTIT_STATUS_STOPPED
) {
522 status
&= ~RTIT_STATUS_STOPPED
;
525 * On systems that only do single-entry ToPA, hitting STOP
526 * means we are already losing data; need to let the decoder
529 if (!pt_cap_get(PT_CAP_topa_multiple_entries
) ||
530 buf
->output_off
== sizes(TOPA_ENTRY(buf
->cur
, buf
->cur_idx
)->size
)) {
531 local_inc(&buf
->lost
);
537 * Also on single-entry ToPA implementations, interrupt will come
538 * before the output reaches its output region's boundary.
540 if (!pt_cap_get(PT_CAP_topa_multiple_entries
) && !buf
->snapshot
&&
541 pt_buffer_region_size(buf
) - buf
->output_off
<= TOPA_PMI_MARGIN
) {
542 void *head
= pt_buffer_region(buf
);
544 /* everything within this margin needs to be zeroed out */
545 memset(head
+ buf
->output_off
, 0,
546 pt_buffer_region_size(buf
) -
552 pt_buffer_advance(buf
);
554 wrmsrl(MSR_IA32_RTIT_STATUS
, status
);
558 * pt_read_offset() - translate registers into buffer pointers
561 * Set buffer's output pointers from MSR values.
563 static void pt_read_offset(struct pt_buffer
*buf
)
565 u64 offset
, base_topa
;
567 rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE
, base_topa
);
568 buf
->cur
= phys_to_virt(base_topa
);
570 rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK
, offset
);
571 /* offset within current output region */
572 buf
->output_off
= offset
>> 32;
573 /* index of current output region within this table */
574 buf
->cur_idx
= (offset
& 0xffffff80) >> 7;
578 * pt_topa_next_entry() - obtain index of the first page in the next ToPA entry
580 * @pg: Page offset in the buffer.
582 * When advancing to the next output region (ToPA entry), given a page offset
583 * into the buffer, we need to find the offset of the first page in the next
586 static unsigned int pt_topa_next_entry(struct pt_buffer
*buf
, unsigned int pg
)
588 struct topa_entry
*te
= buf
->topa_index
[pg
];
591 if (buf
->first
== buf
->last
&& buf
->first
->last
== 1)
596 pg
&= buf
->nr_pages
- 1;
597 } while (buf
->topa_index
[pg
] == te
);
603 * pt_buffer_reset_markers() - place interrupt and stop bits in the buffer
605 * @handle: Current output handle.
607 * Place INT and STOP marks to prevent overwriting old data that the consumer
608 * hasn't yet collected and waking up the consumer after a certain fraction of
609 * the buffer has filled up. Only needed and sensible for non-snapshot counters.
611 * This obviously relies on buf::head to figure out buffer markers, so it has
612 * to be called after pt_buffer_reset_offsets() and before the hardware tracing
615 static int pt_buffer_reset_markers(struct pt_buffer
*buf
,
616 struct perf_output_handle
*handle
)
619 unsigned long head
= local64_read(&buf
->head
);
620 unsigned long idx
, npages
, wakeup
;
622 /* can't stop in the middle of an output region */
623 if (buf
->output_off
+ handle
->size
+ 1 <
624 sizes(TOPA_ENTRY(buf
->cur
, buf
->cur_idx
)->size
))
628 /* single entry ToPA is handled by marking all regions STOP=1 INT=1 */
629 if (!pt_cap_get(PT_CAP_topa_multiple_entries
))
632 /* clear STOP and INT from current entry */
633 buf
->topa_index
[buf
->stop_pos
]->stop
= 0;
634 buf
->topa_index
[buf
->intr_pos
]->intr
= 0;
636 /* how many pages till the STOP marker */
637 npages
= handle
->size
>> PAGE_SHIFT
;
639 /* if it's on a page boundary, fill up one more page */
640 if (!offset_in_page(head
+ handle
->size
+ 1))
643 idx
= (head
>> PAGE_SHIFT
) + npages
;
644 idx
&= buf
->nr_pages
- 1;
647 wakeup
= handle
->wakeup
>> PAGE_SHIFT
;
649 /* in the worst case, wake up the consumer one page before hard stop */
650 idx
= (head
>> PAGE_SHIFT
) + npages
- 1;
654 idx
&= buf
->nr_pages
- 1;
657 buf
->topa_index
[buf
->stop_pos
]->stop
= 1;
658 buf
->topa_index
[buf
->intr_pos
]->intr
= 1;
664 * pt_buffer_setup_topa_index() - build topa_index[] table of regions
667 * topa_index[] references output regions indexed by offset into the
668 * buffer for purposes of quick reverse lookup.
670 static void pt_buffer_setup_topa_index(struct pt_buffer
*buf
)
672 struct topa
*cur
= buf
->first
, *prev
= buf
->last
;
673 struct topa_entry
*te_cur
= TOPA_ENTRY(cur
, 0),
674 *te_prev
= TOPA_ENTRY(prev
, prev
->last
- 1);
677 while (pg
< buf
->nr_pages
) {
680 /* pages within one topa entry */
681 for (tidx
= 0; tidx
< 1 << te_cur
->size
; tidx
++, pg
++)
682 buf
->topa_index
[pg
] = te_prev
;
686 if (idx
== cur
->last
- 1) {
687 /* advance to next topa table */
689 cur
= list_entry(cur
->list
.next
, struct topa
, list
);
693 te_cur
= TOPA_ENTRY(cur
, idx
);
699 * pt_buffer_reset_offsets() - adjust buffer's write pointers from aux_head
701 * @head: Write pointer (aux_head) from AUX buffer.
703 * Find the ToPA table and entry corresponding to given @head and set buffer's
704 * "current" pointers accordingly. This is done after we have obtained the
705 * current aux_head position from a successful call to perf_aux_output_begin()
706 * to make sure the hardware is writing to the right place.
708 * This function modifies buf::{cur,cur_idx,output_off} that will be programmed
709 * into PT msrs when the tracing is enabled and buf::head and buf::data_size,
710 * which are used to determine INT and STOP markers' locations by a subsequent
711 * call to pt_buffer_reset_markers().
713 static void pt_buffer_reset_offsets(struct pt_buffer
*buf
, unsigned long head
)
718 head
&= (buf
->nr_pages
<< PAGE_SHIFT
) - 1;
720 pg
= (head
>> PAGE_SHIFT
) & (buf
->nr_pages
- 1);
721 pg
= pt_topa_next_entry(buf
, pg
);
723 buf
->cur
= (struct topa
*)((unsigned long)buf
->topa_index
[pg
] & PAGE_MASK
);
724 buf
->cur_idx
= ((unsigned long)buf
->topa_index
[pg
] -
725 (unsigned long)buf
->cur
) / sizeof(struct topa_entry
);
726 buf
->output_off
= head
& (sizes(buf
->cur
->table
[buf
->cur_idx
].size
) - 1);
728 local64_set(&buf
->head
, head
);
729 local_set(&buf
->data_size
, 0);
733 * pt_buffer_fini_topa() - deallocate ToPA structure of a buffer
736 static void pt_buffer_fini_topa(struct pt_buffer
*buf
)
738 struct topa
*topa
, *iter
;
740 list_for_each_entry_safe(topa
, iter
, &buf
->tables
, list
) {
742 * right now, this is in free_aux() path only, so
743 * no need to unlink this table from the list
750 * pt_buffer_init_topa() - initialize ToPA table for pt buffer
752 * @size: Total size of all regions within this ToPA.
753 * @gfp: Allocation flags.
755 static int pt_buffer_init_topa(struct pt_buffer
*buf
, unsigned long nr_pages
,
761 topa
= topa_alloc(buf
->cpu
, gfp
);
765 topa_insert_table(buf
, topa
);
767 while (buf
->nr_pages
< nr_pages
) {
768 err
= topa_insert_pages(buf
, gfp
);
770 pt_buffer_fini_topa(buf
);
775 pt_buffer_setup_topa_index(buf
);
777 /* link last table to the first one, unless we're double buffering */
778 if (pt_cap_get(PT_CAP_topa_multiple_entries
)) {
779 TOPA_ENTRY(buf
->last
, -1)->base
= buf
->first
->phys
>> TOPA_SHIFT
;
780 TOPA_ENTRY(buf
->last
, -1)->end
= 1;
788 * pt_buffer_setup_aux() - set up topa tables for a PT buffer
789 * @cpu: Cpu on which to allocate, -1 means current.
790 * @pages: Array of pointers to buffer pages passed from perf core.
791 * @nr_pages: Number of pages in the buffer.
792 * @snapshot: If this is a snapshot/overwrite counter.
794 * This is a pmu::setup_aux callback that sets up ToPA tables and all the
795 * bookkeeping for an AUX buffer.
797 * Return: Our private PT buffer structure.
800 pt_buffer_setup_aux(int cpu
, void **pages
, int nr_pages
, bool snapshot
)
802 struct pt_buffer
*buf
;
809 cpu
= raw_smp_processor_id();
810 node
= cpu_to_node(cpu
);
812 buf
= kzalloc_node(offsetof(struct pt_buffer
, topa_index
[nr_pages
]),
818 buf
->snapshot
= snapshot
;
819 buf
->data_pages
= pages
;
821 INIT_LIST_HEAD(&buf
->tables
);
823 ret
= pt_buffer_init_topa(buf
, nr_pages
, GFP_KERNEL
);
833 * pt_buffer_free_aux() - perf AUX deallocation path callback
836 static void pt_buffer_free_aux(void *data
)
838 struct pt_buffer
*buf
= data
;
840 pt_buffer_fini_topa(buf
);
845 * pt_buffer_is_full() - check if the buffer is full
847 * @pt: Per-cpu pt handle.
849 * If the user hasn't read data from the output region that aux_head
850 * points to, the buffer is considered full: the user needs to read at
851 * least this region and update aux_tail to point past it.
853 static bool pt_buffer_is_full(struct pt_buffer
*buf
, struct pt
*pt
)
858 if (local_read(&buf
->data_size
) >= pt
->handle
.size
)
865 * intel_pt_interrupt() - PT PMI handler
867 void intel_pt_interrupt(void)
869 struct pt
*pt
= this_cpu_ptr(&pt_ctx
);
870 struct pt_buffer
*buf
;
871 struct perf_event
*event
= pt
->handle
.event
;
874 * There may be a dangling PT bit in the interrupt status register
875 * after PT has been disabled by pt_event_stop(). Make sure we don't
876 * do anything (particularly, re-enable) for this event here.
878 if (!ACCESS_ONCE(pt
->handle_nmi
))
881 pt_config_start(false);
886 buf
= perf_get_aux(&pt
->handle
);
892 pt_handle_status(pt
);
896 perf_aux_output_end(&pt
->handle
, local_xchg(&buf
->data_size
, 0),
897 local_xchg(&buf
->lost
, 0));
899 if (!event
->hw
.state
) {
902 buf
= perf_aux_output_begin(&pt
->handle
, event
);
904 event
->hw
.state
= PERF_HES_STOPPED
;
908 pt_buffer_reset_offsets(buf
, pt
->handle
.head
);
909 /* snapshot counters don't use PMI, so it's safe */
910 ret
= pt_buffer_reset_markers(buf
, &pt
->handle
);
912 perf_aux_output_end(&pt
->handle
, 0, true);
916 pt_config_buffer(buf
->cur
->table
, buf
->cur_idx
,
926 static void pt_event_start(struct perf_event
*event
, int mode
)
928 struct pt
*pt
= this_cpu_ptr(&pt_ctx
);
929 struct pt_buffer
*buf
= perf_get_aux(&pt
->handle
);
931 if (!buf
|| pt_buffer_is_full(buf
, pt
)) {
932 event
->hw
.state
= PERF_HES_STOPPED
;
936 ACCESS_ONCE(pt
->handle_nmi
) = 1;
939 pt_config_buffer(buf
->cur
->table
, buf
->cur_idx
,
944 static void pt_event_stop(struct perf_event
*event
, int mode
)
946 struct pt
*pt
= this_cpu_ptr(&pt_ctx
);
949 * Protect against the PMI racing with disabling wrmsr,
950 * see comment in intel_pt_interrupt().
952 ACCESS_ONCE(pt
->handle_nmi
) = 0;
953 pt_config_start(false);
955 if (event
->hw
.state
== PERF_HES_STOPPED
)
958 event
->hw
.state
= PERF_HES_STOPPED
;
960 if (mode
& PERF_EF_UPDATE
) {
961 struct pt_buffer
*buf
= perf_get_aux(&pt
->handle
);
966 if (WARN_ON_ONCE(pt
->handle
.event
!= event
))
971 pt_handle_status(pt
);
977 static void pt_event_del(struct perf_event
*event
, int mode
)
979 struct pt
*pt
= this_cpu_ptr(&pt_ctx
);
980 struct pt_buffer
*buf
;
982 pt_event_stop(event
, PERF_EF_UPDATE
);
984 buf
= perf_get_aux(&pt
->handle
);
989 local_xchg(&buf
->data_size
,
990 buf
->nr_pages
<< PAGE_SHIFT
);
991 perf_aux_output_end(&pt
->handle
, local_xchg(&buf
->data_size
, 0),
992 local_xchg(&buf
->lost
, 0));
996 static int pt_event_add(struct perf_event
*event
, int mode
)
998 struct pt_buffer
*buf
;
999 struct pt
*pt
= this_cpu_ptr(&pt_ctx
);
1000 struct hw_perf_event
*hwc
= &event
->hw
;
1003 if (pt
->handle
.event
)
1006 buf
= perf_aux_output_begin(&pt
->handle
, event
);
1011 pt_buffer_reset_offsets(buf
, pt
->handle
.head
);
1012 if (!buf
->snapshot
) {
1013 ret
= pt_buffer_reset_markers(buf
, &pt
->handle
);
1018 if (mode
& PERF_EF_START
) {
1019 pt_event_start(event
, 0);
1021 if (hwc
->state
== PERF_HES_STOPPED
)
1024 hwc
->state
= PERF_HES_STOPPED
;
1030 perf_aux_output_end(&pt
->handle
, 0, true);
1032 hwc
->state
= PERF_HES_STOPPED
;
1037 static void pt_event_read(struct perf_event
*event
)
1041 static void pt_event_destroy(struct perf_event
*event
)
1043 x86_del_exclusive(x86_lbr_exclusive_pt
);
1046 static int pt_event_init(struct perf_event
*event
)
1048 if (event
->attr
.type
!= pt_pmu
.pmu
.type
)
1051 if (!pt_event_valid(event
))
1054 if (x86_add_exclusive(x86_lbr_exclusive_pt
))
1057 event
->destroy
= pt_event_destroy
;
1062 static __init
int pt_init(void)
1064 int ret
, cpu
, prior_warn
= 0;
1066 BUILD_BUG_ON(sizeof(struct topa
) > PAGE_SIZE
);
1068 for_each_online_cpu(cpu
) {
1071 ret
= rdmsrl_safe_on_cpu(cpu
, MSR_IA32_RTIT_CTL
, &ctl
);
1072 if (!ret
&& (ctl
& RTIT_CTL_TRACEEN
))
1078 x86_add_exclusive(x86_lbr_exclusive_pt
);
1079 pr_warn("PT is enabled at boot time, doing nothing\n");
1084 ret
= pt_pmu_hw_init();
1088 if (!pt_cap_get(PT_CAP_topa_output
)) {
1089 pr_warn("ToPA output is not supported on this CPU\n");
1093 if (!pt_cap_get(PT_CAP_topa_multiple_entries
))
1094 pt_pmu
.pmu
.capabilities
=
1095 PERF_PMU_CAP_AUX_NO_SG
| PERF_PMU_CAP_AUX_SW_DOUBLEBUF
;
1097 pt_pmu
.pmu
.capabilities
|= PERF_PMU_CAP_EXCLUSIVE
| PERF_PMU_CAP_ITRACE
;
1098 pt_pmu
.pmu
.attr_groups
= pt_attr_groups
;
1099 pt_pmu
.pmu
.task_ctx_nr
= perf_sw_context
;
1100 pt_pmu
.pmu
.event_init
= pt_event_init
;
1101 pt_pmu
.pmu
.add
= pt_event_add
;
1102 pt_pmu
.pmu
.del
= pt_event_del
;
1103 pt_pmu
.pmu
.start
= pt_event_start
;
1104 pt_pmu
.pmu
.stop
= pt_event_stop
;
1105 pt_pmu
.pmu
.read
= pt_event_read
;
1106 pt_pmu
.pmu
.setup_aux
= pt_buffer_setup_aux
;
1107 pt_pmu
.pmu
.free_aux
= pt_buffer_free_aux
;
1108 ret
= perf_pmu_register(&pt_pmu
.pmu
, "intel_pt", -1);
1112 arch_initcall(pt_init
);