Commit | Line | Data |
---|---|---|
0793a61d | 1 | /* |
57c0c15b | 2 | * Performance events: |
0793a61d | 3 | * |
a308444c | 4 | * Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de> |
e7e7ee2e IM |
5 | * Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar |
6 | * Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra | |
0793a61d | 7 | * |
57c0c15b | 8 | * Data type definitions, declarations, prototypes. |
0793a61d | 9 | * |
a308444c | 10 | * Started by: Thomas Gleixner and Ingo Molnar |
0793a61d | 11 | * |
57c0c15b | 12 | * For licensing details see kernel-base/COPYING |
0793a61d | 13 | */ |
cdd6c482 IM |
14 | #ifndef _LINUX_PERF_EVENT_H |
15 | #define _LINUX_PERF_EVENT_H | |
0793a61d | 16 | |
f3dfd265 PM |
17 | #include <linux/types.h> |
18 | #include <linux/ioctl.h> | |
9aaa131a | 19 | #include <asm/byteorder.h> |
0793a61d TG |
20 | |
21 | /* | |
9f66a381 IM |
22 | * User-space ABI bits: |
23 | */ | |
24 | ||
25 | /* | |
0d48696f | 26 | * attr.type |
0793a61d | 27 | */ |
1c432d89 | 28 | enum perf_type_id { |
a308444c IM |
29 | PERF_TYPE_HARDWARE = 0, |
30 | PERF_TYPE_SOFTWARE = 1, | |
31 | PERF_TYPE_TRACEPOINT = 2, | |
32 | PERF_TYPE_HW_CACHE = 3, | |
33 | PERF_TYPE_RAW = 4, | |
24f1e32c | 34 | PERF_TYPE_BREAKPOINT = 5, |
b8e83514 | 35 | |
a308444c | 36 | PERF_TYPE_MAX, /* non-ABI */ |
b8e83514 | 37 | }; |
6c594c21 | 38 | |
b8e83514 | 39 | /* |
cdd6c482 IM |
40 | * Generalized performance event event_id types, used by the |
41 | * attr.config parameter of the sys_perf_event_open() | |
a308444c | 42 | * syscall: |
b8e83514 | 43 | */ |
1c432d89 | 44 | enum perf_hw_id { |
9f66a381 | 45 | /* |
b8e83514 | 46 | * Common hardware events, generalized by the kernel: |
9f66a381 | 47 | */ |
f4dbfa8f PZ |
48 | PERF_COUNT_HW_CPU_CYCLES = 0, |
49 | PERF_COUNT_HW_INSTRUCTIONS = 1, | |
50 | PERF_COUNT_HW_CACHE_REFERENCES = 2, | |
51 | PERF_COUNT_HW_CACHE_MISSES = 3, | |
52 | PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, | |
53 | PERF_COUNT_HW_BRANCH_MISSES = 5, | |
54 | PERF_COUNT_HW_BUS_CYCLES = 6, | |
8f622422 IM |
55 | PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7, |
56 | PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8, | |
c37e1749 | 57 | PERF_COUNT_HW_REF_CPU_CYCLES = 9, |
f4dbfa8f | 58 | |
a308444c | 59 | PERF_COUNT_HW_MAX, /* non-ABI */ |
b8e83514 | 60 | }; |
e077df4f | 61 | |
8326f44d | 62 | /* |
cdd6c482 | 63 | * Generalized hardware cache events: |
8326f44d | 64 | * |
89d6c0b5 | 65 | * { L1-D, L1-I, LLC, ITLB, DTLB, BPU, NODE } x |
8326f44d IM |
66 | * { read, write, prefetch } x |
67 | * { accesses, misses } | |
68 | */ | |
1c432d89 | 69 | enum perf_hw_cache_id { |
a308444c IM |
70 | PERF_COUNT_HW_CACHE_L1D = 0, |
71 | PERF_COUNT_HW_CACHE_L1I = 1, | |
72 | PERF_COUNT_HW_CACHE_LL = 2, | |
73 | PERF_COUNT_HW_CACHE_DTLB = 3, | |
74 | PERF_COUNT_HW_CACHE_ITLB = 4, | |
75 | PERF_COUNT_HW_CACHE_BPU = 5, | |
89d6c0b5 | 76 | PERF_COUNT_HW_CACHE_NODE = 6, |
a308444c IM |
77 | |
78 | PERF_COUNT_HW_CACHE_MAX, /* non-ABI */ | |
8326f44d IM |
79 | }; |
80 | ||
1c432d89 | 81 | enum perf_hw_cache_op_id { |
a308444c IM |
82 | PERF_COUNT_HW_CACHE_OP_READ = 0, |
83 | PERF_COUNT_HW_CACHE_OP_WRITE = 1, | |
84 | PERF_COUNT_HW_CACHE_OP_PREFETCH = 2, | |
8326f44d | 85 | |
a308444c | 86 | PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */ |
8326f44d IM |
87 | }; |
88 | ||
1c432d89 PZ |
89 | enum perf_hw_cache_op_result_id { |
90 | PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0, | |
91 | PERF_COUNT_HW_CACHE_RESULT_MISS = 1, | |
8326f44d | 92 | |
a308444c | 93 | PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */ |
8326f44d IM |
94 | }; |
95 | ||
b8e83514 | 96 | /* |
cdd6c482 IM |
97 | * Special "software" events provided by the kernel, even if the hardware |
98 | * does not support performance events. These events measure various | |
b8e83514 PZ |
99 | * physical and sw events of the kernel (and allow the profiling of them as |
100 | * well): | |
101 | */ | |
1c432d89 | 102 | enum perf_sw_ids { |
a308444c IM |
103 | PERF_COUNT_SW_CPU_CLOCK = 0, |
104 | PERF_COUNT_SW_TASK_CLOCK = 1, | |
105 | PERF_COUNT_SW_PAGE_FAULTS = 2, | |
106 | PERF_COUNT_SW_CONTEXT_SWITCHES = 3, | |
107 | PERF_COUNT_SW_CPU_MIGRATIONS = 4, | |
108 | PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, | |
109 | PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, | |
f7d79860 AB |
110 | PERF_COUNT_SW_ALIGNMENT_FAULTS = 7, |
111 | PERF_COUNT_SW_EMULATION_FAULTS = 8, | |
a308444c IM |
112 | |
113 | PERF_COUNT_SW_MAX, /* non-ABI */ | |
0793a61d TG |
114 | }; |
115 | ||
8a057d84 | 116 | /* |
0d48696f | 117 | * Bits that can be set in attr.sample_type to request information |
8a057d84 PZ |
118 | * in the overflow packets. |
119 | */ | |
cdd6c482 | 120 | enum perf_event_sample_format { |
a308444c IM |
121 | PERF_SAMPLE_IP = 1U << 0, |
122 | PERF_SAMPLE_TID = 1U << 1, | |
123 | PERF_SAMPLE_TIME = 1U << 2, | |
124 | PERF_SAMPLE_ADDR = 1U << 3, | |
3dab77fb | 125 | PERF_SAMPLE_READ = 1U << 4, |
a308444c IM |
126 | PERF_SAMPLE_CALLCHAIN = 1U << 5, |
127 | PERF_SAMPLE_ID = 1U << 6, | |
128 | PERF_SAMPLE_CPU = 1U << 7, | |
129 | PERF_SAMPLE_PERIOD = 1U << 8, | |
7f453c24 | 130 | PERF_SAMPLE_STREAM_ID = 1U << 9, |
3a43ce68 | 131 | PERF_SAMPLE_RAW = 1U << 10, |
bce38cd5 | 132 | PERF_SAMPLE_BRANCH_STACK = 1U << 11, |
974802ea | 133 | |
bce38cd5 | 134 | PERF_SAMPLE_MAX = 1U << 12, /* non-ABI */ |
8a057d84 PZ |
135 | }; |
136 | ||
bce38cd5 SE |
137 | /* |
138 | * values to program into branch_sample_type when PERF_SAMPLE_BRANCH_STACK is set | |
139 | * | |
140 | * If the user does not pass priv level information via branch_sample_type, | |
141 | * the kernel uses the event's priv level. Branch and event priv levels do | |
142 | * not have to match. Branch priv level is checked for permissions. | |
143 | * | |
144 | * The branch types can be combined, however BRANCH_ANY covers all types | |
145 | * of branches and therefore it supersedes all the other types. | |
146 | */ | |
147 | enum perf_branch_sample_type { | |
148 | PERF_SAMPLE_BRANCH_USER = 1U << 0, /* user branches */ | |
149 | PERF_SAMPLE_BRANCH_KERNEL = 1U << 1, /* kernel branches */ | |
150 | PERF_SAMPLE_BRANCH_HV = 1U << 2, /* hypervisor branches */ | |
151 | ||
152 | PERF_SAMPLE_BRANCH_ANY = 1U << 3, /* any branch types */ | |
153 | PERF_SAMPLE_BRANCH_ANY_CALL = 1U << 4, /* any call branch */ | |
154 | PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << 5, /* any return branch */ | |
155 | PERF_SAMPLE_BRANCH_IND_CALL = 1U << 6, /* indirect calls */ | |
156 | ||
157 | PERF_SAMPLE_BRANCH_MAX = 1U << 7, /* non-ABI */ | |
158 | }; | |
159 | ||
160 | #define PERF_SAMPLE_BRANCH_PLM_ALL \ | |
161 | (PERF_SAMPLE_BRANCH_USER|\ | |
162 | PERF_SAMPLE_BRANCH_KERNEL|\ | |
163 | PERF_SAMPLE_BRANCH_HV) | |
164 | ||
53cfbf59 | 165 | /* |
cdd6c482 | 166 | * The format of the data returned by read() on a perf event fd, |
3dab77fb PZ |
167 | * as specified by attr.read_format: |
168 | * | |
169 | * struct read_format { | |
57c0c15b | 170 | * { u64 value; |
d7ebe75b VW |
171 | * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED |
172 | * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING | |
57c0c15b IM |
173 | * { u64 id; } && PERF_FORMAT_ID |
174 | * } && !PERF_FORMAT_GROUP | |
3dab77fb | 175 | * |
57c0c15b | 176 | * { u64 nr; |
d7ebe75b VW |
177 | * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED |
178 | * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING | |
57c0c15b IM |
179 | * { u64 value; |
180 | * { u64 id; } && PERF_FORMAT_ID | |
181 | * } cntr[nr]; | |
182 | * } && PERF_FORMAT_GROUP | |
3dab77fb | 183 | * }; |
53cfbf59 | 184 | */ |
cdd6c482 | 185 | enum perf_event_read_format { |
a308444c IM |
186 | PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, |
187 | PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, | |
188 | PERF_FORMAT_ID = 1U << 2, | |
3dab77fb | 189 | PERF_FORMAT_GROUP = 1U << 3, |
974802ea | 190 | |
57c0c15b | 191 | PERF_FORMAT_MAX = 1U << 4, /* non-ABI */ |
53cfbf59 PM |
192 | }; |
193 | ||
974802ea | 194 | #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ |
cb5d7699 SE |
195 | #define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ |
196 | #define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ | |
974802ea | 197 | |
9f66a381 | 198 | /* |
cdd6c482 | 199 | * Hardware event_id to monitor via a performance monitoring event: |
9f66a381 | 200 | */ |
cdd6c482 | 201 | struct perf_event_attr { |
974802ea | 202 | |
f4a2deb4 | 203 | /* |
a21ca2ca IM |
204 | * Major type: hardware/software/tracepoint/etc. |
205 | */ | |
206 | __u32 type; | |
974802ea PZ |
207 | |
208 | /* | |
209 | * Size of the attr structure, for fwd/bwd compat. | |
210 | */ | |
211 | __u32 size; | |
a21ca2ca IM |
212 | |
213 | /* | |
214 | * Type specific configuration information. | |
f4a2deb4 PZ |
215 | */ |
216 | __u64 config; | |
9f66a381 | 217 | |
60db5e09 | 218 | union { |
b23f3325 PZ |
219 | __u64 sample_period; |
220 | __u64 sample_freq; | |
60db5e09 PZ |
221 | }; |
222 | ||
b23f3325 PZ |
223 | __u64 sample_type; |
224 | __u64 read_format; | |
9f66a381 | 225 | |
2743a5b0 | 226 | __u64 disabled : 1, /* off by default */ |
0475f9ea PM |
227 | inherit : 1, /* children inherit it */ |
228 | pinned : 1, /* must always be on PMU */ | |
229 | exclusive : 1, /* only group on PMU */ | |
230 | exclude_user : 1, /* don't count user */ | |
231 | exclude_kernel : 1, /* ditto kernel */ | |
232 | exclude_hv : 1, /* ditto hypervisor */ | |
2743a5b0 | 233 | exclude_idle : 1, /* don't count when idle */ |
0a4a9391 | 234 | mmap : 1, /* include mmap data */ |
8d1b2d93 | 235 | comm : 1, /* include comm data */ |
60db5e09 | 236 | freq : 1, /* use freq, not period */ |
bfbd3381 | 237 | inherit_stat : 1, /* per task counts */ |
57e7986e | 238 | enable_on_exec : 1, /* next exec enables */ |
9f498cc5 | 239 | task : 1, /* trace fork/exit */ |
2667de81 | 240 | watermark : 1, /* wakeup_watermark */ |
ab608344 PZ |
241 | /* |
242 | * precise_ip: | |
243 | * | |
244 | * 0 - SAMPLE_IP can have arbitrary skid | |
245 | * 1 - SAMPLE_IP must have constant skid | |
246 | * 2 - SAMPLE_IP requested to have 0 skid | |
247 | * 3 - SAMPLE_IP must have 0 skid | |
248 | * | |
249 | * See also PERF_RECORD_MISC_EXACT_IP | |
250 | */ | |
251 | precise_ip : 2, /* skid constraint */ | |
3af9e859 | 252 | mmap_data : 1, /* non-exec mmap data */ |
c980d109 | 253 | sample_id_all : 1, /* sample_type all events */ |
ab608344 | 254 | |
a240f761 JR |
255 | exclude_host : 1, /* don't count in host */ |
256 | exclude_guest : 1, /* don't count in guest */ | |
257 | ||
258 | __reserved_1 : 43; | |
2743a5b0 | 259 | |
2667de81 PZ |
260 | union { |
261 | __u32 wakeup_events; /* wakeup every n events */ | |
262 | __u32 wakeup_watermark; /* bytes before wakeup */ | |
263 | }; | |
24f1e32c | 264 | |
f13c12c6 | 265 | __u32 bp_type; |
a7e3ed1e AK |
266 | union { |
267 | __u64 bp_addr; | |
268 | __u64 config1; /* extension of config */ | |
269 | }; | |
270 | union { | |
271 | __u64 bp_len; | |
272 | __u64 config2; /* extension of config1 */ | |
273 | }; | |
bce38cd5 | 274 | __u64 branch_sample_type; /* enum perf_branch_sample_type */ |
eab656ae TG |
275 | }; |
276 | ||
d859e29f | 277 | /* |
cdd6c482 | 278 | * Ioctls that can be done on a perf event fd: |
d859e29f | 279 | */ |
cdd6c482 | 280 | #define PERF_EVENT_IOC_ENABLE _IO ('$', 0) |
57c0c15b IM |
281 | #define PERF_EVENT_IOC_DISABLE _IO ('$', 1) |
282 | #define PERF_EVENT_IOC_REFRESH _IO ('$', 2) | |
cdd6c482 | 283 | #define PERF_EVENT_IOC_RESET _IO ('$', 3) |
4c49b128 | 284 | #define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64) |
cdd6c482 | 285 | #define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) |
6fb2915d | 286 | #define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) |
cdd6c482 IM |
287 | |
288 | enum perf_event_ioc_flags { | |
3df5edad PZ |
289 | PERF_IOC_FLAG_GROUP = 1U << 0, |
290 | }; | |
d859e29f | 291 | |
37d81828 PM |
292 | /* |
293 | * Structure of the page that can be mapped via mmap | |
294 | */ | |
cdd6c482 | 295 | struct perf_event_mmap_page { |
37d81828 PM |
296 | __u32 version; /* version number of this structure */ |
297 | __u32 compat_version; /* lowest version this is compat with */ | |
38ff667b PZ |
298 | |
299 | /* | |
cdd6c482 | 300 | * Bits needed to read the hw events in user-space. |
38ff667b | 301 | * |
c7206205 PZ |
302 | * u32 seq, time_mult, time_shift, idx, width; |
303 | * u64 count, enabled, running; | |
304 | * u64 cyc, time_offset; | |
305 | * s64 pmc = 0; | |
38ff667b | 306 | * |
a2e87d06 PZ |
307 | * do { |
308 | * seq = pc->lock; | |
a2e87d06 | 309 | * barrier(); |
c7206205 PZ |
310 | * |
311 | * enabled = pc->time_enabled; | |
312 | * running = pc->time_running; | |
313 | * | |
314 | * if (pc->cap_usr_time && enabled != running) { | |
315 | * cyc = rdtsc(); | |
316 | * time_offset = pc->time_offset; | |
317 | * time_mult = pc->time_mult; | |
318 | * time_shift = pc->time_shift; | |
319 | * } | |
320 | * | |
321 | * idx = pc->index; | |
322 | * count = pc->offset; | |
323 | * if (pc->cap_usr_rdpmc && idx) { | |
324 | * width = pc->pmc_width; | |
325 | * pmc = rdpmc(idx - 1); | |
326 | * } | |
38ff667b | 327 | * |
a2e87d06 PZ |
328 | * barrier(); |
329 | * } while (pc->lock != seq); | |
38ff667b | 330 | * |
92f22a38 PZ |
331 | * NOTE: for obvious reasons this only works on self-monitoring |
332 | * processes. | |
38ff667b | 333 | */ |
37d81828 | 334 | __u32 lock; /* seqlock for synchronization */ |
cdd6c482 IM |
335 | __u32 index; /* hardware event identifier */ |
336 | __s64 offset; /* add to hardware event value */ | |
337 | __u64 time_enabled; /* time event active */ | |
338 | __u64 time_running; /* time event on cpu */ | |
c7206205 PZ |
339 | union { |
340 | __u64 capabilities; | |
341 | __u64 cap_usr_time : 1, | |
342 | cap_usr_rdpmc : 1, | |
343 | cap_____res : 62; | |
344 | }; | |
345 | ||
346 | /* | |
347 | * If cap_usr_rdpmc this field provides the bit-width of the value | |
348 | * read using the rdpmc() or equivalent instruction. This can be used | |
349 | * to sign extend the result like: | |
350 | * | |
351 | * pmc <<= 64 - width; | |
352 | * pmc >>= 64 - width; // signed shift right | |
353 | * count += pmc; | |
354 | */ | |
355 | __u16 pmc_width; | |
356 | ||
357 | /* | |
358 | * If cap_usr_time the below fields can be used to compute the time | |
359 | * delta since time_enabled (in ns) using rdtsc or similar. | |
360 | * | |
361 | * u64 quot, rem; | |
362 | * u64 delta; | |
363 | * | |
364 | * quot = (cyc >> time_shift); | |
365 | * rem = cyc & (((u64)1 << time_shift) - 1); | |
366 | * delta = time_offset + quot * time_mult + | |
367 | * ((rem * time_mult) >> time_shift); | |
368 | * | |
369 | * Where time_offset,time_mult,time_shift and cyc are read in the | |
370 | * seqcount loop described above. This delta can then be added to | |
371 | * enabled and possibly running (if idx), improving the scaling: | |
372 | * | |
373 | * enabled += delta; | |
374 | * if (idx) | |
375 | * running += delta; | |
376 | * | |
377 | * quot = count / running; | |
378 | * rem = count % running; | |
379 | * count = quot * enabled + (rem * enabled) / running; | |
380 | */ | |
381 | __u16 time_shift; | |
382 | __u32 time_mult; | |
e3f3541c | 383 | __u64 time_offset; |
7b732a75 | 384 | |
41f95331 PZ |
385 | /* |
386 | * Hole for extension of the self monitor capabilities | |
387 | */ | |
388 | ||
c7206205 | 389 | __u64 __reserved[120]; /* align to 1k */ |
41f95331 | 390 | |
38ff667b PZ |
391 | /* |
392 | * Control data for the mmap() data buffer. | |
393 | * | |
43a21ea8 PZ |
394 | * User-space reading the @data_head value should issue an rmb(), on |
395 | * SMP capable platforms, after reading this value -- see | |
cdd6c482 | 396 | * perf_event_wakeup(). |
43a21ea8 PZ |
397 | * |
398 | * When the mapping is PROT_WRITE the @data_tail value should be | |
399 | * written by userspace to reflect the last read data. In this case | |
400 | * the kernel will not over-write unread data. | |
38ff667b | 401 | */ |
8e3747c1 | 402 | __u64 data_head; /* head in the data section */ |
43a21ea8 | 403 | __u64 data_tail; /* user-space written tail */ |
37d81828 PM |
404 | }; |
405 | ||
39447b38 | 406 | #define PERF_RECORD_MISC_CPUMODE_MASK (7 << 0) |
184f412c | 407 | #define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0) |
cdd6c482 IM |
408 | #define PERF_RECORD_MISC_KERNEL (1 << 0) |
409 | #define PERF_RECORD_MISC_USER (2 << 0) | |
410 | #define PERF_RECORD_MISC_HYPERVISOR (3 << 0) | |
39447b38 ZY |
411 | #define PERF_RECORD_MISC_GUEST_KERNEL (4 << 0) |
412 | #define PERF_RECORD_MISC_GUEST_USER (5 << 0) | |
6fab0192 | 413 | |
ab608344 PZ |
414 | /* |
415 | * Indicates that the content of PERF_SAMPLE_IP points to | |
416 | * the actual instruction that triggered the event. See also | |
417 | * perf_event_attr::precise_ip. | |
418 | */ | |
419 | #define PERF_RECORD_MISC_EXACT_IP (1 << 14) | |
ef21f683 PZ |
420 | /* |
421 | * Reserve the last bit to indicate some extended misc field | |
422 | */ | |
423 | #define PERF_RECORD_MISC_EXT_RESERVED (1 << 15) | |
424 | ||
5c148194 PZ |
425 | struct perf_event_header { |
426 | __u32 type; | |
6fab0192 PZ |
427 | __u16 misc; |
428 | __u16 size; | |
5c148194 PZ |
429 | }; |
430 | ||
431 | enum perf_event_type { | |
5ed00415 | 432 | |
0c593b34 | 433 | /* |
c980d109 ACM |
434 | * If perf_event_attr.sample_id_all is set then all event types will |
435 | * have the sample_type selected fields related to where/when | |
436 | * (identity) an event took place (TID, TIME, ID, CPU, STREAM_ID) | |
437 | * described in PERF_RECORD_SAMPLE below, it will be stashed just after | |
438 | * the perf_event_header and the fields already present for the existing | |
439 | * fields, i.e. at the end of the payload. That way a newer perf.data | |
440 | * file will be supported by older perf tools, with these new optional | |
441 | * fields being ignored. | |
442 | * | |
0c593b34 PZ |
443 | * The MMAP events record the PROT_EXEC mappings so that we can |
444 | * correlate userspace IPs to code. They have the following structure: | |
445 | * | |
446 | * struct { | |
0127c3ea | 447 | * struct perf_event_header header; |
0c593b34 | 448 | * |
0127c3ea IM |
449 | * u32 pid, tid; |
450 | * u64 addr; | |
451 | * u64 len; | |
452 | * u64 pgoff; | |
453 | * char filename[]; | |
0c593b34 PZ |
454 | * }; |
455 | */ | |
cdd6c482 | 456 | PERF_RECORD_MMAP = 1, |
0a4a9391 | 457 | |
43a21ea8 PZ |
458 | /* |
459 | * struct { | |
57c0c15b IM |
460 | * struct perf_event_header header; |
461 | * u64 id; | |
462 | * u64 lost; | |
43a21ea8 PZ |
463 | * }; |
464 | */ | |
cdd6c482 | 465 | PERF_RECORD_LOST = 2, |
43a21ea8 | 466 | |
8d1b2d93 PZ |
467 | /* |
468 | * struct { | |
0127c3ea | 469 | * struct perf_event_header header; |
8d1b2d93 | 470 | * |
0127c3ea IM |
471 | * u32 pid, tid; |
472 | * char comm[]; | |
8d1b2d93 PZ |
473 | * }; |
474 | */ | |
cdd6c482 | 475 | PERF_RECORD_COMM = 3, |
8d1b2d93 | 476 | |
9f498cc5 PZ |
477 | /* |
478 | * struct { | |
479 | * struct perf_event_header header; | |
480 | * u32 pid, ppid; | |
481 | * u32 tid, ptid; | |
393b2ad8 | 482 | * u64 time; |
9f498cc5 PZ |
483 | * }; |
484 | */ | |
cdd6c482 | 485 | PERF_RECORD_EXIT = 4, |
9f498cc5 | 486 | |
26b119bc PZ |
487 | /* |
488 | * struct { | |
0127c3ea IM |
489 | * struct perf_event_header header; |
490 | * u64 time; | |
689802b2 | 491 | * u64 id; |
7f453c24 | 492 | * u64 stream_id; |
a78ac325 PZ |
493 | * }; |
494 | */ | |
184f412c IM |
495 | PERF_RECORD_THROTTLE = 5, |
496 | PERF_RECORD_UNTHROTTLE = 6, | |
a78ac325 | 497 | |
60313ebe PZ |
498 | /* |
499 | * struct { | |
a21ca2ca IM |
500 | * struct perf_event_header header; |
501 | * u32 pid, ppid; | |
9f498cc5 | 502 | * u32 tid, ptid; |
a6f10a2f | 503 | * u64 time; |
60313ebe PZ |
504 | * }; |
505 | */ | |
cdd6c482 | 506 | PERF_RECORD_FORK = 7, |
60313ebe | 507 | |
38b200d6 PZ |
508 | /* |
509 | * struct { | |
184f412c IM |
510 | * struct perf_event_header header; |
511 | * u32 pid, tid; | |
3dab77fb | 512 | * |
184f412c | 513 | * struct read_format values; |
38b200d6 PZ |
514 | * }; |
515 | */ | |
cdd6c482 | 516 | PERF_RECORD_READ = 8, |
38b200d6 | 517 | |
8a057d84 | 518 | /* |
0c593b34 | 519 | * struct { |
0127c3ea | 520 | * struct perf_event_header header; |
0c593b34 | 521 | * |
43a21ea8 PZ |
522 | * { u64 ip; } && PERF_SAMPLE_IP |
523 | * { u32 pid, tid; } && PERF_SAMPLE_TID | |
524 | * { u64 time; } && PERF_SAMPLE_TIME | |
525 | * { u64 addr; } && PERF_SAMPLE_ADDR | |
e6e18ec7 | 526 | * { u64 id; } && PERF_SAMPLE_ID |
7f453c24 | 527 | * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID |
43a21ea8 | 528 | * { u32 cpu, res; } && PERF_SAMPLE_CPU |
57c0c15b | 529 | * { u64 period; } && PERF_SAMPLE_PERIOD |
0c593b34 | 530 | * |
3dab77fb | 531 | * { struct read_format values; } && PERF_SAMPLE_READ |
0c593b34 | 532 | * |
f9188e02 | 533 | * { u64 nr, |
43a21ea8 | 534 | * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN |
3dab77fb | 535 | * |
57c0c15b IM |
536 | * # |
537 | * # The RAW record below is opaque data wrt the ABI | |
538 | * # | |
539 | * # That is, the ABI doesn't make any promises wrt | |
540 | * # the stability of its content, it may vary depending | |
541 | * # on event, hardware, kernel version and phase of | |
542 | * # the moon. | |
543 | * # | |
544 | * # In other words, PERF_SAMPLE_RAW contents are not an ABI. | |
545 | * # | |
3dab77fb | 546 | * |
a044560c PZ |
547 | * { u32 size; |
548 | * char data[size];}&& PERF_SAMPLE_RAW | |
bce38cd5 SE |
549 | * |
550 | * { u64 nr; { u64 from, to, flags } lbr[nr]; } && PERF_SAMPLE_BRANCH_STACK | |
0c593b34 | 551 | * }; |
8a057d84 | 552 | */ |
184f412c | 553 | PERF_RECORD_SAMPLE = 9, |
e6e18ec7 | 554 | |
cdd6c482 | 555 | PERF_RECORD_MAX, /* non-ABI */ |
5c148194 PZ |
556 | }; |
557 | ||
0b0d9cf6 | 558 | #define PERF_MAX_STACK_DEPTH 127 |
114067b6 | 559 | |
f9188e02 PZ |
560 | enum perf_callchain_context { |
561 | PERF_CONTEXT_HV = (__u64)-32, | |
562 | PERF_CONTEXT_KERNEL = (__u64)-128, | |
563 | PERF_CONTEXT_USER = (__u64)-512, | |
7522060c | 564 | |
f9188e02 PZ |
565 | PERF_CONTEXT_GUEST = (__u64)-2048, |
566 | PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, | |
567 | PERF_CONTEXT_GUEST_USER = (__u64)-2560, | |
568 | ||
569 | PERF_CONTEXT_MAX = (__u64)-4095, | |
7522060c IM |
570 | }; |
571 | ||
e7e7ee2e IM |
572 | #define PERF_FLAG_FD_NO_GROUP (1U << 0) |
573 | #define PERF_FLAG_FD_OUTPUT (1U << 1) | |
574 | #define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */ | |
a4be7c27 | 575 | |
f3dfd265 | 576 | #ifdef __KERNEL__ |
9f66a381 | 577 | /* |
f3dfd265 | 578 | * Kernel-internal data types and definitions: |
9f66a381 IM |
579 | */ |
580 | ||
cdd6c482 | 581 | #ifdef CONFIG_PERF_EVENTS |
e5d1367f | 582 | # include <linux/cgroup.h> |
cdd6c482 | 583 | # include <asm/perf_event.h> |
7be79236 | 584 | # include <asm/local64.h> |
f3dfd265 PM |
585 | #endif |
586 | ||
39447b38 | 587 | struct perf_guest_info_callbacks { |
e7e7ee2e IM |
588 | int (*is_in_guest)(void); |
589 | int (*is_user_mode)(void); | |
590 | unsigned long (*get_guest_ip)(void); | |
39447b38 ZY |
591 | }; |
592 | ||
2ff6cfd7 AB |
593 | #ifdef CONFIG_HAVE_HW_BREAKPOINT |
594 | #include <asm/hw_breakpoint.h> | |
595 | #endif | |
596 | ||
f3dfd265 PM |
597 | #include <linux/list.h> |
598 | #include <linux/mutex.h> | |
599 | #include <linux/rculist.h> | |
600 | #include <linux/rcupdate.h> | |
601 | #include <linux/spinlock.h> | |
d6d020e9 | 602 | #include <linux/hrtimer.h> |
3c446b3d | 603 | #include <linux/fs.h> |
709e50cf | 604 | #include <linux/pid_namespace.h> |
906010b2 | 605 | #include <linux/workqueue.h> |
5331d7b8 | 606 | #include <linux/ftrace.h> |
85cfabbc | 607 | #include <linux/cpu.h> |
e360adbe | 608 | #include <linux/irq_work.h> |
c5905afb | 609 | #include <linux/static_key.h> |
60063497 | 610 | #include <linux/atomic.h> |
641cc938 | 611 | #include <linux/sysfs.h> |
fa588151 | 612 | #include <asm/local.h> |
f3dfd265 | 613 | |
f9188e02 PZ |
614 | struct perf_callchain_entry { |
615 | __u64 nr; | |
616 | __u64 ip[PERF_MAX_STACK_DEPTH]; | |
617 | }; | |
618 | ||
3a43ce68 FW |
619 | struct perf_raw_record { |
620 | u32 size; | |
621 | void *data; | |
f413cdb8 FW |
622 | }; |
623 | ||
bce38cd5 SE |
624 | /* |
625 | * single taken branch record layout: | |
626 | * | |
627 | * from: source instruction (may not always be a branch insn) | |
628 | * to: branch target | |
629 | * mispred: branch target was mispredicted | |
630 | * predicted: branch target was predicted | |
631 | * | |
632 | * support for mispred, predicted is optional. In case it | |
633 | * is not supported mispred = predicted = 0. | |
634 | */ | |
caff2bef | 635 | struct perf_branch_entry { |
bce38cd5 SE |
636 | __u64 from; |
637 | __u64 to; | |
638 | __u64 mispred:1, /* target mispredicted */ | |
639 | predicted:1,/* target predicted */ | |
640 | reserved:62; | |
caff2bef PZ |
641 | }; |
642 | ||
bce38cd5 SE |
643 | /* |
644 | * branch stack layout: | |
645 | * nr: number of taken branches stored in entries[] | |
646 | * | |
647 | * Note that nr can vary from sample to sample | |
648 | * branches (to, from) are stored from most recent | |
649 | * to least recent, i.e., entries[0] contains the most | |
650 | * recent branch. | |
651 | */ | |
caff2bef PZ |
652 | struct perf_branch_stack { |
653 | __u64 nr; | |
654 | struct perf_branch_entry entries[0]; | |
655 | }; | |
656 | ||
f3dfd265 PM |
657 | struct task_struct; |
658 | ||
efc9f05d SE |
659 | /* |
660 | * extra PMU register associated with an event | |
661 | */ | |
662 | struct hw_perf_event_extra { | |
663 | u64 config; /* register value */ | |
664 | unsigned int reg; /* register address or index */ | |
665 | int alloc; /* extra register already allocated */ | |
666 | int idx; /* index in shared_regs->regs[] */ | |
667 | }; | |
668 | ||
0793a61d | 669 | /** |
cdd6c482 | 670 | * struct hw_perf_event - performance event hardware details: |
0793a61d | 671 | */ |
cdd6c482 IM |
672 | struct hw_perf_event { |
673 | #ifdef CONFIG_PERF_EVENTS | |
d6d020e9 PZ |
674 | union { |
675 | struct { /* hardware */ | |
a308444c | 676 | u64 config; |
447a194b | 677 | u64 last_tag; |
a308444c | 678 | unsigned long config_base; |
cdd6c482 | 679 | unsigned long event_base; |
c48b6053 | 680 | int event_base_rdpmc; |
a308444c | 681 | int idx; |
447a194b | 682 | int last_cpu; |
bce38cd5 | 683 | |
efc9f05d | 684 | struct hw_perf_event_extra extra_reg; |
bce38cd5 | 685 | struct hw_perf_event_extra branch_reg; |
d6d020e9 | 686 | }; |
721a669b | 687 | struct { /* software */ |
a308444c | 688 | struct hrtimer hrtimer; |
d6d020e9 | 689 | }; |
24f1e32c | 690 | #ifdef CONFIG_HAVE_HW_BREAKPOINT |
45a73372 FW |
691 | struct { /* breakpoint */ |
692 | struct arch_hw_breakpoint info; | |
693 | struct list_head bp_list; | |
d580ff86 PZ |
694 | /* |
695 | * Crufty hack to avoid the chicken and egg | |
696 | * problem hw_breakpoint has with context | |
697 | * creation and event initialization. | |
698 | */ | |
699 | struct task_struct *bp_target; | |
45a73372 | 700 | }; |
24f1e32c | 701 | #endif |
d6d020e9 | 702 | }; |
a4eaf7f1 | 703 | int state; |
e7850595 | 704 | local64_t prev_count; |
b23f3325 | 705 | u64 sample_period; |
9e350de3 | 706 | u64 last_period; |
e7850595 | 707 | local64_t period_left; |
e050e3f0 | 708 | u64 interrupts_seq; |
60db5e09 | 709 | u64 interrupts; |
6a24ed6c | 710 | |
abd50713 PZ |
711 | u64 freq_time_stamp; |
712 | u64 freq_count_stamp; | |
ee06094f | 713 | #endif |
0793a61d TG |
714 | }; |
715 | ||
a4eaf7f1 PZ |
716 | /* |
717 | * hw_perf_event::state flags | |
718 | */ | |
719 | #define PERF_HES_STOPPED 0x01 /* the counter is stopped */ | |
720 | #define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */ | |
721 | #define PERF_HES_ARCH 0x04 | |
722 | ||
cdd6c482 | 723 | struct perf_event; |
621a01ea | 724 | |
8d2cacbb PZ |
725 | /* |
726 | * Common implementation detail of pmu::{start,commit,cancel}_txn | |
727 | */ | |
728 | #define PERF_EVENT_TXN 0x1 | |
6bde9b6c | 729 | |
621a01ea | 730 | /** |
4aeb0b42 | 731 | * struct pmu - generic performance monitoring unit |
621a01ea | 732 | */ |
4aeb0b42 | 733 | struct pmu { |
b0a873eb PZ |
734 | struct list_head entry; |
735 | ||
abe43400 | 736 | struct device *dev; |
0c9d42ed | 737 | const struct attribute_group **attr_groups; |
2e80a82a PZ |
738 | char *name; |
739 | int type; | |
740 | ||
108b02cf PZ |
741 | int * __percpu pmu_disable_count; |
742 | struct perf_cpu_context * __percpu pmu_cpu_context; | |
8dc85d54 | 743 | int task_ctx_nr; |
6bde9b6c LM |
744 | |
745 | /* | |
a4eaf7f1 PZ |
746 | * Fully disable/enable this PMU, can be used to protect from the PMI |
747 | * as well as for lazy/batch writing of the MSRs. | |
6bde9b6c | 748 | */ |
ad5133b7 PZ |
749 | void (*pmu_enable) (struct pmu *pmu); /* optional */ |
750 | void (*pmu_disable) (struct pmu *pmu); /* optional */ | |
6bde9b6c | 751 | |
8d2cacbb | 752 | /* |
a4eaf7f1 | 753 | * Try and initialize the event for this PMU. |
24cd7f54 | 754 | * Should return -ENOENT when the @event doesn't match this PMU. |
8d2cacbb | 755 | */ |
b0a873eb PZ |
756 | int (*event_init) (struct perf_event *event); |
757 | ||
a4eaf7f1 PZ |
758 | #define PERF_EF_START 0x01 /* start the counter when adding */ |
759 | #define PERF_EF_RELOAD 0x02 /* reload the counter when starting */ | |
760 | #define PERF_EF_UPDATE 0x04 /* update the counter when stopping */ | |
761 | ||
8d2cacbb | 762 | /* |
a4eaf7f1 PZ |
763 | * Adds/Removes a counter to/from the PMU, can be done inside |
764 | * a transaction, see the ->*_txn() methods. | |
765 | */ | |
766 | int (*add) (struct perf_event *event, int flags); | |
767 | void (*del) (struct perf_event *event, int flags); | |
768 | ||
769 | /* | |
770 | * Starts/Stops a counter present on the PMU. The PMI handler | |
771 | * should stop the counter when perf_event_overflow() returns | |
772 | * !0. ->start() will be used to continue. | |
773 | */ | |
774 | void (*start) (struct perf_event *event, int flags); | |
775 | void (*stop) (struct perf_event *event, int flags); | |
776 | ||
777 | /* | |
778 | * Updates the counter value of the event. | |
779 | */ | |
cdd6c482 | 780 | void (*read) (struct perf_event *event); |
6bde9b6c LM |
781 | |
782 | /* | |
24cd7f54 PZ |
783 | * Group events scheduling is treated as a transaction, add |
784 | * group events as a whole and perform one schedulability test. | |
785 | * If the test fails, roll back the whole group | |
a4eaf7f1 PZ |
786 | * |
787 | * Start the transaction, after this ->add() doesn't need to | |
24cd7f54 | 788 | * do schedulability tests. |
8d2cacbb | 789 | */ |
e7e7ee2e | 790 | void (*start_txn) (struct pmu *pmu); /* optional */ |
8d2cacbb | 791 | /* |
a4eaf7f1 | 792 | * If ->start_txn() disabled the ->add() schedulability test |
8d2cacbb PZ |
793 | * then ->commit_txn() is required to perform one. On success |
794 | * the transaction is closed. On error the transaction is kept | |
795 | * open until ->cancel_txn() is called. | |
796 | */ | |
e7e7ee2e | 797 | int (*commit_txn) (struct pmu *pmu); /* optional */ |
8d2cacbb | 798 | /* |
a4eaf7f1 | 799 | * Will cancel the transaction, assumes ->del() is called |
25985edc | 800 | * for each successful ->add() during the transaction. |
8d2cacbb | 801 | */ |
e7e7ee2e | 802 | void (*cancel_txn) (struct pmu *pmu); /* optional */ |
35edc2a5 PZ |
803 | |
804 | /* | |
805 | * Will return the value for perf_event_mmap_page::index for this event, | |
806 | * if no implementation is provided it will default to: event->hw.idx + 1. | |
807 | */ | |
808 | int (*event_idx) (struct perf_event *event); /*optional */ | |
d010b332 SE |
809 | |
810 | /* | |
811 | * flush branch stack on context-switches (needed in cpu-wide mode) | |
812 | */ | |
813 | void (*flush_branch_stack) (void); | |
621a01ea IM |
814 | }; |
815 | ||
6a930700 | 816 | /** |
cdd6c482 | 817 | * enum perf_event_active_state - the states of an event |
6a930700 | 818 | */ |
cdd6c482 | 819 | enum perf_event_active_state { |
57c0c15b | 820 | PERF_EVENT_STATE_ERROR = -2, |
cdd6c482 IM |
821 | PERF_EVENT_STATE_OFF = -1, |
822 | PERF_EVENT_STATE_INACTIVE = 0, | |
57c0c15b | 823 | PERF_EVENT_STATE_ACTIVE = 1, |
6a930700 IM |
824 | }; |
825 | ||
/* Forward declarations: only pointers to these types are used below. */
struct file;
struct perf_sample_data;

/*
 * Callback type stored in perf_event::overflow_handler.  It is handed the
 * event, a perf_sample_data pointer and a pt_regs pointer.
 */
typedef void (*perf_overflow_handler_t)(struct perf_event *,
					struct perf_sample_data *,
					struct pt_regs *regs);
832 | ||
/* Bit flags; see perf_event::group_flags. */
enum perf_group_flag {
	PERF_GROUP_SOFTWARE		= 0x1,
};
836 | ||
e7e7ee2e IM |
837 | #define SWEVENT_HLIST_BITS 8 |
838 | #define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS) | |
76e1d904 FW |
839 | |
840 | struct swevent_hlist { | |
e7e7ee2e IM |
841 | struct hlist_head heads[SWEVENT_HLIST_SIZE]; |
842 | struct rcu_head rcu_head; | |
76e1d904 FW |
843 | }; |
844 | ||
/* Distinct single-bit values; see perf_event::attach_state. */
#define PERF_ATTACH_CONTEXT	0x01
#define PERF_ATTACH_GROUP	0x02
#define PERF_ATTACH_TASK	0x04
8a49542c | 848 | |
#ifdef CONFIG_CGROUP_PERF
/*
 * Tracks time_enabled for a cgroup.  Instances of this structure are
 * allocated dynamically, one per CPU.
 */
struct perf_cgroup_info {
	u64				time;
	u64				timestamp;
};

struct perf_cgroup {
	struct cgroup_subsys_state	css;
	struct perf_cgroup_info		*info;	/* per-cpu timing info */
};
#endif
864 | ||
struct ring_buffer;

/**
 * struct perf_event - performance event kernel representation
 *
 * The structure is empty unless CONFIG_PERF_EVENTS is set.
 */
struct perf_event {
#ifdef CONFIG_PERF_EVENTS
	struct list_head		group_entry;
	struct list_head		event_entry;
	struct list_head		sibling_list;
	struct hlist_node		hlist_entry;
	int				nr_siblings;
	int				group_flags;
	struct perf_event		*group_leader;
	struct pmu			*pmu;

	enum perf_event_active_state	state;
	unsigned int			attach_state;
	local64_t			count;
	atomic64_t			child_count;

	/*
	 * Total time, in nanoseconds, that the event has been
	 *  - enabled: eligible to run and, for a per-task event, with the
	 *    task scheduled in;
	 *  - running: actually scheduled onto the CPU.
	 *
	 * While the event is INACTIVE or ACTIVE these are derived from
	 * tstamp_enabled, tstamp_running and tstamp_stopped.
	 */
	u64				total_time_enabled;
	u64				total_time_running;

	/*
	 * Timestamps from which total_time_enabled and total_time_running
	 * are computed while the event is INACTIVE or ACTIVE.  They are in
	 * nanoseconds, counted from an arbitrary point in time:
	 *
	 * tstamp_enabled: the notional time when the event was enabled
	 * tstamp_running: the notional time when the event was scheduled on
	 * tstamp_stopped: in INACTIVE state, the notional time when the
	 *	event was scheduled off.
	 */
	u64				tstamp_enabled;
	u64				tstamp_running;
	u64				tstamp_stopped;

	/*
	 * A shadow of the real context timing that is safe to use from NMI
	 * context.  It holds the context time as of the last time the event
	 * was scheduled in.
	 *
	 * ctx->timestamp is already folded into ctx_time, so the ctx_time
	 * of a sample is obtained by simply adding perf_clock().
	 */
	u64				shadow_ctx_time;

	struct perf_event_attr		attr;
	u16				header_size;
	u16				id_header_size;
	u16				read_size;
	struct hw_perf_event		hw;

	struct perf_event_context	*ctx;
	struct file			*filp;

	/*
	 * Accumulated time (in nanoseconds) that child events have been
	 * enabled and running, respectively.
	 */
	atomic64_t			child_total_time_enabled;
	atomic64_t			child_total_time_running;

	/*
	 * child_mutex protects attach/detach and child_list.
	 */
	struct mutex			child_mutex;
	struct list_head		child_list;
	struct perf_event		*parent;

	int				oncpu;
	int				cpu;

	struct list_head		owner_entry;
	struct task_struct		*owner;

	/* mmap state */
	struct mutex			mmap_mutex;
	atomic_t			mmap_count;
	int				mmap_locked;
	struct user_struct		*mmap_user;
	struct ring_buffer		*rb;
	struct list_head		rb_entry;

	/* poll support */
	wait_queue_head_t		waitq;
	struct fasync_struct		*fasync;

	/* work deferred from NMIs and similar contexts */
	int				pending_wakeup;
	int				pending_kill;
	int				pending_disable;
	struct irq_work			pending;

	atomic_t			event_limit;

	void (*destroy)(struct perf_event *);
	struct rcu_head			rcu_head;

	struct pid_namespace		*ns;
	u64				id;

	perf_overflow_handler_t		overflow_handler;
	void				*overflow_handler_context;

#ifdef CONFIG_EVENT_TRACING
	struct ftrace_event_call	*tp_event;
	struct event_filter		*filter;
#ifdef CONFIG_FUNCTION_TRACER
	struct ftrace_ops		ftrace_ops;
#endif
#endif

#ifdef CONFIG_CGROUP_PERF
	struct perf_cgroup		*cgrp;	/* cgroup the event is attached to */
	int				cgrp_defer_enabled;
#endif

#endif /* CONFIG_PERF_EVENTS */
};
995 | ||
/* Kind of a perf_event_context; see perf_event_context::type. */
enum perf_event_context_type {
	task_context,
	cpu_context,
};
1000 | ||
0793a61d | 1001 | /** |
cdd6c482 | 1002 | * struct perf_event_context - event context structure |
0793a61d | 1003 | * |
cdd6c482 | 1004 | * Used as a container for task events and CPU events as well: |
0793a61d | 1005 | */ |
cdd6c482 | 1006 | struct perf_event_context { |
108b02cf | 1007 | struct pmu *pmu; |
ee643c41 | 1008 | enum perf_event_context_type type; |
0793a61d | 1009 | /* |
cdd6c482 | 1010 | * Protect the states of the events in the list, |
d859e29f | 1011 | * nr_active, and the list: |
0793a61d | 1012 | */ |
e625cce1 | 1013 | raw_spinlock_t lock; |
d859e29f | 1014 | /* |
cdd6c482 | 1015 | * Protect the list of events. Locking either mutex or lock |
d859e29f PM |
1016 | * is sufficient to ensure the list doesn't change; to change |
1017 | * the list you need to lock both the mutex and the spinlock. | |
1018 | */ | |
a308444c | 1019 | struct mutex mutex; |
04289bb9 | 1020 | |
889ff015 FW |
1021 | struct list_head pinned_groups; |
1022 | struct list_head flexible_groups; | |
a308444c | 1023 | struct list_head event_list; |
cdd6c482 | 1024 | int nr_events; |
a308444c IM |
1025 | int nr_active; |
1026 | int is_active; | |
bfbd3381 | 1027 | int nr_stat; |
0f5a2601 | 1028 | int nr_freq; |
dddd3379 | 1029 | int rotate_disable; |
a308444c IM |
1030 | atomic_t refcount; |
1031 | struct task_struct *task; | |
53cfbf59 PM |
1032 | |
1033 | /* | |
4af4998b | 1034 | * Context clock, runs when context enabled. |
53cfbf59 | 1035 | */ |
a308444c IM |
1036 | u64 time; |
1037 | u64 timestamp; | |
564c2b21 PM |
1038 | |
1039 | /* | |
1040 | * These fields let us detect when two contexts have both | |
1041 | * been cloned (inherited) from a common ancestor. | |
1042 | */ | |
cdd6c482 | 1043 | struct perf_event_context *parent_ctx; |
a308444c IM |
1044 | u64 parent_gen; |
1045 | u64 generation; | |
1046 | int pin_count; | |
d010b332 SE |
1047 | int nr_cgroups; /* cgroup evts */ |
1048 | int nr_branch_stack; /* branch_stack evt */ | |
28009ce4 | 1049 | struct rcu_head rcu_head; |
0793a61d TG |
1050 | }; |
1051 | ||
/*
 * An event can trigger in four contexts:
 *	task, softirq, hardirq, nmi.
 */
#define PERF_NR_CONTEXTS	4
1057 | ||
0793a61d | 1058 | /** |
cdd6c482 | 1059 | * struct perf_event_cpu_context - per cpu event context structure |
0793a61d TG |
1060 | */ |
1061 | struct perf_cpu_context { | |
cdd6c482 IM |
1062 | struct perf_event_context ctx; |
1063 | struct perf_event_context *task_ctx; | |
0793a61d | 1064 | int active_oncpu; |
3b6f9e5c | 1065 | int exclusive; |
e9d2b064 PZ |
1066 | struct list_head rotation_list; |
1067 | int jiffies_interval; | |
51676957 | 1068 | struct pmu *active_pmu; |
e5d1367f | 1069 | struct perf_cgroup *cgrp; |
0793a61d TG |
1070 | }; |
1071 | ||
/*
 * Handle passed to perf_output_begin(), perf_output_copy() and
 * perf_output_end().
 */
struct perf_output_handle {
	struct perf_event		*event;
	struct ring_buffer		*rb;
	unsigned long			wakeup;
	unsigned long			size;
	void				*addr;
	int				page;
};
1080 | ||
cdd6c482 | 1081 | #ifdef CONFIG_PERF_EVENTS |
829b42dd | 1082 | |
2e80a82a | 1083 | extern int perf_pmu_register(struct pmu *pmu, char *name, int type); |
b0a873eb | 1084 | extern void perf_pmu_unregister(struct pmu *pmu); |
621a01ea | 1085 | |
3bf101ba | 1086 | extern int perf_num_counters(void); |
84c79910 | 1087 | extern const char *perf_pmu_name(void); |
ab0cce56 JO |
1088 | extern void __perf_event_task_sched_in(struct task_struct *prev, |
1089 | struct task_struct *task); | |
1090 | extern void __perf_event_task_sched_out(struct task_struct *prev, | |
1091 | struct task_struct *next); | |
cdd6c482 IM |
1092 | extern int perf_event_init_task(struct task_struct *child); |
1093 | extern void perf_event_exit_task(struct task_struct *child); | |
1094 | extern void perf_event_free_task(struct task_struct *task); | |
4e231c79 | 1095 | extern void perf_event_delayed_put(struct task_struct *task); |
cdd6c482 | 1096 | extern void perf_event_print_debug(void); |
33696fc0 PZ |
1097 | extern void perf_pmu_disable(struct pmu *pmu); |
1098 | extern void perf_pmu_enable(struct pmu *pmu); | |
cdd6c482 IM |
1099 | extern int perf_event_task_disable(void); |
1100 | extern int perf_event_task_enable(void); | |
26ca5c11 | 1101 | extern int perf_event_refresh(struct perf_event *event, int refresh); |
cdd6c482 | 1102 | extern void perf_event_update_userpage(struct perf_event *event); |
fb0459d7 AV |
1103 | extern int perf_event_release_kernel(struct perf_event *event); |
1104 | extern struct perf_event * | |
1105 | perf_event_create_kernel_counter(struct perf_event_attr *attr, | |
1106 | int cpu, | |
38a81da2 | 1107 | struct task_struct *task, |
4dc0da86 AK |
1108 | perf_overflow_handler_t callback, |
1109 | void *context); | |
0cda4c02 YZ |
1110 | extern void perf_pmu_migrate_context(struct pmu *pmu, |
1111 | int src_cpu, int dst_cpu); | |
59ed446f PZ |
1112 | extern u64 perf_event_read_value(struct perf_event *event, |
1113 | u64 *enabled, u64 *running); | |
5c92d124 | 1114 | |
d010b332 | 1115 | |
df1a132b | 1116 | struct perf_sample_data { |
5622f295 MM |
1117 | u64 type; |
1118 | ||
1119 | u64 ip; | |
1120 | struct { | |
1121 | u32 pid; | |
1122 | u32 tid; | |
1123 | } tid_entry; | |
1124 | u64 time; | |
a308444c | 1125 | u64 addr; |
5622f295 MM |
1126 | u64 id; |
1127 | u64 stream_id; | |
1128 | struct { | |
1129 | u32 cpu; | |
1130 | u32 reserved; | |
1131 | } cpu_entry; | |
a308444c | 1132 | u64 period; |
5622f295 | 1133 | struct perf_callchain_entry *callchain; |
3a43ce68 | 1134 | struct perf_raw_record *raw; |
bce38cd5 | 1135 | struct perf_branch_stack *br_stack; |
df1a132b PZ |
1136 | }; |
1137 | ||
fd0d000b RR |
1138 | static inline void perf_sample_data_init(struct perf_sample_data *data, |
1139 | u64 addr, u64 period) | |
dc1d628a | 1140 | { |
fd0d000b | 1141 | /* remaining struct members initialized in perf_prepare_sample() */ |
dc1d628a PZ |
1142 | data->addr = addr; |
1143 | data->raw = NULL; | |
bce38cd5 | 1144 | data->br_stack = NULL; |
fd0d000b | 1145 | data->period = period; |
dc1d628a PZ |
1146 | } |
1147 | ||
/* Sample preparation, output and overflow entry points */
extern void perf_prepare_sample(struct perf_event_header *header,
				struct perf_sample_data *data,
				struct perf_event *event,
				struct pt_regs *regs);
extern void perf_output_sample(struct perf_output_handle *handle,
			       struct perf_event_header *header,
			       struct perf_sample_data *data,
			       struct perf_event *event);

extern int perf_event_overflow(struct perf_event *event,
			       struct perf_sample_data *data,
			       struct pt_regs *regs);
df1a132b | 1160 | |
6c7e550f FBH |
1161 | static inline bool is_sampling_event(struct perf_event *event) |
1162 | { | |
1163 | return event->attr.sample_period != 0; | |
1164 | } | |
1165 | ||
3b6f9e5c | 1166 | /* |
cdd6c482 | 1167 | * Return 1 for a software event, 0 for a hardware event |
3b6f9e5c | 1168 | */ |
cdd6c482 | 1169 | static inline int is_software_event(struct perf_event *event) |
3b6f9e5c | 1170 | { |
89a1e187 | 1171 | return event->pmu->task_ctx_nr == perf_sw_context; |
3b6f9e5c PM |
1172 | } |
1173 | ||
c5905afb | 1174 | extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; |
f29ac756 | 1175 | |
a8b0ca17 | 1176 | extern void __perf_sw_event(u32, u64, struct pt_regs *, u64); |
f29ac756 | 1177 | |
#ifndef perf_arch_fetch_caller_regs
/* Empty fallback, used when perf_arch_fetch_caller_regs is not defined. */
static inline void
perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip)
{
}
#endif
5331d7b8 FW |
1181 | |
1182 | /* | |
1183 | * Take a snapshot of the regs. Skip ip and frame pointer to | |
1184 | * the nth caller. We only need a few of the regs: | |
1185 | * - ip for PERF_SAMPLE_IP | |
1186 | * - cs for user_mode() tests | |
1187 | * - bp for callchains | |
1188 | * - eflags, for future purposes, just in case | |
1189 | */ | |
b0f82b81 | 1190 | static inline void perf_fetch_caller_regs(struct pt_regs *regs) |
5331d7b8 | 1191 | { |
5331d7b8 FW |
1192 | memset(regs, 0, sizeof(*regs)); |
1193 | ||
b0f82b81 | 1194 | perf_arch_fetch_caller_regs(regs, CALLER_ADDR0); |
5331d7b8 FW |
1195 | } |
1196 | ||
7e54a5a0 | 1197 | static __always_inline void |
a8b0ca17 | 1198 | perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) |
e49a5bd3 | 1199 | { |
7e54a5a0 PZ |
1200 | struct pt_regs hot_regs; |
1201 | ||
c5905afb | 1202 | if (static_key_false(&perf_swevent_enabled[event_id])) { |
d430d3d7 JB |
1203 | if (!regs) { |
1204 | perf_fetch_caller_regs(&hot_regs); | |
1205 | regs = &hot_regs; | |
1206 | } | |
a8b0ca17 | 1207 | __perf_sw_event(event_id, nr, regs, addr); |
e49a5bd3 FW |
1208 | } |
1209 | } | |
1210 | ||
c5905afb | 1211 | extern struct static_key_deferred perf_sched_events; |
ee6dcfa4 | 1212 | |
ab0cce56 | 1213 | static inline void perf_event_task_sched_in(struct task_struct *prev, |
a8d757ef | 1214 | struct task_struct *task) |
ab0cce56 JO |
1215 | { |
1216 | if (static_key_false(&perf_sched_events.key)) | |
1217 | __perf_event_task_sched_in(prev, task); | |
1218 | } | |
1219 | ||
1220 | static inline void perf_event_task_sched_out(struct task_struct *prev, | |
1221 | struct task_struct *next) | |
ee6dcfa4 | 1222 | { |
a8b0ca17 | 1223 | perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0); |
ee6dcfa4 | 1224 | |
c5905afb | 1225 | if (static_key_false(&perf_sched_events.key)) |
ab0cce56 | 1226 | __perf_event_task_sched_out(prev, next); |
ee6dcfa4 PZ |
1227 | } |
1228 | ||
extern void perf_event_mmap(struct vm_area_struct *vma);

/* Guest info callbacks */
extern struct perf_guest_info_callbacks *perf_guest_cbs;
extern int
perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
extern int
perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);

extern void perf_event_comm(struct task_struct *tsk);
extern void perf_event_fork(struct task_struct *tsk);
8d1b2d93 | 1236 | |
56962b44 FW |
1237 | /* Callchains */ |
1238 | DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); | |
1239 | ||
e7e7ee2e IM |
1240 | extern void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs); |
1241 | extern void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs); | |
394ee076 | 1242 | |
e7e7ee2e | 1243 | static inline void perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) |
70791ce9 FW |
1244 | { |
1245 | if (entry->nr < PERF_MAX_STACK_DEPTH) | |
1246 | entry->ip[entry->nr++] = ip; | |
1247 | } | |
394ee076 | 1248 | |
cdd6c482 IM |
1249 | extern int sysctl_perf_event_paranoid; |
1250 | extern int sysctl_perf_event_mlock; | |
1251 | extern int sysctl_perf_event_sample_rate; | |
1ccd1549 | 1252 | |
163ec435 PZ |
1253 | extern int perf_proc_update_handler(struct ctl_table *table, int write, |
1254 | void __user *buffer, size_t *lenp, | |
1255 | loff_t *ppos); | |
1256 | ||
320ebf09 PZ |
1257 | static inline bool perf_paranoid_tracepoint_raw(void) |
1258 | { | |
1259 | return sysctl_perf_event_paranoid > -1; | |
1260 | } | |
1261 | ||
1262 | static inline bool perf_paranoid_cpu(void) | |
1263 | { | |
1264 | return sysctl_perf_event_paranoid > 0; | |
1265 | } | |
1266 | ||
1267 | static inline bool perf_paranoid_kernel(void) | |
1268 | { | |
1269 | return sysctl_perf_event_paranoid > 1; | |
1270 | } | |
1271 | ||
cdd6c482 | 1272 | extern void perf_event_init(void); |
1c024eca PZ |
1273 | extern void perf_tp_event(u64 addr, u64 count, void *record, |
1274 | int entry_size, struct pt_regs *regs, | |
ecc55f84 | 1275 | struct hlist_head *head, int rctx); |
24f1e32c | 1276 | extern void perf_bp_event(struct perf_event *event, void *data); |
0d905bca | 1277 | |
/*
 * Fallbacks used when perf_misc_flags is not already defined: classify a
 * sample as user or kernel via user_mode(), and read the instruction
 * pointer with instruction_pointer().
 */
#ifndef perf_misc_flags
# define perf_misc_flags(regs)						\
	(user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL)
# define perf_instruction_pointer(regs)					\
	instruction_pointer(regs)
#endif
1283 | ||
bce38cd5 SE |
1284 | static inline bool has_branch_stack(struct perf_event *event) |
1285 | { | |
1286 | return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK; | |
1287 | } | |
1288 | ||
/* Output through a perf_output_handle */
extern int perf_output_begin(struct perf_output_handle *handle,
			     struct perf_event *event, unsigned int size);
extern void perf_output_end(struct perf_output_handle *handle);
extern void perf_output_copy(struct perf_output_handle *handle,
			     const void *buf, unsigned int len);

/* Software-event recursion context */
extern int perf_swevent_get_recursion_context(void);
extern void perf_swevent_put_recursion_context(int rctx);

extern void perf_event_enable(struct perf_event *event);
extern void perf_event_disable(struct perf_event *event);
extern void perf_event_task_tick(void);
0793a61d TG |
1299 | #else |
1300 | static inline void | |
ab0cce56 JO |
1301 | perf_event_task_sched_in(struct task_struct *prev, |
1302 | struct task_struct *task) { } | |
1303 | static inline void | |
1304 | perf_event_task_sched_out(struct task_struct *prev, | |
1305 | struct task_struct *next) { } | |
cdd6c482 IM |
1306 | static inline int perf_event_init_task(struct task_struct *child) { return 0; } |
1307 | static inline void perf_event_exit_task(struct task_struct *child) { } | |
1308 | static inline void perf_event_free_task(struct task_struct *task) { } | |
4e231c79 | 1309 | static inline void perf_event_delayed_put(struct task_struct *task) { } |
57c0c15b | 1310 | static inline void perf_event_print_debug(void) { } |
57c0c15b IM |
1311 | static inline int perf_event_task_disable(void) { return -EINVAL; } |
1312 | static inline int perf_event_task_enable(void) { return -EINVAL; } | |
26ca5c11 AK |
1313 | static inline int perf_event_refresh(struct perf_event *event, int refresh) |
1314 | { | |
1315 | return -EINVAL; | |
1316 | } | |
15dbf27c | 1317 | |
925d519a | 1318 | static inline void |
a8b0ca17 | 1319 | perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { } |
24f1e32c | 1320 | static inline void |
184f412c | 1321 | perf_bp_event(struct perf_event *event, void *data) { } |
0a4a9391 | 1322 | |
39447b38 | 1323 | static inline int perf_register_guest_info_callbacks |
e7e7ee2e | 1324 | (struct perf_guest_info_callbacks *callbacks) { return 0; } |
39447b38 | 1325 | static inline int perf_unregister_guest_info_callbacks |
e7e7ee2e | 1326 | (struct perf_guest_info_callbacks *callbacks) { return 0; } |
39447b38 | 1327 | |
57c0c15b | 1328 | static inline void perf_event_mmap(struct vm_area_struct *vma) { } |
cdd6c482 IM |
1329 | static inline void perf_event_comm(struct task_struct *tsk) { } |
1330 | static inline void perf_event_fork(struct task_struct *tsk) { } | |
1331 | static inline void perf_event_init(void) { } | |
184f412c | 1332 | static inline int perf_swevent_get_recursion_context(void) { return -1; } |
4ed7c92d | 1333 | static inline void perf_swevent_put_recursion_context(int rctx) { } |
44234adc FW |
1334 | static inline void perf_event_enable(struct perf_event *event) { } |
1335 | static inline void perf_event_disable(struct perf_event *event) { } | |
e9d2b064 | 1336 | static inline void perf_event_task_tick(void) { } |
0793a61d TG |
1337 | #endif |
1338 | ||
/* Copy the object @x, by address and sizeof, into the output @handle. */
#define perf_output_put(handle, x)					\
	perf_output_copy((handle), &(x), sizeof(x))
5622f295 | 1340 | |
/*
 * Invoke @fn by hand for the current CPU with CPU_UP_PREPARE, CPU_STARTING
 * and CPU_ONLINE, then register it as a CPU notifier.
 *
 * The notifier priority (CPU_PRI_PERF) has to be higher than that of
 * migration_notifier in sched.c.
 */
#define perf_cpu_notifier(fn)						\
do {									\
	static struct notifier_block fn##_nb __cpuinitdata = {		\
		.notifier_call	= fn,					\
		.priority	= CPU_PRI_PERF,				\
	};								\
									\
	fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE,			\
	   (void *)(unsigned long)smp_processor_id());			\
	fn(&fn##_nb, (unsigned long)CPU_STARTING,			\
	   (void *)(unsigned long)smp_processor_id());			\
	fn(&fn##_nb, (unsigned long)CPU_ONLINE,				\
	   (void *)(unsigned long)smp_processor_id());			\
	register_cpu_notifier(&fn##_nb);				\
} while (0)
1356 | ||
641cc938 JO |
1357 | |
/*
 * Define <_name>_show(), which prints the literal @_format followed by a
 * newline into @page, and a read-only device attribute
 * format_attr_<_name> built with __ATTR_RO().  The BUILD_BUG_ON rejects a
 * @_format whose size is PAGE_SIZE or more.
 */
#define PMU_FORMAT_ATTR(_name, _format)					\
static ssize_t								\
_name##_show(struct device *dev, struct device_attribute *attr,	\
	     char *page)						\
{									\
	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
	return sprintf(page, _format "\n");				\
}									\
									\
static struct device_attribute format_attr_##_name = __ATTR_RO(_name)
1369 | ||
f3dfd265 | 1370 | #endif /* __KERNEL__ */ |
cdd6c482 | 1371 | #endif /* _LINUX_PERF_EVENT_H */ |