| 1 | /* |
| 2 | * Per core/cpu state |
| 3 | * |
| 4 | * Used to coordinate shared registers between HT threads or |
| 5 | * among events on a single PMU. |
| 6 | */ |
| 7 | |
| 8 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
| 9 | |
| 10 | #include <linux/stddef.h> |
| 11 | #include <linux/types.h> |
| 12 | #include <linux/init.h> |
| 13 | #include <linux/slab.h> |
| 14 | #include <linux/export.h> |
| 15 | #include <linux/watchdog.h> |
| 16 | |
| 17 | #include <asm/cpufeature.h> |
| 18 | #include <asm/hardirq.h> |
| 19 | #include <asm/apic.h> |
| 20 | |
| 21 | #include "perf_event.h" |
| 22 | |
| 23 | /* |
| 24 | * Intel PerfMon, used on Core and later. Indexed by the generic PERF_COUNT_HW_* id; intel_pmu_event_map() returns the stored value as-is. Values look like (umask << 8) | event-select -- TODO confirm against the SDM. |
| 25 | */ |
| 26 | static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = |
| 27 | { |
| 28 | [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, |
| 29 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, |
| 30 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, |
| 31 | [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, |
| 32 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, |
| 33 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, |
| 34 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, |
| 35 | [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding; the same 0x0300 code is used by the FIXED_EVENT_CONSTRAINT(0x0300, 2) rows in the constraint tables */ |
| 36 | }; |
| 37 | |
| 38 | static struct event_constraint intel_core_event_constraints[] __read_mostly = /* Constraint list for the "core" table: one (event code, mask) pair per row, ending at EVENT_CONSTRAINT_END. The mask is presumably the set of counters the event may use -- TODO confirm INTEL_EVENT_CONSTRAINT() in perf_event.h. */ |
| 39 | { |
| 40 | INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ |
| 41 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ |
| 42 | INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ |
| 43 | INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ |
| 44 | INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ |
| 45 | INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */ |
| 46 | EVENT_CONSTRAINT_END |
| 47 | }; |
| 48 | |
| 49 | static struct event_constraint intel_core2_event_constraints[] __read_mostly = /* Constraint list for the "core2" table. The three FIXED_EVENT_CONSTRAINT rows use the same codes as the instructions / cycles / ref-cycles entries of intel_perfmon_event_map (0x00c0, 0x003c, 0x0300). */ |
| 50 | { |
| 51 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
| 52 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
| 53 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
| 54 | INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ |
| 55 | INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ |
| 56 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ |
| 57 | INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ |
| 58 | INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ |
| 59 | INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ |
| 60 | INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ |
| 61 | INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ |
| 62 | INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */ |
| 63 | INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ |
| 64 | EVENT_CONSTRAINT_END |
| 65 | }; |
| 66 | |
| 67 | static struct event_constraint intel_nehalem_event_constraints[] __read_mostly = /* Constraint list for the "nehalem" table: the three fixed-event rows, then L1D / cache-lock events that all carry mask 0x3. Ends at EVENT_CONSTRAINT_END. */ |
| 68 | { |
| 69 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
| 70 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
| 71 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
| 72 | INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ |
| 73 | INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ |
| 74 | INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ |
| 75 | INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ |
| 76 | INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */ |
| 77 | INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ |
| 78 | INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ |
| 79 | INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ |
| 80 | EVENT_CONSTRAINT_END |
| 81 | }; |
| 82 | |
| 83 | static struct extra_reg intel_nehalem_extra_regs[] __read_mostly = /* Extra-register list for the "nehalem" table: event 0x01b7 is paired with MSR_OFFCORE_RSP_0 only (intel_westmere_extra_regs additionally pairs 0x01bb with MSR_OFFCORE_RSP_1). 0xffff is presumably the mask of valid MSR bits -- TODO confirm INTEL_UEVENT_EXTRA_REG(). */ |
| 84 | { |
| 85 | /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ |
| 86 | INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), |
| 87 | INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b), /* same event/umask as the mem_ld_nhm attribute string (event=0x0b,umask=0x10) */ |
| 88 | EVENT_EXTRA_END |
| 89 | }; |
| 90 | |
| 91 | static struct event_constraint intel_westmere_event_constraints[] __read_mostly = /* Constraint list for the "westmere" table: the three fixed-event rows plus four general events. Ends at EVENT_CONSTRAINT_END. */ |
| 92 | { |
| 93 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
| 94 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
| 95 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
| 96 | INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ |
| 97 | INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ |
| 98 | INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ |
| 99 | INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */ |
| 100 | EVENT_CONSTRAINT_END |
| 101 | }; |
| 102 | |
| 103 | static struct event_constraint intel_snb_event_constraints[] __read_mostly = /* Constraint list for the "snb" table: fixed-event rows, general events, then the INTEL_EXCLEVT_CONSTRAINT rows for events 0xd0-0xd3. Each (code, mask) pair is listed exactly once. Ends at EVENT_CONSTRAINT_END. */ |
| 104 | { |
| 105 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
| 106 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
| 107 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
| 108 | INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */ |
| 109 | INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */ |
| 110 | INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ |
| 111 | INTEL_UEVENT_CONSTRAINT(0x06a3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ |
| 112 | INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ |
| 113 | INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ |
| 114 | INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ |
| 117 | |
| 118 | INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ |
| 119 | INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ |
| 120 | INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ |
| 121 | INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ |
| 122 | |
| 123 | EVENT_CONSTRAINT_END |
| 124 | }; |
| 125 | |
| 126 | static struct event_constraint intel_ivb_event_constraints[] __read_mostly = /* Constraint list for the "ivb" table: fixed-event rows, general events, then the INTEL_EXCLEVT_CONSTRAINT rows for events 0xd0-0xd3. Ends at EVENT_CONSTRAINT_END. */ |
| 127 | { |
| 128 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
| 129 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
| 130 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
| 131 | INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), /* L1D_PEND_MISS.PENDING */ |
| 132 | INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMPTY */ |
| 133 | INTEL_UEVENT_CONSTRAINT(0x019c, 0xf), /* IDQ_UOPS_NOT_DELIVERED.CORE */ |
| 134 | INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_LDM_PENDING */ |
| 135 | INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */ |
| 136 | INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */ |
| 137 | INTEL_UEVENT_CONSTRAINT(0x06a3, 0xf), /* CYCLE_ACTIVITY.STALLS_LDM_PENDING */ |
| 138 | INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ |
| 139 | INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ |
| 140 | INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ |
| 141 | |
| 142 | INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ |
| 143 | INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ |
| 144 | INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ |
| 145 | INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ |
| 146 | |
| 147 | EVENT_CONSTRAINT_END |
| 148 | }; |
| 149 | |
| 150 | static struct extra_reg intel_westmere_extra_regs[] __read_mostly = /* Extra-register list for the "westmere" table: same as intel_nehalem_extra_regs plus a second pairing, event 0x01bb with MSR_OFFCORE_RSP_1. Ends at EVENT_EXTRA_END. */ |
| 151 | { |
| 152 | /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ |
| 153 | INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), |
| 154 | INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1), |
| 155 | INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b), |
| 156 | EVENT_EXTRA_END |
| 157 | }; |
| 158 | |
| 159 | static struct event_constraint intel_v1_event_constraints[] __read_mostly = /* Constraint list for the "v1" table: holds only the terminator, i.e. no per-event entries. */ |
| 160 | { |
| 161 | EVENT_CONSTRAINT_END |
| 162 | }; |
| 163 | |
| 164 | static struct event_constraint intel_gen_event_constraints[] __read_mostly = /* Constraint list for the "gen" table: only the three fixed-event rows, then the terminator. */ |
| 165 | { |
| 166 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
| 167 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
| 168 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
| 169 | EVENT_CONSTRAINT_END |
| 170 | }; |
| 171 | |
| 172 | static struct event_constraint intel_slm_event_constraints[] __read_mostly = /* Constraint list for the "slm" table: same three fixed-event rows as intel_gen_event_constraints; only the comment on the 0x0300 row differs ("pseudo"). */ |
| 173 | { |
| 174 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
| 175 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
| 176 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */ |
| 177 | EVENT_CONSTRAINT_END |
| 178 | }; |
| 179 | |
| 180 | static struct extra_reg intel_snb_extra_regs[] __read_mostly = { /* Extra-register list for the "snb" table: events 0x01b7 / 0x01bb paired with MSR_OFFCORE_RSP_0 / _1, both with mask 0x3f807f8fff (the "snbep" list uses the wider 0x3fffff8fff). */ |
| 181 | /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ |
| 182 | INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0), |
| 183 | INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1), |
| 184 | INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), /* same event/umask as the mem_ld_snb attribute string (event=0xcd,umask=0x1) */ |
| 185 | EVENT_EXTRA_END |
| 186 | }; |
| 187 | |
| 188 | static struct extra_reg intel_snbep_extra_regs[] __read_mostly = { /* Extra-register list for the "snbep" table: identical to intel_snb_extra_regs except that the mask is 0x3fffff8fff instead of 0x3f807f8fff. */ |
| 189 | /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ |
| 190 | INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0), |
| 191 | INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1), |
| 192 | INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), |
| 193 | EVENT_EXTRA_END |
| 194 | }; |
| 195 | |
| 196 | EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); /* named event string; referenced from nhm_events_attrs via EVENT_PTR(mem_ld_nhm) */ |
| 197 | EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); /* referenced from snb_events_attrs */ |
| 198 | EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2"); /* referenced from snb_events_attrs; no ldlat term, unlike the two load strings */ |
| 199 | |
| 200 | struct attribute *nhm_events_attrs[] = { /* NULL-terminated attribute list holding only the mem_ld_nhm event. Not static, so presumably referenced from another file -- verify. */ |
| 201 | EVENT_PTR(mem_ld_nhm), |
| 202 | NULL, |
| 203 | }; |
| 204 | |
| 205 | struct attribute *snb_events_attrs[] = { /* NULL-terminated attribute list holding the mem_ld_snb and mem_st_snb events. Not static, so presumably referenced from another file -- verify. */ |
| 206 | EVENT_PTR(mem_ld_snb), |
| 207 | EVENT_PTR(mem_st_snb), |
| 208 | NULL, |
| 209 | }; |
| 210 | |
| 211 | static struct event_constraint intel_hsw_event_constraints[] = { /* Constraint list for the "hsw" table. NOTE(review): unlike the earlier constraint tables this one is not marked __read_mostly -- confirm whether that is deliberate. */ |
| 212 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
| 213 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
| 214 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
| 215 | INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.* */ |
| 216 | INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ |
| 217 | INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ |
| 218 | /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ |
| 219 | INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), |
| 220 | /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ |
| 221 | INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), |
| 222 | /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */ |
| 223 | INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), |
| 224 | |
| 225 | INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ |
| 226 | INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ |
| 227 | INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ |
| 228 | INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ |
| 229 | |
| 230 | EVENT_CONSTRAINT_END |
| 231 | }; |
| 232 | |
| 233 | struct event_constraint intel_bdw_event_constraints[] = { /* Constraint list for the "bdw" table. NOTE(review): the only constraint table here that is neither static nor __read_mostly -- presumably referenced from another file; verify. */ |
| 234 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
| 235 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
| 236 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
| 237 | INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */ |
| 238 | INTEL_EVENT_CONSTRAINT(0xa3, 0x4), /* CYCLE_ACTIVITY.* */ |
| 239 | EVENT_CONSTRAINT_END |
| 240 | }; |
| 241 | |
| 242 | static u64 intel_pmu_event_map(int hw_event) /* Return the intel_perfmon_event_map entry for generic event id hw_event. */ |
| 243 | { |
| 244 | return intel_perfmon_event_map[hw_event]; /* no range check here: hw_event must already be within [0, PERF_COUNT_HW_MAX) */ |
| 245 | } |
| 246 | |
| 247 | #define SNB_DMND_DATA_RD BIT_ULL(0) /* SNB_* single-bit values; they are OR-ed together below and in snb_hw_cache_extra_regs */ |
| 248 | #define SNB_DMND_RFO BIT_ULL(1) |
| 249 | #define SNB_DMND_IFETCH BIT_ULL(2) |
| 250 | #define SNB_DMND_WB BIT_ULL(3) |
| 251 | #define SNB_PF_DATA_RD BIT_ULL(4) |
| 252 | #define SNB_PF_RFO BIT_ULL(5) |
| 253 | #define SNB_PF_IFETCH BIT_ULL(6) |
| 254 | #define SNB_LLC_DATA_RD BIT_ULL(7) |
| 255 | #define SNB_LLC_RFO BIT_ULL(8) |
| 256 | #define SNB_LLC_IFETCH BIT_ULL(9) |
| 257 | #define SNB_BUS_LOCKS BIT_ULL(10) |
| 258 | #define SNB_STRM_ST BIT_ULL(11) |
| 259 | #define SNB_OTHER BIT_ULL(15) |
| 260 | #define SNB_RESP_ANY BIT_ULL(16) |
| 261 | #define SNB_NO_SUPP BIT_ULL(17) |
| 262 | #define SNB_LLC_HITM BIT_ULL(18) |
| 263 | #define SNB_LLC_HITE BIT_ULL(19) |
| 264 | #define SNB_LLC_HITS BIT_ULL(20) |
| 265 | #define SNB_LLC_HITF BIT_ULL(21) |
| 266 | #define SNB_LOCAL BIT_ULL(22) |
| 267 | #define SNB_REMOTE (0xffULL << 23) /* the one multi-bit field: bits 23-30 */ |
| 268 | #define SNB_SNP_NONE BIT_ULL(31) |
| 269 | #define SNB_SNP_NOT_NEEDED BIT_ULL(32) |
| 270 | #define SNB_SNP_MISS BIT_ULL(33) |
| 271 | #define SNB_NO_FWD BIT_ULL(34) |
| 272 | #define SNB_SNP_FWD BIT_ULL(35) |
| 273 | #define SNB_HITM BIT_ULL(36) |
| 274 | #define SNB_NON_DRAM BIT_ULL(37) |
| 275 | |
| 276 | #define SNB_DMND_READ (SNB_DMND_DATA_RD | SNB_LLC_DATA_RD) |
| 277 | #define SNB_DMND_WRITE (SNB_DMND_RFO | SNB_LLC_RFO) |
| 278 | #define SNB_DMND_PREFETCH (SNB_PF_DATA_RD | SNB_PF_RFO) |
| 279 | |
| 280 | #define SNB_SNP_ANY (SNB_SNP_NONE | SNB_SNP_NOT_NEEDED | \ |
| 281 | SNB_SNP_MISS | SNB_NO_FWD | SNB_SNP_FWD | \ |
| 282 | SNB_HITM) |
| 283 | |
| 284 | #define SNB_DRAM_ANY (SNB_LOCAL | SNB_REMOTE | SNB_SNP_ANY) |
| 285 | #define SNB_DRAM_REMOTE (SNB_REMOTE | SNB_SNP_ANY) |
| 286 | |
| 287 | #define SNB_L3_ACCESS SNB_RESP_ANY |
| 288 | #define SNB_L3_MISS (SNB_DRAM_ANY | SNB_NON_DRAM) |
| 289 | |
| 290 | static __initconst const u64 snb_hw_cache_extra_regs /* Per [cache][op][result] value built from the SNB_* bit macros. Only the LL and NODE rows are populated; these are the rows whose event code in snb_hw_cache_event_ids is 0x01b7. */ |
| 291 | [PERF_COUNT_HW_CACHE_MAX] |
| 292 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 293 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| 294 | { |
| 295 | [ C(LL ) ] = { |
| 296 | [ C(OP_READ) ] = { |
| 297 | [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_L3_ACCESS, |
| 298 | [ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_L3_MISS, |
| 299 | }, |
| 300 | [ C(OP_WRITE) ] = { |
| 301 | [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_L3_ACCESS, |
| 302 | [ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_L3_MISS, |
| 303 | }, |
| 304 | [ C(OP_PREFETCH) ] = { |
| 305 | [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_L3_ACCESS, |
| 306 | [ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_L3_MISS, |
| 307 | }, |
| 308 | }, |
| 309 | [ C(NODE) ] = { /* ACCESS selects SNB_DRAM_ANY (local + remote), MISS selects SNB_DRAM_REMOTE only */ |
| 310 | [ C(OP_READ) ] = { |
| 311 | [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_DRAM_ANY, |
| 312 | [ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_DRAM_REMOTE, |
| 313 | }, |
| 314 | [ C(OP_WRITE) ] = { |
| 315 | [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_DRAM_ANY, |
| 316 | [ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_DRAM_REMOTE, |
| 317 | }, |
| 318 | [ C(OP_PREFETCH) ] = { |
| 319 | [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_DRAM_ANY, |
| 320 | [ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_DRAM_REMOTE, |
| 321 | }, |
| 322 | }, |
| 323 | }; |
| 324 | |
| 325 | static __initconst const u64 snb_hw_cache_event_ids /* Per [cache][op][result] event code for the "snb" tables. The 0x0 and -1 entries are presumably sentinels interpreted by the consumer of this table (not visible here) -- TODO confirm their exact meaning. */ |
| 326 | [PERF_COUNT_HW_CACHE_MAX] |
| 327 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 328 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| 329 | { |
| 330 | [ C(L1D) ] = { |
| 331 | [ C(OP_READ) ] = { |
| 332 | [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */ |
| 333 | [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPLACEMENT */ |
| 334 | }, |
| 335 | [ C(OP_WRITE) ] = { |
| 336 | [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */ |
| 337 | [ C(RESULT_MISS) ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */ |
| 338 | }, |
| 339 | [ C(OP_PREFETCH) ] = { |
| 340 | [ C(RESULT_ACCESS) ] = 0x0, |
| 341 | [ C(RESULT_MISS) ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */ |
| 342 | }, |
| 343 | }, |
| 344 | [ C(L1I ) ] = { |
| 345 | [ C(OP_READ) ] = { |
| 346 | [ C(RESULT_ACCESS) ] = 0x0, |
| 347 | [ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */ |
| 348 | }, |
| 349 | [ C(OP_WRITE) ] = { |
| 350 | [ C(RESULT_ACCESS) ] = -1, |
| 351 | [ C(RESULT_MISS) ] = -1, |
| 352 | }, |
| 353 | [ C(OP_PREFETCH) ] = { |
| 354 | [ C(RESULT_ACCESS) ] = 0x0, |
| 355 | [ C(RESULT_MISS) ] = 0x0, |
| 356 | }, |
| 357 | }, |
| 358 | [ C(LL ) ] = { /* every LL entry uses event 0x01b7, the code paired with MSR_OFFCORE_RSP_0 in intel_snb_extra_regs; the per-entry bits are in snb_hw_cache_extra_regs */ |
| 359 | [ C(OP_READ) ] = { |
| 360 | /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ |
| 361 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 362 | /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ |
| 363 | [ C(RESULT_MISS) ] = 0x01b7, |
| 364 | }, |
| 365 | [ C(OP_WRITE) ] = { |
| 366 | /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ |
| 367 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 368 | /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ |
| 369 | [ C(RESULT_MISS) ] = 0x01b7, |
| 370 | }, |
| 371 | [ C(OP_PREFETCH) ] = { |
| 372 | /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ |
| 373 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 374 | /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ |
| 375 | [ C(RESULT_MISS) ] = 0x01b7, |
| 376 | }, |
| 377 | }, |
| 378 | [ C(DTLB) ] = { |
| 379 | [ C(OP_READ) ] = { |
| 380 | [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */ |
| 381 | [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */ |
| 382 | }, |
| 383 | [ C(OP_WRITE) ] = { |
| 384 | [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */ |
| 385 | [ C(RESULT_MISS) ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */ |
| 386 | }, |
| 387 | [ C(OP_PREFETCH) ] = { |
| 388 | [ C(RESULT_ACCESS) ] = 0x0, |
| 389 | [ C(RESULT_MISS) ] = 0x0, |
| 390 | }, |
| 391 | }, |
| 392 | [ C(ITLB) ] = { |
| 393 | [ C(OP_READ) ] = { |
| 394 | [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */ |
| 395 | [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */ |
| 396 | }, |
| 397 | [ C(OP_WRITE) ] = { |
| 398 | [ C(RESULT_ACCESS) ] = -1, |
| 399 | [ C(RESULT_MISS) ] = -1, |
| 400 | }, |
| 401 | [ C(OP_PREFETCH) ] = { |
| 402 | [ C(RESULT_ACCESS) ] = -1, |
| 403 | [ C(RESULT_MISS) ] = -1, |
| 404 | }, |
| 405 | }, |
| 406 | [ C(BPU ) ] = { |
| 407 | [ C(OP_READ) ] = { |
| 408 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ |
| 409 | [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */ |
| 410 | }, |
| 411 | [ C(OP_WRITE) ] = { |
| 412 | [ C(RESULT_ACCESS) ] = -1, |
| 413 | [ C(RESULT_MISS) ] = -1, |
| 414 | }, |
| 415 | [ C(OP_PREFETCH) ] = { |
| 416 | [ C(RESULT_ACCESS) ] = -1, |
| 417 | [ C(RESULT_MISS) ] = -1, |
| 418 | }, |
| 419 | }, |
| 420 | [ C(NODE) ] = { /* as with LL, every NODE entry uses event 0x01b7 */ |
| 421 | [ C(OP_READ) ] = { |
| 422 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 423 | [ C(RESULT_MISS) ] = 0x01b7, |
| 424 | }, |
| 425 | [ C(OP_WRITE) ] = { |
| 426 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 427 | [ C(RESULT_MISS) ] = 0x01b7, |
| 428 | }, |
| 429 | [ C(OP_PREFETCH) ] = { |
| 430 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 431 | [ C(RESULT_MISS) ] = 0x01b7, |
| 432 | }, |
| 433 | }, |
| 434 | |
| 435 | }; |
| 436 | |
| 437 | /* |
| 438 | * Notes on the events: |
| 439 | * - data reads do not include code reads (comparable to earlier tables) |
| 440 | * - data counts include speculative execution (except L1 write, dtlb, bpu) |
| 441 | * - remote node access includes remote memory, remote cache, remote mmio. |
| 442 | * - prefetches are not included in the counts because they are not |
| 443 | * reliably counted. |
| 444 | */ |
| 445 | |
| 446 | #define HSW_DEMAND_DATA_RD BIT_ULL(0) /* HSW_* single-bit values; they are OR-ed together below and in hsw_hw_cache_extra_regs */ |
| 447 | #define HSW_DEMAND_RFO BIT_ULL(1) |
| 448 | #define HSW_ANY_RESPONSE BIT_ULL(16) |
| 449 | #define HSW_SUPPLIER_NONE BIT_ULL(17) |
| 450 | #define HSW_L3_MISS_LOCAL_DRAM BIT_ULL(22) |
| 451 | #define HSW_L3_MISS_REMOTE_HOP0 BIT_ULL(27) |
| 452 | #define HSW_L3_MISS_REMOTE_HOP1 BIT_ULL(28) |
| 453 | #define HSW_L3_MISS_REMOTE_HOP2P BIT_ULL(29) |
| 454 | #define HSW_L3_MISS (HSW_L3_MISS_LOCAL_DRAM| \ |
| 455 | HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \ |
| 456 | HSW_L3_MISS_REMOTE_HOP2P) |
| 457 | #define HSW_SNOOP_NONE BIT_ULL(31) |
| 458 | #define HSW_SNOOP_NOT_NEEDED BIT_ULL(32) |
| 459 | #define HSW_SNOOP_MISS BIT_ULL(33) |
| 460 | #define HSW_SNOOP_HIT_NO_FWD BIT_ULL(34) |
| 461 | #define HSW_SNOOP_HIT_WITH_FWD BIT_ULL(35) |
| 462 | #define HSW_SNOOP_HITM BIT_ULL(36) |
| 463 | #define HSW_SNOOP_NON_DRAM BIT_ULL(37) |
| 464 | #define HSW_ANY_SNOOP (HSW_SNOOP_NONE| \ |
| 465 | HSW_SNOOP_NOT_NEEDED|HSW_SNOOP_MISS| \ |
| 466 | HSW_SNOOP_HIT_NO_FWD|HSW_SNOOP_HIT_WITH_FWD| \ |
| 467 | HSW_SNOOP_HITM|HSW_SNOOP_NON_DRAM) |
| 468 | #define HSW_SNOOP_DRAM (HSW_ANY_SNOOP & ~HSW_SNOOP_NON_DRAM) /* every snoop bit except NON_DRAM */ |
| 469 | #define HSW_DEMAND_READ HSW_DEMAND_DATA_RD |
| 470 | #define HSW_DEMAND_WRITE HSW_DEMAND_RFO |
| 471 | #define HSW_L3_MISS_REMOTE (HSW_L3_MISS_REMOTE_HOP0|\ |
| 472 | HSW_L3_MISS_REMOTE_HOP1|HSW_L3_MISS_REMOTE_HOP2P) |
| 473 | #define HSW_LLC_ACCESS HSW_ANY_RESPONSE |
| 474 | |
| 475 | #define BDW_L3_MISS_LOCAL BIT(26) /* NOTE(review): the only definition here using BIT() rather than BIT_ULL(); the value fits either way, but consider BIT_ULL(26) for consistency */ |
| 476 | #define BDW_L3_MISS (BDW_L3_MISS_LOCAL| \ |
| 477 | HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \ |
| 478 | HSW_L3_MISS_REMOTE_HOP2P) |
| 479 | |
| 480 | |
| 481 | static __initconst const u64 hsw_hw_cache_event_ids /* Per [cache][op][result] event code for the "hsw" tables. No OP_PREFETCH entry carries a real event code (all are 0x0 or -1). The 0x0 and -1 values are presumably sentinels interpreted by the table's consumer -- TODO confirm. */ |
| 482 | [PERF_COUNT_HW_CACHE_MAX] |
| 483 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 484 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| 485 | { |
| 486 | [ C(L1D ) ] = { |
| 487 | [ C(OP_READ) ] = { |
| 488 | [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */ |
| 489 | [ C(RESULT_MISS) ] = 0x151, /* L1D.REPLACEMENT */ |
| 490 | }, |
| 491 | [ C(OP_WRITE) ] = { |
| 492 | [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */ |
| 493 | [ C(RESULT_MISS) ] = 0x0, |
| 494 | }, |
| 495 | [ C(OP_PREFETCH) ] = { |
| 496 | [ C(RESULT_ACCESS) ] = 0x0, |
| 497 | [ C(RESULT_MISS) ] = 0x0, |
| 498 | }, |
| 499 | }, |
| 500 | [ C(L1I ) ] = { |
| 501 | [ C(OP_READ) ] = { |
| 502 | [ C(RESULT_ACCESS) ] = 0x0, |
| 503 | [ C(RESULT_MISS) ] = 0x280, /* ICACHE.MISSES */ |
| 504 | }, |
| 505 | [ C(OP_WRITE) ] = { |
| 506 | [ C(RESULT_ACCESS) ] = -1, |
| 507 | [ C(RESULT_MISS) ] = -1, |
| 508 | }, |
| 509 | [ C(OP_PREFETCH) ] = { |
| 510 | [ C(RESULT_ACCESS) ] = 0x0, |
| 511 | [ C(RESULT_MISS) ] = 0x0, |
| 512 | }, |
| 513 | }, |
| 514 | [ C(LL ) ] = { /* read/write entries use event 0x1b7; the per-entry bits are in hsw_hw_cache_extra_regs */ |
| 515 | [ C(OP_READ) ] = { |
| 516 | [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ |
| 517 | [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ |
| 518 | }, |
| 519 | [ C(OP_WRITE) ] = { |
| 520 | [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ |
| 521 | [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ |
| 522 | }, |
| 523 | [ C(OP_PREFETCH) ] = { |
| 524 | [ C(RESULT_ACCESS) ] = 0x0, |
| 525 | [ C(RESULT_MISS) ] = 0x0, |
| 526 | }, |
| 527 | }, |
| 528 | [ C(DTLB) ] = { |
| 529 | [ C(OP_READ) ] = { |
| 530 | [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */ |
| 531 | [ C(RESULT_MISS) ] = 0x108, /* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */ |
| 532 | }, |
| 533 | [ C(OP_WRITE) ] = { |
| 534 | [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */ |
| 535 | [ C(RESULT_MISS) ] = 0x149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */ |
| 536 | }, |
| 537 | [ C(OP_PREFETCH) ] = { |
| 538 | [ C(RESULT_ACCESS) ] = 0x0, |
| 539 | [ C(RESULT_MISS) ] = 0x0, |
| 540 | }, |
| 541 | }, |
| 542 | [ C(ITLB) ] = { |
| 543 | [ C(OP_READ) ] = { |
| 544 | [ C(RESULT_ACCESS) ] = 0x6085, /* ITLB_MISSES.STLB_HIT */ |
| 545 | [ C(RESULT_MISS) ] = 0x185, /* ITLB_MISSES.MISS_CAUSES_A_WALK */ |
| 546 | }, |
| 547 | [ C(OP_WRITE) ] = { |
| 548 | [ C(RESULT_ACCESS) ] = -1, |
| 549 | [ C(RESULT_MISS) ] = -1, |
| 550 | }, |
| 551 | [ C(OP_PREFETCH) ] = { |
| 552 | [ C(RESULT_ACCESS) ] = -1, |
| 553 | [ C(RESULT_MISS) ] = -1, |
| 554 | }, |
| 555 | }, |
| 556 | [ C(BPU ) ] = { |
| 557 | [ C(OP_READ) ] = { |
| 558 | [ C(RESULT_ACCESS) ] = 0xc4, /* BR_INST_RETIRED.ALL_BRANCHES */ |
| 559 | [ C(RESULT_MISS) ] = 0xc5, /* BR_MISP_RETIRED.ALL_BRANCHES */ |
| 560 | }, |
| 561 | [ C(OP_WRITE) ] = { |
| 562 | [ C(RESULT_ACCESS) ] = -1, |
| 563 | [ C(RESULT_MISS) ] = -1, |
| 564 | }, |
| 565 | [ C(OP_PREFETCH) ] = { |
| 566 | [ C(RESULT_ACCESS) ] = -1, |
| 567 | [ C(RESULT_MISS) ] = -1, |
| 568 | }, |
| 569 | }, |
| 570 | [ C(NODE) ] = { |
| 571 | [ C(OP_READ) ] = { |
| 572 | [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ |
| 573 | [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ |
| 574 | }, |
| 575 | [ C(OP_WRITE) ] = { |
| 576 | [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ |
| 577 | [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ |
| 578 | }, |
| 579 | [ C(OP_PREFETCH) ] = { |
| 580 | [ C(RESULT_ACCESS) ] = 0x0, |
| 581 | [ C(RESULT_MISS) ] = 0x0, |
| 582 | }, |
| 583 | }, |
| 584 | }; |
| 585 | |
| 586 | static __initconst const u64 hsw_hw_cache_extra_regs /* Per [cache][op][result] value built from the HSW_* bit macros. Only LL and NODE read/write rows are non-zero; these are the rows whose event code in hsw_hw_cache_event_ids is 0x1b7. */ |
| 587 | [PERF_COUNT_HW_CACHE_MAX] |
| 588 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 589 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| 590 | { |
| 591 | [ C(LL ) ] = { |
| 592 | [ C(OP_READ) ] = { |
| 593 | [ C(RESULT_ACCESS) ] = HSW_DEMAND_READ| |
| 594 | HSW_LLC_ACCESS, |
| 595 | [ C(RESULT_MISS) ] = HSW_DEMAND_READ| |
| 596 | HSW_L3_MISS|HSW_ANY_SNOOP, |
| 597 | }, |
| 598 | [ C(OP_WRITE) ] = { |
| 599 | [ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE| |
| 600 | HSW_LLC_ACCESS, |
| 601 | [ C(RESULT_MISS) ] = HSW_DEMAND_WRITE| |
| 602 | HSW_L3_MISS|HSW_ANY_SNOOP, |
| 603 | }, |
| 604 | [ C(OP_PREFETCH) ] = { |
| 605 | [ C(RESULT_ACCESS) ] = 0x0, |
| 606 | [ C(RESULT_MISS) ] = 0x0, |
| 607 | }, |
| 608 | }, |
| 609 | [ C(NODE) ] = { /* ACCESS selects the local-DRAM miss bit, MISS selects the three remote-hop bits; both add HSW_SNOOP_DRAM */ |
| 610 | [ C(OP_READ) ] = { |
| 611 | [ C(RESULT_ACCESS) ] = HSW_DEMAND_READ| |
| 612 | HSW_L3_MISS_LOCAL_DRAM| |
| 613 | HSW_SNOOP_DRAM, |
| 614 | [ C(RESULT_MISS) ] = HSW_DEMAND_READ| |
| 615 | HSW_L3_MISS_REMOTE| |
| 616 | HSW_SNOOP_DRAM, |
| 617 | }, |
| 618 | [ C(OP_WRITE) ] = { |
| 619 | [ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE| |
| 620 | HSW_L3_MISS_LOCAL_DRAM| |
| 621 | HSW_SNOOP_DRAM, |
| 622 | [ C(RESULT_MISS) ] = HSW_DEMAND_WRITE| |
| 623 | HSW_L3_MISS_REMOTE| |
| 624 | HSW_SNOOP_DRAM, |
| 625 | }, |
| 626 | [ C(OP_PREFETCH) ] = { |
| 627 | [ C(RESULT_ACCESS) ] = 0x0, |
| 628 | [ C(RESULT_MISS) ] = 0x0, |
| 629 | }, |
| 630 | }, |
| 631 | }; |
| 632 | |
| 633 | static __initconst const u64 westmere_hw_cache_event_ids /* Per [cache][op][result] event code for the "westmere" tables. The 0x0 and -1 entries are presumably sentinels interpreted by the consumer of this table (not visible here) -- TODO confirm their exact meaning. */ |
| 634 | [PERF_COUNT_HW_CACHE_MAX] |
| 635 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 636 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| 637 | { |
| 638 | [ C(L1D) ] = { |
| 639 | [ C(OP_READ) ] = { |
| 640 | [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ |
| 641 | [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */ |
| 642 | }, |
| 643 | [ C(OP_WRITE) ] = { |
| 644 | [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */ |
| 645 | [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */ |
| 646 | }, |
| 647 | [ C(OP_PREFETCH) ] = { |
| 648 | [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ |
| 649 | [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ |
| 650 | }, |
| 651 | }, |
| 652 | [ C(L1I ) ] = { |
| 653 | [ C(OP_READ) ] = { |
| 654 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ |
| 655 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ |
| 656 | }, |
| 657 | [ C(OP_WRITE) ] = { |
| 658 | [ C(RESULT_ACCESS) ] = -1, |
| 659 | [ C(RESULT_MISS) ] = -1, |
| 660 | }, |
| 661 | [ C(OP_PREFETCH) ] = { |
| 662 | [ C(RESULT_ACCESS) ] = 0x0, |
| 663 | [ C(RESULT_MISS) ] = 0x0, |
| 664 | }, |
| 665 | }, |
| 666 | [ C(LL ) ] = { /* every LL entry uses event 0x01b7, the code paired with MSR_OFFCORE_RSP_0 in intel_westmere_extra_regs */ |
| 667 | [ C(OP_READ) ] = { |
| 668 | /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ |
| 669 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 670 | /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ |
| 671 | [ C(RESULT_MISS) ] = 0x01b7, |
| 672 | }, |
| 673 | /* |
| 674 | * Use RFO, not WRITEBACK, because a write miss would typically occur |
| 675 | * on RFO. |
| 676 | */ |
| 677 | [ C(OP_WRITE) ] = { |
| 678 | /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ |
| 679 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 680 | /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ |
| 681 | [ C(RESULT_MISS) ] = 0x01b7, |
| 682 | }, |
| 683 | [ C(OP_PREFETCH) ] = { |
| 684 | /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ |
| 685 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 686 | /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ |
| 687 | [ C(RESULT_MISS) ] = 0x01b7, |
| 688 | }, |
| 689 | }, |
| 690 | [ C(DTLB) ] = { |
| 691 | [ C(OP_READ) ] = { |
| 692 | [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ |
| 693 | [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ |
| 694 | }, |
| 695 | [ C(OP_WRITE) ] = { |
| 696 | [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */ |
| 697 | [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ |
| 698 | }, |
| 699 | [ C(OP_PREFETCH) ] = { |
| 700 | [ C(RESULT_ACCESS) ] = 0x0, |
| 701 | [ C(RESULT_MISS) ] = 0x0, |
| 702 | }, |
| 703 | }, |
| 704 | [ C(ITLB) ] = { |
| 705 | [ C(OP_READ) ] = { |
| 706 | [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ |
| 707 | [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */ |
| 708 | }, |
| 709 | [ C(OP_WRITE) ] = { |
| 710 | [ C(RESULT_ACCESS) ] = -1, |
| 711 | [ C(RESULT_MISS) ] = -1, |
| 712 | }, |
| 713 | [ C(OP_PREFETCH) ] = { |
| 714 | [ C(RESULT_ACCESS) ] = -1, |
| 715 | [ C(RESULT_MISS) ] = -1, |
| 716 | }, |
| 717 | }, |
| 718 | [ C(BPU ) ] = { |
| 719 | [ C(OP_READ) ] = { |
| 720 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ |
| 721 | [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ |
| 722 | }, |
| 723 | [ C(OP_WRITE) ] = { |
| 724 | [ C(RESULT_ACCESS) ] = -1, |
| 725 | [ C(RESULT_MISS) ] = -1, |
| 726 | }, |
| 727 | [ C(OP_PREFETCH) ] = { |
| 728 | [ C(RESULT_ACCESS) ] = -1, |
| 729 | [ C(RESULT_MISS) ] = -1, |
| 730 | }, |
| 731 | }, |
| 732 | [ C(NODE) ] = { /* as with LL, every NODE entry uses event 0x01b7 */ |
| 733 | [ C(OP_READ) ] = { |
| 734 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 735 | [ C(RESULT_MISS) ] = 0x01b7, |
| 736 | }, |
| 737 | [ C(OP_WRITE) ] = { |
| 738 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 739 | [ C(RESULT_MISS) ] = 0x01b7, |
| 740 | }, |
| 741 | [ C(OP_PREFETCH) ] = { |
| 742 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 743 | [ C(RESULT_MISS) ] = 0x01b7, |
| 744 | }, |
| 745 | }, |
| 746 | }; |
| 747 | |
| 748 | /* |
| 749 | * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits; |
| 750 | * See IA32 SDM Vol 3B 30.6.1.3. Bits 0-15 only (bit 11 is reserved); the composite masks below feed nehalem_hw_cache_extra_regs. |
| 751 | */ |
| 752 | |
| 753 | #define NHM_DMND_DATA_RD (1 << 0) |
| 754 | #define NHM_DMND_RFO (1 << 1) |
| 755 | #define NHM_DMND_IFETCH (1 << 2) |
| 756 | #define NHM_DMND_WB (1 << 3) |
| 757 | #define NHM_PF_DATA_RD (1 << 4) |
| 758 | #define NHM_PF_DATA_RFO (1 << 5) |
| 759 | #define NHM_PF_IFETCH (1 << 6) |
| 760 | #define NHM_OFFCORE_OTHER (1 << 7) |
| 761 | #define NHM_UNCORE_HIT (1 << 8) |
| 762 | #define NHM_OTHER_CORE_HIT_SNP (1 << 9) |
| 763 | #define NHM_OTHER_CORE_HITM (1 << 10) |
| 764 | /* reserved */ |
| 765 | #define NHM_REMOTE_CACHE_FWD (1 << 12) |
| 766 | #define NHM_REMOTE_DRAM (1 << 13) |
| 767 | #define NHM_LOCAL_DRAM (1 << 14) |
| 768 | #define NHM_NON_DRAM (1 << 15) |
| 769 | |
| 770 | #define NHM_LOCAL (NHM_LOCAL_DRAM|NHM_REMOTE_CACHE_FWD) |
| 771 | #define NHM_REMOTE (NHM_REMOTE_DRAM) |
| 772 | |
| 773 | #define NHM_DMND_READ (NHM_DMND_DATA_RD) |
| 774 | #define NHM_DMND_WRITE (NHM_DMND_RFO|NHM_DMND_WB) /* NOTE(review): includes NHM_DMND_WB, whereas the comment in westmere_hw_cache_event_ids says RFO is used rather than WRITEBACK -- confirm which is intended */ |
| 775 | #define NHM_DMND_PREFETCH (NHM_PF_DATA_RD|NHM_PF_DATA_RFO) |
| 776 | |
| 777 | #define NHM_L3_HIT (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM) |
| 778 | #define NHM_L3_MISS (NHM_NON_DRAM|NHM_LOCAL_DRAM|NHM_REMOTE_DRAM|NHM_REMOTE_CACHE_FWD) |
| 779 | #define NHM_L3_ACCESS (NHM_L3_HIT|NHM_L3_MISS) |
| 780 | |
| 781 | static __initconst const u64 nehalem_hw_cache_extra_regs /* Per [cache][op][result] value built from the NHM_* bit macros. Only the LL and NODE rows are populated. */ |
| 782 | [PERF_COUNT_HW_CACHE_MAX] |
| 783 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 784 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| 785 | { |
| 786 | [ C(LL ) ] = { |
| 787 | [ C(OP_READ) ] = { |
| 788 | [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS, |
| 789 | [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_L3_MISS, |
| 790 | }, |
| 791 | [ C(OP_WRITE) ] = { |
| 792 | [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS, |
| 793 | [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_L3_MISS, |
| 794 | }, |
| 795 | [ C(OP_PREFETCH) ] = { |
| 796 | [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS, |
| 797 | [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS, |
| 798 | }, |
| 799 | }, |
| 800 | [ C(NODE) ] = { /* ACCESS selects NHM_LOCAL|NHM_REMOTE, MISS selects NHM_REMOTE only */ |
| 801 | [ C(OP_READ) ] = { |
| 802 | [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_LOCAL|NHM_REMOTE, |
| 803 | [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE, |
| 804 | }, |
| 805 | [ C(OP_WRITE) ] = { |
| 806 | [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_LOCAL|NHM_REMOTE, |
| 807 | [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE, |
| 808 | }, |
| 809 | [ C(OP_PREFETCH) ] = { |
| 810 | [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_LOCAL|NHM_REMOTE, |
| 811 | [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE, |
| 812 | }, |
| 813 | }, |
| 814 | }; |
| 815 | |
/*
 * Nehalem event encodings for the generic cache events, indexed by
 * [cache type][operation][result].
 *
 * The LL and NODE rows all use 0x01b7 (the OFFCORE_RESPONSE event named in
 * the LL comments); for those rows the distinguishing bits live in
 * nehalem_hw_cache_extra_regs.
 *
 * NOTE(review): entries of 0 and -1 look like "no event available" and
 * "combination makes no sense" markers respectively -- confirm against the
 * code that consumes these tables.
 */
static __initconst const u64 nehalem_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
		[ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
		[ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
		[ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x0,
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
		[ C(RESULT_MISS)   ] = 0x01b7,
	},
	/*
	 * Use RFO, not WRITEBACK, because a write miss would typically occur
	 * on RFO.
	 */
	[ C(OP_WRITE) ] = {
		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
		[ C(RESULT_MISS)   ] = 0x01b7,
	},
	[ C(OP_PREFETCH) ] = {
		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
		[ C(RESULT_MISS)   ] = 0x01b7,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI   (alias)  */
		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI   (alias)  */
		[ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
		[ C(RESULT_MISS)   ] = 0x20c8, /* ITLB_MISS_RETIRED            */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
		[ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(NODE) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x01b7,
		[ C(RESULT_MISS)   ] = 0x01b7,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x01b7,
		[ C(RESULT_MISS)   ] = 0x01b7,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x01b7,
		[ C(RESULT_MISS)   ] = 0x01b7,
	},
 },
};
| 930 | |
/*
 * Core2 event encodings for the generic cache events, indexed by
 * [cache type][operation][result].  There is no NODE row, so those entries
 * are zero-initialized.
 *
 * NOTE(review): entries of 0 and -1 look like "no event available" and
 * "combination makes no sense" markers respectively -- confirm against the
 * code that consumes these tables.
 */
static __initconst const u64 core2_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI          */
		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE       */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI          */
		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE       */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS      */
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS                  */
		[ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES                 */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 /* The LL row is expressed with L2_* events here. */
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI  (alias) */
		[ C(RESULT_MISS)   ] = 0x0208, /* DTLB_MISSES.MISS_LD        */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI  (alias) */
		[ C(RESULT_MISS)   ] = 0x0808, /* DTLB_MISSES.MISS_ST        */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
		[ C(RESULT_MISS)   ] = 0x1282, /* ITLBMISSES                 */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
		[ C(RESULT_MISS)   ] = 0x00c5, /* BR_INST_RETIRED.MISPRED    */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};
| 1021 | |
/*
 * Atom event encodings for the generic cache events, indexed by
 * [cache type][operation][result].  There is no NODE row, so those entries
 * are zero-initialized.
 *
 * NOTE(review): entries of 0 and -1 look like "no event available" and
 * "combination makes no sense" markers respectively -- confirm against the
 * code that consumes these tables.
 */
static __initconst const u64 atom_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD               */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST               */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                  */
		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                 */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 /* The LL row is expressed with L2_* events here. */
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI  (alias) */
		[ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD        */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI  (alias) */
		[ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST        */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
		[ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES                */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
		[ C(RESULT_MISS)   ] = 0x00c5, /* BR_INST_RETIRED.MISPRED    */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};
| 1112 | |
/*
 * Extra-register descriptors for SLM: event 0x01b7 is paired with
 * MSR_OFFCORE_RSP_0 and event 0x02b7 with MSR_OFFCORE_RSP_1.
 *
 * NOTE(review): the third macro argument (0x768005ffff for both entries) is
 * presumably the mask of MSR bits a user may set -- confirm against the
 * definition of INTEL_UEVENT_EXTRA_REG.
 */
static struct extra_reg intel_slm_extra_regs[] __read_mostly =
{
	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffffull, RSP_0),
	INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x768005ffffull, RSP_1),
	EVENT_EXTRA_END
};
| 1120 | |
/*
 * SLM offcore-response selectors, expressed with the SNB_* bit definitions.
 * The request groups feed the read/write/prefetch operations; the response
 * groups feed the LL access/miss results.
 */
#define SLM_DMND_READ		(SNB_DMND_DATA_RD)
#define SLM_DMND_WRITE		(SNB_DMND_RFO)
#define SLM_DMND_PREFETCH	(SNB_PF_RFO | SNB_PF_DATA_RD)

#define SLM_LLC_ACCESS		(SNB_RESP_ANY)
#define SLM_SNP_ANY		\
	(SNB_HITM | SNB_NO_FWD | SNB_SNP_MISS | SNB_SNP_NONE)
#define SLM_LLC_MISS		(SNB_NON_DRAM | SLM_SNP_ANY)
| 1128 | |
/*
 * SLM values for the extra (offcore response) register that go with the
 * generic cache events, indexed by [cache type][operation][result].
 *
 * Only the LL row is listed; every entry not named here is zero-initialized.
 * Note that the LL read-miss entry is explicitly 0, matching the 0 event id
 * for the same slot in slm_hw_cache_event_ids.
 */
static __initconst const u64 slm_hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS,
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS,
		[ C(RESULT_MISS)   ] = SLM_DMND_WRITE|SLM_LLC_MISS,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = SLM_DMND_PREFETCH|SLM_LLC_ACCESS,
		[ C(RESULT_MISS)   ] = SLM_DMND_PREFETCH|SLM_LLC_MISS,
	},
 },
};
| 1149 | |
/*
 * SLM event encodings for the generic cache events, indexed by
 * [cache type][operation][result].  There is no NODE row, so those entries
 * are zero-initialized.
 *
 * The non-zero LL entries all use 0x01b7 (OFFCORE_RESPONSE); for those the
 * distinguishing bits live in slm_hw_cache_extra_regs.
 *
 * NOTE(review): entries of 0 and -1 look like "no event available" and
 * "combination makes no sense" markers respectively -- confirm against the
 * code that consumes these tables.
 */
static __initconst const u64 slm_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0x0104, /* LD_DCU_MISS */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0380, /* ICACHE.ACCESSES */
		[ C(RESULT_MISS)   ] = 0x0280, /* ICACHE.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_WRITE) ] = {
		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
		[ C(RESULT_MISS)   ] = 0x01b7,
	},
	[ C(OP_PREFETCH) ] = {
		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
		[ C(RESULT_MISS)   ] = 0x01b7,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0x0804, /* LD_DTLB_MISS */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
		[ C(RESULT_MISS)   ] = 0x40205, /* PAGE_WALKS.I_SIDE_WALKS */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
		[ C(RESULT_MISS)   ] = 0x00c5, /* BR_INST_RETIRED.MISPRED */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};
| 1245 | |
| 1246 | /* |
| 1247 | * Use from PMIs where the LBRs are already disabled. |
| 1248 | */ |
| 1249 | static void __intel_pmu_disable_all(void) |
| 1250 | { |
| 1251 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
| 1252 | |
| 1253 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); |
| 1254 | |
| 1255 | if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) |
| 1256 | intel_pmu_disable_bts(); |
| 1257 | else |
| 1258 | intel_bts_disable_local(); |
| 1259 | |
| 1260 | intel_pmu_pebs_disable_all(); |
| 1261 | } |
| 1262 | |
/*
 * Full disable path: stop the counters, BTS and PEBS via
 * __intel_pmu_disable_all(), then disable the LBRs as well.
 */
static void intel_pmu_disable_all(void)
{
	__intel_pmu_disable_all();
	intel_pmu_lbr_disable_all();
}
| 1268 | |
| 1269 | static void __intel_pmu_enable_all(int added, bool pmi) |
| 1270 | { |
| 1271 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
| 1272 | |
| 1273 | intel_pmu_pebs_enable_all(); |
| 1274 | intel_pmu_lbr_enable_all(pmi); |
| 1275 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, |
| 1276 | x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask); |
| 1277 | |
| 1278 | if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { |
| 1279 | struct perf_event *event = |
| 1280 | cpuc->events[INTEL_PMC_IDX_FIXED_BTS]; |
| 1281 | |
| 1282 | if (WARN_ON_ONCE(!event)) |
| 1283 | return; |
| 1284 | |
| 1285 | intel_pmu_enable_bts(event->hw.config); |
| 1286 | } else |
| 1287 | intel_bts_enable_local(); |
| 1288 | } |
| 1289 | |
/*
 * Enable everything from outside PMI context: same as
 * __intel_pmu_enable_all() with pmi == false.
 *
 * @added: passed through unchanged.
 */
static void intel_pmu_enable_all(int added)
{
	__intel_pmu_enable_all(added, false);
}
| 1294 | |
/*
 * Workaround for:
 *   Intel Errata AAK100 (model 26)
 *   Intel Errata AAP53  (model 30)
 *   Intel Errata BD53   (model 44)
 *
 * The official story:
 *   These chips need to be 'reset' when adding counters by programming the
 *   magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
 *   in sequence on the same PMC or on different PMCs.
 *
 * In practice it appears some of these events do in fact count, and
 * we need to program all 4 events.
 */
static void intel_pmu_nhm_workaround(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	/* One magic event-select value per general-purpose counter 0..3. */
	static const unsigned long nhm_magic[4] = {
		0x4300B5,
		0x4300D2,
		0x4300B1,
		0x4300B1
	};
	struct perf_event *event;
	int i;

	/*
	 * The Errata requires below steps:
	 * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
	 * 2) Configure 4 PERFEVTSELx with the magic events and clear
	 *    the corresponding PMCx;
	 * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL;
	 * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
	 * 5) Clear 4 pairs of PERFEVTSELx and PMCx;
	 */

	/*
	 * The real steps we choose are a little different from above.
	 * A) To reduce MSR operations, we don't run step 1) as they
	 *    are already cleared before this function is called;
	 * B) Call x86_perf_event_update to save PMCx before configuring
	 *    PERFEVTSELx with magic number;
	 * C) With step 5), we do clear only when the PERFEVTSELx is
	 *    not used currently.
	 * D) Call x86_perf_event_set_period to restore PMCx;
	 */

	/* We always operate 4 pairs of PERF Counters */
	/* Step B: fold the current hardware counts into the active events. */
	for (i = 0; i < 4; i++) {
		event = cpuc->events[i];
		if (event)
			x86_perf_event_update(event);
	}

	/* Step 2: load the magic selectors and zero the counters. */
	for (i = 0; i < 4; i++) {
		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
		wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
	}

	/* Steps 3 and 4: pulse global enable for counters 0-3, then clear. */
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);

	/*
	 * Steps C/D: slots with an event get their period and selector
	 * reprogrammed; unused slots just get the selector cleared.
	 */
	for (i = 0; i < 4; i++) {
		event = cpuc->events[i];

		if (event) {
			x86_perf_event_set_period(event);
			__x86_pmu_enable_event(&event->hw,
					ARCH_PERFMON_EVENTSEL_ENABLE);
		} else
			wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
	}
}
| 1368 | |
/*
 * Enable-all variant that first applies the errata workaround from
 * intel_pmu_nhm_workaround() whenever @added is non-zero, then performs the
 * regular enable.
 *
 * @added: non-zero triggers the workaround; also passed on to
 *         intel_pmu_enable_all().
 */
static void intel_pmu_nhm_enable_all(int added)
{
	if (added)
		intel_pmu_nhm_workaround();
	intel_pmu_enable_all(added);
}
| 1375 | |
/*
 * Read and return the current value of MSR_CORE_PERF_GLOBAL_STATUS.
 */
static inline u64 intel_pmu_get_status(void)
{
	u64 status;

	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);

	return status;
}
| 1384 | |
/*
 * Acknowledge overflow status bits by writing @ack to
 * MSR_CORE_PERF_GLOBAL_OVF_CTRL.
 *
 * @ack: bit mask to write; callers in this file pass a value previously
 *       obtained from intel_pmu_get_status().
 */
static inline void intel_pmu_ack_status(u64 ack)
{
	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
}
| 1389 | |
| 1390 | static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) |
| 1391 | { |
| 1392 | int idx = hwc->idx - INTEL_PMC_IDX_FIXED; |
| 1393 | u64 ctrl_val, mask; |
| 1394 | |
| 1395 | mask = 0xfULL << (idx * 4); |
| 1396 | |
| 1397 | rdmsrl(hwc->config_base, ctrl_val); |
| 1398 | ctrl_val &= ~mask; |
| 1399 | wrmsrl(hwc->config_base, ctrl_val); |
| 1400 | } |
| 1401 | |
| 1402 | static inline bool event_is_checkpointed(struct perf_event *event) |
| 1403 | { |
| 1404 | return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0; |
| 1405 | } |
| 1406 | |
| 1407 | static void intel_pmu_disable_event(struct perf_event *event) |
| 1408 | { |
| 1409 | struct hw_perf_event *hwc = &event->hw; |
| 1410 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
| 1411 | |
| 1412 | if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) { |
| 1413 | intel_pmu_disable_bts(); |
| 1414 | intel_pmu_drain_bts_buffer(); |
| 1415 | return; |
| 1416 | } |
| 1417 | |
| 1418 | cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); |
| 1419 | cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); |
| 1420 | cpuc->intel_cp_status &= ~(1ull << hwc->idx); |
| 1421 | |
| 1422 | /* |
| 1423 | * must disable before any actual event |
| 1424 | * because any event may be combined with LBR |
| 1425 | */ |
| 1426 | if (needs_branch_stack(event)) |
| 1427 | intel_pmu_lbr_disable(event); |
| 1428 | |
| 1429 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { |
| 1430 | intel_pmu_disable_fixed(hwc); |
| 1431 | return; |
| 1432 | } |
| 1433 | |
| 1434 | x86_pmu_disable_event(event); |
| 1435 | |
| 1436 | if (unlikely(event->attr.precise_ip)) |
| 1437 | intel_pmu_pebs_disable(event); |
| 1438 | } |
| 1439 | |
| 1440 | static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) |
| 1441 | { |
| 1442 | int idx = hwc->idx - INTEL_PMC_IDX_FIXED; |
| 1443 | u64 ctrl_val, bits, mask; |
| 1444 | |
| 1445 | /* |
| 1446 | * Enable IRQ generation (0x8), |
| 1447 | * and enable ring-3 counting (0x2) and ring-0 counting (0x1) |
| 1448 | * if requested: |
| 1449 | */ |
| 1450 | bits = 0x8ULL; |
| 1451 | if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) |
| 1452 | bits |= 0x2; |
| 1453 | if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) |
| 1454 | bits |= 0x1; |
| 1455 | |
| 1456 | /* |
| 1457 | * ANY bit is supported in v3 and up |
| 1458 | */ |
| 1459 | if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY) |
| 1460 | bits |= 0x4; |
| 1461 | |
| 1462 | bits <<= (idx * 4); |
| 1463 | mask = 0xfULL << (idx * 4); |
| 1464 | |
| 1465 | rdmsrl(hwc->config_base, ctrl_val); |
| 1466 | ctrl_val &= ~mask; |
| 1467 | ctrl_val |= bits; |
| 1468 | wrmsrl(hwc->config_base, ctrl_val); |
| 1469 | } |
| 1470 | |
| 1471 | static void intel_pmu_enable_event(struct perf_event *event) |
| 1472 | { |
| 1473 | struct hw_perf_event *hwc = &event->hw; |
| 1474 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
| 1475 | |
| 1476 | if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) { |
| 1477 | if (!__this_cpu_read(cpu_hw_events.enabled)) |
| 1478 | return; |
| 1479 | |
| 1480 | intel_pmu_enable_bts(hwc->config); |
| 1481 | return; |
| 1482 | } |
| 1483 | /* |
| 1484 | * must enabled before any actual event |
| 1485 | * because any event may be combined with LBR |
| 1486 | */ |
| 1487 | if (needs_branch_stack(event)) |
| 1488 | intel_pmu_lbr_enable(event); |
| 1489 | |
| 1490 | if (event->attr.exclude_host) |
| 1491 | cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx); |
| 1492 | if (event->attr.exclude_guest) |
| 1493 | cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx); |
| 1494 | |
| 1495 | if (unlikely(event_is_checkpointed(event))) |
| 1496 | cpuc->intel_cp_status |= (1ull << hwc->idx); |
| 1497 | |
| 1498 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { |
| 1499 | intel_pmu_enable_fixed(hwc); |
| 1500 | return; |
| 1501 | } |
| 1502 | |
| 1503 | if (unlikely(event->attr.precise_ip)) |
| 1504 | intel_pmu_pebs_enable(event); |
| 1505 | |
| 1506 | __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); |
| 1507 | } |
| 1508 | |
| 1509 | /* |
| 1510 | * Save and restart an expired event. Called by NMI contexts, |
| 1511 | * so it has to be careful about preempting normal event ops: |
| 1512 | */ |
| 1513 | int intel_pmu_save_and_restart(struct perf_event *event) |
| 1514 | { |
| 1515 | x86_perf_event_update(event); |
| 1516 | /* |
| 1517 | * For a checkpointed counter always reset back to 0. This |
| 1518 | * avoids a situation where the counter overflows, aborts the |
| 1519 | * transaction and is then set back to shortly before the |
| 1520 | * overflow, and overflows and aborts again. |
| 1521 | */ |
| 1522 | if (unlikely(event_is_checkpointed(event))) { |
| 1523 | /* No race with NMIs because the counter should not be armed */ |
| 1524 | wrmsrl(event->hw.event_base, 0); |
| 1525 | local64_set(&event->hw.prev_count, 0); |
| 1526 | } |
| 1527 | return x86_perf_event_set_period(event); |
| 1528 | } |
| 1529 | |
/*
 * Bring this CPU's PMU back to a cleared state.
 *
 * Does nothing when the PMU reports no generic counters.  Otherwise, with
 * local interrupts disabled, it zeroes every generic counter's config and
 * count registers, zeroes the fixed counters, rewinds the BTS index to the
 * buffer base, acknowledges pending overflow status, clears the global
 * control MSR (version 2 and up), and turns off the LBR-related bits in
 * the debug control MSR when LBRs exist.
 */
static void intel_pmu_reset(void)
{
	struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
	unsigned long flags;
	int idx;

	if (!x86_pmu.num_counters)
		return;

	local_irq_save(flags);

	pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());

	/* Generic counters: clear both the event select and the count. */
	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
		wrmsrl_safe(x86_pmu_event_addr(idx), 0ull);
	}
	/* Fixed counters: only the count registers are cleared here. */
	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
		wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);

	/* Rewind the BTS write position, if a debug store is set up. */
	if (ds)
		ds->bts_index = ds->bts_buffer_base;

	/* Ack all overflows and disable fixed counters */
	if (x86_pmu.version >= 2) {
		intel_pmu_ack_status(intel_pmu_get_status());
		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
	}

	/* Reset LBRs and LBR freezing */
	if (x86_pmu.lbr_nr) {
		update_debugctlmsr(get_debugctlmsr() &
			~(DEBUGCTLMSR_FREEZE_LBRS_ON_PMI|DEBUGCTLMSR_LBR));
	}

	local_irq_restore(flags);
}
| 1567 | |
/*
 * This handler is triggered by the local APIC, so the APIC IRQ handling
 * rules apply:
 *
 * Outline: all counting is stopped via __intel_pmu_disable_all(), BTS is
 * drained, and then the global status word is processed in a loop until it
 * reads back as zero or the loop limit trips.  Counting is re-enabled on
 * the way out.
 *
 * @regs: register state forwarded to the PEBS drain and overflow handlers.
 *
 * Returns the number of sources that were accounted as handled.
 */
static int intel_pmu_handle_irq(struct pt_regs *regs)
{
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	int bit, loops;
	u64 status;
	int handled;

	cpuc = this_cpu_ptr(&cpu_hw_events);

	/*
	 * No known reason to not always do late ACK,
	 * but just in case do it opt-in.
	 */
	if (!x86_pmu.late_ack)
		apic_write(APIC_LVTPC, APIC_DM_NMI);
	__intel_pmu_disable_all();
	/* BTS work is counted first; the counter status is read afterwards. */
	handled = intel_pmu_drain_bts_buffer();
	handled += intel_bts_interrupt();
	status = intel_pmu_get_status();
	if (!status)
		goto done;

	loops = 0;
again:
	intel_pmu_ack_status(status);
	/*
	 * Safety valve: after more than 100 passes, warn (once per boot),
	 * reset the PMU and give up on this interrupt.
	 */
	if (++loops > 100) {
		static bool warned = false;
		if (!warned) {
			WARN(1, "perfevents: irq loop stuck!\n");
			perf_event_print_debug();
			warned = true;
		}
		intel_pmu_reset();
		goto done;
	}

	inc_irq_stat(apic_perf_irqs);

	intel_pmu_lbr_read();

	/*
	 * CondChgd bit 63 doesn't mean any overflow status. Ignore
	 * and clear the bit.
	 */
	if (__test_and_clear_bit(63, (unsigned long *)&status)) {
		if (!status)
			goto done;
	}

	/*
	 * PEBS overflow sets bit 62 in the global status register
	 */
	if (__test_and_clear_bit(62, (unsigned long *)&status)) {
		handled++;
		x86_pmu.drain_pebs(regs);
	}

	/*
	 * Intel PT
	 */
	if (__test_and_clear_bit(55, (unsigned long *)&status)) {
		handled++;
		intel_pt_interrupt();
	}

	/*
	 * Checkpointed counters can lead to 'spurious' PMIs because the
	 * rollback caused by the PMI will have cleared the overflow status
	 * bit. Therefore always force probe these counters.
	 */
	status |= cpuc->intel_cp_status;

	for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
		struct perf_event *event = cpuc->events[bit];

		/* Counted as handled even if the counter is not active. */
		handled++;

		if (!test_bit(bit, cpuc->active_mask))
			continue;

		/* A zero return means no sample is generated for this bit. */
		if (!intel_pmu_save_and_restart(event))
			continue;

		perf_sample_data_init(&data, 0, event->hw.last_period);

		if (has_branch_stack(event))
			data.br_stack = &cpuc->lbr_stack;

		/* A non-zero return from the overflow handler stops the event. */
		if (perf_event_overflow(event, &data, regs))
			x86_pmu_stop(event, 0);
	}

	/*
	 * Repeat if there is more work to be done:
	 */
	status = intel_pmu_get_status();
	if (status)
		goto again;

done:
	__intel_pmu_enable_all(0, true);
	/*
	 * Only unmask the NMI after the overflow counters
	 * have been reset. This avoids spurious NMIs on
	 * Haswell CPUs.
	 */
	if (x86_pmu.late_ack)
		apic_write(APIC_LVTPC, APIC_DM_NMI);
	return handled;
}
| 1683 | |
| 1684 | static struct event_constraint * |
| 1685 | intel_bts_constraints(struct perf_event *event) |
| 1686 | { |
| 1687 | struct hw_perf_event *hwc = &event->hw; |
| 1688 | unsigned int hw_event, bts_event; |
| 1689 | |
| 1690 | if (event->attr.freq) |
| 1691 | return NULL; |
| 1692 | |
| 1693 | hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; |
| 1694 | bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); |
| 1695 | |
| 1696 | if (unlikely(hw_event == bts_event && hwc->sample_period == 1)) |
| 1697 | return &bts_constraint; |
| 1698 | |
| 1699 | return NULL; |
| 1700 | } |
| 1701 | |
| 1702 | static int intel_alt_er(int idx) |
| 1703 | { |
| 1704 | if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1)) |
| 1705 | return idx; |
| 1706 | |
| 1707 | if (idx == EXTRA_REG_RSP_0) |
| 1708 | return EXTRA_REG_RSP_1; |
| 1709 | |
| 1710 | if (idx == EXTRA_REG_RSP_1) |
| 1711 | return EXTRA_REG_RSP_0; |
| 1712 | |
| 1713 | return idx; |
| 1714 | } |
| 1715 | |
| 1716 | static void intel_fixup_er(struct perf_event *event, int idx) |
| 1717 | { |
| 1718 | event->hw.extra_reg.idx = idx; |
| 1719 | |
| 1720 | if (idx == EXTRA_REG_RSP_0) { |
| 1721 | event->hw.config &= ~INTEL_ARCH_EVENT_MASK; |
| 1722 | event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_0].event; |
| 1723 | event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0; |
| 1724 | } else if (idx == EXTRA_REG_RSP_1) { |
| 1725 | event->hw.config &= ~INTEL_ARCH_EVENT_MASK; |
| 1726 | event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_1].event; |
| 1727 | event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1; |
| 1728 | } |
| 1729 | } |
| 1730 | |
/*
 * Manage allocation of a shared extra MSR for certain events.
 *
 * Sharing can be:
 * per-cpu: to be shared between the various events on a single PMU
 * per-core: per-cpu + shared by HT threads
 *
 * @cpuc:  per-cpu PMU state; may be a fake cpuc used for validation
 * @event: event requesting the register
 * @reg:   extra-register descriptor of @event (index, MSR, config value)
 *
 * Returns NULL when the register was obtained, or was already marked
 * allocated on a real cpuc; the caller then goes on to look up the event's
 * regular constraint. Returns &emptyconstraint when the requested register
 * is in use with a different config and the index yielded by
 * intel_alt_er() does not provide a usable alternative.
 */
static struct event_constraint *
__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
				   struct perf_event *event,
				   struct hw_perf_event_extra *reg)
{
	struct event_constraint *c = &emptyconstraint;
	struct er_account *era;
	unsigned long flags;
	int idx = reg->idx;

	/*
	 * reg->alloc can be set due to existing state, so for fake cpuc we
	 * need to ignore this, otherwise we might fail to allocate proper fake
	 * state for this extra reg constraint. Also see the comment below.
	 */
	if (reg->alloc && !cpuc->is_fake)
		return NULL; /* call x86_get_event_constraint() */

again:
	era = &cpuc->shared_regs->regs[idx];
	/*
	 * we use spin_lock_irqsave() to avoid lockdep issues when
	 * passing a fake cpuc
	 */
	raw_spin_lock_irqsave(&era->lock, flags);

	/* usable if nobody holds the register, or it holds the same value */
	if (!atomic_read(&era->ref) || era->config == reg->config) {

		/*
		 * If it's a fake cpuc -- as per validate_{group,event}() we
		 * shouldn't touch event state and we can avoid doing so
		 * since both will only call get_event_constraints() once
		 * on each event, this avoids the need for reg->alloc.
		 *
		 * Not doing the ER fixup will only result in era->reg being
		 * wrong, but since we won't actually try and program hardware
		 * this isn't a problem either.
		 */
		if (!cpuc->is_fake) {
			/* we ended up on the alternate register: retarget event */
			if (idx != reg->idx)
				intel_fixup_er(event, idx);

			/*
			 * x86_schedule_events() can call get_event_constraints()
			 * multiple times on events in the case of incremental
			 * scheduling(). reg->alloc ensures we only do the ER
			 * allocation once.
			 */
			reg->alloc = 1;
		}

		/* lock in msr value */
		era->config = reg->config;
		era->reg = reg->reg;

		/* one more user */
		atomic_inc(&era->ref);

		/*
		 * need to call x86_get_event_constraint()
		 * to check if associated event has constraints
		 */
		c = NULL;
	} else {
		/*
		 * Register busy with a different config: try the alternate
		 * index. The retry happens only while idx differs from the
		 * event's original reg->idx, so once the alternate has been
		 * tried and intel_alt_er() maps back to reg->idx we fall
		 * through and return &emptyconstraint.
		 */
		idx = intel_alt_er(idx);
		if (idx != reg->idx) {
			raw_spin_unlock_irqrestore(&era->lock, flags);
			goto again;
		}
	}
	raw_spin_unlock_irqrestore(&era->lock, flags);

	return c;
}
| 1812 | |
| 1813 | static void |
| 1814 | __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc, |
| 1815 | struct hw_perf_event_extra *reg) |
| 1816 | { |
| 1817 | struct er_account *era; |
| 1818 | |
| 1819 | /* |
| 1820 | * Only put constraint if extra reg was actually allocated. Also takes |
| 1821 | * care of event which do not use an extra shared reg. |
| 1822 | * |
| 1823 | * Also, if this is a fake cpuc we shouldn't touch any event state |
| 1824 | * (reg->alloc) and we don't care about leaving inconsistent cpuc state |
| 1825 | * either since it'll be thrown out. |
| 1826 | */ |
| 1827 | if (!reg->alloc || cpuc->is_fake) |
| 1828 | return; |
| 1829 | |
| 1830 | era = &cpuc->shared_regs->regs[reg->idx]; |
| 1831 | |
| 1832 | /* one fewer user */ |
| 1833 | atomic_dec(&era->ref); |
| 1834 | |
| 1835 | /* allocate again next time */ |
| 1836 | reg->alloc = 0; |
| 1837 | } |
| 1838 | |
| 1839 | static struct event_constraint * |
| 1840 | intel_shared_regs_constraints(struct cpu_hw_events *cpuc, |
| 1841 | struct perf_event *event) |
| 1842 | { |
| 1843 | struct event_constraint *c = NULL, *d; |
| 1844 | struct hw_perf_event_extra *xreg, *breg; |
| 1845 | |
| 1846 | xreg = &event->hw.extra_reg; |
| 1847 | if (xreg->idx != EXTRA_REG_NONE) { |
| 1848 | c = __intel_shared_reg_get_constraints(cpuc, event, xreg); |
| 1849 | if (c == &emptyconstraint) |
| 1850 | return c; |
| 1851 | } |
| 1852 | breg = &event->hw.branch_reg; |
| 1853 | if (breg->idx != EXTRA_REG_NONE) { |
| 1854 | d = __intel_shared_reg_get_constraints(cpuc, event, breg); |
| 1855 | if (d == &emptyconstraint) { |
| 1856 | __intel_shared_reg_put_constraints(cpuc, xreg); |
| 1857 | c = d; |
| 1858 | } |
| 1859 | } |
| 1860 | return c; |
| 1861 | } |
| 1862 | |
| 1863 | struct event_constraint * |
| 1864 | x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx, |
| 1865 | struct perf_event *event) |
| 1866 | { |
| 1867 | struct event_constraint *c; |
| 1868 | |
| 1869 | if (x86_pmu.event_constraints) { |
| 1870 | for_each_event_constraint(c, x86_pmu.event_constraints) { |
| 1871 | if ((event->hw.config & c->cmask) == c->code) { |
| 1872 | event->hw.flags |= c->flags; |
| 1873 | return c; |
| 1874 | } |
| 1875 | } |
| 1876 | } |
| 1877 | |
| 1878 | return &unconstrained; |
| 1879 | } |
| 1880 | |
/*
 * Resolve the constraint for @event by consulting, in order: the BTS
 * check, the shared-register allocator, the PEBS constraints and finally
 * the generic x86 constraint table. The first non-NULL answer is returned.
 */
static struct event_constraint *
__intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
			      struct perf_event *event)
{
	struct event_constraint *ec;

	ec = intel_bts_constraints(event);
	if (!ec)
		ec = intel_shared_regs_constraints(cpuc, event);
	if (!ec)
		ec = intel_pebs_constraints(event);
	if (!ec)
		ec = x86_get_event_constraints(cpuc, idx, event);

	return ec;
}
| 1901 | |
| 1902 | static void |
| 1903 | intel_start_scheduling(struct cpu_hw_events *cpuc) |
| 1904 | { |
| 1905 | struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; |
| 1906 | struct intel_excl_states *xl, *xlo; |
| 1907 | int tid = cpuc->excl_thread_id; |
| 1908 | int o_tid = 1 - tid; /* sibling thread */ |
| 1909 | |
| 1910 | /* |
| 1911 | * nothing needed if in group validation mode |
| 1912 | */ |
| 1913 | if (cpuc->is_fake || !is_ht_workaround_enabled()) |
| 1914 | return; |
| 1915 | |
| 1916 | /* |
| 1917 | * no exclusion needed |
| 1918 | */ |
| 1919 | if (!excl_cntrs) |
| 1920 | return; |
| 1921 | |
| 1922 | xlo = &excl_cntrs->states[o_tid]; |
| 1923 | xl = &excl_cntrs->states[tid]; |
| 1924 | |
| 1925 | xl->sched_started = true; |
| 1926 | /* |
| 1927 | * lock shared state until we are done scheduling |
| 1928 | * in stop_event_scheduling() |
| 1929 | * makes scheduling appear as a transaction |
| 1930 | */ |
| 1931 | WARN_ON_ONCE(!irqs_disabled()); |
| 1932 | raw_spin_lock(&excl_cntrs->lock); |
| 1933 | |
| 1934 | /* |
| 1935 | * save initial state of sibling thread |
| 1936 | */ |
| 1937 | memcpy(xlo->init_state, xlo->state, sizeof(xlo->init_state)); |
| 1938 | } |
| 1939 | |
| 1940 | static void |
| 1941 | intel_stop_scheduling(struct cpu_hw_events *cpuc) |
| 1942 | { |
| 1943 | struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; |
| 1944 | struct intel_excl_states *xl, *xlo; |
| 1945 | int tid = cpuc->excl_thread_id; |
| 1946 | int o_tid = 1 - tid; /* sibling thread */ |
| 1947 | |
| 1948 | /* |
| 1949 | * nothing needed if in group validation mode |
| 1950 | */ |
| 1951 | if (cpuc->is_fake || !is_ht_workaround_enabled()) |
| 1952 | return; |
| 1953 | /* |
| 1954 | * no exclusion needed |
| 1955 | */ |
| 1956 | if (!excl_cntrs) |
| 1957 | return; |
| 1958 | |
| 1959 | xlo = &excl_cntrs->states[o_tid]; |
| 1960 | xl = &excl_cntrs->states[tid]; |
| 1961 | |
| 1962 | /* |
| 1963 | * make new sibling thread state visible |
| 1964 | */ |
| 1965 | memcpy(xlo->state, xlo->init_state, sizeof(xlo->state)); |
| 1966 | |
| 1967 | xl->sched_started = false; |
| 1968 | /* |
| 1969 | * release shared state lock (acquired in intel_start_scheduling()) |
| 1970 | */ |
| 1971 | raw_spin_unlock(&excl_cntrs->lock); |
| 1972 | } |
| 1973 | |
/*
 * Narrow constraint @c according to the cross-HT exclusive-counter state.
 *
 * @cpuc:  per-cpu PMU state
 * @event: event being scheduled
 * @idx:   slot in cpuc->constraint_list[] to use when @c has to be cloned
 * @c:     constraint computed so far (static, or already dynamic)
 *
 * Returns @c untouched for a fake cpuc, when the HT workaround is disabled
 * or when there is no exclusive-counter state. Otherwise returns a dynamic
 * (per-cpuc) constraint whose counter mask has had the unusable counters
 * removed, or &emptyconstraint when no counter is left (or @idx < 0 while a
 * clone is required).
 */
static struct event_constraint *
intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
			   int idx, struct event_constraint *c)
{
	struct event_constraint *cx;
	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
	struct intel_excl_states *xl, *xlo;
	int is_excl, i;
	int tid = cpuc->excl_thread_id;
	int o_tid = 1 - tid; /* alternate */

	/*
	 * validating a group does not require
	 * enforcing cross-thread exclusion
	 */
	if (cpuc->is_fake || !is_ht_workaround_enabled())
		return c;

	/*
	 * no exclusion needed
	 */
	if (!excl_cntrs)
		return c;
	/*
	 * event requires exclusive counter access
	 * across HT threads
	 *
	 * Account it once per event (PERF_X86_EVENT_EXCL_ACCT) and publish
	 * has_exclusive[tid] when the first exclusive event shows up.
	 */
	is_excl = c->flags & PERF_X86_EVENT_EXCL;
	if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
		event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
		if (!cpuc->n_excl++)
			WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
	}

	/*
	 * xl = state of current HT
	 * xlo = state of sibling HT
	 *
	 * NOTE(review): xlo is assigned but not read in this function.
	 */
	xl = &excl_cntrs->states[tid];
	xlo = &excl_cntrs->states[o_tid];

	cx = c;

	/*
	 * because we modify the constraint, we need
	 * to make a copy. Static constraints come
	 * from static const tables.
	 *
	 * only needed when constraint has not yet
	 * been cloned (marked dynamic)
	 */
	if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {

		/* sanity check */
		if (idx < 0)
			return &emptyconstraint;

		/*
		 * grab pre-allocated constraint entry
		 */
		cx = &cpuc->constraint_list[idx];

		/*
		 * initialize dynamic constraint
		 * with static constraint
		 */
		memcpy(cx, c, sizeof(*cx));

		/*
		 * mark constraint as dynamic, so we
		 * can free it later on
		 */
		cx->flags |= PERF_X86_EVENT_DYNAMIC;
	}

	/*
	 * From here on, the constraint is dynamic.
	 * Either it was just allocated above, or it
	 * was allocated during an earlier invocation
	 * of this function
	 */

	/*
	 * Modify static constraint with current dynamic
	 * state of thread
	 *
	 * EXCLUSIVE: sibling counter measuring exclusive event
	 * SHARED : sibling counter measuring non-exclusive event
	 * UNUSED : sibling counter unused
	 *
	 * The loop reads xl->state[]: as intel_commit_scheduling() and
	 * intel_stop_scheduling() show, a thread's state[] entries are
	 * written by the other thread (through its xlo pointer), so
	 * xl->state[i] describes what the sibling runs on counter i.
	 */
	for_each_set_bit(i, cx->idxmsk, X86_PMC_IDX_MAX) {
		/*
		 * exclusive event in sibling counter
		 * our corresponding counter cannot be used
		 * regardless of our event
		 */
		if (xl->state[i] == INTEL_EXCL_EXCLUSIVE)
			__clear_bit(i, cx->idxmsk);
		/*
		 * if measuring an exclusive event, sibling
		 * measuring non-exclusive, then counter cannot
		 * be used
		 */
		if (is_excl && xl->state[i] == INTEL_EXCL_SHARED)
			__clear_bit(i, cx->idxmsk);
	}

	/*
	 * recompute actual bit weight for scheduling algorithm
	 */
	cx->weight = hweight64(cx->idxmsk64);

	/*
	 * if we return an empty mask, then switch
	 * back to static empty constraint to avoid
	 * the cost of freeing later on
	 */
	if (cx->weight == 0)
		cx = &emptyconstraint;

	return cx;
}
| 2096 | |
| 2097 | static struct event_constraint * |
| 2098 | intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, |
| 2099 | struct perf_event *event) |
| 2100 | { |
| 2101 | struct event_constraint *c1 = cpuc->event_constraint[idx]; |
| 2102 | struct event_constraint *c2; |
| 2103 | |
| 2104 | /* |
| 2105 | * first time only |
| 2106 | * - static constraint: no change across incremental scheduling calls |
| 2107 | * - dynamic constraint: handled by intel_get_excl_constraints() |
| 2108 | */ |
| 2109 | c2 = __intel_get_event_constraints(cpuc, idx, event); |
| 2110 | if (c1 && (c1->flags & PERF_X86_EVENT_DYNAMIC)) { |
| 2111 | bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX); |
| 2112 | c1->weight = c2->weight; |
| 2113 | c2 = c1; |
| 2114 | } |
| 2115 | |
| 2116 | if (cpuc->excl_cntrs) |
| 2117 | return intel_get_excl_constraints(cpuc, event, idx, c2); |
| 2118 | |
| 2119 | return c2; |
| 2120 | } |
| 2121 | |
| 2122 | static void intel_put_excl_constraints(struct cpu_hw_events *cpuc, |
| 2123 | struct perf_event *event) |
| 2124 | { |
| 2125 | struct hw_perf_event *hwc = &event->hw; |
| 2126 | struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; |
| 2127 | struct intel_excl_states *xlo, *xl; |
| 2128 | unsigned long flags = 0; /* keep compiler happy */ |
| 2129 | int tid = cpuc->excl_thread_id; |
| 2130 | int o_tid = 1 - tid; |
| 2131 | |
| 2132 | /* |
| 2133 | * nothing needed if in group validation mode |
| 2134 | */ |
| 2135 | if (cpuc->is_fake) |
| 2136 | return; |
| 2137 | |
| 2138 | WARN_ON_ONCE(!excl_cntrs); |
| 2139 | |
| 2140 | if (!excl_cntrs) |
| 2141 | return; |
| 2142 | |
| 2143 | xl = &excl_cntrs->states[tid]; |
| 2144 | xlo = &excl_cntrs->states[o_tid]; |
| 2145 | if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) { |
| 2146 | hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT; |
| 2147 | if (!--cpuc->n_excl) |
| 2148 | WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0); |
| 2149 | } |
| 2150 | |
| 2151 | /* |
| 2152 | * put_constraint may be called from x86_schedule_events() |
| 2153 | * which already has the lock held so here make locking |
| 2154 | * conditional |
| 2155 | */ |
| 2156 | if (!xl->sched_started) |
| 2157 | raw_spin_lock_irqsave(&excl_cntrs->lock, flags); |
| 2158 | |
| 2159 | /* |
| 2160 | * if event was actually assigned, then mark the |
| 2161 | * counter state as unused now |
| 2162 | */ |
| 2163 | if (hwc->idx >= 0) |
| 2164 | xlo->state[hwc->idx] = INTEL_EXCL_UNUSED; |
| 2165 | |
| 2166 | if (!xl->sched_started) |
| 2167 | raw_spin_unlock_irqrestore(&excl_cntrs->lock, flags); |
| 2168 | } |
| 2169 | |
| 2170 | static void |
| 2171 | intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc, |
| 2172 | struct perf_event *event) |
| 2173 | { |
| 2174 | struct hw_perf_event_extra *reg; |
| 2175 | |
| 2176 | reg = &event->hw.extra_reg; |
| 2177 | if (reg->idx != EXTRA_REG_NONE) |
| 2178 | __intel_shared_reg_put_constraints(cpuc, reg); |
| 2179 | |
| 2180 | reg = &event->hw.branch_reg; |
| 2181 | if (reg->idx != EXTRA_REG_NONE) |
| 2182 | __intel_shared_reg_put_constraints(cpuc, reg); |
| 2183 | } |
| 2184 | |
| 2185 | static void intel_put_event_constraints(struct cpu_hw_events *cpuc, |
| 2186 | struct perf_event *event) |
| 2187 | { |
| 2188 | intel_put_shared_regs_event_constraints(cpuc, event); |
| 2189 | |
| 2190 | /* |
| 2191 | * is PMU has exclusive counter restrictions, then |
| 2192 | * all events are subject to and must call the |
| 2193 | * put_excl_constraints() routine |
| 2194 | */ |
| 2195 | if (cpuc->excl_cntrs) |
| 2196 | intel_put_excl_constraints(cpuc, event); |
| 2197 | } |
| 2198 | |
| 2199 | static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr) |
| 2200 | { |
| 2201 | struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; |
| 2202 | struct event_constraint *c = cpuc->event_constraint[idx]; |
| 2203 | struct intel_excl_states *xlo, *xl; |
| 2204 | int tid = cpuc->excl_thread_id; |
| 2205 | int o_tid = 1 - tid; |
| 2206 | int is_excl; |
| 2207 | |
| 2208 | if (cpuc->is_fake || !c) |
| 2209 | return; |
| 2210 | |
| 2211 | is_excl = c->flags & PERF_X86_EVENT_EXCL; |
| 2212 | |
| 2213 | if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) |
| 2214 | return; |
| 2215 | |
| 2216 | WARN_ON_ONCE(!excl_cntrs); |
| 2217 | |
| 2218 | if (!excl_cntrs) |
| 2219 | return; |
| 2220 | |
| 2221 | xl = &excl_cntrs->states[tid]; |
| 2222 | xlo = &excl_cntrs->states[o_tid]; |
| 2223 | |
| 2224 | WARN_ON_ONCE(!raw_spin_is_locked(&excl_cntrs->lock)); |
| 2225 | |
| 2226 | if (cntr >= 0) { |
| 2227 | if (is_excl) |
| 2228 | xlo->init_state[cntr] = INTEL_EXCL_EXCLUSIVE; |
| 2229 | else |
| 2230 | xlo->init_state[cntr] = INTEL_EXCL_SHARED; |
| 2231 | } |
| 2232 | } |
| 2233 | |
| 2234 | static void intel_pebs_aliases_core2(struct perf_event *event) |
| 2235 | { |
| 2236 | if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { |
| 2237 | /* |
| 2238 | * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P |
| 2239 | * (0x003c) so that we can use it with PEBS. |
| 2240 | * |
| 2241 | * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't |
| 2242 | * PEBS capable. However we can use INST_RETIRED.ANY_P |
| 2243 | * (0x00c0), which is a PEBS capable event, to get the same |
| 2244 | * count. |
| 2245 | * |
| 2246 | * INST_RETIRED.ANY_P counts the number of cycles that retires |
| 2247 | * CNTMASK instructions. By setting CNTMASK to a value (16) |
| 2248 | * larger than the maximum number of instructions that can be |
| 2249 | * retired per cycle (4) and then inverting the condition, we |
| 2250 | * count all cycles that retire 16 or less instructions, which |
| 2251 | * is every cycle. |
| 2252 | * |
| 2253 | * Thereby we gain a PEBS capable cycle counter. |
| 2254 | */ |
| 2255 | u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16); |
| 2256 | |
| 2257 | alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); |
| 2258 | event->hw.config = alt_config; |
| 2259 | } |
| 2260 | } |
| 2261 | |
| 2262 | static void intel_pebs_aliases_snb(struct perf_event *event) |
| 2263 | { |
| 2264 | if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { |
| 2265 | /* |
| 2266 | * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P |
| 2267 | * (0x003c) so that we can use it with PEBS. |
| 2268 | * |
| 2269 | * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't |
| 2270 | * PEBS capable. However we can use UOPS_RETIRED.ALL |
| 2271 | * (0x01c2), which is a PEBS capable event, to get the same |
| 2272 | * count. |
| 2273 | * |
| 2274 | * UOPS_RETIRED.ALL counts the number of cycles that retires |
| 2275 | * CNTMASK micro-ops. By setting CNTMASK to a value (16) |
| 2276 | * larger than the maximum number of micro-ops that can be |
| 2277 | * retired per cycle (4) and then inverting the condition, we |
| 2278 | * count all cycles that retire 16 or less micro-ops, which |
| 2279 | * is every cycle. |
| 2280 | * |
| 2281 | * Thereby we gain a PEBS capable cycle counter. |
| 2282 | */ |
| 2283 | u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16); |
| 2284 | |
| 2285 | alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); |
| 2286 | event->hw.config = alt_config; |
| 2287 | } |
| 2288 | } |
| 2289 | |
| 2290 | static int intel_pmu_hw_config(struct perf_event *event) |
| 2291 | { |
| 2292 | int ret = x86_pmu_hw_config(event); |
| 2293 | |
| 2294 | if (ret) |
| 2295 | return ret; |
| 2296 | |
| 2297 | if (event->attr.precise_ip && x86_pmu.pebs_aliases) |
| 2298 | x86_pmu.pebs_aliases(event); |
| 2299 | |
| 2300 | if (needs_branch_stack(event)) { |
| 2301 | ret = intel_pmu_setup_lbr_filter(event); |
| 2302 | if (ret) |
| 2303 | return ret; |
| 2304 | |
| 2305 | /* |
| 2306 | * BTS is set up earlier in this path, so don't account twice |
| 2307 | */ |
| 2308 | if (!intel_pmu_has_bts(event)) { |
| 2309 | /* disallow lbr if conflicting events are present */ |
| 2310 | if (x86_add_exclusive(x86_lbr_exclusive_lbr)) |
| 2311 | return -EBUSY; |
| 2312 | |
| 2313 | event->destroy = hw_perf_lbr_event_destroy; |
| 2314 | } |
| 2315 | } |
| 2316 | |
| 2317 | if (event->attr.type != PERF_TYPE_RAW) |
| 2318 | return 0; |
| 2319 | |
| 2320 | if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY)) |
| 2321 | return 0; |
| 2322 | |
| 2323 | if (x86_pmu.version < 3) |
| 2324 | return -EINVAL; |
| 2325 | |
| 2326 | if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) |
| 2327 | return -EACCES; |
| 2328 | |
| 2329 | event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY; |
| 2330 | |
| 2331 | return 0; |
| 2332 | } |
| 2333 | |
| 2334 | struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr) |
| 2335 | { |
| 2336 | if (x86_pmu.guest_get_msrs) |
| 2337 | return x86_pmu.guest_get_msrs(nr); |
| 2338 | *nr = 0; |
| 2339 | return NULL; |
| 2340 | } |
| 2341 | EXPORT_SYMBOL_GPL(perf_guest_get_msrs); |
| 2342 | |
| 2343 | static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr) |
| 2344 | { |
| 2345 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
| 2346 | struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; |
| 2347 | |
| 2348 | arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL; |
| 2349 | arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask; |
| 2350 | arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask; |
| 2351 | /* |
| 2352 | * If PMU counter has PEBS enabled it is not enough to disable counter |
| 2353 | * on a guest entry since PEBS memory write can overshoot guest entry |
| 2354 | * and corrupt guest memory. Disabling PEBS solves the problem. |
| 2355 | */ |
| 2356 | arr[1].msr = MSR_IA32_PEBS_ENABLE; |
| 2357 | arr[1].host = cpuc->pebs_enabled; |
| 2358 | arr[1].guest = 0; |
| 2359 | |
| 2360 | *nr = 2; |
| 2361 | return arr; |
| 2362 | } |
| 2363 | |
| 2364 | static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr) |
| 2365 | { |
| 2366 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
| 2367 | struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; |
| 2368 | int idx; |
| 2369 | |
| 2370 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
| 2371 | struct perf_event *event = cpuc->events[idx]; |
| 2372 | |
| 2373 | arr[idx].msr = x86_pmu_config_addr(idx); |
| 2374 | arr[idx].host = arr[idx].guest = 0; |
| 2375 | |
| 2376 | if (!test_bit(idx, cpuc->active_mask)) |
| 2377 | continue; |
| 2378 | |
| 2379 | arr[idx].host = arr[idx].guest = |
| 2380 | event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE; |
| 2381 | |
| 2382 | if (event->attr.exclude_host) |
| 2383 | arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE; |
| 2384 | else if (event->attr.exclude_guest) |
| 2385 | arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE; |
| 2386 | } |
| 2387 | |
| 2388 | *nr = x86_pmu.num_counters; |
| 2389 | return arr; |
| 2390 | } |
| 2391 | |
| 2392 | static void core_pmu_enable_event(struct perf_event *event) |
| 2393 | { |
| 2394 | if (!event->attr.exclude_host) |
| 2395 | x86_pmu_enable_event(event); |
| 2396 | } |
| 2397 | |
| 2398 | static void core_pmu_enable_all(int added) |
| 2399 | { |
| 2400 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
| 2401 | int idx; |
| 2402 | |
| 2403 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
| 2404 | struct hw_perf_event *hwc = &cpuc->events[idx]->hw; |
| 2405 | |
| 2406 | if (!test_bit(idx, cpuc->active_mask) || |
| 2407 | cpuc->events[idx]->attr.exclude_host) |
| 2408 | continue; |
| 2409 | |
| 2410 | __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); |
| 2411 | } |
| 2412 | } |
| 2413 | |
| 2414 | static int hsw_hw_config(struct perf_event *event) |
| 2415 | { |
| 2416 | int ret = intel_pmu_hw_config(event); |
| 2417 | |
| 2418 | if (ret) |
| 2419 | return ret; |
| 2420 | if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE)) |
| 2421 | return 0; |
| 2422 | event->hw.config |= event->attr.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED); |
| 2423 | |
| 2424 | /* |
| 2425 | * IN_TX/IN_TX-CP filters are not supported by the Haswell PMU with |
| 2426 | * PEBS or in ANY thread mode. Since the results are non-sensical forbid |
| 2427 | * this combination. |
| 2428 | */ |
| 2429 | if ((event->hw.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)) && |
| 2430 | ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) || |
| 2431 | event->attr.precise_ip > 0)) |
| 2432 | return -EOPNOTSUPP; |
| 2433 | |
| 2434 | if (event_is_checkpointed(event)) { |
| 2435 | /* |
| 2436 | * Sampling of checkpointed events can cause situations where |
| 2437 | * the CPU constantly aborts because of a overflow, which is |
| 2438 | * then checkpointed back and ignored. Forbid checkpointing |
| 2439 | * for sampling. |
| 2440 | * |
| 2441 | * But still allow a long sampling period, so that perf stat |
| 2442 | * from KVM works. |
| 2443 | */ |
| 2444 | if (event->attr.sample_period > 0 && |
| 2445 | event->attr.sample_period < 0x7fffffff) |
| 2446 | return -EOPNOTSUPP; |
| 2447 | } |
| 2448 | return 0; |
| 2449 | } |
| 2450 | |
/*
 * Constraint handed out by hsw_get_event_constraints() for
 * HSW_IN_TX_CHECKPOINTED events when counter 2 is permitted.
 * NOTE(review): 0x4 is presumably the counter bitmask (bit 2 only), by
 * analogy with the constraint tables at the top of this file -- confirm
 * against the EVENT_CONSTRAINT() definition.
 */
static struct event_constraint counter2_constraint =
			EVENT_CONSTRAINT(0, 0x4, 0);
| 2453 | |
| 2454 | static struct event_constraint * |
| 2455 | hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx, |
| 2456 | struct perf_event *event) |
| 2457 | { |
| 2458 | struct event_constraint *c; |
| 2459 | |
| 2460 | c = intel_get_event_constraints(cpuc, idx, event); |
| 2461 | |
| 2462 | /* Handle special quirk on in_tx_checkpointed only in counter 2 */ |
| 2463 | if (event->hw.config & HSW_IN_TX_CHECKPOINTED) { |
| 2464 | if (c->idxmsk64 & (1U << 2)) |
| 2465 | return &counter2_constraint; |
| 2466 | return &emptyconstraint; |
| 2467 | } |
| 2468 | |
| 2469 | return c; |
| 2470 | } |
| 2471 | |
| 2472 | /* |
| 2473 | * Broadwell: |
| 2474 | * |
| 2475 | * The INST_RETIRED.ALL period always needs to have lowest 6 bits cleared |
| 2476 | * (BDM55) and it must not use a period smaller than 100 (BDM11). We combine |
| 2477 | * the two to enforce a minimum period of 128 (the smallest value that has bits |
| 2478 | * 0-5 cleared and >= 100). |
| 2479 | * |
| 2480 | * Because of how the code in x86_perf_event_set_period() works, the truncation |
| 2481 | * of the lower 6 bits is 'harmless' as we'll occasionally add a longer period |
| 2482 | * to make up for the 'lost' events due to carrying the 'error' in period_left. |
| 2483 | * |
| 2484 | * Therefore the effective (average) period matches the requested period, |
| 2485 | * despite coarser hardware granularity. |
| 2486 | */ |
| 2487 | static unsigned bdw_limit_period(struct perf_event *event, unsigned left) |
| 2488 | { |
| 2489 | if ((event->hw.config & INTEL_ARCH_EVENT_MASK) == |
| 2490 | X86_CONFIG(.event=0xc0, .umask=0x01)) { |
| 2491 | if (left < 128) |
| 2492 | left = 128; |
| 2493 | left &= ~0x3fu; |
| 2494 | } |
| 2495 | return left; |
| 2496 | } |
| 2497 | |
/*
 * Format attributes naming the fields of the event config word.
 * NOTE(review): each string presumably gives the bit range of the field
 * inside perf_event_attr::config -- confirm against PMU_FORMAT_ATTR().
 */
PMU_FORMAT_ATTR(event, "config:0-7" );
PMU_FORMAT_ATTR(umask, "config:8-15" );
PMU_FORMAT_ATTR(edge, "config:18" );
PMU_FORMAT_ATTR(pc, "config:19" );
PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */
PMU_FORMAT_ATTR(inv, "config:23" );
PMU_FORMAT_ATTR(cmask, "config:24-31" );
PMU_FORMAT_ATTR(in_tx, "config:32");
PMU_FORMAT_ATTR(in_tx_cp, "config:33");

/*
 * NULL-terminated format attribute list for the basic architectural PMU.
 * It omits the "any", "in_tx" and "in_tx_cp" fields declared above.
 */
static struct attribute *intel_arch_formats_attr[] = {
	&format_attr_event.attr,
	&format_attr_umask.attr,
	&format_attr_edge.attr,
	&format_attr_pc.attr,
	&format_attr_inv.attr,
	&format_attr_cmask.attr,
	NULL,
};
| 2517 | |
| 2518 | ssize_t intel_event_sysfs_show(char *page, u64 config) |
| 2519 | { |
| 2520 | u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT); |
| 2521 | |
| 2522 | return x86_event_sysfs_show(page, config, event); |
| 2523 | } |
| 2524 | |
| 2525 | struct intel_shared_regs *allocate_shared_regs(int cpu) |
| 2526 | { |
| 2527 | struct intel_shared_regs *regs; |
| 2528 | int i; |
| 2529 | |
| 2530 | regs = kzalloc_node(sizeof(struct intel_shared_regs), |
| 2531 | GFP_KERNEL, cpu_to_node(cpu)); |
| 2532 | if (regs) { |
| 2533 | /* |
| 2534 | * initialize the locks to keep lockdep happy |
| 2535 | */ |
| 2536 | for (i = 0; i < EXTRA_REG_MAX; i++) |
| 2537 | raw_spin_lock_init(®s->regs[i].lock); |
| 2538 | |
| 2539 | regs->core_id = -1; |
| 2540 | } |
| 2541 | return regs; |
| 2542 | } |
| 2543 | |
| 2544 | static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu) |
| 2545 | { |
| 2546 | struct intel_excl_cntrs *c; |
| 2547 | int i; |
| 2548 | |
| 2549 | c = kzalloc_node(sizeof(struct intel_excl_cntrs), |
| 2550 | GFP_KERNEL, cpu_to_node(cpu)); |
| 2551 | if (c) { |
| 2552 | raw_spin_lock_init(&c->lock); |
| 2553 | for (i = 0; i < X86_PMC_IDX_MAX; i++) { |
| 2554 | c->states[0].state[i] = INTEL_EXCL_UNUSED; |
| 2555 | c->states[0].init_state[i] = INTEL_EXCL_UNUSED; |
| 2556 | |
| 2557 | c->states[1].state[i] = INTEL_EXCL_UNUSED; |
| 2558 | c->states[1].init_state[i] = INTEL_EXCL_UNUSED; |
| 2559 | } |
| 2560 | c->core_id = -1; |
| 2561 | } |
| 2562 | return c; |
| 2563 | } |
| 2564 | |
| 2565 | static int intel_pmu_cpu_prepare(int cpu) |
| 2566 | { |
| 2567 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); |
| 2568 | |
| 2569 | if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) { |
| 2570 | cpuc->shared_regs = allocate_shared_regs(cpu); |
| 2571 | if (!cpuc->shared_regs) |
| 2572 | return NOTIFY_BAD; |
| 2573 | } |
| 2574 | |
| 2575 | if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) { |
| 2576 | size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint); |
| 2577 | |
| 2578 | cpuc->constraint_list = kzalloc(sz, GFP_KERNEL); |
| 2579 | if (!cpuc->constraint_list) |
| 2580 | return NOTIFY_BAD; |
| 2581 | |
| 2582 | cpuc->excl_cntrs = allocate_excl_cntrs(cpu); |
| 2583 | if (!cpuc->excl_cntrs) { |
| 2584 | kfree(cpuc->constraint_list); |
| 2585 | kfree(cpuc->shared_regs); |
| 2586 | return NOTIFY_BAD; |
| 2587 | } |
| 2588 | cpuc->excl_thread_id = 0; |
| 2589 | } |
| 2590 | |
| 2591 | return NOTIFY_OK; |
| 2592 | } |
| 2593 | |
/*
 * Per-cpu PMU setup run while @cpu is starting.
 *
 * Initialises the debug store, resets the LBRs, and then hooks this CPU up
 * to the shared-register and exclusive-counter state: if a CPU in the same
 * thread mask already owns a structure bound to this core, that structure
 * is adopted and the CPU's own allocation is parked in kfree_on_online[]
 * (presumably freed later once the CPU is online -- confirm in the caller).
 */
static void intel_pmu_cpu_starting(int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
	int core_id = topology_core_id(cpu);
	int i;

	init_debug_store_on_cpu(cpu);
	/*
	 * Deal with CPUs that don't clear their LBRs on power-up.
	 */
	intel_pmu_lbr_reset();

	cpuc->lbr_sel = NULL;

	/* no shared state was allocated for this CPU: nothing more to do */
	if (!cpuc->shared_regs)
		return;

	if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) {
		void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED];

		/* look for a sibling thread already bound to this core */
		for_each_cpu(i, topology_thread_cpumask(cpu)) {
			struct intel_shared_regs *pc;

			pc = per_cpu(cpu_hw_events, i).shared_regs;
			if (pc && pc->core_id == core_id) {
				/* adopt the sibling's copy, park our own */
				*onln = cpuc->shared_regs;
				cpuc->shared_regs = pc;
				break;
			}
		}
		/* bind whichever structure we ended up with to this core */
		cpuc->shared_regs->core_id = core_id;
		cpuc->shared_regs->refcnt++;
	}

	if (x86_pmu.lbr_sel_map)
		cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];

	if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
		/* same adoption scheme for the exclusive-counter state */
		for_each_cpu(i, topology_thread_cpumask(cpu)) {
			struct intel_excl_cntrs *c;

			c = per_cpu(cpu_hw_events, i).excl_cntrs;
			if (c && c->core_id == core_id) {
				cpuc->kfree_on_online[1] = cpuc->excl_cntrs;
				cpuc->excl_cntrs = c;
				/* the adopting thread uses thread slot 1 */
				cpuc->excl_thread_id = 1;
				break;
			}
		}
		cpuc->excl_cntrs->core_id = core_id;
		cpuc->excl_cntrs->refcnt++;
	}
}
| 2647 | |
| 2648 | static void free_excl_cntrs(int cpu) |
| 2649 | { |
| 2650 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); |
| 2651 | struct intel_excl_cntrs *c; |
| 2652 | |
| 2653 | c = cpuc->excl_cntrs; |
| 2654 | if (c) { |
| 2655 | if (c->core_id == -1 || --c->refcnt == 0) |
| 2656 | kfree(c); |
| 2657 | cpuc->excl_cntrs = NULL; |
| 2658 | kfree(cpuc->constraint_list); |
| 2659 | cpuc->constraint_list = NULL; |
| 2660 | } |
| 2661 | } |
| 2662 | |
| 2663 | static void intel_pmu_cpu_dying(int cpu) |
| 2664 | { |
| 2665 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); |
| 2666 | struct intel_shared_regs *pc; |
| 2667 | |
| 2668 | pc = cpuc->shared_regs; |
| 2669 | if (pc) { |
| 2670 | if (pc->core_id == -1 || --pc->refcnt == 0) |
| 2671 | kfree(pc); |
| 2672 | cpuc->shared_regs = NULL; |
| 2673 | } |
| 2674 | |
| 2675 | free_excl_cntrs(cpu); |
| 2676 | |
| 2677 | fini_debug_store_on_cpu(cpu); |
| 2678 | } |
| 2679 | |
| 2680 | PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); |
| 2681 | |
| 2682 | PMU_FORMAT_ATTR(ldlat, "config1:0-15"); |
| 2683 | |
| 2684 | static struct attribute *intel_arch3_formats_attr[] = { |
| 2685 | &format_attr_event.attr, |
| 2686 | &format_attr_umask.attr, |
| 2687 | &format_attr_edge.attr, |
| 2688 | &format_attr_pc.attr, |
| 2689 | &format_attr_any.attr, |
| 2690 | &format_attr_inv.attr, |
| 2691 | &format_attr_cmask.attr, |
| 2692 | &format_attr_in_tx.attr, |
| 2693 | &format_attr_in_tx_cp.attr, |
| 2694 | |
| 2695 | &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */ |
| 2696 | &format_attr_ldlat.attr, /* PEBS load latency */ |
| 2697 | NULL, |
| 2698 | }; |
| 2699 | |
| 2700 | static __initconst const struct x86_pmu core_pmu = { |
| 2701 | .name = "core", |
| 2702 | .handle_irq = x86_pmu_handle_irq, |
| 2703 | .disable_all = x86_pmu_disable_all, |
| 2704 | .enable_all = core_pmu_enable_all, |
| 2705 | .enable = core_pmu_enable_event, |
| 2706 | .disable = x86_pmu_disable_event, |
| 2707 | .hw_config = x86_pmu_hw_config, |
| 2708 | .schedule_events = x86_schedule_events, |
| 2709 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, |
| 2710 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, |
| 2711 | .event_map = intel_pmu_event_map, |
| 2712 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), |
| 2713 | .apic = 1, |
| 2714 | /* |
| 2715 | * Intel PMCs cannot be accessed sanely above 32-bit width, |
| 2716 | * so we install an artificial 1<<31 period regardless of |
| 2717 | * the generic event period: |
| 2718 | */ |
| 2719 | .max_period = (1ULL<<31) - 1, |
| 2720 | .get_event_constraints = intel_get_event_constraints, |
| 2721 | .put_event_constraints = intel_put_event_constraints, |
| 2722 | .event_constraints = intel_core_event_constraints, |
| 2723 | .guest_get_msrs = core_guest_get_msrs, |
| 2724 | .format_attrs = intel_arch_formats_attr, |
| 2725 | .events_sysfs_show = intel_event_sysfs_show, |
| 2726 | |
| 2727 | /* |
| 2728 | * Virtual (or funny metal) CPU can define x86_pmu.extra_regs |
| 2729 | * together with PMU version 1 and thus be using core_pmu with |
| 2730 | * shared_regs. We need following callbacks here to allocate |
| 2731 | * it properly. |
| 2732 | */ |
| 2733 | .cpu_prepare = intel_pmu_cpu_prepare, |
| 2734 | .cpu_starting = intel_pmu_cpu_starting, |
| 2735 | .cpu_dying = intel_pmu_cpu_dying, |
| 2736 | }; |
| 2737 | |
| 2738 | static __initconst const struct x86_pmu intel_pmu = { |
| 2739 | .name = "Intel", |
| 2740 | .handle_irq = intel_pmu_handle_irq, |
| 2741 | .disable_all = intel_pmu_disable_all, |
| 2742 | .enable_all = intel_pmu_enable_all, |
| 2743 | .enable = intel_pmu_enable_event, |
| 2744 | .disable = intel_pmu_disable_event, |
| 2745 | .hw_config = intel_pmu_hw_config, |
| 2746 | .schedule_events = x86_schedule_events, |
| 2747 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, |
| 2748 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, |
| 2749 | .event_map = intel_pmu_event_map, |
| 2750 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), |
| 2751 | .apic = 1, |
| 2752 | /* |
| 2753 | * Intel PMCs cannot be accessed sanely above 32 bit width, |
| 2754 | * so we install an artificial 1<<31 period regardless of |
| 2755 | * the generic event period: |
| 2756 | */ |
| 2757 | .max_period = (1ULL << 31) - 1, |
| 2758 | .get_event_constraints = intel_get_event_constraints, |
| 2759 | .put_event_constraints = intel_put_event_constraints, |
| 2760 | .pebs_aliases = intel_pebs_aliases_core2, |
| 2761 | |
| 2762 | .format_attrs = intel_arch3_formats_attr, |
| 2763 | .events_sysfs_show = intel_event_sysfs_show, |
| 2764 | |
| 2765 | .cpu_prepare = intel_pmu_cpu_prepare, |
| 2766 | .cpu_starting = intel_pmu_cpu_starting, |
| 2767 | .cpu_dying = intel_pmu_cpu_dying, |
| 2768 | .guest_get_msrs = intel_guest_get_msrs, |
| 2769 | .sched_task = intel_pmu_lbr_sched_task, |
| 2770 | }; |
| 2771 | |
| 2772 | static __init void intel_clovertown_quirk(void) |
| 2773 | { |
| 2774 | /* |
| 2775 | * PEBS is unreliable due to: |
| 2776 | * |
| 2777 | * AJ67 - PEBS may experience CPL leaks |
| 2778 | * AJ68 - PEBS PMI may be delayed by one event |
| 2779 | * AJ69 - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12] |
| 2780 | * AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS |
| 2781 | * |
| 2782 | * AJ67 could be worked around by restricting the OS/USR flags. |
| 2783 | * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI. |
| 2784 | * |
| 2785 | * AJ106 could possibly be worked around by not allowing LBR |
| 2786 | * usage from PEBS, including the fixup. |
| 2787 | * AJ68 could possibly be worked around by always programming |
| 2788 | * a pebs_event_reset[0] value and coping with the lost events. |
| 2789 | * |
| 2790 | * But taken together it might just make sense to not enable PEBS on |
| 2791 | * these chips. |
| 2792 | */ |
| 2793 | pr_warn("PEBS disabled due to CPU errata\n"); |
| 2794 | x86_pmu.pebs = 0; |
| 2795 | x86_pmu.pebs_constraints = NULL; |
| 2796 | } |
| 2797 | |
| 2798 | static int intel_snb_pebs_broken(int cpu) |
| 2799 | { |
| 2800 | u32 rev = UINT_MAX; /* default to broken for unknown models */ |
| 2801 | |
| 2802 | switch (cpu_data(cpu).x86_model) { |
| 2803 | case 42: /* SNB */ |
| 2804 | rev = 0x28; |
| 2805 | break; |
| 2806 | |
| 2807 | case 45: /* SNB-EP */ |
| 2808 | switch (cpu_data(cpu).x86_mask) { |
| 2809 | case 6: rev = 0x618; break; |
| 2810 | case 7: rev = 0x70c; break; |
| 2811 | } |
| 2812 | } |
| 2813 | |
| 2814 | return (cpu_data(cpu).microcode < rev); |
| 2815 | } |
| 2816 | |
| 2817 | static void intel_snb_check_microcode(void) |
| 2818 | { |
| 2819 | int pebs_broken = 0; |
| 2820 | int cpu; |
| 2821 | |
| 2822 | get_online_cpus(); |
| 2823 | for_each_online_cpu(cpu) { |
| 2824 | if ((pebs_broken = intel_snb_pebs_broken(cpu))) |
| 2825 | break; |
| 2826 | } |
| 2827 | put_online_cpus(); |
| 2828 | |
| 2829 | if (pebs_broken == x86_pmu.pebs_broken) |
| 2830 | return; |
| 2831 | |
| 2832 | /* |
| 2833 | * Serialized by the microcode lock.. |
| 2834 | */ |
| 2835 | if (x86_pmu.pebs_broken) { |
| 2836 | pr_info("PEBS enabled due to microcode update\n"); |
| 2837 | x86_pmu.pebs_broken = 0; |
| 2838 | } else { |
| 2839 | pr_info("PEBS disabled due to CPU errata, please upgrade microcode\n"); |
| 2840 | x86_pmu.pebs_broken = 1; |
| 2841 | } |
| 2842 | } |
| 2843 | |
| 2844 | /* |
| 2845 | * Under certain circumstances, access certain MSR may cause #GP. |
| 2846 | * The function tests if the input MSR can be safely accessed. |
| 2847 | */ |
| 2848 | static bool check_msr(unsigned long msr, u64 mask) |
| 2849 | { |
| 2850 | u64 val_old, val_new, val_tmp; |
| 2851 | |
| 2852 | /* |
| 2853 | * Read the current value, change it and read it back to see if it |
| 2854 | * matches, this is needed to detect certain hardware emulators |
| 2855 | * (qemu/kvm) that don't trap on the MSR access and always return 0s. |
| 2856 | */ |
| 2857 | if (rdmsrl_safe(msr, &val_old)) |
| 2858 | return false; |
| 2859 | |
| 2860 | /* |
| 2861 | * Only change the bits which can be updated by wrmsrl. |
| 2862 | */ |
| 2863 | val_tmp = val_old ^ mask; |
| 2864 | if (wrmsrl_safe(msr, val_tmp) || |
| 2865 | rdmsrl_safe(msr, &val_new)) |
| 2866 | return false; |
| 2867 | |
| 2868 | if (val_new != val_tmp) |
| 2869 | return false; |
| 2870 | |
| 2871 | /* Here it's sure that the MSR can be safely accessed. |
| 2872 | * Restore the old value and return. |
| 2873 | */ |
| 2874 | wrmsrl(msr, val_old); |
| 2875 | |
| 2876 | return true; |
| 2877 | } |
| 2878 | |
| 2879 | static __init void intel_sandybridge_quirk(void) |
| 2880 | { |
| 2881 | x86_pmu.check_microcode = intel_snb_check_microcode; |
| 2882 | intel_snb_check_microcode(); |
| 2883 | } |
| 2884 | |
/*
 * Architectural events, indexed by bit position in x86_pmu.events_mask
 * (see intel_arch_events_quirk()). The order of the entries therefore
 * must not be changed.
 *
 * @id:   generic perf hardware event id used to index
 *        intel_perfmon_event_map[]
 * @name: human readable name used in the "unavailable" warning; it
 *        points at a string literal and is therefore const-qualified
 */
static const struct { int id; const char *name; } intel_arch_events_map[] __initconst = {
	{ PERF_COUNT_HW_CPU_CYCLES,		"cpu cycles" },
	{ PERF_COUNT_HW_INSTRUCTIONS,		"instructions" },
	{ PERF_COUNT_HW_BUS_CYCLES,		"bus cycles" },
	{ PERF_COUNT_HW_CACHE_REFERENCES,	"cache references" },
	{ PERF_COUNT_HW_CACHE_MISSES,		"cache misses" },
	{ PERF_COUNT_HW_BRANCH_INSTRUCTIONS,	"branch instructions" },
	{ PERF_COUNT_HW_BRANCH_MISSES,		"branch misses" },
};
| 2894 | |
| 2895 | static __init void intel_arch_events_quirk(void) |
| 2896 | { |
| 2897 | int bit; |
| 2898 | |
| 2899 | /* disable event that reported as not presend by cpuid */ |
| 2900 | for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) { |
| 2901 | intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0; |
| 2902 | pr_warn("CPUID marked event: \'%s\' unavailable\n", |
| 2903 | intel_arch_events_map[bit].name); |
| 2904 | } |
| 2905 | } |
| 2906 | |
| 2907 | static __init void intel_nehalem_quirk(void) |
| 2908 | { |
| 2909 | union cpuid10_ebx ebx; |
| 2910 | |
| 2911 | ebx.full = x86_pmu.events_maskl; |
| 2912 | if (ebx.split.no_branch_misses_retired) { |
| 2913 | /* |
| 2914 | * Erratum AAJ80 detected, we work it around by using |
| 2915 | * the BR_MISP_EXEC.ANY event. This will over-count |
| 2916 | * branch-misses, but it's still much better than the |
| 2917 | * architectural event which is often completely bogus: |
| 2918 | */ |
| 2919 | intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89; |
| 2920 | ebx.split.no_branch_misses_retired = 0; |
| 2921 | x86_pmu.events_maskl = ebx.full; |
| 2922 | pr_info("CPU erratum AAJ80 worked around\n"); |
| 2923 | } |
| 2924 | } |
| 2925 | |
| 2926 | /* |
| 2927 | * enable software workaround for errata: |
| 2928 | * SNB: BJ122 |
| 2929 | * IVB: BV98 |
| 2930 | * HSW: HSD29 |
| 2931 | * |
| 2932 | * Only needed when HT is enabled. However detecting |
| 2933 | * if HT is enabled is difficult (model specific). So instead, |
| 2934 | * we enable the workaround in the early boot, and verify if |
| 2935 | * it is needed in a later initcall phase once we have valid |
| 2936 | * topology information to check if HT is actually enabled |
| 2937 | */ |
| 2938 | static __init void intel_ht_bug(void) |
| 2939 | { |
| 2940 | x86_pmu.flags |= PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED; |
| 2941 | |
| 2942 | x86_pmu.commit_scheduling = intel_commit_scheduling; |
| 2943 | x86_pmu.start_scheduling = intel_start_scheduling; |
| 2944 | x86_pmu.stop_scheduling = intel_stop_scheduling; |
| 2945 | } |
| 2946 | |
| 2947 | EVENT_ATTR_STR(mem-loads, mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3"); |
| 2948 | EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82") |
| 2949 | |
| 2950 | /* Haswell special events */ |
| 2951 | EVENT_ATTR_STR(tx-start, tx_start, "event=0xc9,umask=0x1"); |
| 2952 | EVENT_ATTR_STR(tx-commit, tx_commit, "event=0xc9,umask=0x2"); |
| 2953 | EVENT_ATTR_STR(tx-abort, tx_abort, "event=0xc9,umask=0x4"); |
| 2954 | EVENT_ATTR_STR(tx-capacity, tx_capacity, "event=0x54,umask=0x2"); |
| 2955 | EVENT_ATTR_STR(tx-conflict, tx_conflict, "event=0x54,umask=0x1"); |
| 2956 | EVENT_ATTR_STR(el-start, el_start, "event=0xc8,umask=0x1"); |
| 2957 | EVENT_ATTR_STR(el-commit, el_commit, "event=0xc8,umask=0x2"); |
| 2958 | EVENT_ATTR_STR(el-abort, el_abort, "event=0xc8,umask=0x4"); |
| 2959 | EVENT_ATTR_STR(el-capacity, el_capacity, "event=0x54,umask=0x2"); |
| 2960 | EVENT_ATTR_STR(el-conflict, el_conflict, "event=0x54,umask=0x1"); |
| 2961 | EVENT_ATTR_STR(cycles-t, cycles_t, "event=0x3c,in_tx=1"); |
| 2962 | EVENT_ATTR_STR(cycles-ct, cycles_ct, "event=0x3c,in_tx=1,in_tx_cp=1"); |
| 2963 | |
| 2964 | static struct attribute *hsw_events_attrs[] = { |
| 2965 | EVENT_PTR(tx_start), |
| 2966 | EVENT_PTR(tx_commit), |
| 2967 | EVENT_PTR(tx_abort), |
| 2968 | EVENT_PTR(tx_capacity), |
| 2969 | EVENT_PTR(tx_conflict), |
| 2970 | EVENT_PTR(el_start), |
| 2971 | EVENT_PTR(el_commit), |
| 2972 | EVENT_PTR(el_abort), |
| 2973 | EVENT_PTR(el_capacity), |
| 2974 | EVENT_PTR(el_conflict), |
| 2975 | EVENT_PTR(cycles_t), |
| 2976 | EVENT_PTR(cycles_ct), |
| 2977 | EVENT_PTR(mem_ld_hsw), |
| 2978 | EVENT_PTR(mem_st_hsw), |
| 2979 | NULL |
| 2980 | }; |
| 2981 | |
| 2982 | __init int intel_pmu_init(void) |
| 2983 | { |
| 2984 | union cpuid10_edx edx; |
| 2985 | union cpuid10_eax eax; |
| 2986 | union cpuid10_ebx ebx; |
| 2987 | struct event_constraint *c; |
| 2988 | unsigned int unused; |
| 2989 | struct extra_reg *er; |
| 2990 | int version, i; |
| 2991 | |
| 2992 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { |
| 2993 | switch (boot_cpu_data.x86) { |
| 2994 | case 0x6: |
| 2995 | return p6_pmu_init(); |
| 2996 | case 0xb: |
| 2997 | return knc_pmu_init(); |
| 2998 | case 0xf: |
| 2999 | return p4_pmu_init(); |
| 3000 | } |
| 3001 | return -ENODEV; |
| 3002 | } |
| 3003 | |
| 3004 | /* |
| 3005 | * Check whether the Architectural PerfMon supports |
| 3006 | * Branch Misses Retired hw_event or not. |
| 3007 | */ |
| 3008 | cpuid(10, &eax.full, &ebx.full, &unused, &edx.full); |
| 3009 | if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT) |
| 3010 | return -ENODEV; |
| 3011 | |
| 3012 | version = eax.split.version_id; |
| 3013 | if (version < 2) |
| 3014 | x86_pmu = core_pmu; |
| 3015 | else |
| 3016 | x86_pmu = intel_pmu; |
| 3017 | |
| 3018 | x86_pmu.version = version; |
| 3019 | x86_pmu.num_counters = eax.split.num_counters; |
| 3020 | x86_pmu.cntval_bits = eax.split.bit_width; |
| 3021 | x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; |
| 3022 | |
| 3023 | x86_pmu.events_maskl = ebx.full; |
| 3024 | x86_pmu.events_mask_len = eax.split.mask_length; |
| 3025 | |
| 3026 | x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters); |
| 3027 | |
| 3028 | /* |
| 3029 | * Quirk: v2 perfmon does not report fixed-purpose events, so |
| 3030 | * assume at least 3 events: |
| 3031 | */ |
| 3032 | if (version > 1) |
| 3033 | x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); |
| 3034 | |
| 3035 | if (boot_cpu_has(X86_FEATURE_PDCM)) { |
| 3036 | u64 capabilities; |
| 3037 | |
| 3038 | rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities); |
| 3039 | x86_pmu.intel_cap.capabilities = capabilities; |
| 3040 | } |
| 3041 | |
| 3042 | intel_ds_init(); |
| 3043 | |
| 3044 | x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */ |
| 3045 | |
| 3046 | /* |
| 3047 | * Install the hw-cache-events table: |
| 3048 | */ |
| 3049 | switch (boot_cpu_data.x86_model) { |
| 3050 | case 14: /* 65nm Core "Yonah" */ |
| 3051 | pr_cont("Core events, "); |
| 3052 | break; |
| 3053 | |
| 3054 | case 15: /* 65nm Core2 "Merom" */ |
| 3055 | x86_add_quirk(intel_clovertown_quirk); |
| 3056 | case 22: /* 65nm Core2 "Merom-L" */ |
| 3057 | case 23: /* 45nm Core2 "Penryn" */ |
| 3058 | case 29: /* 45nm Core2 "Dunnington (MP) */ |
| 3059 | memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, |
| 3060 | sizeof(hw_cache_event_ids)); |
| 3061 | |
| 3062 | intel_pmu_lbr_init_core(); |
| 3063 | |
| 3064 | x86_pmu.event_constraints = intel_core2_event_constraints; |
| 3065 | x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints; |
| 3066 | pr_cont("Core2 events, "); |
| 3067 | break; |
| 3068 | |
| 3069 | case 30: /* 45nm Nehalem */ |
| 3070 | case 26: /* 45nm Nehalem-EP */ |
| 3071 | case 46: /* 45nm Nehalem-EX */ |
| 3072 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, |
| 3073 | sizeof(hw_cache_event_ids)); |
| 3074 | memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, |
| 3075 | sizeof(hw_cache_extra_regs)); |
| 3076 | |
| 3077 | intel_pmu_lbr_init_nhm(); |
| 3078 | |
| 3079 | x86_pmu.event_constraints = intel_nehalem_event_constraints; |
| 3080 | x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints; |
| 3081 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; |
| 3082 | x86_pmu.extra_regs = intel_nehalem_extra_regs; |
| 3083 | |
| 3084 | x86_pmu.cpu_events = nhm_events_attrs; |
| 3085 | |
| 3086 | /* UOPS_ISSUED.STALLED_CYCLES */ |
| 3087 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = |
| 3088 | X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); |
| 3089 | /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ |
| 3090 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = |
| 3091 | X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); |
| 3092 | |
| 3093 | x86_add_quirk(intel_nehalem_quirk); |
| 3094 | |
| 3095 | pr_cont("Nehalem events, "); |
| 3096 | break; |
| 3097 | |
| 3098 | case 28: /* 45nm Atom "Pineview" */ |
| 3099 | case 38: /* 45nm Atom "Lincroft" */ |
| 3100 | case 39: /* 32nm Atom "Penwell" */ |
| 3101 | case 53: /* 32nm Atom "Cloverview" */ |
| 3102 | case 54: /* 32nm Atom "Cedarview" */ |
| 3103 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, |
| 3104 | sizeof(hw_cache_event_ids)); |
| 3105 | |
| 3106 | intel_pmu_lbr_init_atom(); |
| 3107 | |
| 3108 | x86_pmu.event_constraints = intel_gen_event_constraints; |
| 3109 | x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints; |
| 3110 | pr_cont("Atom events, "); |
| 3111 | break; |
| 3112 | |
| 3113 | case 55: /* 22nm Atom "Silvermont" */ |
| 3114 | case 76: /* 14nm Atom "Airmont" */ |
| 3115 | case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ |
| 3116 | memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, |
| 3117 | sizeof(hw_cache_event_ids)); |
| 3118 | memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, |
| 3119 | sizeof(hw_cache_extra_regs)); |
| 3120 | |
| 3121 | intel_pmu_lbr_init_atom(); |
| 3122 | |
| 3123 | x86_pmu.event_constraints = intel_slm_event_constraints; |
| 3124 | x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints; |
| 3125 | x86_pmu.extra_regs = intel_slm_extra_regs; |
| 3126 | x86_pmu.flags |= PMU_FL_HAS_RSP_1; |
| 3127 | pr_cont("Silvermont events, "); |
| 3128 | break; |
| 3129 | |
| 3130 | case 37: /* 32nm Westmere */ |
| 3131 | case 44: /* 32nm Westmere-EP */ |
| 3132 | case 47: /* 32nm Westmere-EX */ |
| 3133 | memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, |
| 3134 | sizeof(hw_cache_event_ids)); |
| 3135 | memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, |
| 3136 | sizeof(hw_cache_extra_regs)); |
| 3137 | |
| 3138 | intel_pmu_lbr_init_nhm(); |
| 3139 | |
| 3140 | x86_pmu.event_constraints = intel_westmere_event_constraints; |
| 3141 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; |
| 3142 | x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; |
| 3143 | x86_pmu.extra_regs = intel_westmere_extra_regs; |
| 3144 | x86_pmu.flags |= PMU_FL_HAS_RSP_1; |
| 3145 | |
| 3146 | x86_pmu.cpu_events = nhm_events_attrs; |
| 3147 | |
| 3148 | /* UOPS_ISSUED.STALLED_CYCLES */ |
| 3149 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = |
| 3150 | X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); |
| 3151 | /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ |
| 3152 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = |
| 3153 | X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); |
| 3154 | |
| 3155 | pr_cont("Westmere events, "); |
| 3156 | break; |
| 3157 | |
| 3158 | case 42: /* 32nm SandyBridge */ |
| 3159 | case 45: /* 32nm SandyBridge-E/EN/EP */ |
| 3160 | x86_add_quirk(intel_sandybridge_quirk); |
| 3161 | x86_add_quirk(intel_ht_bug); |
| 3162 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, |
| 3163 | sizeof(hw_cache_event_ids)); |
| 3164 | memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, |
| 3165 | sizeof(hw_cache_extra_regs)); |
| 3166 | |
| 3167 | intel_pmu_lbr_init_snb(); |
| 3168 | |
| 3169 | x86_pmu.event_constraints = intel_snb_event_constraints; |
| 3170 | x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; |
| 3171 | x86_pmu.pebs_aliases = intel_pebs_aliases_snb; |
| 3172 | if (boot_cpu_data.x86_model == 45) |
| 3173 | x86_pmu.extra_regs = intel_snbep_extra_regs; |
| 3174 | else |
| 3175 | x86_pmu.extra_regs = intel_snb_extra_regs; |
| 3176 | |
| 3177 | |
| 3178 | /* all extra regs are per-cpu when HT is on */ |
| 3179 | x86_pmu.flags |= PMU_FL_HAS_RSP_1; |
| 3180 | x86_pmu.flags |= PMU_FL_NO_HT_SHARING; |
| 3181 | |
| 3182 | x86_pmu.cpu_events = snb_events_attrs; |
| 3183 | |
| 3184 | /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ |
| 3185 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = |
| 3186 | X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); |
| 3187 | /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/ |
| 3188 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = |
| 3189 | X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1); |
| 3190 | |
| 3191 | pr_cont("SandyBridge events, "); |
| 3192 | break; |
| 3193 | |
| 3194 | case 58: /* 22nm IvyBridge */ |
| 3195 | case 62: /* 22nm IvyBridge-EP/EX */ |
| 3196 | x86_add_quirk(intel_ht_bug); |
| 3197 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, |
| 3198 | sizeof(hw_cache_event_ids)); |
| 3199 | /* dTLB-load-misses on IVB is different than SNB */ |
| 3200 | hw_cache_event_ids[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = 0x8108; /* DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK */ |
| 3201 | |
| 3202 | memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, |
| 3203 | sizeof(hw_cache_extra_regs)); |
| 3204 | |
| 3205 | intel_pmu_lbr_init_snb(); |
| 3206 | |
| 3207 | x86_pmu.event_constraints = intel_ivb_event_constraints; |
| 3208 | x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints; |
| 3209 | x86_pmu.pebs_aliases = intel_pebs_aliases_snb; |
| 3210 | if (boot_cpu_data.x86_model == 62) |
| 3211 | x86_pmu.extra_regs = intel_snbep_extra_regs; |
| 3212 | else |
| 3213 | x86_pmu.extra_regs = intel_snb_extra_regs; |
| 3214 | /* all extra regs are per-cpu when HT is on */ |
| 3215 | x86_pmu.flags |= PMU_FL_HAS_RSP_1; |
| 3216 | x86_pmu.flags |= PMU_FL_NO_HT_SHARING; |
| 3217 | |
| 3218 | x86_pmu.cpu_events = snb_events_attrs; |
| 3219 | |
| 3220 | /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ |
| 3221 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = |
| 3222 | X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); |
| 3223 | |
| 3224 | pr_cont("IvyBridge events, "); |
| 3225 | break; |
| 3226 | |
| 3227 | |
| 3228 | case 60: /* 22nm Haswell Core */ |
| 3229 | case 63: /* 22nm Haswell Server */ |
| 3230 | case 69: /* 22nm Haswell ULT */ |
| 3231 | case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */ |
| 3232 | x86_add_quirk(intel_ht_bug); |
| 3233 | x86_pmu.late_ack = true; |
| 3234 | memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); |
| 3235 | memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); |
| 3236 | |
| 3237 | intel_pmu_lbr_init_hsw(); |
| 3238 | |
| 3239 | x86_pmu.event_constraints = intel_hsw_event_constraints; |
| 3240 | x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints; |
| 3241 | x86_pmu.extra_regs = intel_snbep_extra_regs; |
| 3242 | x86_pmu.pebs_aliases = intel_pebs_aliases_snb; |
| 3243 | /* all extra regs are per-cpu when HT is on */ |
| 3244 | x86_pmu.flags |= PMU_FL_HAS_RSP_1; |
| 3245 | x86_pmu.flags |= PMU_FL_NO_HT_SHARING; |
| 3246 | |
| 3247 | x86_pmu.hw_config = hsw_hw_config; |
| 3248 | x86_pmu.get_event_constraints = hsw_get_event_constraints; |
| 3249 | x86_pmu.cpu_events = hsw_events_attrs; |
| 3250 | x86_pmu.lbr_double_abort = true; |
| 3251 | pr_cont("Haswell events, "); |
| 3252 | break; |
| 3253 | |
| 3254 | case 61: /* 14nm Broadwell Core-M */ |
| 3255 | case 86: /* 14nm Broadwell Xeon D */ |
| 3256 | x86_pmu.late_ack = true; |
| 3257 | memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); |
| 3258 | memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); |
| 3259 | |
| 3260 | /* L3_MISS_LOCAL_DRAM is BIT(26) in Broadwell */ |
| 3261 | hw_cache_extra_regs[C(LL)][C(OP_READ)][C(RESULT_MISS)] = HSW_DEMAND_READ | |
| 3262 | BDW_L3_MISS|HSW_SNOOP_DRAM; |
| 3263 | hw_cache_extra_regs[C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = HSW_DEMAND_WRITE|BDW_L3_MISS| |
| 3264 | HSW_SNOOP_DRAM; |
| 3265 | hw_cache_extra_regs[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = HSW_DEMAND_READ| |
| 3266 | BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM; |
| 3267 | hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = HSW_DEMAND_WRITE| |
| 3268 | BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM; |
| 3269 | |
| 3270 | intel_pmu_lbr_init_hsw(); |
| 3271 | |
| 3272 | x86_pmu.event_constraints = intel_bdw_event_constraints; |
| 3273 | x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints; |
| 3274 | x86_pmu.extra_regs = intel_snbep_extra_regs; |
| 3275 | x86_pmu.pebs_aliases = intel_pebs_aliases_snb; |
| 3276 | /* all extra regs are per-cpu when HT is on */ |
| 3277 | x86_pmu.flags |= PMU_FL_HAS_RSP_1; |
| 3278 | x86_pmu.flags |= PMU_FL_NO_HT_SHARING; |
| 3279 | |
| 3280 | x86_pmu.hw_config = hsw_hw_config; |
| 3281 | x86_pmu.get_event_constraints = hsw_get_event_constraints; |
| 3282 | x86_pmu.cpu_events = hsw_events_attrs; |
| 3283 | x86_pmu.limit_period = bdw_limit_period; |
| 3284 | pr_cont("Broadwell events, "); |
| 3285 | break; |
| 3286 | |
| 3287 | default: |
| 3288 | switch (x86_pmu.version) { |
| 3289 | case 1: |
| 3290 | x86_pmu.event_constraints = intel_v1_event_constraints; |
| 3291 | pr_cont("generic architected perfmon v1, "); |
| 3292 | break; |
| 3293 | default: |
| 3294 | /* |
| 3295 | * default constraints for v2 and up |
| 3296 | */ |
| 3297 | x86_pmu.event_constraints = intel_gen_event_constraints; |
| 3298 | pr_cont("generic architected perfmon, "); |
| 3299 | break; |
| 3300 | } |
| 3301 | } |
| 3302 | |
| 3303 | if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) { |
| 3304 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", |
| 3305 | x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC); |
| 3306 | x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC; |
| 3307 | } |
| 3308 | x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; |
| 3309 | |
| 3310 | if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) { |
| 3311 | WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", |
| 3312 | x86_pmu.num_counters_fixed, INTEL_PMC_MAX_FIXED); |
| 3313 | x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED; |
| 3314 | } |
| 3315 | |
| 3316 | x86_pmu.intel_ctrl |= |
| 3317 | ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED; |
| 3318 | |
| 3319 | if (x86_pmu.event_constraints) { |
| 3320 | /* |
| 3321 | * event on fixed counter2 (REF_CYCLES) only works on this |
| 3322 | * counter, so do not extend mask to generic counters |
| 3323 | */ |
| 3324 | for_each_event_constraint(c, x86_pmu.event_constraints) { |
| 3325 | if (c->cmask != FIXED_EVENT_FLAGS |
| 3326 | || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) { |
| 3327 | continue; |
| 3328 | } |
| 3329 | |
| 3330 | c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; |
| 3331 | c->weight += x86_pmu.num_counters; |
| 3332 | } |
| 3333 | } |
| 3334 | |
| 3335 | /* |
| 3336 | * Access LBR MSR may cause #GP under certain circumstances. |
| 3337 | * E.g. KVM doesn't support LBR MSR |
| 3338 | * Check all LBT MSR here. |
| 3339 | * Disable LBR access if any LBR MSRs can not be accessed. |
| 3340 | */ |
| 3341 | if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL)) |
| 3342 | x86_pmu.lbr_nr = 0; |
| 3343 | for (i = 0; i < x86_pmu.lbr_nr; i++) { |
| 3344 | if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) && |
| 3345 | check_msr(x86_pmu.lbr_to + i, 0xffffUL))) |
| 3346 | x86_pmu.lbr_nr = 0; |
| 3347 | } |
| 3348 | |
| 3349 | /* |
| 3350 | * Access extra MSR may cause #GP under certain circumstances. |
| 3351 | * E.g. KVM doesn't support offcore event |
| 3352 | * Check all extra_regs here. |
| 3353 | */ |
| 3354 | if (x86_pmu.extra_regs) { |
| 3355 | for (er = x86_pmu.extra_regs; er->msr; er++) { |
| 3356 | er->extra_msr_access = check_msr(er->msr, 0x1ffUL); |
| 3357 | /* Disable LBR select mapping */ |
| 3358 | if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access) |
| 3359 | x86_pmu.lbr_sel_map = NULL; |
| 3360 | } |
| 3361 | } |
| 3362 | |
| 3363 | /* Support full width counters using alternative MSR range */ |
| 3364 | if (x86_pmu.intel_cap.full_width_write) { |
| 3365 | x86_pmu.max_period = x86_pmu.cntval_mask; |
| 3366 | x86_pmu.perfctr = MSR_IA32_PMC0; |
| 3367 | pr_cont("full-width counters, "); |
| 3368 | } |
| 3369 | |
| 3370 | return 0; |
| 3371 | } |
| 3372 | |
| 3373 | /* |
| 3374 | * HT bug: phase 2 init |
| 3375 | * Called once we have valid topology information to check |
| 3376 | * whether or not HT is enabled |
| 3377 | * If HT is off, then we disable the workaround |
| 3378 | */ |
| 3379 | static __init int fixup_ht_bug(void) |
| 3380 | { |
| 3381 | int cpu = smp_processor_id(); |
| 3382 | int w, c; |
| 3383 | /* |
| 3384 | * problem not present on this CPU model, nothing to do |
| 3385 | */ |
| 3386 | if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED)) |
| 3387 | return 0; |
| 3388 | |
| 3389 | w = cpumask_weight(topology_thread_cpumask(cpu)); |
| 3390 | if (w > 1) { |
| 3391 | pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n"); |
| 3392 | return 0; |
| 3393 | } |
| 3394 | |
| 3395 | watchdog_nmi_disable_all(); |
| 3396 | |
| 3397 | x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED); |
| 3398 | |
| 3399 | x86_pmu.commit_scheduling = NULL; |
| 3400 | x86_pmu.start_scheduling = NULL; |
| 3401 | x86_pmu.stop_scheduling = NULL; |
| 3402 | |
| 3403 | watchdog_nmi_enable_all(); |
| 3404 | |
| 3405 | get_online_cpus(); |
| 3406 | |
| 3407 | for_each_online_cpu(c) { |
| 3408 | free_excl_cntrs(c); |
| 3409 | } |
| 3410 | |
| 3411 | put_online_cpus(); |
| 3412 | pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n"); |
| 3413 | return 0; |
| 3414 | } |
| 3415 | subsys_initcall(fixup_ht_bug) |