arch/x86/kernel/cpu/perf_event_amd_uncore.c

   1 /*
   2  * Copyright (C) 2013 Advanced Micro Devices, Inc.
   3  *
   4  * Author: Jacob Shin <jacob.shin@amd.com>
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License version 2 as
   8  * published by the Free Software Foundation.
   9  */
  10
  11 #include <linux/perf_event.h>
  12 #include <linux/percpu.h>
  13 #include <linux/types.h>
  14 #include <linux/slab.h>
  15 #include <linux/init.h>
  16 #include <linux/cpu.h>
  17 #include <linux/cpumask.h>
  18
  19 #include <asm/cpufeature.h>
  20 #include <asm/perf_event.h>
  21 #include <asm/msr.h>
  22
/* number of counters per unit; stored in amd_uncore.num_counters */
#define NUM_COUNTERS_NB         4
#define NUM_COUNTERS_L2         4
/* size of the amd_uncore.events[] array */
#define MAX_COUNTERS            NUM_COUNTERS_NB

/* rdpmc index of the first NB / L2 counter; the counter index is added to it */
#define RDPMC_BASE_NB           6
#define RDPMC_BASE_L2           10

/*
 * amd_uncore_read() shifts raw counter values left by this many bits before
 * subtracting them and shifts the difference back, so only the low
 * (64 - COUNTER_SHIFT) bits of a raw value take part in the delta.
 */
#define COUNTER_SHIFT           16
  31
/*
 * State of one uncore unit (NB or L2). The per-cpu slots of several cpus may
 * point at the same instance; refcnt counts those users.
 */
struct amd_uncore {
        /* unit identifier; cpus whose ids match share one structure */
        int id;
        /* number of cpus pointing at this structure; freed when it drops to 0 */
        int refcnt;
        /* cpu that events for this unit are remapped to */
        int cpu;
        /* number of entries of events[] that are actually used */
        int num_counters;
        /* added to the counter index to form the rdpmc index */
        int rdpmc_base;
        /* first control MSR; counter idx uses msr_base + 2*idx (+1 for the count) */
        u32 msr_base;
        /* mask that records which cpu currently owns the unit */
        cpumask_t *active_mask;
        /* pmu this unit belongs to; used when migrating the perf context */
        struct pmu *pmu;
        /* event occupying each hardware counter, NULL when the counter is free */
        struct perf_event *events[MAX_COUNTERS];
        /* superseded duplicate that is kfree()d once the cpu is online */
        struct amd_uncore *free_when_cpu_online;
};
  44
/*
 * Per-cpu slots holding the NB / L2 structure that serves each cpu. The base
 * pointer itself stays NULL when the corresponding feature is not in use.
 */
static struct amd_uncore * __percpu *amd_uncore_nb;
static struct amd_uncore * __percpu *amd_uncore_l2;

/* tentative definitions so code above the initializers can refer to the pmus */
static struct pmu amd_nb_pmu;
static struct pmu amd_l2_pmu;

/* cpus currently owning a NB / L2 unit; exported through the "cpumask" attribute */
static cpumask_t amd_nb_active_mask;
static cpumask_t amd_l2_active_mask;
  53
  54 static bool is_nb_event(struct perf_event *event)
  55 {
  56         return event->pmu->type == amd_nb_pmu.type;
  57 }
  58
  59 static bool is_l2_event(struct perf_event *event)
  60 {
  61         return event->pmu->type == amd_l2_pmu.type;
  62 }
  63
  64 static struct amd_uncore *event_to_amd_uncore(struct perf_event *event)
  65 {
  66         if (is_nb_event(event) && amd_uncore_nb)
  67                 return *per_cpu_ptr(amd_uncore_nb, event->cpu);
  68         else if (is_l2_event(event) && amd_uncore_l2)
  69                 return *per_cpu_ptr(amd_uncore_l2, event->cpu);
  70
  71         return NULL;
  72 }
  73
  74 static void amd_uncore_read(struct perf_event *event)
  75 {
  76         struct hw_perf_event *hwc = &event->hw;
  77         u64 prev, new;
  78         s64 delta;
  79
  80         /*
  81          * since we do not enable counter overflow interrupts,
  82          * we do not have to worry about prev_count changing on us
  83          */
  84
  85         prev = local64_read(&hwc->prev_count);
  86         rdpmcl(hwc->event_base_rdpmc, new);
  87         local64_set(&hwc->prev_count, new);
  88         delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
  89         delta >>= COUNTER_SHIFT;
  90         local64_add(delta, &event->count);
  91 }
  92
  93 static void amd_uncore_start(struct perf_event *event, int flags)
  94 {
  95         struct hw_perf_event *hwc = &event->hw;
  96
  97         if (flags & PERF_EF_RELOAD)
  98                 wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
  99
 100         hwc->state = 0;
 101         wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
 102         perf_event_update_userpage(event);
 103 }
 104
 105 static void amd_uncore_stop(struct perf_event *event, int flags)
 106 {
 107         struct hw_perf_event *hwc = &event->hw;
 108
 109         wrmsrl(hwc->config_base, hwc->config);
 110         hwc->state |= PERF_HES_STOPPED;
 111
 112         if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
 113                 amd_uncore_read(event);
 114                 hwc->state |= PERF_HES_UPTODATE;
 115         }
 116 }
 117
 118 static int amd_uncore_add(struct perf_event *event, int flags)
 119 {
 120         int i;
 121         struct amd_uncore *uncore = event_to_amd_uncore(event);
 122         struct hw_perf_event *hwc = &event->hw;
 123
 124         /* are we already assigned? */
 125         if (hwc->idx != -1 && uncore->events[hwc->idx] == event)
 126                 goto out;
 127
 128         for (i = 0; i < uncore->num_counters; i++) {
 129                 if (uncore->events[i] == event) {
 130                         hwc->idx = i;
 131                         goto out;
 132                 }
 133         }
 134
 135         /* if not, take the first available counter */
 136         hwc->idx = -1;
 137         for (i = 0; i < uncore->num_counters; i++) {
 138                 if (cmpxchg(&uncore->events[i], NULL, event) == NULL) {
 139                         hwc->idx = i;
 140                         break;
 141                 }
 142         }
 143
 144 out:
 145         if (hwc->idx == -1)
 146                 return -EBUSY;
 147
 148         hwc->config_base = uncore->msr_base + (2 * hwc->idx);
 149         hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx);
 150         hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
 151         hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
 152
 153         if (flags & PERF_EF_START)
 154                 amd_uncore_start(event, PERF_EF_RELOAD);
 155
 156         return 0;
 157 }
 158
 159 static void amd_uncore_del(struct perf_event *event, int flags)
 160 {
 161         int i;
 162         struct amd_uncore *uncore = event_to_amd_uncore(event);
 163         struct hw_perf_event *hwc = &event->hw;
 164
 165         amd_uncore_stop(event, PERF_EF_UPDATE);
 166
 167         for (i = 0; i < uncore->num_counters; i++) {
 168                 if (cmpxchg(&uncore->events[i], event, NULL) == event)
 169                         break;
 170         }
 171
 172         hwc->idx = -1;
 173 }
 174
 175 static int amd_uncore_event_init(struct perf_event *event)
 176 {
 177         struct amd_uncore *uncore;
 178         struct hw_perf_event *hwc = &event->hw;
 179
 180         if (event->attr.type != event->pmu->type)
 181                 return -ENOENT;
 182
 183         /*
 184          * NB and L2 counters (MSRs) are shared across all cores that share the
 185          * same NB / L2 cache. Interrupts can be directed to a single target
 186          * core, however, event counts generated by processes running on other
 187          * cores cannot be masked out. So we do not support sampling and
 188          * per-thread events.
 189          */
 190         if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
 191                 return -EINVAL;
 192
 193         /* NB and L2 counters do not have usr/os/guest/host bits */
 194         if (event->attr.exclude_user || event->attr.exclude_kernel ||
 195             event->attr.exclude_host || event->attr.exclude_guest)
 196                 return -EINVAL;
 197
 198         /* and we do not enable counter overflow interrupts */
 199         hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
 200         hwc->idx = -1;
 201
 202         if (event->cpu < 0)
 203                 return -EINVAL;
 204
 205         uncore = event_to_amd_uncore(event);
 206         if (!uncore)
 207                 return -ENODEV;
 208
 209         /*
 210          * since request can come in to any of the shared cores, we will remap
 211          * to a single common cpu.
 212          */
 213         event->cpu = uncore->cpu;
 214
 215         return 0;
 216 }
 217
 218 static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
 219                                             struct device_attribute *attr,
 220                                             char *buf)
 221 {
 222         int n;
 223         cpumask_t *active_mask;
 224         struct pmu *pmu = dev_get_drvdata(dev);
 225
 226         if (pmu->type == amd_nb_pmu.type)
 227                 active_mask = &amd_nb_active_mask;
 228         else if (pmu->type == amd_l2_pmu.type)
 229                 active_mask = &amd_l2_active_mask;
 230         else
 231                 return 0;
 232
 233         n = cpulist_scnprintf(buf, PAGE_SIZE - 2, active_mask);
 234         buf[n++] = '\n';
 235         buf[n] = '\0';
 236         return n;
 237 }
 238 static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);
 239
/* unnamed attribute group: holds the read-only "cpumask" attribute */
static struct attribute *amd_uncore_attrs[] = {
        &dev_attr_cpumask.attr,
        NULL,
};

static struct attribute_group amd_uncore_attr_group = {
        .attrs = amd_uncore_attrs,
};

/* bit ranges of attr.config that make up the "event" and "umask" fields */
PMU_FORMAT_ATTR(event, "config:0-7,32-35");
PMU_FORMAT_ATTR(umask, "config:8-15");

static struct attribute *amd_uncore_format_attr[] = {
        &format_attr_event.attr,
        &format_attr_umask.attr,
        NULL,
};

/* "format" group describing the fields above */
static struct attribute_group amd_uncore_format_group = {
        .name = "format",
        .attrs = amd_uncore_format_attr,
};

/* NULL-terminated list of groups shared by both the NB and the L2 pmu */
static const struct attribute_group *amd_uncore_attr_groups[] = {
        &amd_uncore_attr_group,
        &amd_uncore_format_group,
        NULL,
};
 268
/* NB uncore pmu, registered as "amd_nb"; shares all callbacks with amd_l2_pmu */
static struct pmu amd_nb_pmu = {
        .attr_groups    = amd_uncore_attr_groups,
        .name           = "amd_nb",
        .event_init     = amd_uncore_event_init,
        .add            = amd_uncore_add,
        .del            = amd_uncore_del,
        .start          = amd_uncore_start,
        .stop           = amd_uncore_stop,
        .read           = amd_uncore_read,
};
 279
/* L2 uncore pmu, registered as "amd_l2"; shares all callbacks with amd_nb_pmu */
static struct pmu amd_l2_pmu = {
        .attr_groups    = amd_uncore_attr_groups,
        .name           = "amd_l2",
        .event_init     = amd_uncore_event_init,
        .add            = amd_uncore_add,
        .del            = amd_uncore_del,
        .start          = amd_uncore_start,
        .stop           = amd_uncore_stop,
        .read           = amd_uncore_read,
};
 290
 291 static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
 292 {
 293         return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL,
 294                         cpu_to_node(cpu));
 295 }
 296
 297 static void amd_uncore_cpu_up_prepare(unsigned int cpu)
 298 {
 299         struct amd_uncore *uncore;
 300
 301         if (amd_uncore_nb) {
 302                 uncore = amd_uncore_alloc(cpu);
 303                 uncore->cpu = cpu;
 304                 uncore->num_counters = NUM_COUNTERS_NB;
 305                 uncore->rdpmc_base = RDPMC_BASE_NB;
 306                 uncore->msr_base = MSR_F15H_NB_PERF_CTL;
 307                 uncore->active_mask = &amd_nb_active_mask;
 308                 uncore->pmu = &amd_nb_pmu;
 309                 *per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
 310         }
 311
 312         if (amd_uncore_l2) {
 313                 uncore = amd_uncore_alloc(cpu);
 314                 uncore->cpu = cpu;
 315                 uncore->num_counters = NUM_COUNTERS_L2;
 316                 uncore->rdpmc_base = RDPMC_BASE_L2;
 317                 uncore->msr_base = MSR_F16H_L2I_PERF_CTL;
 318                 uncore->active_mask = &amd_l2_active_mask;
 319                 uncore->pmu = &amd_l2_pmu;
 320                 *per_cpu_ptr(amd_uncore_l2, cpu) = uncore;
 321         }
 322 }
 323
 324 static struct amd_uncore *
 325 amd_uncore_find_online_sibling(struct amd_uncore *this,
 326                                struct amd_uncore * __percpu *uncores)
 327 {
 328         unsigned int cpu;
 329         struct amd_uncore *that;
 330
 331         for_each_online_cpu(cpu) {
 332                 that = *per_cpu_ptr(uncores, cpu);
 333
 334                 if (!that)
 335                         continue;
 336
 337                 if (this == that)
 338                         continue;
 339
 340                 if (this->id == that->id) {
 341                         that->free_when_cpu_online = this;
 342                         this = that;
 343                         break;
 344                 }
 345         }
 346
 347         this->refcnt++;
 348         return this;
 349 }
 350
 351 static void amd_uncore_cpu_starting(unsigned int cpu)
 352 {
 353         unsigned int eax, ebx, ecx, edx;
 354         struct amd_uncore *uncore;
 355
 356         if (amd_uncore_nb) {
 357                 uncore = *per_cpu_ptr(amd_uncore_nb, cpu);
 358                 cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
 359                 uncore->id = ecx & 0xff;
 360
 361                 uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_nb);
 362                 *per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
 363         }
 364
 365         if (amd_uncore_l2) {
 366                 unsigned int apicid = cpu_data(cpu).apicid;
 367                 unsigned int nshared;
 368
 369                 uncore = *per_cpu_ptr(amd_uncore_l2, cpu);
 370                 cpuid_count(0x8000001d, 2, &eax, &ebx, &ecx, &edx);
 371                 nshared = ((eax >> 14) & 0xfff) + 1;
 372                 uncore->id = apicid - (apicid % nshared);
 373
 374                 uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_l2);
 375                 *per_cpu_ptr(amd_uncore_l2, cpu) = uncore;
 376         }
 377 }
 378
 379 static void uncore_online(unsigned int cpu,
 380                           struct amd_uncore * __percpu *uncores)
 381 {
 382         struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
 383
 384         kfree(uncore->free_when_cpu_online);
 385         uncore->free_when_cpu_online = NULL;
 386
 387         if (cpu == uncore->cpu)
 388                 cpumask_set_cpu(cpu, uncore->active_mask);
 389 }
 390
 391 static void amd_uncore_cpu_online(unsigned int cpu)
 392 {
 393         if (amd_uncore_nb)
 394                 uncore_online(cpu, amd_uncore_nb);
 395
 396         if (amd_uncore_l2)
 397                 uncore_online(cpu, amd_uncore_l2);
 398 }
 399
 400 static void uncore_down_prepare(unsigned int cpu,
 401                                 struct amd_uncore * __percpu *uncores)
 402 {
 403         unsigned int i;
 404         struct amd_uncore *this = *per_cpu_ptr(uncores, cpu);
 405
 406         if (this->cpu != cpu)
 407                 return;
 408
 409         /* this cpu is going down, migrate to a shared sibling if possible */
 410         for_each_online_cpu(i) {
 411                 struct amd_uncore *that = *per_cpu_ptr(uncores, i);
 412
 413                 if (cpu == i)
 414                         continue;
 415
 416                 if (this == that) {
 417                         perf_pmu_migrate_context(this->pmu, cpu, i);
 418                         cpumask_clear_cpu(cpu, that->active_mask);
 419                         cpumask_set_cpu(i, that->active_mask);
 420                         that->cpu = i;
 421                         break;
 422                 }
 423         }
 424 }
 425
 426 static void amd_uncore_cpu_down_prepare(unsigned int cpu)
 427 {
 428         if (amd_uncore_nb)
 429                 uncore_down_prepare(cpu, amd_uncore_nb);
 430
 431         if (amd_uncore_l2)
 432                 uncore_down_prepare(cpu, amd_uncore_l2);
 433 }
 434
 435 static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
 436 {
 437         struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
 438
 439         if (cpu == uncore->cpu)
 440                 cpumask_clear_cpu(cpu, uncore->active_mask);
 441
 442         if (!--uncore->refcnt)
 443                 kfree(uncore);
 444         *per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
 445 }
 446
 447 static void amd_uncore_cpu_dead(unsigned int cpu)
 448 {
 449         if (amd_uncore_nb)
 450                 uncore_dead(cpu, amd_uncore_nb);
 451
 452         if (amd_uncore_l2)
 453                 uncore_dead(cpu, amd_uncore_l2);
 454 }
 455
 456 static int
 457 amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action,
 458                         void *hcpu)
 459 {
 460         unsigned int cpu = (long)hcpu;
 461
 462         switch (action & ~CPU_TASKS_FROZEN) {
 463         case CPU_UP_PREPARE:
 464                 amd_uncore_cpu_up_prepare(cpu);
 465                 break;
 466
 467         case CPU_STARTING:
 468                 amd_uncore_cpu_starting(cpu);
 469                 break;
 470
 471         case CPU_ONLINE:
 472                 amd_uncore_cpu_online(cpu);
 473                 break;
 474
 475         case CPU_DOWN_PREPARE:
 476                 amd_uncore_cpu_down_prepare(cpu);
 477                 break;
 478
 479         case CPU_UP_CANCELED:
 480         case CPU_DEAD:
 481                 amd_uncore_cpu_dead(cpu);
 482                 break;
 483
 484         default:
 485                 break;
 486         }
 487
 488         return NOTIFY_OK;
 489 }
 490
/*
 * Hotplug notifier registered by amd_uncore_init().
 * NOTE(review): the priority is one above CPU_PRI_PERF, presumably so that
 * this notifier is ordered relative to the core perf notifier - confirm.
 */
static struct notifier_block amd_uncore_cpu_notifier_block = {
        .notifier_call  = amd_uncore_cpu_notifier,
        .priority       = CPU_PRI_PERF + 1,
};
 495
 496 static void __init init_cpu_already_online(void *dummy)
 497 {
 498         unsigned int cpu = smp_processor_id();
 499
 500         amd_uncore_cpu_starting(cpu);
 501         amd_uncore_cpu_online(cpu);
 502 }
 503
 504 static int __init amd_uncore_init(void)
 505 {
 506         unsigned int cpu;
 507         int ret = -ENODEV;
 508
 509         if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
 510                 return -ENODEV;
 511
 512         if (!cpu_has_topoext)
 513                 return -ENODEV;
 514
 515         if (cpu_has_perfctr_nb) {
 516                 amd_uncore_nb = alloc_percpu(struct amd_uncore *);
 517                 perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1);
 518
 519                 printk(KERN_INFO "perf: AMD NB counters detected\n");
 520                 ret = 0;
 521         }
 522
 523         if (cpu_has_perfctr_l2) {
 524                 amd_uncore_l2 = alloc_percpu(struct amd_uncore *);
 525                 perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1);
 526
 527                 printk(KERN_INFO "perf: AMD L2I counters detected\n");
 528                 ret = 0;
 529         }
 530
 531         if (ret)
 532                 return -ENODEV;
 533
 534         get_online_cpus();
 535         /* init cpus already online before registering for hotplug notifier */
 536         for_each_online_cpu(cpu) {
 537                 amd_uncore_cpu_up_prepare(cpu);
 538                 smp_call_function_single(cpu, init_cpu_already_online, NULL, 1);
 539         }
 540
 541         register_cpu_notifier(&amd_uncore_cpu_notifier_block);
 542         put_online_cpus();
 543
 544         return 0;
 545 }
 546 device_initcall(amd_uncore_init);