/* Performance event support for sparc64.
 *
 * Copyright (C) 2009, 2010 David S. Miller <davem@davemloft.net>
 *
 * This code is based almost entirely upon the x86 perf event
 * code, which is:
 *
 *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
 *  Copyright (C) 2009 Jaswinder Singh Rajput
 *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
 *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 */

#include <linux/perf_event.h>
#include <linux/kprobes.h>
#include <linux/ftrace.h>
#include <linux/kernel.h>
#include <linux/kdebug.h>
#include <linux/mutex.h>

#include <asm/stacktrace.h>
#include <asm/cpudata.h>
#include <asm/uaccess.h>
#include <linux/atomic.h>
#include <asm/nmi.h>
#include <asm/pcr.h>
#include <asm/perfctr.h>
#include <asm/cacheflush.h>

#include "kernel.h"
#include "kstack.h"

/* Sparc64 chips have two performance counters, 32-bits each, with
 * overflow interrupts generated on transition from 0xffffffff to 0.
 * The counters are accessed in one go using a 64-bit register.
 *
 * Both counters are controlled using a single control register.  The
 * only way to stop all sampling is to clear all of the context (user,
 * supervisor, hypervisor) sampling enable bits.  But these bits apply
 * to both counters, thus the two counters can't be enabled/disabled
 * individually.
 *
 * The control register has two event fields, one for each of the two
 * counters.  It's thus nearly impossible to have one counter going
 * while keeping the other one stopped.  Therefore it is possible to
 * get overflow interrupts for counters not currently "in use" and
 * that condition must be checked in the overflow interrupt handler.
 *
 * So we use a hack, in that we program inactive counters with the
 * "sw_count0" and "sw_count1" events.  These count how many times
 * the instruction "sethi %hi(0xfc000), %g0" is executed.  It's an
 * unusual way to encode a NOP and therefore will not trigger in
 * normal code.
 */

#define MAX_HWEVENTS			2
#define MAX_PERIOD			((1UL << 32) - 1)

#define PIC_UPPER_INDEX			0
#define PIC_LOWER_INDEX			1
#define PIC_NO_INDEX			-1
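
/* Note added for clarity (not in the original sources): both 32-bit
 * counters live in the single 64-bit %pic register, the upper counter
 * in bits 63:32 and the lower counter in bits 31:0; see read_pmc() and
 * write_pmc() below.  MAX_PERIOD is therefore simply the largest count
 * either 32-bit half can accumulate before wrapping.
 */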

struct cpu_hw_events {
	/* Number of events currently scheduled onto this cpu.
	 * This tells how many entries in the arrays below
	 * are valid.
	 */
	int			n_events;

	/* Number of new events added since the last hw_perf_disable().
	 * This works because the perf event layer always adds new
	 * events inside of a perf_{disable,enable}() sequence.
	 */
	int			n_added;

	/* Array of events currently scheduled on this cpu. */
	struct perf_event	*event[MAX_HWEVENTS];

	/* Array of encoded longs, specifying the %pcr register
	 * encoding and the mask of PIC counters this event can
	 * be scheduled on.  See perf_event_encode() et al.
	 */
	unsigned long		events[MAX_HWEVENTS];

	/* The current counter index assigned to an event.  When the
	 * event hasn't been programmed into the cpu yet, this will
	 * hold PIC_NO_INDEX.  The event->hw.idx value tells us where
	 * we ought to schedule the event.
	 */
	int			current_idx[MAX_HWEVENTS];

	/* Software copy of %pcr register on this cpu. */
	u64			pcr;

	/* Enabled/disable state.  */
	int			enabled;

	unsigned int		group_flag;
};
DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, };

/* An event map describes the characteristics of a performance
 * counter event.  In particular it gives the encoding as well as
 * a mask telling which counters the event can be measured on.
 */
struct perf_event_map {
	u16	encoding;
	u8	pic_mask;
#define PIC_NONE	0x00
#define PIC_UPPER	0x01
#define PIC_LOWER	0x02
};

/* Encode a perf_event_map entry into a long.  */
static unsigned long perf_event_encode(const struct perf_event_map *pmap)
{
	return ((unsigned long) pmap->encoding << 16) | pmap->pic_mask;
}

static u8 perf_event_get_msk(unsigned long val)
{
	return val & 0xff;
}

static u64 perf_event_get_enc(unsigned long val)
{
	return val >> 16;
}
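
/* Illustrative sketch, not part of the original driver: how an event map
 * entry round-trips through the helpers above.  The sample values match
 * the ultra3 cache-miss entry defined later in this file.
 */
static void __maybe_unused perf_event_encode_example(void)
{
	const struct perf_event_map map = { .encoding = 0x0009, .pic_mask = PIC_UPPER };
	unsigned long val = perf_event_encode(&map);

	/* val == 0x00090001: event encoding in bits 31:16, counter mask in bits 7:0 */
	WARN_ON(perf_event_get_enc(val) != 0x0009);
	WARN_ON(perf_event_get_msk(val) != PIC_UPPER);
}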

#define C(x) PERF_COUNT_HW_CACHE_##x

#define CACHE_OP_UNSUPPORTED	0xfffe
#define CACHE_OP_NONSENSE	0xffff

typedef struct perf_event_map cache_map_t
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX];

struct sparc_pmu {
	const struct perf_event_map	*(*event_map)(int);
	const cache_map_t		*cache_map;
	int				max_events;
	int				upper_shift;
	int				lower_shift;
	int				event_mask;
	int				hv_bit;
	int				irq_bit;
	int				upper_nop;
	int				lower_nop;
};

static const struct perf_event_map ultra3_perfmon_event_map[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER },
	[PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER },
};

static const struct perf_event_map *ultra3_event_map(int event_id)
{
	return &ultra3_perfmon_event_map[event_id];
}

static const cache_map_t ultra3_cache_map = {
[C(L1D)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x09, PIC_UPPER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { 0x0a, PIC_LOWER },
		[C(RESULT_MISS)] = { 0x0a, PIC_UPPER },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(L1I)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x09, PIC_UPPER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_NONSENSE },
		[C(RESULT_MISS)] = { CACHE_OP_NONSENSE },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0c, PIC_UPPER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER },
		[C(RESULT_MISS)] = { 0x0c, PIC_UPPER },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(DTLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x12, PIC_UPPER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(ITLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x11, PIC_UPPER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(BPU)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(NODE)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
};

static const struct sparc_pmu ultra3_pmu = {
	.event_map	= ultra3_event_map,
	.cache_map	= &ultra3_cache_map,
	.max_events	= ARRAY_SIZE(ultra3_perfmon_event_map),
	.upper_shift	= 11,
	.lower_shift	= 4,
	.event_mask	= 0x3f,
	.upper_nop	= 0x1c,
	.lower_nop	= 0x14,
};

/* Niagara1 is very limited.  The upper PIC is hard-locked to count
 * only instructions, so it is free running which creates all kinds of
 * problems.  Some hardware designs make one wonder if the creator
 * even looked at how this stuff gets used by software.
 */
static const struct perf_event_map niagara1_perfmon_event_map[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = { 0x00, PIC_UPPER },
	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x00, PIC_UPPER },
	[PERF_COUNT_HW_CACHE_REFERENCES] = { 0, PIC_NONE },
	[PERF_COUNT_HW_CACHE_MISSES] = { 0x03, PIC_LOWER },
};

static const struct perf_event_map *niagara1_event_map(int event_id)
{
	return &niagara1_perfmon_event_map[event_id];
}

static const cache_map_t niagara1_cache_map = {
[C(L1D)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x03, PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x03, PIC_LOWER, },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(L1I)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x00, PIC_UPPER },
		[C(RESULT_MISS)] = { 0x02, PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_NONSENSE },
		[C(RESULT_MISS)] = { CACHE_OP_NONSENSE },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x07, PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x07, PIC_LOWER, },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(DTLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x05, PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(ITLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x04, PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(BPU)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(NODE)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
};

static const struct sparc_pmu niagara1_pmu = {
	.event_map	= niagara1_event_map,
	.cache_map	= &niagara1_cache_map,
	.max_events	= ARRAY_SIZE(niagara1_perfmon_event_map),
	.upper_shift	= 0,
	.lower_shift	= 4,
	.event_mask	= 0x7,
	.upper_nop	= 0x0,
	.lower_nop	= 0x0,
};

static const struct perf_event_map niagara2_perfmon_event_map[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = { 0x02ff, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x02ff, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0208, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_CACHE_MISSES] = { 0x0302, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x0201, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_BRANCH_MISSES] = { 0x0202, PIC_UPPER | PIC_LOWER },
};

static const struct perf_event_map *niagara2_event_map(int event_id)
{
	return &niagara2_perfmon_event_map[event_id];
}

static const cache_map_t niagara2_cache_map = {
[C(L1D)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(L1I)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x02ff, PIC_UPPER | PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0301, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_NONSENSE },
		[C(RESULT_MISS)] = { CACHE_OP_NONSENSE },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0330, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0320, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(DTLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x0b08, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(ITLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0xb04, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(BPU)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(NODE)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
};

static const struct sparc_pmu niagara2_pmu = {
	.event_map	= niagara2_event_map,
	.cache_map	= &niagara2_cache_map,
	.max_events	= ARRAY_SIZE(niagara2_perfmon_event_map),
	.upper_shift	= 19,
	.lower_shift	= 6,
	.event_mask	= 0xfff,
	.hv_bit		= 0x8,
	.irq_bit	= 0x30,
	.upper_nop	= 0x220,
	.lower_nop	= 0x220,
};

static const struct sparc_pmu *sparc_pmu __read_mostly;

static u64 event_encoding(u64 event_id, int idx)
{
	if (idx == PIC_UPPER_INDEX)
		event_id <<= sparc_pmu->upper_shift;
	else
		event_id <<= sparc_pmu->lower_shift;
	return event_id;
}

static u64 mask_for_index(int idx)
{
	return event_encoding(sparc_pmu->event_mask, idx);
}

static u64 nop_for_index(int idx)
{
	return event_encoding(idx == PIC_UPPER_INDEX ?
			      sparc_pmu->upper_nop :
			      sparc_pmu->lower_nop, idx);
}
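
/* Worked example added for clarity (not in the original sources): on
 * ultra3, parking an idle lower counter on the sw_count NOP event via
 * nop_for_index(PIC_LOWER_INDEX) sets the %pcr event bits to
 * lower_nop << lower_shift == 0x14 << 4 == 0x140, and
 * mask_for_index(PIC_LOWER_INDEX) == 0x3f << 4 covers the field being
 * replaced.  The context-enable bits (user/system/hypervisor) live
 * elsewhere in %pcr and come from hwc->config_base.
 */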

static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx)
{
	u64 val, mask = mask_for_index(idx);

	val = cpuc->pcr;
	val &= ~mask;
	val |= hwc->config;
	cpuc->pcr = val;

	pcr_ops->write(cpuc->pcr);
}

static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx)
{
	u64 mask = mask_for_index(idx);
	u64 nop = nop_for_index(idx);
	u64 val;

	val = cpuc->pcr;
	val &= ~mask;
	val |= nop;
	cpuc->pcr = val;

	pcr_ops->write(cpuc->pcr);
}

static u32 read_pmc(int idx)
{
	u64 val;

	read_pic(val);
	if (idx == PIC_UPPER_INDEX)
		val >>= 32;

	return val & 0xffffffff;
}

static void write_pmc(int idx, u64 val)
{
	u64 shift, mask, pic;

	shift = 0;
	if (idx == PIC_UPPER_INDEX)
		shift = 32;

	mask = ((u64) 0xffffffff) << shift;
	val <<= shift;

	read_pic(pic);
	pic &= ~mask;
	pic |= val;
	write_pic(pic);
}

static u64 sparc_perf_event_update(struct perf_event *event,
				   struct hw_perf_event *hwc, int idx)
{
	int shift = 64 - 32;
	u64 prev_raw_count, new_raw_count;
	s64 delta;

again:
	prev_raw_count = local64_read(&hwc->prev_count);
	new_raw_count = read_pmc(idx);

	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
			    new_raw_count) != prev_raw_count)
		goto again;

	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);

	return new_raw_count;
}
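
/* Worked example added for clarity (not in the original sources): with
 * shift == 32 the subtraction above is done in the top half of a 64-bit
 * word, so a counter that wrapped from 0xfffffff0 to 0x00000010 gives
 *
 *	delta = ((0x00000010ULL << 32) - (0xfffffff0ULL << 32)) >> 32 = 0x20
 *
 * i.e. the borrow out of bit 31 is discarded and the 32 events that
 * occurred across the wrap are accounted correctly without an explicit
 * overflow check.
 */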

static int sparc_perf_event_set_period(struct perf_event *event,
				       struct hw_perf_event *hwc, int idx)
{
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int ret = 0;

	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}
	if (left > MAX_PERIOD)
		left = MAX_PERIOD;

	local64_set(&hwc->prev_count, (u64)-left);

	write_pmc(idx, (u64)(-left) & 0xffffffff);

	perf_event_update_userpage(event);

	return ret;
}
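
/* Worked example added for clarity (not in the original sources): for a
 * sample period of 1000 the counter is programmed with
 * (u64)(-1000) & 0xffffffff == 0xfffffc18, so the 0xffffffff -> 0
 * overflow interrupt fires after exactly 1000 increments; prev_count is
 * seeded with the same value so the next sparc_perf_event_update()
 * computes the correct delta.
 */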

/* If performance event entries have been added, move existing
 * events around (if necessary) and then assign new entries to
 * counters.
 */
static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)
{
	int i;

	if (!cpuc->n_added)
		goto out;

	/* Read in the counters which are moving.  */
	for (i = 0; i < cpuc->n_events; i++) {
		struct perf_event *cp = cpuc->event[i];

		if (cpuc->current_idx[i] != PIC_NO_INDEX &&
		    cpuc->current_idx[i] != cp->hw.idx) {
			sparc_perf_event_update(cp, &cp->hw,
						cpuc->current_idx[i]);
			cpuc->current_idx[i] = PIC_NO_INDEX;
		}
	}

	/* Assign to counters all unassigned events.  */
	for (i = 0; i < cpuc->n_events; i++) {
		struct perf_event *cp = cpuc->event[i];
		struct hw_perf_event *hwc = &cp->hw;
		int idx = hwc->idx;
		u64 enc;

		if (cpuc->current_idx[i] != PIC_NO_INDEX)
			continue;

		sparc_perf_event_set_period(cp, hwc, idx);
		cpuc->current_idx[i] = idx;

		enc = perf_event_get_enc(cpuc->events[i]);
		pcr &= ~mask_for_index(idx);
		if (hwc->state & PERF_HES_STOPPED)
			pcr |= nop_for_index(idx);
		else
			pcr |= event_encoding(enc, idx);
	}
out:
	return pcr;
}

static void sparc_pmu_enable(struct pmu *pmu)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	u64 pcr;

	if (cpuc->enabled)
		return;

	cpuc->enabled = 1;
	barrier();

	pcr = cpuc->pcr;
	if (!cpuc->n_events) {
		pcr = 0;
	} else {
		pcr = maybe_change_configuration(cpuc, pcr);

		/* We require that all of the events have the same
		 * configuration, so just fetch the settings from the
		 * first entry.
		 */
		cpuc->pcr = pcr | cpuc->event[0]->hw.config_base;
	}

	pcr_ops->write(cpuc->pcr);
}

static void sparc_pmu_disable(struct pmu *pmu)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	u64 val;

	if (!cpuc->enabled)
		return;

	cpuc->enabled = 0;
	cpuc->n_added = 0;

	val = cpuc->pcr;
	val &= ~(PCR_UTRACE | PCR_STRACE |
		 sparc_pmu->hv_bit | sparc_pmu->irq_bit);
	cpuc->pcr = val;

	pcr_ops->write(cpuc->pcr);
}

static int active_event_index(struct cpu_hw_events *cpuc,
			      struct perf_event *event)
{
	int i;

	for (i = 0; i < cpuc->n_events; i++) {
		if (cpuc->event[i] == event)
			break;
	}
	BUG_ON(i == cpuc->n_events);
	return cpuc->current_idx[i];
}

static void sparc_pmu_start(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx = active_event_index(cpuc, event);

	if (flags & PERF_EF_RELOAD) {
		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
		sparc_perf_event_set_period(event, &event->hw, idx);
	}

	event->hw.state = 0;

	sparc_pmu_enable_event(cpuc, &event->hw, idx);
}

static void sparc_pmu_stop(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx = active_event_index(cpuc, event);

	if (!(event->hw.state & PERF_HES_STOPPED)) {
		sparc_pmu_disable_event(cpuc, &event->hw, idx);
		event->hw.state |= PERF_HES_STOPPED;
	}

	if (!(event->hw.state & PERF_HES_UPTODATE) && (flags & PERF_EF_UPDATE)) {
		sparc_perf_event_update(event, &event->hw, idx);
		event->hw.state |= PERF_HES_UPTODATE;
	}
}

static void sparc_pmu_del(struct perf_event *event, int _flags)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	unsigned long flags;
	int i;

	local_irq_save(flags);
	perf_pmu_disable(event->pmu);

	for (i = 0; i < cpuc->n_events; i++) {
		if (event == cpuc->event[i]) {
			/* Absorb the final count and turn off the
			 * event.
			 */
			sparc_pmu_stop(event, PERF_EF_UPDATE);

			/* Shift remaining entries down into
			 * the existing slot.
			 */
			while (++i < cpuc->n_events) {
				cpuc->event[i - 1] = cpuc->event[i];
				cpuc->events[i - 1] = cpuc->events[i];
				cpuc->current_idx[i - 1] =
					cpuc->current_idx[i];
			}

			perf_event_update_userpage(event);

			cpuc->n_events--;
			break;
		}
	}

	perf_pmu_enable(event->pmu);
	local_irq_restore(flags);
}

static void sparc_pmu_read(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx = active_event_index(cpuc, event);
	struct hw_perf_event *hwc = &event->hw;

	sparc_perf_event_update(event, hwc, idx);
}

static atomic_t active_events = ATOMIC_INIT(0);
static DEFINE_MUTEX(pmc_grab_mutex);

static void perf_stop_nmi_watchdog(void *unused)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	stop_nmi_watchdog(NULL);
	cpuc->pcr = pcr_ops->read();
}

void perf_event_grab_pmc(void)
{
	if (atomic_inc_not_zero(&active_events))
		return;

	mutex_lock(&pmc_grab_mutex);
	if (atomic_read(&active_events) == 0) {
		if (atomic_read(&nmi_active) > 0) {
			on_each_cpu(perf_stop_nmi_watchdog, NULL, 1);
			BUG_ON(atomic_read(&nmi_active) != 0);
		}
		atomic_inc(&active_events);
	}
	mutex_unlock(&pmc_grab_mutex);
}

void perf_event_release_pmc(void)
{
	if (atomic_dec_and_mutex_lock(&active_events, &pmc_grab_mutex)) {
		if (atomic_read(&nmi_active) == 0)
			on_each_cpu(start_nmi_watchdog, NULL, 1);
		mutex_unlock(&pmc_grab_mutex);
	}
}

static const struct perf_event_map *sparc_map_cache_event(u64 config)
{
	unsigned int cache_type, cache_op, cache_result;
	const struct perf_event_map *pmap;

	if (!sparc_pmu->cache_map)
		return ERR_PTR(-ENOENT);

	cache_type = (config >>  0) & 0xff;
	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
		return ERR_PTR(-EINVAL);

	cache_op = (config >>  8) & 0xff;
	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return ERR_PTR(-EINVAL);

	cache_result = (config >> 16) & 0xff;
	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return ERR_PTR(-EINVAL);

	pmap = &((*sparc_pmu->cache_map)[cache_type][cache_op][cache_result]);

	if (pmap->encoding == CACHE_OP_UNSUPPORTED)
		return ERR_PTR(-ENOENT);

	if (pmap->encoding == CACHE_OP_NONSENSE)
		return ERR_PTR(-EINVAL);

	return pmap;
}
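
/* Worked example added for clarity (not in the original sources): the
 * generic perf ABI packs a cache event id as
 *
 *	config = (type) | (op << 8) | (result << 16)
 *
 * so an L1D read miss is 0 | (0 << 8) | (1 << 16) == 0x10000, which the
 * unpacking above turns into cache_map[C(L1D)][C(OP_READ)][C(RESULT_MISS)],
 * i.e. { 0x09, PIC_UPPER } on ultra3.
 */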

static void hw_perf_event_destroy(struct perf_event *event)
{
	perf_event_release_pmc();
}

/* Make sure all events can be scheduled into the hardware at
 * the same time.  This is simplified by the fact that we only
 * need to support 2 simultaneous HW events.
 *
 * As a side effect, the evts[]->hw.idx values will be assigned
 * on success.  These are pending indexes.  When the events are
 * actually programmed into the chip, these values will propagate
 * to the per-cpu cpuc->current_idx[] slots, see the code in
 * maybe_change_configuration() for details.
 */
static int sparc_check_constraints(struct perf_event **evts,
				   unsigned long *events, int n_ev)
{
	u8 msk0 = 0, msk1 = 0;
	int idx0 = 0;

	/* This case is possible when we are invoked from
	 * hw_perf_group_sched_in().
	 */
	if (!n_ev)
		return 0;

	if (n_ev > MAX_HWEVENTS)
		return -1;

	msk0 = perf_event_get_msk(events[0]);
	if (n_ev == 1) {
		if (msk0 & PIC_LOWER)
			idx0 = 1;
		goto success;
	}
	BUG_ON(n_ev != 2);
	msk1 = perf_event_get_msk(events[1]);

	/* If both events can go on any counter, OK.  */
	if (msk0 == (PIC_UPPER | PIC_LOWER) &&
	    msk1 == (PIC_UPPER | PIC_LOWER))
		goto success;

	/* If one event is limited to a specific counter,
	 * and the other can go on both, OK.
	 */
	if ((msk0 == PIC_UPPER || msk0 == PIC_LOWER) &&
	    msk1 == (PIC_UPPER | PIC_LOWER)) {
		if (msk0 & PIC_LOWER)
			idx0 = 1;
		goto success;
	}

	if ((msk1 == PIC_UPPER || msk1 == PIC_LOWER) &&
	    msk0 == (PIC_UPPER | PIC_LOWER)) {
		if (msk1 & PIC_UPPER)
			idx0 = 1;
		goto success;
	}

	/* If the events are fixed to different counters, OK.  */
	if ((msk0 == PIC_UPPER && msk1 == PIC_LOWER) ||
	    (msk0 == PIC_LOWER && msk1 == PIC_UPPER)) {
		if (msk0 & PIC_LOWER)
			idx0 = 1;
		goto success;
	}

	/* Otherwise, there is a conflict.  */
	return -1;

success:
	evts[0]->hw.idx = idx0;
	if (n_ev == 2)
		evts[1]->hw.idx = idx0 ^ 1;
	return 0;
}
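
/* Worked example added for clarity (not in the original sources): on
 * ultra3, scheduling cache references ({ 0x09, PIC_LOWER }) together
 * with cache misses ({ 0x09, PIC_UPPER }) takes the "fixed to different
 * counters" branch above; msk0 has PIC_LOWER set, so idx0 becomes 1 and
 * the two events end up with hw.idx of PIC_LOWER_INDEX and
 * PIC_UPPER_INDEX respectively, matching their masks.
 */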

static int check_excludes(struct perf_event **evts, int n_prev, int n_new)
{
	int eu = 0, ek = 0, eh = 0;
	struct perf_event *event;
	int i, n, first;

	n = n_prev + n_new;
	if (n <= 1)
		return 0;

	first = 1;
	for (i = 0; i < n; i++) {
		event = evts[i];
		if (first) {
			eu = event->attr.exclude_user;
			ek = event->attr.exclude_kernel;
			eh = event->attr.exclude_hv;
			first = 0;
		} else if (event->attr.exclude_user != eu ||
			   event->attr.exclude_kernel != ek ||
			   event->attr.exclude_hv != eh) {
			return -EAGAIN;
		}
	}

	return 0;
}

static int collect_events(struct perf_event *group, int max_count,
			  struct perf_event *evts[], unsigned long *events,
			  int *current_idx)
{
	struct perf_event *event;
	int n = 0;

	if (!is_software_event(group)) {
		if (n >= max_count)
			return -1;
		evts[n] = group;
		events[n] = group->hw.event_base;
		current_idx[n++] = PIC_NO_INDEX;
	}
	list_for_each_entry(event, &group->sibling_list, group_entry) {
		if (!is_software_event(event) &&
		    event->state != PERF_EVENT_STATE_OFF) {
			if (n >= max_count)
				return -1;
			evts[n] = event;
			events[n] = event->hw.event_base;
			current_idx[n++] = PIC_NO_INDEX;
		}
	}
	return n;
}

static int sparc_pmu_add(struct perf_event *event, int ef_flags)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int n0, ret = -EAGAIN;
	unsigned long flags;

	local_irq_save(flags);
	perf_pmu_disable(event->pmu);

	n0 = cpuc->n_events;
	if (n0 >= MAX_HWEVENTS)
		goto out;

	cpuc->event[n0] = event;
	cpuc->events[n0] = event->hw.event_base;
	cpuc->current_idx[n0] = PIC_NO_INDEX;

	event->hw.state = PERF_HES_UPTODATE;
	if (!(ef_flags & PERF_EF_START))
		event->hw.state |= PERF_HES_STOPPED;

	/*
	 * If group events scheduling transaction was started,
	 * skip the schedulability test here, it will be performed
	 * at commit time (->commit_txn) as a whole.
	 */
	if (cpuc->group_flag & PERF_EVENT_TXN)
		goto nocheck;

	if (check_excludes(cpuc->event, n0, 1))
		goto out;
	if (sparc_check_constraints(cpuc->event, cpuc->events, n0 + 1))
		goto out;

nocheck:
	cpuc->n_events++;
	cpuc->n_added++;

	ret = 0;
out:
	perf_pmu_enable(event->pmu);
	local_irq_restore(flags);
	return ret;
}

static int sparc_pmu_event_init(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	struct perf_event *evts[MAX_HWEVENTS];
	struct hw_perf_event *hwc = &event->hw;
	unsigned long events[MAX_HWEVENTS];
	int current_idx_dmy[MAX_HWEVENTS];
	const struct perf_event_map *pmap;
	int n;

	if (atomic_read(&nmi_active) < 0)
		return -ENODEV;

	/* does not support taken branch sampling */
	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	switch (attr->type) {
	case PERF_TYPE_HARDWARE:
		if (attr->config >= sparc_pmu->max_events)
			return -EINVAL;
		pmap = sparc_pmu->event_map(attr->config);
		break;

	case PERF_TYPE_HW_CACHE:
		pmap = sparc_map_cache_event(attr->config);
		if (IS_ERR(pmap))
			return PTR_ERR(pmap);
		break;

	case PERF_TYPE_RAW:
		pmap = NULL;
		break;

	default:
		return -ENOENT;
	}

	if (pmap) {
		hwc->event_base = perf_event_encode(pmap);
	} else {
		/*
		 * User gives us "(encoding << 16) | pic_mask" for
		 * PERF_TYPE_RAW events.
		 */
		hwc->event_base = attr->config;
	}

	/* We save the enable bits in the config_base.  */
	hwc->config_base = sparc_pmu->irq_bit;
	if (!attr->exclude_user)
		hwc->config_base |= PCR_UTRACE;
	if (!attr->exclude_kernel)
		hwc->config_base |= PCR_STRACE;
	if (!attr->exclude_hv)
		hwc->config_base |= sparc_pmu->hv_bit;

	n = 0;
	if (event->group_leader != event) {
		n = collect_events(event->group_leader,
				   MAX_HWEVENTS - 1,
				   evts, events, current_idx_dmy);
		if (n < 0)
			return -EINVAL;
	}
	events[n] = hwc->event_base;
	evts[n] = event;

	if (check_excludes(evts, n, 1))
		return -EINVAL;

	if (sparc_check_constraints(evts, events, n + 1))
		return -EINVAL;

	hwc->idx = PIC_NO_INDEX;

	/* Try to do all error checking before this point, as unwinding
	 * state after grabbing the PMC is difficult.
	 */
	perf_event_grab_pmc();
	event->destroy = hw_perf_event_destroy;

	if (!hwc->sample_period) {
		hwc->sample_period = MAX_PERIOD;
		hwc->last_period = hwc->sample_period;
		local64_set(&hwc->period_left, hwc->sample_period);
	}

	return 0;
}
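
/* Illustrative note, not from the original sources: a PERF_TYPE_RAW user
 * passes the sparc encoding directly, e.g. on ultra3
 * attr.config == (0x0009 << 16) | PIC_UPPER selects event 0x09 restricted
 * to the upper counter, which is exactly the value perf_event_encode()
 * would produce for the corresponding ultra3_perfmon_event_map entry.
 */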

/*
 * Start group events scheduling transaction
 * Set the flag to make pmu::enable() not perform the
 * schedulability test, it will be performed at commit time
 */
static void sparc_pmu_start_txn(struct pmu *pmu)
{
	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);

	perf_pmu_disable(pmu);
	cpuhw->group_flag |= PERF_EVENT_TXN;
}

/*
 * Stop group events scheduling transaction
 * Clear the flag and pmu::enable() will perform the
 * schedulability test.
 */
static void sparc_pmu_cancel_txn(struct pmu *pmu)
{
	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);

	cpuhw->group_flag &= ~PERF_EVENT_TXN;
	perf_pmu_enable(pmu);
}

/*
 * Commit group events scheduling transaction
 * Perform the group schedulability test as a whole
 * Return 0 on success
 */
static int sparc_pmu_commit_txn(struct pmu *pmu)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int n;

	if (!sparc_pmu)
		return -EINVAL;

	cpuc = &__get_cpu_var(cpu_hw_events);
	n = cpuc->n_events;
	if (check_excludes(cpuc->event, 0, n))
		return -EINVAL;
	if (sparc_check_constraints(cpuc->event, cpuc->events, n))
		return -EAGAIN;

	cpuc->group_flag &= ~PERF_EVENT_TXN;
	perf_pmu_enable(pmu);
	return 0;
}

static struct pmu pmu = {
	.pmu_enable	= sparc_pmu_enable,
	.pmu_disable	= sparc_pmu_disable,
	.event_init	= sparc_pmu_event_init,
	.add		= sparc_pmu_add,
	.del		= sparc_pmu_del,
	.start		= sparc_pmu_start,
	.stop		= sparc_pmu_stop,
	.read		= sparc_pmu_read,
	.start_txn	= sparc_pmu_start_txn,
	.cancel_txn	= sparc_pmu_cancel_txn,
	.commit_txn	= sparc_pmu_commit_txn,
};
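
/* Note added for clarity (not in the original sources): for a grouped
 * event the core perf code brackets its sparc_pmu_add() calls with
 * sparc_pmu_start_txn() and sparc_pmu_commit_txn() (or cancel_txn on
 * failure), which is why sparc_pmu_add() skips the constraint check when
 * PERF_EVENT_TXN is set and commit_txn re-runs check_excludes() and
 * sparc_check_constraints() over the whole group.
 */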

void perf_event_print_debug(void)
{
	unsigned long flags;
	u64 pcr, pic;
	int cpu;

	if (!sparc_pmu)
		return;

	local_irq_save(flags);

	cpu = smp_processor_id();

	pcr = pcr_ops->read();
	read_pic(pic);

	pr_info("\n");
	pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n",
		cpu, pcr, pic);

	local_irq_restore(flags);
}

static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
					    unsigned long cmd, void *__args)
{
	struct die_args *args = __args;
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	struct pt_regs *regs;
	int i;

	if (!atomic_read(&active_events))
		return NOTIFY_DONE;

	switch (cmd) {
	case DIE_NMI:
		break;

	default:
		return NOTIFY_DONE;
	}

	regs = args->regs;

	cpuc = &__get_cpu_var(cpu_hw_events);

	/* If the PMU has the TOE IRQ enable bits, we need to do a
	 * dummy write to the %pcr to clear the overflow bits and thus
	 * the interrupt.
	 *
	 * Do this before we peek at the counters to determine
	 * overflow so we don't lose any events.
	 */
	if (sparc_pmu->irq_bit)
		pcr_ops->write(cpuc->pcr);

	for (i = 0; i < cpuc->n_events; i++) {
		struct perf_event *event = cpuc->event[i];
		int idx = cpuc->current_idx[i];
		struct hw_perf_event *hwc;
		u64 val;

		hwc = &event->hw;
		val = sparc_perf_event_update(event, hwc, idx);
		if (val & (1ULL << 31))
			continue;

		perf_sample_data_init(&data, 0, hwc->last_period);
		if (!sparc_perf_event_set_period(event, hwc, idx))
			continue;

		if (perf_event_overflow(event, &data, regs))
			sparc_pmu_stop(event, 0);
	}

	return NOTIFY_STOP;
}

static __read_mostly struct notifier_block perf_event_nmi_notifier = {
	.notifier_call		= perf_event_nmi_handler,
};

static bool __init supported_pmu(void)
{
	if (!strcmp(sparc_pmu_type, "ultra3") ||
	    !strcmp(sparc_pmu_type, "ultra3+") ||
	    !strcmp(sparc_pmu_type, "ultra3i") ||
	    !strcmp(sparc_pmu_type, "ultra4+")) {
		sparc_pmu = &ultra3_pmu;
		return true;
	}
	if (!strcmp(sparc_pmu_type, "niagara")) {
		sparc_pmu = &niagara1_pmu;
		return true;
	}
	if (!strcmp(sparc_pmu_type, "niagara2") ||
	    !strcmp(sparc_pmu_type, "niagara3")) {
		sparc_pmu = &niagara2_pmu;
		return true;
	}
	return false;
}

int __init init_hw_perf_events(void)
{
	pr_info("Performance events: ");

	if (!supported_pmu()) {
		pr_cont("No support for PMU type '%s'\n", sparc_pmu_type);
		return 0;
	}

	pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);

	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
	register_die_notifier(&perf_event_nmi_notifier);

	return 0;
}
early_initcall(init_hw_perf_events);

void perf_callchain_kernel(struct perf_callchain_entry *entry,
			   struct pt_regs *regs)
{
	unsigned long ksp, fp;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	int graph = 0;
#endif

	stack_trace_flush();

	perf_callchain_store(entry, regs->tpc);

	ksp = regs->u_regs[UREG_I6];
	fp = ksp + STACK_BIAS;
	do {
		struct sparc_stackf *sf;
		struct pt_regs *regs;
		unsigned long pc;

		if (!kstack_valid(current_thread_info(), fp))
			break;

		sf = (struct sparc_stackf *) fp;
		regs = (struct pt_regs *) (sf + 1);

		if (kstack_is_trap_frame(current_thread_info(), regs)) {
			if (user_mode(regs))
				break;
			pc = regs->tpc;
			fp = regs->u_regs[UREG_I6] + STACK_BIAS;
		} else {
			pc = sf->callers_pc;
			fp = (unsigned long)sf->fp + STACK_BIAS;
		}
		perf_callchain_store(entry, pc);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
		if ((pc + 8UL) == (unsigned long) &return_to_handler) {
			int index = current->curr_ret_stack;
			if (current->ret_stack && index >= graph) {
				pc = current->ret_stack[index - graph].ret;
				perf_callchain_store(entry, pc);
				graph++;
			}
		}
#endif
	} while (entry->nr < PERF_MAX_STACK_DEPTH);
}

static void perf_callchain_user_64(struct perf_callchain_entry *entry,
				   struct pt_regs *regs)
{
	unsigned long ufp;

	perf_callchain_store(entry, regs->tpc);

	ufp = regs->u_regs[UREG_I6] + STACK_BIAS;
	do {
		struct sparc_stackf *usf, sf;
		unsigned long pc;

		usf = (struct sparc_stackf *) ufp;
		if (__copy_from_user_inatomic(&sf, usf, sizeof(sf)))
			break;

		pc = sf.callers_pc;
		ufp = (unsigned long)sf.fp + STACK_BIAS;
		perf_callchain_store(entry, pc);
	} while (entry->nr < PERF_MAX_STACK_DEPTH);
}
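
/* Note added for clarity (not in the original sources): the 64-bit walker
 * above follows full struct sparc_stackf frames and must re-apply
 * STACK_BIAS to each saved frame pointer, while the 32-bit variant below
 * uses struct sparc_stackf32 and truncates %i6 to 32 bits with no bias,
 * matching the compat ABI's frame layout.
 */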

static void perf_callchain_user_32(struct perf_callchain_entry *entry,
				   struct pt_regs *regs)
{
	unsigned long ufp;

	perf_callchain_store(entry, regs->tpc);

	ufp = regs->u_regs[UREG_I6] & 0xffffffffUL;
	do {
		struct sparc_stackf32 *usf, sf;
		unsigned long pc;

		usf = (struct sparc_stackf32 *) ufp;
		if (__copy_from_user_inatomic(&sf, usf, sizeof(sf)))
			break;

		pc = sf.callers_pc;
		ufp = (unsigned long)sf.fp;
		perf_callchain_store(entry, pc);
	} while (entry->nr < PERF_MAX_STACK_DEPTH);
}

void
perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
	flushw_user();
	if (test_thread_flag(TIF_32BIT))
		perf_callchain_user_32(entry, regs);
	else
		perf_callchain_user_64(entry, regs);
}