/* Performance event support for sparc64.
 *
 * Copyright (C) 2009, 2010 David S. Miller <davem@davemloft.net>
 *
 * This code is based almost entirely upon the x86 perf event
 * code, which is:
 *
 *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
 *  Copyright (C) 2009 Jaswinder Singh Rajput
 *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
 *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 */

#include <linux/perf_event.h>
#include <linux/kprobes.h>
#include <linux/kernel.h>
#include <linux/kdebug.h>
#include <linux/mutex.h>

#include <asm/stacktrace.h>
#include <asm/cpudata.h>
#include <asm/uaccess.h>
#include <asm/atomic.h>
#include <asm/nmi.h>
#include <asm/pcr.h>

#include "kstack.h"

/* Sparc64 chips have two performance counters, 32-bits each, with
 * overflow interrupts generated on transition from 0xffffffff to 0.
 * The counters are accessed in one go using a 64-bit register.
 *
 * Both counters are controlled using a single control register.  The
 * only way to stop all sampling is to clear all of the context (user,
 * supervisor, hypervisor) sampling enable bits.  But these bits apply
 * to both counters, thus the two counters can't be enabled/disabled
 * individually.
 *
 * The control register has two event fields, one for each of the two
 * counters.  It's thus nearly impossible to have one counter going
 * while keeping the other one stopped.  Therefore it is possible to
 * get overflow interrupts for counters not currently "in use" and
 * that condition must be checked in the overflow interrupt handler.
 *
 * So we use a hack, in that we program inactive counters with the
 * "sw_count0" and "sw_count1" events.  These count how many times
 * the instruction "sethi %hi(0xfc000), %g0" is executed.  It's an
 * unusual way to encode a NOP and therefore will not trigger in
 * normal code.
 */

#define MAX_HWEVENTS			2
#define MAX_PERIOD			((1UL << 32) - 1)

#define PIC_UPPER_INDEX			0
#define PIC_LOWER_INDEX			1
#define PIC_NO_INDEX			-1

struct cpu_hw_events {
	/* Number of events currently scheduled onto this cpu.
	 * This tells how many entries in the arrays below
	 * are valid.
	 */
	int			n_events;

	/* Number of new events added since the last hw_perf_disable().
	 * This works because the perf event layer always adds new
	 * events inside of a perf_{disable,enable}() sequence.
	 */
	int			n_added;

	/* Array of events currently scheduled on this cpu.  */
	struct perf_event	*event[MAX_HWEVENTS];

	/* Array of encoded longs, specifying the %pcr register
	 * encoding and the mask of PIC counters this event can
	 * be scheduled on.  See perf_event_encode() et al.
	 */
	unsigned long		events[MAX_HWEVENTS];

	/* The current counter index assigned to an event.  When the
	 * event hasn't been programmed into the cpu yet, this will
	 * hold PIC_NO_INDEX.  The event->hw.idx value tells us where
	 * we ought to schedule the event.
	 */
	int			current_idx[MAX_HWEVENTS];

	/* Software copy of %pcr register on this cpu.  */
	u64			pcr;

	/* Enabled/disabled state.  */
	int			enabled;
};
DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, };

/* An event map describes the characteristics of a performance
 * counter event.  In particular it gives the encoding as well as
 * a mask telling which counters the event can be measured on.
 */
struct perf_event_map {
	u16	encoding;
	u8	pic_mask;
#define PIC_NONE	0x00
#define PIC_UPPER	0x01
#define PIC_LOWER	0x02
};

/* Encode a perf_event_map entry into a long.  */
static unsigned long perf_event_encode(const struct perf_event_map *pmap)
{
	return ((unsigned long) pmap->encoding << 16) | pmap->pic_mask;
}

static u8 perf_event_get_msk(unsigned long val)
{
	return val & 0xff;
}

static u64 perf_event_get_enc(unsigned long val)
{
	return val >> 16;
}
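/* Worked example (illustrative comment, not from the original source):
 * the ultra3 map entry below for PERF_COUNT_HW_CACHE_REFERENCES is
 * { 0x0009, PIC_LOWER }.  perf_event_encode() turns it into
 * (0x0009 << 16) | 0x02 == 0x00090002; perf_event_get_enc() recovers
 * the 0x0009 encoding and perf_event_get_msk() the PIC_LOWER mask.
 */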
#define C(x) PERF_COUNT_HW_CACHE_##x

#define CACHE_OP_UNSUPPORTED	0xfffe
#define CACHE_OP_NONSENSE	0xffff

typedef struct perf_event_map cache_map_t
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX];

struct sparc_pmu {
	const struct perf_event_map	*(*event_map)(int);
	const cache_map_t		*cache_map;
	int				max_events;
	int				upper_shift;
	int				lower_shift;
	int				event_mask;
	int				hv_bit;
	int				irq_bit;
	int				upper_nop;
	int				lower_nop;
};

static const struct perf_event_map ultra3_perfmon_event_map[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER },
	[PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER },
};

static const struct perf_event_map *ultra3_event_map(int event_id)
{
	return &ultra3_perfmon_event_map[event_id];
}

static const cache_map_t ultra3_cache_map = {
[C(L1D)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x09, PIC_UPPER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { 0x0a, PIC_LOWER },
		[C(RESULT_MISS)] = { 0x0a, PIC_UPPER },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(L1I)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x09, PIC_UPPER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_NONSENSE },
		[C(RESULT_MISS)] = { CACHE_OP_NONSENSE },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0c, PIC_UPPER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER },
		[C(RESULT_MISS)] = { 0x0c, PIC_UPPER },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(DTLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x12, PIC_UPPER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(ITLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x11, PIC_UPPER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(BPU)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
};

static const struct sparc_pmu ultra3_pmu = {
	.event_map	= ultra3_event_map,
	.cache_map	= &ultra3_cache_map,
	.max_events	= ARRAY_SIZE(ultra3_perfmon_event_map),
	.upper_shift	= 11,
	.lower_shift	= 4,
	.event_mask	= 0x3f,
	.upper_nop	= 0x1c,
	.lower_nop	= 0x14,
};
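/* Worked example (illustrative comment, not from the original source):
 * with the ultra3 settings above, event_encoding() below places an
 * event code at bit 11 for the upper counter and bit 4 for the lower
 * one.  Programming PERF_COUNT_HW_INSTRUCTIONS (encoding 0x0001) on
 * the upper counter thus sets 0x1 << 11 = 0x800 in the %pcr, and
 * mask_for_index() clears 0x3f << 11 = 0x1f800 when that field is
 * rewritten.
 */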
/* Niagara1 is very limited.  The upper PIC is hard-locked to count
 * only instructions, so it is free running which creates all kinds of
 * problems.  Some hardware designs make one wonder if the creator
 * even looked at how this stuff gets used by software.
 */
static const struct perf_event_map niagara1_perfmon_event_map[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = { 0x00, PIC_UPPER },
	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x00, PIC_UPPER },
	[PERF_COUNT_HW_CACHE_REFERENCES] = { 0, PIC_NONE },
	[PERF_COUNT_HW_CACHE_MISSES] = { 0x03, PIC_LOWER },
};

static const struct perf_event_map *niagara1_event_map(int event_id)
{
	return &niagara1_perfmon_event_map[event_id];
}

static const cache_map_t niagara1_cache_map = {
[C(L1D)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x03, PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x03, PIC_LOWER, },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(L1I)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x00, PIC_UPPER },
		[C(RESULT_MISS)] = { 0x02, PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_NONSENSE },
		[C(RESULT_MISS)] = { CACHE_OP_NONSENSE },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x07, PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x07, PIC_LOWER, },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(DTLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x05, PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(ITLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x04, PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(BPU)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
};

static const struct sparc_pmu niagara1_pmu = {
	.event_map	= niagara1_event_map,
	.cache_map	= &niagara1_cache_map,
	.max_events	= ARRAY_SIZE(niagara1_perfmon_event_map),
	.upper_shift	= 0,
	.lower_shift	= 4,
	.event_mask	= 0x7,
	.upper_nop	= 0x0,
	.lower_nop	= 0x0,
};

static const struct perf_event_map niagara2_perfmon_event_map[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = { 0x02ff, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x02ff, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0208, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_CACHE_MISSES] = { 0x0302, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x0201, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_BRANCH_MISSES] = { 0x0202, PIC_UPPER | PIC_LOWER },
};

static const struct perf_event_map *niagara2_event_map(int event_id)
{
	return &niagara2_perfmon_event_map[event_id];
}

static const cache_map_t niagara2_cache_map = {
[C(L1D)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(L1I)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x02ff, PIC_UPPER | PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0301, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_NONSENSE },
		[C(RESULT_MISS)] = { CACHE_OP_NONSENSE },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0330, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0320, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(DTLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x0b08, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(ITLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0xb04, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(BPU)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
};

static const struct sparc_pmu niagara2_pmu = {
	.event_map	= niagara2_event_map,
	.cache_map	= &niagara2_cache_map,
	.max_events	= ARRAY_SIZE(niagara2_perfmon_event_map),
	.upper_shift	= 19,
	.lower_shift	= 6,
	.event_mask	= 0xfff,
	.hv_bit		= 0x8,
	.irq_bit	= 0x30,
	.upper_nop	= 0x220,
	.lower_nop	= 0x220,
};

static const struct sparc_pmu *sparc_pmu __read_mostly;

static u64 event_encoding(u64 event_id, int idx)
{
	if (idx == PIC_UPPER_INDEX)
		event_id <<= sparc_pmu->upper_shift;
	else
		event_id <<= sparc_pmu->lower_shift;
	return event_id;
}

static u64 mask_for_index(int idx)
{
	return event_encoding(sparc_pmu->event_mask, idx);
}

static u64 nop_for_index(int idx)
{
	return event_encoding(idx == PIC_UPPER_INDEX ?
			      sparc_pmu->upper_nop :
			      sparc_pmu->lower_nop, idx);
}

static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx)
{
	u64 val, mask = mask_for_index(idx);

	val = cpuc->pcr;
	val &= ~mask;
	val |= hwc->config;
	cpuc->pcr = val;

	pcr_ops->write(cpuc->pcr);
}

static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx)
{
	u64 mask = mask_for_index(idx);
	u64 nop = nop_for_index(idx);
	u64 val;

	val = cpuc->pcr;
	val &= ~mask;
	val |= nop;
	cpuc->pcr = val;

	pcr_ops->write(cpuc->pcr);
}

static u32 read_pmc(int idx)
{
	u64 val;

	read_pic(val);
	if (idx == PIC_UPPER_INDEX)
		val >>= 32;

	return val & 0xffffffff;
}

static void write_pmc(int idx, u64 val)
{
	u64 shift, mask, pic;

	shift = 0;
	if (idx == PIC_UPPER_INDEX)
		shift = 32;

	mask = ((u64) 0xffffffff) << shift;
	val <<= shift;

	read_pic(pic);
	pic &= ~mask;
	pic |= val;
	write_pic(pic);
}

static u64 sparc_perf_event_update(struct perf_event *event,
				   struct hw_perf_event *hwc, int idx)
{
	int shift = 64 - 32;
	u64 prev_raw_count, new_raw_count;
	s64 delta;

again:
	prev_raw_count = atomic64_read(&hwc->prev_count);
	new_raw_count = read_pmc(idx);

	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
			     new_raw_count) != prev_raw_count)
		goto again;

	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	atomic64_add(delta, &event->count);
	atomic64_sub(delta, &hwc->period_left);

	return new_raw_count;
}
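/* Note (added comment, not in the original source): the counter is
 * programmed with the two's complement of the remaining period, so it
 * wraps from 0xffffffff to 0 (raising the overflow interrupt described
 * at the top of this file) after exactly 'left' more events.
 * sparc_perf_event_update() above likewise shifts the 32-bit raw
 * counts up to bit 63 before subtracting, so the delta is computed
 * using only the 32 valid counter bits.
 */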
static int sparc_perf_event_set_period(struct perf_event *event,
				       struct hw_perf_event *hwc, int idx)
{
	s64 left = atomic64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int ret = 0;

	if (unlikely(left <= -period)) {
		left = period;
		atomic64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		atomic64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}
	if (left > MAX_PERIOD)
		left = MAX_PERIOD;

	atomic64_set(&hwc->prev_count, (u64)-left);

	write_pmc(idx, (u64)(-left) & 0xffffffff);

	perf_event_update_userpage(event);

	return ret;
}

/* If performance event entries have been added, move existing
 * events around (if necessary) and then assign new entries to
 * counters.
 */
static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)
{
	int i;

	if (!cpuc->n_added)
		goto out;

	/* Read in the counters which are moving.  */
	for (i = 0; i < cpuc->n_events; i++) {
		struct perf_event *cp = cpuc->event[i];

		if (cpuc->current_idx[i] != PIC_NO_INDEX &&
		    cpuc->current_idx[i] != cp->hw.idx) {
			sparc_perf_event_update(cp, &cp->hw,
						cpuc->current_idx[i]);
			cpuc->current_idx[i] = PIC_NO_INDEX;
		}
	}

	/* Assign to counters all unassigned events.  */
	for (i = 0; i < cpuc->n_events; i++) {
		struct perf_event *cp = cpuc->event[i];
		struct hw_perf_event *hwc = &cp->hw;
		int idx = hwc->idx;
		u64 enc;

		if (cpuc->current_idx[i] != PIC_NO_INDEX)
			continue;

		sparc_perf_event_set_period(cp, hwc, idx);
		cpuc->current_idx[i] = idx;

		enc = perf_event_get_enc(cpuc->events[i]);
		pcr |= event_encoding(enc, idx);
	}
out:
	return pcr;
}

void hw_perf_enable(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	u64 pcr;

	if (cpuc->enabled)
		return;

	cpuc->enabled = 1;
	barrier();

	pcr = cpuc->pcr;
	if (!cpuc->n_events) {
		pcr = 0;
	} else {
		pcr = maybe_change_configuration(cpuc, pcr);

		/* We require that all of the events have the same
		 * configuration, so just fetch the settings from the
		 * first entry.
		 */
		cpuc->pcr = pcr | cpuc->event[0]->hw.config_base;
	}

	pcr_ops->write(cpuc->pcr);
}

void hw_perf_disable(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	u64 val;

	if (!cpuc->enabled)
		return;

	cpuc->enabled = 0;
	cpuc->n_added = 0;

	val = cpuc->pcr;
	val &= ~(PCR_UTRACE | PCR_STRACE |
		 sparc_pmu->hv_bit | sparc_pmu->irq_bit);
	cpuc->pcr = val;

	pcr_ops->write(cpuc->pcr);
}
static void sparc_pmu_disable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	unsigned long flags;
	int i;

	local_irq_save(flags);
	perf_disable();

	for (i = 0; i < cpuc->n_events; i++) {
		if (event == cpuc->event[i]) {
			int idx = cpuc->current_idx[i];

			/* Shift remaining entries down into
			 * the existing slot.
			 */
			while (++i < cpuc->n_events) {
				cpuc->event[i - 1] = cpuc->event[i];
				cpuc->events[i - 1] = cpuc->events[i];
				cpuc->current_idx[i - 1] =
					cpuc->current_idx[i];
			}

			/* Absorb the final count and turn off the
			 * event.
			 */
			sparc_pmu_disable_event(cpuc, hwc, idx);
			barrier();
			sparc_perf_event_update(event, hwc, idx);

			perf_event_update_userpage(event);

			cpuc->n_events--;
			break;
		}
	}

	perf_enable();
	local_irq_restore(flags);
}

static int active_event_index(struct cpu_hw_events *cpuc,
			      struct perf_event *event)
{
	int i;

	for (i = 0; i < cpuc->n_events; i++) {
		if (cpuc->event[i] == event)
			break;
	}
	BUG_ON(i == cpuc->n_events);
	return cpuc->current_idx[i];
}

static void sparc_pmu_read(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx = active_event_index(cpuc, event);
	struct hw_perf_event *hwc = &event->hw;

	sparc_perf_event_update(event, hwc, idx);
}

static void sparc_pmu_unthrottle(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx = active_event_index(cpuc, event);
	struct hw_perf_event *hwc = &event->hw;

	sparc_pmu_enable_event(cpuc, hwc, idx);
}

static atomic_t active_events = ATOMIC_INIT(0);
static DEFINE_MUTEX(pmc_grab_mutex);

static void perf_stop_nmi_watchdog(void *unused)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	stop_nmi_watchdog(NULL);
	cpuc->pcr = pcr_ops->read();
}

void perf_event_grab_pmc(void)
{
	if (atomic_inc_not_zero(&active_events))
		return;

	mutex_lock(&pmc_grab_mutex);
	if (atomic_read(&active_events) == 0) {
		if (atomic_read(&nmi_active) > 0) {
			on_each_cpu(perf_stop_nmi_watchdog, NULL, 1);
			BUG_ON(atomic_read(&nmi_active) != 0);
		}
		atomic_inc(&active_events);
	}
	mutex_unlock(&pmc_grab_mutex);
}

void perf_event_release_pmc(void)
{
	if (atomic_dec_and_mutex_lock(&active_events, &pmc_grab_mutex)) {
		if (atomic_read(&nmi_active) == 0)
			on_each_cpu(start_nmi_watchdog, NULL, 1);
		mutex_unlock(&pmc_grab_mutex);
	}
}

static const struct perf_event_map *sparc_map_cache_event(u64 config)
{
	unsigned int cache_type, cache_op, cache_result;
	const struct perf_event_map *pmap;

	if (!sparc_pmu->cache_map)
		return ERR_PTR(-ENOENT);

	cache_type = (config >> 0) & 0xff;
	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
		return ERR_PTR(-EINVAL);

	cache_op = (config >> 8) & 0xff;
	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return ERR_PTR(-EINVAL);

	cache_result = (config >> 16) & 0xff;
	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return ERR_PTR(-EINVAL);

	pmap = &((*sparc_pmu->cache_map)[cache_type][cache_op][cache_result]);

	if (pmap->encoding == CACHE_OP_UNSUPPORTED)
		return ERR_PTR(-ENOENT);

	if (pmap->encoding == CACHE_OP_NONSENSE)
		return ERR_PTR(-EINVAL);

	return pmap;
}

static void hw_perf_event_destroy(struct perf_event *event)
{
	perf_event_release_pmc();
}

/* Make sure all events can be scheduled into the hardware at
 * the same time.  This is simplified by the fact that we only
 * need to support 2 simultaneous HW events.
 *
 * As a side effect, the evts[]->hw.idx values will be assigned
 * on success.  These are pending indexes.  When the events are
 * actually programmed into the chip, these values will propagate
 * to the per-cpu cpuc->current_idx[] slots, see the code in
 * maybe_change_configuration() for details.
 */
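/* Worked example (illustrative comment, not from the original source):
 * on ultra3, PERF_COUNT_HW_CACHE_REFERENCES is PIC_LOWER-only and
 * PERF_COUNT_HW_CACHE_MISSES is PIC_UPPER-only.  Scheduling both in
 * one group hits the "fixed to different counters" case below, so the
 * first event gets idx0 = 1 (PIC_LOWER_INDEX) and the second gets
 * idx0 ^ 1 = 0 (PIC_UPPER_INDEX).
 */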
static int sparc_check_constraints(struct perf_event **evts,
				   unsigned long *events, int n_ev)
{
	u8 msk0 = 0, msk1 = 0;
	int idx0 = 0;

	/* This case is possible when we are invoked from
	 * hw_perf_group_sched_in().
	 */
	if (!n_ev)
		return 0;

	if (n_ev > perf_max_events)
		return -1;

	msk0 = perf_event_get_msk(events[0]);
	if (n_ev == 1) {
		if (msk0 & PIC_LOWER)
			idx0 = 1;
		goto success;
	}
	BUG_ON(n_ev != 2);
	msk1 = perf_event_get_msk(events[1]);

	/* If both events can go on any counter, OK.  */
	if (msk0 == (PIC_UPPER | PIC_LOWER) &&
	    msk1 == (PIC_UPPER | PIC_LOWER))
		goto success;

	/* If one event is limited to a specific counter,
	 * and the other can go on both, OK.
	 */
	if ((msk0 == PIC_UPPER || msk0 == PIC_LOWER) &&
	    msk1 == (PIC_UPPER | PIC_LOWER)) {
		if (msk0 & PIC_LOWER)
			idx0 = 1;
		goto success;
	}

	if ((msk1 == PIC_UPPER || msk1 == PIC_LOWER) &&
	    msk0 == (PIC_UPPER | PIC_LOWER)) {
		if (msk1 & PIC_UPPER)
			idx0 = 1;
		goto success;
	}

	/* If the events are fixed to different counters, OK.  */
	if ((msk0 == PIC_UPPER && msk1 == PIC_LOWER) ||
	    (msk0 == PIC_LOWER && msk1 == PIC_UPPER)) {
		if (msk0 & PIC_LOWER)
			idx0 = 1;
		goto success;
	}

	/* Otherwise, there is a conflict.  */
	return -1;

success:
	evts[0]->hw.idx = idx0;
	if (n_ev == 2)
		evts[1]->hw.idx = idx0 ^ 1;
	return 0;
}
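/* Note (added comment, not in the original source): the exclude_user /
 * exclude_kernel / exclude_hv settings end up in hw.config_base as the
 * %pcr context enable bits, and hw_perf_enable() applies only the
 * first event's config_base to the shared control register.
 * check_excludes() therefore rejects any mix of events whose exclude_*
 * settings differ.
 */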
static int check_excludes(struct perf_event **evts, int n_prev, int n_new)
{
	int eu = 0, ek = 0, eh = 0;
	struct perf_event *event;
	int i, n, first;

	n = n_prev + n_new;
	if (n <= 1)
		return 0;

	first = 1;
	for (i = 0; i < n; i++) {
		event = evts[i];
		if (first) {
			eu = event->attr.exclude_user;
			ek = event->attr.exclude_kernel;
			eh = event->attr.exclude_hv;
			first = 0;
		} else if (event->attr.exclude_user != eu ||
			   event->attr.exclude_kernel != ek ||
			   event->attr.exclude_hv != eh) {
			return -EAGAIN;
		}
	}

	return 0;
}

static int collect_events(struct perf_event *group, int max_count,
			  struct perf_event *evts[], unsigned long *events,
			  int *current_idx)
{
	struct perf_event *event;
	int n = 0;

	if (!is_software_event(group)) {
		if (n >= max_count)
			return -1;
		evts[n] = group;
		events[n] = group->hw.event_base;
		current_idx[n++] = PIC_NO_INDEX;
	}
	list_for_each_entry(event, &group->sibling_list, group_entry) {
		if (!is_software_event(event) &&
		    event->state != PERF_EVENT_STATE_OFF) {
			if (n >= max_count)
				return -1;
			evts[n] = event;
			events[n] = event->hw.event_base;
			current_idx[n++] = PIC_NO_INDEX;
		}
	}
	return n;
}

static void event_sched_in(struct perf_event *event, int cpu)
{
	event->state = PERF_EVENT_STATE_ACTIVE;
	event->oncpu = cpu;
	event->tstamp_running += event->ctx->time - event->tstamp_stopped;
	if (is_software_event(event))
		event->pmu->enable(event);
}

int hw_perf_group_sched_in(struct perf_event *group_leader,
			   struct perf_cpu_context *cpuctx,
			   struct perf_event_context *ctx, int cpu)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct perf_event *sub;
	int n0, n;

	if (!sparc_pmu)
		return 0;

	n0 = cpuc->n_events;
	n = collect_events(group_leader, perf_max_events - n0,
			   &cpuc->event[n0], &cpuc->events[n0],
			   &cpuc->current_idx[n0]);
	if (n < 0)
		return -EAGAIN;
	if (check_excludes(cpuc->event, n0, n))
		return -EINVAL;
	if (sparc_check_constraints(cpuc->event, cpuc->events, n + n0))
		return -EAGAIN;
	cpuc->n_events = n0 + n;
	cpuc->n_added += n;

	cpuctx->active_oncpu += n;
	n = 1;
	event_sched_in(group_leader, cpu);
	list_for_each_entry(sub, &group_leader->sibling_list, group_entry) {
		if (sub->state != PERF_EVENT_STATE_OFF) {
			event_sched_in(sub, cpu);
			n++;
		}
	}
	ctx->nr_active += n;

	return 1;
}

static int sparc_pmu_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int n0, ret = -EAGAIN;
	unsigned long flags;

	local_irq_save(flags);
	perf_disable();

	n0 = cpuc->n_events;
	if (n0 >= perf_max_events)
		goto out;

	cpuc->event[n0] = event;
	cpuc->events[n0] = event->hw.event_base;
	cpuc->current_idx[n0] = PIC_NO_INDEX;

	if (check_excludes(cpuc->event, n0, 1))
		goto out;
	if (sparc_check_constraints(cpuc->event, cpuc->events, n0 + 1))
		goto out;

	cpuc->n_events++;
	cpuc->n_added++;

	ret = 0;
out:
	perf_enable();
	local_irq_restore(flags);
	return ret;
}
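/* Worked example (illustrative comment, not from the original source):
 * a PERF_TYPE_HW_CACHE config packs the cache type in bits 0-7, the op
 * in bits 8-15 and the result in bits 16-23, as decoded by
 * sparc_map_cache_event().  So an L1D read-miss event uses
 * config == C(L1D) | (C(OP_READ) << 8) | (C(RESULT_MISS) << 16), which
 * the Niagara2 cache map resolves to { 0x0302, PIC_UPPER | PIC_LOWER }.
 */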
static int __hw_perf_event_init(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	struct perf_event *evts[MAX_HWEVENTS];
	struct hw_perf_event *hwc = &event->hw;
	unsigned long events[MAX_HWEVENTS];
	int current_idx_dmy[MAX_HWEVENTS];
	const struct perf_event_map *pmap;
	int n;

	if (atomic_read(&nmi_active) < 0)
		return -ENODEV;

	if (attr->type == PERF_TYPE_HARDWARE) {
		if (attr->config >= sparc_pmu->max_events)
			return -EINVAL;
		pmap = sparc_pmu->event_map(attr->config);
	} else if (attr->type == PERF_TYPE_HW_CACHE) {
		pmap = sparc_map_cache_event(attr->config);
		if (IS_ERR(pmap))
			return PTR_ERR(pmap);
	} else
		return -EOPNOTSUPP;

	/* We save the enable bits in the config_base.  */
	hwc->config_base = sparc_pmu->irq_bit;
	if (!attr->exclude_user)
		hwc->config_base |= PCR_UTRACE;
	if (!attr->exclude_kernel)
		hwc->config_base |= PCR_STRACE;
	if (!attr->exclude_hv)
		hwc->config_base |= sparc_pmu->hv_bit;

	hwc->event_base = perf_event_encode(pmap);

	n = 0;
	if (event->group_leader != event) {
		n = collect_events(event->group_leader,
				   perf_max_events - 1,
				   evts, events, current_idx_dmy);
		if (n < 0)
			return -EINVAL;
	}
	events[n] = hwc->event_base;
	evts[n] = event;

	if (check_excludes(evts, n, 1))
		return -EINVAL;

	if (sparc_check_constraints(evts, events, n + 1))
		return -EINVAL;

	hwc->idx = PIC_NO_INDEX;

	/* Try to do all error checking before this point, as unwinding
	 * state after grabbing the PMC is difficult.
	 */
	perf_event_grab_pmc();
	event->destroy = hw_perf_event_destroy;

	if (!hwc->sample_period) {
		hwc->sample_period = MAX_PERIOD;
		hwc->last_period = hwc->sample_period;
		atomic64_set(&hwc->period_left, hwc->sample_period);
	}

	return 0;
}

static const struct pmu pmu = {
	.enable		= sparc_pmu_enable,
	.disable	= sparc_pmu_disable,
	.read		= sparc_pmu_read,
	.unthrottle	= sparc_pmu_unthrottle,
};

const struct pmu *hw_perf_event_init(struct perf_event *event)
{
	int err = __hw_perf_event_init(event);

	if (err)
		return ERR_PTR(err);
	return &pmu;
}

void perf_event_print_debug(void)
{
	unsigned long flags;
	u64 pcr, pic;
	int cpu;

	if (!sparc_pmu)
		return;

	local_irq_save(flags);

	cpu = smp_processor_id();

	pcr = pcr_ops->read();
	read_pic(pic);

	pr_info("\n");
	pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n",
		cpu, pcr, pic);

	local_irq_restore(flags);
}

static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
					    unsigned long cmd, void *__args)
{
	struct die_args *args = __args;
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	struct pt_regs *regs;
	int i;

	if (!atomic_read(&active_events))
		return NOTIFY_DONE;

	switch (cmd) {
	case DIE_NMI:
		break;

	default:
		return NOTIFY_DONE;
	}

	regs = args->regs;

	data.addr = 0;

	cpuc = &__get_cpu_var(cpu_hw_events);

	/* If the PMU has the TOE IRQ enable bits, we need to do a
	 * dummy write to the %pcr to clear the overflow bits and thus
	 * the interrupt.
	 *
	 * Do this before we peek at the counters to determine
	 * overflow so we don't lose any events.
	 */
	if (sparc_pmu->irq_bit)
		pcr_ops->write(cpuc->pcr);

	for (i = 0; i < cpuc->n_events; i++) {
		struct perf_event *event = cpuc->event[i];
		int idx = cpuc->current_idx[i];
		struct hw_perf_event *hwc;
		u64 val;

		hwc = &event->hw;
		val = sparc_perf_event_update(event, hwc, idx);
		/* The counter was programmed with -left, so bit 31
		 * still being set means it has not yet wrapped past
		 * zero and there is no overflow to handle for this
		 * event.
		 */
		if (val & (1ULL << 31))
			continue;

		data.period = event->hw.last_period;
		if (!sparc_perf_event_set_period(event, hwc, idx))
			continue;

		if (perf_event_overflow(event, 1, &data, regs))
			sparc_pmu_disable_event(cpuc, hwc, idx);
	}

	return NOTIFY_STOP;
}

static __read_mostly struct notifier_block perf_event_nmi_notifier = {
	.notifier_call		= perf_event_nmi_handler,
};

static bool __init supported_pmu(void)
{
	if (!strcmp(sparc_pmu_type, "ultra3") ||
	    !strcmp(sparc_pmu_type, "ultra3+") ||
	    !strcmp(sparc_pmu_type, "ultra3i") ||
	    !strcmp(sparc_pmu_type, "ultra4+")) {
		sparc_pmu = &ultra3_pmu;
		return true;
	}
	if (!strcmp(sparc_pmu_type, "niagara")) {
		sparc_pmu = &niagara1_pmu;
		return true;
	}
	if (!strcmp(sparc_pmu_type, "niagara2")) {
		sparc_pmu = &niagara2_pmu;
		return true;
	}
	return false;
}

void __init init_hw_perf_events(void)
{
	pr_info("Performance events: ");

	if (!supported_pmu()) {
		pr_cont("No support for PMU type '%s'\n", sparc_pmu_type);
		return;
	}

	pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);

	/* All sparc64 PMUs currently have 2 events.  */
	perf_max_events = 2;

	register_die_notifier(&perf_event_nmi_notifier);
}
static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip)
{
	if (entry->nr < PERF_MAX_STACK_DEPTH)
		entry->ip[entry->nr++] = ip;
}

static void perf_callchain_kernel(struct pt_regs *regs,
				  struct perf_callchain_entry *entry)
{
	unsigned long ksp, fp;

	callchain_store(entry, PERF_CONTEXT_KERNEL);
	callchain_store(entry, regs->tpc);

	ksp = regs->u_regs[UREG_I6];
	fp = ksp + STACK_BIAS;
	do {
		struct sparc_stackf *sf;
		struct pt_regs *regs;
		unsigned long pc;

		if (!kstack_valid(current_thread_info(), fp))
			break;

		sf = (struct sparc_stackf *) fp;
		regs = (struct pt_regs *) (sf + 1);

		if (kstack_is_trap_frame(current_thread_info(), regs)) {
			if (user_mode(regs))
				break;
			pc = regs->tpc;
			fp = regs->u_regs[UREG_I6] + STACK_BIAS;
		} else {
			pc = sf->callers_pc;
			fp = (unsigned long)sf->fp + STACK_BIAS;
		}
		callchain_store(entry, pc);
	} while (entry->nr < PERF_MAX_STACK_DEPTH);
}

static void perf_callchain_user_64(struct pt_regs *regs,
				   struct perf_callchain_entry *entry)
{
	unsigned long ufp;

	callchain_store(entry, PERF_CONTEXT_USER);
	callchain_store(entry, regs->tpc);

	ufp = regs->u_regs[UREG_I6] + STACK_BIAS;
	do {
		struct sparc_stackf *usf, sf;
		unsigned long pc;

		usf = (struct sparc_stackf *) ufp;
		if (__copy_from_user_inatomic(&sf, usf, sizeof(sf)))
			break;

		pc = sf.callers_pc;
		ufp = (unsigned long)sf.fp + STACK_BIAS;
		callchain_store(entry, pc);
	} while (entry->nr < PERF_MAX_STACK_DEPTH);
}

static void perf_callchain_user_32(struct pt_regs *regs,
				   struct perf_callchain_entry *entry)
{
	unsigned long ufp;

	callchain_store(entry, PERF_CONTEXT_USER);
	callchain_store(entry, regs->tpc);

	ufp = regs->u_regs[UREG_I6];
	do {
		struct sparc_stackf32 *usf, sf;
		unsigned long pc;

		usf = (struct sparc_stackf32 *) ufp;
		if (__copy_from_user_inatomic(&sf, usf, sizeof(sf)))
			break;

		pc = sf.callers_pc;
		ufp = (unsigned long)sf.fp;
		callchain_store(entry, pc);
	} while (entry->nr < PERF_MAX_STACK_DEPTH);
}

/* Like powerpc we can't get PMU interrupts within the PMU handler,
 * so no need for separate NMI and IRQ chains as on x86.
 */
static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);

struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
{
	struct perf_callchain_entry *entry = &__get_cpu_var(callchain);

	entry->nr = 0;
	if (!user_mode(regs)) {
		stack_trace_flush();
		perf_callchain_kernel(regs, entry);
		if (current->mm)
			regs = task_pt_regs(current);
		else
			regs = NULL;
	}
	if (regs) {
		flushw_user();
		if (test_thread_flag(TIF_32BIT))
			perf_callchain_user_32(regs, entry);
		else
			perf_callchain_user_64(regs, entry);
	}
	return entry;
}