1 /* 2 * Performance counter callchain support - powerpc architecture code 3 * 4 * Copyright © 2009 Paul Mackerras, IBM Corporation. 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 #include <linux/kernel.h> 12 #include <linux/sched.h> 13 #include <linux/perf_event.h> 14 #include <linux/percpu.h> 15 #include <linux/uaccess.h> 16 #include <linux/mm.h> 17 #include <asm/ptrace.h> 18 #include <asm/pgtable.h> 19 #include <asm/sigcontext.h> 20 #include <asm/ucontext.h> 21 #include <asm/vdso.h> 22 #ifdef CONFIG_PPC64 23 #include "../kernel/ppc32.h" 24 #endif 25 26 27 /* 28 * Is sp valid as the address of the next kernel stack frame after prev_sp? 29 * The next frame may be in a different stack area but should not go 30 * back down in the same stack area. 31 */ 32 static int valid_next_sp(unsigned long sp, unsigned long prev_sp) 33 { 34 if (sp & 0xf) 35 return 0; /* must be 16-byte aligned */ 36 if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) 37 return 0; 38 if (sp >= prev_sp + STACK_FRAME_MIN_SIZE) 39 return 1; 40 /* 41 * sp could decrease when we jump off an interrupt stack 42 * back to the regular process stack. 43 */ 44 if ((sp & ~(THREAD_SIZE - 1)) != (prev_sp & ~(THREAD_SIZE - 1))) 45 return 1; 46 return 0; 47 } 48 49 void 50 perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) 51 { 52 unsigned long sp, next_sp; 53 unsigned long next_ip; 54 unsigned long lr; 55 long level = 0; 56 unsigned long *fp; 57 58 lr = regs->link; 59 sp = regs->gpr[1]; 60 perf_callchain_store(entry, perf_instruction_pointer(regs)); 61 62 if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) 63 return; 64 65 for (;;) { 66 fp = (unsigned long *) sp; 67 next_sp = fp[0]; 68 69 if (next_sp == sp + STACK_INT_FRAME_SIZE && 70 fp[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { 71 /* 72 * This looks like an interrupt frame for an 73 * interrupt that occurred in the kernel 74 */ 75 regs = (struct pt_regs *)(sp + STACK_FRAME_OVERHEAD); 76 next_ip = regs->nip; 77 lr = regs->link; 78 level = 0; 79 perf_callchain_store(entry, PERF_CONTEXT_KERNEL); 80 81 } else { 82 if (level == 0) 83 next_ip = lr; 84 else 85 next_ip = fp[STACK_FRAME_LR_SAVE]; 86 87 /* 88 * We can't tell which of the first two addresses 89 * we get are valid, but we can filter out the 90 * obviously bogus ones here. We replace them 91 * with 0 rather than removing them entirely so 92 * that userspace can tell which is which. 93 */ 94 if ((level == 1 && next_ip == lr) || 95 (level <= 1 && !kernel_text_address(next_ip))) 96 next_ip = 0; 97 98 ++level; 99 } 100 101 perf_callchain_store(entry, next_ip); 102 if (!valid_next_sp(next_sp, sp)) 103 return; 104 sp = next_sp; 105 } 106 } 107 108 #ifdef CONFIG_PPC64 109 /* 110 * On 64-bit we don't want to invoke hash_page on user addresses from 111 * interrupt context, so if the access faults, we read the page tables 112 * to find which page (if any) is mapped and access it directly. 113 */ 114 static int read_user_stack_slow(void __user *ptr, void *buf, int nb) 115 { 116 int ret = -EFAULT; 117 pgd_t *pgdir; 118 pte_t *ptep, pte; 119 unsigned shift; 120 unsigned long addr = (unsigned long) ptr; 121 unsigned long offset; 122 unsigned long pfn, flags; 123 void *kaddr; 124 125 pgdir = current->mm->pgd; 126 if (!pgdir) 127 return -EFAULT; 128 129 local_irq_save(flags); 130 ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift); 131 if (!ptep) 132 goto err_out; 133 if (!shift) 134 shift = PAGE_SHIFT; 135 136 /* align address to page boundary */ 137 offset = addr & ((1UL << shift) - 1); 138 139 pte = READ_ONCE(*ptep); 140 if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER)) 141 goto err_out; 142 pfn = pte_pfn(pte); 143 if (!page_is_ram(pfn)) 144 goto err_out; 145 146 /* no highmem to worry about here */ 147 kaddr = pfn_to_kaddr(pfn); 148 memcpy(buf, kaddr + offset, nb); 149 ret = 0; 150 err_out: 151 local_irq_restore(flags); 152 return ret; 153 } 154 155 static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret) 156 { 157 if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned long) || 158 ((unsigned long)ptr & 7)) 159 return -EFAULT; 160 161 pagefault_disable(); 162 if (!__get_user_inatomic(*ret, ptr)) { 163 pagefault_enable(); 164 return 0; 165 } 166 pagefault_enable(); 167 168 return read_user_stack_slow(ptr, ret, 8); 169 } 170 171 static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) 172 { 173 if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) || 174 ((unsigned long)ptr & 3)) 175 return -EFAULT; 176 177 pagefault_disable(); 178 if (!__get_user_inatomic(*ret, ptr)) { 179 pagefault_enable(); 180 return 0; 181 } 182 pagefault_enable(); 183 184 return read_user_stack_slow(ptr, ret, 4); 185 } 186 187 static inline int valid_user_sp(unsigned long sp, int is_64) 188 { 189 if (!sp || (sp & 7) || sp > (is_64 ? TASK_SIZE : 0x100000000UL) - 32) 190 return 0; 191 return 1; 192 } 193 194 /* 195 * 64-bit user processes use the same stack frame for RT and non-RT signals. 196 */ 197 struct signal_frame_64 { 198 char dummy[__SIGNAL_FRAMESIZE]; 199 struct ucontext uc; 200 unsigned long unused[2]; 201 unsigned int tramp[6]; 202 struct siginfo *pinfo; 203 void *puc; 204 struct siginfo info; 205 char abigap[288]; 206 }; 207 208 static int is_sigreturn_64_address(unsigned long nip, unsigned long fp) 209 { 210 if (nip == fp + offsetof(struct signal_frame_64, tramp)) 211 return 1; 212 if (vdso64_rt_sigtramp && current->mm->context.vdso_base && 213 nip == current->mm->context.vdso_base + vdso64_rt_sigtramp) 214 return 1; 215 return 0; 216 } 217 218 /* 219 * Do some sanity checking on the signal frame pointed to by sp. 220 * We check the pinfo and puc pointers in the frame. 221 */ 222 static int sane_signal_64_frame(unsigned long sp) 223 { 224 struct signal_frame_64 __user *sf; 225 unsigned long pinfo, puc; 226 227 sf = (struct signal_frame_64 __user *) sp; 228 if (read_user_stack_64((unsigned long __user *) &sf->pinfo, &pinfo) || 229 read_user_stack_64((unsigned long __user *) &sf->puc, &puc)) 230 return 0; 231 return pinfo == (unsigned long) &sf->info && 232 puc == (unsigned long) &sf->uc; 233 } 234 235 static void perf_callchain_user_64(struct perf_callchain_entry *entry, 236 struct pt_regs *regs) 237 { 238 unsigned long sp, next_sp; 239 unsigned long next_ip; 240 unsigned long lr; 241 long level = 0; 242 struct signal_frame_64 __user *sigframe; 243 unsigned long __user *fp, *uregs; 244 245 next_ip = perf_instruction_pointer(regs); 246 lr = regs->link; 247 sp = regs->gpr[1]; 248 perf_callchain_store(entry, next_ip); 249 250 while (entry->nr < PERF_MAX_STACK_DEPTH) { 251 fp = (unsigned long __user *) sp; 252 if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp)) 253 return; 254 if (level > 0 && read_user_stack_64(&fp[2], &next_ip)) 255 return; 256 257 /* 258 * Note: the next_sp - sp >= signal frame size check 259 * is true when next_sp < sp, which can happen when 260 * transitioning from an alternate signal stack to the 261 * normal stack. 262 */ 263 if (next_sp - sp >= sizeof(struct signal_frame_64) && 264 (is_sigreturn_64_address(next_ip, sp) || 265 (level <= 1 && is_sigreturn_64_address(lr, sp))) && 266 sane_signal_64_frame(sp)) { 267 /* 268 * This looks like an signal frame 269 */ 270 sigframe = (struct signal_frame_64 __user *) sp; 271 uregs = sigframe->uc.uc_mcontext.gp_regs; 272 if (read_user_stack_64(&uregs[PT_NIP], &next_ip) || 273 read_user_stack_64(&uregs[PT_LNK], &lr) || 274 read_user_stack_64(&uregs[PT_R1], &sp)) 275 return; 276 level = 0; 277 perf_callchain_store(entry, PERF_CONTEXT_USER); 278 perf_callchain_store(entry, next_ip); 279 continue; 280 } 281 282 if (level == 0) 283 next_ip = lr; 284 perf_callchain_store(entry, next_ip); 285 ++level; 286 sp = next_sp; 287 } 288 } 289 290 static inline int current_is_64bit(void) 291 { 292 /* 293 * We can't use test_thread_flag() here because we may be on an 294 * interrupt stack, and the thread flags don't get copied over 295 * from the thread_info on the main stack to the interrupt stack. 296 */ 297 return !test_ti_thread_flag(task_thread_info(current), TIF_32BIT); 298 } 299 300 #else /* CONFIG_PPC64 */ 301 /* 302 * On 32-bit we just access the address and let hash_page create a 303 * HPTE if necessary, so there is no need to fall back to reading 304 * the page tables. Since this is called at interrupt level, 305 * do_page_fault() won't treat a DSI as a page fault. 306 */ 307 static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) 308 { 309 int rc; 310 311 if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) || 312 ((unsigned long)ptr & 3)) 313 return -EFAULT; 314 315 pagefault_disable(); 316 rc = __get_user_inatomic(*ret, ptr); 317 pagefault_enable(); 318 319 return rc; 320 } 321 322 static inline void perf_callchain_user_64(struct perf_callchain_entry *entry, 323 struct pt_regs *regs) 324 { 325 } 326 327 static inline int current_is_64bit(void) 328 { 329 return 0; 330 } 331 332 static inline int valid_user_sp(unsigned long sp, int is_64) 333 { 334 if (!sp || (sp & 7) || sp > TASK_SIZE - 32) 335 return 0; 336 return 1; 337 } 338 339 #define __SIGNAL_FRAMESIZE32 __SIGNAL_FRAMESIZE 340 #define sigcontext32 sigcontext 341 #define mcontext32 mcontext 342 #define ucontext32 ucontext 343 #define compat_siginfo_t struct siginfo 344 345 #endif /* CONFIG_PPC64 */ 346 347 /* 348 * Layout for non-RT signal frames 349 */ 350 struct signal_frame_32 { 351 char dummy[__SIGNAL_FRAMESIZE32]; 352 struct sigcontext32 sctx; 353 struct mcontext32 mctx; 354 int abigap[56]; 355 }; 356 357 /* 358 * Layout for RT signal frames 359 */ 360 struct rt_signal_frame_32 { 361 char dummy[__SIGNAL_FRAMESIZE32 + 16]; 362 compat_siginfo_t info; 363 struct ucontext32 uc; 364 int abigap[56]; 365 }; 366 367 static int is_sigreturn_32_address(unsigned int nip, unsigned int fp) 368 { 369 if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad)) 370 return 1; 371 if (vdso32_sigtramp && current->mm->context.vdso_base && 372 nip == current->mm->context.vdso_base + vdso32_sigtramp) 373 return 1; 374 return 0; 375 } 376 377 static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp) 378 { 379 if (nip == fp + offsetof(struct rt_signal_frame_32, 380 uc.uc_mcontext.mc_pad)) 381 return 1; 382 if (vdso32_rt_sigtramp && current->mm->context.vdso_base && 383 nip == current->mm->context.vdso_base + vdso32_rt_sigtramp) 384 return 1; 385 return 0; 386 } 387 388 static int sane_signal_32_frame(unsigned int sp) 389 { 390 struct signal_frame_32 __user *sf; 391 unsigned int regs; 392 393 sf = (struct signal_frame_32 __user *) (unsigned long) sp; 394 if (read_user_stack_32((unsigned int __user *) &sf->sctx.regs, ®s)) 395 return 0; 396 return regs == (unsigned long) &sf->mctx; 397 } 398 399 static int sane_rt_signal_32_frame(unsigned int sp) 400 { 401 struct rt_signal_frame_32 __user *sf; 402 unsigned int regs; 403 404 sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp; 405 if (read_user_stack_32((unsigned int __user *) &sf->uc.uc_regs, ®s)) 406 return 0; 407 return regs == (unsigned long) &sf->uc.uc_mcontext; 408 } 409 410 static unsigned int __user *signal_frame_32_regs(unsigned int sp, 411 unsigned int next_sp, unsigned int next_ip) 412 { 413 struct mcontext32 __user *mctx = NULL; 414 struct signal_frame_32 __user *sf; 415 struct rt_signal_frame_32 __user *rt_sf; 416 417 /* 418 * Note: the next_sp - sp >= signal frame size check 419 * is true when next_sp < sp, for example, when 420 * transitioning from an alternate signal stack to the 421 * normal stack. 422 */ 423 if (next_sp - sp >= sizeof(struct signal_frame_32) && 424 is_sigreturn_32_address(next_ip, sp) && 425 sane_signal_32_frame(sp)) { 426 sf = (struct signal_frame_32 __user *) (unsigned long) sp; 427 mctx = &sf->mctx; 428 } 429 430 if (!mctx && next_sp - sp >= sizeof(struct rt_signal_frame_32) && 431 is_rt_sigreturn_32_address(next_ip, sp) && 432 sane_rt_signal_32_frame(sp)) { 433 rt_sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp; 434 mctx = &rt_sf->uc.uc_mcontext; 435 } 436 437 if (!mctx) 438 return NULL; 439 return mctx->mc_gregs; 440 } 441 442 static void perf_callchain_user_32(struct perf_callchain_entry *entry, 443 struct pt_regs *regs) 444 { 445 unsigned int sp, next_sp; 446 unsigned int next_ip; 447 unsigned int lr; 448 long level = 0; 449 unsigned int __user *fp, *uregs; 450 451 next_ip = perf_instruction_pointer(regs); 452 lr = regs->link; 453 sp = regs->gpr[1]; 454 perf_callchain_store(entry, next_ip); 455 456 while (entry->nr < PERF_MAX_STACK_DEPTH) { 457 fp = (unsigned int __user *) (unsigned long) sp; 458 if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp)) 459 return; 460 if (level > 0 && read_user_stack_32(&fp[1], &next_ip)) 461 return; 462 463 uregs = signal_frame_32_regs(sp, next_sp, next_ip); 464 if (!uregs && level <= 1) 465 uregs = signal_frame_32_regs(sp, next_sp, lr); 466 if (uregs) { 467 /* 468 * This looks like an signal frame, so restart 469 * the stack trace with the values in it. 470 */ 471 if (read_user_stack_32(&uregs[PT_NIP], &next_ip) || 472 read_user_stack_32(&uregs[PT_LNK], &lr) || 473 read_user_stack_32(&uregs[PT_R1], &sp)) 474 return; 475 level = 0; 476 perf_callchain_store(entry, PERF_CONTEXT_USER); 477 perf_callchain_store(entry, next_ip); 478 continue; 479 } 480 481 if (level == 0) 482 next_ip = lr; 483 perf_callchain_store(entry, next_ip); 484 ++level; 485 sp = next_sp; 486 } 487 } 488 489 void 490 perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) 491 { 492 if (current_is_64bit()) 493 perf_callchain_user_64(entry, regs); 494 else 495 perf_callchain_user_32(entry, regs); 496 } 497