/*
 *    Parisc performance counters
 *    Copyright (C) 2001 Randolph Chung <tausq@debian.org>
 *
 *    This code is derived, with permission, from HP/UX sources.
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2, or (at your option)
 *    any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *  Edited comment from original sources:
 *
 *  This driver programs the PCX-U/PCX-W performance counters
 *  on the PA-RISC 2.0 chips.  The driver keeps all images now
 *  internally to the kernel to hopefully eliminate the possibility
 *  of a bad image halting the CPU.  Also, there are different
 *  images for the PCX-W and later chips vs the PCX-U chips.
 *
 *  Only 1 process is allowed to access the driver at any time,
 *  so the only protection that is needed is at open and close.
 *  A variable "perf_enabled" is used to hold the state of the
 *  driver.  The spinlock "perf_lock" is used to protect the
 *  modification of the state during open/close operations so
 *  multiple processes don't get into the driver simultaneously.
 *
 *  This driver accesses the processor directly vs going through
 *  the PDC INTRIGUE calls.  This is done to eliminate bugs introduced
 *  in various PDC revisions.  The code is much more maintainable
 *  and reliable this way vs having to debug on every version of PDC
 *  on every box.
43 */ 44 45 #include <linux/capability.h> 46 #include <linux/init.h> 47 #include <linux/proc_fs.h> 48 #include <linux/miscdevice.h> 49 #include <linux/smp_lock.h> 50 #include <linux/spinlock.h> 51 52 #include <asm/uaccess.h> 53 #include <asm/perf.h> 54 #include <asm/parisc-device.h> 55 #include <asm/processor.h> 56 #include <asm/runway.h> 57 #include <asm/io.h> /* for __raw_read() */ 58 59 #include "perf_images.h" 60 61 #define MAX_RDR_WORDS 24 62 #define PERF_VERSION 2 /* derived from hpux's PI v2 interface */ 63 64 /* definition of RDR regs */ 65 struct rdr_tbl_ent { 66 uint16_t width; 67 uint8_t num_words; 68 uint8_t write_control; 69 }; 70 71 static int perf_processor_interface __read_mostly = UNKNOWN_INTF; 72 static int perf_enabled __read_mostly; 73 static spinlock_t perf_lock; 74 struct parisc_device *cpu_device __read_mostly; 75 76 /* RDRs to write for PCX-W */ 77 static const int perf_rdrs_W[] = 78 { 0, 1, 4, 5, 6, 15, 16, 17, 18, 20, 21, 22, 23, 24, 25, -1 }; 79 80 /* RDRs to write for PCX-U */ 81 static const int perf_rdrs_U[] = 82 { 0, 1, 4, 5, 6, 7, 16, 17, 18, 20, 21, 22, 23, 24, 25, -1 }; 83 84 /* RDR register descriptions for PCX-W */ 85 static const struct rdr_tbl_ent perf_rdr_tbl_W[] = { 86 { 19, 1, 8 }, /* RDR 0 */ 87 { 16, 1, 16 }, /* RDR 1 */ 88 { 72, 2, 0 }, /* RDR 2 */ 89 { 81, 2, 0 }, /* RDR 3 */ 90 { 328, 6, 0 }, /* RDR 4 */ 91 { 160, 3, 0 }, /* RDR 5 */ 92 { 336, 6, 0 }, /* RDR 6 */ 93 { 164, 3, 0 }, /* RDR 7 */ 94 { 0, 0, 0 }, /* RDR 8 */ 95 { 35, 1, 0 }, /* RDR 9 */ 96 { 6, 1, 0 }, /* RDR 10 */ 97 { 18, 1, 0 }, /* RDR 11 */ 98 { 13, 1, 0 }, /* RDR 12 */ 99 { 8, 1, 0 }, /* RDR 13 */ 100 { 8, 1, 0 }, /* RDR 14 */ 101 { 8, 1, 0 }, /* RDR 15 */ 102 { 1530, 24, 0 }, /* RDR 16 */ 103 { 16, 1, 0 }, /* RDR 17 */ 104 { 4, 1, 0 }, /* RDR 18 */ 105 { 0, 0, 0 }, /* RDR 19 */ 106 { 152, 3, 24 }, /* RDR 20 */ 107 { 152, 3, 24 }, /* RDR 21 */ 108 { 233, 4, 48 }, /* RDR 22 */ 109 { 233, 4, 48 }, /* RDR 23 */ 110 { 71, 2, 0 }, /* RDR 24 */ 111 { 71, 2, 0 
}, /* RDR 25 */ 112 { 11, 1, 0 }, /* RDR 26 */ 113 { 18, 1, 0 }, /* RDR 27 */ 114 { 128, 2, 0 }, /* RDR 28 */ 115 { 0, 0, 0 }, /* RDR 29 */ 116 { 16, 1, 0 }, /* RDR 30 */ 117 { 16, 1, 0 }, /* RDR 31 */ 118 }; 119 120 /* RDR register descriptions for PCX-U */ 121 static const struct rdr_tbl_ent perf_rdr_tbl_U[] = { 122 { 19, 1, 8 }, /* RDR 0 */ 123 { 32, 1, 16 }, /* RDR 1 */ 124 { 20, 1, 0 }, /* RDR 2 */ 125 { 0, 0, 0 }, /* RDR 3 */ 126 { 344, 6, 0 }, /* RDR 4 */ 127 { 176, 3, 0 }, /* RDR 5 */ 128 { 336, 6, 0 }, /* RDR 6 */ 129 { 0, 0, 0 }, /* RDR 7 */ 130 { 0, 0, 0 }, /* RDR 8 */ 131 { 0, 0, 0 }, /* RDR 9 */ 132 { 28, 1, 0 }, /* RDR 10 */ 133 { 33, 1, 0 }, /* RDR 11 */ 134 { 0, 0, 0 }, /* RDR 12 */ 135 { 230, 4, 0 }, /* RDR 13 */ 136 { 32, 1, 0 }, /* RDR 14 */ 137 { 128, 2, 0 }, /* RDR 15 */ 138 { 1494, 24, 0 }, /* RDR 16 */ 139 { 18, 1, 0 }, /* RDR 17 */ 140 { 4, 1, 0 }, /* RDR 18 */ 141 { 0, 0, 0 }, /* RDR 19 */ 142 { 158, 3, 24 }, /* RDR 20 */ 143 { 158, 3, 24 }, /* RDR 21 */ 144 { 194, 4, 48 }, /* RDR 22 */ 145 { 194, 4, 48 }, /* RDR 23 */ 146 { 71, 2, 0 }, /* RDR 24 */ 147 { 71, 2, 0 }, /* RDR 25 */ 148 { 28, 1, 0 }, /* RDR 26 */ 149 { 33, 1, 0 }, /* RDR 27 */ 150 { 88, 2, 0 }, /* RDR 28 */ 151 { 32, 1, 0 }, /* RDR 29 */ 152 { 24, 1, 0 }, /* RDR 30 */ 153 { 16, 1, 0 }, /* RDR 31 */ 154 }; 155 156 /* 157 * A non-zero write_control in the above tables is a byte offset into 158 * this array. 159 */ 160 static const uint64_t perf_bitmasks[] = { 161 0x0000000000000000ul, /* first dbl word must be zero */ 162 0xfdffe00000000000ul, /* RDR0 bitmask */ 163 0x003f000000000000ul, /* RDR1 bitmask */ 164 0x00fffffffffffffful, /* RDR20-RDR21 bitmask (152 bits) */ 165 0xfffffffffffffffful, 166 0xfffffffc00000000ul, 167 0xfffffffffffffffful, /* RDR22-RDR23 bitmask (233 bits) */ 168 0xfffffffffffffffful, 169 0xfffffffffffffffcul, 170 0xff00000000000000ul 171 }; 172 173 /* 174 * Write control bitmasks for Pa-8700 processor given 175 * some things have changed slightly. 
176 */ 177 static const uint64_t perf_bitmasks_piranha[] = { 178 0x0000000000000000ul, /* first dbl word must be zero */ 179 0xfdffe00000000000ul, /* RDR0 bitmask */ 180 0x003f000000000000ul, /* RDR1 bitmask */ 181 0x00fffffffffffffful, /* RDR20-RDR21 bitmask (158 bits) */ 182 0xfffffffffffffffful, 183 0xfffffffc00000000ul, 184 0xfffffffffffffffful, /* RDR22-RDR23 bitmask (210 bits) */ 185 0xfffffffffffffffful, 186 0xfffffffffffffffful, 187 0xfffc000000000000ul 188 }; 189 190 static const uint64_t *bitmask_array; /* array of bitmasks to use */ 191 192 /****************************************************************************** 193 * Function Prototypes 194 *****************************************************************************/ 195 static int perf_config(uint32_t *image_ptr); 196 static int perf_release(struct inode *inode, struct file *file); 197 static int perf_open(struct inode *inode, struct file *file); 198 static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos); 199 static ssize_t perf_write(struct file *file, const char __user *buf, size_t count, 200 loff_t *ppos); 201 static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 202 static void perf_start_counters(void); 203 static int perf_stop_counters(uint32_t *raddr); 204 static const struct rdr_tbl_ent * perf_rdr_get_entry(uint32_t rdr_num); 205 static int perf_rdr_read_ubuf(uint32_t rdr_num, uint64_t *buffer); 206 static int perf_rdr_clear(uint32_t rdr_num); 207 static int perf_write_image(uint64_t *memaddr); 208 static void perf_rdr_write(uint32_t rdr_num, uint64_t *buffer); 209 210 /* External Assembly Routines */ 211 extern uint64_t perf_rdr_shift_in_W (uint32_t rdr_num, uint16_t width); 212 extern uint64_t perf_rdr_shift_in_U (uint32_t rdr_num, uint16_t width); 213 extern void perf_rdr_shift_out_W (uint32_t rdr_num, uint64_t buffer); 214 extern void perf_rdr_shift_out_U (uint32_t rdr_num, uint64_t buffer); 215 extern void 
perf_intrigue_enable_perf_counters (void); 216 extern void perf_intrigue_disable_perf_counters (void); 217 218 /****************************************************************************** 219 * Function Definitions 220 *****************************************************************************/ 221 222 223 /* 224 * configure: 225 * 226 * Configure the cpu with a given data image. First turn off the counters, 227 * then download the image, then turn the counters back on. 228 */ 229 static int perf_config(uint32_t *image_ptr) 230 { 231 long error; 232 uint32_t raddr[4]; 233 234 /* Stop the counters*/ 235 error = perf_stop_counters(raddr); 236 if (error != 0) { 237 printk("perf_config: perf_stop_counters = %ld\n", error); 238 return -EINVAL; 239 } 240 241 printk("Preparing to write image\n"); 242 /* Write the image to the chip */ 243 error = perf_write_image((uint64_t *)image_ptr); 244 if (error != 0) { 245 printk("perf_config: DOWNLOAD = %ld\n", error); 246 return -EINVAL; 247 } 248 249 printk("Preparing to start counters\n"); 250 251 /* Start the counters */ 252 perf_start_counters(); 253 254 return sizeof(uint32_t); 255 } 256 257 /* 258 * Open the device and initialize all of its memory. The device is only 259 * opened once, but can be "queried" by multiple processes that know its 260 * file descriptor. 261 */ 262 static int perf_open(struct inode *inode, struct file *file) 263 { 264 lock_kernel(); 265 spin_lock(&perf_lock); 266 if (perf_enabled) { 267 spin_unlock(&perf_lock); 268 unlock_kernel(); 269 return -EBUSY; 270 } 271 perf_enabled = 1; 272 spin_unlock(&perf_lock); 273 unlock_kernel(); 274 275 return 0; 276 } 277 278 /* 279 * Close the device. 
280 */ 281 static int perf_release(struct inode *inode, struct file *file) 282 { 283 spin_lock(&perf_lock); 284 perf_enabled = 0; 285 spin_unlock(&perf_lock); 286 287 return 0; 288 } 289 290 /* 291 * Read does nothing for this driver 292 */ 293 static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos) 294 { 295 return 0; 296 } 297 298 /* 299 * write: 300 * 301 * This routine downloads the image to the chip. It must be 302 * called on the processor that the download should happen 303 * on. 304 */ 305 static ssize_t perf_write(struct file *file, const char __user *buf, size_t count, 306 loff_t *ppos) 307 { 308 int err; 309 size_t image_size; 310 uint32_t image_type; 311 uint32_t interface_type; 312 uint32_t test; 313 314 if (perf_processor_interface == ONYX_INTF) 315 image_size = PCXU_IMAGE_SIZE; 316 else if (perf_processor_interface == CUDA_INTF) 317 image_size = PCXW_IMAGE_SIZE; 318 else 319 return -EFAULT; 320 321 if (!capable(CAP_SYS_ADMIN)) 322 return -EACCES; 323 324 if (count != sizeof(uint32_t)) 325 return -EIO; 326 327 if ((err = copy_from_user(&image_type, buf, sizeof(uint32_t))) != 0) 328 return err; 329 330 /* Get the interface type and test type */ 331 interface_type = (image_type >> 16) & 0xffff; 332 test = (image_type & 0xffff); 333 334 /* Make sure everything makes sense */ 335 336 /* First check the machine type is correct for 337 the requested image */ 338 if (((perf_processor_interface == CUDA_INTF) && 339 (interface_type != CUDA_INTF)) || 340 ((perf_processor_interface == ONYX_INTF) && 341 (interface_type != ONYX_INTF))) 342 return -EINVAL; 343 344 /* Next check to make sure the requested image 345 is valid */ 346 if (((interface_type == CUDA_INTF) && 347 (test >= MAX_CUDA_IMAGES)) || 348 ((interface_type == ONYX_INTF) && 349 (test >= MAX_ONYX_IMAGES))) 350 return -EINVAL; 351 352 /* Copy the image into the processor */ 353 if (interface_type == CUDA_INTF) 354 return perf_config(cuda_images[test]); 355 else 356 return 
perf_config(onyx_images[test]); 357 358 return count; 359 } 360 361 /* 362 * Patch the images that need to know the IVA addresses. 363 */ 364 static void perf_patch_images(void) 365 { 366 #if 0 /* FIXME!! */ 367 /* 368 * NOTE: this routine is VERY specific to the current TLB image. 369 * If the image is changed, this routine might also need to be changed. 370 */ 371 extern void $i_itlb_miss_2_0(); 372 extern void $i_dtlb_miss_2_0(); 373 extern void PA2_0_iva(); 374 375 /* 376 * We can only use the lower 32-bits, the upper 32-bits should be 0 377 * anyway given this is in the kernel 378 */ 379 uint32_t itlb_addr = (uint32_t)&($i_itlb_miss_2_0); 380 uint32_t dtlb_addr = (uint32_t)&($i_dtlb_miss_2_0); 381 uint32_t IVAaddress = (uint32_t)&PA2_0_iva; 382 383 if (perf_processor_interface == ONYX_INTF) { 384 /* clear last 2 bytes */ 385 onyx_images[TLBMISS][15] &= 0xffffff00; 386 /* set 2 bytes */ 387 onyx_images[TLBMISS][15] |= (0x000000ff&((dtlb_addr) >> 24)); 388 onyx_images[TLBMISS][16] = (dtlb_addr << 8)&0xffffff00; 389 onyx_images[TLBMISS][17] = itlb_addr; 390 391 /* clear last 2 bytes */ 392 onyx_images[TLBHANDMISS][15] &= 0xffffff00; 393 /* set 2 bytes */ 394 onyx_images[TLBHANDMISS][15] |= (0x000000ff&((dtlb_addr) >> 24)); 395 onyx_images[TLBHANDMISS][16] = (dtlb_addr << 8)&0xffffff00; 396 onyx_images[TLBHANDMISS][17] = itlb_addr; 397 398 /* clear last 2 bytes */ 399 onyx_images[BIG_CPI][15] &= 0xffffff00; 400 /* set 2 bytes */ 401 onyx_images[BIG_CPI][15] |= (0x000000ff&((dtlb_addr) >> 24)); 402 onyx_images[BIG_CPI][16] = (dtlb_addr << 8)&0xffffff00; 403 onyx_images[BIG_CPI][17] = itlb_addr; 404 405 onyx_images[PANIC][15] &= 0xffffff00; /* clear last 2 bytes */ 406 onyx_images[PANIC][15] |= (0x000000ff&((IVAaddress) >> 24)); /* set 2 bytes */ 407 onyx_images[PANIC][16] = (IVAaddress << 8)&0xffffff00; 408 409 410 } else if (perf_processor_interface == CUDA_INTF) { 411 /* Cuda interface */ 412 cuda_images[TLBMISS][16] = 413 (cuda_images[TLBMISS][16]&0xffff0000) | 
414 ((dtlb_addr >> 8)&0x0000ffff); 415 cuda_images[TLBMISS][17] = 416 ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); 417 cuda_images[TLBMISS][18] = (itlb_addr << 16)&0xffff0000; 418 419 cuda_images[TLBHANDMISS][16] = 420 (cuda_images[TLBHANDMISS][16]&0xffff0000) | 421 ((dtlb_addr >> 8)&0x0000ffff); 422 cuda_images[TLBHANDMISS][17] = 423 ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); 424 cuda_images[TLBHANDMISS][18] = (itlb_addr << 16)&0xffff0000; 425 426 cuda_images[BIG_CPI][16] = 427 (cuda_images[BIG_CPI][16]&0xffff0000) | 428 ((dtlb_addr >> 8)&0x0000ffff); 429 cuda_images[BIG_CPI][17] = 430 ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); 431 cuda_images[BIG_CPI][18] = (itlb_addr << 16)&0xffff0000; 432 } else { 433 /* Unknown type */ 434 } 435 #endif 436 } 437 438 439 /* 440 * ioctl routine 441 * All routines effect the processor that they are executed on. Thus you 442 * must be running on the processor that you wish to change. 443 */ 444 445 static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 446 { 447 long error_start; 448 uint32_t raddr[4]; 449 int error = 0; 450 451 switch (cmd) { 452 453 case PA_PERF_ON: 454 /* Start the counters */ 455 perf_start_counters(); 456 break; 457 458 case PA_PERF_OFF: 459 error_start = perf_stop_counters(raddr); 460 if (error_start != 0) { 461 printk(KERN_ERR "perf_off: perf_stop_counters = %ld\n", error_start); 462 error = -EFAULT; 463 break; 464 } 465 466 /* copy out the Counters */ 467 if (copy_to_user((void __user *)arg, raddr, 468 sizeof (raddr)) != 0) { 469 error = -EFAULT; 470 break; 471 } 472 break; 473 474 case PA_PERF_VERSION: 475 /* Return the version # */ 476 error = put_user(PERF_VERSION, (int *)arg); 477 break; 478 479 default: 480 error = -ENOTTY; 481 } 482 483 return error; 484 } 485 486 static const struct file_operations perf_fops = { 487 .llseek = no_llseek, 488 .read = perf_read, 489 .write = perf_write, 490 .unlocked_ioctl = 
perf_ioctl, 491 .compat_ioctl = perf_ioctl, 492 .open = perf_open, 493 .release = perf_release 494 }; 495 496 static struct miscdevice perf_dev = { 497 MISC_DYNAMIC_MINOR, 498 PA_PERF_DEV, 499 &perf_fops 500 }; 501 502 /* 503 * Initialize the module 504 */ 505 static int __init perf_init(void) 506 { 507 int ret; 508 509 /* Determine correct processor interface to use */ 510 bitmask_array = perf_bitmasks; 511 512 if (boot_cpu_data.cpu_type == pcxu || 513 boot_cpu_data.cpu_type == pcxu_) { 514 perf_processor_interface = ONYX_INTF; 515 } else if (boot_cpu_data.cpu_type == pcxw || 516 boot_cpu_data.cpu_type == pcxw_ || 517 boot_cpu_data.cpu_type == pcxw2 || 518 boot_cpu_data.cpu_type == mako || 519 boot_cpu_data.cpu_type == mako2) { 520 perf_processor_interface = CUDA_INTF; 521 if (boot_cpu_data.cpu_type == pcxw2 || 522 boot_cpu_data.cpu_type == mako || 523 boot_cpu_data.cpu_type == mako2) 524 bitmask_array = perf_bitmasks_piranha; 525 } else { 526 perf_processor_interface = UNKNOWN_INTF; 527 printk("Performance monitoring counters not supported on this processor\n"); 528 return -ENODEV; 529 } 530 531 ret = misc_register(&perf_dev); 532 if (ret) { 533 printk(KERN_ERR "Performance monitoring counters: " 534 "cannot register misc device.\n"); 535 return ret; 536 } 537 538 /* Patch the images to match the system */ 539 perf_patch_images(); 540 541 spin_lock_init(&perf_lock); 542 543 /* TODO: this only lets us access the first cpu.. what to do for SMP? */ 544 cpu_device = cpu_data[0].dev; 545 printk("Performance monitoring counters enabled for %s\n", 546 cpu_data[0].dev->name); 547 548 return 0; 549 } 550 551 /* 552 * perf_start_counters(void) 553 * 554 * Start the counters. 555 */ 556 static void perf_start_counters(void) 557 { 558 /* Enable performance monitor counters */ 559 perf_intrigue_enable_perf_counters(); 560 } 561 562 /* 563 * perf_stop_counters 564 * 565 * Stop the performance counters and save counts 566 * in a per_processor array. 
567 */ 568 static int perf_stop_counters(uint32_t *raddr) 569 { 570 uint64_t userbuf[MAX_RDR_WORDS]; 571 572 /* Disable performance counters */ 573 perf_intrigue_disable_perf_counters(); 574 575 if (perf_processor_interface == ONYX_INTF) { 576 uint64_t tmp64; 577 /* 578 * Read the counters 579 */ 580 if (!perf_rdr_read_ubuf(16, userbuf)) 581 return -13; 582 583 /* Counter0 is bits 1398 to 1429 */ 584 tmp64 = (userbuf[21] << 22) & 0x00000000ffc00000; 585 tmp64 |= (userbuf[22] >> 42) & 0x00000000003fffff; 586 /* OR sticky0 (bit 1430) to counter0 bit 32 */ 587 tmp64 |= (userbuf[22] >> 10) & 0x0000000080000000; 588 raddr[0] = (uint32_t)tmp64; 589 590 /* Counter1 is bits 1431 to 1462 */ 591 tmp64 = (userbuf[22] >> 9) & 0x00000000ffffffff; 592 /* OR sticky1 (bit 1463) to counter1 bit 32 */ 593 tmp64 |= (userbuf[22] << 23) & 0x0000000080000000; 594 raddr[1] = (uint32_t)tmp64; 595 596 /* Counter2 is bits 1464 to 1495 */ 597 tmp64 = (userbuf[22] << 24) & 0x00000000ff000000; 598 tmp64 |= (userbuf[23] >> 40) & 0x0000000000ffffff; 599 /* OR sticky2 (bit 1496) to counter2 bit 32 */ 600 tmp64 |= (userbuf[23] >> 8) & 0x0000000080000000; 601 raddr[2] = (uint32_t)tmp64; 602 603 /* Counter3 is bits 1497 to 1528 */ 604 tmp64 = (userbuf[23] >> 7) & 0x00000000ffffffff; 605 /* OR sticky3 (bit 1529) to counter3 bit 32 */ 606 tmp64 |= (userbuf[23] << 25) & 0x0000000080000000; 607 raddr[3] = (uint32_t)tmp64; 608 609 /* 610 * Zero out the counters 611 */ 612 613 /* 614 * The counters and sticky-bits comprise the last 132 bits 615 * (1398 - 1529) of RDR16 on a U chip. We'll zero these 616 * out the easy way: zero out last 10 bits of dword 21, 617 * all of dword 22 and 58 bits (plus 6 don't care bits) of 618 * dword 23. 619 */ 620 userbuf[21] &= 0xfffffffffffffc00ul; /* 0 to last 10 bits */ 621 userbuf[22] = 0; 622 userbuf[23] = 0; 623 624 /* 625 * Write back the zeroed bytes + the image given 626 * the read was destructive. 
627 */ 628 perf_rdr_write(16, userbuf); 629 } else { 630 631 /* 632 * Read RDR-15 which contains the counters and sticky bits 633 */ 634 if (!perf_rdr_read_ubuf(15, userbuf)) { 635 return -13; 636 } 637 638 /* 639 * Clear out the counters 640 */ 641 perf_rdr_clear(15); 642 643 /* 644 * Copy the counters 645 */ 646 raddr[0] = (uint32_t)((userbuf[0] >> 32) & 0x00000000ffffffffUL); 647 raddr[1] = (uint32_t)(userbuf[0] & 0x00000000ffffffffUL); 648 raddr[2] = (uint32_t)((userbuf[1] >> 32) & 0x00000000ffffffffUL); 649 raddr[3] = (uint32_t)(userbuf[1] & 0x00000000ffffffffUL); 650 } 651 652 return 0; 653 } 654 655 /* 656 * perf_rdr_get_entry 657 * 658 * Retrieve a pointer to the description of what this 659 * RDR contains. 660 */ 661 static const struct rdr_tbl_ent * perf_rdr_get_entry(uint32_t rdr_num) 662 { 663 if (perf_processor_interface == ONYX_INTF) { 664 return &perf_rdr_tbl_U[rdr_num]; 665 } else { 666 return &perf_rdr_tbl_W[rdr_num]; 667 } 668 } 669 670 /* 671 * perf_rdr_read_ubuf 672 * 673 * Read the RDR value into the buffer specified. 
674 */ 675 static int perf_rdr_read_ubuf(uint32_t rdr_num, uint64_t *buffer) 676 { 677 uint64_t data, data_mask = 0; 678 uint32_t width, xbits, i; 679 const struct rdr_tbl_ent *tentry; 680 681 tentry = perf_rdr_get_entry(rdr_num); 682 if ((width = tentry->width) == 0) 683 return 0; 684 685 /* Clear out buffer */ 686 i = tentry->num_words; 687 while (i--) { 688 buffer[i] = 0; 689 } 690 691 /* Check for bits an even number of 64 */ 692 if ((xbits = width & 0x03f) != 0) { 693 data_mask = 1; 694 data_mask <<= (64 - xbits); 695 data_mask--; 696 } 697 698 /* Grab all of the data */ 699 i = tentry->num_words; 700 while (i--) { 701 702 if (perf_processor_interface == ONYX_INTF) { 703 data = perf_rdr_shift_in_U(rdr_num, width); 704 } else { 705 data = perf_rdr_shift_in_W(rdr_num, width); 706 } 707 if (xbits) { 708 buffer[i] |= (data << (64 - xbits)); 709 if (i) { 710 buffer[i-1] |= ((data >> xbits) & data_mask); 711 } 712 } else { 713 buffer[i] = data; 714 } 715 } 716 717 return 1; 718 } 719 720 /* 721 * perf_rdr_clear 722 * 723 * Zero out the given RDR register 724 */ 725 static int perf_rdr_clear(uint32_t rdr_num) 726 { 727 const struct rdr_tbl_ent *tentry; 728 int32_t i; 729 730 tentry = perf_rdr_get_entry(rdr_num); 731 732 if (tentry->width == 0) { 733 return -1; 734 } 735 736 i = tentry->num_words; 737 while (i--) { 738 if (perf_processor_interface == ONYX_INTF) { 739 perf_rdr_shift_out_U(rdr_num, 0UL); 740 } else { 741 perf_rdr_shift_out_W(rdr_num, 0UL); 742 } 743 } 744 745 return 0; 746 } 747 748 749 /* 750 * perf_write_image 751 * 752 * Write the given image out to the processor 753 */ 754 static int perf_write_image(uint64_t *memaddr) 755 { 756 uint64_t buffer[MAX_RDR_WORDS]; 757 uint64_t *bptr; 758 uint32_t dwords; 759 const uint32_t *intrigue_rdr; 760 const uint64_t *intrigue_bitmask; 761 uint64_t tmp64; 762 void __iomem *runway; 763 const struct rdr_tbl_ent *tentry; 764 int i; 765 766 /* Clear out counters */ 767 if (perf_processor_interface == ONYX_INTF) { 768 
769 perf_rdr_clear(16); 770 771 /* Toggle performance monitor */ 772 perf_intrigue_enable_perf_counters(); 773 perf_intrigue_disable_perf_counters(); 774 775 intrigue_rdr = perf_rdrs_U; 776 } else { 777 perf_rdr_clear(15); 778 intrigue_rdr = perf_rdrs_W; 779 } 780 781 /* Write all RDRs */ 782 while (*intrigue_rdr != -1) { 783 tentry = perf_rdr_get_entry(*intrigue_rdr); 784 perf_rdr_read_ubuf(*intrigue_rdr, buffer); 785 bptr = &buffer[0]; 786 dwords = tentry->num_words; 787 if (tentry->write_control) { 788 intrigue_bitmask = &bitmask_array[tentry->write_control >> 3]; 789 while (dwords--) { 790 tmp64 = *intrigue_bitmask & *memaddr++; 791 tmp64 |= (~(*intrigue_bitmask++)) & *bptr; 792 *bptr++ = tmp64; 793 } 794 } else { 795 while (dwords--) { 796 *bptr++ = *memaddr++; 797 } 798 } 799 800 perf_rdr_write(*intrigue_rdr, buffer); 801 intrigue_rdr++; 802 } 803 804 /* 805 * Now copy out the Runway stuff which is not in RDRs 806 */ 807 808 if (cpu_device == NULL) 809 { 810 printk(KERN_ERR "write_image: cpu_device not yet initialized!\n"); 811 return -1; 812 } 813 814 runway = ioremap_nocache(cpu_device->hpa.start, 4096); 815 816 /* Merge intrigue bits into Runway STATUS 0 */ 817 tmp64 = __raw_readq(runway + RUNWAY_STATUS) & 0xffecfffffffffffful; 818 __raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul), 819 runway + RUNWAY_STATUS); 820 821 /* Write RUNWAY DEBUG registers */ 822 for (i = 0; i < 8; i++) { 823 __raw_writeq(*memaddr++, runway + RUNWAY_DEBUG); 824 } 825 826 return 0; 827 } 828 829 /* 830 * perf_rdr_write 831 * 832 * Write the given RDR register with the contents 833 * of the given buffer. 
834 */ 835 static void perf_rdr_write(uint32_t rdr_num, uint64_t *buffer) 836 { 837 const struct rdr_tbl_ent *tentry; 838 int32_t i; 839 840 printk("perf_rdr_write\n"); 841 tentry = perf_rdr_get_entry(rdr_num); 842 if (tentry->width == 0) { return; } 843 844 i = tentry->num_words; 845 while (i--) { 846 if (perf_processor_interface == ONYX_INTF) { 847 perf_rdr_shift_out_U(rdr_num, buffer[i]); 848 } else { 849 perf_rdr_shift_out_W(rdr_num, buffer[i]); 850 } 851 } 852 printk("perf_rdr_write done\n"); 853 } 854 855 module_init(perf_init); 856