/*
 *    Parisc performance counters
 *    Copyright (C) 2001 Randolph Chung <tausq@debian.org>
 *
 *    This code is derived, with permission, from HP/UX sources.
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2, or (at your option)
 *    any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *  Edited comment from original sources:
 *
 *  This driver programs the PCX-U/PCX-W performance counters
 *  on the PA-RISC 2.0 chips.  The driver keeps all images now
 *  internally to the kernel to hopefully eliminate the possibility
 *  of a bad image halting the CPU.  Also, there are different
 *  images for the PCX-W and later chips vs the PCX-U chips.
 *
 *  Only 1 process is allowed to access the driver at any time,
 *  so the only protection that is needed is at open and close.
 *  A variable "perf_enabled" is used to hold the state of the
 *  driver.  The spinlock "perf_lock" is used to protect the
 *  modification of the state during open/close operations so
 *  multiple processes don't get into the driver simultaneously.
 *
 *  This driver accesses the processor directly vs going through
 *  the PDC INTRIGUE calls.  This is done to eliminate bugs introduced
 *  in various PDC revisions.  The code is much more maintainable
 *  and reliable this way vs having to debug on every version of PDC
 *  on every box.
43 */ 44 45 #include <linux/init.h> 46 #include <linux/proc_fs.h> 47 #include <linux/miscdevice.h> 48 #include <linux/spinlock.h> 49 50 #include <asm/uaccess.h> 51 #include <asm/perf.h> 52 #include <asm/parisc-device.h> 53 #include <asm/processor.h> 54 #include <asm/runway.h> 55 #include <asm/io.h> /* for __raw_read() */ 56 57 #include "perf_images.h" 58 59 #define MAX_RDR_WORDS 24 60 #define PERF_VERSION 2 /* derived from hpux's PI v2 interface */ 61 62 /* definition of RDR regs */ 63 struct rdr_tbl_ent { 64 uint16_t width; 65 uint8_t num_words; 66 uint8_t write_control; 67 }; 68 69 static int perf_processor_interface = UNKNOWN_INTF; 70 static int perf_enabled = 0; 71 static spinlock_t perf_lock; 72 struct parisc_device *cpu_device = NULL; 73 74 /* RDRs to write for PCX-W */ 75 static int perf_rdrs_W[] = 76 { 0, 1, 4, 5, 6, 15, 16, 17, 18, 20, 21, 22, 23, 24, 25, -1 }; 77 78 /* RDRs to write for PCX-U */ 79 static int perf_rdrs_U[] = 80 { 0, 1, 4, 5, 6, 7, 16, 17, 18, 20, 21, 22, 23, 24, 25, -1 }; 81 82 /* RDR register descriptions for PCX-W */ 83 static struct rdr_tbl_ent perf_rdr_tbl_W[] = { 84 { 19, 1, 8 }, /* RDR 0 */ 85 { 16, 1, 16 }, /* RDR 1 */ 86 { 72, 2, 0 }, /* RDR 2 */ 87 { 81, 2, 0 }, /* RDR 3 */ 88 { 328, 6, 0 }, /* RDR 4 */ 89 { 160, 3, 0 }, /* RDR 5 */ 90 { 336, 6, 0 }, /* RDR 6 */ 91 { 164, 3, 0 }, /* RDR 7 */ 92 { 0, 0, 0 }, /* RDR 8 */ 93 { 35, 1, 0 }, /* RDR 9 */ 94 { 6, 1, 0 }, /* RDR 10 */ 95 { 18, 1, 0 }, /* RDR 11 */ 96 { 13, 1, 0 }, /* RDR 12 */ 97 { 8, 1, 0 }, /* RDR 13 */ 98 { 8, 1, 0 }, /* RDR 14 */ 99 { 8, 1, 0 }, /* RDR 15 */ 100 { 1530, 24, 0 }, /* RDR 16 */ 101 { 16, 1, 0 }, /* RDR 17 */ 102 { 4, 1, 0 }, /* RDR 18 */ 103 { 0, 0, 0 }, /* RDR 19 */ 104 { 152, 3, 24 }, /* RDR 20 */ 105 { 152, 3, 24 }, /* RDR 21 */ 106 { 233, 4, 48 }, /* RDR 22 */ 107 { 233, 4, 48 }, /* RDR 23 */ 108 { 71, 2, 0 }, /* RDR 24 */ 109 { 71, 2, 0 }, /* RDR 25 */ 110 { 11, 1, 0 }, /* RDR 26 */ 111 { 18, 1, 0 }, /* RDR 27 */ 112 { 128, 2, 0 }, /* RDR 28 */ 113 
{ 0, 0, 0 }, /* RDR 29 */ 114 { 16, 1, 0 }, /* RDR 30 */ 115 { 16, 1, 0 }, /* RDR 31 */ 116 }; 117 118 /* RDR register descriptions for PCX-U */ 119 static struct rdr_tbl_ent perf_rdr_tbl_U[] = { 120 { 19, 1, 8 }, /* RDR 0 */ 121 { 32, 1, 16 }, /* RDR 1 */ 122 { 20, 1, 0 }, /* RDR 2 */ 123 { 0, 0, 0 }, /* RDR 3 */ 124 { 344, 6, 0 }, /* RDR 4 */ 125 { 176, 3, 0 }, /* RDR 5 */ 126 { 336, 6, 0 }, /* RDR 6 */ 127 { 0, 0, 0 }, /* RDR 7 */ 128 { 0, 0, 0 }, /* RDR 8 */ 129 { 0, 0, 0 }, /* RDR 9 */ 130 { 28, 1, 0 }, /* RDR 10 */ 131 { 33, 1, 0 }, /* RDR 11 */ 132 { 0, 0, 0 }, /* RDR 12 */ 133 { 230, 4, 0 }, /* RDR 13 */ 134 { 32, 1, 0 }, /* RDR 14 */ 135 { 128, 2, 0 }, /* RDR 15 */ 136 { 1494, 24, 0 }, /* RDR 16 */ 137 { 18, 1, 0 }, /* RDR 17 */ 138 { 4, 1, 0 }, /* RDR 18 */ 139 { 0, 0, 0 }, /* RDR 19 */ 140 { 158, 3, 24 }, /* RDR 20 */ 141 { 158, 3, 24 }, /* RDR 21 */ 142 { 194, 4, 48 }, /* RDR 22 */ 143 { 194, 4, 48 }, /* RDR 23 */ 144 { 71, 2, 0 }, /* RDR 24 */ 145 { 71, 2, 0 }, /* RDR 25 */ 146 { 28, 1, 0 }, /* RDR 26 */ 147 { 33, 1, 0 }, /* RDR 27 */ 148 { 88, 2, 0 }, /* RDR 28 */ 149 { 32, 1, 0 }, /* RDR 29 */ 150 { 24, 1, 0 }, /* RDR 30 */ 151 { 16, 1, 0 }, /* RDR 31 */ 152 }; 153 154 /* 155 * A non-zero write_control in the above tables is a byte offset into 156 * this array. 157 */ 158 static uint64_t perf_bitmasks[] = { 159 0x0000000000000000ul, /* first dbl word must be zero */ 160 0xfdffe00000000000ul, /* RDR0 bitmask */ 161 0x003f000000000000ul, /* RDR1 bitmask */ 162 0x00fffffffffffffful, /* RDR20-RDR21 bitmask (152 bits) */ 163 0xfffffffffffffffful, 164 0xfffffffc00000000ul, 165 0xfffffffffffffffful, /* RDR22-RDR23 bitmask (233 bits) */ 166 0xfffffffffffffffful, 167 0xfffffffffffffffcul, 168 0xff00000000000000ul 169 }; 170 171 /* 172 * Write control bitmasks for Pa-8700 processor given 173 * somethings have changed slightly. 
174 */ 175 static uint64_t perf_bitmasks_piranha[] = { 176 0x0000000000000000ul, /* first dbl word must be zero */ 177 0xfdffe00000000000ul, /* RDR0 bitmask */ 178 0x003f000000000000ul, /* RDR1 bitmask */ 179 0x00fffffffffffffful, /* RDR20-RDR21 bitmask (158 bits) */ 180 0xfffffffffffffffful, 181 0xfffffffc00000000ul, 182 0xfffffffffffffffful, /* RDR22-RDR23 bitmask (210 bits) */ 183 0xfffffffffffffffful, 184 0xfffffffffffffffful, 185 0xfffc000000000000ul 186 }; 187 188 static uint64_t *bitmask_array; /* array of bitmasks to use */ 189 190 /****************************************************************************** 191 * Function Prototypes 192 *****************************************************************************/ 193 static int perf_config(uint32_t *image_ptr); 194 static int perf_release(struct inode *inode, struct file *file); 195 static int perf_open(struct inode *inode, struct file *file); 196 static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos); 197 static ssize_t perf_write(struct file *file, const char __user *buf, size_t count, 198 loff_t *ppos); 199 static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 200 static void perf_start_counters(void); 201 static int perf_stop_counters(uint32_t *raddr); 202 static struct rdr_tbl_ent * perf_rdr_get_entry(uint32_t rdr_num); 203 static int perf_rdr_read_ubuf(uint32_t rdr_num, uint64_t *buffer); 204 static int perf_rdr_clear(uint32_t rdr_num); 205 static int perf_write_image(uint64_t *memaddr); 206 static void perf_rdr_write(uint32_t rdr_num, uint64_t *buffer); 207 208 /* External Assembly Routines */ 209 extern uint64_t perf_rdr_shift_in_W (uint32_t rdr_num, uint16_t width); 210 extern uint64_t perf_rdr_shift_in_U (uint32_t rdr_num, uint16_t width); 211 extern void perf_rdr_shift_out_W (uint32_t rdr_num, uint64_t buffer); 212 extern void perf_rdr_shift_out_U (uint32_t rdr_num, uint64_t buffer); 213 extern void perf_intrigue_enable_perf_counters 
(void); 214 extern void perf_intrigue_disable_perf_counters (void); 215 216 /****************************************************************************** 217 * Function Definitions 218 *****************************************************************************/ 219 220 221 /* 222 * configure: 223 * 224 * Configure the cpu with a given data image. First turn off the counters, 225 * then download the image, then turn the counters back on. 226 */ 227 static int perf_config(uint32_t *image_ptr) 228 { 229 long error; 230 uint32_t raddr[4]; 231 232 /* Stop the counters*/ 233 error = perf_stop_counters(raddr); 234 if (error != 0) { 235 printk("perf_config: perf_stop_counters = %ld\n", error); 236 return -EINVAL; 237 } 238 239 printk("Preparing to write image\n"); 240 /* Write the image to the chip */ 241 error = perf_write_image((uint64_t *)image_ptr); 242 if (error != 0) { 243 printk("perf_config: DOWNLOAD = %ld\n", error); 244 return -EINVAL; 245 } 246 247 printk("Preparing to start counters\n"); 248 249 /* Start the counters */ 250 perf_start_counters(); 251 252 return sizeof(uint32_t); 253 } 254 255 /* 256 * Open the device and initialize all of its memory. The device is only 257 * opened once, but can be "queried" by multiple processes that know its 258 * file descriptor. 259 */ 260 static int perf_open(struct inode *inode, struct file *file) 261 { 262 spin_lock(&perf_lock); 263 if (perf_enabled) { 264 spin_unlock(&perf_lock); 265 return -EBUSY; 266 } 267 perf_enabled = 1; 268 spin_unlock(&perf_lock); 269 270 return 0; 271 } 272 273 /* 274 * Close the device. 
275 */ 276 static int perf_release(struct inode *inode, struct file *file) 277 { 278 spin_lock(&perf_lock); 279 perf_enabled = 0; 280 spin_unlock(&perf_lock); 281 282 return 0; 283 } 284 285 /* 286 * Read does nothing for this driver 287 */ 288 static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos) 289 { 290 return 0; 291 } 292 293 /* 294 * write: 295 * 296 * This routine downloads the image to the chip. It must be 297 * called on the processor that the download should happen 298 * on. 299 */ 300 static ssize_t perf_write(struct file *file, const char __user *buf, size_t count, 301 loff_t *ppos) 302 { 303 int err; 304 size_t image_size; 305 uint32_t image_type; 306 uint32_t interface_type; 307 uint32_t test; 308 309 if (perf_processor_interface == ONYX_INTF) 310 image_size = PCXU_IMAGE_SIZE; 311 else if (perf_processor_interface == CUDA_INTF) 312 image_size = PCXW_IMAGE_SIZE; 313 else 314 return -EFAULT; 315 316 if (!capable(CAP_SYS_ADMIN)) 317 return -EACCES; 318 319 if (count != sizeof(uint32_t)) 320 return -EIO; 321 322 if ((err = copy_from_user(&image_type, buf, sizeof(uint32_t))) != 0) 323 return err; 324 325 /* Get the interface type and test type */ 326 interface_type = (image_type >> 16) & 0xffff; 327 test = (image_type & 0xffff); 328 329 /* Make sure everything makes sense */ 330 331 /* First check the machine type is correct for 332 the requested image */ 333 if (((perf_processor_interface == CUDA_INTF) && 334 (interface_type != CUDA_INTF)) || 335 ((perf_processor_interface == ONYX_INTF) && 336 (interface_type != ONYX_INTF))) 337 return -EINVAL; 338 339 /* Next check to make sure the requested image 340 is valid */ 341 if (((interface_type == CUDA_INTF) && 342 (test >= MAX_CUDA_IMAGES)) || 343 ((interface_type == ONYX_INTF) && 344 (test >= MAX_ONYX_IMAGES))) 345 return -EINVAL; 346 347 /* Copy the image into the processor */ 348 if (interface_type == CUDA_INTF) 349 return perf_config(cuda_images[test]); 350 else 351 return 
perf_config(onyx_images[test]); 352 353 return count; 354 } 355 356 /* 357 * Patch the images that need to know the IVA addresses. 358 */ 359 static void perf_patch_images(void) 360 { 361 #if 0 /* FIXME!! */ 362 /* 363 * NOTE: this routine is VERY specific to the current TLB image. 364 * If the image is changed, this routine might also need to be changed. 365 */ 366 extern void $i_itlb_miss_2_0(); 367 extern void $i_dtlb_miss_2_0(); 368 extern void PA2_0_iva(); 369 370 /* 371 * We can only use the lower 32-bits, the upper 32-bits should be 0 372 * anyway given this is in the kernel 373 */ 374 uint32_t itlb_addr = (uint32_t)&($i_itlb_miss_2_0); 375 uint32_t dtlb_addr = (uint32_t)&($i_dtlb_miss_2_0); 376 uint32_t IVAaddress = (uint32_t)&PA2_0_iva; 377 378 if (perf_processor_interface == ONYX_INTF) { 379 /* clear last 2 bytes */ 380 onyx_images[TLBMISS][15] &= 0xffffff00; 381 /* set 2 bytes */ 382 onyx_images[TLBMISS][15] |= (0x000000ff&((dtlb_addr) >> 24)); 383 onyx_images[TLBMISS][16] = (dtlb_addr << 8)&0xffffff00; 384 onyx_images[TLBMISS][17] = itlb_addr; 385 386 /* clear last 2 bytes */ 387 onyx_images[TLBHANDMISS][15] &= 0xffffff00; 388 /* set 2 bytes */ 389 onyx_images[TLBHANDMISS][15] |= (0x000000ff&((dtlb_addr) >> 24)); 390 onyx_images[TLBHANDMISS][16] = (dtlb_addr << 8)&0xffffff00; 391 onyx_images[TLBHANDMISS][17] = itlb_addr; 392 393 /* clear last 2 bytes */ 394 onyx_images[BIG_CPI][15] &= 0xffffff00; 395 /* set 2 bytes */ 396 onyx_images[BIG_CPI][15] |= (0x000000ff&((dtlb_addr) >> 24)); 397 onyx_images[BIG_CPI][16] = (dtlb_addr << 8)&0xffffff00; 398 onyx_images[BIG_CPI][17] = itlb_addr; 399 400 onyx_images[PANIC][15] &= 0xffffff00; /* clear last 2 bytes */ 401 onyx_images[PANIC][15] |= (0x000000ff&((IVAaddress) >> 24)); /* set 2 bytes */ 402 onyx_images[PANIC][16] = (IVAaddress << 8)&0xffffff00; 403 404 405 } else if (perf_processor_interface == CUDA_INTF) { 406 /* Cuda interface */ 407 cuda_images[TLBMISS][16] = 408 (cuda_images[TLBMISS][16]&0xffff0000) | 
409 ((dtlb_addr >> 8)&0x0000ffff); 410 cuda_images[TLBMISS][17] = 411 ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); 412 cuda_images[TLBMISS][18] = (itlb_addr << 16)&0xffff0000; 413 414 cuda_images[TLBHANDMISS][16] = 415 (cuda_images[TLBHANDMISS][16]&0xffff0000) | 416 ((dtlb_addr >> 8)&0x0000ffff); 417 cuda_images[TLBHANDMISS][17] = 418 ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); 419 cuda_images[TLBHANDMISS][18] = (itlb_addr << 16)&0xffff0000; 420 421 cuda_images[BIG_CPI][16] = 422 (cuda_images[BIG_CPI][16]&0xffff0000) | 423 ((dtlb_addr >> 8)&0x0000ffff); 424 cuda_images[BIG_CPI][17] = 425 ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); 426 cuda_images[BIG_CPI][18] = (itlb_addr << 16)&0xffff0000; 427 } else { 428 /* Unknown type */ 429 } 430 #endif 431 } 432 433 434 /* 435 * ioctl routine 436 * All routines effect the processor that they are executed on. Thus you 437 * must be running on the processor that you wish to change. 438 */ 439 440 static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 441 { 442 long error_start; 443 uint32_t raddr[4]; 444 int error = 0; 445 446 lock_kernel(); 447 switch (cmd) { 448 449 case PA_PERF_ON: 450 /* Start the counters */ 451 perf_start_counters(); 452 break; 453 454 case PA_PERF_OFF: 455 error_start = perf_stop_counters(raddr); 456 if (error_start != 0) { 457 printk(KERN_ERR "perf_off: perf_stop_counters = %ld\n", error_start); 458 error = -EFAULT; 459 break; 460 } 461 462 /* copy out the Counters */ 463 if (copy_to_user((void __user *)arg, raddr, 464 sizeof (raddr)) != 0) { 465 error = -EFAULT; 466 break; 467 } 468 break; 469 470 case PA_PERF_VERSION: 471 /* Return the version # */ 472 error = put_user(PERF_VERSION, (int *)arg); 473 break; 474 475 default: 476 error = -ENOTTY; 477 } 478 479 unlock_kernel(); 480 481 return error; 482 } 483 484 static struct file_operations perf_fops = { 485 .llseek = no_llseek, 486 .read = perf_read, 487 .write = 
perf_write, 488 .unlocked_ioctl = perf_ioctl, 489 .compat_ioctl = perf_ioctl, 490 .open = perf_open, 491 .release = perf_release 492 }; 493 494 static struct miscdevice perf_dev = { 495 MISC_DYNAMIC_MINOR, 496 PA_PERF_DEV, 497 &perf_fops 498 }; 499 500 /* 501 * Initialize the module 502 */ 503 static int __init perf_init(void) 504 { 505 int ret; 506 507 /* Determine correct processor interface to use */ 508 bitmask_array = perf_bitmasks; 509 510 if (boot_cpu_data.cpu_type == pcxu || 511 boot_cpu_data.cpu_type == pcxu_) { 512 perf_processor_interface = ONYX_INTF; 513 } else if (boot_cpu_data.cpu_type == pcxw || 514 boot_cpu_data.cpu_type == pcxw_ || 515 boot_cpu_data.cpu_type == pcxw2 || 516 boot_cpu_data.cpu_type == mako) { 517 perf_processor_interface = CUDA_INTF; 518 if (boot_cpu_data.cpu_type == pcxw2 || 519 boot_cpu_data.cpu_type == mako) 520 bitmask_array = perf_bitmasks_piranha; 521 } else { 522 perf_processor_interface = UNKNOWN_INTF; 523 printk("Performance monitoring counters not supported on this processor\n"); 524 return -ENODEV; 525 } 526 527 ret = misc_register(&perf_dev); 528 if (ret) { 529 printk(KERN_ERR "Performance monitoring counters: " 530 "cannot register misc device.\n"); 531 return ret; 532 } 533 534 /* Patch the images to match the system */ 535 perf_patch_images(); 536 537 spin_lock_init(&perf_lock); 538 539 /* TODO: this only lets us access the first cpu.. what to do for SMP? */ 540 cpu_device = cpu_data[0].dev; 541 printk("Performance monitoring counters enabled for %s\n", 542 cpu_data[0].dev->name); 543 544 return 0; 545 } 546 547 /* 548 * perf_start_counters(void) 549 * 550 * Start the counters. 551 */ 552 static void perf_start_counters(void) 553 { 554 /* Enable performance monitor counters */ 555 perf_intrigue_enable_perf_counters(); 556 } 557 558 /* 559 * perf_stop_counters 560 * 561 * Stop the performance counters and save counts 562 * in a per_processor array. 
563 */ 564 static int perf_stop_counters(uint32_t *raddr) 565 { 566 uint64_t userbuf[MAX_RDR_WORDS]; 567 568 /* Disable performance counters */ 569 perf_intrigue_disable_perf_counters(); 570 571 if (perf_processor_interface == ONYX_INTF) { 572 uint64_t tmp64; 573 /* 574 * Read the counters 575 */ 576 if (!perf_rdr_read_ubuf(16, userbuf)) 577 return -13; 578 579 /* Counter0 is bits 1398 thru 1429 */ 580 tmp64 = (userbuf[21] << 22) & 0x00000000ffc00000; 581 tmp64 |= (userbuf[22] >> 42) & 0x00000000003fffff; 582 /* OR sticky0 (bit 1430) to counter0 bit 32 */ 583 tmp64 |= (userbuf[22] >> 10) & 0x0000000080000000; 584 raddr[0] = (uint32_t)tmp64; 585 586 /* Counter1 is bits 1431 thru 1462 */ 587 tmp64 = (userbuf[22] >> 9) & 0x00000000ffffffff; 588 /* OR sticky1 (bit 1463) to counter1 bit 32 */ 589 tmp64 |= (userbuf[22] << 23) & 0x0000000080000000; 590 raddr[1] = (uint32_t)tmp64; 591 592 /* Counter2 is bits 1464 thru 1495 */ 593 tmp64 = (userbuf[22] << 24) & 0x00000000ff000000; 594 tmp64 |= (userbuf[23] >> 40) & 0x0000000000ffffff; 595 /* OR sticky2 (bit 1496) to counter2 bit 32 */ 596 tmp64 |= (userbuf[23] >> 8) & 0x0000000080000000; 597 raddr[2] = (uint32_t)tmp64; 598 599 /* Counter3 is bits 1497 thru 1528 */ 600 tmp64 = (userbuf[23] >> 7) & 0x00000000ffffffff; 601 /* OR sticky3 (bit 1529) to counter3 bit 32 */ 602 tmp64 |= (userbuf[23] << 25) & 0x0000000080000000; 603 raddr[3] = (uint32_t)tmp64; 604 605 /* 606 * Zero out the counters 607 */ 608 609 /* 610 * The counters and sticky-bits comprise the last 132 bits 611 * (1398 - 1529) of RDR16 on a U chip. We'll zero these 612 * out the easy way: zero out last 10 bits of dword 21, 613 * all of dword 22 and 58 bits (plus 6 don't care bits) of 614 * dword 23. 615 */ 616 userbuf[21] &= 0xfffffffffffffc00ul; /* 0 to last 10 bits */ 617 userbuf[22] = 0; 618 userbuf[23] = 0; 619 620 /* 621 * Write back the zero'ed bytes + the image given 622 * the read was destructive. 
623 */ 624 perf_rdr_write(16, userbuf); 625 } else { 626 627 /* 628 * Read RDR-15 which contains the counters and sticky bits 629 */ 630 if (!perf_rdr_read_ubuf(15, userbuf)) { 631 return -13; 632 } 633 634 /* 635 * Clear out the counters 636 */ 637 perf_rdr_clear(15); 638 639 /* 640 * Copy the counters 641 */ 642 raddr[0] = (uint32_t)((userbuf[0] >> 32) & 0x00000000ffffffffUL); 643 raddr[1] = (uint32_t)(userbuf[0] & 0x00000000ffffffffUL); 644 raddr[2] = (uint32_t)((userbuf[1] >> 32) & 0x00000000ffffffffUL); 645 raddr[3] = (uint32_t)(userbuf[1] & 0x00000000ffffffffUL); 646 } 647 648 return 0; 649 } 650 651 /* 652 * perf_rdr_get_entry 653 * 654 * Retrieve a pointer to the description of what this 655 * RDR contains. 656 */ 657 static struct rdr_tbl_ent * perf_rdr_get_entry(uint32_t rdr_num) 658 { 659 if (perf_processor_interface == ONYX_INTF) { 660 return &perf_rdr_tbl_U[rdr_num]; 661 } else { 662 return &perf_rdr_tbl_W[rdr_num]; 663 } 664 } 665 666 /* 667 * perf_rdr_read_ubuf 668 * 669 * Read the RDR value into the buffer specified. 
670 */ 671 static int perf_rdr_read_ubuf(uint32_t rdr_num, uint64_t *buffer) 672 { 673 uint64_t data, data_mask = 0; 674 uint32_t width, xbits, i; 675 struct rdr_tbl_ent *tentry; 676 677 tentry = perf_rdr_get_entry(rdr_num); 678 if ((width = tentry->width) == 0) 679 return 0; 680 681 /* Clear out buffer */ 682 i = tentry->num_words; 683 while (i--) { 684 buffer[i] = 0; 685 } 686 687 /* Check for bits an even number of 64 */ 688 if ((xbits = width & 0x03f) != 0) { 689 data_mask = 1; 690 data_mask <<= (64 - xbits); 691 data_mask--; 692 } 693 694 /* Grab all of the data */ 695 i = tentry->num_words; 696 while (i--) { 697 698 if (perf_processor_interface == ONYX_INTF) { 699 data = perf_rdr_shift_in_U(rdr_num, width); 700 } else { 701 data = perf_rdr_shift_in_W(rdr_num, width); 702 } 703 if (xbits) { 704 buffer[i] |= (data << (64 - xbits)); 705 if (i) { 706 buffer[i-1] |= ((data >> xbits) & data_mask); 707 } 708 } else { 709 buffer[i] = data; 710 } 711 } 712 713 return 1; 714 } 715 716 /* 717 * perf_rdr_clear 718 * 719 * Zero out the given RDR register 720 */ 721 static int perf_rdr_clear(uint32_t rdr_num) 722 { 723 struct rdr_tbl_ent *tentry; 724 int32_t i; 725 726 tentry = perf_rdr_get_entry(rdr_num); 727 728 if (tentry->width == 0) { 729 return -1; 730 } 731 732 i = tentry->num_words; 733 while (i--) { 734 if (perf_processor_interface == ONYX_INTF) { 735 perf_rdr_shift_out_U(rdr_num, 0UL); 736 } else { 737 perf_rdr_shift_out_W(rdr_num, 0UL); 738 } 739 } 740 741 return 0; 742 } 743 744 745 /* 746 * perf_write_image 747 * 748 * Write the given image out to the processor 749 */ 750 static int perf_write_image(uint64_t *memaddr) 751 { 752 uint64_t buffer[MAX_RDR_WORDS]; 753 uint64_t *bptr; 754 uint32_t dwords; 755 uint32_t *intrigue_rdr; 756 uint64_t *intrigue_bitmask, tmp64; 757 void __iomem *runway; 758 struct rdr_tbl_ent *tentry; 759 int i; 760 761 /* Clear out counters */ 762 if (perf_processor_interface == ONYX_INTF) { 763 764 perf_rdr_clear(16); 765 766 /* Toggle 
performance monitor */ 767 perf_intrigue_enable_perf_counters(); 768 perf_intrigue_disable_perf_counters(); 769 770 intrigue_rdr = perf_rdrs_U; 771 } else { 772 perf_rdr_clear(15); 773 intrigue_rdr = perf_rdrs_W; 774 } 775 776 /* Write all RDRs */ 777 while (*intrigue_rdr != -1) { 778 tentry = perf_rdr_get_entry(*intrigue_rdr); 779 perf_rdr_read_ubuf(*intrigue_rdr, buffer); 780 bptr = &buffer[0]; 781 dwords = tentry->num_words; 782 if (tentry->write_control) { 783 intrigue_bitmask = &bitmask_array[tentry->write_control >> 3]; 784 while (dwords--) { 785 tmp64 = *intrigue_bitmask & *memaddr++; 786 tmp64 |= (~(*intrigue_bitmask++)) & *bptr; 787 *bptr++ = tmp64; 788 } 789 } else { 790 while (dwords--) { 791 *bptr++ = *memaddr++; 792 } 793 } 794 795 perf_rdr_write(*intrigue_rdr, buffer); 796 intrigue_rdr++; 797 } 798 799 /* 800 * Now copy out the Runway stuff which is not in RDRs 801 */ 802 803 if (cpu_device == NULL) 804 { 805 printk(KERN_ERR "write_image: cpu_device not yet initialized!\n"); 806 return -1; 807 } 808 809 runway = ioremap(cpu_device->hpa.start, 4096); 810 811 /* Merge intrigue bits into Runway STATUS 0 */ 812 tmp64 = __raw_readq(runway + RUNWAY_STATUS) & 0xffecfffffffffffful; 813 __raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul), 814 runway + RUNWAY_STATUS); 815 816 /* Write RUNWAY DEBUG registers */ 817 for (i = 0; i < 8; i++) { 818 __raw_writeq(*memaddr++, runway + RUNWAY_DEBUG); 819 } 820 821 return 0; 822 } 823 824 /* 825 * perf_rdr_write 826 * 827 * Write the given RDR register with the contents 828 * of the given buffer. 
829 */ 830 static void perf_rdr_write(uint32_t rdr_num, uint64_t *buffer) 831 { 832 struct rdr_tbl_ent *tentry; 833 int32_t i; 834 835 printk("perf_rdr_write\n"); 836 tentry = perf_rdr_get_entry(rdr_num); 837 if (tentry->width == 0) { return; } 838 839 i = tentry->num_words; 840 while (i--) { 841 if (perf_processor_interface == ONYX_INTF) { 842 perf_rdr_shift_out_U(rdr_num, buffer[i]); 843 } else { 844 perf_rdr_shift_out_W(rdr_num, buffer[i]); 845 } 846 } 847 printk("perf_rdr_write done\n"); 848 } 849 850 module_init(perf_init); 851