// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  Parisc performance counters
 *  Copyright (C) 2001 Randolph Chung <tausq@debian.org>
 *
 *  This code is derived, with permission, from HP/UX sources.
 */

/*
 *  Edited comment from original sources:
 *
 *  This driver programs the PCX-U/PCX-W performance counters
 *  on the PA-RISC 2.0 chips.  The driver keeps all images now
 *  internally to the kernel to hopefully eliminate the possibility
 *  of a bad image halting the CPU.  Also, there are different
 *  images for the PCX-W and later chips vs the PCX-U chips.
 *
 *  Only 1 process is allowed to access the driver at any time,
 *  so the only protection that is needed is at open and close.
 *  A variable "perf_enabled" is used to hold the state of the
 *  driver.  The spinlock "perf_lock" is used to protect the
 *  modification of the state during open/close operations so
 *  multiple processes don't get into the driver simultaneously.
 *
 *  This driver accesses the processor directly vs going through
 *  the PDC INTRIGUE calls.  This is done to eliminate bugs introduced
 *  in various PDC revisions.  The code is much more maintainable
 *  and reliable this way vs having to debug on every version of PDC
 *  on every box.
 */

#include <linux/capability.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/miscdevice.h>
#include <linux/spinlock.h>

#include <linux/uaccess.h>
#include <asm/perf.h>
#include <asm/parisc-device.h>
#include <asm/processor.h>
#include <asm/runway.h>
#include <asm/io.h>		/* for __raw_read() */

#include "perf_images.h"

/* Size, in 64-bit words, of the scratch buffers used to hold one RDR. */
#define MAX_RDR_WORDS	24
#define PERF_VERSION	2	/* derived from hpux's PI v2 interface */

/*
 * definition of RDR regs
 *
 * One entry describes one RDR as used by the accessors in this file.
 */
struct rdr_tbl_ent {
	uint16_t	width;		/* width in bits; 0 makes the accessors skip the RDR */
	uint8_t		num_words;	/* number of 64-bit words shifted in/out per access */
	uint8_t		write_control;	/* 0, or byte offset of the write mask in the bitmask array */
};

/* Interface selected by perf_init(): ONYX_INTF, CUDA_INTF or UNKNOWN_INTF. */
static int perf_processor_interface __read_mostly = UNKNOWN_INTF;
/* Non-zero while the device is open; modified under perf_lock. */
static int perf_enabled __read_mostly;
static DEFINE_SPINLOCK(perf_lock);
/* Device of CPU 0, set by perf_init(); perf_write_image() maps its HPA. */
struct parisc_device *cpu_device __read_mostly;

/* RDRs to write for PCX-W (list is terminated by -1) */
static const int perf_rdrs_W[] =
	{ 0, 1, 4, 5, 6, 15, 16, 17, 18, 20, 21, 22, 23, 24, 25, -1 };

/* RDRs to write for PCX-U (list is terminated by -1) */
static const int perf_rdrs_U[] =
	{ 0, 1, 4, 5, 6, 7, 16, 17, 18, 20, 21, 22, 23, 24, 25, -1 };

/*
 * RDR register descriptions for PCX-W
 *
 * Indexed by RDR number; columns are { width, num_words, write_control }.
 */
static const struct rdr_tbl_ent perf_rdr_tbl_W[] = {
	{ 19,	1,	8 },   /* RDR 0 */
	{ 16,	1,	16 },  /* RDR 1 */
	{ 72,	2,	0 },   /* RDR 2 */
	{ 81,	2,	0 },   /* RDR 3 */
	{ 328,	6,	0 },   /* RDR 4 */
	{ 160,	3,	0 },   /* RDR 5 */
	{ 336,	6,	0 },   /* RDR 6 */
	{ 164,	3,	0 },   /* RDR 7 */
	{ 0,	0,	0 },   /* RDR 8 */
	{ 35,	1,	0 },   /* RDR 9 */
	{ 6,	1,	0 },   /* RDR 10 */
	{ 18,	1,	0 },   /* RDR 11 */
	{ 13,	1,	0 },   /* RDR 12 */
	{ 8,	1,	0 },   /* RDR 13 */
	{ 8,	1,	0 },   /* RDR 14 */
	{ 8,	1,	0 },   /* RDR 15 */
	{ 1530,	24,	0 },   /* RDR 16 */
	{ 16,	1,	0 },   /* RDR 17 */
	{ 4,	1,	0 },   /* RDR 18 */
	{ 0,	0,	0 },   /* RDR 19 */
	{ 152,	3,	24 },  /* RDR 20 */
	{ 152,	3,	24 },  /* RDR 21 */
	{ 233,	4,	48 },  /* RDR 22 */
	{ 233,	4,	48 },  /* RDR 23 */
	{ 71,	2,	0 },   /* RDR 24 */
	{ 71,	2,	0 },   /* RDR 25 */
	{ 11,	1,	0 },   /* RDR 26 */
	{ 18,	1,	0 },   /* RDR 27 */
	{ 128,	2,	0 },   /* RDR 28 */
	{ 0,	0,	0 },   /* RDR 29 */
	{ 16,	1,	0 },   /* RDR 30 */
	{ 16,	1,	0 },   /* RDR 31 */
};

/*
 * RDR register descriptions for PCX-U
 *
 * Indexed by RDR number; columns are { width, num_words, write_control }.
 */
static const struct rdr_tbl_ent perf_rdr_tbl_U[] = {
	{ 19,	1,	8 },   /* RDR 0 */
	{ 32,	1,	16 },  /* RDR 1 */
	{ 20,	1,	0 },   /* RDR 2 */
	{ 0,	0,	0 },   /* RDR 3 */
	{ 344,	6,	0 },   /* RDR 4 */
	{ 176,	3,	0 },   /* RDR 5 */
	{ 336,	6,	0 },   /* RDR 6 */
	{ 0,	0,	0 },   /* RDR 7 */
	{ 0,	0,	0 },   /* RDR 8 */
	{ 0,	0,	0 },   /* RDR 9 */
	{ 28,	1,	0 },   /* RDR 10 */
	{ 33,	1,	0 },   /* RDR 11 */
	{ 0,	0,	0 },   /* RDR 12 */
	{ 230,	4,	0 },   /* RDR 13 */
	{ 32,	1,	0 },   /* RDR 14 */
	{ 128,	2,	0 },   /* RDR 15 */
	{ 1494,	24,	0 },   /* RDR 16 */
	{ 18,	1,	0 },   /* RDR 17 */
	{ 4,	1,	0 },   /* RDR 18 */
	{ 0,	0,	0 },   /* RDR 19 */
	{ 158,	3,	24 },  /* RDR 20 */
	{ 158,	3,	24 },  /* RDR 21 */
	{ 194,	4,	48 },  /* RDR 22 */
	{ 194,	4,	48 },  /* RDR 23 */
	{ 71,	2,	0 },   /* RDR 24 */
	{ 71,	2,	0 },   /* RDR 25 */
	{ 28,	1,	0 },   /* RDR 26 */
	{ 33,	1,	0 },   /* RDR 27 */
	{ 88,	2,	0 },   /* RDR 28 */
	{ 32,	1,	0 },   /* RDR 29 */
	{ 24,	1,	0 },   /* RDR 30 */
	{ 16,	1,	0 },   /* RDR 31 */
};

/*
 * A non-zero write_control in the above tables is a byte offset into
 * this array.
 *
 * The offset is converted to an element index with ">> 3" (8 bytes per
 * entry), so write_control 8, 16, 24 and 48 select entries 1, 2, 3 and 6.
 * A set mask bit takes the value from the image being written; a clear
 * bit keeps the value read back from the RDR.
 */
static const uint64_t perf_bitmasks[] = {
	0x0000000000000000ul,     /* first dbl word must be zero */
	0xfdffe00000000000ul,     /* RDR0 bitmask */
	0x003f000000000000ul,     /* RDR1 bitmask */
	0x00fffffffffffffful,     /* RDR20-RDR21 bitmask (152 bits) */
	0xfffffffffffffffful,
	0xfffffffc00000000ul,
	0xfffffffffffffffful,     /* RDR22-RDR23 bitmask (233 bits) */
	0xfffffffffffffffful,
	0xfffffffffffffffcul,
	0xff00000000000000ul
};

/*
 * Write control bitmasks for Pa-8700 processor given
 * some things have changed slightly.
162 */ 163 static const uint64_t perf_bitmasks_piranha[] = { 164 0x0000000000000000ul, /* first dbl word must be zero */ 165 0xfdffe00000000000ul, /* RDR0 bitmask */ 166 0x003f000000000000ul, /* RDR1 bitmask */ 167 0x00fffffffffffffful, /* RDR20-RDR21 bitmask (158 bits) */ 168 0xfffffffffffffffful, 169 0xfffffffc00000000ul, 170 0xfffffffffffffffful, /* RDR22-RDR23 bitmask (210 bits) */ 171 0xfffffffffffffffful, 172 0xfffffffffffffffful, 173 0xfffc000000000000ul 174 }; 175 176 static const uint64_t *bitmask_array; /* array of bitmasks to use */ 177 178 /****************************************************************************** 179 * Function Prototypes 180 *****************************************************************************/ 181 static int perf_config(uint32_t *image_ptr); 182 static int perf_release(struct inode *inode, struct file *file); 183 static int perf_open(struct inode *inode, struct file *file); 184 static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos); 185 static ssize_t perf_write(struct file *file, const char __user *buf, 186 size_t count, loff_t *ppos); 187 static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 188 static void perf_start_counters(void); 189 static int perf_stop_counters(uint32_t *raddr); 190 static const struct rdr_tbl_ent * perf_rdr_get_entry(uint32_t rdr_num); 191 static int perf_rdr_read_ubuf(uint32_t rdr_num, uint64_t *buffer); 192 static int perf_rdr_clear(uint32_t rdr_num); 193 static int perf_write_image(uint64_t *memaddr); 194 static void perf_rdr_write(uint32_t rdr_num, uint64_t *buffer); 195 196 /* External Assembly Routines */ 197 extern uint64_t perf_rdr_shift_in_W (uint32_t rdr_num, uint16_t width); 198 extern uint64_t perf_rdr_shift_in_U (uint32_t rdr_num, uint16_t width); 199 extern void perf_rdr_shift_out_W (uint32_t rdr_num, uint64_t buffer); 200 extern void perf_rdr_shift_out_U (uint32_t rdr_num, uint64_t buffer); 201 extern void 
perf_intrigue_enable_perf_counters (void); 202 extern void perf_intrigue_disable_perf_counters (void); 203 204 /****************************************************************************** 205 * Function Definitions 206 *****************************************************************************/ 207 208 209 /* 210 * configure: 211 * 212 * Configure the cpu with a given data image. First turn off the counters, 213 * then download the image, then turn the counters back on. 214 */ 215 static int perf_config(uint32_t *image_ptr) 216 { 217 long error; 218 uint32_t raddr[4]; 219 220 /* Stop the counters*/ 221 error = perf_stop_counters(raddr); 222 if (error != 0) { 223 printk("perf_config: perf_stop_counters = %ld\n", error); 224 return -EINVAL; 225 } 226 227 printk("Preparing to write image\n"); 228 /* Write the image to the chip */ 229 error = perf_write_image((uint64_t *)image_ptr); 230 if (error != 0) { 231 printk("perf_config: DOWNLOAD = %ld\n", error); 232 return -EINVAL; 233 } 234 235 printk("Preparing to start counters\n"); 236 237 /* Start the counters */ 238 perf_start_counters(); 239 240 return sizeof(uint32_t); 241 } 242 243 /* 244 * Open the device and initialize all of its memory. The device is only 245 * opened once, but can be "queried" by multiple processes that know its 246 * file descriptor. 247 */ 248 static int perf_open(struct inode *inode, struct file *file) 249 { 250 spin_lock(&perf_lock); 251 if (perf_enabled) { 252 spin_unlock(&perf_lock); 253 return -EBUSY; 254 } 255 perf_enabled = 1; 256 spin_unlock(&perf_lock); 257 258 return 0; 259 } 260 261 /* 262 * Close the device. 
263 */ 264 static int perf_release(struct inode *inode, struct file *file) 265 { 266 spin_lock(&perf_lock); 267 perf_enabled = 0; 268 spin_unlock(&perf_lock); 269 270 return 0; 271 } 272 273 /* 274 * Read does nothing for this driver 275 */ 276 static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos) 277 { 278 return 0; 279 } 280 281 /* 282 * write: 283 * 284 * This routine downloads the image to the chip. It must be 285 * called on the processor that the download should happen 286 * on. 287 */ 288 static ssize_t perf_write(struct file *file, const char __user *buf, 289 size_t count, loff_t *ppos) 290 { 291 size_t image_size; 292 uint32_t image_type; 293 uint32_t interface_type; 294 uint32_t test; 295 296 if (perf_processor_interface == ONYX_INTF) 297 image_size = PCXU_IMAGE_SIZE; 298 else if (perf_processor_interface == CUDA_INTF) 299 image_size = PCXW_IMAGE_SIZE; 300 else 301 return -EFAULT; 302 303 if (!capable(CAP_SYS_ADMIN)) 304 return -EACCES; 305 306 if (count != sizeof(uint32_t)) 307 return -EIO; 308 309 if (copy_from_user(&image_type, buf, sizeof(uint32_t))) 310 return -EFAULT; 311 312 /* Get the interface type and test type */ 313 interface_type = (image_type >> 16) & 0xffff; 314 test = (image_type & 0xffff); 315 316 /* Make sure everything makes sense */ 317 318 /* First check the machine type is correct for 319 the requested image */ 320 if (((perf_processor_interface == CUDA_INTF) && 321 (interface_type != CUDA_INTF)) || 322 ((perf_processor_interface == ONYX_INTF) && 323 (interface_type != ONYX_INTF))) 324 return -EINVAL; 325 326 /* Next check to make sure the requested image 327 is valid */ 328 if (((interface_type == CUDA_INTF) && 329 (test >= MAX_CUDA_IMAGES)) || 330 ((interface_type == ONYX_INTF) && 331 (test >= MAX_ONYX_IMAGES))) 332 return -EINVAL; 333 334 /* Copy the image into the processor */ 335 if (interface_type == CUDA_INTF) 336 return perf_config(cuda_images[test]); 337 else 338 return 
perf_config(onyx_images[test]); 339 340 return count; 341 } 342 343 /* 344 * Patch the images that need to know the IVA addresses. 345 */ 346 static void perf_patch_images(void) 347 { 348 #if 0 /* FIXME!! */ 349 /* 350 * NOTE: this routine is VERY specific to the current TLB image. 351 * If the image is changed, this routine might also need to be changed. 352 */ 353 extern void $i_itlb_miss_2_0(); 354 extern void $i_dtlb_miss_2_0(); 355 extern void PA2_0_iva(); 356 357 /* 358 * We can only use the lower 32-bits, the upper 32-bits should be 0 359 * anyway given this is in the kernel 360 */ 361 uint32_t itlb_addr = (uint32_t)&($i_itlb_miss_2_0); 362 uint32_t dtlb_addr = (uint32_t)&($i_dtlb_miss_2_0); 363 uint32_t IVAaddress = (uint32_t)&PA2_0_iva; 364 365 if (perf_processor_interface == ONYX_INTF) { 366 /* clear last 2 bytes */ 367 onyx_images[TLBMISS][15] &= 0xffffff00; 368 /* set 2 bytes */ 369 onyx_images[TLBMISS][15] |= (0x000000ff&((dtlb_addr) >> 24)); 370 onyx_images[TLBMISS][16] = (dtlb_addr << 8)&0xffffff00; 371 onyx_images[TLBMISS][17] = itlb_addr; 372 373 /* clear last 2 bytes */ 374 onyx_images[TLBHANDMISS][15] &= 0xffffff00; 375 /* set 2 bytes */ 376 onyx_images[TLBHANDMISS][15] |= (0x000000ff&((dtlb_addr) >> 24)); 377 onyx_images[TLBHANDMISS][16] = (dtlb_addr << 8)&0xffffff00; 378 onyx_images[TLBHANDMISS][17] = itlb_addr; 379 380 /* clear last 2 bytes */ 381 onyx_images[BIG_CPI][15] &= 0xffffff00; 382 /* set 2 bytes */ 383 onyx_images[BIG_CPI][15] |= (0x000000ff&((dtlb_addr) >> 24)); 384 onyx_images[BIG_CPI][16] = (dtlb_addr << 8)&0xffffff00; 385 onyx_images[BIG_CPI][17] = itlb_addr; 386 387 onyx_images[PANIC][15] &= 0xffffff00; /* clear last 2 bytes */ 388 onyx_images[PANIC][15] |= (0x000000ff&((IVAaddress) >> 24)); /* set 2 bytes */ 389 onyx_images[PANIC][16] = (IVAaddress << 8)&0xffffff00; 390 391 392 } else if (perf_processor_interface == CUDA_INTF) { 393 /* Cuda interface */ 394 cuda_images[TLBMISS][16] = 395 (cuda_images[TLBMISS][16]&0xffff0000) | 
396 ((dtlb_addr >> 8)&0x0000ffff); 397 cuda_images[TLBMISS][17] = 398 ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); 399 cuda_images[TLBMISS][18] = (itlb_addr << 16)&0xffff0000; 400 401 cuda_images[TLBHANDMISS][16] = 402 (cuda_images[TLBHANDMISS][16]&0xffff0000) | 403 ((dtlb_addr >> 8)&0x0000ffff); 404 cuda_images[TLBHANDMISS][17] = 405 ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); 406 cuda_images[TLBHANDMISS][18] = (itlb_addr << 16)&0xffff0000; 407 408 cuda_images[BIG_CPI][16] = 409 (cuda_images[BIG_CPI][16]&0xffff0000) | 410 ((dtlb_addr >> 8)&0x0000ffff); 411 cuda_images[BIG_CPI][17] = 412 ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); 413 cuda_images[BIG_CPI][18] = (itlb_addr << 16)&0xffff0000; 414 } else { 415 /* Unknown type */ 416 } 417 #endif 418 } 419 420 421 /* 422 * ioctl routine 423 * All routines effect the processor that they are executed on. Thus you 424 * must be running on the processor that you wish to change. 425 */ 426 427 static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 428 { 429 long error_start; 430 uint32_t raddr[4]; 431 int error = 0; 432 433 switch (cmd) { 434 435 case PA_PERF_ON: 436 /* Start the counters */ 437 perf_start_counters(); 438 break; 439 440 case PA_PERF_OFF: 441 error_start = perf_stop_counters(raddr); 442 if (error_start != 0) { 443 printk(KERN_ERR "perf_off: perf_stop_counters = %ld\n", error_start); 444 error = -EFAULT; 445 break; 446 } 447 448 /* copy out the Counters */ 449 if (copy_to_user((void __user *)arg, raddr, 450 sizeof (raddr)) != 0) { 451 error = -EFAULT; 452 break; 453 } 454 break; 455 456 case PA_PERF_VERSION: 457 /* Return the version # */ 458 error = put_user(PERF_VERSION, (int *)arg); 459 break; 460 461 default: 462 error = -ENOTTY; 463 } 464 465 return error; 466 } 467 468 static const struct file_operations perf_fops = { 469 .llseek = no_llseek, 470 .read = perf_read, 471 .write = perf_write, 472 .unlocked_ioctl = 
perf_ioctl, 473 .compat_ioctl = perf_ioctl, 474 .open = perf_open, 475 .release = perf_release 476 }; 477 478 static struct miscdevice perf_dev = { 479 MISC_DYNAMIC_MINOR, 480 PA_PERF_DEV, 481 &perf_fops 482 }; 483 484 /* 485 * Initialize the module 486 */ 487 static int __init perf_init(void) 488 { 489 int ret; 490 491 /* Determine correct processor interface to use */ 492 bitmask_array = perf_bitmasks; 493 494 if (boot_cpu_data.cpu_type == pcxu || 495 boot_cpu_data.cpu_type == pcxu_) { 496 perf_processor_interface = ONYX_INTF; 497 } else if (boot_cpu_data.cpu_type == pcxw || 498 boot_cpu_data.cpu_type == pcxw_ || 499 boot_cpu_data.cpu_type == pcxw2 || 500 boot_cpu_data.cpu_type == mako || 501 boot_cpu_data.cpu_type == mako2) { 502 perf_processor_interface = CUDA_INTF; 503 if (boot_cpu_data.cpu_type == pcxw2 || 504 boot_cpu_data.cpu_type == mako || 505 boot_cpu_data.cpu_type == mako2) 506 bitmask_array = perf_bitmasks_piranha; 507 } else { 508 perf_processor_interface = UNKNOWN_INTF; 509 printk("Performance monitoring counters not supported on this processor\n"); 510 return -ENODEV; 511 } 512 513 ret = misc_register(&perf_dev); 514 if (ret) { 515 printk(KERN_ERR "Performance monitoring counters: " 516 "cannot register misc device.\n"); 517 return ret; 518 } 519 520 /* Patch the images to match the system */ 521 perf_patch_images(); 522 523 /* TODO: this only lets us access the first cpu.. what to do for SMP? */ 524 cpu_device = per_cpu(cpu_data, 0).dev; 525 printk("Performance monitoring counters enabled for %s\n", 526 per_cpu(cpu_data, 0).dev->name); 527 528 return 0; 529 } 530 device_initcall(perf_init); 531 532 /* 533 * perf_start_counters(void) 534 * 535 * Start the counters. 536 */ 537 static void perf_start_counters(void) 538 { 539 /* Enable performance monitor counters */ 540 perf_intrigue_enable_perf_counters(); 541 } 542 543 /* 544 * perf_stop_counters 545 * 546 * Stop the performance counters and save counts 547 * in a per_processor array. 
548 */ 549 static int perf_stop_counters(uint32_t *raddr) 550 { 551 uint64_t userbuf[MAX_RDR_WORDS]; 552 553 /* Disable performance counters */ 554 perf_intrigue_disable_perf_counters(); 555 556 if (perf_processor_interface == ONYX_INTF) { 557 uint64_t tmp64; 558 /* 559 * Read the counters 560 */ 561 if (!perf_rdr_read_ubuf(16, userbuf)) 562 return -13; 563 564 /* Counter0 is bits 1398 to 1429 */ 565 tmp64 = (userbuf[21] << 22) & 0x00000000ffc00000; 566 tmp64 |= (userbuf[22] >> 42) & 0x00000000003fffff; 567 /* OR sticky0 (bit 1430) to counter0 bit 32 */ 568 tmp64 |= (userbuf[22] >> 10) & 0x0000000080000000; 569 raddr[0] = (uint32_t)tmp64; 570 571 /* Counter1 is bits 1431 to 1462 */ 572 tmp64 = (userbuf[22] >> 9) & 0x00000000ffffffff; 573 /* OR sticky1 (bit 1463) to counter1 bit 32 */ 574 tmp64 |= (userbuf[22] << 23) & 0x0000000080000000; 575 raddr[1] = (uint32_t)tmp64; 576 577 /* Counter2 is bits 1464 to 1495 */ 578 tmp64 = (userbuf[22] << 24) & 0x00000000ff000000; 579 tmp64 |= (userbuf[23] >> 40) & 0x0000000000ffffff; 580 /* OR sticky2 (bit 1496) to counter2 bit 32 */ 581 tmp64 |= (userbuf[23] >> 8) & 0x0000000080000000; 582 raddr[2] = (uint32_t)tmp64; 583 584 /* Counter3 is bits 1497 to 1528 */ 585 tmp64 = (userbuf[23] >> 7) & 0x00000000ffffffff; 586 /* OR sticky3 (bit 1529) to counter3 bit 32 */ 587 tmp64 |= (userbuf[23] << 25) & 0x0000000080000000; 588 raddr[3] = (uint32_t)tmp64; 589 590 /* 591 * Zero out the counters 592 */ 593 594 /* 595 * The counters and sticky-bits comprise the last 132 bits 596 * (1398 - 1529) of RDR16 on a U chip. We'll zero these 597 * out the easy way: zero out last 10 bits of dword 21, 598 * all of dword 22 and 58 bits (plus 6 don't care bits) of 599 * dword 23. 600 */ 601 userbuf[21] &= 0xfffffffffffffc00ul; /* 0 to last 10 bits */ 602 userbuf[22] = 0; 603 userbuf[23] = 0; 604 605 /* 606 * Write back the zeroed bytes + the image given 607 * the read was destructive. 
608 */ 609 perf_rdr_write(16, userbuf); 610 } else { 611 612 /* 613 * Read RDR-15 which contains the counters and sticky bits 614 */ 615 if (!perf_rdr_read_ubuf(15, userbuf)) { 616 return -13; 617 } 618 619 /* 620 * Clear out the counters 621 */ 622 perf_rdr_clear(15); 623 624 /* 625 * Copy the counters 626 */ 627 raddr[0] = (uint32_t)((userbuf[0] >> 32) & 0x00000000ffffffffUL); 628 raddr[1] = (uint32_t)(userbuf[0] & 0x00000000ffffffffUL); 629 raddr[2] = (uint32_t)((userbuf[1] >> 32) & 0x00000000ffffffffUL); 630 raddr[3] = (uint32_t)(userbuf[1] & 0x00000000ffffffffUL); 631 } 632 633 return 0; 634 } 635 636 /* 637 * perf_rdr_get_entry 638 * 639 * Retrieve a pointer to the description of what this 640 * RDR contains. 641 */ 642 static const struct rdr_tbl_ent * perf_rdr_get_entry(uint32_t rdr_num) 643 { 644 if (perf_processor_interface == ONYX_INTF) { 645 return &perf_rdr_tbl_U[rdr_num]; 646 } else { 647 return &perf_rdr_tbl_W[rdr_num]; 648 } 649 } 650 651 /* 652 * perf_rdr_read_ubuf 653 * 654 * Read the RDR value into the buffer specified. 
655 */ 656 static int perf_rdr_read_ubuf(uint32_t rdr_num, uint64_t *buffer) 657 { 658 uint64_t data, data_mask = 0; 659 uint32_t width, xbits, i; 660 const struct rdr_tbl_ent *tentry; 661 662 tentry = perf_rdr_get_entry(rdr_num); 663 if ((width = tentry->width) == 0) 664 return 0; 665 666 /* Clear out buffer */ 667 i = tentry->num_words; 668 while (i--) { 669 buffer[i] = 0; 670 } 671 672 /* Check for bits an even number of 64 */ 673 if ((xbits = width & 0x03f) != 0) { 674 data_mask = 1; 675 data_mask <<= (64 - xbits); 676 data_mask--; 677 } 678 679 /* Grab all of the data */ 680 i = tentry->num_words; 681 while (i--) { 682 683 if (perf_processor_interface == ONYX_INTF) { 684 data = perf_rdr_shift_in_U(rdr_num, width); 685 } else { 686 data = perf_rdr_shift_in_W(rdr_num, width); 687 } 688 if (xbits) { 689 buffer[i] |= (data << (64 - xbits)); 690 if (i) { 691 buffer[i-1] |= ((data >> xbits) & data_mask); 692 } 693 } else { 694 buffer[i] = data; 695 } 696 } 697 698 return 1; 699 } 700 701 /* 702 * perf_rdr_clear 703 * 704 * Zero out the given RDR register 705 */ 706 static int perf_rdr_clear(uint32_t rdr_num) 707 { 708 const struct rdr_tbl_ent *tentry; 709 int32_t i; 710 711 tentry = perf_rdr_get_entry(rdr_num); 712 713 if (tentry->width == 0) { 714 return -1; 715 } 716 717 i = tentry->num_words; 718 while (i--) { 719 if (perf_processor_interface == ONYX_INTF) { 720 perf_rdr_shift_out_U(rdr_num, 0UL); 721 } else { 722 perf_rdr_shift_out_W(rdr_num, 0UL); 723 } 724 } 725 726 return 0; 727 } 728 729 730 /* 731 * perf_write_image 732 * 733 * Write the given image out to the processor 734 */ 735 static int perf_write_image(uint64_t *memaddr) 736 { 737 uint64_t buffer[MAX_RDR_WORDS]; 738 uint64_t *bptr; 739 uint32_t dwords; 740 const uint32_t *intrigue_rdr; 741 const uint64_t *intrigue_bitmask; 742 uint64_t tmp64; 743 void __iomem *runway; 744 const struct rdr_tbl_ent *tentry; 745 int i; 746 747 /* Clear out counters */ 748 if (perf_processor_interface == ONYX_INTF) { 749 
750 perf_rdr_clear(16); 751 752 /* Toggle performance monitor */ 753 perf_intrigue_enable_perf_counters(); 754 perf_intrigue_disable_perf_counters(); 755 756 intrigue_rdr = perf_rdrs_U; 757 } else { 758 perf_rdr_clear(15); 759 intrigue_rdr = perf_rdrs_W; 760 } 761 762 /* Write all RDRs */ 763 while (*intrigue_rdr != -1) { 764 tentry = perf_rdr_get_entry(*intrigue_rdr); 765 perf_rdr_read_ubuf(*intrigue_rdr, buffer); 766 bptr = &buffer[0]; 767 dwords = tentry->num_words; 768 if (tentry->write_control) { 769 intrigue_bitmask = &bitmask_array[tentry->write_control >> 3]; 770 while (dwords--) { 771 tmp64 = *intrigue_bitmask & *memaddr++; 772 tmp64 |= (~(*intrigue_bitmask++)) & *bptr; 773 *bptr++ = tmp64; 774 } 775 } else { 776 while (dwords--) { 777 *bptr++ = *memaddr++; 778 } 779 } 780 781 perf_rdr_write(*intrigue_rdr, buffer); 782 intrigue_rdr++; 783 } 784 785 /* 786 * Now copy out the Runway stuff which is not in RDRs 787 */ 788 789 if (cpu_device == NULL) 790 { 791 printk(KERN_ERR "write_image: cpu_device not yet initialized!\n"); 792 return -1; 793 } 794 795 runway = ioremap(cpu_device->hpa.start, 4096); 796 if (!runway) { 797 pr_err("perf_write_image: ioremap failed!\n"); 798 return -ENOMEM; 799 } 800 801 /* Merge intrigue bits into Runway STATUS 0 */ 802 tmp64 = __raw_readq(runway + RUNWAY_STATUS) & 0xffecfffffffffffful; 803 __raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul), 804 runway + RUNWAY_STATUS); 805 806 /* Write RUNWAY DEBUG registers */ 807 for (i = 0; i < 8; i++) { 808 __raw_writeq(*memaddr++, runway + RUNWAY_DEBUG); 809 } 810 811 return 0; 812 } 813 814 /* 815 * perf_rdr_write 816 * 817 * Write the given RDR register with the contents 818 * of the given buffer. 
819 */ 820 static void perf_rdr_write(uint32_t rdr_num, uint64_t *buffer) 821 { 822 const struct rdr_tbl_ent *tentry; 823 int32_t i; 824 825 printk("perf_rdr_write\n"); 826 tentry = perf_rdr_get_entry(rdr_num); 827 if (tentry->width == 0) { return; } 828 829 i = tentry->num_words; 830 while (i--) { 831 if (perf_processor_interface == ONYX_INTF) { 832 perf_rdr_shift_out_U(rdr_num, buffer[i]); 833 } else { 834 perf_rdr_shift_out_W(rdr_num, buffer[i]); 835 } 836 } 837 printk("perf_rdr_write done\n"); 838 } 839