// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *    Parisc performance counters
 *    Copyright (C) 2001 Randolph Chung <tausq@debian.org>
 *
 *    This code is derived, with permission, from HP/UX sources.
 */
81da177e4SLinus Torvalds
/*
 *  Edited comment from original sources:
 *
 *  This driver programs the PCX-U/PCX-W performance counters
 *  on the PA-RISC 2.0 chips.  The driver keeps all images now
 *  internally to the kernel to hopefully eliminate the possibility
 *  of a bad image halting the CPU.  Also, there are different
 *  images for the PCX-W and later chips vs the PCX-U chips.
 *
 *  Only 1 process is allowed to access the driver at any time,
 *  so the only protection that is needed is at open and close.
 *  A variable "perf_enabled" is used to hold the state of the
 *  driver.  The spinlock "perf_lock" is used to protect the
 *  modification of the state during open/close operations so
 *  multiple processes don't get into the driver simultaneously.
 *
 *  This driver accesses the processor directly vs going through
 *  the PDC INTRIGUE calls.  This is done to eliminate bugs introduced
 *  in various PDC revisions.  The code is much more maintainable
 *  and reliable this way vs having to debug on every version of PDC
 *  on every box.
 */
311da177e4SLinus Torvalds
32a9415644SRandy Dunlap #include <linux/capability.h>
331da177e4SLinus Torvalds #include <linux/init.h>
341da177e4SLinus Torvalds #include <linux/proc_fs.h>
351da177e4SLinus Torvalds #include <linux/miscdevice.h>
361da177e4SLinus Torvalds #include <linux/spinlock.h>
371da177e4SLinus Torvalds
387c0f6ba6SLinus Torvalds #include <linux/uaccess.h>
391da177e4SLinus Torvalds #include <asm/perf.h>
401da177e4SLinus Torvalds #include <asm/parisc-device.h>
411da177e4SLinus Torvalds #include <asm/processor.h>
421da177e4SLinus Torvalds #include <asm/runway.h>
431da177e4SLinus Torvalds #include <asm/io.h> /* for __raw_read() */
441da177e4SLinus Torvalds
451da177e4SLinus Torvalds #include "perf_images.h"
461da177e4SLinus Torvalds
/* Size, in 64-bit words, of the scratch buffer used when reading an RDR */
#define MAX_RDR_WORDS	24
/* Interface version reported by the PA_PERF_VERSION ioctl */
#define PERF_VERSION	2	/* derived from hpux's PI v2 interface */
491da177e4SLinus Torvalds
/*
 * Description of one RDR register.  One entry per RDR number is kept in
 * the per-chip tables perf_rdr_tbl_W[] and perf_rdr_tbl_U[].
 */
struct rdr_tbl_ent {
	uint16_t	width;		/* register width (the tables annotate these as bits) */
	uint8_t		num_words;	/* number of 64-bit words covering the register */
	uint8_t		write_control;	/* non-zero: byte offset into the bitmask array */
};
561da177e4SLinus Torvalds
578039de10SHelge Deller static int perf_processor_interface __read_mostly = UNKNOWN_INTF;
58cb6fc18eSHelge Deller static int perf_enabled __read_mostly;
5976cffeb6SHelge Deller static DEFINE_SPINLOCK(perf_lock);
60*d863066eSHelge Deller static struct parisc_device *cpu_device __read_mostly;
611da177e4SLinus Torvalds
/* RDRs to write for PCX-W; the list is terminated by -1 */
static const int perf_rdrs_W[] = {
	0, 1, 4, 5, 6, 15, 16, 17, 18, 20, 21, 22, 23, 24, 25,
	-1
};

/* RDRs to write for PCX-U; the list is terminated by -1 */
static const int perf_rdrs_U[] = {
	0, 1, 4, 5, 6, 7, 16, 17, 18, 20, 21, 22, 23, 24, 25,
	-1
};
691da177e4SLinus Torvalds
701da177e4SLinus Torvalds /* RDR register descriptions for PCX-W */
71cb6fc18eSHelge Deller static const struct rdr_tbl_ent perf_rdr_tbl_W[] = {
721da177e4SLinus Torvalds { 19, 1, 8 }, /* RDR 0 */
731da177e4SLinus Torvalds { 16, 1, 16 }, /* RDR 1 */
741da177e4SLinus Torvalds { 72, 2, 0 }, /* RDR 2 */
751da177e4SLinus Torvalds { 81, 2, 0 }, /* RDR 3 */
761da177e4SLinus Torvalds { 328, 6, 0 }, /* RDR 4 */
771da177e4SLinus Torvalds { 160, 3, 0 }, /* RDR 5 */
781da177e4SLinus Torvalds { 336, 6, 0 }, /* RDR 6 */
791da177e4SLinus Torvalds { 164, 3, 0 }, /* RDR 7 */
801da177e4SLinus Torvalds { 0, 0, 0 }, /* RDR 8 */
811da177e4SLinus Torvalds { 35, 1, 0 }, /* RDR 9 */
821da177e4SLinus Torvalds { 6, 1, 0 }, /* RDR 10 */
831da177e4SLinus Torvalds { 18, 1, 0 }, /* RDR 11 */
841da177e4SLinus Torvalds { 13, 1, 0 }, /* RDR 12 */
851da177e4SLinus Torvalds { 8, 1, 0 }, /* RDR 13 */
861da177e4SLinus Torvalds { 8, 1, 0 }, /* RDR 14 */
871da177e4SLinus Torvalds { 8, 1, 0 }, /* RDR 15 */
881da177e4SLinus Torvalds { 1530, 24, 0 }, /* RDR 16 */
891da177e4SLinus Torvalds { 16, 1, 0 }, /* RDR 17 */
901da177e4SLinus Torvalds { 4, 1, 0 }, /* RDR 18 */
911da177e4SLinus Torvalds { 0, 0, 0 }, /* RDR 19 */
921da177e4SLinus Torvalds { 152, 3, 24 }, /* RDR 20 */
931da177e4SLinus Torvalds { 152, 3, 24 }, /* RDR 21 */
941da177e4SLinus Torvalds { 233, 4, 48 }, /* RDR 22 */
951da177e4SLinus Torvalds { 233, 4, 48 }, /* RDR 23 */
961da177e4SLinus Torvalds { 71, 2, 0 }, /* RDR 24 */
971da177e4SLinus Torvalds { 71, 2, 0 }, /* RDR 25 */
981da177e4SLinus Torvalds { 11, 1, 0 }, /* RDR 26 */
991da177e4SLinus Torvalds { 18, 1, 0 }, /* RDR 27 */
1001da177e4SLinus Torvalds { 128, 2, 0 }, /* RDR 28 */
1011da177e4SLinus Torvalds { 0, 0, 0 }, /* RDR 29 */
1021da177e4SLinus Torvalds { 16, 1, 0 }, /* RDR 30 */
1031da177e4SLinus Torvalds { 16, 1, 0 }, /* RDR 31 */
1041da177e4SLinus Torvalds };
1051da177e4SLinus Torvalds
1061da177e4SLinus Torvalds /* RDR register descriptions for PCX-U */
107cb6fc18eSHelge Deller static const struct rdr_tbl_ent perf_rdr_tbl_U[] = {
1081da177e4SLinus Torvalds { 19, 1, 8 }, /* RDR 0 */
1091da177e4SLinus Torvalds { 32, 1, 16 }, /* RDR 1 */
1101da177e4SLinus Torvalds { 20, 1, 0 }, /* RDR 2 */
1111da177e4SLinus Torvalds { 0, 0, 0 }, /* RDR 3 */
1121da177e4SLinus Torvalds { 344, 6, 0 }, /* RDR 4 */
1131da177e4SLinus Torvalds { 176, 3, 0 }, /* RDR 5 */
1141da177e4SLinus Torvalds { 336, 6, 0 }, /* RDR 6 */
1151da177e4SLinus Torvalds { 0, 0, 0 }, /* RDR 7 */
1161da177e4SLinus Torvalds { 0, 0, 0 }, /* RDR 8 */
1171da177e4SLinus Torvalds { 0, 0, 0 }, /* RDR 9 */
1181da177e4SLinus Torvalds { 28, 1, 0 }, /* RDR 10 */
1191da177e4SLinus Torvalds { 33, 1, 0 }, /* RDR 11 */
1201da177e4SLinus Torvalds { 0, 0, 0 }, /* RDR 12 */
1211da177e4SLinus Torvalds { 230, 4, 0 }, /* RDR 13 */
1221da177e4SLinus Torvalds { 32, 1, 0 }, /* RDR 14 */
1231da177e4SLinus Torvalds { 128, 2, 0 }, /* RDR 15 */
1241da177e4SLinus Torvalds { 1494, 24, 0 }, /* RDR 16 */
1251da177e4SLinus Torvalds { 18, 1, 0 }, /* RDR 17 */
1261da177e4SLinus Torvalds { 4, 1, 0 }, /* RDR 18 */
1271da177e4SLinus Torvalds { 0, 0, 0 }, /* RDR 19 */
1281da177e4SLinus Torvalds { 158, 3, 24 }, /* RDR 20 */
1291da177e4SLinus Torvalds { 158, 3, 24 }, /* RDR 21 */
1301da177e4SLinus Torvalds { 194, 4, 48 }, /* RDR 22 */
1311da177e4SLinus Torvalds { 194, 4, 48 }, /* RDR 23 */
1321da177e4SLinus Torvalds { 71, 2, 0 }, /* RDR 24 */
1331da177e4SLinus Torvalds { 71, 2, 0 }, /* RDR 25 */
1341da177e4SLinus Torvalds { 28, 1, 0 }, /* RDR 26 */
1351da177e4SLinus Torvalds { 33, 1, 0 }, /* RDR 27 */
1361da177e4SLinus Torvalds { 88, 2, 0 }, /* RDR 28 */
1371da177e4SLinus Torvalds { 32, 1, 0 }, /* RDR 29 */
1381da177e4SLinus Torvalds { 24, 1, 0 }, /* RDR 30 */
1391da177e4SLinus Torvalds { 16, 1, 0 }, /* RDR 31 */
1401da177e4SLinus Torvalds };
1411da177e4SLinus Torvalds
/*
 * Default write-control bitmasks.
 *
 * A non-zero write_control in the RDR description tables is a byte offset
 * into this array (8 -> entry 1, 16 -> entry 2, 24 -> entry 3, 48 -> entry 6).
 */
static const uint64_t perf_bitmasks[] = {
	/* first dbl word must be zero */
	0x0000000000000000ULL,
	/* RDR0 bitmask */
	0xfdffe00000000000ULL,
	/* RDR1 bitmask */
	0x003f000000000000ULL,
	/* RDR20-RDR21 bitmask (152 bits) */
	0x00ffffffffffffffULL,
	0xffffffffffffffffULL,
	0xfffffffc00000000ULL,
	/* RDR22-RDR23 bitmask (233 bits) */
	0xffffffffffffffffULL,
	0xffffffffffffffffULL,
	0xfffffffffffffffcULL,
	0xff00000000000000ULL
};
1581da177e4SLinus Torvalds
/*
 * Write control bitmasks for Pa-8700 processor given
 * some things have changed slightly.  Same layout as perf_bitmasks[];
 * only the last two RDR22-RDR23 words differ.
 */
static const uint64_t perf_bitmasks_piranha[] = {
	/* first dbl word must be zero */
	0x0000000000000000ULL,
	/* RDR0 bitmask */
	0xfdffe00000000000ULL,
	/* RDR1 bitmask */
	0x003f000000000000ULL,
	/* RDR20-RDR21 bitmask (158 bits) */
	0x00ffffffffffffffULL,
	0xffffffffffffffffULL,
	0xfffffffc00000000ULL,
	/* RDR22-RDR23 bitmask (210 bits) */
	0xffffffffffffffffULL,
	0xffffffffffffffffULL,
	0xffffffffffffffffULL,
	0xfffc000000000000ULL
};
1751da177e4SLinus Torvalds
/* Bitmask set in use; perf_init() points this at perf_bitmasks or perf_bitmasks_piranha */
static const uint64_t *bitmask_array;
1771da177e4SLinus Torvalds
1781da177e4SLinus Torvalds /******************************************************************************
1791da177e4SLinus Torvalds * Function Prototypes
1801da177e4SLinus Torvalds *****************************************************************************/
1811da177e4SLinus Torvalds static int perf_config(uint32_t *image_ptr);
1821da177e4SLinus Torvalds static int perf_release(struct inode *inode, struct file *file);
1831da177e4SLinus Torvalds static int perf_open(struct inode *inode, struct file *file);
1841da177e4SLinus Torvalds static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos);
18574e3f6e6SArvind Yadav static ssize_t perf_write(struct file *file, const char __user *buf,
18674e3f6e6SArvind Yadav size_t count, loff_t *ppos);
187ad7dd338SChristoph Hellwig static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
1881da177e4SLinus Torvalds static void perf_start_counters(void);
1891da177e4SLinus Torvalds static int perf_stop_counters(uint32_t *raddr);
190cb6fc18eSHelge Deller static const struct rdr_tbl_ent * perf_rdr_get_entry(uint32_t rdr_num);
1911da177e4SLinus Torvalds static int perf_rdr_read_ubuf(uint32_t rdr_num, uint64_t *buffer);
1921da177e4SLinus Torvalds static int perf_rdr_clear(uint32_t rdr_num);
1931da177e4SLinus Torvalds static int perf_write_image(uint64_t *memaddr);
1941da177e4SLinus Torvalds static void perf_rdr_write(uint32_t rdr_num, uint64_t *buffer);
1951da177e4SLinus Torvalds
1961da177e4SLinus Torvalds /* External Assembly Routines */
1971da177e4SLinus Torvalds extern uint64_t perf_rdr_shift_in_W (uint32_t rdr_num, uint16_t width);
1981da177e4SLinus Torvalds extern uint64_t perf_rdr_shift_in_U (uint32_t rdr_num, uint16_t width);
1991da177e4SLinus Torvalds extern void perf_rdr_shift_out_W (uint32_t rdr_num, uint64_t buffer);
2001da177e4SLinus Torvalds extern void perf_rdr_shift_out_U (uint32_t rdr_num, uint64_t buffer);
2011da177e4SLinus Torvalds extern void perf_intrigue_enable_perf_counters (void);
2021da177e4SLinus Torvalds extern void perf_intrigue_disable_perf_counters (void);
2031da177e4SLinus Torvalds
2041da177e4SLinus Torvalds /******************************************************************************
2051da177e4SLinus Torvalds * Function Definitions
2061da177e4SLinus Torvalds *****************************************************************************/
2071da177e4SLinus Torvalds
2081da177e4SLinus Torvalds
2091da177e4SLinus Torvalds /*
2101da177e4SLinus Torvalds * configure:
2111da177e4SLinus Torvalds *
2121da177e4SLinus Torvalds * Configure the cpu with a given data image. First turn off the counters,
2131da177e4SLinus Torvalds * then download the image, then turn the counters back on.
2141da177e4SLinus Torvalds */
perf_config(uint32_t * image_ptr)2151da177e4SLinus Torvalds static int perf_config(uint32_t *image_ptr)
2161da177e4SLinus Torvalds {
2171da177e4SLinus Torvalds long error;
2181da177e4SLinus Torvalds uint32_t raddr[4];
2191da177e4SLinus Torvalds
2201da177e4SLinus Torvalds /* Stop the counters*/
2211da177e4SLinus Torvalds error = perf_stop_counters(raddr);
2221da177e4SLinus Torvalds if (error != 0) {
2231da177e4SLinus Torvalds printk("perf_config: perf_stop_counters = %ld\n", error);
2241da177e4SLinus Torvalds return -EINVAL;
2251da177e4SLinus Torvalds }
2261da177e4SLinus Torvalds
2271da177e4SLinus Torvalds printk("Preparing to write image\n");
2281da177e4SLinus Torvalds /* Write the image to the chip */
2291da177e4SLinus Torvalds error = perf_write_image((uint64_t *)image_ptr);
2301da177e4SLinus Torvalds if (error != 0) {
2311da177e4SLinus Torvalds printk("perf_config: DOWNLOAD = %ld\n", error);
2321da177e4SLinus Torvalds return -EINVAL;
2331da177e4SLinus Torvalds }
2341da177e4SLinus Torvalds
2351da177e4SLinus Torvalds printk("Preparing to start counters\n");
2361da177e4SLinus Torvalds
2371da177e4SLinus Torvalds /* Start the counters */
2381da177e4SLinus Torvalds perf_start_counters();
2391da177e4SLinus Torvalds
2401da177e4SLinus Torvalds return sizeof(uint32_t);
2411da177e4SLinus Torvalds }
2421da177e4SLinus Torvalds
2431da177e4SLinus Torvalds /*
2441da177e4SLinus Torvalds * Open the device and initialize all of its memory. The device is only
2451da177e4SLinus Torvalds * opened once, but can be "queried" by multiple processes that know its
2461da177e4SLinus Torvalds * file descriptor.
2471da177e4SLinus Torvalds */
perf_open(struct inode * inode,struct file * file)2481da177e4SLinus Torvalds static int perf_open(struct inode *inode, struct file *file)
2491da177e4SLinus Torvalds {
2501da177e4SLinus Torvalds spin_lock(&perf_lock);
2511da177e4SLinus Torvalds if (perf_enabled) {
2521da177e4SLinus Torvalds spin_unlock(&perf_lock);
2531da177e4SLinus Torvalds return -EBUSY;
2541da177e4SLinus Torvalds }
2551da177e4SLinus Torvalds perf_enabled = 1;
2561da177e4SLinus Torvalds spin_unlock(&perf_lock);
2571da177e4SLinus Torvalds
2581da177e4SLinus Torvalds return 0;
2591da177e4SLinus Torvalds }
2601da177e4SLinus Torvalds
2611da177e4SLinus Torvalds /*
2621da177e4SLinus Torvalds * Close the device.
2631da177e4SLinus Torvalds */
perf_release(struct inode * inode,struct file * file)2641da177e4SLinus Torvalds static int perf_release(struct inode *inode, struct file *file)
2651da177e4SLinus Torvalds {
2661da177e4SLinus Torvalds spin_lock(&perf_lock);
2671da177e4SLinus Torvalds perf_enabled = 0;
2681da177e4SLinus Torvalds spin_unlock(&perf_lock);
2691da177e4SLinus Torvalds
2701da177e4SLinus Torvalds return 0;
2711da177e4SLinus Torvalds }
2721da177e4SLinus Torvalds
2731da177e4SLinus Torvalds /*
2741da177e4SLinus Torvalds * Read does nothing for this driver
2751da177e4SLinus Torvalds */
perf_read(struct file * file,char __user * buf,size_t cnt,loff_t * ppos)2761da177e4SLinus Torvalds static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos)
2771da177e4SLinus Torvalds {
2781da177e4SLinus Torvalds return 0;
2791da177e4SLinus Torvalds }
2801da177e4SLinus Torvalds
2811da177e4SLinus Torvalds /*
2821da177e4SLinus Torvalds * write:
2831da177e4SLinus Torvalds *
2841da177e4SLinus Torvalds * This routine downloads the image to the chip. It must be
2851da177e4SLinus Torvalds * called on the processor that the download should happen
2861da177e4SLinus Torvalds * on.
2871da177e4SLinus Torvalds */
perf_write(struct file * file,const char __user * buf,size_t count,loff_t * ppos)28874e3f6e6SArvind Yadav static ssize_t perf_write(struct file *file, const char __user *buf,
28974e3f6e6SArvind Yadav size_t count, loff_t *ppos)
2901da177e4SLinus Torvalds {
2919e142b72SHelge Deller size_t image_size __maybe_unused;
2921da177e4SLinus Torvalds uint32_t image_type;
2931da177e4SLinus Torvalds uint32_t interface_type;
2941da177e4SLinus Torvalds uint32_t test;
2951da177e4SLinus Torvalds
2961da177e4SLinus Torvalds if (perf_processor_interface == ONYX_INTF)
2971da177e4SLinus Torvalds image_size = PCXU_IMAGE_SIZE;
2981da177e4SLinus Torvalds else if (perf_processor_interface == CUDA_INTF)
2991da177e4SLinus Torvalds image_size = PCXW_IMAGE_SIZE;
3001da177e4SLinus Torvalds else
3011da177e4SLinus Torvalds return -EFAULT;
3021da177e4SLinus Torvalds
303cf91baf3SAlexey Budankov if (!perfmon_capable())
3041da177e4SLinus Torvalds return -EACCES;
3051da177e4SLinus Torvalds
3061da177e4SLinus Torvalds if (count != sizeof(uint32_t))
3071da177e4SLinus Torvalds return -EIO;
3081da177e4SLinus Torvalds
30982cbd568SDan Carpenter if (copy_from_user(&image_type, buf, sizeof(uint32_t)))
31082cbd568SDan Carpenter return -EFAULT;
3111da177e4SLinus Torvalds
3121da177e4SLinus Torvalds /* Get the interface type and test type */
3131da177e4SLinus Torvalds interface_type = (image_type >> 16) & 0xffff;
3141da177e4SLinus Torvalds test = (image_type & 0xffff);
3151da177e4SLinus Torvalds
3161da177e4SLinus Torvalds /* Make sure everything makes sense */
3171da177e4SLinus Torvalds
3181da177e4SLinus Torvalds /* First check the machine type is correct for
3191da177e4SLinus Torvalds the requested image */
3201da177e4SLinus Torvalds if (((perf_processor_interface == CUDA_INTF) &&
3211da177e4SLinus Torvalds (interface_type != CUDA_INTF)) ||
3221da177e4SLinus Torvalds ((perf_processor_interface == ONYX_INTF) &&
3231da177e4SLinus Torvalds (interface_type != ONYX_INTF)))
3241da177e4SLinus Torvalds return -EINVAL;
3251da177e4SLinus Torvalds
3261da177e4SLinus Torvalds /* Next check to make sure the requested image
3271da177e4SLinus Torvalds is valid */
3281da177e4SLinus Torvalds if (((interface_type == CUDA_INTF) &&
3291da177e4SLinus Torvalds (test >= MAX_CUDA_IMAGES)) ||
3301da177e4SLinus Torvalds ((interface_type == ONYX_INTF) &&
3311da177e4SLinus Torvalds (test >= MAX_ONYX_IMAGES)))
3321da177e4SLinus Torvalds return -EINVAL;
3331da177e4SLinus Torvalds
3341da177e4SLinus Torvalds /* Copy the image into the processor */
3351da177e4SLinus Torvalds if (interface_type == CUDA_INTF)
3361da177e4SLinus Torvalds return perf_config(cuda_images[test]);
3371da177e4SLinus Torvalds else
3381da177e4SLinus Torvalds return perf_config(onyx_images[test]);
3391da177e4SLinus Torvalds
3401da177e4SLinus Torvalds return count;
3411da177e4SLinus Torvalds }
3421da177e4SLinus Torvalds
/*
 * Patch the images that need to know the IVA addresses.
 *
 * The whole body is currently compiled out (#if 0, FIXME), so this
 * function does nothing at run time.
 */
static void perf_patch_images(void)
{
#if 0 /* FIXME!! */
	/*
	 * NOTE:  this routine is VERY specific to the current TLB image.
	 * If the image is changed, this routine might also need to be changed.
	 */
	extern void $i_itlb_miss_2_0();
	extern void $i_dtlb_miss_2_0();
	extern void PA2_0_iva();

	/* Images that get the TLB miss handler addresses patched in */
	static const int tlb_images[] = { TLBMISS, TLBHANDMISS, BIG_CPI };

	/*
	 * We can only use the lower 32-bits, the upper 32-bits should be 0
	 * anyway given this is in the kernel
	 */
	uint32_t itlb_addr  = (uint32_t)&($i_itlb_miss_2_0);
	uint32_t dtlb_addr  = (uint32_t)&($i_dtlb_miss_2_0);
	uint32_t IVAaddress = (uint32_t)&PA2_0_iva;
	unsigned int i;

	if (perf_processor_interface == ONYX_INTF) {
		for (i = 0; i < ARRAY_SIZE(tlb_images); i++) {
			uint32_t *img = onyx_images[tlb_images[i]];

			/* clear the low byte of word 15 ... */
			img[15] &= 0xffffff00;
			/* ... and put the top byte of the dtlb address there */
			img[15] |= (0x000000ff&((dtlb_addr) >> 24));
			/* remaining three dtlb address bytes go in word 16 */
			img[16] = (dtlb_addr << 8)&0xffffff00;
			img[17] = itlb_addr;
		}

		/* PANIC image: same layout, but with the IVA address */
		onyx_images[PANIC][15] &= 0xffffff00;
		onyx_images[PANIC][15] |= (0x000000ff&((IVAaddress) >> 24));
		onyx_images[PANIC][16] = (IVAaddress << 8)&0xffffff00;
	} else if (perf_processor_interface == CUDA_INTF) {
		/* Cuda interface */
		for (i = 0; i < ARRAY_SIZE(tlb_images); i++) {
			uint32_t *img = cuda_images[tlb_images[i]];

			img[16] = (img[16]&0xffff0000) |
				((dtlb_addr >> 8)&0x0000ffff);
			img[17] = ((dtlb_addr << 24)&0xff000000) |
				((itlb_addr >> 16)&0x000000ff);
			img[18] = (itlb_addr << 16)&0xffff0000;
		}
	}
	/* else: Unknown type, nothing to patch */
#endif
}
4191da177e4SLinus Torvalds
4201da177e4SLinus Torvalds
4211da177e4SLinus Torvalds /*
4221da177e4SLinus Torvalds * ioctl routine
4231da177e4SLinus Torvalds * All routines effect the processor that they are executed on. Thus you
4241da177e4SLinus Torvalds * must be running on the processor that you wish to change.
4251da177e4SLinus Torvalds */
4261da177e4SLinus Torvalds
perf_ioctl(struct file * file,unsigned int cmd,unsigned long arg)427ad7dd338SChristoph Hellwig static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
4281da177e4SLinus Torvalds {
4291da177e4SLinus Torvalds long error_start;
4301da177e4SLinus Torvalds uint32_t raddr[4];
431ad7dd338SChristoph Hellwig int error = 0;
4321da177e4SLinus Torvalds
4331da177e4SLinus Torvalds switch (cmd) {
4341da177e4SLinus Torvalds
4351da177e4SLinus Torvalds case PA_PERF_ON:
4361da177e4SLinus Torvalds /* Start the counters */
4371da177e4SLinus Torvalds perf_start_counters();
438ad7dd338SChristoph Hellwig break;
4391da177e4SLinus Torvalds
4401da177e4SLinus Torvalds case PA_PERF_OFF:
4411da177e4SLinus Torvalds error_start = perf_stop_counters(raddr);
4421da177e4SLinus Torvalds if (error_start != 0) {
4431da177e4SLinus Torvalds printk(KERN_ERR "perf_off: perf_stop_counters = %ld\n", error_start);
444ad7dd338SChristoph Hellwig error = -EFAULT;
445ad7dd338SChristoph Hellwig break;
4461da177e4SLinus Torvalds }
4471da177e4SLinus Torvalds
4481da177e4SLinus Torvalds /* copy out the Counters */
4491da177e4SLinus Torvalds if (copy_to_user((void __user *)arg, raddr,
4501da177e4SLinus Torvalds sizeof (raddr)) != 0) {
451ad7dd338SChristoph Hellwig error = -EFAULT;
452ad7dd338SChristoph Hellwig break;
4531da177e4SLinus Torvalds }
454ad7dd338SChristoph Hellwig break;
4551da177e4SLinus Torvalds
4561da177e4SLinus Torvalds case PA_PERF_VERSION:
4571da177e4SLinus Torvalds /* Return the version # */
458ad7dd338SChristoph Hellwig error = put_user(PERF_VERSION, (int *)arg);
459ad7dd338SChristoph Hellwig break;
4601da177e4SLinus Torvalds
4611da177e4SLinus Torvalds default:
462ad7dd338SChristoph Hellwig error = -ENOTTY;
4631da177e4SLinus Torvalds }
464ad7dd338SChristoph Hellwig
465ad7dd338SChristoph Hellwig return error;
4661da177e4SLinus Torvalds }
4671da177e4SLinus Torvalds
4685dfe4c96SArjan van de Ven static const struct file_operations perf_fops = {
4691da177e4SLinus Torvalds .llseek = no_llseek,
4701da177e4SLinus Torvalds .read = perf_read,
4711da177e4SLinus Torvalds .write = perf_write,
472ad7dd338SChristoph Hellwig .unlocked_ioctl = perf_ioctl,
473ad7dd338SChristoph Hellwig .compat_ioctl = perf_ioctl,
4741da177e4SLinus Torvalds .open = perf_open,
4751da177e4SLinus Torvalds .release = perf_release
4761da177e4SLinus Torvalds };
4771da177e4SLinus Torvalds
4781da177e4SLinus Torvalds static struct miscdevice perf_dev = {
4791da177e4SLinus Torvalds MISC_DYNAMIC_MINOR,
4801da177e4SLinus Torvalds PA_PERF_DEV,
4811da177e4SLinus Torvalds &perf_fops
4821da177e4SLinus Torvalds };
4831da177e4SLinus Torvalds
4841da177e4SLinus Torvalds /*
4851da177e4SLinus Torvalds * Initialize the module
4861da177e4SLinus Torvalds */
perf_init(void)4871da177e4SLinus Torvalds static int __init perf_init(void)
4881da177e4SLinus Torvalds {
4891da177e4SLinus Torvalds int ret;
4901da177e4SLinus Torvalds
4911da177e4SLinus Torvalds /* Determine correct processor interface to use */
4921da177e4SLinus Torvalds bitmask_array = perf_bitmasks;
4931da177e4SLinus Torvalds
4941da177e4SLinus Torvalds if (boot_cpu_data.cpu_type == pcxu ||
4951da177e4SLinus Torvalds boot_cpu_data.cpu_type == pcxu_) {
4961da177e4SLinus Torvalds perf_processor_interface = ONYX_INTF;
4971da177e4SLinus Torvalds } else if (boot_cpu_data.cpu_type == pcxw ||
4981da177e4SLinus Torvalds boot_cpu_data.cpu_type == pcxw_ ||
4991da177e4SLinus Torvalds boot_cpu_data.cpu_type == pcxw2 ||
5002cbd42dbSKyle McMartin boot_cpu_data.cpu_type == mako ||
5012cbd42dbSKyle McMartin boot_cpu_data.cpu_type == mako2) {
5021da177e4SLinus Torvalds perf_processor_interface = CUDA_INTF;
5031da177e4SLinus Torvalds if (boot_cpu_data.cpu_type == pcxw2 ||
5042cbd42dbSKyle McMartin boot_cpu_data.cpu_type == mako ||
5052cbd42dbSKyle McMartin boot_cpu_data.cpu_type == mako2)
5061da177e4SLinus Torvalds bitmask_array = perf_bitmasks_piranha;
5071da177e4SLinus Torvalds } else {
5081da177e4SLinus Torvalds perf_processor_interface = UNKNOWN_INTF;
5091da177e4SLinus Torvalds printk("Performance monitoring counters not supported on this processor\n");
5101da177e4SLinus Torvalds return -ENODEV;
5111da177e4SLinus Torvalds }
5121da177e4SLinus Torvalds
5131da177e4SLinus Torvalds ret = misc_register(&perf_dev);
5141da177e4SLinus Torvalds if (ret) {
5151da177e4SLinus Torvalds printk(KERN_ERR "Performance monitoring counters: "
5161da177e4SLinus Torvalds "cannot register misc device.\n");
5171da177e4SLinus Torvalds return ret;
5181da177e4SLinus Torvalds }
5191da177e4SLinus Torvalds
5201da177e4SLinus Torvalds /* Patch the images to match the system */
5211da177e4SLinus Torvalds perf_patch_images();
5221da177e4SLinus Torvalds
5231da177e4SLinus Torvalds /* TODO: this only lets us access the first cpu.. what to do for SMP? */
524ef017bebSHelge Deller cpu_device = per_cpu(cpu_data, 0).dev;
5251da177e4SLinus Torvalds printk("Performance monitoring counters enabled for %s\n",
526ef017bebSHelge Deller per_cpu(cpu_data, 0).dev->name);
5271da177e4SLinus Torvalds
5281da177e4SLinus Torvalds return 0;
5291da177e4SLinus Torvalds }
53015becabdSPaul Gortmaker device_initcall(perf_init);
5311da177e4SLinus Torvalds
/*
 * perf_start_counters(void)
 *
 * Start the counters by calling the external assembly routine
 * perf_intrigue_enable_perf_counters().
 */
static void perf_start_counters(void)
{
	perf_intrigue_enable_perf_counters();
}
5421da177e4SLinus Torvalds
5431da177e4SLinus Torvalds /*
5441da177e4SLinus Torvalds * perf_stop_counters
5451da177e4SLinus Torvalds *
5461da177e4SLinus Torvalds * Stop the performance counters and save counts
5471da177e4SLinus Torvalds * in a per_processor array.
5481da177e4SLinus Torvalds */
perf_stop_counters(uint32_t * raddr)5491da177e4SLinus Torvalds static int perf_stop_counters(uint32_t *raddr)
5501da177e4SLinus Torvalds {
5511da177e4SLinus Torvalds uint64_t userbuf[MAX_RDR_WORDS];
5521da177e4SLinus Torvalds
5531da177e4SLinus Torvalds /* Disable performance counters */
5541da177e4SLinus Torvalds perf_intrigue_disable_perf_counters();
5551da177e4SLinus Torvalds
5561da177e4SLinus Torvalds if (perf_processor_interface == ONYX_INTF) {
5571da177e4SLinus Torvalds uint64_t tmp64;
5581da177e4SLinus Torvalds /*
5591da177e4SLinus Torvalds * Read the counters
5601da177e4SLinus Torvalds */
5611da177e4SLinus Torvalds if (!perf_rdr_read_ubuf(16, userbuf))
5621da177e4SLinus Torvalds return -13;
5631da177e4SLinus Torvalds
5647022672eSSimon Arlott /* Counter0 is bits 1398 to 1429 */
5651da177e4SLinus Torvalds tmp64 = (userbuf[21] << 22) & 0x00000000ffc00000;
5661da177e4SLinus Torvalds tmp64 |= (userbuf[22] >> 42) & 0x00000000003fffff;
5671da177e4SLinus Torvalds /* OR sticky0 (bit 1430) to counter0 bit 32 */
5681da177e4SLinus Torvalds tmp64 |= (userbuf[22] >> 10) & 0x0000000080000000;
5691da177e4SLinus Torvalds raddr[0] = (uint32_t)tmp64;
5701da177e4SLinus Torvalds
5717022672eSSimon Arlott /* Counter1 is bits 1431 to 1462 */
5721da177e4SLinus Torvalds tmp64 = (userbuf[22] >> 9) & 0x00000000ffffffff;
5731da177e4SLinus Torvalds /* OR sticky1 (bit 1463) to counter1 bit 32 */
5741da177e4SLinus Torvalds tmp64 |= (userbuf[22] << 23) & 0x0000000080000000;
5751da177e4SLinus Torvalds raddr[1] = (uint32_t)tmp64;
5761da177e4SLinus Torvalds
5777022672eSSimon Arlott /* Counter2 is bits 1464 to 1495 */
5781da177e4SLinus Torvalds tmp64 = (userbuf[22] << 24) & 0x00000000ff000000;
5791da177e4SLinus Torvalds tmp64 |= (userbuf[23] >> 40) & 0x0000000000ffffff;
5801da177e4SLinus Torvalds /* OR sticky2 (bit 1496) to counter2 bit 32 */
5811da177e4SLinus Torvalds tmp64 |= (userbuf[23] >> 8) & 0x0000000080000000;
5821da177e4SLinus Torvalds raddr[2] = (uint32_t)tmp64;
5831da177e4SLinus Torvalds
5847022672eSSimon Arlott /* Counter3 is bits 1497 to 1528 */
5851da177e4SLinus Torvalds tmp64 = (userbuf[23] >> 7) & 0x00000000ffffffff;
5861da177e4SLinus Torvalds /* OR sticky3 (bit 1529) to counter3 bit 32 */
5871da177e4SLinus Torvalds tmp64 |= (userbuf[23] << 25) & 0x0000000080000000;
5881da177e4SLinus Torvalds raddr[3] = (uint32_t)tmp64;
5891da177e4SLinus Torvalds
5901da177e4SLinus Torvalds /*
5911da177e4SLinus Torvalds * Zero out the counters
5921da177e4SLinus Torvalds */
5931da177e4SLinus Torvalds
5941da177e4SLinus Torvalds /*
5951da177e4SLinus Torvalds * The counters and sticky-bits comprise the last 132 bits
5961da177e4SLinus Torvalds * (1398 - 1529) of RDR16 on a U chip. We'll zero these
5971da177e4SLinus Torvalds * out the easy way: zero out last 10 bits of dword 21,
5981da177e4SLinus Torvalds * all of dword 22 and 58 bits (plus 6 don't care bits) of
5991da177e4SLinus Torvalds * dword 23.
6001da177e4SLinus Torvalds */
6011da177e4SLinus Torvalds userbuf[21] &= 0xfffffffffffffc00ul; /* 0 to last 10 bits */
6021da177e4SLinus Torvalds userbuf[22] = 0;
6031da177e4SLinus Torvalds userbuf[23] = 0;
6041da177e4SLinus Torvalds
6051da177e4SLinus Torvalds /*
6067022672eSSimon Arlott * Write back the zeroed bytes + the image given
6071da177e4SLinus Torvalds * the read was destructive.
6081da177e4SLinus Torvalds */
6091da177e4SLinus Torvalds perf_rdr_write(16, userbuf);
6101da177e4SLinus Torvalds } else {
6111da177e4SLinus Torvalds
6121da177e4SLinus Torvalds /*
6131da177e4SLinus Torvalds * Read RDR-15 which contains the counters and sticky bits
6141da177e4SLinus Torvalds */
6151da177e4SLinus Torvalds if (!perf_rdr_read_ubuf(15, userbuf)) {
6161da177e4SLinus Torvalds return -13;
6171da177e4SLinus Torvalds }
6181da177e4SLinus Torvalds
6191da177e4SLinus Torvalds /*
6201da177e4SLinus Torvalds * Clear out the counters
6211da177e4SLinus Torvalds */
6221da177e4SLinus Torvalds perf_rdr_clear(15);
6231da177e4SLinus Torvalds
6241da177e4SLinus Torvalds /*
6251da177e4SLinus Torvalds * Copy the counters
6261da177e4SLinus Torvalds */
6271da177e4SLinus Torvalds raddr[0] = (uint32_t)((userbuf[0] >> 32) & 0x00000000ffffffffUL);
6281da177e4SLinus Torvalds raddr[1] = (uint32_t)(userbuf[0] & 0x00000000ffffffffUL);
6291da177e4SLinus Torvalds raddr[2] = (uint32_t)((userbuf[1] >> 32) & 0x00000000ffffffffUL);
6301da177e4SLinus Torvalds raddr[3] = (uint32_t)(userbuf[1] & 0x00000000ffffffffUL);
6311da177e4SLinus Torvalds }
6321da177e4SLinus Torvalds
6331da177e4SLinus Torvalds return 0;
6341da177e4SLinus Torvalds }
6351da177e4SLinus Torvalds
6361da177e4SLinus Torvalds /*
6371da177e4SLinus Torvalds * perf_rdr_get_entry
6381da177e4SLinus Torvalds *
6391da177e4SLinus Torvalds * Retrieve a pointer to the description of what this
6401da177e4SLinus Torvalds * RDR contains.
6411da177e4SLinus Torvalds */
perf_rdr_get_entry(uint32_t rdr_num)642cb6fc18eSHelge Deller static const struct rdr_tbl_ent * perf_rdr_get_entry(uint32_t rdr_num)
6431da177e4SLinus Torvalds {
6441da177e4SLinus Torvalds if (perf_processor_interface == ONYX_INTF) {
6451da177e4SLinus Torvalds return &perf_rdr_tbl_U[rdr_num];
6461da177e4SLinus Torvalds } else {
6471da177e4SLinus Torvalds return &perf_rdr_tbl_W[rdr_num];
6481da177e4SLinus Torvalds }
6491da177e4SLinus Torvalds }
6501da177e4SLinus Torvalds
6511da177e4SLinus Torvalds /*
6521da177e4SLinus Torvalds * perf_rdr_read_ubuf
6531da177e4SLinus Torvalds *
6541da177e4SLinus Torvalds * Read the RDR value into the buffer specified.
6551da177e4SLinus Torvalds */
perf_rdr_read_ubuf(uint32_t rdr_num,uint64_t * buffer)6561da177e4SLinus Torvalds static int perf_rdr_read_ubuf(uint32_t rdr_num, uint64_t *buffer)
6571da177e4SLinus Torvalds {
6581da177e4SLinus Torvalds uint64_t data, data_mask = 0;
6591da177e4SLinus Torvalds uint32_t width, xbits, i;
660cb6fc18eSHelge Deller const struct rdr_tbl_ent *tentry;
6611da177e4SLinus Torvalds
6621da177e4SLinus Torvalds tentry = perf_rdr_get_entry(rdr_num);
6631da177e4SLinus Torvalds if ((width = tentry->width) == 0)
6641da177e4SLinus Torvalds return 0;
6651da177e4SLinus Torvalds
6661da177e4SLinus Torvalds /* Clear out buffer */
6671da177e4SLinus Torvalds i = tentry->num_words;
6681da177e4SLinus Torvalds while (i--) {
6691da177e4SLinus Torvalds buffer[i] = 0;
6701da177e4SLinus Torvalds }
6711da177e4SLinus Torvalds
6721da177e4SLinus Torvalds /* Check for bits an even number of 64 */
6731da177e4SLinus Torvalds if ((xbits = width & 0x03f) != 0) {
6741da177e4SLinus Torvalds data_mask = 1;
6751da177e4SLinus Torvalds data_mask <<= (64 - xbits);
6761da177e4SLinus Torvalds data_mask--;
6771da177e4SLinus Torvalds }
6781da177e4SLinus Torvalds
6791da177e4SLinus Torvalds /* Grab all of the data */
6801da177e4SLinus Torvalds i = tentry->num_words;
6811da177e4SLinus Torvalds while (i--) {
6821da177e4SLinus Torvalds
6831da177e4SLinus Torvalds if (perf_processor_interface == ONYX_INTF) {
6841da177e4SLinus Torvalds data = perf_rdr_shift_in_U(rdr_num, width);
6851da177e4SLinus Torvalds } else {
6861da177e4SLinus Torvalds data = perf_rdr_shift_in_W(rdr_num, width);
6871da177e4SLinus Torvalds }
6881da177e4SLinus Torvalds if (xbits) {
6891da177e4SLinus Torvalds buffer[i] |= (data << (64 - xbits));
6901da177e4SLinus Torvalds if (i) {
6911da177e4SLinus Torvalds buffer[i-1] |= ((data >> xbits) & data_mask);
6921da177e4SLinus Torvalds }
6931da177e4SLinus Torvalds } else {
6941da177e4SLinus Torvalds buffer[i] = data;
6951da177e4SLinus Torvalds }
6961da177e4SLinus Torvalds }
6971da177e4SLinus Torvalds
6981da177e4SLinus Torvalds return 1;
6991da177e4SLinus Torvalds }
7001da177e4SLinus Torvalds
7011da177e4SLinus Torvalds /*
7021da177e4SLinus Torvalds * perf_rdr_clear
7031da177e4SLinus Torvalds *
7041da177e4SLinus Torvalds * Zero out the given RDR register
7051da177e4SLinus Torvalds */
perf_rdr_clear(uint32_t rdr_num)7061da177e4SLinus Torvalds static int perf_rdr_clear(uint32_t rdr_num)
7071da177e4SLinus Torvalds {
708cb6fc18eSHelge Deller const struct rdr_tbl_ent *tentry;
7091da177e4SLinus Torvalds int32_t i;
7101da177e4SLinus Torvalds
7111da177e4SLinus Torvalds tentry = perf_rdr_get_entry(rdr_num);
7121da177e4SLinus Torvalds
7131da177e4SLinus Torvalds if (tentry->width == 0) {
7141da177e4SLinus Torvalds return -1;
7151da177e4SLinus Torvalds }
7161da177e4SLinus Torvalds
7171da177e4SLinus Torvalds i = tentry->num_words;
7181da177e4SLinus Torvalds while (i--) {
7191da177e4SLinus Torvalds if (perf_processor_interface == ONYX_INTF) {
7201da177e4SLinus Torvalds perf_rdr_shift_out_U(rdr_num, 0UL);
7211da177e4SLinus Torvalds } else {
7221da177e4SLinus Torvalds perf_rdr_shift_out_W(rdr_num, 0UL);
7231da177e4SLinus Torvalds }
7241da177e4SLinus Torvalds }
7251da177e4SLinus Torvalds
7261da177e4SLinus Torvalds return 0;
7271da177e4SLinus Torvalds }
7281da177e4SLinus Torvalds
7291da177e4SLinus Torvalds
7301da177e4SLinus Torvalds /*
7311da177e4SLinus Torvalds * perf_write_image
7321da177e4SLinus Torvalds *
7331da177e4SLinus Torvalds * Write the given image out to the processor
7341da177e4SLinus Torvalds */
perf_write_image(uint64_t * memaddr)7351da177e4SLinus Torvalds static int perf_write_image(uint64_t *memaddr)
7361da177e4SLinus Torvalds {
7371da177e4SLinus Torvalds uint64_t buffer[MAX_RDR_WORDS];
7381da177e4SLinus Torvalds uint64_t *bptr;
7391da177e4SLinus Torvalds uint32_t dwords;
740cb6fc18eSHelge Deller const uint32_t *intrigue_rdr;
741cb6fc18eSHelge Deller const uint64_t *intrigue_bitmask;
742cb6fc18eSHelge Deller uint64_t tmp64;
74353f01bbaSMatthew Wilcox void __iomem *runway;
744cb6fc18eSHelge Deller const struct rdr_tbl_ent *tentry;
7451da177e4SLinus Torvalds int i;
7461da177e4SLinus Torvalds
7471da177e4SLinus Torvalds /* Clear out counters */
7481da177e4SLinus Torvalds if (perf_processor_interface == ONYX_INTF) {
7491da177e4SLinus Torvalds
7501da177e4SLinus Torvalds perf_rdr_clear(16);
7511da177e4SLinus Torvalds
7521da177e4SLinus Torvalds /* Toggle performance monitor */
7531da177e4SLinus Torvalds perf_intrigue_enable_perf_counters();
7541da177e4SLinus Torvalds perf_intrigue_disable_perf_counters();
7551da177e4SLinus Torvalds
7561da177e4SLinus Torvalds intrigue_rdr = perf_rdrs_U;
7571da177e4SLinus Torvalds } else {
7581da177e4SLinus Torvalds perf_rdr_clear(15);
7591da177e4SLinus Torvalds intrigue_rdr = perf_rdrs_W;
7601da177e4SLinus Torvalds }
7611da177e4SLinus Torvalds
7621da177e4SLinus Torvalds /* Write all RDRs */
7631da177e4SLinus Torvalds while (*intrigue_rdr != -1) {
7641da177e4SLinus Torvalds tentry = perf_rdr_get_entry(*intrigue_rdr);
7651da177e4SLinus Torvalds perf_rdr_read_ubuf(*intrigue_rdr, buffer);
7661da177e4SLinus Torvalds bptr = &buffer[0];
7671da177e4SLinus Torvalds dwords = tentry->num_words;
7681da177e4SLinus Torvalds if (tentry->write_control) {
7691da177e4SLinus Torvalds intrigue_bitmask = &bitmask_array[tentry->write_control >> 3];
7701da177e4SLinus Torvalds while (dwords--) {
7711da177e4SLinus Torvalds tmp64 = *intrigue_bitmask & *memaddr++;
7721da177e4SLinus Torvalds tmp64 |= (~(*intrigue_bitmask++)) & *bptr;
7731da177e4SLinus Torvalds *bptr++ = tmp64;
7741da177e4SLinus Torvalds }
7751da177e4SLinus Torvalds } else {
7761da177e4SLinus Torvalds while (dwords--) {
7771da177e4SLinus Torvalds *bptr++ = *memaddr++;
7781da177e4SLinus Torvalds }
7791da177e4SLinus Torvalds }
7801da177e4SLinus Torvalds
7811da177e4SLinus Torvalds perf_rdr_write(*intrigue_rdr, buffer);
7821da177e4SLinus Torvalds intrigue_rdr++;
7831da177e4SLinus Torvalds }
7841da177e4SLinus Torvalds
7851da177e4SLinus Torvalds /*
7861da177e4SLinus Torvalds * Now copy out the Runway stuff which is not in RDRs
7871da177e4SLinus Torvalds */
7881da177e4SLinus Torvalds
7891da177e4SLinus Torvalds if (cpu_device == NULL)
7901da177e4SLinus Torvalds {
7911da177e4SLinus Torvalds printk(KERN_ERR "write_image: cpu_device not yet initialized!\n");
7921da177e4SLinus Torvalds return -1;
7931da177e4SLinus Torvalds }
7941da177e4SLinus Torvalds
7954bdc0d67SChristoph Hellwig runway = ioremap(cpu_device->hpa.start, 4096);
79674e3f6e6SArvind Yadav if (!runway) {
79774e3f6e6SArvind Yadav pr_err("perf_write_image: ioremap failed!\n");
79874e3f6e6SArvind Yadav return -ENOMEM;
79974e3f6e6SArvind Yadav }
8001da177e4SLinus Torvalds
8011da177e4SLinus Torvalds /* Merge intrigue bits into Runway STATUS 0 */
80253f01bbaSMatthew Wilcox tmp64 = __raw_readq(runway + RUNWAY_STATUS) & 0xffecfffffffffffful;
80353f01bbaSMatthew Wilcox __raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul),
80453f01bbaSMatthew Wilcox runway + RUNWAY_STATUS);
8051da177e4SLinus Torvalds
8061da177e4SLinus Torvalds /* Write RUNWAY DEBUG registers */
8071da177e4SLinus Torvalds for (i = 0; i < 8; i++) {
80853f01bbaSMatthew Wilcox __raw_writeq(*memaddr++, runway + RUNWAY_DEBUG);
8091da177e4SLinus Torvalds }
8101da177e4SLinus Torvalds
8111da177e4SLinus Torvalds return 0;
8121da177e4SLinus Torvalds }
8131da177e4SLinus Torvalds
8141da177e4SLinus Torvalds /*
8151da177e4SLinus Torvalds * perf_rdr_write
8161da177e4SLinus Torvalds *
8171da177e4SLinus Torvalds * Write the given RDR register with the contents
8181da177e4SLinus Torvalds * of the given buffer.
8191da177e4SLinus Torvalds */
perf_rdr_write(uint32_t rdr_num,uint64_t * buffer)8201da177e4SLinus Torvalds static void perf_rdr_write(uint32_t rdr_num, uint64_t *buffer)
8211da177e4SLinus Torvalds {
822cb6fc18eSHelge Deller const struct rdr_tbl_ent *tentry;
8231da177e4SLinus Torvalds int32_t i;
8241da177e4SLinus Torvalds
8251da177e4SLinus Torvalds printk("perf_rdr_write\n");
8261da177e4SLinus Torvalds tentry = perf_rdr_get_entry(rdr_num);
8271da177e4SLinus Torvalds if (tentry->width == 0) { return; }
8281da177e4SLinus Torvalds
8291da177e4SLinus Torvalds i = tentry->num_words;
8301da177e4SLinus Torvalds while (i--) {
8311da177e4SLinus Torvalds if (perf_processor_interface == ONYX_INTF) {
8321da177e4SLinus Torvalds perf_rdr_shift_out_U(rdr_num, buffer[i]);
8331da177e4SLinus Torvalds } else {
8341da177e4SLinus Torvalds perf_rdr_shift_out_W(rdr_num, buffer[i]);
8351da177e4SLinus Torvalds }
8361da177e4SLinus Torvalds }
8371da177e4SLinus Torvalds printk("perf_rdr_write done\n");
8381da177e4SLinus Torvalds }
839