xref: /openbmc/linux/arch/powerpc/perf/hv-gpci.c (revision 5ee9cd065836e5934710ca35653bce7905add20b)
1  // SPDX-License-Identifier: GPL-2.0-or-later
2  /*
3   * Hypervisor supplied "gpci" ("get performance counter info") performance
4   * counter support
5   *
6   * Author: Cody P Schafer <cody@linux.vnet.ibm.com>
7   * Copyright 2014 IBM Corporation.
8   */
9  
10  #define pr_fmt(fmt) "hv-gpci: " fmt
11  
12  #include <linux/init.h>
13  #include <linux/perf_event.h>
14  #include <asm/firmware.h>
15  #include <asm/hvcall.h>
16  #include <asm/io.h>
17  
18  #include "hv-gpci.h"
19  #include "hv-common.h"
20  
21  /*
22   * Example usage:
23   *  perf stat -e 'hv_gpci/counter_info_version=3,offset=0,length=8,
24   *		  secondary_index=0,starting_index=0xffffffff,request=0x10/' ...
25   */
26  
/*
 * perf_event_attr bit-field layout for hv_gpci events. Each invocation names
 * a field, the attr member it lives in (config or config1) and its first and
 * last bit. The width of each field is noted next to it.
 */

/* u32: counter request value */
EVENT_DEFINE_RANGE_FORMAT(request, config, 0, 31);
/* u32 */
/*
 * Note that starting_index, phys_processor_idx, sibling_part_id,
 * hw_chip_id, partition_id all refer to the same bit range. They
 * are basically aliases for the starting_index. The specific alias
 * used depends on the event. See REQUEST_IDX_KIND in hv-gpci-requests.h
 */
EVENT_DEFINE_RANGE_FORMAT(starting_index, config, 32, 63);
EVENT_DEFINE_RANGE_FORMAT_LITE(phys_processor_idx, config, 32, 63);
EVENT_DEFINE_RANGE_FORMAT_LITE(sibling_part_id, config, 32, 63);
EVENT_DEFINE_RANGE_FORMAT_LITE(hw_chip_id, config, 32, 63);
EVENT_DEFINE_RANGE_FORMAT_LITE(partition_id, config, 32, 63);

/* u16 */
EVENT_DEFINE_RANGE_FORMAT(secondary_index, config1, 0, 15);
/* u8 */
EVENT_DEFINE_RANGE_FORMAT(counter_info_version, config1, 16, 23);
/* u8, bytes of data (1-8); the range is enforced in h_gpci_event_init() */
EVENT_DEFINE_RANGE_FORMAT(length, config1, 24, 31);
/* u32, byte offset into the data area of the request buffer */
EVENT_DEFINE_RANGE_FORMAT(offset, config1, 32, 63);
50  
/*
 * CPU currently designated for collecting gpci events. Populated by the
 * hotplug online callback when empty, re-targeted by the offline callback,
 * and exported to user space through the "cpumask" attribute.
 */
static cpumask_t hv_gpci_cpumask;
52  
/*
 * Attributes of the "format" sysfs group: one entry per event field declared
 * with EVENT_DEFINE_RANGE_FORMAT{,_LITE} above. NULL-terminated.
 */
static struct attribute *format_attrs[] = {
	&format_attr_request.attr,
	/* starting_index and its per-event aliases (same bit range) */
	&format_attr_starting_index.attr,
	&format_attr_phys_processor_idx.attr,
	&format_attr_sibling_part_id.attr,
	&format_attr_hw_chip_id.attr,
	&format_attr_partition_id.attr,
	&format_attr_secondary_index.attr,
	&format_attr_counter_info_version.attr,

	&format_attr_offset.attr,
	&format_attr_length.attr,
	NULL,
};
67  
/* sysfs group "format", exposing the entries of format_attrs[]. */
static const struct attribute_group format_group = {
	.name = "format",
	.attrs = format_attrs,
};
72  
/*
 * sysfs group "events". Not const: its attribute list is filled in at init
 * time rather than statically.
 */
static struct attribute_group event_group = {
	.name  = "events",
	/* .attrs is set in init */
};
77  
/*
 * HV_CAPS_ATTR(_name, _format) - define a read-only device attribute
 * hv_caps_attr_<_name> whose show routine queries the hypervisor perf
 * capabilities via hv_perf_caps_get() and prints the caps field called
 * <_name> using <_format>. The show routine returns -EIO when the
 * capability query reports a non-zero status.
 */
#define HV_CAPS_ATTR(_name, _format)				\
static ssize_t _name##_show(struct device *dev,			\
			    struct device_attribute *attr,	\
			    char *page)				\
{								\
	struct hv_perf_caps caps;				\
								\
	if (hv_perf_caps_get(&caps))				\
		return -EIO;					\
								\
	return sprintf(page, _format, caps._name);		\
}								\
static struct device_attribute hv_caps_attr_##_name = __ATTR_RO(_name)
91  
kernel_version_show(struct device * dev,struct device_attribute * attr,char * page)92  static ssize_t kernel_version_show(struct device *dev,
93  				   struct device_attribute *attr,
94  				   char *page)
95  {
96  	return sprintf(page, "0x%x\n", COUNTER_INFO_VERSION_CURRENT);
97  }
98  
cpumask_show(struct device * dev,struct device_attribute * attr,char * buf)99  static ssize_t cpumask_show(struct device *dev,
100  			    struct device_attribute *attr, char *buf)
101  {
102  	return cpumap_print_to_pagebuf(true, buf, &hv_gpci_cpumask);
103  }
104  
/*
 * Interface attribute array index to store system information.
 *
 * These are slot numbers inside interface_attrs[]: slots 6..10 are the NULL
 * placeholders reserved for the optional system-information files, and
 * INTERFACE_NULL_ATTR (11) is the slot of the terminating NULL. They must be
 * kept in sync with the layout of interface_attrs[].
 */
#define INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR	6
#define INTERFACE_PROCESSOR_CONFIG_ATTR		7
#define INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR	8
#define INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR	9
#define INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR	10
#define INTERFACE_NULL_ATTR			11
112  
/*
 * Counter request value to retrieve system information.
 *
 * The enumerators are indices into sysinfo_counter_request[], which holds
 * the actual request numbers passed to the hypervisor.
 */
enum {
	PROCESSOR_BUS_TOPOLOGY,
	PROCESSOR_CONFIG,
	AFFINITY_DOMAIN_VIA_VP, /* affinity domain via virtual processor */
	AFFINITY_DOMAIN_VIA_DOM, /* affinity domain via domain */
	AFFINITY_DOMAIN_VIA_PAR, /* affinity domain via partition */
};
121  
/*
 * H_GET_PERF_COUNTER_INFO counter request numbers for each kind of system
 * information, indexed by the anonymous enum above.
 */
static int sysinfo_counter_request[] = {
	[PROCESSOR_BUS_TOPOLOGY] = 0xD0,
	[PROCESSOR_CONFIG] = 0x90,
	[AFFINITY_DOMAIN_VIA_VP] = 0xA0,
	[AFFINITY_DOMAIN_VIA_DOM] = 0xB0,
	[AFFINITY_DOMAIN_VIA_PAR] = 0xB1,
};
129  
/*
 * Per-CPU request/response buffer for H_GET_PERF_COUNTER_INFO, aligned to
 * sizeof(uint64_t). Users in this file obtain it with get_cpu_var() and must
 * release it with put_cpu_var() on every exit path; its physical address is
 * what gets handed to the hypervisor.
 */
static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t));
131  
/*
 * Issue one H_GET_PERF_COUNTER_INFO hcall for a system-information request
 * and append the returned counter_value elements to @buf as hex text, one
 * element per line.
 *
 * @req:             counter request number
 * @starting_index:  starting index for the request
 * @secondary_index: secondary index for the request
 * @buf:             sysfs output page
 * @n:               in/out: number of bytes already used in @buf
 * @arg:             request buffer (the caller owns and has zeroed it)
 *
 * Returns 0 (H_SUCCESS) when all data was returned, H_PARAMETER when the
 * hypervisor returned a partial buffer and the caller should issue a
 * follow-up request, or a negative errno stored in the unsigned return type:
 * -EPERM (H_AUTHORITY), -EIO (any other hcall failure) or -EFBIG (output
 * does not fit in one page).
 */
static unsigned long systeminfo_gpci_request(u32 req, u32 starting_index,
			u16 secondary_index, char *buf,
			size_t *n, struct hv_gpci_request_buffer *arg)
{
	unsigned long ret;
	size_t i, j;
	size_t nr_elements, element_size, row_len;

	arg->params.counter_request = cpu_to_be32(req);
	arg->params.starting_index = cpu_to_be32(starting_index);
	arg->params.secondary_index = cpu_to_be16(secondary_index);

	ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
			virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);

	/*
	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL',
	 * which means that the current buffer size cannot accommodate
	 * all the information and a partial buffer returned.
	 * hcall fails incase of ret value other than H_SUCCESS or H_PARAMETER.
	 *
	 * ret value as H_AUTHORITY implies that partition is not permitted to retrieve
	 * performance information, and required to set
	 * "Enable Performance Information Collection" option.
	 */
	if (ret == H_AUTHORITY)
		return -EPERM;

	/*
	 * hcall can fail with other possible ret value like H_PRIVILEGE/H_HARDWARE
	 * because of invalid buffer-length/address or due to some hardware
	 * error.
	 */
	if (ret && (ret != H_PARAMETER))
		return -EIO;

	/*
	 * hcall H_GET_PERF_COUNTER_INFO populates 'returned_values' with the
	 * number of counter_value array elements returned and
	 * 'cv_element_size' with the size of one element. Convert both once
	 * instead of on every loop iteration.
	 */
	nr_elements = be16_to_cpu(arg->params.returned_values);
	element_size = be16_to_cpu(arg->params.cv_element_size);

	/* Two hex digits per byte plus the trailing newline. */
	row_len = 2 * element_size + 1;

	for (i = 0; i < nr_elements; i++) {
		/*
		 * Check the room *before* writing: the row (and sprintf()'s
		 * terminating NUL) must stay inside the page. Checking only
		 * after the writes would let the page be overrun first.
		 */
		if (*n >= PAGE_SIZE || row_len >= PAGE_SIZE - *n) {
			pr_info("System information exceeds PAGE_SIZE\n");
			return -EFBIG;
		}

		for (j = i * element_size; j < (i + 1) * element_size; j++)
			*n += sprintf(buf + *n,  "%02x", (u8)arg->bytes[j]);
		*n += sprintf(buf + *n,  "\n");
	}

	/* Covers a caller handing in an already-full page with no new rows. */
	if (*n >= PAGE_SIZE) {
		pr_info("System information exceeds PAGE_SIZE\n");
		return -EFBIG;
	}

	return ret;
}
191  
processor_bus_topology_show(struct device * dev,struct device_attribute * attr,char * buf)192  static ssize_t processor_bus_topology_show(struct device *dev, struct device_attribute *attr,
193  				char *buf)
194  {
195  	struct hv_gpci_request_buffer *arg;
196  	unsigned long ret;
197  	size_t n = 0;
198  
199  	arg = (void *)get_cpu_var(hv_gpci_reqb);
200  	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
201  
202  	/*
203  	 * Pass the counter request value 0xD0 corresponds to request
204  	 * type 'Processor_bus_topology', to retrieve
205  	 * the system topology information.
206  	 * starting_index value implies the starting hardware
207  	 * chip id.
208  	 */
209  	ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY],
210  			0, 0, buf, &n, arg);
211  
212  	if (!ret)
213  		return n;
214  
215  	if (ret != H_PARAMETER)
216  		goto out;
217  
218  	/*
219  	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
220  	 * implies that buffer can't accommodate all information, and a partial buffer
221  	 * returned. To handle that, we need to make subsequent requests
222  	 * with next starting index to retrieve additional (missing) data.
223  	 * Below loop do subsequent hcalls with next starting index and add it
224  	 * to buffer util we get all the information.
225  	 */
226  	while (ret == H_PARAMETER) {
227  		int returned_values = be16_to_cpu(arg->params.returned_values);
228  		int elementsize = be16_to_cpu(arg->params.cv_element_size);
229  		int last_element = (returned_values - 1) * elementsize;
230  
231  		/*
232  		 * Since the starting index value is part of counter_value
233  		 * buffer elements, use the starting index value in the last
234  		 * element and add 1 to make subsequent hcalls.
235  		 */
236  		u32 starting_index = arg->bytes[last_element + 3] +
237  				(arg->bytes[last_element + 2] << 8) +
238  				(arg->bytes[last_element + 1] << 16) +
239  				(arg->bytes[last_element] << 24) + 1;
240  
241  		memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
242  
243  		ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY],
244  				starting_index, 0, buf, &n, arg);
245  
246  		if (!ret)
247  			return n;
248  
249  		if (ret != H_PARAMETER)
250  			goto out;
251  	}
252  
253  	return n;
254  
255  out:
256  	put_cpu_var(hv_gpci_reqb);
257  	return ret;
258  }
259  
processor_config_show(struct device * dev,struct device_attribute * attr,char * buf)260  static ssize_t processor_config_show(struct device *dev, struct device_attribute *attr,
261  					char *buf)
262  {
263  	struct hv_gpci_request_buffer *arg;
264  	unsigned long ret;
265  	size_t n = 0;
266  
267  	arg = (void *)get_cpu_var(hv_gpci_reqb);
268  	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
269  
270  	/*
271  	 * Pass the counter request value 0x90 corresponds to request
272  	 * type 'Processor_config', to retrieve
273  	 * the system processor information.
274  	 * starting_index value implies the starting hardware
275  	 * processor index.
276  	 */
277  	ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG],
278  			0, 0, buf, &n, arg);
279  
280  	if (!ret)
281  		return n;
282  
283  	if (ret != H_PARAMETER)
284  		goto out;
285  
286  	/*
287  	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
288  	 * implies that buffer can't accommodate all information, and a partial buffer
289  	 * returned. To handle that, we need to take subsequent requests
290  	 * with next starting index to retrieve additional (missing) data.
291  	 * Below loop do subsequent hcalls with next starting index and add it
292  	 * to buffer util we get all the information.
293  	 */
294  	while (ret == H_PARAMETER) {
295  		int returned_values = be16_to_cpu(arg->params.returned_values);
296  		int elementsize = be16_to_cpu(arg->params.cv_element_size);
297  		int last_element = (returned_values - 1) * elementsize;
298  
299  		/*
300  		 * Since the starting index is part of counter_value
301  		 * buffer elements, use the starting index value in the last
302  		 * element and add 1 to subsequent hcalls.
303  		 */
304  		u32 starting_index = arg->bytes[last_element + 3] +
305  				(arg->bytes[last_element + 2] << 8) +
306  				(arg->bytes[last_element + 1] << 16) +
307  				(arg->bytes[last_element] << 24) + 1;
308  
309  		memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
310  
311  		ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG],
312  				starting_index, 0, buf, &n, arg);
313  
314  		if (!ret)
315  			return n;
316  
317  		if (ret != H_PARAMETER)
318  			goto out;
319  	}
320  
321  	return n;
322  
323  out:
324  	put_cpu_var(hv_gpci_reqb);
325  	return ret;
326  }
327  
affinity_domain_via_virtual_processor_show(struct device * dev,struct device_attribute * attr,char * buf)328  static ssize_t affinity_domain_via_virtual_processor_show(struct device *dev,
329  			struct device_attribute *attr, char *buf)
330  {
331  	struct hv_gpci_request_buffer *arg;
332  	unsigned long ret;
333  	size_t n = 0;
334  
335  	arg = (void *)get_cpu_var(hv_gpci_reqb);
336  	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
337  
338  	/*
339  	 * Pass the counter request 0xA0 corresponds to request
340  	 * type 'Affinity_domain_information_by_virutal_processor',
341  	 * to retrieve the system affinity domain information.
342  	 * starting_index value refers to the starting hardware
343  	 * processor index.
344  	 */
345  	ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP],
346  			0, 0, buf, &n, arg);
347  
348  	if (!ret)
349  		return n;
350  
351  	if (ret != H_PARAMETER)
352  		goto out;
353  
354  	/*
355  	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
356  	 * implies that buffer can't accommodate all information, and a partial buffer
357  	 * returned. To handle that, we need to take subsequent requests
358  	 * with next secondary index to retrieve additional (missing) data.
359  	 * Below loop do subsequent hcalls with next secondary index and add it
360  	 * to buffer util we get all the information.
361  	 */
362  	while (ret == H_PARAMETER) {
363  		int returned_values = be16_to_cpu(arg->params.returned_values);
364  		int elementsize = be16_to_cpu(arg->params.cv_element_size);
365  		int last_element = (returned_values - 1) * elementsize;
366  
367  		/*
368  		 * Since the starting index and secondary index type is part of the
369  		 * counter_value buffer elements, use the starting index value in the
370  		 * last array element as subsequent starting index, and use secondary index
371  		 * value in the last array element plus 1 as subsequent secondary index.
372  		 * For counter request '0xA0', starting index points to partition id
373  		 * and secondary index points to corresponding virtual processor index.
374  		 */
375  		u32 starting_index = arg->bytes[last_element + 1] + (arg->bytes[last_element] << 8);
376  		u16 secondary_index = arg->bytes[last_element + 3] +
377  				(arg->bytes[last_element + 2] << 8) + 1;
378  
379  		memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
380  
381  		ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP],
382  				starting_index, secondary_index, buf, &n, arg);
383  
384  		if (!ret)
385  			return n;
386  
387  		if (ret != H_PARAMETER)
388  			goto out;
389  	}
390  
391  	return n;
392  
393  out:
394  	put_cpu_var(hv_gpci_reqb);
395  	return ret;
396  }
397  
affinity_domain_via_domain_show(struct device * dev,struct device_attribute * attr,char * buf)398  static ssize_t affinity_domain_via_domain_show(struct device *dev, struct device_attribute *attr,
399  						char *buf)
400  {
401  	struct hv_gpci_request_buffer *arg;
402  	unsigned long ret;
403  	size_t n = 0;
404  
405  	arg = (void *)get_cpu_var(hv_gpci_reqb);
406  	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
407  
408  	/*
409  	 * Pass the counter request 0xB0 corresponds to request
410  	 * type 'Affinity_domain_information_by_domain',
411  	 * to retrieve the system affinity domain information.
412  	 * starting_index value refers to the starting hardware
413  	 * processor index.
414  	 */
415  	ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM],
416  			0, 0, buf, &n, arg);
417  
418  	if (!ret)
419  		return n;
420  
421  	if (ret != H_PARAMETER)
422  		goto out;
423  
424  	/*
425  	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
426  	 * implies that buffer can't accommodate all information, and a partial buffer
427  	 * returned. To handle that, we need to take subsequent requests
428  	 * with next starting index to retrieve additional (missing) data.
429  	 * Below loop do subsequent hcalls with next starting index and add it
430  	 * to buffer util we get all the information.
431  	 */
432  	while (ret == H_PARAMETER) {
433  		int returned_values = be16_to_cpu(arg->params.returned_values);
434  		int elementsize = be16_to_cpu(arg->params.cv_element_size);
435  		int last_element = (returned_values - 1) * elementsize;
436  
437  		/*
438  		 * Since the starting index value is part of counter_value
439  		 * buffer elements, use the starting index value in the last
440  		 * element and add 1 to make subsequent hcalls.
441  		 */
442  		u32 starting_index = arg->bytes[last_element + 1] +
443  			(arg->bytes[last_element] << 8) + 1;
444  
445  		memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
446  
447  		ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM],
448  					starting_index, 0, buf, &n, arg);
449  
450  		if (!ret)
451  			return n;
452  
453  		if (ret != H_PARAMETER)
454  			goto out;
455  	}
456  
457  	return n;
458  
459  out:
460  	put_cpu_var(hv_gpci_reqb);
461  	return ret;
462  }
463  
affinity_domain_via_partition_result_parse(int returned_values,int element_size,char * buf,size_t * last_element,size_t * n,struct hv_gpci_request_buffer * arg)464  static void affinity_domain_via_partition_result_parse(int returned_values,
465  			int element_size, char *buf, size_t *last_element,
466  			size_t *n, struct hv_gpci_request_buffer *arg)
467  {
468  	size_t i = 0, j = 0;
469  	size_t k, l, m;
470  	uint16_t total_affinity_domain_ele, size_of_each_affinity_domain_ele;
471  
472  	/*
473  	 * hcall H_GET_PERF_COUNTER_INFO populates the 'returned_values'
474  	 * to show the total number of counter_value array elements
475  	 * returned via hcall.
476  	 * Unlike other request types, the data structure returned by this
477  	 * request is variable-size. For this counter request type,
478  	 * hcall populates 'cv_element_size' corresponds to minimum size of
479  	 * the structure returned i.e; the size of the structure with no domain
480  	 * information. Below loop go through all counter_value array
481  	 * to determine the number and size of each domain array element and
482  	 * add it to the output buffer.
483  	 */
484  	while (i < returned_values) {
485  		k = j;
486  		for (; k < j + element_size; k++)
487  			*n += sprintf(buf + *n,  "%02x", (u8)arg->bytes[k]);
488  		*n += sprintf(buf + *n,  "\n");
489  
490  		total_affinity_domain_ele = (u8)arg->bytes[k - 2] << 8 | (u8)arg->bytes[k - 3];
491  		size_of_each_affinity_domain_ele = (u8)arg->bytes[k] << 8 | (u8)arg->bytes[k - 1];
492  
493  		for (l = 0; l < total_affinity_domain_ele; l++) {
494  			for (m = 0; m < size_of_each_affinity_domain_ele; m++) {
495  				*n += sprintf(buf + *n,  "%02x", (u8)arg->bytes[k]);
496  				k++;
497  			}
498  			*n += sprintf(buf + *n,  "\n");
499  		}
500  
501  		*n += sprintf(buf + *n,  "\n");
502  		i++;
503  		j = k;
504  	}
505  
506  	*last_element = k;
507  }
508  
affinity_domain_via_partition_show(struct device * dev,struct device_attribute * attr,char * buf)509  static ssize_t affinity_domain_via_partition_show(struct device *dev, struct device_attribute *attr,
510  							char *buf)
511  {
512  	struct hv_gpci_request_buffer *arg;
513  	unsigned long ret;
514  	size_t n = 0;
515  	size_t last_element = 0;
516  	u32 starting_index;
517  
518  	arg = (void *)get_cpu_var(hv_gpci_reqb);
519  	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
520  
521  	/*
522  	 * Pass the counter request value 0xB1 corresponds to counter request
523  	 * type 'Affinity_domain_information_by_partition',
524  	 * to retrieve the system affinity domain by partition information.
525  	 * starting_index value refers to the starting hardware
526  	 * processor index.
527  	 */
528  	arg->params.counter_request = cpu_to_be32(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]);
529  	arg->params.starting_index = cpu_to_be32(0);
530  
531  	ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
532  			virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
533  
534  	if (!ret)
535  		goto parse_result;
536  
537  	if (ret && (ret != H_PARAMETER))
538  		goto out;
539  
540  	/*
541  	 * ret value as 'H_PARAMETER' implies that the current buffer size
542  	 * can't accommodate all the information, and a partial buffer
543  	 * returned. To handle that, we need to make subsequent requests
544  	 * with next starting index to retrieve additional (missing) data.
545  	 * Below loop do subsequent hcalls with next starting index and add it
546  	 * to buffer util we get all the information.
547  	 */
548  	while (ret == H_PARAMETER) {
549  		affinity_domain_via_partition_result_parse(
550  			be16_to_cpu(arg->params.returned_values) - 1,
551  			be16_to_cpu(arg->params.cv_element_size), buf,
552  			&last_element, &n, arg);
553  
554  		if (n >= PAGE_SIZE) {
555  			put_cpu_var(hv_gpci_reqb);
556  			pr_debug("System information exceeds PAGE_SIZE\n");
557  			return -EFBIG;
558  		}
559  
560  		/*
561  		 * Since the starting index value is part of counter_value
562  		 * buffer elements, use the starting_index value in the last
563  		 * element and add 1 to make subsequent hcalls.
564  		 */
565  		starting_index = (u8)arg->bytes[last_element] << 8 |
566  				(u8)arg->bytes[last_element + 1];
567  
568  		memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
569  		arg->params.counter_request = cpu_to_be32(
570  				sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]);
571  		arg->params.starting_index = cpu_to_be32(starting_index);
572  
573  		ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
574  				virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
575  
576  		if (ret && (ret != H_PARAMETER))
577  			goto out;
578  	}
579  
580  parse_result:
581  	affinity_domain_via_partition_result_parse(
582  		be16_to_cpu(arg->params.returned_values),
583  		be16_to_cpu(arg->params.cv_element_size),
584  		buf, &last_element, &n, arg);
585  
586  	put_cpu_var(hv_gpci_reqb);
587  	return n;
588  
589  out:
590  	put_cpu_var(hv_gpci_reqb);
591  
592  	/*
593  	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL',
594  	 * which means that the current buffer size cannot accommodate
595  	 * all the information and a partial buffer returned.
596  	 * hcall fails incase of ret value other than H_SUCCESS or H_PARAMETER.
597  	 *
598  	 * ret value as H_AUTHORITY implies that partition is not permitted to retrieve
599  	 * performance information, and required to set
600  	 * "Enable Performance Information Collection" option.
601  	 */
602  	if (ret == H_AUTHORITY)
603  		return -EPERM;
604  
605  	/*
606  	 * hcall can fail with other possible ret value like H_PRIVILEGE/H_HARDWARE
607  	 * because of invalid buffer-length/address or due to some hardware
608  	 * error.
609  	 */
610  	return -EIO;
611  }
612  
/*
 * Read-only device attributes backed by kernel_version_show() and
 * cpumask_show().
 */
static DEVICE_ATTR_RO(kernel_version);
static DEVICE_ATTR_RO(cpumask);

/*
 * Read-only attributes reporting individual struct hv_perf_caps fields; each
 * expands to a <name>_show() routine plus hv_caps_attr_<name>.
 */
HV_CAPS_ATTR(version, "0x%x\n");
HV_CAPS_ATTR(ga, "%d\n");
HV_CAPS_ATTR(expanded, "%d\n");
HV_CAPS_ATTR(lab, "%d\n");
HV_CAPS_ATTR(collect_privileged, "%d\n");
621  
/*
 * Attributes of the "interface" sysfs group.
 *
 * Slots 0-5 are always present. Slots 6-10 are placeholders whose positions
 * correspond to the INTERFACE_*_ATTR index macros; slot 11
 * (INTERFACE_NULL_ATTR) is the terminating NULL. Keep this layout and those
 * macros in sync.
 */
static struct attribute *interface_attrs[] = {
	&dev_attr_kernel_version.attr,
	&hv_caps_attr_version.attr,
	&hv_caps_attr_ga.attr,
	&hv_caps_attr_expanded.attr,
	&hv_caps_attr_lab.attr,
	&hv_caps_attr_collect_privileged.attr,
	/*
	 * This NULL is a placeholder for the processor_bus_topology
	 * attribute, set in init function if applicable.
	 */
	NULL,
	/*
	 * This NULL is a placeholder for the processor_config
	 * attribute, set in init function if applicable.
	 */
	NULL,
	/*
	 * This NULL is a placeholder for the affinity_domain_via_virtual_processor
	 * attribute, set in init function if applicable.
	 */
	NULL,
	/*
	 * This NULL is a placeholder for the affinity_domain_via_domain
	 * attribute, set in init function if applicable.
	 */
	NULL,
	/*
	 * This NULL is a placeholder for the affinity_domain_via_partition
	 * attribute, set in init function if applicable.
	 */
	NULL,
	/* Array terminator (slot INTERFACE_NULL_ATTR). */
	NULL,
};
656  
/* Attribute list holding only the "cpumask" file. NULL-terminated. */
static struct attribute *cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};
661  
/* Unnamed attribute group carrying cpumask_attrs[]. */
static const struct attribute_group cpumask_attr_group = {
	.attrs = cpumask_attrs,
};
665  
/* sysfs group "interface", exposing the entries of interface_attrs[]. */
static const struct attribute_group interface_group = {
	.name = "interface",
	.attrs = interface_attrs,
};
670  
/*
 * All attribute groups of the PMU, referenced by h_gpci_pmu.attr_groups.
 * NULL-terminated.
 */
static const struct attribute_group *attr_groups[] = {
	&format_group,
	&event_group,
	&interface_group,
	&cpumask_attr_group,
	NULL,
};
678  
/*
 * Perform one H_GET_PERF_COUNTER_INFO hcall and extract a single counter
 * value from the returned data.
 *
 * @req, @starting_index, @secondary_index, @version_in: request parameters
 * @offset: byte offset of the value inside the returned data bytes
 * @length: size of the value in bytes
 * @value:  out: the bytes at [@offset, @offset + @length) assembled with the
 *          first byte most significant; written only on success
 *
 * Returns 0 on success, otherwise the hcall return value.
 */
static unsigned long single_gpci_request(u32 req, u32 starting_index,
		u16 secondary_index, u8 version_in, u32 offset, u8 length,
		u64 *value)
{
	struct hv_gpci_request_buffer *reqb;
	unsigned long hret;
	u64 assembled = 0;
	size_t pos;

	reqb = (void *)get_cpu_var(hv_gpci_reqb);
	memset(reqb, 0, HGPCI_REQ_BUFFER_SIZE);

	reqb->params.counter_request = cpu_to_be32(req);
	reqb->params.starting_index = cpu_to_be32(starting_index);
	reqb->params.secondary_index = cpu_to_be16(secondary_index);
	reqb->params.counter_info_version_in = version_in;

	hret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
			virt_to_phys(reqb), HGPCI_REQ_BUFFER_SIZE);

	/*
	 * H_PARAMETER with detail_rc GEN_BUF_TOO_SMALL (0x1B) only means the
	 * buffer could not hold *all* entries. This function reads data for
	 * the given starting index only, so a partial buffer is good enough
	 * and is treated as success.
	 */
	if (hret == H_PARAMETER && be32_to_cpu(reqb->params.detail_rc) == 0x1B)
		hret = 0;

	if (hret) {
		pr_devel("hcall failed: 0x%lx\n", hret);
	} else {
		/*
		 * offset and length were verified against the (zeroed) buffer
		 * at event init. Shift the accumulator left one byte per step
		 * so the first byte read ends up most significant.
		 */
		for (pos = offset; pos < offset + length; pos++)
			assembled = (assembled << 8) | reqb->bytes[pos];

		*value = assembled;
	}

	put_cpu_var(hv_gpci_reqb);
	return hret;
}
730  
h_gpci_get_value(struct perf_event * event)731  static u64 h_gpci_get_value(struct perf_event *event)
732  {
733  	u64 count;
734  	unsigned long ret = single_gpci_request(event_get_request(event),
735  					event_get_starting_index(event),
736  					event_get_secondary_index(event),
737  					event_get_counter_info_version(event),
738  					event_get_offset(event),
739  					event_get_length(event),
740  					&count);
741  	if (ret)
742  		return 0;
743  	return count;
744  }
745  
h_gpci_event_update(struct perf_event * event)746  static void h_gpci_event_update(struct perf_event *event)
747  {
748  	s64 prev;
749  	u64 now = h_gpci_get_value(event);
750  	prev = local64_xchg(&event->hw.prev_count, now);
751  	local64_add(now - prev, &event->count);
752  }
753  
h_gpci_event_start(struct perf_event * event,int flags)754  static void h_gpci_event_start(struct perf_event *event, int flags)
755  {
756  	local64_set(&event->hw.prev_count, h_gpci_get_value(event));
757  }
758  
/*
 * Stop counting: fold the delta accumulated since the last read into the
 * event count. @flags is not used. Also installed as the PMU's .del hook.
 */
static void h_gpci_event_stop(struct perf_event *event, int flags)
{
	h_gpci_event_update(event);
}
763  
h_gpci_event_add(struct perf_event * event,int flags)764  static int h_gpci_event_add(struct perf_event *event, int flags)
765  {
766  	if (flags & PERF_EF_START)
767  		h_gpci_event_start(event, flags);
768  
769  	return 0;
770  }
771  
h_gpci_event_init(struct perf_event * event)772  static int h_gpci_event_init(struct perf_event *event)
773  {
774  	u64 count;
775  	u8 length;
776  	unsigned long ret;
777  
778  	/* Not our event */
779  	if (event->attr.type != event->pmu->type)
780  		return -ENOENT;
781  
782  	/* config2 is unused */
783  	if (event->attr.config2) {
784  		pr_devel("config2 set when reserved\n");
785  		return -EINVAL;
786  	}
787  
788  	/* no branch sampling */
789  	if (has_branch_stack(event))
790  		return -EOPNOTSUPP;
791  
792  	length = event_get_length(event);
793  	if (length < 1 || length > 8) {
794  		pr_devel("length invalid\n");
795  		return -EINVAL;
796  	}
797  
798  	/* last byte within the buffer? */
799  	if ((event_get_offset(event) + length) > HGPCI_MAX_DATA_BYTES) {
800  		pr_devel("request outside of buffer: %zu > %zu\n",
801  				(size_t)event_get_offset(event) + length,
802  				HGPCI_MAX_DATA_BYTES);
803  		return -EINVAL;
804  	}
805  
806  	/* check if the request works... */
807  	ret = single_gpci_request(event_get_request(event),
808  				event_get_starting_index(event),
809  				event_get_secondary_index(event),
810  				event_get_counter_info_version(event),
811  				event_get_offset(event),
812  				length,
813  				&count);
814  
815  	/*
816  	 * ret value as H_AUTHORITY implies that partition is not permitted to retrieve
817  	 * performance information, and required to set
818  	 * "Enable Performance Information Collection" option.
819  	 */
820  	if (ret == H_AUTHORITY)
821  		return -EPERM;
822  
823  	if (ret) {
824  		pr_devel("gpci hcall failed\n");
825  		return -EINVAL;
826  	}
827  
828  	return 0;
829  }
830  
/*
 * The hv_gpci PMU description: it uses perf_invalid_context as its task
 * context and sets the PERF_PMU_CAP_NO_EXCLUDE capability.
 */
static struct pmu h_gpci_pmu = {
	.task_ctx_nr = perf_invalid_context,

	.name = "hv_gpci",
	.attr_groups = attr_groups,
	.event_init  = h_gpci_event_init,
	.add         = h_gpci_event_add,
	/* removing an event only needs a final count update, same as stop */
	.del         = h_gpci_event_stop,
	.start       = h_gpci_event_start,
	.stop        = h_gpci_event_stop,
	.read        = h_gpci_event_update,
	.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
844  
ppc_hv_gpci_cpu_online(unsigned int cpu)845  static int ppc_hv_gpci_cpu_online(unsigned int cpu)
846  {
847  	if (cpumask_empty(&hv_gpci_cpumask))
848  		cpumask_set_cpu(cpu, &hv_gpci_cpumask);
849  
850  	return 0;
851  }
852  
ppc_hv_gpci_cpu_offline(unsigned int cpu)853  static int ppc_hv_gpci_cpu_offline(unsigned int cpu)
854  {
855  	int target;
856  
857  	/* Check if exiting cpu is used for collecting gpci events */
858  	if (!cpumask_test_and_clear_cpu(cpu, &hv_gpci_cpumask))
859  		return 0;
860  
861  	/* Find a new cpu to collect gpci events */
862  	target = cpumask_last(cpu_active_mask);
863  
864  	if (target < 0 || target >= nr_cpu_ids) {
865  		pr_err("hv_gpci: CPU hotplug init failed\n");
866  		return -1;
867  	}
868  
869  	/* Migrate gpci events to the new target */
870  	cpumask_set_cpu(target, &hv_gpci_cpumask);
871  	perf_pmu_migrate_context(&h_gpci_pmu, cpu, target);
872  
873  	return 0;
874  }
875  
hv_gpci_cpu_hotplug_init(void)876  static int hv_gpci_cpu_hotplug_init(void)
877  {
878  	return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE,
879  			  "perf/powerpc/hv_gcpi:online",
880  			  ppc_hv_gpci_cpu_online,
881  			  ppc_hv_gpci_cpu_offline);
882  }
883  
/*
 * Probe whether counter request @req is usable and, if so, allocate the
 * device attribute for the matching system-information sysfs file.
 *
 * @sysinfo_interface_group_index: slot in interface_attrs[] the attribute is
 *                                 meant for; must be one of the
 *                                 INTERFACE_*_ATTR placeholder indices
 * @req:                           counter request number to probe
 *
 * Returns a kzalloc()ed attribute with mode 0444 and the name/show routine
 * of the selected file, or NULL when the index is out of range, the probe
 * hcall fails with anything other than H_AUTHORITY/H_PARAMETER, or the
 * allocation fails.
 */
static struct device_attribute *sysinfo_device_attr_create(int
		sysinfo_interface_group_index, u32 req)
{
	struct hv_gpci_request_buffer *reqb;
	struct device_attribute *new_attr;
	unsigned long hret;

	if (sysinfo_interface_group_index < INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR ||
			sysinfo_interface_group_index >= INTERFACE_NULL_ATTR) {
		pr_info("Wrong interface group index for system information\n");
		return NULL;
	}

	/* Check for given counter request value support */
	reqb = (void *)get_cpu_var(hv_gpci_reqb);
	memset(reqb, 0, HGPCI_REQ_BUFFER_SIZE);

	reqb->params.counter_request = cpu_to_be32(req);

	hret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
			virt_to_phys(reqb), HGPCI_REQ_BUFFER_SIZE);

	put_cpu_var(hv_gpci_reqb);

	/*
	 * Only success, H_AUTHORITY and H_PARAMETER count as "request is
	 * supported"; any other return value means no attribute is created.
	 */
	if (hret && hret != H_AUTHORITY && hret != H_PARAMETER) {
		pr_devel("hcall failed, with error: 0x%lx\n", hret);
		return NULL;
	}

	new_attr = kzalloc(sizeof(*new_attr), GFP_KERNEL);
	if (!new_attr)
		return NULL;

	sysfs_attr_init(&new_attr->attr);
	new_attr->attr.mode = 0444;

	/* Pick the file name and show routine that belong to this slot. */
	switch (sysinfo_interface_group_index) {
	case INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR:
		new_attr->attr.name = "processor_bus_topology";
		new_attr->show = processor_bus_topology_show;
		break;
	case INTERFACE_PROCESSOR_CONFIG_ATTR:
		new_attr->attr.name = "processor_config";
		new_attr->show = processor_config_show;
		break;
	case INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR:
		new_attr->attr.name = "affinity_domain_via_virtual_processor";
		new_attr->show = affinity_domain_via_virtual_processor_show;
		break;
	case INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR:
		new_attr->attr.name = "affinity_domain_via_domain";
		new_attr->show = affinity_domain_via_domain_show;
		break;
	case INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR:
		new_attr->attr.name = "affinity_domain_via_partition";
		new_attr->show = affinity_domain_via_partition_show;
		break;
	}

	return new_attr;
}
947  
add_sysinfo_interface_files(void)948  static void add_sysinfo_interface_files(void)
949  {
950  	int sysfs_count;
951  	struct device_attribute *attr[INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR];
952  	int i;
953  
954  	sysfs_count = INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR;
955  
956  	/* Get device attribute for a given counter request value */
957  	for (i = 0; i < sysfs_count; i++) {
958  		attr[i] = sysinfo_device_attr_create(i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR,
959  				sysinfo_counter_request[i]);
960  
961  		if (!attr[i])
962  			goto out;
963  	}
964  
965  	/* Add sysinfo interface attributes in the interface_attrs attribute array */
966  	for (i = 0; i < sysfs_count; i++)
967  		interface_attrs[i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR] = &attr[i]->attr;
968  
969  	return;
970  
971  out:
972  	/*
973  	 * The sysinfo interface attributes will be added, only if hcall passed for
974  	 * all the counter request values. Free the device attribute array incase
975  	 * of any hcall failure.
976  	 */
977  	if (i > 0) {
978  		while (i >= 0) {
979  			kfree(attr[i]);
980  			i--;
981  		}
982  	}
983  }
984  
hv_gpci_init(void)985  static int hv_gpci_init(void)
986  {
987  	int r;
988  	unsigned long hret;
989  	struct hv_perf_caps caps;
990  	struct hv_gpci_request_buffer *arg;
991  
992  	hv_gpci_assert_offsets_correct();
993  
994  	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
995  		pr_debug("not a virtualized system, not enabling\n");
996  		return -ENODEV;
997  	}
998  
999  	hret = hv_perf_caps_get(&caps);
1000  	if (hret) {
1001  		pr_debug("could not obtain capabilities, not enabling, rc=%ld\n",
1002  				hret);
1003  		return -ENODEV;
1004  	}
1005  
1006  	/* init cpuhotplug */
1007  	r = hv_gpci_cpu_hotplug_init();
1008  	if (r)
1009  		return r;
1010  
1011  	/* sampling not supported */
1012  	h_gpci_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
1013  
1014  	arg = (void *)get_cpu_var(hv_gpci_reqb);
1015  	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
1016  
1017  	/*
1018  	 * hcall H_GET_PERF_COUNTER_INFO populates the output
1019  	 * counter_info_version value based on the system hypervisor.
1020  	 * Pass the counter request 0x10 corresponds to request type
1021  	 * 'Dispatch_timebase_by_processor', to get the supported
1022  	 * counter_info_version.
1023  	 */
1024  	arg->params.counter_request = cpu_to_be32(0x10);
1025  
1026  	r = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
1027  			virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
1028  	if (r) {
1029  		pr_devel("hcall failed, can't get supported counter_info_version: 0x%x\n", r);
1030  		arg->params.counter_info_version_out = 0x8;
1031  	}
1032  
1033  	/*
1034  	 * Use counter_info_version_out value to assign
1035  	 * required hv-gpci event list.
1036  	 */
1037  	if (arg->params.counter_info_version_out >= 0x8)
1038  		event_group.attrs = hv_gpci_event_attrs;
1039  	else
1040  		event_group.attrs = hv_gpci_event_attrs_v6;
1041  
1042  	put_cpu_var(hv_gpci_reqb);
1043  
1044  	r = perf_pmu_register(&h_gpci_pmu, h_gpci_pmu.name, -1);
1045  	if (r)
1046  		return r;
1047  
1048  	/* sysinfo interface files are only available for power10 and above platforms */
1049  	if (PVR_VER(mfspr(SPRN_PVR)) >= PVR_POWER10)
1050  		add_sysinfo_interface_files();
1051  
1052  	return 0;
1053  }
1054  
1055  device_initcall(hv_gpci_init);
1056