1 /*
2  *  (C) 2010,2011       Thomas Renninger <trenn@suse.de>, Novell Inc.
3  *
4  *  Licensed under the terms of the GNU GPL License version 2.
5  */
6 
7 #if defined(__i386__) || defined(__x86_64__)
8 
9 #include <stdio.h>
10 #include <stdint.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <limits.h>
14 
15 #include <cpufreq.h>
16 
17 #include "helpers/helpers.h"
18 #include "idle_monitor/cpupower-monitor.h"
19 
/* Model-specific register indices this monitor passes to read_msr(). */
#define MSR_TSC		0x10	/* sampled on CPU 0 at measurement start/stop */
#define MSR_MPERF	0xE7	/* per-CPU; its delta over the TSC delta gives C0 */
#define MSR_APERF	0xE8	/* per-CPU; aperf/mperf delta ratio scales max freq */
24 
/*
 * Identifiers of the states exported by this monitor. Each value is used as
 * the .id of one mperf_cstates[] entry; MPERF_CSTATE_COUNT is the number of
 * entries and sizes that table.
 */
enum mperf_id {
	C0 = 0,
	Cx,
	AVG_FREQ,
	MPERF_CSTATE_COUNT
};
26 
27 static int mperf_get_count_percent(unsigned int self_id, double *percent,
28 				   unsigned int cpu);
29 static int mperf_get_count_freq(unsigned int id, unsigned long long *count,
30 				unsigned int cpu);
31 
32 static cstate_t mperf_cstates[MPERF_CSTATE_COUNT] = {
33 	{
34 		.name			= "C0",
35 		.desc			= N_("Processor Core not idle"),
36 		.id			= C0,
37 		.range			= RANGE_THREAD,
38 		.get_count_percent	= mperf_get_count_percent,
39 	},
40 	{
41 		.name			= "Cx",
42 		.desc			= N_("Processor Core in an idle state"),
43 		.id			= Cx,
44 		.range			= RANGE_THREAD,
45 		.get_count_percent	= mperf_get_count_percent,
46 	},
47 
48 	{
49 		.name			= "Freq",
50 		.desc			= N_("Average Frequency (including boost) in MHz"),
51 		.id			= AVG_FREQ,
52 		.range			= RANGE_THREAD,
53 		.get_count		= mperf_get_count_freq,
54 	},
55 };
56 
/* TSC values taken by mperf_start() and mperf_stop() respectively */
static unsigned long long tsc_at_measure_start, tsc_at_measure_end;

/* Upper hardware limit as reported by cpufreq_get_hardware_limits() */
static unsigned long max_frequency;

/*
 * Per-CPU sample arrays, each allocated with cpu_count entries in
 * mperf_register(): "previous" holds the sample from measurement start,
 * "current" the one from measurement stop.
 */
static unsigned long long *mperf_previous_count, *aperf_previous_count;
static unsigned long long *mperf_current_count, *aperf_current_count;

/* Per-CPU valid flag; zero for a CPU whose MSR read failed */
static int *is_valid;
66 
67 static int mperf_get_tsc(unsigned long long *tsc)
68 {
69 	return read_msr(0, MSR_TSC, tsc);
70 }
71 
72 static int mperf_init_stats(unsigned int cpu)
73 {
74 	unsigned long long val;
75 	int ret;
76 
77 	ret = read_msr(cpu, MSR_APERF, &val);
78 	aperf_previous_count[cpu] = val;
79 	ret |= read_msr(cpu, MSR_MPERF, &val);
80 	mperf_previous_count[cpu] = val;
81 	is_valid[cpu] = !ret;
82 
83 	return 0;
84 }
85 
86 static int mperf_measure_stats(unsigned int cpu)
87 {
88 	unsigned long long val;
89 	int ret;
90 
91 	ret = read_msr(cpu, MSR_APERF, &val);
92 	aperf_current_count[cpu] = val;
93 	ret |= read_msr(cpu, MSR_MPERF, &val);
94 	mperf_current_count[cpu] = val;
95 	is_valid[cpu] = !ret;
96 
97 	return 0;
98 }
99 
100 /*
101  * get_average_perf()
102  *
103  * Returns the average performance (also considers boosted frequencies)
104  *
105  * Input:
106  *   aperf_diff: Difference of the aperf register over a time period
107  *   mperf_diff: Difference of the mperf register over the same time period
108  *   max_freq:   Maximum frequency (P0)
109  *
110  * Returns:
111  *   Average performance over the time period
112  */
113 static unsigned long get_average_perf(unsigned long long aperf_diff,
114 				      unsigned long long mperf_diff)
115 {
116 	unsigned int perf_percent = 0;
117 	if (((unsigned long)(-1) / 100) < aperf_diff) {
118 		int shift_count = 7;
119 		aperf_diff >>= shift_count;
120 		mperf_diff >>= shift_count;
121 	}
122 	perf_percent = (aperf_diff * 100) / mperf_diff;
123 	return (max_frequency * perf_percent) / 100;
124 }
125 
126 static int mperf_get_count_percent(unsigned int id, double *percent,
127 				   unsigned int cpu)
128 {
129 	unsigned long long aperf_diff, mperf_diff, tsc_diff;
130 
131 	if (!is_valid[cpu])
132 		return -1;
133 
134 	if (id != C0 && id != Cx)
135 		return -1;
136 
137 	mperf_diff = mperf_current_count[cpu] - mperf_previous_count[cpu];
138 	aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu];
139 	tsc_diff = tsc_at_measure_end - tsc_at_measure_start;
140 
141 	*percent = 100.0 * mperf_diff / tsc_diff;
142 	dprint("%s: mperf_diff: %llu, tsc_diff: %llu\n",
143 	       mperf_cstates[id].name, mperf_diff, tsc_diff);
144 
145 	if (id == Cx)
146 		*percent = 100.0 - *percent;
147 
148 	dprint("%s: previous: %llu - current: %llu - (%u)\n", mperf_cstates[id].name,
149 	       mperf_diff, aperf_diff, cpu);
150 	dprint("%s: %f\n", mperf_cstates[id].name, *percent);
151 	return 0;
152 }
153 
154 static int mperf_get_count_freq(unsigned int id, unsigned long long *count,
155 			      unsigned int cpu)
156 {
157 	unsigned long long aperf_diff, mperf_diff;
158 
159 	if (id != AVG_FREQ)
160 		return 1;
161 
162 	if (!is_valid[cpu])
163 		return -1;
164 
165 	mperf_diff = mperf_current_count[cpu] - mperf_previous_count[cpu];
166 	aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu];
167 
168 	/* Return MHz for now, might want to return KHz if column width is more
169 	   generic */
170 	*count = get_average_perf(aperf_diff, mperf_diff) / 1000;
171 	dprint("%s: %llu\n", mperf_cstates[id].name, *count);
172 
173 	return 0;
174 }
175 
176 static int mperf_start(void)
177 {
178 	int cpu;
179 	unsigned long long dbg;
180 
181 	mperf_get_tsc(&tsc_at_measure_start);
182 
183 	for (cpu = 0; cpu < cpu_count; cpu++)
184 		mperf_init_stats(cpu);
185 
186 	mperf_get_tsc(&dbg);
187 	dprint("TSC diff: %llu\n", dbg - tsc_at_measure_start);
188 	return 0;
189 }
190 
191 static int mperf_stop(void)
192 {
193 	unsigned long long dbg;
194 	int cpu;
195 
196 	mperf_get_tsc(&tsc_at_measure_end);
197 
198 	for (cpu = 0; cpu < cpu_count; cpu++)
199 		mperf_measure_stats(cpu);
200 
201 	mperf_get_tsc(&dbg);
202 	dprint("TSC diff: %llu\n", dbg - tsc_at_measure_end);
203 
204 	return 0;
205 }
206 
207 struct cpuidle_monitor mperf_monitor;
208 
209 struct cpuidle_monitor* mperf_register(void) {
210 
211 	unsigned long min;
212 
213 	if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_APERF))
214 		return NULL;
215 
216 	/* Assume min/max all the same on all cores */
217 	if (cpufreq_get_hardware_limits(0, &min, &max_frequency)) {
218 		dprint("Cannot retrieve max freq from cpufreq kernel "
219 		       "subsystem\n");
220 		return NULL;
221 	}
222 
223 	/* Free this at program termination */
224 	is_valid = calloc(cpu_count, sizeof (int));
225 	mperf_previous_count = calloc (cpu_count,
226 				       sizeof(unsigned long long));
227 	aperf_previous_count = calloc (cpu_count,
228 				       sizeof(unsigned long long));
229 	mperf_current_count = calloc (cpu_count,
230 				      sizeof(unsigned long long));
231 	aperf_current_count = calloc (cpu_count,
232 				      sizeof(unsigned long long));
233 
234 	mperf_monitor.name_len = strlen(mperf_monitor.name);
235 	return &mperf_monitor;
236 }
237 
238 void mperf_unregister(void) {
239 	free(mperf_previous_count);
240 	free(aperf_previous_count);
241 	free(mperf_current_count);
242 	free(aperf_current_count);
243 	free(is_valid);
244 }
245 
246 struct cpuidle_monitor mperf_monitor = {
247 	.name			= "Mperf",
248 	.hw_states_num		= MPERF_CSTATE_COUNT,
249 	.hw_states		= mperf_cstates,
250 	.start			= mperf_start,
251 	.stop			= mperf_stop,
252 	.do_register		= mperf_register,
253 	.unregister		= mperf_unregister,
254 	.needs_root		= 1,
255 	.overflow_s		= 922000000 /* 922337203 seconds TSC overflow
256 					       at 20GHz */
257 };
258 #endif /* #if defined(__i386__) || defined(__x86_64__) */
259