1c5350777SLeo Yan // SPDX-License-Identifier: GPL-2.0
2c5350777SLeo Yan
3c5350777SLeo Yan #include <linux/version.h>
4c5350777SLeo Yan #include <linux/ptrace.h>
5c5350777SLeo Yan #include <uapi/linux/bpf.h>
67cf245a3SToke Høiland-Jørgensen #include <bpf/bpf_helpers.h>
7c5350777SLeo Yan
/*
 * The CPU number, cstate number and pstate number are based
 * on 96boards Hikey with octa CA53 CPUs.
 *
 * Every CPU has three idle states for cstate:
 *   WFI, CPU_OFF, CLUSTER_OFF
 *
 * Every CPU has 5 operating points:
 *   208MHz, 432MHz, 729MHz, 960MHz, 1200MHz
 *
 * This code is based on these assumptions and other platforms
 * need to adjust these definitions.
 */
/* Number of CPUs tracked; every per-CPU map below is sized from it */
#define MAX_CPU			8
/* Number of operating points per CPU (rows per CPU in pstate_duration) */
#define MAX_PSTATE_ENTRIES	5
/* Number of idle states per CPU (rows per CPU in cstate_duration) */
#define MAX_CSTATE_ENTRIES	3

/*
 * Operating points, in ascending order. The position of a frequency in
 * this table is the pstate index used by the statistics maps.
 */
static int cpu_opps[] = {
	208000,
	432000,
	729000,
	960000,
	1200000,
};
26c5350777SLeo Yan
/*
 * my_map structure is used to record cstate and pstate index and
 * timestamp (Idx, Ts); when a new event comes in we need to update
 * the combination with the new state index and timestamp (Idx`, Ts`).
 *
 * Based on (Idx, Ts) and (Idx`, Ts`) we can calculate the time
 * interval for the previous state: Duration(Idx) = Ts` - Ts.
 *
 * Every CPU has one array as below for recording state index and
 * timestamp, and records cstate and pstate separately:
 *
 * +--------------------------+
 * | cstate timestamp         |
 * +--------------------------+
 * | cstate index             |
 * +--------------------------+
 * | pstate timestamp         |
 * +--------------------------+
 * | pstate index             |
 * +--------------------------+
 */
/* Slot offsets inside one CPU's group of my_map entries */
#define MAP_OFF_CSTATE_TIME	0	/* timestamp of the last cstate event */
#define MAP_OFF_CSTATE_IDX	1	/* last recorded cstate value */
#define MAP_OFF_PSTATE_TIME	2	/* timestamp of the last pstate event */
#define MAP_OFF_PSTATE_IDX	3	/* last recorded pstate value */
#define MAP_OFF_NUM		4	/* number of slots per CPU */
53c5350777SLeo Yan
54f0c328f8SDaniel T. Lee struct {
55f0c328f8SDaniel T. Lee __uint(type, BPF_MAP_TYPE_ARRAY);
56f0c328f8SDaniel T. Lee __type(key, u32);
57f0c328f8SDaniel T. Lee __type(value, u64);
58f0c328f8SDaniel T. Lee __uint(max_entries, MAX_CPU * MAP_OFF_NUM);
59f0c328f8SDaniel T. Lee } my_map SEC(".maps");
60c5350777SLeo Yan
61c5350777SLeo Yan /* cstate_duration records duration time for every idle state per CPU */
62f0c328f8SDaniel T. Lee struct {
63f0c328f8SDaniel T. Lee __uint(type, BPF_MAP_TYPE_ARRAY);
64f0c328f8SDaniel T. Lee __type(key, u32);
65f0c328f8SDaniel T. Lee __type(value, u64);
66f0c328f8SDaniel T. Lee __uint(max_entries, MAX_CPU * MAX_CSTATE_ENTRIES);
67f0c328f8SDaniel T. Lee } cstate_duration SEC(".maps");
68c5350777SLeo Yan
69c5350777SLeo Yan /* pstate_duration records duration time for every operating point per CPU */
70f0c328f8SDaniel T. Lee struct {
71f0c328f8SDaniel T. Lee __uint(type, BPF_MAP_TYPE_ARRAY);
72f0c328f8SDaniel T. Lee __type(key, u32);
73f0c328f8SDaniel T. Lee __type(value, u64);
74f0c328f8SDaniel T. Lee __uint(max_entries, MAX_CPU * MAX_PSTATE_ENTRIES);
75f0c328f8SDaniel T. Lee } pstate_duration SEC(".maps");
76c5350777SLeo Yan
77c5350777SLeo Yan /*
78c5350777SLeo Yan * The trace events for cpu_idle and cpu_frequency are taken from:
79*27d7fdf0SRoss Zwisler * /sys/kernel/tracing/events/power/cpu_idle/format
80*27d7fdf0SRoss Zwisler * /sys/kernel/tracing/events/power/cpu_frequency/format
81c5350777SLeo Yan *
82c5350777SLeo Yan * These two events have same format, so define one common structure.
83c5350777SLeo Yan */
84c5350777SLeo Yan struct cpu_args {
85c5350777SLeo Yan u64 pad;
86c5350777SLeo Yan u32 state;
87c5350777SLeo Yan u32 cpu_id;
88c5350777SLeo Yan };
89c5350777SLeo Yan
90c5350777SLeo Yan /* calculate pstate index, returns MAX_PSTATE_ENTRIES for failure */
find_cpu_pstate_idx(u32 frequency)91c5350777SLeo Yan static u32 find_cpu_pstate_idx(u32 frequency)
92c5350777SLeo Yan {
93c5350777SLeo Yan u32 i;
94c5350777SLeo Yan
95c5350777SLeo Yan for (i = 0; i < sizeof(cpu_opps) / sizeof(u32); i++) {
96c5350777SLeo Yan if (frequency == cpu_opps[i])
97c5350777SLeo Yan return i;
98c5350777SLeo Yan }
99c5350777SLeo Yan
100c5350777SLeo Yan return i;
101c5350777SLeo Yan }
102c5350777SLeo Yan
103c5350777SLeo Yan SEC("tracepoint/power/cpu_idle")
bpf_prog1(struct cpu_args * ctx)104c5350777SLeo Yan int bpf_prog1(struct cpu_args *ctx)
105c5350777SLeo Yan {
106c5350777SLeo Yan u64 *cts, *pts, *cstate, *pstate, prev_state, cur_ts, delta;
107c5350777SLeo Yan u32 key, cpu, pstate_idx;
108c5350777SLeo Yan u64 *val;
109c5350777SLeo Yan
110c5350777SLeo Yan if (ctx->cpu_id > MAX_CPU)
111c5350777SLeo Yan return 0;
112c5350777SLeo Yan
113c5350777SLeo Yan cpu = ctx->cpu_id;
114c5350777SLeo Yan
115c5350777SLeo Yan key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_TIME;
116c5350777SLeo Yan cts = bpf_map_lookup_elem(&my_map, &key);
117c5350777SLeo Yan if (!cts)
118c5350777SLeo Yan return 0;
119c5350777SLeo Yan
120c5350777SLeo Yan key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
121c5350777SLeo Yan cstate = bpf_map_lookup_elem(&my_map, &key);
122c5350777SLeo Yan if (!cstate)
123c5350777SLeo Yan return 0;
124c5350777SLeo Yan
125c5350777SLeo Yan key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
126c5350777SLeo Yan pts = bpf_map_lookup_elem(&my_map, &key);
127c5350777SLeo Yan if (!pts)
128c5350777SLeo Yan return 0;
129c5350777SLeo Yan
130c5350777SLeo Yan key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
131c5350777SLeo Yan pstate = bpf_map_lookup_elem(&my_map, &key);
132c5350777SLeo Yan if (!pstate)
133c5350777SLeo Yan return 0;
134c5350777SLeo Yan
135c5350777SLeo Yan prev_state = *cstate;
136c5350777SLeo Yan *cstate = ctx->state;
137c5350777SLeo Yan
138c5350777SLeo Yan if (!*cts) {
139c5350777SLeo Yan *cts = bpf_ktime_get_ns();
140c5350777SLeo Yan return 0;
141c5350777SLeo Yan }
142c5350777SLeo Yan
143c5350777SLeo Yan cur_ts = bpf_ktime_get_ns();
144c5350777SLeo Yan delta = cur_ts - *cts;
145c5350777SLeo Yan *cts = cur_ts;
146c5350777SLeo Yan
147c5350777SLeo Yan /*
148c5350777SLeo Yan * When state doesn't equal to (u32)-1, the cpu will enter
149c5350777SLeo Yan * one idle state; for this case we need to record interval
150c5350777SLeo Yan * for the pstate.
151c5350777SLeo Yan *
152c5350777SLeo Yan * OPP2
153c5350777SLeo Yan * +---------------------+
154c5350777SLeo Yan * OPP1 | |
155c5350777SLeo Yan * ---------+ |
156c5350777SLeo Yan * | Idle state
157c5350777SLeo Yan * +---------------
158c5350777SLeo Yan *
159c5350777SLeo Yan * |<- pstate duration ->|
160c5350777SLeo Yan * ^ ^
161c5350777SLeo Yan * pts cur_ts
162c5350777SLeo Yan */
163c5350777SLeo Yan if (ctx->state != (u32)-1) {
164c5350777SLeo Yan
165c5350777SLeo Yan /* record pstate after have first cpu_frequency event */
166c5350777SLeo Yan if (!*pts)
167c5350777SLeo Yan return 0;
168c5350777SLeo Yan
169c5350777SLeo Yan delta = cur_ts - *pts;
170c5350777SLeo Yan
171c5350777SLeo Yan pstate_idx = find_cpu_pstate_idx(*pstate);
172c5350777SLeo Yan if (pstate_idx >= MAX_PSTATE_ENTRIES)
173c5350777SLeo Yan return 0;
174c5350777SLeo Yan
175c5350777SLeo Yan key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
176c5350777SLeo Yan val = bpf_map_lookup_elem(&pstate_duration, &key);
177c5350777SLeo Yan if (val)
178c5350777SLeo Yan __sync_fetch_and_add((long *)val, delta);
179c5350777SLeo Yan
180c5350777SLeo Yan /*
181c5350777SLeo Yan * When state equal to (u32)-1, the cpu just exits from one
182c5350777SLeo Yan * specific idle state; for this case we need to record
183c5350777SLeo Yan * interval for the pstate.
184c5350777SLeo Yan *
185c5350777SLeo Yan * OPP2
186c5350777SLeo Yan * -----------+
187c5350777SLeo Yan * | OPP1
188c5350777SLeo Yan * | +-----------
189c5350777SLeo Yan * | Idle state |
190c5350777SLeo Yan * +---------------------+
191c5350777SLeo Yan *
192c5350777SLeo Yan * |<- cstate duration ->|
193c5350777SLeo Yan * ^ ^
194c5350777SLeo Yan * cts cur_ts
195c5350777SLeo Yan */
196c5350777SLeo Yan } else {
197c5350777SLeo Yan
198c5350777SLeo Yan key = cpu * MAX_CSTATE_ENTRIES + prev_state;
199c5350777SLeo Yan val = bpf_map_lookup_elem(&cstate_duration, &key);
200c5350777SLeo Yan if (val)
201c5350777SLeo Yan __sync_fetch_and_add((long *)val, delta);
202c5350777SLeo Yan }
203c5350777SLeo Yan
204c5350777SLeo Yan /* Update timestamp for pstate as new start time */
205c5350777SLeo Yan if (*pts)
206c5350777SLeo Yan *pts = cur_ts;
207c5350777SLeo Yan
208c5350777SLeo Yan return 0;
209c5350777SLeo Yan }
210c5350777SLeo Yan
211c5350777SLeo Yan SEC("tracepoint/power/cpu_frequency")
bpf_prog2(struct cpu_args * ctx)212c5350777SLeo Yan int bpf_prog2(struct cpu_args *ctx)
213c5350777SLeo Yan {
214c5350777SLeo Yan u64 *pts, *cstate, *pstate, prev_state, cur_ts, delta;
215c5350777SLeo Yan u32 key, cpu, pstate_idx;
216c5350777SLeo Yan u64 *val;
217c5350777SLeo Yan
218c5350777SLeo Yan cpu = ctx->cpu_id;
219c5350777SLeo Yan
220c5350777SLeo Yan key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
221c5350777SLeo Yan pts = bpf_map_lookup_elem(&my_map, &key);
222c5350777SLeo Yan if (!pts)
223c5350777SLeo Yan return 0;
224c5350777SLeo Yan
225c5350777SLeo Yan key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
226c5350777SLeo Yan pstate = bpf_map_lookup_elem(&my_map, &key);
227c5350777SLeo Yan if (!pstate)
228c5350777SLeo Yan return 0;
229c5350777SLeo Yan
230c5350777SLeo Yan key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
231c5350777SLeo Yan cstate = bpf_map_lookup_elem(&my_map, &key);
232c5350777SLeo Yan if (!cstate)
233c5350777SLeo Yan return 0;
234c5350777SLeo Yan
235c5350777SLeo Yan prev_state = *pstate;
236c5350777SLeo Yan *pstate = ctx->state;
237c5350777SLeo Yan
238c5350777SLeo Yan if (!*pts) {
239c5350777SLeo Yan *pts = bpf_ktime_get_ns();
240c5350777SLeo Yan return 0;
241c5350777SLeo Yan }
242c5350777SLeo Yan
243c5350777SLeo Yan cur_ts = bpf_ktime_get_ns();
244c5350777SLeo Yan delta = cur_ts - *pts;
245c5350777SLeo Yan *pts = cur_ts;
246c5350777SLeo Yan
247c5350777SLeo Yan /* When CPU is in idle, bail out to skip pstate statistics */
248c5350777SLeo Yan if (*cstate != (u32)(-1))
249c5350777SLeo Yan return 0;
250c5350777SLeo Yan
251c5350777SLeo Yan /*
252c5350777SLeo Yan * The cpu changes to another different OPP (in below diagram
253c5350777SLeo Yan * change frequency from OPP3 to OPP1), need recording interval
254c5350777SLeo Yan * for previous frequency OPP3 and update timestamp as start
255c5350777SLeo Yan * time for new frequency OPP1.
256c5350777SLeo Yan *
257c5350777SLeo Yan * OPP3
258c5350777SLeo Yan * +---------------------+
259c5350777SLeo Yan * OPP2 | |
260c5350777SLeo Yan * ---------+ |
261c5350777SLeo Yan * | OPP1
262c5350777SLeo Yan * +---------------
263c5350777SLeo Yan *
264c5350777SLeo Yan * |<- pstate duration ->|
265c5350777SLeo Yan * ^ ^
266c5350777SLeo Yan * pts cur_ts
267c5350777SLeo Yan */
268c5350777SLeo Yan pstate_idx = find_cpu_pstate_idx(*pstate);
269c5350777SLeo Yan if (pstate_idx >= MAX_PSTATE_ENTRIES)
270c5350777SLeo Yan return 0;
271c5350777SLeo Yan
272c5350777SLeo Yan key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
273c5350777SLeo Yan val = bpf_map_lookup_elem(&pstate_duration, &key);
274c5350777SLeo Yan if (val)
275c5350777SLeo Yan __sync_fetch_and_add((long *)val, delta);
276c5350777SLeo Yan
277c5350777SLeo Yan return 0;
278c5350777SLeo Yan }
279c5350777SLeo Yan
280c5350777SLeo Yan char _license[] SEC("license") = "GPL";
281c5350777SLeo Yan u32 _version SEC("version") = LINUX_VERSION_CODE;
282