// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
// Copyright (c) 2022 Google
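/*
 * Track lock contention via the contention_begin and contention_end
 * tracepoints: contention_begin saves a per-task timestamp (and, in
 * LOCK_AGGR_CALLER mode, a callstack), contention_end computes the wait
 * time and accumulates it into lock_stat keyed by callstack, task or
 * lock address according to aggr_mode.
 */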
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>

#include "lock_data.h"

/* default buffer size */
#define MAX_ENTRIES  10240

/* per-task state saved at contention_begin and consumed at contention_end */
struct tstamp_data {
	__u64 timestamp;	/* when the contention started (ns) */
	__u64 lock;		/* address of the contended lock */
	__u32 flags;		/* lock flags from the tracepoint */
	__s32 stack_id;		/* callstack id, used in LOCK_AGGR_CALLER mode */
};

/* callstack storage */
struct {
	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u64));
	__uint(max_entries, MAX_ENTRIES);
} stacks SEC(".maps");

/* maintain timestamp at the beginning of contention */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, int);
	__type(value, struct tstamp_data);
	__uint(max_entries, MAX_ENTRIES);
} tstamp SEC(".maps");

/* actual lock contention statistics */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(struct contention_key));
	__uint(value_size, sizeof(struct contention_data));
	__uint(max_entries, MAX_ENTRIES);
} lock_stat SEC(".maps");

/* per-task data (comm), filled by update_task_data() in LOCK_AGGR_TASK mode */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(struct contention_task_data));
	__uint(max_entries, MAX_ENTRIES);
} task_data SEC(".maps");

/*
 * Filter maps: the presence of a key means the corresponding cpu, task,
 * lock type or lock address passes can_record().  Each map is only
 * consulted when the matching has_* flag below is set.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u8));
	__uint(max_entries, 1);
} cpu_filter SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u8));
	__uint(max_entries, 1);
} task_filter SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u8));
	__uint(max_entries, 1);
} type_filter SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(__u64));
	__uint(value_size, sizeof(__u8));
	__uint(max_entries, 1);
} addr_filter SEC(".maps");
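
/*
 * The globals below and the filter maps above are expected to be filled in
 * by the loading process before the programs are attached.  A minimal,
 * illustrative sketch using a libbpf skeleton (the skeleton and variable
 * names here are assumptions, not taken from this file):
 *
 *	skel->bss->enabled = 1;
 *	skel->bss->aggr_mode = LOCK_AGGR_CALLER;
 *	skel->bss->stack_skip = 4;
 */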

/* control flags */
int enabled;		/* collection on/off */
int has_cpu;		/* cpu_filter is in use */
int has_task;		/* task_filter is in use */
int has_type;		/* type_filter is in use */
int has_addr;		/* addr_filter is in use */
int stack_skip;		/* number of stack frames to skip in bpf_get_stackid() */

/* determines the key of lock_stat (LOCK_AGGR_*) */
int aggr_mode;

/* error stat: incremented when an event or its callstack cannot be recorded */
int lost;

/* return 1 if the event passes every enabled filter, 0 otherwise */
static inline int can_record(u64 *ctx)
{
	if (has_cpu) {
		__u32 cpu = bpf_get_smp_processor_id();
		__u8 *ok;

		ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
		if (!ok)
			return 0;
	}

	if (has_task) {
		__u8 *ok;
		__u32 pid = bpf_get_current_pid_tgid();

		ok = bpf_map_lookup_elem(&task_filter, &pid);
		if (!ok)
			return 0;
	}

	if (has_type) {
		__u8 *ok;
		__u32 flags = (__u32)ctx[1];

		ok = bpf_map_lookup_elem(&type_filter, &flags);
		if (!ok)
			return 0;
	}

	if (has_addr) {
		__u8 *ok;
		__u64 addr = ctx[0];

		ok = bpf_map_lookup_elem(&addr_filter, &addr);
		if (!ok)
			return 0;
	}

	return 1;
}

/* record the task's comm once, for LOCK_AGGR_TASK reporting */
static inline void update_task_data(__u32 pid)
{
	struct contention_task_data *p;

	p = bpf_map_lookup_elem(&task_data, &pid);
	if (p == NULL) {
		struct contention_task_data data;

		bpf_get_current_comm(data.comm, sizeof(data.comm));
		bpf_map_update_elem(&task_data, &pid, &data, BPF_NOEXIST);
	}
}

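/*
 * contention_begin: record when the current task started waiting for the
 * lock.  ctx[0] carries the lock address and ctx[1] the lock flags, as
 * consumed here and in can_record().
 */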
SEC("tp_btf/contention_begin")
int contention_begin(u64 *ctx)
{
	__u32 pid;
	struct tstamp_data *pelem;

	if (!enabled || !can_record(ctx))
		return 0;

	pid = bpf_get_current_pid_tgid();
	pelem = bpf_map_lookup_elem(&tstamp, &pid);
	if (pelem && pelem->lock)	/* contention already in progress for this task */
		return 0;

	if (pelem == NULL) {
		struct tstamp_data zero = {};

		bpf_map_update_elem(&tstamp, &pid, &zero, BPF_ANY);
		pelem = bpf_map_lookup_elem(&tstamp, &pid);
		if (pelem == NULL) {
			lost++;
			return 0;
		}
	}

	pelem->timestamp = bpf_ktime_get_ns();
	pelem->lock = (__u64)ctx[0];
	pelem->flags = (__u32)ctx[1];

	if (aggr_mode == LOCK_AGGR_CALLER) {
		pelem->stack_id = bpf_get_stackid(ctx, &stacks,
						  BPF_F_FAST_STACK_CMP | stack_skip);
		if (pelem->stack_id < 0)
			lost++;
	}

	return 0;
}

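/*
 * contention_end: compute how long the task waited and fold the duration
 * into lock_stat, keyed by callstack id, task pid or lock address
 * depending on aggr_mode.
 */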
SEC("tp_btf/contention_end")
int contention_end(u64 *ctx)
{
	__u32 pid;
	struct tstamp_data *pelem;
	struct contention_key key;
	struct contention_data *data;
	__u64 duration;

	if (!enabled)
		return 0;

	pid = bpf_get_current_pid_tgid();
	pelem = bpf_map_lookup_elem(&tstamp, &pid);
	if (!pelem || pelem->lock != ctx[0])	/* no matching contention_begin */
		return 0;

	duration = bpf_ktime_get_ns() - pelem->timestamp;

	switch (aggr_mode) {
	case LOCK_AGGR_CALLER:
		key.aggr_key = pelem->stack_id;
		break;
	case LOCK_AGGR_TASK:
		key.aggr_key = pid;
		update_task_data(pid);
		break;
	case LOCK_AGGR_ADDR:
		key.aggr_key = pelem->lock;
		break;
	default:
		/* should not happen */
		return 0;
	}

	data = bpf_map_lookup_elem(&lock_stat, &key);
	if (!data) {
		/* first contention seen for this key */
		struct contention_data first = {
			.total_time = duration,
			.max_time = duration,
			.min_time = duration,
			.count = 1,
			.flags = pelem->flags,
		};

		bpf_map_update_elem(&lock_stat, &key, &first, BPF_NOEXIST);
		bpf_map_delete_elem(&tstamp, &pid);
		return 0;
	}

	__sync_fetch_and_add(&data->total_time, duration);
	__sync_fetch_and_add(&data->count, 1);

	/* FIXME: need atomic operations */
	if (data->max_time < duration)
		data->max_time = duration;
	if (data->min_time > duration)
		data->min_time = duration;

	bpf_map_delete_elem(&tstamp, &pid);
	return 0;
}

char LICENSE[] SEC("license") = "Dual BSD/GPL";