// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
// Copyright (c) 2022 Google
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>

#include "lock_data.h"

/* default buffer size */
#define MAX_ENTRIES	10240

/* lock contention flags from include/trace/events/lock.h */
#define LCB_F_SPIN	(1U << 0)
#define LCB_F_READ	(1U << 1)
#define LCB_F_WRITE	(1U << 2)
#define LCB_F_RT	(1U << 3)
#define LCB_F_PERCPU	(1U << 4)
#define LCB_F_MUTEX	(1U << 5)

struct tstamp_data {
	__u64 timestamp;
	__u64 lock;
	__u32 flags;
	__s32 stack_id;
};

/* callstack storage */
struct {
	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u64));
	__uint(max_entries, MAX_ENTRIES);
} stacks SEC(".maps");

/* maintain timestamp at the beginning of contention, keyed by task id */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, int);
	__type(value, struct tstamp_data);
	__uint(max_entries, MAX_ENTRIES);
} tstamp SEC(".maps");

/* actual lock contention statistics */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(struct contention_key));
	__uint(value_size, sizeof(struct contention_data));
	__uint(max_entries, MAX_ENTRIES);
} lock_stat SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(struct contention_task_data));
	__uint(max_entries, MAX_ENTRIES);
} task_data SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u8));
	__uint(max_entries, 1);
} cpu_filter SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u8));
	__uint(max_entries, 1);
} task_filter SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u8));
	__uint(max_entries, 1);
} type_filter SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(__u64));
	__uint(value_size, sizeof(__u8));
	__uint(max_entries, 1);
} addr_filter SEC(".maps");

/*
 * Two candidate layouts of struct rw_semaphore's owner field; CO-RE
 * selects whichever matches the running kernel in get_lock_owner().
 */
struct rw_semaphore___old {
	struct task_struct *owner;
} __attribute__((preserve_access_index));

struct rw_semaphore___new {
	atomic_long_t owner;
} __attribute__((preserve_access_index));

/* control flags */
int enabled;
int has_cpu;
int has_task;
int has_type;
int has_addr;
int needs_callstack;
int stack_skip;
int lock_owner;

/* determines the aggregation key of lock_stat (LOCK_AGGR_*) */
int aggr_mode;

/* error stat: incremented when a timestamp or callstack cannot be recorded */
int lost;

/* ctx[0]: lock address, ctx[1]: flags (see the contention tracepoints) */
static inline int can_record(u64 *ctx)
{
	if (has_cpu) {
		__u32 cpu = bpf_get_smp_processor_id();
		__u8 *ok;

		ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
		if (!ok)
			return 0;
	}

	if (has_task) {
		__u8 *ok;
		__u32 pid = bpf_get_current_pid_tgid();

		ok = bpf_map_lookup_elem(&task_filter, &pid);
		if (!ok)
			return 0;
	}

	if (has_type) {
		__u8 *ok;
		__u32 flags = (__u32)ctx[1];

		ok = bpf_map_lookup_elem(&type_filter, &flags);
		if (!ok)
			return 0;
	}

	if (has_addr) {
		__u8 *ok;
		__u64 addr = ctx[0];

		ok = bpf_map_lookup_elem(&addr_filter, &addr);
		if (!ok)
			return 0;
	}

	return 1;
}
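/*
 * Cache the comm of a contending (or owner) task in task_data, keyed by
 * pid.  Only the first contention for a given pid stores an entry;
 * later contentions find the existing entry and leave it untouched.
 */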
static inline
int update_task_data(struct task_struct *task)
{
	struct contention_task_data *p;
	int pid, err;

	err = bpf_core_read(&pid, sizeof(pid), &task->pid);
	if (err)
		return -1;

	p = bpf_map_lookup_elem(&task_data, &pid);
	if (p == NULL) {
		struct contention_task_data data = {};

		BPF_CORE_READ_STR_INTO(&data.comm, task, comm);
		bpf_map_update_elem(&task_data, &pid, &data, BPF_NOEXIST);
	}

	return 0;
}

#ifndef __has_builtin
# define __has_builtin(x) 0
#endif

static inline struct task_struct *get_lock_owner(__u64 lock, __u32 flags)
{
	struct task_struct *task;
	__u64 owner = 0;

	if (flags & LCB_F_MUTEX) {
		struct mutex *mutex = (void *)lock;
		owner = BPF_CORE_READ(mutex, owner.counter);
	} else if (flags == LCB_F_READ || flags == LCB_F_WRITE) {
#if __has_builtin(bpf_core_type_matches)
		if (bpf_core_type_matches(struct rw_semaphore___old)) {
			struct rw_semaphore___old *rwsem = (void *)lock;
			owner = (unsigned long)BPF_CORE_READ(rwsem, owner);
		} else if (bpf_core_type_matches(struct rw_semaphore___new)) {
			struct rw_semaphore___new *rwsem = (void *)lock;
			owner = BPF_CORE_READ(rwsem, owner.counter);
		}
#else
		/* assume the new layout (atomic_long_t owner) */
		struct rw_semaphore *rwsem = (void *)lock;
		owner = BPF_CORE_READ(rwsem, owner.counter);
#endif
	}

	if (!owner)
		return NULL;

	/* the low 3 bits of the owner word are flag bits, not part of the pointer */
	task = (void *)(owner & ~7UL);
	return task;
}
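/*
 * For tp_btf programs the raw tracepoint arguments arrive as an array
 * of u64: contention_begin passes (lock, flags) and contention_end
 * passes (lock, ret), per include/trace/events/lock.h.
 */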
SEC("tp_btf/contention_begin")
int contention_begin(u64 *ctx)
{
	__u32 pid;
	struct tstamp_data *pelem;

	if (!enabled || !can_record(ctx))
		return 0;

	pid = bpf_get_current_pid_tgid();
	pelem = bpf_map_lookup_elem(&tstamp, &pid);

	/* nested contention is not tracked; keep the outermost one */
	if (pelem && pelem->lock)
		return 0;

	if (pelem == NULL) {
		struct tstamp_data zero = {};

		bpf_map_update_elem(&tstamp, &pid, &zero, BPF_ANY);
		pelem = bpf_map_lookup_elem(&tstamp, &pid);
		if (pelem == NULL) {
			lost++;
			return 0;
		}
	}

	pelem->timestamp = bpf_ktime_get_ns();
	pelem->lock = (__u64)ctx[0];
	pelem->flags = (__u32)ctx[1];

	if (needs_callstack) {
		/* the low 8 bits of the flags are the number of stack frames to skip */
		pelem->stack_id = bpf_get_stackid(ctx, &stacks,
						  BPF_F_FAST_STACK_CMP | stack_skip);
		if (pelem->stack_id < 0)
			lost++;
	} else if (aggr_mode == LOCK_AGGR_TASK) {
		struct task_struct *task;

		if (lock_owner) {
			task = get_lock_owner(pelem->lock, pelem->flags);

			/* The flags field is not used anymore.  Pass the owner pid instead. */
			if (task)
				pelem->flags = BPF_CORE_READ(task, pid);
			else
				pelem->flags = -1U;

		} else {
			task = bpf_get_current_task_btf();
		}

		if (task) {
			if (update_task_data(task) < 0 && lock_owner)
				pelem->flags = -1U;
		}
	}

	return 0;
}

SEC("tp_btf/contention_end")
int contention_end(u64 *ctx)
{
	__u32 pid;
	struct tstamp_data *pelem;
	struct contention_key key = {};
	struct contention_data *data;
	__u64 duration;

	if (!enabled)
		return 0;

	pid = bpf_get_current_pid_tgid();
	pelem = bpf_map_lookup_elem(&tstamp, &pid);
	if (!pelem || pelem->lock != ctx[0])
		return 0;

	duration = bpf_ktime_get_ns() - pelem->timestamp;

	switch (aggr_mode) {
	case LOCK_AGGR_CALLER:
		key.stack_id = pelem->stack_id;
		break;
	case LOCK_AGGR_TASK:
		if (lock_owner)
			key.pid = pelem->flags;
		else
			key.pid = pid;
		if (needs_callstack)
			key.stack_id = pelem->stack_id;
		break;
	case LOCK_AGGR_ADDR:
		key.lock_addr = pelem->lock;
		if (needs_callstack)
			key.stack_id = pelem->stack_id;
		break;
	default:
		/* should not happen */
		return 0;
	}

	data = bpf_map_lookup_elem(&lock_stat, &key);
	if (!data) {
		struct contention_data first = {
			.total_time = duration,
			.max_time = duration,
			.min_time = duration,
			.count = 1,
			.flags = pelem->flags,
		};

		bpf_map_update_elem(&lock_stat, &key, &first, BPF_NOEXIST);
		bpf_map_delete_elem(&tstamp, &pid);
		return 0;
	}

	__sync_fetch_and_add(&data->total_time, duration);
	__sync_fetch_and_add(&data->count, 1);

	/* FIXME: need atomic operations */
	if (data->max_time < duration)
		data->max_time = duration;
	if (data->min_time > duration)
		data->min_time = duration;

	bpf_map_delete_elem(&tstamp, &pid);
	return 0;
}

char LICENSE[] SEC("license") = "Dual BSD/GPL";
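/*
 * Usage sketch (illustrative only): one way userspace could drive this
 * program through a bpftool-generated skeleton.  The header name
 * "lock_contention.skel.h", the lock_contention_bpf__* functions, and
 * the stack_skip value are assumptions based on bpftool's naming
 * convention, not part of this file.
 *
 *	#include "lock_contention.skel.h"
 *
 *	struct lock_contention_bpf *skel = lock_contention_bpf__open_and_load();
 *
 *	if (skel) {
 *		skel->bss->aggr_mode = LOCK_AGGR_CALLER;
 *		skel->bss->needs_callstack = 1;
 *		skel->bss->stack_skip = 4;
 *		lock_contention_bpf__attach(skel);
 *		skel->bss->enabled = 1;
 *		// run the workload, then read the lock_stat map
 *		skel->bss->enabled = 0;
 *		lock_contention_bpf__destroy(skel);
 *	}
 */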