1b061017fSAlexei Starovoitov // SPDX-License-Identifier: GPL-2.0
2b061017fSAlexei Starovoitov // Copyright (c) 2019 Facebook
3b061017fSAlexei Starovoitov
4b061017fSAlexei Starovoitov #include <stdint.h>
5b061017fSAlexei Starovoitov #include <stddef.h>
6b061017fSAlexei Starovoitov #include <stdbool.h>
7b061017fSAlexei Starovoitov #include <linux/bpf.h>
8b061017fSAlexei Starovoitov #include <linux/ptrace.h>
9b061017fSAlexei Starovoitov #include <linux/sched.h>
10b061017fSAlexei Starovoitov #include <linux/types.h>
113e689141SToke Høiland-Jørgensen #include <bpf/bpf_helpers.h>
12b061017fSAlexei Starovoitov
/*
 * Local stand-ins for kernel types; the real struct task_struct definition is
 * not available to this file, so an empty placeholder is declared instead.
 */
typedef uint32_t pid_t;
struct task_struct {};

/* Size of the comm buffer in a sample and depth of one stack-trace value. */
#define TASK_COMM_LEN		16
#define PERF_MAX_STACK_DEPTH	127

/* Metavariable type tags. */
#define STROBE_TYPE_INVALID	0
#define STROBE_TYPE_INT		1
#define STROBE_TYPE_STR		2
#define STROBE_TYPE_MAP		3

#define STACK_TABLE_EPOCH_SHIFT	20
/* Maximum bytes copied per string by bpf_probe_read_user_str(). */
#define STROBE_MAX_STR_LEN	1
/* Number of slots in the strobemeta_cfgs map. */
#define STROBE_MAX_CFGS		32
/*
 * Largest number of payload bytes one read_map_var() call can append: one tag
 * string plus a key string and a value string per map entry, each limited to
 * STROBE_MAX_STR_LEN bytes.
 */
#define READ_MAP_VAR_PAYLOAD_CAP \
	((1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN)
/*
 * Size of strobemeta_payload.payload: room for every string variable plus
 * the worst case of every map variable.
 *
 * NOTE(review): STROBE_MAX_INTS, STROBE_MAX_STRS, STROBE_MAX_MAPS and
 * STROBE_MAX_MAP_ENTRIES are not defined in this part of the file;
 * presumably the including source defines them first -- confirm.
 */
#define STROBE_MAX_PAYLOAD \
	(STROBE_MAX_STRS * STROBE_MAX_STR_LEN + \
	 STROBE_MAX_MAPS * READ_MAP_VAR_PAYLOAD_CAP)
32b061017fSAlexei Starovoitov
/* Fixed-size prefix placed in front of every strobe value variant. */
struct strobe_value_header {
	/*
	 * Interpretation depends on the value type:
	 *   int: 0 when the value is not set, 1 otherwise;
	 *   str: always 1; whether a value is set is decided by the pointer;
	 *   map: always 1; the pointer leads to a separate struct that holds
	 *        the number of entries (at most STROBE_MAX_MAP_ENTRIES).
	 */
	uint16_t len;
	/*
	 * Space kept for future fields/flags. The header must remain exactly
	 * 8 bytes so that BPF can fetch header and value together with one
	 * 16-byte read.
	 */
	uint8_t _reserved[6];
};
49b061017fSAlexei Starovoitov
50b061017fSAlexei Starovoitov /*
51b061017fSAlexei Starovoitov * strobe_value_generic is used from BPF probe only, but needs to be a union
52b061017fSAlexei Starovoitov * of strobe_value_int/strobe_value_str/strobe_value_map
53b061017fSAlexei Starovoitov */
54b061017fSAlexei Starovoitov struct strobe_value_generic {
55b061017fSAlexei Starovoitov struct strobe_value_header header;
56b061017fSAlexei Starovoitov union {
57b061017fSAlexei Starovoitov int64_t val;
58b061017fSAlexei Starovoitov void *ptr;
59b061017fSAlexei Starovoitov };
60b061017fSAlexei Starovoitov };
61b061017fSAlexei Starovoitov
62b061017fSAlexei Starovoitov struct strobe_value_int {
63b061017fSAlexei Starovoitov struct strobe_value_header header;
64b061017fSAlexei Starovoitov int64_t value;
65b061017fSAlexei Starovoitov };
66b061017fSAlexei Starovoitov
67b061017fSAlexei Starovoitov struct strobe_value_str {
68b061017fSAlexei Starovoitov struct strobe_value_header header;
69b061017fSAlexei Starovoitov const char* value;
70b061017fSAlexei Starovoitov };
71b061017fSAlexei Starovoitov
72b061017fSAlexei Starovoitov struct strobe_value_map {
73b061017fSAlexei Starovoitov struct strobe_value_header header;
74b061017fSAlexei Starovoitov const struct strobe_map_raw* value;
75b061017fSAlexei Starovoitov };
76b061017fSAlexei Starovoitov
/* One key/value pair of a strobe map; both members point to C strings. */
struct strobe_map_entry {
	const char *key;
	const char *val;
};
81b061017fSAlexei Starovoitov
82b061017fSAlexei Starovoitov /*
83b061017fSAlexei Starovoitov * Map of C-string key/value pairs with fixed maximum capacity. Each map has
84b061017fSAlexei Starovoitov * corresponding int64 ID, which application can use (or ignore) in whatever
85b061017fSAlexei Starovoitov * way appropriate. Map is "write-only", there is no way to get data out of
86b061017fSAlexei Starovoitov * map. Map is intended to be used to provide metadata for profilers and is
87b061017fSAlexei Starovoitov * not to be used for internal in-app communication. All methods are
88b061017fSAlexei Starovoitov * thread-safe.
89b061017fSAlexei Starovoitov */
90b061017fSAlexei Starovoitov struct strobe_map_raw {
91b061017fSAlexei Starovoitov /*
92b061017fSAlexei Starovoitov * general purpose unique ID that's up to application to decide
93b061017fSAlexei Starovoitov * whether and how to use; for request metadata use case id is unique
94b061017fSAlexei Starovoitov * request ID that's used to match metadata with stack traces on
95b061017fSAlexei Starovoitov * Strobelight backend side
96b061017fSAlexei Starovoitov */
97b061017fSAlexei Starovoitov int64_t id;
98b061017fSAlexei Starovoitov /* number of used entries in map */
99b061017fSAlexei Starovoitov int64_t cnt;
100b061017fSAlexei Starovoitov /*
101b061017fSAlexei Starovoitov * having volatile doesn't change anything on BPF side, but clang
102b061017fSAlexei Starovoitov * emits warnings for passing `volatile const char *` into
10350f9aa44SDaniel Borkmann * bpf_probe_read_user_str that expects just `const char *`
104b061017fSAlexei Starovoitov */
105b061017fSAlexei Starovoitov const char* tag;
106b061017fSAlexei Starovoitov /*
107b061017fSAlexei Starovoitov * key/value entries, each consisting of 2 pointers to key and value
108b061017fSAlexei Starovoitov * C strings
109b061017fSAlexei Starovoitov */
110b061017fSAlexei Starovoitov struct strobe_map_entry entries[STROBE_MAX_MAP_ENTRIES];
111b061017fSAlexei Starovoitov };
112b061017fSAlexei Starovoitov
/* Supported values of strobe_value_loc.tls_mode */
#define TLS_NOT_SET	(-1)
#define TLS_LOCAL_EXEC	0
#define TLS_IMM_EXEC	1
#define TLS_GENERAL_DYN	2
118b061017fSAlexei Starovoitov
/*
 * Uniform description of where a metavariable lives in TLS, covering both
 * static executables and shared libraries.
 */
struct strobe_value_loc {
	/*
	 * TLS access mode used for this metavariable:
	 *   -1 (TLS_NOT_SET)     - no metavariable in this slot;
	 *    0 (TLS_LOCAL_EXEC)  - Local Executable mode;
	 *    1 (TLS_IMM_EXEC)    - Immediate Executable mode;
	 *    2 (TLS_GENERAL_DYN) - General Dynamic mode.
	 * Local Dynamic mode is not supported yet because it has never been
	 * seen in practice. The mode decides how `offset` is interpreted;
	 * see calc_location() below.
	 */
	int64_t tls_mode;
	/*
	 * TLS_LOCAL_EXEC:  offset from the thread pointer (fs:0 on x86-64,
	 *                  tpidr_el0 on aarch64);
	 * TLS_IMM_EXEC:    absolute address of the GOT entry that holds the
	 *                  offset from the thread pointer;
	 * TLS_GENERAL_DYN: absolute address of the double GOT entry that
	 *                  holds a tls_index_t struct.
	 */
	int64_t offset;
};
145b061017fSAlexei Starovoitov
146b061017fSAlexei Starovoitov struct strobemeta_cfg {
147b061017fSAlexei Starovoitov int64_t req_meta_idx;
148b061017fSAlexei Starovoitov struct strobe_value_loc int_locs[STROBE_MAX_INTS];
149b061017fSAlexei Starovoitov struct strobe_value_loc str_locs[STROBE_MAX_STRS];
150b061017fSAlexei Starovoitov struct strobe_value_loc map_locs[STROBE_MAX_MAPS];
151b061017fSAlexei Starovoitov };
152b061017fSAlexei Starovoitov
153b061017fSAlexei Starovoitov struct strobe_map_descr {
154b061017fSAlexei Starovoitov uint64_t id;
155b061017fSAlexei Starovoitov int16_t tag_len;
156b061017fSAlexei Starovoitov /*
157b061017fSAlexei Starovoitov * cnt <0 - map value isn't set;
158b061017fSAlexei Starovoitov * 0 - map has id set, but no key/value entries
159b061017fSAlexei Starovoitov */
160b061017fSAlexei Starovoitov int16_t cnt;
161b061017fSAlexei Starovoitov /*
162b061017fSAlexei Starovoitov * both key_lens[i] and val_lens[i] should be >0 for present key/value
163b061017fSAlexei Starovoitov * entry
164b061017fSAlexei Starovoitov */
165b061017fSAlexei Starovoitov uint16_t key_lens[STROBE_MAX_MAP_ENTRIES];
166b061017fSAlexei Starovoitov uint16_t val_lens[STROBE_MAX_MAP_ENTRIES];
167b061017fSAlexei Starovoitov };
168b061017fSAlexei Starovoitov
169b061017fSAlexei Starovoitov struct strobemeta_payload {
170b061017fSAlexei Starovoitov /* req_id has valid request ID, if req_meta_valid == 1 */
171b061017fSAlexei Starovoitov int64_t req_id;
172b061017fSAlexei Starovoitov uint8_t req_meta_valid;
173b061017fSAlexei Starovoitov /*
174b061017fSAlexei Starovoitov * mask has Nth bit set to 1, if Nth metavar was present and
175b061017fSAlexei Starovoitov * successfully read
176b061017fSAlexei Starovoitov */
177b061017fSAlexei Starovoitov uint64_t int_vals_set_mask;
178b061017fSAlexei Starovoitov int64_t int_vals[STROBE_MAX_INTS];
179b061017fSAlexei Starovoitov /* len is >0 for present values */
180b061017fSAlexei Starovoitov uint16_t str_lens[STROBE_MAX_STRS];
181b061017fSAlexei Starovoitov /* if map_descrs[i].cnt == -1, metavar is not present/set */
182b061017fSAlexei Starovoitov struct strobe_map_descr map_descrs[STROBE_MAX_MAPS];
183b061017fSAlexei Starovoitov /*
184b061017fSAlexei Starovoitov * payload has compactly packed values of str and map variables in the
185b061017fSAlexei Starovoitov * form: strval1\0strval2\0map1key1\0map1val1\0map2key1\0map2val1\0
186b061017fSAlexei Starovoitov * (and so on); str_lens[i], key_lens[i] and val_lens[i] determines
187b061017fSAlexei Starovoitov * value length
188b061017fSAlexei Starovoitov */
189b061017fSAlexei Starovoitov char payload[STROBE_MAX_PAYLOAD];
190b061017fSAlexei Starovoitov };
191b061017fSAlexei Starovoitov
192b061017fSAlexei Starovoitov struct strobelight_bpf_sample {
193b061017fSAlexei Starovoitov uint64_t ktime;
194b061017fSAlexei Starovoitov char comm[TASK_COMM_LEN];
195b061017fSAlexei Starovoitov pid_t pid;
196b061017fSAlexei Starovoitov int user_stack_id;
197b061017fSAlexei Starovoitov int kernel_stack_id;
198b061017fSAlexei Starovoitov int has_meta;
199b061017fSAlexei Starovoitov struct strobemeta_payload metadata;
200b061017fSAlexei Starovoitov /*
201b061017fSAlexei Starovoitov * makes it possible to pass (<real payload size> + 1) as data size to
202b061017fSAlexei Starovoitov * perf_submit() to avoid perf_submit's paranoia about passing zero as
203b061017fSAlexei Starovoitov * size, as it deduces that <real payload size> might be
204b061017fSAlexei Starovoitov * **theoretically** zero
205b061017fSAlexei Starovoitov */
206b061017fSAlexei Starovoitov char dummy_safeguard;
207b061017fSAlexei Starovoitov };
208b061017fSAlexei Starovoitov
2091639b17cSAndrii Nakryiko struct {
2101639b17cSAndrii Nakryiko __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
2111639b17cSAndrii Nakryiko __uint(max_entries, 32);
2121639b17cSAndrii Nakryiko __uint(key_size, sizeof(int));
2131639b17cSAndrii Nakryiko __uint(value_size, sizeof(int));
2141639b17cSAndrii Nakryiko } samples SEC(".maps");
215b061017fSAlexei Starovoitov
2161639b17cSAndrii Nakryiko struct {
2171639b17cSAndrii Nakryiko __uint(type, BPF_MAP_TYPE_STACK_TRACE);
2181639b17cSAndrii Nakryiko __uint(max_entries, 16);
2191639b17cSAndrii Nakryiko __uint(key_size, sizeof(uint32_t));
2201639b17cSAndrii Nakryiko __uint(value_size, sizeof(uint64_t) * PERF_MAX_STACK_DEPTH);
2211639b17cSAndrii Nakryiko } stacks_0 SEC(".maps");
222b061017fSAlexei Starovoitov
2231639b17cSAndrii Nakryiko struct {
2241639b17cSAndrii Nakryiko __uint(type, BPF_MAP_TYPE_STACK_TRACE);
2251639b17cSAndrii Nakryiko __uint(max_entries, 16);
2261639b17cSAndrii Nakryiko __uint(key_size, sizeof(uint32_t));
2271639b17cSAndrii Nakryiko __uint(value_size, sizeof(uint64_t) * PERF_MAX_STACK_DEPTH);
2281639b17cSAndrii Nakryiko } stacks_1 SEC(".maps");
229b061017fSAlexei Starovoitov
2301639b17cSAndrii Nakryiko struct {
2311639b17cSAndrii Nakryiko __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
2321639b17cSAndrii Nakryiko __uint(max_entries, 1);
2331639b17cSAndrii Nakryiko __type(key, uint32_t);
2341639b17cSAndrii Nakryiko __type(value, struct strobelight_bpf_sample);
2351639b17cSAndrii Nakryiko } sample_heap SEC(".maps");
236b061017fSAlexei Starovoitov
2371639b17cSAndrii Nakryiko struct {
2381639b17cSAndrii Nakryiko __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
2391639b17cSAndrii Nakryiko __uint(max_entries, STROBE_MAX_CFGS);
2401639b17cSAndrii Nakryiko __type(key, pid_t);
2411639b17cSAndrii Nakryiko __type(value, struct strobemeta_cfg);
2421639b17cSAndrii Nakryiko } strobemeta_cfgs SEC(".maps");
243b061017fSAlexei Starovoitov
/*
 * Element type of the dynamic thread vector (dtv), mirroring glibc:
 * https://github.com/lattera/glibc/blob/master/nptl/sysdeps/x86_64/tls.h#L34
 */
typedef union dtv {
	size_t counter;
	struct {
		void *val;
		bool is_static;
	} pointer;
} dtv_t;
253b061017fSAlexei Starovoitov
254b061017fSAlexei Starovoitov /* Partial definition for tcbhead_t */
255b061017fSAlexei Starovoitov /* https://github.com/bminor/glibc/blob/master/sysdeps/x86_64/nptl/tls.h#L42 */
256b061017fSAlexei Starovoitov struct tcbhead {
257b061017fSAlexei Starovoitov void* tcb;
258b061017fSAlexei Starovoitov dtv_t* dtv;
259b061017fSAlexei Starovoitov };
260b061017fSAlexei Starovoitov
/*
 * TLS module/offset pair used in the shared-library case.
 * On x86-64 it is laid over two GOT entries.
 * On aarch64 the second GOT entry points to it.
 */
struct tls_index {
	uint64_t module;
	uint64_t offset;
};
270b061017fSAlexei Starovoitov
271fab45be1SAndrii Nakryiko #ifdef SUBPROGS
272fab45be1SAndrii Nakryiko __noinline
273fab45be1SAndrii Nakryiko #else
274fab45be1SAndrii Nakryiko __always_inline
275fab45be1SAndrii Nakryiko #endif
calc_location(struct strobe_value_loc * loc,void * tls_base)276fab45be1SAndrii Nakryiko static void *calc_location(struct strobe_value_loc *loc, void *tls_base)
277b061017fSAlexei Starovoitov {
278b061017fSAlexei Starovoitov /*
279b061017fSAlexei Starovoitov * tls_mode value is:
280b061017fSAlexei Starovoitov * - -1 (TLS_NOT_SET), if no metavar is present;
281b061017fSAlexei Starovoitov * - 0 (TLS_LOCAL_EXEC), if metavar uses Local Executable mode of TLS
282b061017fSAlexei Starovoitov * (offset from fs:0 for x86-64 or tpidr_el0 for aarch64);
283b061017fSAlexei Starovoitov * - 1 (TLS_IMM_EXEC), if metavar uses Immediate Executable mode of TLS;
284b061017fSAlexei Starovoitov * - 2 (TLS_GENERAL_DYN), if metavar uses General Dynamic mode of TLS;
285b061017fSAlexei Starovoitov * This schema allows to use something like:
286b061017fSAlexei Starovoitov * (tls_mode + 1) * (tls_base + offset)
287b061017fSAlexei Starovoitov * to get NULL for "no metavar" location, or correct pointer for local
288b061017fSAlexei Starovoitov * executable mode without doing extra ifs.
289b061017fSAlexei Starovoitov */
290b061017fSAlexei Starovoitov if (loc->tls_mode <= TLS_LOCAL_EXEC) {
291b061017fSAlexei Starovoitov /* static executable is simple, we just have offset from
292b061017fSAlexei Starovoitov * tls_base */
293b061017fSAlexei Starovoitov void *addr = tls_base + loc->offset;
294b061017fSAlexei Starovoitov /* multiply by (tls_mode + 1) to get NULL, if we have no
295b061017fSAlexei Starovoitov * metavar in this slot */
296b061017fSAlexei Starovoitov return (void *)((loc->tls_mode + 1) * (int64_t)addr);
297b061017fSAlexei Starovoitov }
298b061017fSAlexei Starovoitov /*
299b061017fSAlexei Starovoitov * Other modes are more complicated, we need to jump through few hoops.
300b061017fSAlexei Starovoitov *
301b061017fSAlexei Starovoitov * For immediate executable mode (currently supported only for aarch64):
302b061017fSAlexei Starovoitov * - loc->offset is pointing to a GOT entry containing fixed offset
303b061017fSAlexei Starovoitov * relative to tls_base;
304b061017fSAlexei Starovoitov *
305b061017fSAlexei Starovoitov * For general dynamic mode:
306b061017fSAlexei Starovoitov * - loc->offset is pointing to a beginning of double GOT entries;
307b061017fSAlexei Starovoitov * - (for aarch64 only) second entry points to tls_index_t struct;
308b061017fSAlexei Starovoitov * - (for x86-64 only) two GOT entries are already tls_index_t;
309b061017fSAlexei Starovoitov * - tls_index_t->module is used to find start of TLS section in
310b061017fSAlexei Starovoitov * which variable resides;
311b061017fSAlexei Starovoitov * - tls_index_t->offset provides offset within that TLS section,
312b061017fSAlexei Starovoitov * pointing to value of variable.
313b061017fSAlexei Starovoitov */
314b061017fSAlexei Starovoitov struct tls_index tls_index;
315b061017fSAlexei Starovoitov dtv_t *dtv;
316b061017fSAlexei Starovoitov void *tls_ptr;
317b061017fSAlexei Starovoitov
31850f9aa44SDaniel Borkmann bpf_probe_read_user(&tls_index, sizeof(struct tls_index),
319b061017fSAlexei Starovoitov (void *)loc->offset);
320b061017fSAlexei Starovoitov /* valid module index is always positive */
321b061017fSAlexei Starovoitov if (tls_index.module > 0) {
322b061017fSAlexei Starovoitov /* dtv = ((struct tcbhead *)tls_base)->dtv[tls_index.module] */
32350f9aa44SDaniel Borkmann bpf_probe_read_user(&dtv, sizeof(dtv),
324b061017fSAlexei Starovoitov &((struct tcbhead *)tls_base)->dtv);
325b061017fSAlexei Starovoitov dtv += tls_index.module;
326b061017fSAlexei Starovoitov } else {
327b061017fSAlexei Starovoitov dtv = NULL;
328b061017fSAlexei Starovoitov }
32950f9aa44SDaniel Borkmann bpf_probe_read_user(&tls_ptr, sizeof(void *), dtv);
330b061017fSAlexei Starovoitov /* if pointer has (void *)-1 value, then TLS wasn't initialized yet */
331b061017fSAlexei Starovoitov return tls_ptr && tls_ptr != (void *)-1
332b061017fSAlexei Starovoitov ? tls_ptr + tls_index.offset
333b061017fSAlexei Starovoitov : NULL;
334b061017fSAlexei Starovoitov }
335b061017fSAlexei Starovoitov
336fab45be1SAndrii Nakryiko #ifdef SUBPROGS
337fab45be1SAndrii Nakryiko __noinline
338fab45be1SAndrii Nakryiko #else
339fab45be1SAndrii Nakryiko __always_inline
340fab45be1SAndrii Nakryiko #endif
read_int_var(struct strobemeta_cfg * cfg,size_t idx,void * tls_base,struct strobe_value_generic * value,struct strobemeta_payload * data)341fab45be1SAndrii Nakryiko static void read_int_var(struct strobemeta_cfg *cfg,
342d2f5bbbcSJiri Benc size_t idx, void *tls_base,
343b061017fSAlexei Starovoitov struct strobe_value_generic *value,
344b061017fSAlexei Starovoitov struct strobemeta_payload *data)
345b061017fSAlexei Starovoitov {
346b061017fSAlexei Starovoitov void *location = calc_location(&cfg->int_locs[idx], tls_base);
347b061017fSAlexei Starovoitov if (!location)
348b061017fSAlexei Starovoitov return;
349b061017fSAlexei Starovoitov
35050f9aa44SDaniel Borkmann bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
351b061017fSAlexei Starovoitov data->int_vals[idx] = value->val;
352b061017fSAlexei Starovoitov if (value->header.len)
353b061017fSAlexei Starovoitov data->int_vals_set_mask |= (1 << idx);
354b061017fSAlexei Starovoitov }
355b061017fSAlexei Starovoitov
read_str_var(struct strobemeta_cfg * cfg,size_t idx,void * tls_base,struct strobe_value_generic * value,struct strobemeta_payload * data,size_t off)356d2f5bbbcSJiri Benc static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
357d2f5bbbcSJiri Benc size_t idx, void *tls_base,
358b061017fSAlexei Starovoitov struct strobe_value_generic *value,
359d2f5bbbcSJiri Benc struct strobemeta_payload *data,
360*e030da5fSEduard Zingerman size_t off)
361b061017fSAlexei Starovoitov {
362b061017fSAlexei Starovoitov void *location;
363a20eac0aSAndrii Nakryiko uint64_t len;
364b061017fSAlexei Starovoitov
365b061017fSAlexei Starovoitov data->str_lens[idx] = 0;
366b061017fSAlexei Starovoitov location = calc_location(&cfg->str_locs[idx], tls_base);
367b061017fSAlexei Starovoitov if (!location)
368b061017fSAlexei Starovoitov return 0;
369b061017fSAlexei Starovoitov
37050f9aa44SDaniel Borkmann bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
371*e030da5fSEduard Zingerman len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, value->ptr);
372b061017fSAlexei Starovoitov /*
37350f9aa44SDaniel Borkmann * if bpf_probe_read_user_str returns error (<0), due to casting to
374b061017fSAlexei Starovoitov * unsinged int, it will become big number, so next check is
375b061017fSAlexei Starovoitov * sufficient to check for errors AND prove to BPF verifier, that
37650f9aa44SDaniel Borkmann * bpf_probe_read_user_str won't return anything bigger than
377b061017fSAlexei Starovoitov * STROBE_MAX_STR_LEN
378b061017fSAlexei Starovoitov */
379b061017fSAlexei Starovoitov if (len > STROBE_MAX_STR_LEN)
380b061017fSAlexei Starovoitov return 0;
381b061017fSAlexei Starovoitov
382b061017fSAlexei Starovoitov data->str_lens[idx] = len;
383*e030da5fSEduard Zingerman return off + len;
384b061017fSAlexei Starovoitov }
385b061017fSAlexei Starovoitov
read_map_var(struct strobemeta_cfg * cfg,size_t idx,void * tls_base,struct strobe_value_generic * value,struct strobemeta_payload * data,size_t off)386*e030da5fSEduard Zingerman static __always_inline uint64_t read_map_var(struct strobemeta_cfg *cfg,
387d2f5bbbcSJiri Benc size_t idx, void *tls_base,
388b061017fSAlexei Starovoitov struct strobe_value_generic *value,
389d2f5bbbcSJiri Benc struct strobemeta_payload *data,
390*e030da5fSEduard Zingerman size_t off)
391b061017fSAlexei Starovoitov {
392b061017fSAlexei Starovoitov struct strobe_map_descr* descr = &data->map_descrs[idx];
393b061017fSAlexei Starovoitov struct strobe_map_raw map;
394b061017fSAlexei Starovoitov void *location;
395a20eac0aSAndrii Nakryiko uint64_t len;
396b061017fSAlexei Starovoitov
397b061017fSAlexei Starovoitov descr->tag_len = 0; /* presume no tag is set */
398b061017fSAlexei Starovoitov descr->cnt = -1; /* presume no value is set */
399b061017fSAlexei Starovoitov
400b061017fSAlexei Starovoitov location = calc_location(&cfg->map_locs[idx], tls_base);
401b061017fSAlexei Starovoitov if (!location)
402*e030da5fSEduard Zingerman return off;
403b061017fSAlexei Starovoitov
40450f9aa44SDaniel Borkmann bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
40550f9aa44SDaniel Borkmann if (bpf_probe_read_user(&map, sizeof(struct strobe_map_raw), value->ptr))
406*e030da5fSEduard Zingerman return off;
407b061017fSAlexei Starovoitov
408b061017fSAlexei Starovoitov descr->id = map.id;
409b061017fSAlexei Starovoitov descr->cnt = map.cnt;
410b061017fSAlexei Starovoitov if (cfg->req_meta_idx == idx) {
411b061017fSAlexei Starovoitov data->req_id = map.id;
412b061017fSAlexei Starovoitov data->req_meta_valid = 1;
413b061017fSAlexei Starovoitov }
414b061017fSAlexei Starovoitov
415*e030da5fSEduard Zingerman len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, map.tag);
416b061017fSAlexei Starovoitov if (len <= STROBE_MAX_STR_LEN) {
417b061017fSAlexei Starovoitov descr->tag_len = len;
418*e030da5fSEduard Zingerman off += len;
419b061017fSAlexei Starovoitov }
420b061017fSAlexei Starovoitov
421b061017fSAlexei Starovoitov #ifdef NO_UNROLL
422b061017fSAlexei Starovoitov #pragma clang loop unroll(disable)
423b061017fSAlexei Starovoitov #else
424b061017fSAlexei Starovoitov #pragma unroll
425b061017fSAlexei Starovoitov #endif
4264670d68bSAndrii Nakryiko for (int i = 0; i < STROBE_MAX_MAP_ENTRIES; ++i) {
4274670d68bSAndrii Nakryiko if (i >= map.cnt)
4284670d68bSAndrii Nakryiko break;
4294670d68bSAndrii Nakryiko
430b061017fSAlexei Starovoitov descr->key_lens[i] = 0;
431*e030da5fSEduard Zingerman len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN,
432b061017fSAlexei Starovoitov map.entries[i].key);
433b061017fSAlexei Starovoitov if (len <= STROBE_MAX_STR_LEN) {
434b061017fSAlexei Starovoitov descr->key_lens[i] = len;
435*e030da5fSEduard Zingerman off += len;
436b061017fSAlexei Starovoitov }
437b061017fSAlexei Starovoitov descr->val_lens[i] = 0;
438*e030da5fSEduard Zingerman len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN,
439b061017fSAlexei Starovoitov map.entries[i].val);
440b061017fSAlexei Starovoitov if (len <= STROBE_MAX_STR_LEN) {
441b061017fSAlexei Starovoitov descr->val_lens[i] = len;
442*e030da5fSEduard Zingerman off += len;
443b061017fSAlexei Starovoitov }
444b061017fSAlexei Starovoitov }
445b061017fSAlexei Starovoitov
446*e030da5fSEduard Zingerman return off;
447b061017fSAlexei Starovoitov }
448b061017fSAlexei Starovoitov
449f6e659b7SJoanne Koong #ifdef USE_BPF_LOOP
/* Selects which kind of metavariable read_var_callback() reads. */
enum read_type {
	READ_INT_VAR = 0,
	READ_MAP_VAR = 1,
	READ_STR_VAR = 2,
};
455f6e659b7SJoanne Koong
456f6e659b7SJoanne Koong struct read_var_ctx {
457f6e659b7SJoanne Koong struct strobemeta_payload *data;
458f6e659b7SJoanne Koong void *tls_base;
459f6e659b7SJoanne Koong struct strobemeta_cfg *cfg;
460*e030da5fSEduard Zingerman size_t payload_off;
461f6e659b7SJoanne Koong /* value gets mutated */
462f6e659b7SJoanne Koong struct strobe_value_generic *value;
463f6e659b7SJoanne Koong enum read_type type;
464f6e659b7SJoanne Koong };
465f6e659b7SJoanne Koong
/*
 * read_var_callback - bpf_loop() body that reads one metavariable.
 * @index: iteration number, used as the variable index
 * @ctx:   loop state; ctx->type selects the kind of variable to read
 *
 * Returns 0 after handling the variable and 1 when @index is out of range
 * for the selected kind or the payload has no room left for a worst-case
 * write (read_strobe_meta() treats a short iteration count as failure).
 */
static int read_var_callback(__u64 index, struct read_var_ctx *ctx)
{
	/* lose precision info for ctx->payload_off, verifier won't track
	 * double xor, barrier_var() is needed to force clang keep both xors.
	 */
	ctx->payload_off ^= index;
	barrier_var(ctx->payload_off);
	ctx->payload_off ^= index;

	switch (ctx->type) {
	case READ_INT_VAR:
		if (index >= STROBE_MAX_INTS)
			return 1;
		read_int_var(ctx->cfg, index, ctx->tls_base, ctx->value, ctx->data);
		return 0;
	case READ_MAP_VAR:
		if (index >= STROBE_MAX_MAPS)
			return 1;
		/* stop unless a worst-case map still fits in the payload */
		if (ctx->payload_off > sizeof(ctx->data->payload) - READ_MAP_VAR_PAYLOAD_CAP)
			return 1;
		ctx->payload_off = read_map_var(ctx->cfg, index, ctx->tls_base,
						ctx->value, ctx->data, ctx->payload_off);
		return 0;
	case READ_STR_VAR:
		if (index >= STROBE_MAX_STRS)
			return 1;
		/* stop unless a worst-case string still fits in the payload */
		if (ctx->payload_off > sizeof(ctx->data->payload) - STROBE_MAX_STR_LEN)
			return 1;
		ctx->payload_off = read_str_var(ctx->cfg, index, ctx->tls_base,
						ctx->value, ctx->data, ctx->payload_off);
		return 0;
	}
	/* any other type value: nothing to read, keep iterating */
	return 0;
}
499f6e659b7SJoanne Koong #endif /* USE_BPF_LOOP */
500f6e659b7SJoanne Koong
501b061017fSAlexei Starovoitov /*
502b061017fSAlexei Starovoitov * read_strobe_meta returns NULL, if no metadata was read; otherwise returns
503b061017fSAlexei Starovoitov * pointer to *right after* payload ends
504b061017fSAlexei Starovoitov */
505fab45be1SAndrii Nakryiko #ifdef SUBPROGS
506fab45be1SAndrii Nakryiko __noinline
507fab45be1SAndrii Nakryiko #else
508fab45be1SAndrii Nakryiko __always_inline
509fab45be1SAndrii Nakryiko #endif
read_strobe_meta(struct task_struct * task,struct strobemeta_payload * data)510fab45be1SAndrii Nakryiko static void *read_strobe_meta(struct task_struct *task,
511d2f5bbbcSJiri Benc struct strobemeta_payload *data)
512d2f5bbbcSJiri Benc {
513b061017fSAlexei Starovoitov pid_t pid = bpf_get_current_pid_tgid() >> 32;
514b061017fSAlexei Starovoitov struct strobe_value_generic value = {0};
515b061017fSAlexei Starovoitov struct strobemeta_cfg *cfg;
516*e030da5fSEduard Zingerman size_t payload_off;
517*e030da5fSEduard Zingerman void *tls_base;
518b061017fSAlexei Starovoitov
519b061017fSAlexei Starovoitov cfg = bpf_map_lookup_elem(&strobemeta_cfgs, &pid);
520b061017fSAlexei Starovoitov if (!cfg)
521b061017fSAlexei Starovoitov return NULL;
522b061017fSAlexei Starovoitov
523b061017fSAlexei Starovoitov data->int_vals_set_mask = 0;
524b061017fSAlexei Starovoitov data->req_meta_valid = 0;
525*e030da5fSEduard Zingerman payload_off = 0;
526b061017fSAlexei Starovoitov /*
527b061017fSAlexei Starovoitov * we don't have struct task_struct definition, it should be:
528b061017fSAlexei Starovoitov * tls_base = (void *)task->thread.fsbase;
529b061017fSAlexei Starovoitov */
530b061017fSAlexei Starovoitov tls_base = (void *)task;
531b061017fSAlexei Starovoitov
532f6e659b7SJoanne Koong #ifdef USE_BPF_LOOP
533f6e659b7SJoanne Koong struct read_var_ctx ctx = {
534f6e659b7SJoanne Koong .cfg = cfg,
535f6e659b7SJoanne Koong .tls_base = tls_base,
536f6e659b7SJoanne Koong .value = &value,
537f6e659b7SJoanne Koong .data = data,
538*e030da5fSEduard Zingerman .payload_off = 0,
539f6e659b7SJoanne Koong };
540f6e659b7SJoanne Koong int err;
541f6e659b7SJoanne Koong
542f6e659b7SJoanne Koong ctx.type = READ_INT_VAR;
543f6e659b7SJoanne Koong err = bpf_loop(STROBE_MAX_INTS, read_var_callback, &ctx, 0);
544f6e659b7SJoanne Koong if (err != STROBE_MAX_INTS)
545f6e659b7SJoanne Koong return NULL;
546f6e659b7SJoanne Koong
547f6e659b7SJoanne Koong ctx.type = READ_STR_VAR;
548f6e659b7SJoanne Koong err = bpf_loop(STROBE_MAX_STRS, read_var_callback, &ctx, 0);
549f6e659b7SJoanne Koong if (err != STROBE_MAX_STRS)
550f6e659b7SJoanne Koong return NULL;
551f6e659b7SJoanne Koong
552f6e659b7SJoanne Koong ctx.type = READ_MAP_VAR;
553f6e659b7SJoanne Koong err = bpf_loop(STROBE_MAX_MAPS, read_var_callback, &ctx, 0);
554f6e659b7SJoanne Koong if (err != STROBE_MAX_MAPS)
555f6e659b7SJoanne Koong return NULL;
556*e030da5fSEduard Zingerman
557*e030da5fSEduard Zingerman payload_off = ctx.payload_off;
558*e030da5fSEduard Zingerman /* this should not really happen, here only to satisfy verifer */
559*e030da5fSEduard Zingerman if (payload_off > sizeof(data->payload))
560*e030da5fSEduard Zingerman payload_off = sizeof(data->payload);
561f6e659b7SJoanne Koong #else
562b061017fSAlexei Starovoitov #ifdef NO_UNROLL
563b061017fSAlexei Starovoitov #pragma clang loop unroll(disable)
564b061017fSAlexei Starovoitov #else
565b061017fSAlexei Starovoitov #pragma unroll
566f6e659b7SJoanne Koong #endif /* NO_UNROLL */
567b061017fSAlexei Starovoitov for (int i = 0; i < STROBE_MAX_INTS; ++i) {
568b061017fSAlexei Starovoitov read_int_var(cfg, i, tls_base, &value, data);
569b061017fSAlexei Starovoitov }
570b061017fSAlexei Starovoitov #ifdef NO_UNROLL
571b061017fSAlexei Starovoitov #pragma clang loop unroll(disable)
572b061017fSAlexei Starovoitov #else
573b061017fSAlexei Starovoitov #pragma unroll
574f6e659b7SJoanne Koong #endif /* NO_UNROLL */
575b061017fSAlexei Starovoitov for (int i = 0; i < STROBE_MAX_STRS; ++i) {
576*e030da5fSEduard Zingerman payload_off = read_str_var(cfg, i, tls_base, &value, data, payload_off);
577b061017fSAlexei Starovoitov }
578b061017fSAlexei Starovoitov #ifdef NO_UNROLL
579b061017fSAlexei Starovoitov #pragma clang loop unroll(disable)
580b061017fSAlexei Starovoitov #else
581b061017fSAlexei Starovoitov #pragma unroll
582f6e659b7SJoanne Koong #endif /* NO_UNROLL */
583b061017fSAlexei Starovoitov for (int i = 0; i < STROBE_MAX_MAPS; ++i) {
584*e030da5fSEduard Zingerman payload_off = read_map_var(cfg, i, tls_base, &value, data, payload_off);
585b061017fSAlexei Starovoitov }
586f6e659b7SJoanne Koong #endif /* USE_BPF_LOOP */
587f6e659b7SJoanne Koong
588b061017fSAlexei Starovoitov /*
589b061017fSAlexei Starovoitov * return pointer right after end of payload, so it's possible to
590b061017fSAlexei Starovoitov * calculate exact amount of useful data that needs to be sent
591b061017fSAlexei Starovoitov */
592*e030da5fSEduard Zingerman return &data->payload[payload_off];
593b061017fSAlexei Starovoitov }
594b061017fSAlexei Starovoitov
/*
 * on_event() - BPF program placed in the "raw_tracepoint/kfree_skb" section.
 *
 * Builds one sample inside the sample_heap element at key 0: pid, comm,
 * timestamp, strobe metadata (via read_strobe_meta()) and kernel/user stack
 * ids. The filled part of the sample is then emitted through the samples map
 * with bpf_perf_event_output().
 *
 * Always returns 0.
 */
595b061017fSAlexei Starovoitov SEC("raw_tracepoint/kfree_skb")
on_event(struct pt_regs * ctx)596b061017fSAlexei Starovoitov int on_event(struct pt_regs *ctx) {
/* keep only the upper 32 bits of the combined value returned by the helper */
597b061017fSAlexei Starovoitov pid_t pid = bpf_get_current_pid_tgid() >> 32;
598b061017fSAlexei Starovoitov struct strobelight_bpf_sample* sample;
599b061017fSAlexei Starovoitov struct task_struct *task;
600b061017fSAlexei Starovoitov uint32_t zero = 0;
601b061017fSAlexei Starovoitov uint64_t ktime_ns;
602b061017fSAlexei Starovoitov void *sample_end;
603b061017fSAlexei Starovoitov
/* the sample is written in place into the sample_heap element at key 0 */
604b061017fSAlexei Starovoitov sample = bpf_map_lookup_elem(&sample_heap, &zero);
605b061017fSAlexei Starovoitov if (!sample)
606b061017fSAlexei Starovoitov return 0; /* this will never happen */
607b061017fSAlexei Starovoitov
608b061017fSAlexei Starovoitov sample->pid = pid;
609b061017fSAlexei Starovoitov bpf_get_current_comm(&sample->comm, TASK_COMM_LEN);
/* the timestamp is stored in the sample and reused below to pick a stack map */
610b061017fSAlexei Starovoitov ktime_ns = bpf_ktime_get_ns();
611b061017fSAlexei Starovoitov sample->ktime = ktime_ns;
612b061017fSAlexei Starovoitov
/*
 * read_strobe_meta() returns either NULL or a pointer right after the end
 * of the metadata payload; has_meta records which of the two happened.
 */
613b061017fSAlexei Starovoitov task = (struct task_struct *)bpf_get_current_task();
614b061017fSAlexei Starovoitov sample_end = read_strobe_meta(task, &sample->metadata);
615b061017fSAlexei Starovoitov sample->has_meta = sample_end != NULL;
/* on NULL, fall back to the address of the metadata field as the sample end */
616b061017fSAlexei Starovoitov sample_end = sample_end ? : &sample->metadata;
617b061017fSAlexei Starovoitov
/*
 * Bit STACK_TABLE_EPOCH_SHIFT of the timestamp selects which of the two
 * stack maps (stacks_1 or stacks_0) receives both the kernel and the user
 * stack id for this sample.
 */
618b061017fSAlexei Starovoitov if ((ktime_ns >> STACK_TABLE_EPOCH_SHIFT) & 1) {
619b061017fSAlexei Starovoitov sample->kernel_stack_id = bpf_get_stackid(ctx, &stacks_1, 0);
620b061017fSAlexei Starovoitov sample->user_stack_id = bpf_get_stackid(ctx, &stacks_1, BPF_F_USER_STACK);
621b061017fSAlexei Starovoitov } else {
622b061017fSAlexei Starovoitov sample->kernel_stack_id = bpf_get_stackid(ctx, &stacks_0, 0);
623b061017fSAlexei Starovoitov sample->user_stack_id = bpf_get_stackid(ctx, &stacks_0, BPF_F_USER_STACK);
624b061017fSAlexei Starovoitov }
625b061017fSAlexei Starovoitov
/* byte distance from the start of the sample to sample_end */
626b061017fSAlexei Starovoitov uint64_t sample_size = sample_end - (void *)sample;
/*
 * NOTE(review): the size passed to bpf_perf_event_output() is one byte more
 * than sample_size; presumably this keeps the length provably non-zero for
 * the verifier - confirm before changing.
 */
627b061017fSAlexei Starovoitov /* should always be true */
628b061017fSAlexei Starovoitov if (sample_size < sizeof(struct strobelight_bpf_sample))
629b061017fSAlexei Starovoitov bpf_perf_event_output(ctx, &samples, 0, sample, 1 + sample_size);
630b061017fSAlexei Starovoitov return 0;
631b061017fSAlexei Starovoitov }
632b061017fSAlexei Starovoitov
/* License string for this BPF object, placed in the "license" section via SEC(). */
633b061017fSAlexei Starovoitov char _license[] SEC("license") = "GPL";
634