1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2019 Facebook
3 
#include <errno.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <linux/filter.h>

#include <bpf/bpf.h>

#include "bpf_rlimit.h"
#include "bpf_util.h"
#include "cgroup_helpers.h"
18 
/* Cgroup path handed to create_and_get_cgroup() and join_cgroup() in main(). */
#define CG_PATH			"/foo"
/* Capacity of sysctl_test.insns and the range scanned by probe_prog_length(). */
#define MAX_INSNS		512

/*
 * Log buffer passed to bpf_load_program_xattr(); its contents are printed as
 * "Verifier output" when a program unexpectedly fails to load.
 */
char bpf_log_buf[BPF_LOG_BUF_SIZE];
23 
/*
 * One table-driven test case: a raw BPF program, the attach type to use, the
 * sysctl to access and how, and the outcome the harness expects.
 */
struct sysctl_test {
	/* Human-readable name printed by run_test_case(). */
	const char *descr;
	/*
	 * Program instructions. Unused trailing slots stay zero-initialized;
	 * probe_prog_length() relies on that to find the program length.
	 */
	struct bpf_insn	insns[MAX_INSNS];
	/* Attach type passed to bpf_prog_attach(). */
	enum bpf_attach_type attach_type;
	/* Sysctl name relative to /proc/sys/, e.g. "kernel/ostype". */
	const char *sysctl;
	/* open(2) flags; O_RDONLY selects a read, O_WRONLY selects a write. */
	int open_flags;
	/* Value written to the sysctl; must be set when open_flags is O_WRONLY. */
	const char *newval;
	/* Expected outcome, checked step by step in run_test_case(). */
	enum {
		LOAD_REJECT,	/* loading the program is expected to fail */
		ATTACH_REJECT,	/* attaching to the cgroup is expected to fail */
		OP_EPERM,	/* sysctl access is expected to fail with EPERM */
		SUCCESS,	/* load, attach and access are all expected to succeed */
	} result;
};
38 
/*
 * Table of test cases, executed in order by run_tests().
 *
 * Each program returns 1 to allow the sysctl access or 0 to deny it. Jump
 * offsets are hand-counted in instructions; BPF_LD_IMM64 occupies two
 * instruction slots.
 *
 * The string checks load 8 bytes of the buffer as one 64-bit word and compare
 * it with an immediate whose bytes spell the expected text starting from the
 * least significant byte (e.g. 0x006d656d5f706374 is "tcp_mem\0"), so the
 * constants match as written only on a little-endian host.
 */
static struct sysctl_test tests[] = {
	{
		.descr = "sysctl wrong attach_type",
		.insns = {
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_EXIT_INSN(),
		},
		/* Deliberately not BPF_CGROUP_SYSCTL: attach must be rejected. */
		.attach_type = 0,
		.sysctl = "kernel/ostype",
		.open_flags = O_RDONLY,
		.result = ATTACH_REJECT,
	},
	{
		.descr = "sysctl:read allow all",
		.insns = {
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "kernel/ostype",
		.open_flags = O_RDONLY,
		.result = SUCCESS,
	},
	{
		.descr = "sysctl:read deny all",
		.insns = {
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "kernel/ostype",
		.open_flags = O_RDONLY,
		.result = OP_EPERM,
	},
	{
		.descr = "ctx:write sysctl:read read ok",
		.insns = {
			/* If (write) */
			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
				    offsetof(struct bpf_sysctl, write)),
			BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 1, 2),

			/* return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_JMP_A(1),

			/* else return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "kernel/ostype",
		.open_flags = O_RDONLY,
		.result = SUCCESS,
	},
	{
		.descr = "ctx:write sysctl:write read ok",
		.insns = {
			/*
			 * If (write) -- narrow 1-byte load of the field that
			 * the previous test case reads as a 32-bit word.
			 */
			BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1,
				    offsetof(struct bpf_sysctl, write)),
			BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 1, 2),

			/* return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_JMP_A(1),

			/* else return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "kernel/domainname",
		.open_flags = O_WRONLY,
		.newval = "(none)", /* same as default, should fail anyway */
		.result = OP_EPERM,
	},
	{
		.descr = "ctx:write sysctl:read write reject",
		.insns = {
			/* write = X: storing to the ctx field must fail to load */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
				    offsetof(struct bpf_sysctl, write)),
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "kernel/ostype",
		.open_flags = O_RDONLY,
		.result = LOAD_REJECT,
	},
	{
		.descr = "sysctl_get_name sysctl_value:base ok",
		.insns = {
			/* sysctl_get_name arg2 (buf): zeroed 8-byte stack slot */
			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),

			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),

			/* sysctl_get_name arg3 (buf_len) */
			BPF_MOV64_IMM(BPF_REG_3, 8),

			/* sysctl_get_name arg4 (flags) */
			BPF_MOV64_IMM(BPF_REG_4, BPF_F_SYSCTL_BASE_NAME),

			/* sysctl_get_name(ctx, buf, buf_len, flags) */
			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name),

			/* if (ret == expected && */
			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, sizeof("tcp_mem") - 1, 6),
			/*     buf == "tcp_mem\0") */
			BPF_LD_IMM64(BPF_REG_8, 0x006d656d5f706374ULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),

			/* return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_JMP_A(1),

			/* else return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "net/ipv4/tcp_mem",
		.open_flags = O_RDONLY,
		.result = SUCCESS,
	},
	{
		.descr = "sysctl_get_name sysctl_value:base E2BIG truncated",
		.insns = {
			/* sysctl_get_name arg2 (buf): zeroed 8-byte stack slot */
			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),

			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),

			/* sysctl_get_name arg3 (buf_len) too small */
			BPF_MOV64_IMM(BPF_REG_3, 7),

			/* sysctl_get_name arg4 (flags) */
			BPF_MOV64_IMM(BPF_REG_4, BPF_F_SYSCTL_BASE_NAME),

			/* sysctl_get_name(ctx, buf, buf_len, flags) */
			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name),

			/* if (ret == expected && */
			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6),

			/*     buf[0:7] == "tcp_me\0") */
			BPF_LD_IMM64(BPF_REG_8, 0x00656d5f706374ULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),

			/* return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_JMP_A(1),

			/* else return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "net/ipv4/tcp_mem",
		.open_flags = O_RDONLY,
		.result = SUCCESS,
	},
	{
		.descr = "sysctl_get_name sysctl:full ok",
		.insns = {
			/* sysctl_get_name arg2 (buf): zeroed 24-byte stack area */
			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24),
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8),
			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16),

			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),

			/* sysctl_get_name arg3 (buf_len) */
			BPF_MOV64_IMM(BPF_REG_3, 17),

			/* sysctl_get_name arg4 (flags) */
			BPF_MOV64_IMM(BPF_REG_4, 0),

			/* sysctl_get_name(ctx, buf, buf_len, flags) */
			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name),

			/* if (ret == expected && */
			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 16, 14),

			/*     buf[0:8] == "net/ipv4" && */
			BPF_LD_IMM64(BPF_REG_8, 0x347670692f74656eULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 10),

			/*     buf[8:16] == "/tcp_mem" && */
			BPF_LD_IMM64(BPF_REG_8, 0x6d656d5f7063742fULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6),

			/*     buf[16:24] == "\0") */
			BPF_LD_IMM64(BPF_REG_8, 0x0ULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 16),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),

			/* return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_JMP_A(1),

			/* else return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "net/ipv4/tcp_mem",
		.open_flags = O_RDONLY,
		.result = SUCCESS,
	},
	{
		.descr = "sysctl_get_name sysctl:full E2BIG truncated",
		.insns = {
			/* sysctl_get_name arg2 (buf): zeroed 16-byte stack area */
			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -16),
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8),

			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),

			/* sysctl_get_name arg3 (buf_len) */
			BPF_MOV64_IMM(BPF_REG_3, 16),

			/* sysctl_get_name arg4 (flags) */
			BPF_MOV64_IMM(BPF_REG_4, 0),

			/* sysctl_get_name(ctx, buf, buf_len, flags) */
			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name),

			/* if (ret == expected && */
			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 10),

			/*     buf[0:8] == "net/ipv4" && */
			BPF_LD_IMM64(BPF_REG_8, 0x347670692f74656eULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6),

			/*     buf[8:16] == "/tcp_me\0") */
			BPF_LD_IMM64(BPF_REG_8, 0x00656d5f7063742fULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),

			/* return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_JMP_A(1),

			/* else return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "net/ipv4/tcp_mem",
		.open_flags = O_RDONLY,
		.result = SUCCESS,
	},
	{
		.descr = "sysctl_get_name sysctl:full E2BIG truncated small",
		.insns = {
			/* sysctl_get_name arg2 (buf): zeroed 8-byte stack slot */
			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),

			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),

			/* sysctl_get_name arg3 (buf_len) */
			BPF_MOV64_IMM(BPF_REG_3, 7),

			/* sysctl_get_name arg4 (flags) */
			BPF_MOV64_IMM(BPF_REG_4, 0),

			/* sysctl_get_name(ctx, buf, buf_len, flags) */
			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name),

			/* if (ret == expected && */
			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6),

			/*     buf[0:8] == "net/ip\0") */
			BPF_LD_IMM64(BPF_REG_8, 0x000070692f74656eULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),

			/* return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_JMP_A(1),

			/* else return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "net/ipv4/tcp_mem",
		.open_flags = O_RDONLY,
		.result = SUCCESS,
	},
};
354 
355 static size_t probe_prog_length(const struct bpf_insn *fp)
356 {
357 	size_t len;
358 
359 	for (len = MAX_INSNS - 1; len > 0; --len)
360 		if (fp[len].code != 0 || fp[len].imm != 0)
361 			break;
362 	return len + 1;
363 }
364 
365 static int load_sysctl_prog(struct sysctl_test *test, const char *sysctl_path)
366 {
367 	struct bpf_insn *prog = test->insns;
368 	struct bpf_load_program_attr attr;
369 	int ret;
370 
371 	memset(&attr, 0, sizeof(struct bpf_load_program_attr));
372 	attr.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL;
373 	attr.insns = prog;
374 	attr.insns_cnt = probe_prog_length(attr.insns);
375 	attr.license = "GPL";
376 
377 	ret = bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
378 	if (ret < 0 && test->result != LOAD_REJECT) {
379 		log_err(">>> Loading program error.\n"
380 			">>> Verifier output:\n%s\n-------\n", bpf_log_buf);
381 	}
382 
383 	return ret;
384 }
385 
386 static int access_sysctl(const char *sysctl_path,
387 			 const struct sysctl_test *test)
388 {
389 	int err = 0;
390 	int fd;
391 
392 	fd = open(sysctl_path, test->open_flags | O_CLOEXEC);
393 	if (fd < 0)
394 		return fd;
395 
396 	if (test->open_flags == O_RDONLY) {
397 		char buf[128];
398 
399 		if (read(fd, buf, sizeof(buf)) == -1)
400 			goto err;
401 	} else if (test->open_flags == O_WRONLY) {
402 		if (!test->newval) {
403 			log_err("New value for sysctl is not set");
404 			goto err;
405 		}
406 		if (write(fd, test->newval, strlen(test->newval)) == -1)
407 			goto err;
408 	} else {
409 		log_err("Unexpected sysctl access: neither read nor write");
410 		goto err;
411 	}
412 
413 	goto out;
414 err:
415 	err = -1;
416 out:
417 	close(fd);
418 	return err;
419 }
420 
421 static int run_test_case(int cgfd, struct sysctl_test *test)
422 {
423 	enum bpf_attach_type atype = test->attach_type;
424 	char sysctl_path[128];
425 	int progfd = -1;
426 	int err = 0;
427 
428 	printf("Test case: %s .. ", test->descr);
429 
430 	snprintf(sysctl_path, sizeof(sysctl_path), "/proc/sys/%s",
431 		 test->sysctl);
432 
433 	progfd = load_sysctl_prog(test, sysctl_path);
434 	if (progfd < 0) {
435 		if (test->result == LOAD_REJECT)
436 			goto out;
437 		else
438 			goto err;
439 	}
440 
441 	if (bpf_prog_attach(progfd, cgfd, atype, BPF_F_ALLOW_OVERRIDE) == -1) {
442 		if (test->result == ATTACH_REJECT)
443 			goto out;
444 		else
445 			goto err;
446 	}
447 
448 	if (access_sysctl(sysctl_path, test) == -1) {
449 		if (test->result == OP_EPERM && errno == EPERM)
450 			goto out;
451 		else
452 			goto err;
453 	}
454 
455 	if (test->result != SUCCESS) {
456 		log_err("Unexpected failure");
457 		goto err;
458 	}
459 
460 	goto out;
461 err:
462 	err = -1;
463 out:
464 	/* Detaching w/o checking return code: best effort attempt. */
465 	if (progfd != -1)
466 		bpf_prog_detach(cgfd, atype);
467 	close(progfd);
468 	printf("[%s]\n", err ? "FAIL" : "PASS");
469 	return err;
470 }
471 
472 static int run_tests(int cgfd)
473 {
474 	int passes = 0;
475 	int fails = 0;
476 	int i;
477 
478 	for (i = 0; i < ARRAY_SIZE(tests); ++i) {
479 		if (run_test_case(cgfd, &tests[i]))
480 			++fails;
481 		else
482 			++passes;
483 	}
484 	printf("Summary: %d PASSED, %d FAILED\n", passes, fails);
485 	return fails ? -1 : 0;
486 }
487 
488 int main(int argc, char **argv)
489 {
490 	int cgfd = -1;
491 	int err = 0;
492 
493 	if (setup_cgroup_environment())
494 		goto err;
495 
496 	cgfd = create_and_get_cgroup(CG_PATH);
497 	if (cgfd < 0)
498 		goto err;
499 
500 	if (join_cgroup(CG_PATH))
501 		goto err;
502 
503 	if (run_tests(cgfd))
504 		goto err;
505 
506 	goto out;
507 err:
508 	err = -1;
509 out:
510 	close(cgfd);
511 	cleanup_cgroup_environment();
512 	return err;
513 }
514