1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2019 Facebook
3 
4 #include <fcntl.h>
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <unistd.h>
10 
11 #include <linux/filter.h>
12 
13 #include <bpf/bpf.h>
14 
15 #include "bpf_rlimit.h"
16 #include "bpf_util.h"
17 #include "cgroup_helpers.h"
18 
/* Path handed to create_and_get_cgroup() and join_cgroup() in main(). */
#define CG_PATH			"/foo"
/* Capacity of sysctl_test.insns[]; probe_prog_length() scans this many slots. */
#define MAX_INSNS		512
/*
 * Placeholder immediate used in test programs. The instruction selected by
 * sysctl_test.fixup_value_insn gets this immediate overwritten by
 * fixup_sysctl_value() before the program is loaded.
 */
#define FIXUP_SYSCTL_VALUE	0

/* Log buffer passed to bpf_load_program_xattr(); printed on load errors. */
char bpf_log_buf[BPF_LOG_BUF_SIZE];
24 
/*
 * One test case: a raw BPF program plus the sysctl access that should trigger
 * it and the outcome run_test_case() expects.
 */
struct sysctl_test {
	/* Name printed by run_test_case() before the PASS/FAIL verdict. */
	const char *descr;
	/*
	 * Index into insns[] of the instruction whose immediate is replaced
	 * with the sysctl's current value before loading. Zero means "no
	 * fixup" (load_sysctl_prog() only fixes up when this is non-zero).
	 */
	size_t fixup_value_insn;
	/*
	 * Program instructions. The program length is not stored; it is
	 * derived by probe_prog_length() from the last non-zero slot.
	 */
	struct bpf_insn	insns[MAX_INSNS];
	/* Attach type passed to bpf_prog_attach() / bpf_prog_detach(). */
	enum bpf_attach_type attach_type;
	/* Sysctl name relative to "/proc/sys/". */
	const char *sysctl;
	/* O_RDONLY or O_WRONLY; selects read vs. write in access_sysctl(). */
	int open_flags;
	/* Value written to the sysctl; required when open_flags is O_WRONLY. */
	const char *newval;
	/* Expected outcome, checked stage by stage in run_test_case(). */
	enum {
		/* Loading the program is expected to fail. */
		LOAD_REJECT,
		/* bpf_prog_attach() is expected to fail. */
		ATTACH_REJECT,
		/* The sysctl access is expected to fail with errno == EPERM. */
		OP_EPERM,
		/* Load, attach and the sysctl access are all expected to succeed. */
		SUCCESS,
	} result;
};
40 
/*
 * Table of test cases executed in order by run_tests().
 *
 * Each program leaves its verdict in R0 before BPF_EXIT_INSN(): 1 allows the
 * sysctl access and 0 denies it (see the "allow all" / "deny all" entries and
 * the ALLOW/DENY comments below). Entries whose description ends in "ok" but
 * whose result is OP_EPERM deliberately return DENY when the helper behaved
 * as expected, so a correct helper shows up as an EPERM on the access.
 *
 * 64-bit and 32-bit immediates that are compared against buffer contents
 * spell the expected string with the first character in the least-significant
 * byte (e.g. 0x006d656d5f706374 is "tcp_mem\0").
 * NOTE(review): this presumably only matches on little-endian hosts - confirm
 * before running the tests on a big-endian machine.
 */
static struct sysctl_test tests[] = {
	{
		.descr = "sysctl wrong attach_type",
		.insns = {
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_EXIT_INSN(),
		},
		.attach_type = 0,
		.sysctl = "kernel/ostype",
		.open_flags = O_RDONLY,
		.result = ATTACH_REJECT,
	},
	{
		.descr = "sysctl:read allow all",
		.insns = {
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "kernel/ostype",
		.open_flags = O_RDONLY,
		.result = SUCCESS,
	},
	{
		.descr = "sysctl:read deny all",
		.insns = {
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "kernel/ostype",
		.open_flags = O_RDONLY,
		.result = OP_EPERM,
	},
	{
		.descr = "ctx:write sysctl:read read ok",
		.insns = {
			/* If (write) */
			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
				    offsetof(struct bpf_sysctl, write)),
			BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 1, 2),

			/* return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_JMP_A(1),

			/* else return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "kernel/ostype",
		.open_flags = O_RDONLY,
		.result = SUCCESS,
	},
	{
		.descr = "ctx:write sysctl:write read ok",
		.insns = {
			/* If (write) */
			BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1,
				    offsetof(struct bpf_sysctl, write)),
			BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 1, 2),

			/* return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_JMP_A(1),

			/* else return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "kernel/domainname",
		.open_flags = O_WRONLY,
		.newval = "(none)", /* same as default, should fail anyway */
		.result = OP_EPERM,
	},
	{
		.descr = "ctx:write sysctl:read write reject",
		.insns = {
			/* write = X */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
				    offsetof(struct bpf_sysctl, write)),
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "kernel/ostype",
		.open_flags = O_RDONLY,
		.result = LOAD_REJECT,
	},
	{
		.descr = "sysctl_get_name sysctl_value:base ok",
		.insns = {
			/* sysctl_get_name arg2 (buf) */
			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),

			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),

			/* sysctl_get_name arg3 (buf_len) */
			BPF_MOV64_IMM(BPF_REG_3, 8),

			/* sysctl_get_name arg4 (flags) */
			BPF_MOV64_IMM(BPF_REG_4, BPF_F_SYSCTL_BASE_NAME),

			/* sysctl_get_name(ctx, buf, buf_len, flags) */
			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name),

			/* if (ret == expected && */
			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, sizeof("tcp_mem") - 1, 6),
			/*     buf == "tcp_mem\0") */
			BPF_LD_IMM64(BPF_REG_8, 0x006d656d5f706374ULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),

			/* return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_JMP_A(1),

			/* else return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "net/ipv4/tcp_mem",
		.open_flags = O_RDONLY,
		.result = SUCCESS,
	},
	{
		.descr = "sysctl_get_name sysctl_value:base E2BIG truncated",
		.insns = {
			/* sysctl_get_name arg2 (buf) */
			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),

			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),

			/* sysctl_get_name arg3 (buf_len) too small */
			BPF_MOV64_IMM(BPF_REG_3, 7),

			/* sysctl_get_name arg4 (flags) */
			BPF_MOV64_IMM(BPF_REG_4, BPF_F_SYSCTL_BASE_NAME),

			/* sysctl_get_name(ctx, buf, buf_len, flags) */
			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name),

			/* if (ret == expected && */
			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6),

			/*     buf[0:7] == "tcp_me\0") */
			BPF_LD_IMM64(BPF_REG_8, 0x00656d5f706374ULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),

			/* return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_JMP_A(1),

			/* else return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "net/ipv4/tcp_mem",
		.open_flags = O_RDONLY,
		.result = SUCCESS,
	},
	{
		.descr = "sysctl_get_name sysctl:full ok",
		.insns = {
			/* sysctl_get_name arg2 (buf) */
			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24),
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8),
			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16),

			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),

			/* sysctl_get_name arg3 (buf_len) */
			BPF_MOV64_IMM(BPF_REG_3, 17),

			/* sysctl_get_name arg4 (flags) */
			BPF_MOV64_IMM(BPF_REG_4, 0),

			/* sysctl_get_name(ctx, buf, buf_len, flags) */
			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name),

			/* if (ret == expected && */
			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 16, 14),

			/*     buf[0:8] == "net/ipv4" && */
			BPF_LD_IMM64(BPF_REG_8, 0x347670692f74656eULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 10),

			/*     buf[8:16] == "/tcp_mem" && */
			BPF_LD_IMM64(BPF_REG_8, 0x6d656d5f7063742fULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6),

			/*     buf[16:24] == "\0") */
			BPF_LD_IMM64(BPF_REG_8, 0x0ULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 16),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),

			/* return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_JMP_A(1),

			/* else return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "net/ipv4/tcp_mem",
		.open_flags = O_RDONLY,
		.result = SUCCESS,
	},
	{
		.descr = "sysctl_get_name sysctl:full E2BIG truncated",
		.insns = {
			/* sysctl_get_name arg2 (buf) */
			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -16),
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8),

			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),

			/* sysctl_get_name arg3 (buf_len) */
			BPF_MOV64_IMM(BPF_REG_3, 16),

			/* sysctl_get_name arg4 (flags) */
			BPF_MOV64_IMM(BPF_REG_4, 0),

			/* sysctl_get_name(ctx, buf, buf_len, flags) */
			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name),

			/* if (ret == expected && */
			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 10),

			/*     buf[0:8] == "net/ipv4" && */
			BPF_LD_IMM64(BPF_REG_8, 0x347670692f74656eULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6),

			/*     buf[8:16] == "/tcp_me\0") */
			BPF_LD_IMM64(BPF_REG_8, 0x00656d5f7063742fULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),

			/* return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_JMP_A(1),

			/* else return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "net/ipv4/tcp_mem",
		.open_flags = O_RDONLY,
		.result = SUCCESS,
	},
	{
		.descr = "sysctl_get_name sysctl:full E2BIG truncated small",
		.insns = {
			/* sysctl_get_name arg2 (buf) */
			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),

			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),

			/* sysctl_get_name arg3 (buf_len) */
			BPF_MOV64_IMM(BPF_REG_3, 7),

			/* sysctl_get_name arg4 (flags) */
			BPF_MOV64_IMM(BPF_REG_4, 0),

			/* sysctl_get_name(ctx, buf, buf_len, flags) */
			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name),

			/* if (ret == expected && */
			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6),

			/*     buf[0:8] == "net/ip\0") */
			BPF_LD_IMM64(BPF_REG_8, 0x000070692f74656eULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),

			/* return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_JMP_A(1),

			/* else return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "net/ipv4/tcp_mem",
		.open_flags = O_RDONLY,
		.result = SUCCESS,
	},
	{
		.descr = "sysctl_get_current_value sysctl:read ok, gt",
		.insns = {
			/* sysctl_get_current_value arg2 (buf) */
			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),

			/* sysctl_get_current_value arg3 (buf_len) */
			BPF_MOV64_IMM(BPF_REG_3, 8),

			/* sysctl_get_current_value(ctx, buf, buf_len) */
			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value),

			/* if (ret == expected && */
			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 6, 6),

			/*     buf[0:6] == "Linux\n\0") */
			BPF_LD_IMM64(BPF_REG_8, 0x000a78756e694cULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),

			/* return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_JMP_A(1),

			/* else return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "kernel/ostype",
		.open_flags = O_RDONLY,
		.result = SUCCESS,
	},
	{
		.descr = "sysctl_get_current_value sysctl:read ok, eq",
		.insns = {
			/* sysctl_get_current_value arg2 (buf) */
			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_STX_MEM(BPF_B, BPF_REG_7, BPF_REG_0, 7),

			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),

			/* sysctl_get_current_value arg3 (buf_len) */
			BPF_MOV64_IMM(BPF_REG_3, 7),

			/* sysctl_get_current_value(ctx, buf, buf_len) */
			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value),

			/* if (ret == expected && */
			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 6, 6),

			/*     buf[0:6] == "Linux\n\0") */
			BPF_LD_IMM64(BPF_REG_8, 0x000a78756e694cULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),

			/* return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_JMP_A(1),

			/* else return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "kernel/ostype",
		.open_flags = O_RDONLY,
		.result = SUCCESS,
	},
	{
		.descr = "sysctl_get_current_value sysctl:read E2BIG truncated",
		.insns = {
			/* sysctl_get_current_value arg2 (buf) */
			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_STX_MEM(BPF_H, BPF_REG_7, BPF_REG_0, 6),

			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),

			/* sysctl_get_current_value arg3 (buf_len) */
			BPF_MOV64_IMM(BPF_REG_3, 6),

			/* sysctl_get_current_value(ctx, buf, buf_len) */
			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value),

			/* if (ret == expected && */
			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6),

			/*     buf[0:6] == "Linux\0") */
			BPF_LD_IMM64(BPF_REG_8, 0x000078756e694cULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),

			/* return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_JMP_A(1),

			/* else return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "kernel/ostype",
		.open_flags = O_RDONLY,
		.result = SUCCESS,
	},
	{
		.descr = "sysctl_get_current_value sysctl:read EINVAL",
		.insns = {
			/* sysctl_get_current_value arg2 (buf) */
			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),

			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),

			/* sysctl_get_current_value arg3 (buf_len) */
			BPF_MOV64_IMM(BPF_REG_3, 8),

			/* sysctl_get_current_value(ctx, buf, buf_len) */
			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value),

			/* if (ret == expected && */
			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 4),

			/*     buf[0:8] is NUL-filled) */
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
			BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2),

			/* return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_JMP_A(1),

			/* else return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "net/ipv6/conf/lo/stable_secret", /* -EIO */
		.open_flags = O_RDONLY,
		.result = OP_EPERM,
	},
	{
		.descr = "sysctl_get_current_value sysctl:write ok",
		/*
		 * Instruction 6 is the BPF_LD_IMM64 below; its immediate is
		 * patched with the sysctl's current value before loading.
		 */
		.fixup_value_insn = 6,
		.insns = {
			/* sysctl_get_current_value arg2 (buf) */
			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),

			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),

			/* sysctl_get_current_value arg3 (buf_len) */
			BPF_MOV64_IMM(BPF_REG_3, 8),

			/* sysctl_get_current_value(ctx, buf, buf_len) */
			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value),

			/* if (ret == expected && */
			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 4, 6),

			/*     buf[0:4] == expected) */
			BPF_LD_IMM64(BPF_REG_8, FIXUP_SYSCTL_VALUE),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),

			/* return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_JMP_A(1),

			/* else return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "net/ipv4/route/mtu_expires",
		.open_flags = O_WRONLY,
		.newval = "600", /* same as default, should fail anyway */
		.result = OP_EPERM,
	},
	{
		.descr = "sysctl_get_new_value sysctl:read EINVAL",
		.insns = {
			/* sysctl_get_new_value arg2 (buf) */
			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),

			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),

			/* sysctl_get_new_value arg3 (buf_len) */
			BPF_MOV64_IMM(BPF_REG_3, 8),

			/* sysctl_get_new_value(ctx, buf, buf_len) */
			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value),

			/* if (ret == expected) */
			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2),

			/* return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_JMP_A(1),

			/* else return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "net/ipv4/tcp_mem",
		.open_flags = O_RDONLY,
		.result = SUCCESS,
	},
	{
		.descr = "sysctl_get_new_value sysctl:write ok",
		.insns = {
			/* sysctl_get_new_value arg2 (buf) */
			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),

			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),

			/* sysctl_get_new_value arg3 (buf_len) */
			BPF_MOV64_IMM(BPF_REG_3, 4),

			/* sysctl_get_new_value(ctx, buf, buf_len) */
			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value),

			/* if (ret == expected && */
			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 4),

			/*     buf[0:4] == "606\0") */
			BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_7, 0),
			BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0x00363036, 2),

			/* return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_JMP_A(1),

			/* else return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "net/ipv4/route/mtu_expires",
		.open_flags = O_WRONLY,
		.newval = "606",
		.result = OP_EPERM,
	},
	{
		.descr = "sysctl_get_new_value sysctl:write ok long",
		.insns = {
			/* sysctl_get_new_value arg2 (buf) */
			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24),

			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),

			/* sysctl_get_new_value arg3 (buf_len) */
			BPF_MOV64_IMM(BPF_REG_3, 24),

			/* sysctl_get_new_value(ctx, buf, buf_len) */
			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value),

			/* if (ret == expected && */
			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 23, 14),

			/*     buf[0:8] == "3000000 " && */
			BPF_LD_IMM64(BPF_REG_8, 0x2030303030303033ULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 10),

			/*     buf[8:16] == "4000000 " && */
			BPF_LD_IMM64(BPF_REG_8, 0x2030303030303034ULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6),

			/*     buf[16:24] == "6000000\0") */
			BPF_LD_IMM64(BPF_REG_8, 0x0030303030303036ULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 16),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),

			/* return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_JMP_A(1),

			/* else return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "net/ipv4/tcp_mem",
		.open_flags = O_WRONLY,
		.newval = "3000000 4000000 6000000",
		.result = OP_EPERM,
	},
	{
		.descr = "sysctl_get_new_value sysctl:write E2BIG",
		.insns = {
			/* sysctl_get_new_value arg2 (buf) */
			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_STX_MEM(BPF_B, BPF_REG_7, BPF_REG_0, 3),

			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),

			/* sysctl_get_new_value arg3 (buf_len) */
			BPF_MOV64_IMM(BPF_REG_3, 3),

			/* sysctl_get_new_value(ctx, buf, buf_len) */
			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value),

			/* if (ret == expected && */
			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 4),

			/*     buf[0:3] == "60\0") */
			BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_7, 0),
			BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0x003036, 2),

			/* return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_JMP_A(1),

			/* else return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "net/ipv4/route/mtu_expires",
		.open_flags = O_WRONLY,
		.newval = "606",
		.result = OP_EPERM,
	},
	{
		.descr = "sysctl_set_new_value sysctl:read EINVAL",
		.insns = {
			/* sysctl_set_new_value arg2 (buf) */
			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
			BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),

			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),

			/* sysctl_set_new_value arg3 (buf_len) */
			BPF_MOV64_IMM(BPF_REG_3, 3),

			/* sysctl_set_new_value(ctx, buf, buf_len) */
			BPF_EMIT_CALL(BPF_FUNC_sysctl_set_new_value),

			/* if (ret == expected) */
			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2),

			/* return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_JMP_A(1),

			/* else return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "net/ipv4/route/mtu_expires",
		.open_flags = O_RDONLY,
		.result = SUCCESS,
	},
	{
		.descr = "sysctl_set_new_value sysctl:write ok",
		/*
		 * Instruction 2 is the BPF_MOV64_IMM below; its immediate is
		 * patched with the sysctl's current value before loading.
		 */
		.fixup_value_insn = 2,
		.insns = {
			/* sysctl_set_new_value arg2 (buf) */
			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
			BPF_MOV64_IMM(BPF_REG_0, FIXUP_SYSCTL_VALUE),
			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),

			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),

			/* sysctl_set_new_value arg3 (buf_len) */
			BPF_MOV64_IMM(BPF_REG_3, 3),

			/* sysctl_set_new_value(ctx, buf, buf_len) */
			BPF_EMIT_CALL(BPF_FUNC_sysctl_set_new_value),

			/* if (ret == expected) */
			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),

			/* return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_JMP_A(1),

			/* else return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "net/ipv4/route/mtu_expires",
		.open_flags = O_WRONLY,
		.newval = "606",
		.result = SUCCESS,
	},
};
762 
763 static size_t probe_prog_length(const struct bpf_insn *fp)
764 {
765 	size_t len;
766 
767 	for (len = MAX_INSNS - 1; len > 0; --len)
768 		if (fp[len].code != 0 || fp[len].imm != 0)
769 			break;
770 	return len + 1;
771 }
772 
773 static int fixup_sysctl_value(const char *buf, size_t buf_len,
774 			      struct bpf_insn *prog, size_t insn_num)
775 {
776 	uint32_t value_num = 0;
777 	uint8_t c, i;
778 
779 	if (buf_len > sizeof(value_num)) {
780 		log_err("Value is too big (%zd) to use in fixup", buf_len);
781 		return -1;
782 	}
783 
784 	for (i = 0; i < buf_len; ++i) {
785 		c = buf[i];
786 		value_num |= (c << i * 8);
787 	}
788 
789 	prog[insn_num].imm = value_num;
790 
791 	return 0;
792 }
793 
794 static int load_sysctl_prog(struct sysctl_test *test, const char *sysctl_path)
795 {
796 	struct bpf_insn *prog = test->insns;
797 	struct bpf_load_program_attr attr;
798 	int ret;
799 
800 	memset(&attr, 0, sizeof(struct bpf_load_program_attr));
801 	attr.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL;
802 	attr.insns = prog;
803 	attr.insns_cnt = probe_prog_length(attr.insns);
804 	attr.license = "GPL";
805 
806 	if (test->fixup_value_insn) {
807 		char buf[128];
808 		ssize_t len;
809 		int fd;
810 
811 		fd = open(sysctl_path, O_RDONLY | O_CLOEXEC);
812 		if (fd < 0) {
813 			log_err("open(%s) failed", sysctl_path);
814 			return -1;
815 		}
816 		len = read(fd, buf, sizeof(buf));
817 		if (len == -1) {
818 			log_err("read(%s) failed", sysctl_path);
819 			close(fd);
820 			return -1;
821 		}
822 		close(fd);
823 		if (fixup_sysctl_value(buf, len, prog, test->fixup_value_insn))
824 			return -1;
825 	}
826 
827 	ret = bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
828 	if (ret < 0 && test->result != LOAD_REJECT) {
829 		log_err(">>> Loading program error.\n"
830 			">>> Verifier output:\n%s\n-------\n", bpf_log_buf);
831 	}
832 
833 	return ret;
834 }
835 
836 static int access_sysctl(const char *sysctl_path,
837 			 const struct sysctl_test *test)
838 {
839 	int err = 0;
840 	int fd;
841 
842 	fd = open(sysctl_path, test->open_flags | O_CLOEXEC);
843 	if (fd < 0)
844 		return fd;
845 
846 	if (test->open_flags == O_RDONLY) {
847 		char buf[128];
848 
849 		if (read(fd, buf, sizeof(buf)) == -1)
850 			goto err;
851 	} else if (test->open_flags == O_WRONLY) {
852 		if (!test->newval) {
853 			log_err("New value for sysctl is not set");
854 			goto err;
855 		}
856 		if (write(fd, test->newval, strlen(test->newval)) == -1)
857 			goto err;
858 	} else {
859 		log_err("Unexpected sysctl access: neither read nor write");
860 		goto err;
861 	}
862 
863 	goto out;
864 err:
865 	err = -1;
866 out:
867 	close(fd);
868 	return err;
869 }
870 
871 static int run_test_case(int cgfd, struct sysctl_test *test)
872 {
873 	enum bpf_attach_type atype = test->attach_type;
874 	char sysctl_path[128];
875 	int progfd = -1;
876 	int err = 0;
877 
878 	printf("Test case: %s .. ", test->descr);
879 
880 	snprintf(sysctl_path, sizeof(sysctl_path), "/proc/sys/%s",
881 		 test->sysctl);
882 
883 	progfd = load_sysctl_prog(test, sysctl_path);
884 	if (progfd < 0) {
885 		if (test->result == LOAD_REJECT)
886 			goto out;
887 		else
888 			goto err;
889 	}
890 
891 	if (bpf_prog_attach(progfd, cgfd, atype, BPF_F_ALLOW_OVERRIDE) == -1) {
892 		if (test->result == ATTACH_REJECT)
893 			goto out;
894 		else
895 			goto err;
896 	}
897 
898 	if (access_sysctl(sysctl_path, test) == -1) {
899 		if (test->result == OP_EPERM && errno == EPERM)
900 			goto out;
901 		else
902 			goto err;
903 	}
904 
905 	if (test->result != SUCCESS) {
906 		log_err("Unexpected failure");
907 		goto err;
908 	}
909 
910 	goto out;
911 err:
912 	err = -1;
913 out:
914 	/* Detaching w/o checking return code: best effort attempt. */
915 	if (progfd != -1)
916 		bpf_prog_detach(cgfd, atype);
917 	close(progfd);
918 	printf("[%s]\n", err ? "FAIL" : "PASS");
919 	return err;
920 }
921 
922 static int run_tests(int cgfd)
923 {
924 	int passes = 0;
925 	int fails = 0;
926 	int i;
927 
928 	for (i = 0; i < ARRAY_SIZE(tests); ++i) {
929 		if (run_test_case(cgfd, &tests[i]))
930 			++fails;
931 		else
932 			++passes;
933 	}
934 	printf("Summary: %d PASSED, %d FAILED\n", passes, fails);
935 	return fails ? -1 : 0;
936 }
937 
938 int main(int argc, char **argv)
939 {
940 	int cgfd = -1;
941 	int err = 0;
942 
943 	if (setup_cgroup_environment())
944 		goto err;
945 
946 	cgfd = create_and_get_cgroup(CG_PATH);
947 	if (cgfd < 0)
948 		goto err;
949 
950 	if (join_cgroup(CG_PATH))
951 		goto err;
952 
953 	if (run_tests(cgfd))
954 		goto err;
955 
956 	goto out;
957 err:
958 	err = -1;
959 out:
960 	close(cgfd);
961 	cleanup_cgroup_environment();
962 	return err;
963 }
964