1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2019 Facebook
3 
#include <errno.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <linux/filter.h>

#include <bpf/bpf.h>

#include "bpf_rlimit.h"
#include "bpf_util.h"
#include "cgroup_helpers.h"
18 
/* Cgroup path handed to create_and_get_cgroup() and join_cgroup(). */
#define CG_PATH			"/foo"
/* Capacity of sysctl_test.insns; also where probe_prog_length() starts. */
#define MAX_INSNS		512
/* Placeholder immediate that fixup_sysctl_value() overwrites before load. */
#define FIXUP_SYSCTL_VALUE	0

/* Log buffer passed to the program loader; printed when a load fails. */
char bpf_log_buf[BPF_LOG_BUF_SIZE];
24 
/*
 * One test case: a BPF program, where to attach it, which sysctl to access
 * and what outcome to expect.
 */
struct sysctl_test {
	/* Name printed by run_test_case(). */
	const char *descr;
	/*
	 * Index in insns[] of the instruction whose immediate is replaced with
	 * the current sysctl value before loading; 0 means no fixup.
	 */
	size_t fixup_value_insn;
	/* Program text; entries after the last instruction are left zeroed. */
	struct bpf_insn	insns[MAX_INSNS];
	/* Attach type handed to bpf_prog_attach(). */
	enum bpf_attach_type attach_type;
	/* Sysctl name, appended to "/proc/sys/" to build the path. */
	const char *sysctl;
	/* O_RDONLY to read the sysctl or O_WRONLY to write newval to it. */
	int open_flags;
	/* Value written to the sysctl; required when open_flags is O_WRONLY. */
	const char *newval;
	/* Expected outcome of the test case. */
	enum {
		LOAD_REJECT,	/* loading the program fails */
		ATTACH_REJECT,	/* attaching to the cgroup fails */
		OP_EPERM,	/* the sysctl access fails with EPERM */
		SUCCESS,	/* the sysctl access succeeds */
	} result;
};
40 
/*
 * Test cases, run in order by run_tests().
 *
 * Every program ends by putting its verdict in R0: 1 allows the sysctl
 * access and 0 denies it (compare the "allow all" / "deny all" cases and
 * their expected results). The cases that call a helper first check the
 * helper's return value and the bytes it wrote to the stack buffer, and
 * encode whether that check passed in the verdict.
 *
 * Expected strings are compared eight bytes at a time against a 64-bit
 * immediate whose least significant byte is the first character, e.g.
 * 0x006d656d5f706374 is "tcp_mem\0".
 * NOTE(review): that byte order matches a little-endian host; confirm before
 * relying on these cases on big-endian machines.
 */
static struct sysctl_test tests[] = {
	{
		.descr = "sysctl wrong attach_type",
		.insns = {
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_EXIT_INSN(),
		},
		/* Deliberately not BPF_CGROUP_SYSCTL: attach must be refused. */
		.attach_type = 0,
		.sysctl = "kernel/ostype",
		.open_flags = O_RDONLY,
		.result = ATTACH_REJECT,
	},
	{
		.descr = "sysctl:read allow all",
		.insns = {
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "kernel/ostype",
		.open_flags = O_RDONLY,
		.result = SUCCESS,
	},
	{
		.descr = "sysctl:read deny all",
		.insns = {
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "kernel/ostype",
		.open_flags = O_RDONLY,
		.result = OP_EPERM,
	},
	{
		.descr = "ctx:write sysctl:read read ok",
		.insns = {
			/* If (write) */
			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
				    offsetof(struct bpf_sysctl, write)),
			BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 1, 2),

			/* return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_JMP_A(1),

			/* else return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "kernel/ostype",
		.open_flags = O_RDONLY,
		.result = SUCCESS,
	},
	{
		.descr = "ctx:write sysctl:write read ok",
		.insns = {
			/* If (write) -- same check, but with a 1-byte load */
			BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1,
				    offsetof(struct bpf_sysctl, write)),
			BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 1, 2),

			/* return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_JMP_A(1),

			/* else return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "kernel/domainname",
		.open_flags = O_WRONLY,
		.newval = "(none)", /* same as default, should fail anyway */
		.result = OP_EPERM,
	},
	{
		.descr = "ctx:write sysctl:read write reject",
		.insns = {
			/* write = X -- storing to the context must not load */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
				    offsetof(struct bpf_sysctl, write)),
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "kernel/ostype",
		.open_flags = O_RDONLY,
		.result = LOAD_REJECT,
	},
	{
		.descr = "sysctl_get_name sysctl_value:base ok",
		.insns = {
			/* sysctl_get_name arg2 (buf), zeroed 8-byte stack slot */
			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),

			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),

			/* sysctl_get_name arg3 (buf_len) */
			BPF_MOV64_IMM(BPF_REG_3, 8),

			/* sysctl_get_name arg4 (flags) */
			BPF_MOV64_IMM(BPF_REG_4, BPF_F_SYSCTL_BASE_NAME),

			/* sysctl_get_name(ctx, buf, buf_len, flags) */
			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name),

			/* if (ret == expected && */
			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, sizeof("tcp_mem") - 1, 6),
			/*     buf == "tcp_mem\0") */
			BPF_LD_IMM64(BPF_REG_8, 0x006d656d5f706374ULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),

			/* return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_JMP_A(1),

			/* else return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "net/ipv4/tcp_mem",
		.open_flags = O_RDONLY,
		.result = SUCCESS,
	},
	{
		.descr = "sysctl_get_name sysctl_value:base E2BIG truncated",
		.insns = {
			/* sysctl_get_name arg2 (buf) */
			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),

			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),

			/* sysctl_get_name arg3 (buf_len) too small */
			BPF_MOV64_IMM(BPF_REG_3, 7),

			/* sysctl_get_name arg4 (flags) */
			BPF_MOV64_IMM(BPF_REG_4, BPF_F_SYSCTL_BASE_NAME),

			/* sysctl_get_name(ctx, buf, buf_len, flags) */
			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name),

			/* if (ret == expected && */
			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6),

			/*     buf[0:7] == "tcp_me\0") */
			BPF_LD_IMM64(BPF_REG_8, 0x00656d5f706374ULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),

			/* return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_JMP_A(1),

			/* else return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "net/ipv4/tcp_mem",
		.open_flags = O_RDONLY,
		.result = SUCCESS,
	},
	{
		.descr = "sysctl_get_name sysctl:full ok",
		.insns = {
			/* sysctl_get_name arg2 (buf), zeroed 24-byte stack area */
			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24),
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8),
			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16),

			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),

			/* sysctl_get_name arg3 (buf_len) */
			BPF_MOV64_IMM(BPF_REG_3, 17),

			/* sysctl_get_name arg4 (flags) */
			BPF_MOV64_IMM(BPF_REG_4, 0),

			/* sysctl_get_name(ctx, buf, buf_len, flags) */
			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name),

			/* if (ret == expected && */
			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 16, 14),

			/*     buf[0:8] == "net/ipv4" && */
			BPF_LD_IMM64(BPF_REG_8, 0x347670692f74656eULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 10),

			/*     buf[8:16] == "/tcp_mem" && */
			BPF_LD_IMM64(BPF_REG_8, 0x6d656d5f7063742fULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6),

			/*     buf[16:24] == "\0") */
			BPF_LD_IMM64(BPF_REG_8, 0x0ULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 16),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),

			/* return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_JMP_A(1),

			/* else return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "net/ipv4/tcp_mem",
		.open_flags = O_RDONLY,
		.result = SUCCESS,
	},
	{
		.descr = "sysctl_get_name sysctl:full E2BIG truncated",
		.insns = {
			/* sysctl_get_name arg2 (buf) */
			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -16),
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8),

			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),

			/* sysctl_get_name arg3 (buf_len) */
			BPF_MOV64_IMM(BPF_REG_3, 16),

			/* sysctl_get_name arg4 (flags) */
			BPF_MOV64_IMM(BPF_REG_4, 0),

			/* sysctl_get_name(ctx, buf, buf_len, flags) */
			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name),

			/* if (ret == expected && */
			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 10),

			/*     buf[0:8] == "net/ipv4" && */
			BPF_LD_IMM64(BPF_REG_8, 0x347670692f74656eULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6),

			/*     buf[8:16] == "/tcp_me\0") */
			BPF_LD_IMM64(BPF_REG_8, 0x00656d5f7063742fULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),

			/* return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_JMP_A(1),

			/* else return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "net/ipv4/tcp_mem",
		.open_flags = O_RDONLY,
		.result = SUCCESS,
	},
	{
		.descr = "sysctl_get_name sysctl:full E2BIG truncated small",
		.insns = {
			/* sysctl_get_name arg2 (buf) */
			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),

			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),

			/* sysctl_get_name arg3 (buf_len) */
			BPF_MOV64_IMM(BPF_REG_3, 7),

			/* sysctl_get_name arg4 (flags) */
			BPF_MOV64_IMM(BPF_REG_4, 0),

			/* sysctl_get_name(ctx, buf, buf_len, flags) */
			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name),

			/* if (ret == expected && */
			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6),

			/*     buf[0:8] == "net/ip\0") */
			BPF_LD_IMM64(BPF_REG_8, 0x000070692f74656eULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),

			/* return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_JMP_A(1),

			/* else return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "net/ipv4/tcp_mem",
		.open_flags = O_RDONLY,
		.result = SUCCESS,
	},
	{
		/* "gt": the buffer (8 bytes) is larger than the value needs. */
		.descr = "sysctl_get_current_value sysctl:read ok, gt",
		.insns = {
			/* sysctl_get_current_value arg2 (buf) */
			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),

			/* sysctl_get_current_value arg3 (buf_len) */
			BPF_MOV64_IMM(BPF_REG_3, 8),

			/* sysctl_get_current_value(ctx, buf, buf_len) */
			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value),

			/* if (ret == expected && */
			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 6, 6),

			/*     buf[0:6] == "Linux\n\0") */
			BPF_LD_IMM64(BPF_REG_8, 0x000a78756e694cULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),

			/* return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_JMP_A(1),

			/* else return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "kernel/ostype",
		.open_flags = O_RDONLY,
		.result = SUCCESS,
	},
	{
		/* "eq": buf_len is 7; byte 7 is zeroed by hand for the compare. */
		.descr = "sysctl_get_current_value sysctl:read ok, eq",
		.insns = {
			/* sysctl_get_current_value arg2 (buf) */
			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_STX_MEM(BPF_B, BPF_REG_7, BPF_REG_0, 7),

			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),

			/* sysctl_get_current_value arg3 (buf_len) */
			BPF_MOV64_IMM(BPF_REG_3, 7),

			/* sysctl_get_current_value(ctx, buf, buf_len) */
			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value),

			/* if (ret == expected && */
			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 6, 6),

			/*     buf[0:6] == "Linux\n\0") */
			BPF_LD_IMM64(BPF_REG_8, 0x000a78756e694cULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),

			/* return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_JMP_A(1),

			/* else return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "kernel/ostype",
		.open_flags = O_RDONLY,
		.result = SUCCESS,
	},
	{
		.descr = "sysctl_get_current_value sysctl:read E2BIG truncated",
		.insns = {
			/* sysctl_get_current_value arg2 (buf) */
			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
			/* zero bytes 6..7, which lie beyond buf_len */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_STX_MEM(BPF_H, BPF_REG_7, BPF_REG_0, 6),

			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),

			/* sysctl_get_current_value arg3 (buf_len) */
			BPF_MOV64_IMM(BPF_REG_3, 6),

			/* sysctl_get_current_value(ctx, buf, buf_len) */
			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value),

			/* if (ret == expected && */
			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6),

			/*     buf[0:6] == "Linux\0") */
			BPF_LD_IMM64(BPF_REG_8, 0x000078756e694cULL),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),

			/* return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_JMP_A(1),

			/* else return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "kernel/ostype",
		.open_flags = O_RDONLY,
		.result = SUCCESS,
	},
	{
		/*
		 * The verdict is inverted here: a passing check denies the
		 * access, so the expected OP_EPERM shows the check passed.
		 */
		.descr = "sysctl_get_current_value sysctl:read EINVAL",
		.insns = {
			/* sysctl_get_current_value arg2 (buf) */
			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),

			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),

			/* sysctl_get_current_value arg3 (buf_len) */
			BPF_MOV64_IMM(BPF_REG_3, 8),

			/* sysctl_get_current_value(ctx, buf, buf_len) */
			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value),

			/* if (ret == expected && */
			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 4),

			/*     buf[0:8] is NUL-filled) */
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
			BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2),

			/* return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_JMP_A(1),

			/* else return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "net/ipv6/conf/lo/stable_secret", /* -EIO */
		.open_flags = O_RDONLY,
		.result = OP_EPERM,
	},
	{
		/*
		 * Inverted verdict as well: the write is denied only when the
		 * helper reports the value that was read from the sysctl file
		 * and patched into the program before loading.
		 */
		.descr = "sysctl_get_current_value sysctl:write ok",
		/* Index of the BPF_LD_IMM64 below (five instructions precede it
		 * plus the JNE on the return value).
		 */
		.fixup_value_insn = 6,
		.insns = {
			/* sysctl_get_current_value arg2 (buf) */
			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),

			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),

			/* sysctl_get_current_value arg3 (buf_len) */
			BPF_MOV64_IMM(BPF_REG_3, 8),

			/* sysctl_get_current_value(ctx, buf, buf_len) */
			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value),

			/* if (ret == expected && */
			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 4, 6),

			/*     buf[0:4] == expected) */
			BPF_LD_IMM64(BPF_REG_8, FIXUP_SYSCTL_VALUE),
			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),

			/* return DENY; */
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_JMP_A(1),

			/* else return ALLOW; */
			BPF_MOV64_IMM(BPF_REG_0, 1),
			BPF_EXIT_INSN(),
		},
		.attach_type = BPF_CGROUP_SYSCTL,
		.sysctl = "net/ipv4/route/mtu_expires",
		.open_flags = O_WRONLY,
		.newval = "600", /* same as default, should fail anyway */
		.result = OP_EPERM,
	},
};
540 
541 static size_t probe_prog_length(const struct bpf_insn *fp)
542 {
543 	size_t len;
544 
545 	for (len = MAX_INSNS - 1; len > 0; --len)
546 		if (fp[len].code != 0 || fp[len].imm != 0)
547 			break;
548 	return len + 1;
549 }
550 
551 static int fixup_sysctl_value(const char *buf, size_t buf_len,
552 			      struct bpf_insn *prog, size_t insn_num)
553 {
554 	uint32_t value_num = 0;
555 	uint8_t c, i;
556 
557 	if (buf_len > sizeof(value_num)) {
558 		log_err("Value is too big (%zd) to use in fixup", buf_len);
559 		return -1;
560 	}
561 
562 	for (i = 0; i < buf_len; ++i) {
563 		c = buf[i];
564 		value_num |= (c << i * 8);
565 	}
566 
567 	prog[insn_num].imm = value_num;
568 
569 	return 0;
570 }
571 
572 static int load_sysctl_prog(struct sysctl_test *test, const char *sysctl_path)
573 {
574 	struct bpf_insn *prog = test->insns;
575 	struct bpf_load_program_attr attr;
576 	int ret;
577 
578 	memset(&attr, 0, sizeof(struct bpf_load_program_attr));
579 	attr.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL;
580 	attr.insns = prog;
581 	attr.insns_cnt = probe_prog_length(attr.insns);
582 	attr.license = "GPL";
583 
584 	if (test->fixup_value_insn) {
585 		char buf[128];
586 		ssize_t len;
587 		int fd;
588 
589 		fd = open(sysctl_path, O_RDONLY | O_CLOEXEC);
590 		if (fd < 0) {
591 			log_err("open(%s) failed", sysctl_path);
592 			return -1;
593 		}
594 		len = read(fd, buf, sizeof(buf));
595 		if (len == -1) {
596 			log_err("read(%s) failed", sysctl_path);
597 			close(fd);
598 			return -1;
599 		}
600 		close(fd);
601 		if (fixup_sysctl_value(buf, len, prog, test->fixup_value_insn))
602 			return -1;
603 	}
604 
605 	ret = bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
606 	if (ret < 0 && test->result != LOAD_REJECT) {
607 		log_err(">>> Loading program error.\n"
608 			">>> Verifier output:\n%s\n-------\n", bpf_log_buf);
609 	}
610 
611 	return ret;
612 }
613 
614 static int access_sysctl(const char *sysctl_path,
615 			 const struct sysctl_test *test)
616 {
617 	int err = 0;
618 	int fd;
619 
620 	fd = open(sysctl_path, test->open_flags | O_CLOEXEC);
621 	if (fd < 0)
622 		return fd;
623 
624 	if (test->open_flags == O_RDONLY) {
625 		char buf[128];
626 
627 		if (read(fd, buf, sizeof(buf)) == -1)
628 			goto err;
629 	} else if (test->open_flags == O_WRONLY) {
630 		if (!test->newval) {
631 			log_err("New value for sysctl is not set");
632 			goto err;
633 		}
634 		if (write(fd, test->newval, strlen(test->newval)) == -1)
635 			goto err;
636 	} else {
637 		log_err("Unexpected sysctl access: neither read nor write");
638 		goto err;
639 	}
640 
641 	goto out;
642 err:
643 	err = -1;
644 out:
645 	close(fd);
646 	return err;
647 }
648 
649 static int run_test_case(int cgfd, struct sysctl_test *test)
650 {
651 	enum bpf_attach_type atype = test->attach_type;
652 	char sysctl_path[128];
653 	int progfd = -1;
654 	int err = 0;
655 
656 	printf("Test case: %s .. ", test->descr);
657 
658 	snprintf(sysctl_path, sizeof(sysctl_path), "/proc/sys/%s",
659 		 test->sysctl);
660 
661 	progfd = load_sysctl_prog(test, sysctl_path);
662 	if (progfd < 0) {
663 		if (test->result == LOAD_REJECT)
664 			goto out;
665 		else
666 			goto err;
667 	}
668 
669 	if (bpf_prog_attach(progfd, cgfd, atype, BPF_F_ALLOW_OVERRIDE) == -1) {
670 		if (test->result == ATTACH_REJECT)
671 			goto out;
672 		else
673 			goto err;
674 	}
675 
676 	if (access_sysctl(sysctl_path, test) == -1) {
677 		if (test->result == OP_EPERM && errno == EPERM)
678 			goto out;
679 		else
680 			goto err;
681 	}
682 
683 	if (test->result != SUCCESS) {
684 		log_err("Unexpected failure");
685 		goto err;
686 	}
687 
688 	goto out;
689 err:
690 	err = -1;
691 out:
692 	/* Detaching w/o checking return code: best effort attempt. */
693 	if (progfd != -1)
694 		bpf_prog_detach(cgfd, atype);
695 	close(progfd);
696 	printf("[%s]\n", err ? "FAIL" : "PASS");
697 	return err;
698 }
699 
700 static int run_tests(int cgfd)
701 {
702 	int passes = 0;
703 	int fails = 0;
704 	int i;
705 
706 	for (i = 0; i < ARRAY_SIZE(tests); ++i) {
707 		if (run_test_case(cgfd, &tests[i]))
708 			++fails;
709 		else
710 			++passes;
711 	}
712 	printf("Summary: %d PASSED, %d FAILED\n", passes, fails);
713 	return fails ? -1 : 0;
714 }
715 
716 int main(int argc, char **argv)
717 {
718 	int cgfd = -1;
719 	int err = 0;
720 
721 	if (setup_cgroup_environment())
722 		goto err;
723 
724 	cgfd = create_and_get_cgroup(CG_PATH);
725 	if (cgfd < 0)
726 		goto err;
727 
728 	if (join_cgroup(CG_PATH))
729 		goto err;
730 
731 	if (run_tests(cgfd))
732 		goto err;
733 
734 	goto out;
735 err:
736 	err = -1;
737 out:
738 	close(cgfd);
739 	cleanup_cgroup_environment();
740 	return err;
741 }
742