1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Functions to manage eBPF programs attached to cgroup subsystems 4 * 5 * Copyright 2022 Google LLC. 6 */ 7 #include <asm-generic/errno.h> 8 #include <errno.h> 9 #include <sys/types.h> 10 #include <sys/mount.h> 11 #include <sys/stat.h> 12 #include <unistd.h> 13 14 #include <test_progs.h> 15 #include <bpf/libbpf.h> 16 #include <bpf/bpf.h> 17 18 #include "cgroup_helpers.h" 19 #include "cgroup_hierarchical_stats.skel.h" 20 21 #define PAGE_SIZE 4096 22 #define MB(x) (x << 20) 23 24 #define BPFFS_ROOT "/sys/fs/bpf/" 25 #define BPFFS_VMSCAN BPFFS_ROOT"vmscan/" 26 27 #define CG_ROOT_NAME "root" 28 #define CG_ROOT_ID 1 29 30 #define CGROUP_PATH(p, n) {.path = p"/"n, .name = n} 31 32 static struct { 33 const char *path, *name; 34 unsigned long long id; 35 int fd; 36 } cgroups[] = { 37 CGROUP_PATH("/", "test"), 38 CGROUP_PATH("/test", "child1"), 39 CGROUP_PATH("/test", "child2"), 40 CGROUP_PATH("/test/child1", "child1_1"), 41 CGROUP_PATH("/test/child1", "child1_2"), 42 CGROUP_PATH("/test/child2", "child2_1"), 43 CGROUP_PATH("/test/child2", "child2_2"), 44 }; 45 46 #define N_CGROUPS ARRAY_SIZE(cgroups) 47 #define N_NON_LEAF_CGROUPS 3 48 49 static int root_cgroup_fd; 50 static bool mounted_bpffs; 51 52 /* reads file at 'path' to 'buf', returns 0 on success. */ 53 static int read_from_file(const char *path, char *buf, size_t size) 54 { 55 int fd, len; 56 57 fd = open(path, O_RDONLY); 58 if (fd < 0) 59 return fd; 60 61 len = read(fd, buf, size); 62 close(fd); 63 if (len < 0) 64 return len; 65 66 buf[len] = 0; 67 return 0; 68 } 69 70 /* mounts bpffs and mkdir for reading stats, returns 0 on success. */ 71 static int setup_bpffs(void) 72 { 73 int err; 74 75 /* Mount bpffs */ 76 err = mount("bpf", BPFFS_ROOT, "bpf", 0, NULL); 77 mounted_bpffs = !err; 78 if (ASSERT_FALSE(err && errno != EBUSY, "mount")) 79 return err; 80 81 /* Create a directory to contain stat files in bpffs */ 82 err = mkdir(BPFFS_VMSCAN, 0755); 83 if (!ASSERT_OK(err, "mkdir")) 84 return err; 85 86 return 0; 87 } 88 89 static void cleanup_bpffs(void) 90 { 91 /* Remove created directory in bpffs */ 92 ASSERT_OK(rmdir(BPFFS_VMSCAN), "rmdir "BPFFS_VMSCAN); 93 94 /* Unmount bpffs, if it wasn't already mounted when we started */ 95 if (mounted_bpffs) 96 return; 97 98 ASSERT_OK(umount(BPFFS_ROOT), "unmount bpffs"); 99 } 100 101 /* sets up cgroups, returns 0 on success. */ 102 static int setup_cgroups(void) 103 { 104 int i, fd, err; 105 106 err = setup_cgroup_environment(); 107 if (!ASSERT_OK(err, "setup_cgroup_environment")) 108 return err; 109 110 root_cgroup_fd = get_root_cgroup(); 111 if (!ASSERT_GE(root_cgroup_fd, 0, "get_root_cgroup")) 112 return root_cgroup_fd; 113 114 for (i = 0; i < N_CGROUPS; i++) { 115 fd = create_and_get_cgroup(cgroups[i].path); 116 if (!ASSERT_GE(fd, 0, "create_and_get_cgroup")) 117 return fd; 118 119 cgroups[i].fd = fd; 120 cgroups[i].id = get_cgroup_id(cgroups[i].path); 121 122 /* 123 * Enable memcg controller for the entire hierarchy. 124 * Note that stats are collected for all cgroups in a hierarchy 125 * with memcg enabled anyway, but are only exposed for cgroups 126 * that have memcg enabled. 127 */ 128 if (i < N_NON_LEAF_CGROUPS) { 129 err = enable_controllers(cgroups[i].path, "memory"); 130 if (!ASSERT_OK(err, "enable_controllers")) 131 return err; 132 } 133 } 134 return 0; 135 } 136 137 static void cleanup_cgroups(void) 138 { 139 close(root_cgroup_fd); 140 for (int i = 0; i < N_CGROUPS; i++) 141 close(cgroups[i].fd); 142 cleanup_cgroup_environment(); 143 } 144 145 /* Sets up cgroup hiearchary, returns 0 on success. */ 146 static int setup_hierarchy(void) 147 { 148 return setup_bpffs() || setup_cgroups(); 149 } 150 151 static void destroy_hierarchy(void) 152 { 153 cleanup_cgroups(); 154 cleanup_bpffs(); 155 } 156 157 static int reclaimer(const char *cgroup_path, size_t size) 158 { 159 static char size_buf[128]; 160 char *buf, *ptr; 161 int err; 162 163 /* Join cgroup in the parent process workdir */ 164 if (join_parent_cgroup(cgroup_path)) 165 return EACCES; 166 167 /* Allocate memory */ 168 buf = malloc(size); 169 if (!buf) 170 return ENOMEM; 171 172 /* Write to memory to make sure it's actually allocated */ 173 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 174 *ptr = 1; 175 176 /* Try to reclaim memory */ 177 snprintf(size_buf, 128, "%lu", size); 178 err = write_cgroup_file_parent(cgroup_path, "memory.reclaim", size_buf); 179 180 free(buf); 181 /* memory.reclaim returns EAGAIN if the amount is not fully reclaimed */ 182 if (err && errno != EAGAIN) 183 return errno; 184 185 return 0; 186 } 187 188 static int induce_vmscan(void) 189 { 190 int i, status; 191 192 /* 193 * In every leaf cgroup, run a child process that allocates some memory 194 * and attempts to reclaim some of it. 195 */ 196 for (i = N_NON_LEAF_CGROUPS; i < N_CGROUPS; i++) { 197 pid_t pid; 198 199 /* Create reclaimer child */ 200 pid = fork(); 201 if (pid == 0) { 202 status = reclaimer(cgroups[i].path, MB(5)); 203 exit(status); 204 } 205 206 /* Cleanup reclaimer child */ 207 waitpid(pid, &status, 0); 208 ASSERT_TRUE(WIFEXITED(status), "reclaimer exited"); 209 ASSERT_EQ(WEXITSTATUS(status), 0, "reclaim exit code"); 210 } 211 return 0; 212 } 213 214 static unsigned long long 215 get_cgroup_vmscan_delay(unsigned long long cgroup_id, const char *file_name) 216 { 217 unsigned long long vmscan = 0, id = 0; 218 static char buf[128], path[128]; 219 220 /* For every cgroup, read the file generated by cgroup_iter */ 221 snprintf(path, 128, "%s%s", BPFFS_VMSCAN, file_name); 222 if (!ASSERT_OK(read_from_file(path, buf, 128), "read cgroup_iter")) 223 return 0; 224 225 /* Check the output file formatting */ 226 ASSERT_EQ(sscanf(buf, "cg_id: %llu, total_vmscan_delay: %llu\n", 227 &id, &vmscan), 2, "output format"); 228 229 /* Check that the cgroup_id is displayed correctly */ 230 ASSERT_EQ(id, cgroup_id, "cgroup_id"); 231 /* Check that the vmscan reading is non-zero */ 232 ASSERT_GT(vmscan, 0, "vmscan_reading"); 233 return vmscan; 234 } 235 236 static void check_vmscan_stats(void) 237 { 238 unsigned long long vmscan_readings[N_CGROUPS], vmscan_root; 239 int i; 240 241 for (i = 0; i < N_CGROUPS; i++) { 242 vmscan_readings[i] = get_cgroup_vmscan_delay(cgroups[i].id, 243 cgroups[i].name); 244 } 245 246 /* Read stats for root too */ 247 vmscan_root = get_cgroup_vmscan_delay(CG_ROOT_ID, CG_ROOT_NAME); 248 249 /* Check that child1 == child1_1 + child1_2 */ 250 ASSERT_EQ(vmscan_readings[1], vmscan_readings[3] + vmscan_readings[4], 251 "child1_vmscan"); 252 /* Check that child2 == child2_1 + child2_2 */ 253 ASSERT_EQ(vmscan_readings[2], vmscan_readings[5] + vmscan_readings[6], 254 "child2_vmscan"); 255 /* Check that test == child1 + child2 */ 256 ASSERT_EQ(vmscan_readings[0], vmscan_readings[1] + vmscan_readings[2], 257 "test_vmscan"); 258 /* Check that root >= test */ 259 ASSERT_GE(vmscan_root, vmscan_readings[1], "root_vmscan"); 260 } 261 262 /* Creates iter link and pins in bpffs, returns 0 on success, -errno on failure. 263 */ 264 static int setup_cgroup_iter(struct cgroup_hierarchical_stats *obj, 265 int cgroup_fd, const char *file_name) 266 { 267 DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); 268 union bpf_iter_link_info linfo = {}; 269 struct bpf_link *link; 270 static char path[128]; 271 int err; 272 273 /* 274 * Create an iter link, parameterized by cgroup_fd. We only want to 275 * traverse one cgroup, so set the traversal order to "self". 276 */ 277 linfo.cgroup.cgroup_fd = cgroup_fd; 278 linfo.cgroup.order = BPF_CGROUP_ITER_SELF_ONLY; 279 opts.link_info = &linfo; 280 opts.link_info_len = sizeof(linfo); 281 link = bpf_program__attach_iter(obj->progs.dump_vmscan, &opts); 282 if (!ASSERT_OK_PTR(link, "attach_iter")) 283 return -EFAULT; 284 285 /* Pin the link to a bpffs file */ 286 snprintf(path, 128, "%s%s", BPFFS_VMSCAN, file_name); 287 err = bpf_link__pin(link, path); 288 ASSERT_OK(err, "pin cgroup_iter"); 289 290 /* Remove the link, leaving only the ref held by the pinned file */ 291 bpf_link__destroy(link); 292 return err; 293 } 294 295 /* Sets up programs for collecting stats, returns 0 on success. */ 296 static int setup_progs(struct cgroup_hierarchical_stats **skel) 297 { 298 int i, err; 299 300 *skel = cgroup_hierarchical_stats__open_and_load(); 301 if (!ASSERT_OK_PTR(*skel, "open_and_load")) 302 return 1; 303 304 /* Attach cgroup_iter program that will dump the stats to cgroups */ 305 for (i = 0; i < N_CGROUPS; i++) { 306 err = setup_cgroup_iter(*skel, cgroups[i].fd, cgroups[i].name); 307 if (!ASSERT_OK(err, "setup_cgroup_iter")) 308 return err; 309 } 310 311 /* Also dump stats for root */ 312 err = setup_cgroup_iter(*skel, root_cgroup_fd, CG_ROOT_NAME); 313 if (!ASSERT_OK(err, "setup_cgroup_iter")) 314 return err; 315 316 bpf_program__set_autoattach((*skel)->progs.dump_vmscan, false); 317 err = cgroup_hierarchical_stats__attach(*skel); 318 if (!ASSERT_OK(err, "attach")) 319 return err; 320 321 return 0; 322 } 323 324 static void destroy_progs(struct cgroup_hierarchical_stats *skel) 325 { 326 static char path[128]; 327 int i; 328 329 for (i = 0; i < N_CGROUPS; i++) { 330 /* Delete files in bpffs that cgroup_iters are pinned in */ 331 snprintf(path, 128, "%s%s", BPFFS_VMSCAN, 332 cgroups[i].name); 333 ASSERT_OK(remove(path), "remove cgroup_iter pin"); 334 } 335 336 /* Delete root file in bpffs */ 337 snprintf(path, 128, "%s%s", BPFFS_VMSCAN, CG_ROOT_NAME); 338 ASSERT_OK(remove(path), "remove cgroup_iter root pin"); 339 cgroup_hierarchical_stats__destroy(skel); 340 } 341 342 void test_cgroup_hierarchical_stats(void) 343 { 344 struct cgroup_hierarchical_stats *skel = NULL; 345 346 if (setup_hierarchy()) 347 goto hierarchy_cleanup; 348 if (setup_progs(&skel)) 349 goto cleanup; 350 if (induce_vmscan()) 351 goto cleanup; 352 check_vmscan_stats(); 353 cleanup: 354 destroy_progs(skel); 355 hierarchy_cleanup: 356 destroy_hierarchy(); 357 } 358