//SPDX-License-Identifier: GPL-2.0
#include <linux/bpf-cgroup.h>
#include <linux/bpf.h>
#include <linux/bug.h>
#include <linux/filter.h>
#include <linux/mm.h>
#include <linux/rbtree.h>
#include <linux/slab.h>

DEFINE_PER_CPU(struct bpf_cgroup_storage*,
	       bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);

#ifdef CONFIG_CGROUP_BPF

#define LOCAL_STORAGE_CREATE_FLAG_MASK					\
	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)

struct bpf_cgroup_storage_map {
	struct bpf_map map;

	spinlock_t lock;
	struct bpf_prog *prog;
	struct rb_root root;
	struct list_head list;
};

static struct bpf_cgroup_storage_map *map_to_storage(struct bpf_map *map)
{
	return container_of(map, struct bpf_cgroup_storage_map, map);
}

static int bpf_cgroup_storage_key_cmp(
	const struct bpf_cgroup_storage_key *key1,
	const struct bpf_cgroup_storage_key *key2)
{
	if (key1->cgroup_inode_id < key2->cgroup_inode_id)
		return -1;
	else if (key1->cgroup_inode_id > key2->cgroup_inode_id)
		return 1;
	else if (key1->attach_type < key2->attach_type)
		return -1;
	else if (key1->attach_type > key2->attach_type)
		return 1;
	return 0;
}

static struct bpf_cgroup_storage *cgroup_storage_lookup(
	struct bpf_cgroup_storage_map *map, struct bpf_cgroup_storage_key *key,
	bool locked)
{
	struct rb_root *root = &map->root;
	struct rb_node *node;

	if (!locked)
		spin_lock_bh(&map->lock);

	node = root->rb_node;
	while (node) {
		struct bpf_cgroup_storage *storage;

		storage = container_of(node, struct bpf_cgroup_storage, node);

		switch (bpf_cgroup_storage_key_cmp(key, &storage->key)) {
		case -1:
			node = node->rb_left;
			break;
		case 1:
			node = node->rb_right;
			break;
		default:
			if (!locked)
				spin_unlock_bh(&map->lock);
			return storage;
		}
	}

	if (!locked)
		spin_unlock_bh(&map->lock);

	return NULL;
}

static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map,
				 struct bpf_cgroup_storage *storage)
{
	struct rb_root *root = &map->root;
	struct rb_node **new = &(root->rb_node), *parent = NULL;

	while (*new) {
		struct bpf_cgroup_storage *this;

		this = container_of(*new, struct bpf_cgroup_storage, node);

		parent = *new;
		switch (bpf_cgroup_storage_key_cmp(&storage->key, &this->key)) {
		case -1:
			new = &((*new)->rb_left);
			break;
		case 1:
			new = &((*new)->rb_right);
			break;
		default:
			return -EEXIST;
		}
	}

	rb_link_node(&storage->node, parent, new);
	rb_insert_color(&storage->node, root);

	return 0;
}

static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *_key)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;

	storage = cgroup_storage_lookup(map, key, false);
	if (!storage)
		return NULL;

	return &READ_ONCE(storage->buf)->data[0];
}

static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
				      void *value, u64 flags)
{
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	struct bpf_storage_buffer *new;

	if (flags != BPF_ANY && flags != BPF_EXIST)
		return -EINVAL;

	storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,
					key, false);
	if (!storage)
		return -ENOENT;

	new = kmalloc_node(sizeof(struct bpf_storage_buffer) +
			   map->value_size, __GFP_ZERO | GFP_USER,
			   map->numa_node);
	if (!new)
		return -ENOMEM;

	memcpy(&new->data[0], value, map->value_size);

	new = xchg(&storage->buf, new);
	kfree_rcu(new, rcu);

	return 0;
}

int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *_key,
				   void *value)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	int cpu, off = 0;
	u32 size;

	rcu_read_lock();
	storage = cgroup_storage_lookup(map, key, false);
	if (!storage) {
		rcu_read_unlock();
		return -ENOENT;
	}

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */
	size = round_up(_map->value_size, 8);
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(value + off,
				per_cpu_ptr(storage->percpu_buf, cpu), size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *_key,
				     void *value, u64 map_flags)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	int cpu, off = 0;
	u32 size;

	if (map_flags != BPF_ANY && map_flags != BPF_EXIST)
		return -EINVAL;

	rcu_read_lock();
	storage = cgroup_storage_lookup(map, key, false);
	if (!storage) {
		rcu_read_unlock();
		return -ENOENT;
	}

	/* the user space will provide round_up(value_size, 8) bytes that
	 * will be copied into per-cpu area. bpf programs can only access
	 * value_size of it. During lookup the same extra bytes will be
	 * returned or zeros which were zero-filled by percpu_alloc,
	 * so no kernel data leaks possible
	 */
	size = round_up(_map->value_size, 8);
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(per_cpu_ptr(storage->percpu_buf, cpu),
				value + off, size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key,
				       void *_next_key)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage_key *next = _next_key;
	struct bpf_cgroup_storage *storage;

	spin_lock_bh(&map->lock);

	if (list_empty(&map->list))
		goto enoent;

	if (key) {
		storage = cgroup_storage_lookup(map, key, true);
		if (!storage)
			goto enoent;

		storage = list_next_entry(storage, list);
		if (!storage)
			goto enoent;
	} else {
		storage = list_first_entry(&map->list,
					   struct bpf_cgroup_storage, list);
	}

	spin_unlock_bh(&map->lock);
	next->attach_type = storage->key.attach_type;
	next->cgroup_inode_id = storage->key.cgroup_inode_id;
	return 0;

enoent:
	spin_unlock_bh(&map->lock);
	return -ENOENT;
}

static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
{
	int numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_cgroup_storage_map *map;

	if (attr->key_size != sizeof(struct bpf_cgroup_storage_key))
		return ERR_PTR(-EINVAL);

	if (attr->value_size == 0)
		return ERR_PTR(-EINVAL);

	if (attr->value_size > PAGE_SIZE)
		return ERR_PTR(-E2BIG);

	if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK)
		/* reserved bits should not be used */
		return ERR_PTR(-EINVAL);

	if (attr->max_entries)
		/* max_entries is not used and enforced to be 0 */
		return ERR_PTR(-EINVAL);

	map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map),
			   __GFP_ZERO | GFP_USER, numa_node);
	if (!map)
		return ERR_PTR(-ENOMEM);

	map->map.pages = round_up(sizeof(struct bpf_cgroup_storage_map),
				  PAGE_SIZE) >> PAGE_SHIFT;

	/* copy mandatory map attributes */
	bpf_map_init_from_attr(&map->map, attr);

	spin_lock_init(&map->lock);
	map->root = RB_ROOT;
	INIT_LIST_HEAD(&map->list);

	return &map->map;
}

static void cgroup_storage_map_free(struct bpf_map *_map)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);

	WARN_ON(!RB_EMPTY_ROOT(&map->root));
	WARN_ON(!list_empty(&map->list));

	kfree(map);
}

static int cgroup_storage_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

const struct bpf_map_ops cgroup_storage_map_ops = {
	.map_alloc = cgroup_storage_map_alloc,
	.map_free = cgroup_storage_map_free,
	.map_get_next_key = cgroup_storage_get_next_key,
	.map_lookup_elem = cgroup_storage_lookup_elem,
	.map_update_elem = cgroup_storage_update_elem,
	.map_delete_elem = cgroup_storage_delete_elem,
	.map_check_btf = map_check_no_btf,
};

int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
{
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	int ret = -EBUSY;

	spin_lock_bh(&map->lock);

	if (map->prog && map->prog != prog)
		goto unlock;
	if (prog->aux->cgroup_storage[stype] &&
	    prog->aux->cgroup_storage[stype] != _map)
		goto unlock;

	map->prog = prog;
	prog->aux->cgroup_storage[stype] = _map;
	ret = 0;
unlock:
	spin_unlock_bh(&map->lock);

	return ret;
}

void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *_map)
{
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);

	spin_lock_bh(&map->lock);
	if (map->prog == prog) {
		WARN_ON(prog->aux->cgroup_storage[stype] != _map);
		map->prog = NULL;
		prog->aux->cgroup_storage[stype] = NULL;
	}
	spin_unlock_bh(&map->lock);
}

static size_t bpf_cgroup_storage_calculate_size(struct bpf_map *map, u32 *pages)
{
	size_t size;

	if (cgroup_storage_type(map) == BPF_CGROUP_STORAGE_SHARED) {
		size = sizeof(struct bpf_storage_buffer) + map->value_size;
		*pages = round_up(sizeof(struct bpf_cgroup_storage) + size,
				  PAGE_SIZE) >> PAGE_SHIFT;
	} else {
		size = map->value_size;
		*pages = round_up(round_up(size, 8) * num_possible_cpus(),
				  PAGE_SIZE) >> PAGE_SHIFT;
	}

	return size;
}

struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
					enum bpf_cgroup_storage_type stype)
{
	struct bpf_cgroup_storage *storage;
	struct bpf_map *map;
	gfp_t flags;
	size_t size;
	u32 pages;

	map = prog->aux->cgroup_storage[stype];
	if (!map)
		return NULL;

	size = bpf_cgroup_storage_calculate_size(map, &pages);

	if (bpf_map_charge_memlock(map, pages))
		return ERR_PTR(-EPERM);

	storage = kmalloc_node(sizeof(struct bpf_cgroup_storage),
			       __GFP_ZERO | GFP_USER, map->numa_node);
	if (!storage)
		goto enomem;

	flags = __GFP_ZERO | GFP_USER;

	if (stype == BPF_CGROUP_STORAGE_SHARED) {
		storage->buf = kmalloc_node(size, flags, map->numa_node);
		if (!storage->buf)
			goto enomem;
	} else {
		storage->percpu_buf = __alloc_percpu_gfp(size, 8, flags);
		if (!storage->percpu_buf)
			goto enomem;
	}

	storage->map = (struct bpf_cgroup_storage_map *)map;

	return storage;

enomem:
	bpf_map_uncharge_memlock(map, pages);
	kfree(storage);
	return ERR_PTR(-ENOMEM);
}

static void free_shared_cgroup_storage_rcu(struct rcu_head *rcu)
{
	struct bpf_cgroup_storage *storage =
		container_of(rcu, struct bpf_cgroup_storage, rcu);

	kfree(storage->buf);
	kfree(storage);
}

static void free_percpu_cgroup_storage_rcu(struct rcu_head *rcu)
{
	struct bpf_cgroup_storage *storage =
		container_of(rcu, struct bpf_cgroup_storage, rcu);

	free_percpu(storage->percpu_buf);
	kfree(storage);
}

void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage)
{
	enum bpf_cgroup_storage_type stype;
	struct bpf_map *map;
	u32 pages;

	if (!storage)
		return;

	map = &storage->map->map;

	bpf_cgroup_storage_calculate_size(map, &pages);
	bpf_map_uncharge_memlock(map, pages);

	stype = cgroup_storage_type(map);
	if (stype == BPF_CGROUP_STORAGE_SHARED)
		call_rcu(&storage->rcu, free_shared_cgroup_storage_rcu);
	else
		call_rcu(&storage->rcu, free_percpu_cgroup_storage_rcu);
}

void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
			     struct cgroup *cgroup,
			     enum bpf_attach_type type)
{
	struct bpf_cgroup_storage_map *map;

	if (!storage)
		return;

	storage->key.attach_type = type;
	storage->key.cgroup_inode_id = cgroup->kn->id.id;

	map = storage->map;

	spin_lock_bh(&map->lock);
	WARN_ON(cgroup_storage_insert(map, storage));
	list_add(&storage->list, &map->list);
	spin_unlock_bh(&map->lock);
}

void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage)
{
	struct bpf_cgroup_storage_map *map;
	struct rb_root *root;

	if (!storage)
		return;

	map = storage->map;

	spin_lock_bh(&map->lock);
	root = &map->root;
	rb_erase(&storage->node, root);

	list_del(&storage->list);
	spin_unlock_bh(&map->lock);
}

#endif
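
/*
 * Illustrative user-space sketch (kept as a comment so the translation unit is
 * unchanged; this is an editorial assumption, not part of the kernel sources):
 * a map_alloc call only succeeds when it satisfies the checks enforced in
 * cgroup_storage_map_alloc() above, i.e. key_size must equal
 * sizeof(struct bpf_cgroup_storage_key), value_size must be non-zero and at
 * most PAGE_SIZE, max_entries must be 0, and map_flags may only contain bits
 * from LOCAL_STORAGE_CREATE_FLAG_MASK. The value_size of 64 below is an
 * arbitrary example.
 *
 *	union bpf_attr attr = {};
 *
 *	attr.map_type    = BPF_MAP_TYPE_CGROUP_STORAGE;
 *	attr.key_size    = sizeof(struct bpf_cgroup_storage_key);
 *	attr.value_size  = 64;
 *	attr.max_entries = 0;
 *
 *	int map_fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 */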