//SPDX-License-Identifier: GPL-2.0
#include <linux/bpf-cgroup.h>
#include <linux/bpf.h>
#include <linux/bug.h>
#include <linux/filter.h>
#include <linux/mm.h>
#include <linux/rbtree.h>
#include <linux/slab.h>

DEFINE_PER_CPU(struct bpf_cgroup_storage*,
	       bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);

#ifdef CONFIG_CGROUP_BPF

#define LOCAL_STORAGE_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)

struct bpf_cgroup_storage_map {
	struct bpf_map map;

	spinlock_t lock;
	struct bpf_prog *prog;
	struct rb_root root;
	struct list_head list;
};

static struct bpf_cgroup_storage_map *map_to_storage(struct bpf_map *map)
{
	return container_of(map, struct bpf_cgroup_storage_map, map);
}

static int bpf_cgroup_storage_key_cmp(
	const struct bpf_cgroup_storage_key *key1,
	const struct bpf_cgroup_storage_key *key2)
{
	if (key1->cgroup_inode_id < key2->cgroup_inode_id)
		return -1;
	else if (key1->cgroup_inode_id > key2->cgroup_inode_id)
		return 1;
	else if (key1->attach_type < key2->attach_type)
		return -1;
	else if (key1->attach_type > key2->attach_type)
		return 1;
	return 0;
}

static struct bpf_cgroup_storage *cgroup_storage_lookup(
	struct bpf_cgroup_storage_map *map, struct bpf_cgroup_storage_key *key,
	bool locked)
{
	struct rb_root *root = &map->root;
	struct rb_node *node;

	if (!locked)
		spin_lock_bh(&map->lock);

	node = root->rb_node;
	while (node) {
		struct bpf_cgroup_storage *storage;

		storage = container_of(node, struct bpf_cgroup_storage, node);

		switch (bpf_cgroup_storage_key_cmp(key, &storage->key)) {
		case -1:
			node = node->rb_left;
			break;
		case 1:
			node = node->rb_right;
			break;
		default:
			if (!locked)
				spin_unlock_bh(&map->lock);
			return storage;
		}
	}

	if (!locked)
		spin_unlock_bh(&map->lock);

	return NULL;
}

static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map,
				 struct bpf_cgroup_storage *storage)
{
	struct rb_root *root = &map->root;
	struct rb_node **new = &(root->rb_node), *parent = NULL;

	while (*new) {
		struct bpf_cgroup_storage *this;

		this = container_of(*new, struct bpf_cgroup_storage, node);

		parent = *new;
		switch (bpf_cgroup_storage_key_cmp(&storage->key, &this->key)) {
		case -1:
			new = &((*new)->rb_left);
			break;
		case 1:
			new = &((*new)->rb_right);
			break;
		default:
			return -EEXIST;
		}
	}

	rb_link_node(&storage->node, parent, new);
	rb_insert_color(&storage->node, root);

	return 0;
}

static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *_key)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;

	storage = cgroup_storage_lookup(map, key, false);
	if (!storage)
		return NULL;

	return &READ_ONCE(storage->buf)->data[0];
}

static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
				      void *value, u64 flags)
{
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	struct bpf_storage_buffer *new;

	if (flags != BPF_ANY && flags != BPF_EXIST)
		return -EINVAL;

	storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,
					key, false);
	if (!storage)
		return -ENOENT;
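
	/* Allocate a replacement buffer and publish it with an atomic
	 * exchange, so concurrent lookups always see either the old or
	 * the new buffer in full; the old buffer is freed after an RCU
	 * grace period.
	 */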
	new = kmalloc_node(sizeof(struct bpf_storage_buffer) +
			   map->value_size,
			   __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN,
			   map->numa_node);
	if (!new)
		return -ENOMEM;

	memcpy(&new->data[0], value, map->value_size);

	new = xchg(&storage->buf, new);
	kfree_rcu(new, rcu);

	return 0;
}

int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *_key,
				   void *value)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	int cpu, off = 0;
	u32 size;

	rcu_read_lock();
	storage = cgroup_storage_lookup(map, key, false);
	if (!storage) {
		rcu_read_unlock();
		return -ENOENT;
	}

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */
	size = round_up(_map->value_size, 8);
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(value + off,
				per_cpu_ptr(storage->percpu_buf, cpu), size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *_key,
				     void *value, u64 map_flags)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	int cpu, off = 0;
	u32 size;

	if (map_flags != BPF_ANY && map_flags != BPF_EXIST)
		return -EINVAL;

	rcu_read_lock();
	storage = cgroup_storage_lookup(map, key, false);
	if (!storage) {
		rcu_read_unlock();
		return -ENOENT;
	}

	/* the user space will provide round_up(value_size, 8) bytes that
	 * will be copied into per-cpu area. bpf programs can only access
	 * value_size of it. During lookup the same extra bytes will be
	 * returned or zeros which were zero-filled by percpu_alloc,
	 * so no kernel data leaks possible
	 */
	size = round_up(_map->value_size, 8);
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(per_cpu_ptr(storage->percpu_buf, cpu),
				value + off, size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key,
				       void *_next_key)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage_key *next = _next_key;
	struct bpf_cgroup_storage *storage;

	spin_lock_bh(&map->lock);

	if (list_empty(&map->list))
		goto enoent;

	if (key) {
		storage = cgroup_storage_lookup(map, key, true);
		if (!storage)
			goto enoent;

		storage = list_next_entry(storage, list);
		if (!storage)
			goto enoent;
	} else {
		storage = list_first_entry(&map->list,
					   struct bpf_cgroup_storage, list);
	}

	spin_unlock_bh(&map->lock);
	next->attach_type = storage->key.attach_type;
	next->cgroup_inode_id = storage->key.cgroup_inode_id;
	return 0;

enoent:
	spin_unlock_bh(&map->lock);
	return -ENOENT;
}
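
/* A cgroup storage map is keyed by (cgroup inode id, attach type) and holds
 * at most one element per key. Elements are created and destroyed by the
 * kernel rather than by userspace, so max_entries must be 0 and only the
 * NUMA node and access-mode flags are accepted at creation time.
 */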
static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
{
	int numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_cgroup_storage_map *map;

	if (attr->key_size != sizeof(struct bpf_cgroup_storage_key))
		return ERR_PTR(-EINVAL);

	if (attr->value_size == 0)
		return ERR_PTR(-EINVAL);

	if (attr->value_size > PAGE_SIZE)
		return ERR_PTR(-E2BIG);

	if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK)
		/* reserved bits should not be used */
		return ERR_PTR(-EINVAL);

	if (attr->max_entries)
		/* max_entries is not used and enforced to be 0 */
		return ERR_PTR(-EINVAL);

	map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map),
			   __GFP_ZERO | GFP_USER, numa_node);
	if (!map)
		return ERR_PTR(-ENOMEM);

	map->map.pages = round_up(sizeof(struct bpf_cgroup_storage_map),
				  PAGE_SIZE) >> PAGE_SHIFT;

	/* copy mandatory map attributes */
	bpf_map_init_from_attr(&map->map, attr);

	spin_lock_init(&map->lock);
	map->root = RB_ROOT;
	INIT_LIST_HEAD(&map->list);

	return &map->map;
}

static void cgroup_storage_map_free(struct bpf_map *_map)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);

	WARN_ON(!RB_EMPTY_ROOT(&map->root));
	WARN_ON(!list_empty(&map->list));

	kfree(map);
}

static int cgroup_storage_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

const struct bpf_map_ops cgroup_storage_map_ops = {
	.map_alloc = cgroup_storage_map_alloc,
	.map_free = cgroup_storage_map_free,
	.map_get_next_key = cgroup_storage_get_next_key,
	.map_lookup_elem = cgroup_storage_lookup_elem,
	.map_update_elem = cgroup_storage_update_elem,
	.map_delete_elem = cgroup_storage_delete_elem,
	.map_check_btf = map_check_no_btf,
};
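
/* Bind a map to a BPF program: a cgroup storage map may only be used by a
 * single program, and a program may only use one map per storage type.
 * Any other combination is rejected with -EBUSY.
 */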
int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
{
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	int ret = -EBUSY;

	spin_lock_bh(&map->lock);

	if (map->prog && map->prog != prog)
		goto unlock;
	if (prog->aux->cgroup_storage[stype] &&
	    prog->aux->cgroup_storage[stype] != _map)
		goto unlock;

	map->prog = prog;
	prog->aux->cgroup_storage[stype] = _map;
	ret = 0;
unlock:
	spin_unlock_bh(&map->lock);

	return ret;
}

void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *_map)
{
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);

	spin_lock_bh(&map->lock);
	if (map->prog == prog) {
		WARN_ON(prog->aux->cgroup_storage[stype] != _map);
		map->prog = NULL;
		prog->aux->cgroup_storage[stype] = NULL;
	}
	spin_unlock_bh(&map->lock);
}

static size_t bpf_cgroup_storage_calculate_size(struct bpf_map *map, u32 *pages)
{
	size_t size;

	if (cgroup_storage_type(map) == BPF_CGROUP_STORAGE_SHARED) {
		size = sizeof(struct bpf_storage_buffer) + map->value_size;
		*pages = round_up(sizeof(struct bpf_cgroup_storage) + size,
				  PAGE_SIZE) >> PAGE_SHIFT;
	} else {
		size = map->value_size;
		*pages = round_up(round_up(size, 8) * num_possible_cpus(),
				  PAGE_SIZE) >> PAGE_SHIFT;
	}

	return size;
}

struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
					enum bpf_cgroup_storage_type stype)
{
	struct bpf_cgroup_storage *storage;
	struct bpf_map *map;
	gfp_t flags;
	size_t size;
	u32 pages;

	map = prog->aux->cgroup_storage[stype];
	if (!map)
		return NULL;

	size = bpf_cgroup_storage_calculate_size(map, &pages);

	if (bpf_map_charge_memlock(map, pages))
		return ERR_PTR(-EPERM);

	storage = kmalloc_node(sizeof(struct bpf_cgroup_storage),
			       __GFP_ZERO | GFP_USER, map->numa_node);
	if (!storage)
		goto enomem;

	flags = __GFP_ZERO | GFP_USER;

	if (stype == BPF_CGROUP_STORAGE_SHARED) {
		storage->buf = kmalloc_node(size, flags, map->numa_node);
		if (!storage->buf)
			goto enomem;
	} else {
		storage->percpu_buf = __alloc_percpu_gfp(size, 8, flags);
		if (!storage->percpu_buf)
			goto enomem;
	}

	storage->map = (struct bpf_cgroup_storage_map *)map;

	return storage;

enomem:
	bpf_map_uncharge_memlock(map, pages);
	kfree(storage);
	return ERR_PTR(-ENOMEM);
}

static void free_shared_cgroup_storage_rcu(struct rcu_head *rcu)
{
	struct bpf_cgroup_storage *storage =
		container_of(rcu, struct bpf_cgroup_storage, rcu);

	kfree(storage->buf);
	kfree(storage);
}

static void free_percpu_cgroup_storage_rcu(struct rcu_head *rcu)
{
	struct bpf_cgroup_storage *storage =
		container_of(rcu, struct bpf_cgroup_storage, rcu);

	free_percpu(storage->percpu_buf);
	kfree(storage);
}

void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage)
{
	enum bpf_cgroup_storage_type stype;
	struct bpf_map *map;
	u32 pages;

	if (!storage)
		return;

	map = &storage->map->map;

	bpf_cgroup_storage_calculate_size(map, &pages);
	bpf_map_uncharge_memlock(map, pages);

	stype = cgroup_storage_type(map);
	if (stype == BPF_CGROUP_STORAGE_SHARED)
		call_rcu(&storage->rcu, free_shared_cgroup_storage_rcu);
	else
		call_rcu(&storage->rcu, free_percpu_cgroup_storage_rcu);
}
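
/* Populate the storage key and make the storage visible for lookups by
 * inserting it into the map's rbtree and iteration list.
 */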
void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
			     struct cgroup *cgroup,
			     enum bpf_attach_type type)
{
	struct bpf_cgroup_storage_map *map;

	if (!storage)
		return;

	storage->key.attach_type = type;
	storage->key.cgroup_inode_id = cgroup->kn->id.id;

	map = storage->map;

	spin_lock_bh(&map->lock);
	WARN_ON(cgroup_storage_insert(map, storage));
	list_add(&storage->list, &map->list);
	spin_unlock_bh(&map->lock);
}

void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage)
{
	struct bpf_cgroup_storage_map *map;
	struct rb_root *root;

	if (!storage)
		return;

	map = storage->map;

	spin_lock_bh(&map->lock);
	root = &map->root;
	rb_erase(&storage->node, root);

	list_del(&storage->list);
	spin_unlock_bh(&map->lock);
}

#endif