xref: /openbmc/linux/kernel/bpf/local_storage.c (revision e3d786a3)
//SPDX-License-Identifier: GPL-2.0
#include <linux/bpf-cgroup.h>
#include <linux/bpf.h>
#include <linux/bug.h>
#include <linux/filter.h>
#include <linux/mm.h>
#include <linux/rbtree.h>
#include <linux/slab.h>

DEFINE_PER_CPU(struct bpf_cgroup_storage*,
	       bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);

#ifdef CONFIG_CGROUP_BPF

#define LOCAL_STORAGE_CREATE_FLAG_MASK					\
	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)

struct bpf_cgroup_storage_map {
	struct bpf_map map;

	spinlock_t lock;
	struct bpf_prog *prog;
	struct rb_root root;
	struct list_head list;
};

static struct bpf_cgroup_storage_map *map_to_storage(struct bpf_map *map)
{
	return container_of(map, struct bpf_cgroup_storage_map, map);
}

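/* Compare two cgroup storage keys: order by cgroup inode id first,
 * then by attach type. Used to keep the per-map rbtree sorted.
 */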
static int bpf_cgroup_storage_key_cmp(
	const struct bpf_cgroup_storage_key *key1,
	const struct bpf_cgroup_storage_key *key2)
{
	if (key1->cgroup_inode_id < key2->cgroup_inode_id)
		return -1;
	else if (key1->cgroup_inode_id > key2->cgroup_inode_id)
		return 1;
	else if (key1->attach_type < key2->attach_type)
		return -1;
	else if (key1->attach_type > key2->attach_type)
		return 1;
	return 0;
}

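/* Walk the map's rbtree looking for the storage element that matches
 * @key. If @locked is false, map->lock is taken around the walk;
 * callers that already hold map->lock pass @locked == true.
 */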
static struct bpf_cgroup_storage *cgroup_storage_lookup(
	struct bpf_cgroup_storage_map *map, struct bpf_cgroup_storage_key *key,
	bool locked)
{
	struct rb_root *root = &map->root;
	struct rb_node *node;

	if (!locked)
		spin_lock_bh(&map->lock);

	node = root->rb_node;
	while (node) {
		struct bpf_cgroup_storage *storage;

		storage = container_of(node, struct bpf_cgroup_storage, node);

		switch (bpf_cgroup_storage_key_cmp(key, &storage->key)) {
		case -1:
			node = node->rb_left;
			break;
		case 1:
			node = node->rb_right;
			break;
		default:
			if (!locked)
				spin_unlock_bh(&map->lock);
			return storage;
		}
	}

	if (!locked)
		spin_unlock_bh(&map->lock);

	return NULL;
}

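/* Insert @storage into the map's rbtree, keyed by (cgroup id, attach
 * type). Returns -EEXIST if an entry with the same key already exists.
 * Caller must hold map->lock.
 */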
static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map,
				 struct bpf_cgroup_storage *storage)
{
	struct rb_root *root = &map->root;
	struct rb_node **new = &(root->rb_node), *parent = NULL;

	while (*new) {
		struct bpf_cgroup_storage *this;

		this = container_of(*new, struct bpf_cgroup_storage, node);

		parent = *new;
		switch (bpf_cgroup_storage_key_cmp(&storage->key, &this->key)) {
		case -1:
			new = &((*new)->rb_left);
			break;
		case 1:
			new = &((*new)->rb_right);
			break;
		default:
			return -EEXIST;
		}
	}

	rb_link_node(&storage->node, parent, new);
	rb_insert_color(&storage->node, root);

	return 0;
}

static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *_key)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;

	storage = cgroup_storage_lookup(map, key, false);
	if (!storage)
		return NULL;

	return &READ_ONCE(storage->buf)->data[0];
}

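/* Replace the shared storage buffer for an existing key. The new
 * buffer is swapped in with xchg() and the old one is freed after an
 * RCU grace period, so concurrent BPF programs see either the old or
 * the new value.
 */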
static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
				      void *value, u64 flags)
{
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	struct bpf_storage_buffer *new;

	if (flags != BPF_ANY && flags != BPF_EXIST)
		return -EINVAL;

	storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,
					key, false);
	if (!storage)
		return -ENOENT;

	new = kmalloc_node(sizeof(struct bpf_storage_buffer) +
			   map->value_size, __GFP_ZERO | GFP_USER,
			   map->numa_node);
	if (!new)
		return -ENOMEM;

	memcpy(&new->data[0], value, map->value_size);

	new = xchg(&storage->buf, new);
	kfree_rcu(new, rcu);

	return 0;
}

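/* Copy the per-cpu values for @_key into @value as one contiguous
 * buffer, one round_up(value_size, 8) chunk per possible CPU.
 */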
int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *_key,
				   void *value)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	int cpu, off = 0;
	u32 size;

	rcu_read_lock();
	storage = cgroup_storage_lookup(map, key, false);
	if (!storage) {
		rcu_read_unlock();
		return -ENOENT;
	}

	/* per-cpu areas are zero-filled and BPF programs can only
	 * access 'value_size' bytes of them, so copying the rounded-up
	 * areas will not leak any kernel data
	 */
	size = round_up(_map->value_size, 8);
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(value + off,
				per_cpu_ptr(storage->percpu_buf, cpu), size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

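/* Update the per-cpu values for @_key from a contiguous user-supplied
 * buffer laid out the same way as in bpf_percpu_cgroup_storage_copy().
 */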
int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *_key,
				     void *value, u64 map_flags)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	int cpu, off = 0;
	u32 size;

	if (map_flags != BPF_ANY && map_flags != BPF_EXIST)
		return -EINVAL;

	rcu_read_lock();
	storage = cgroup_storage_lookup(map, key, false);
	if (!storage) {
		rcu_read_unlock();
		return -ENOENT;
	}

	/* user space provides round_up(value_size, 8) bytes that are
	 * copied into the per-cpu area. BPF programs can only access
	 * value_size of it. During lookup the same extra bytes are
	 * returned, or zeros that percpu_alloc zero-filled, so no
	 * kernel data can leak
	 */
	size = round_up(_map->value_size, 8);
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(per_cpu_ptr(storage->percpu_buf, cpu),
				value + off, size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

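/* Iterate map keys for the BPF_MAP_GET_NEXT_KEY command: if @_key is
 * NULL, return the first entry on the map's list, otherwise return
 * the entry following the one that matches @_key.
 */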
static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key,
				       void *_next_key)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage_key *next = _next_key;
	struct bpf_cgroup_storage *storage;

	spin_lock_bh(&map->lock);

	if (list_empty(&map->list))
		goto enoent;

	if (key) {
		storage = cgroup_storage_lookup(map, key, true);
		if (!storage)
			goto enoent;

		storage = list_next_entry(storage, list);
		if (!storage)
			goto enoent;
	} else {
		storage = list_first_entry(&map->list,
					 struct bpf_cgroup_storage, list);
	}

	spin_unlock_bh(&map->lock);
	next->attach_type = storage->key.attach_type;
	next->cgroup_inode_id = storage->key.cgroup_inode_id;
	return 0;

enoent:
	spin_unlock_bh(&map->lock);
	return -ENOENT;
}

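/* Validate the map attributes (fixed key size, bounded value size,
 * no max_entries, no unknown flags) and allocate an empty
 * bpf_cgroup_storage_map.
 */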
static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
{
	int numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_cgroup_storage_map *map;

	if (attr->key_size != sizeof(struct bpf_cgroup_storage_key))
		return ERR_PTR(-EINVAL);

	if (attr->value_size == 0)
		return ERR_PTR(-EINVAL);

	if (attr->value_size > PAGE_SIZE)
		return ERR_PTR(-E2BIG);

	if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK)
		/* reserved bits should not be used */
		return ERR_PTR(-EINVAL);

	if (attr->max_entries)
		/* max_entries is not used and enforced to be 0 */
		return ERR_PTR(-EINVAL);

	map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map),
			   __GFP_ZERO | GFP_USER, numa_node);
	if (!map)
		return ERR_PTR(-ENOMEM);

	map->map.pages = round_up(sizeof(struct bpf_cgroup_storage_map),
				  PAGE_SIZE) >> PAGE_SHIFT;

	/* copy mandatory map attributes */
	bpf_map_init_from_attr(&map->map, attr);

	spin_lock_init(&map->lock);
	map->root = RB_ROOT;
	INIT_LIST_HEAD(&map->list);

	return &map->map;
}

static void cgroup_storage_map_free(struct bpf_map *_map)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);

	WARN_ON(!RB_EMPTY_ROOT(&map->root));
	WARN_ON(!list_empty(&map->list));

	kfree(map);
}

static int cgroup_storage_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

const struct bpf_map_ops cgroup_storage_map_ops = {
	.map_alloc = cgroup_storage_map_alloc,
	.map_free = cgroup_storage_map_free,
	.map_get_next_key = cgroup_storage_get_next_key,
	.map_lookup_elem = cgroup_storage_lookup_elem,
	.map_update_elem = cgroup_storage_update_elem,
	.map_delete_elem = cgroup_storage_delete_elem,
	.map_check_btf = map_check_no_btf,
};

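/* Bind @_map to @prog. A cgroup storage map can only be used by a
 * single program, and a program can only use one map per storage
 * type, so both directions are checked under map->lock.
 */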
int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
{
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	int ret = -EBUSY;

	spin_lock_bh(&map->lock);

	if (map->prog && map->prog != prog)
		goto unlock;
	if (prog->aux->cgroup_storage[stype] &&
	    prog->aux->cgroup_storage[stype] != _map)
		goto unlock;

	map->prog = prog;
	prog->aux->cgroup_storage[stype] = _map;
	ret = 0;
unlock:
	spin_unlock_bh(&map->lock);

	return ret;
}

void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *_map)
{
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);

	spin_lock_bh(&map->lock);
	if (map->prog == prog) {
		WARN_ON(prog->aux->cgroup_storage[stype] != _map);
		map->prog = NULL;
		prog->aux->cgroup_storage[stype] = NULL;
	}
	spin_unlock_bh(&map->lock);
}

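/* Return the allocation size for one storage element and report, via
 * @pages, how many pages to charge against the memlock limit.
 */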
static size_t bpf_cgroup_storage_calculate_size(struct bpf_map *map, u32 *pages)
{
	size_t size;

	if (cgroup_storage_type(map) == BPF_CGROUP_STORAGE_SHARED) {
		size = sizeof(struct bpf_storage_buffer) + map->value_size;
		*pages = round_up(sizeof(struct bpf_cgroup_storage) + size,
				  PAGE_SIZE) >> PAGE_SHIFT;
	} else {
		size = map->value_size;
		*pages = round_up(round_up(size, 8) * num_possible_cpus(),
				  PAGE_SIZE) >> PAGE_SHIFT;
	}

	return size;
}

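/* Allocate (but do not link) a storage element for @prog and @stype,
 * charging the memory against the map's memlock limit. Returns NULL
 * if the program uses no map of this type, or an ERR_PTR on failure.
 */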
struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
					enum bpf_cgroup_storage_type stype)
{
	struct bpf_cgroup_storage *storage;
	struct bpf_map *map;
	gfp_t flags;
	size_t size;
	u32 pages;

	map = prog->aux->cgroup_storage[stype];
	if (!map)
		return NULL;

	size = bpf_cgroup_storage_calculate_size(map, &pages);

	if (bpf_map_charge_memlock(map, pages))
		return ERR_PTR(-EPERM);

	storage = kmalloc_node(sizeof(struct bpf_cgroup_storage),
			       __GFP_ZERO | GFP_USER, map->numa_node);
	if (!storage)
		goto enomem;

	flags = __GFP_ZERO | GFP_USER;

	if (stype == BPF_CGROUP_STORAGE_SHARED) {
		storage->buf = kmalloc_node(size, flags, map->numa_node);
		if (!storage->buf)
			goto enomem;
	} else {
		storage->percpu_buf = __alloc_percpu_gfp(size, 8, flags);
		if (!storage->percpu_buf)
			goto enomem;
	}

	storage->map = (struct bpf_cgroup_storage_map *)map;

	return storage;

enomem:
	bpf_map_uncharge_memlock(map, pages);
	kfree(storage);
	return ERR_PTR(-ENOMEM);
}

static void free_shared_cgroup_storage_rcu(struct rcu_head *rcu)
{
	struct bpf_cgroup_storage *storage =
		container_of(rcu, struct bpf_cgroup_storage, rcu);

	kfree(storage->buf);
	kfree(storage);
}

static void free_percpu_cgroup_storage_rcu(struct rcu_head *rcu)
{
	struct bpf_cgroup_storage *storage =
		container_of(rcu, struct bpf_cgroup_storage, rcu);

	free_percpu(storage->percpu_buf);
	kfree(storage);
}

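/* Uncharge the memlock pages and free @storage after an RCU grace
 * period, using the callback that matches the storage type.
 */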
void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage)
{
	enum bpf_cgroup_storage_type stype;
	struct bpf_map *map;
	u32 pages;

	if (!storage)
		return;

	map = &storage->map->map;

	bpf_cgroup_storage_calculate_size(map, &pages);
	bpf_map_uncharge_memlock(map, pages);

	stype = cgroup_storage_type(map);
	if (stype == BPF_CGROUP_STORAGE_SHARED)
		call_rcu(&storage->rcu, free_shared_cgroup_storage_rcu);
	else
		call_rcu(&storage->rcu, free_percpu_cgroup_storage_rcu);
}

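/* Fill in the storage key from @cgroup and @type, then insert the
 * element into the map's rbtree and list under map->lock.
 */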
void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
			     struct cgroup *cgroup,
			     enum bpf_attach_type type)
{
	struct bpf_cgroup_storage_map *map;

	if (!storage)
		return;

	storage->key.attach_type = type;
	storage->key.cgroup_inode_id = cgroup->kn->id.id;

	map = storage->map;

	spin_lock_bh(&map->lock);
	WARN_ON(cgroup_storage_insert(map, storage));
	list_add(&storage->list, &map->list);
	spin_unlock_bh(&map->lock);
}

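/* Remove @storage from the map's rbtree and list. The element itself
 * is freed separately by bpf_cgroup_storage_free().
 */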
void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage)
{
	struct bpf_cgroup_storage_map *map;
	struct rb_root *root;

	if (!storage)
		return;

	map = storage->map;

	spin_lock_bh(&map->lock);
	root = &map->root;
	rb_erase(&storage->node, root);

	list_del(&storage->list);
	spin_unlock_bh(&map->lock);
}

#endif