xref: /openbmc/linux/kernel/bpf/local_storage.c (revision d003c346bf75f01d240c80000baf2fbf28e53782)
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf-cgroup.h>
#include <linux/bpf.h>
#include <linux/bug.h>
#include <linux/filter.h>
#include <linux/mm.h>
#include <linux/rbtree.h>
#include <linux/slab.h>

DEFINE_PER_CPU(struct bpf_cgroup_storage*,
	       bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);

#ifdef CONFIG_CGROUP_BPF

#define LOCAL_STORAGE_CREATE_FLAG_MASK					\
	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)

struct bpf_cgroup_storage_map {
	struct bpf_map map;

	spinlock_t lock;
	struct bpf_prog *prog;
	struct rb_root root;
	struct list_head list;
};

static struct bpf_cgroup_storage_map *map_to_storage(struct bpf_map *map)
{
	return container_of(map, struct bpf_cgroup_storage_map, map);
}

static int bpf_cgroup_storage_key_cmp(
	const struct bpf_cgroup_storage_key *key1,
	const struct bpf_cgroup_storage_key *key2)
{
	if (key1->cgroup_inode_id < key2->cgroup_inode_id)
		return -1;
	else if (key1->cgroup_inode_id > key2->cgroup_inode_id)
		return 1;
	else if (key1->attach_type < key2->attach_type)
		return -1;
	else if (key1->attach_type > key2->attach_type)
		return 1;
	return 0;
}

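/*
 * Find the storage for the given (cgroup_inode_id, attach_type) key by
 * walking the map's rbtree. Takes map->lock unless the caller indicates
 * via @locked that it already holds it.
 */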
static struct bpf_cgroup_storage *cgroup_storage_lookup(
	struct bpf_cgroup_storage_map *map, struct bpf_cgroup_storage_key *key,
	bool locked)
{
	struct rb_root *root = &map->root;
	struct rb_node *node;

	if (!locked)
		spin_lock_bh(&map->lock);

	node = root->rb_node;
	while (node) {
		struct bpf_cgroup_storage *storage;

		storage = container_of(node, struct bpf_cgroup_storage, node);

		switch (bpf_cgroup_storage_key_cmp(key, &storage->key)) {
		case -1:
			node = node->rb_left;
			break;
		case 1:
			node = node->rb_right;
			break;
		default:
			if (!locked)
				spin_unlock_bh(&map->lock);
			return storage;
		}
	}

	if (!locked)
		spin_unlock_bh(&map->lock);

	return NULL;
}

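/*
 * Insert @storage into the rbtree, ordered by key. Returns -EEXIST if an
 * entry with the same key is already present. The caller must hold
 * map->lock.
 */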
static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map,
				 struct bpf_cgroup_storage *storage)
{
	struct rb_root *root = &map->root;
	struct rb_node **new = &(root->rb_node), *parent = NULL;

	while (*new) {
		struct bpf_cgroup_storage *this;

		this = container_of(*new, struct bpf_cgroup_storage, node);

		parent = *new;
		switch (bpf_cgroup_storage_key_cmp(&storage->key, &this->key)) {
		case -1:
			new = &((*new)->rb_left);
			break;
		case 1:
			new = &((*new)->rb_right);
			break;
		default:
			return -EEXIST;
		}
	}

	rb_link_node(&storage->node, parent, new);
	rb_insert_color(&storage->node, root);

	return 0;
}

static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *_key)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;

	storage = cgroup_storage_lookup(map, key, false);
	if (!storage)
		return NULL;

	return &READ_ONCE(storage->buf)->data[0];
}

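/*
 * Syscall-side update of a shared storage entry: allocate a new buffer,
 * copy the value in and swap it for the old one, which is freed after an
 * RCU grace period. New entries cannot be created through this path, so
 * the lookup must succeed.
 */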
static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
				      void *value, u64 flags)
{
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	struct bpf_storage_buffer *new;

	if (flags != BPF_ANY && flags != BPF_EXIST)
		return -EINVAL;

	storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,
					key, false);
	if (!storage)
		return -ENOENT;

	new = kmalloc_node(sizeof(struct bpf_storage_buffer) +
			   map->value_size,
			   __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN,
			   map->numa_node);
	if (!new)
		return -ENOMEM;

	memcpy(&new->data[0], value, map->value_size);

	new = xchg(&storage->buf, new);
	kfree_rcu(new, rcu);

	return 0;
}

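/*
 * Copy the per-cpu values of an existing storage entry into the
 * user-supplied buffer, one round_up(value_size, 8) slot per possible CPU.
 */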
int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *_key,
				   void *value)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	int cpu, off = 0;
	u32 size;

	rcu_read_lock();
	storage = cgroup_storage_lookup(map, key, false);
	if (!storage) {
		rcu_read_unlock();
		return -ENOENT;
	}

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */
	size = round_up(_map->value_size, 8);
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(value + off,
				per_cpu_ptr(storage->percpu_buf, cpu), size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

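/*
 * Write the user-supplied per-cpu values into an existing storage entry,
 * mirroring bpf_percpu_cgroup_storage_copy() above.
 */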
int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *_key,
				     void *value, u64 map_flags)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	int cpu, off = 0;
	u32 size;

	if (map_flags != BPF_ANY && map_flags != BPF_EXIST)
		return -EINVAL;

	rcu_read_lock();
	storage = cgroup_storage_lookup(map, key, false);
	if (!storage) {
		rcu_read_unlock();
		return -ENOENT;
	}

	/* user space provides round_up(value_size, 8) bytes that are
	 * copied into the per-cpu area. bpf programs can only access
	 * value_size of it. During lookup the same extra bytes are
	 * returned, or zeros which were zero-filled by percpu_alloc,
	 * so no kernel data leaks are possible.
	 */
	size = round_up(_map->value_size, 8);
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(per_cpu_ptr(storage->percpu_buf, cpu),
				value + off, size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

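/*
 * Iterate over the entries linked on map->list: return the first key when
 * no key is given, otherwise the key of the entry following @key.
 */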
static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key,
				       void *_next_key)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage_key *next = _next_key;
	struct bpf_cgroup_storage *storage;

	spin_lock_bh(&map->lock);

	if (list_empty(&map->list))
		goto enoent;

	if (key) {
		storage = cgroup_storage_lookup(map, key, true);
		if (!storage)
			goto enoent;

		/* list_next_entry() never returns NULL; stop at the tail
		 * instead of walking past it.
		 */
		if (list_is_last(&storage->list, &map->list))
			goto enoent;

		storage = list_next_entry(storage, list);
	} else {
		storage = list_first_entry(&map->list,
					 struct bpf_cgroup_storage, list);
	}

	spin_unlock_bh(&map->lock);
	next->attach_type = storage->key.attach_type;
	next->cgroup_inode_id = storage->key.cgroup_inode_id;
	return 0;

enoent:
	spin_unlock_bh(&map->lock);
	return -ENOENT;
}

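/*
 * Validate the create attributes (fixed key size, bounded value size,
 * no reserved flags, max_entries forced to 0) and allocate the map.
 */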
static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
{
	int numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_cgroup_storage_map *map;

	if (attr->key_size != sizeof(struct bpf_cgroup_storage_key))
		return ERR_PTR(-EINVAL);

	if (attr->value_size == 0)
		return ERR_PTR(-EINVAL);

	if (attr->value_size > PAGE_SIZE)
		return ERR_PTR(-E2BIG);

	if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK)
		/* reserved bits should not be used */
		return ERR_PTR(-EINVAL);

	if (attr->max_entries)
		/* max_entries is not used and enforced to be 0 */
		return ERR_PTR(-EINVAL);

	map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map),
			   __GFP_ZERO | GFP_USER, numa_node);
	if (!map)
		return ERR_PTR(-ENOMEM);

	map->map.pages = round_up(sizeof(struct bpf_cgroup_storage_map),
				  PAGE_SIZE) >> PAGE_SHIFT;

	/* copy mandatory map attributes */
	bpf_map_init_from_attr(&map->map, attr);

	spin_lock_init(&map->lock);
	map->root = RB_ROOT;
	INIT_LIST_HEAD(&map->list);

	return &map->map;
}

static void cgroup_storage_map_free(struct bpf_map *_map)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);

	WARN_ON(!RB_EMPTY_ROOT(&map->root));
	WARN_ON(!list_empty(&map->list));

	kfree(map);
}

static int cgroup_storage_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

const struct bpf_map_ops cgroup_storage_map_ops = {
	.map_alloc = cgroup_storage_map_alloc,
	.map_free = cgroup_storage_map_free,
	.map_get_next_key = cgroup_storage_get_next_key,
	.map_lookup_elem = cgroup_storage_lookup_elem,
	.map_update_elem = cgroup_storage_update_elem,
	.map_delete_elem = cgroup_storage_delete_elem,
	.map_check_btf = map_check_no_btf,
};

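/*
 * Bind @_map to @prog: a cgroup storage map may be used by only one
 * program, and a program may use only one map per storage type. Returns
 * -EBUSY if either side is already bound elsewhere.
 */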
int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
{
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	int ret = -EBUSY;

	spin_lock_bh(&map->lock);

	if (map->prog && map->prog != prog)
		goto unlock;
	if (prog->aux->cgroup_storage[stype] &&
	    prog->aux->cgroup_storage[stype] != _map)
		goto unlock;

	map->prog = prog;
	prog->aux->cgroup_storage[stype] = _map;
	ret = 0;
unlock:
	spin_unlock_bh(&map->lock);

	return ret;
}

void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *_map)
{
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);

	spin_lock_bh(&map->lock);
	if (map->prog == prog) {
		WARN_ON(prog->aux->cgroup_storage[stype] != _map);
		map->prog = NULL;
		prog->aux->cgroup_storage[stype] = NULL;
	}
	spin_unlock_bh(&map->lock);
}

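/*
 * Return the size of the storage buffer to allocate and set *pages to the
 * number of pages to charge against the memlock limit.
 */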
static size_t bpf_cgroup_storage_calculate_size(struct bpf_map *map, u32 *pages)
{
	size_t size;

	if (cgroup_storage_type(map) == BPF_CGROUP_STORAGE_SHARED) {
		size = sizeof(struct bpf_storage_buffer) + map->value_size;
		*pages = round_up(sizeof(struct bpf_cgroup_storage) + size,
				  PAGE_SIZE) >> PAGE_SHIFT;
	} else {
		size = map->value_size;
		*pages = round_up(round_up(size, 8) * num_possible_cpus(),
				  PAGE_SIZE) >> PAGE_SHIFT;
	}

	return size;
}

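/*
 * Allocate and charge the storage for @prog and @stype: a shared buffer
 * for BPF_CGROUP_STORAGE_SHARED, a per-cpu area otherwise. Returns NULL
 * when the program uses no map of this type.
 */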
struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
					enum bpf_cgroup_storage_type stype)
{
	struct bpf_cgroup_storage *storage;
	struct bpf_map *map;
	gfp_t flags;
	size_t size;
	u32 pages;

	map = prog->aux->cgroup_storage[stype];
	if (!map)
		return NULL;

	size = bpf_cgroup_storage_calculate_size(map, &pages);

	if (bpf_map_charge_memlock(map, pages))
		return ERR_PTR(-EPERM);

	storage = kmalloc_node(sizeof(struct bpf_cgroup_storage),
			       __GFP_ZERO | GFP_USER, map->numa_node);
	if (!storage)
		goto enomem;

	flags = __GFP_ZERO | GFP_USER;

	if (stype == BPF_CGROUP_STORAGE_SHARED) {
		storage->buf = kmalloc_node(size, flags, map->numa_node);
		if (!storage->buf)
			goto enomem;
	} else {
		storage->percpu_buf = __alloc_percpu_gfp(size, 8, flags);
		if (!storage->percpu_buf)
			goto enomem;
	}

	storage->map = (struct bpf_cgroup_storage_map *)map;

	return storage;

enomem:
	bpf_map_uncharge_memlock(map, pages);
	kfree(storage);
	return ERR_PTR(-ENOMEM);
}

static void free_shared_cgroup_storage_rcu(struct rcu_head *rcu)
{
	struct bpf_cgroup_storage *storage =
		container_of(rcu, struct bpf_cgroup_storage, rcu);

	kfree(storage->buf);
	kfree(storage);
}

static void free_percpu_cgroup_storage_rcu(struct rcu_head *rcu)
{
	struct bpf_cgroup_storage *storage =
		container_of(rcu, struct bpf_cgroup_storage, rcu);

	free_percpu(storage->percpu_buf);
	kfree(storage);
}

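/*
 * Uncharge the memlock pages and free the storage after an RCU grace
 * period, using the callback matching its storage type.
 */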
void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage)
{
	enum bpf_cgroup_storage_type stype;
	struct bpf_map *map;
	u32 pages;

	if (!storage)
		return;

	map = &storage->map->map;

	bpf_cgroup_storage_calculate_size(map, &pages);
	bpf_map_uncharge_memlock(map, pages);

	stype = cgroup_storage_type(map);
	if (stype == BPF_CGROUP_STORAGE_SHARED)
		call_rcu(&storage->rcu, free_shared_cgroup_storage_rcu);
	else
		call_rcu(&storage->rcu, free_percpu_cgroup_storage_rcu);
}

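/*
 * Set the storage key from the cgroup and attach type, then publish the
 * storage in the map's rbtree and list.
 */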
void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
			     struct cgroup *cgroup,
			     enum bpf_attach_type type)
{
	struct bpf_cgroup_storage_map *map;

	if (!storage)
		return;

	storage->key.attach_type = type;
	storage->key.cgroup_inode_id = cgroup->kn->id.id;

	map = storage->map;

	spin_lock_bh(&map->lock);
	WARN_ON(cgroup_storage_insert(map, storage));
	list_add(&storage->list, &map->list);
	spin_unlock_bh(&map->lock);
}

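/* Remove the storage from the map's rbtree and list. */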
void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage)
{
	struct bpf_cgroup_storage_map *map;
	struct rb_root *root;

	if (!storage)
		return;

	map = storage->map;

	spin_lock_bh(&map->lock);
	root = &map->root;
	rb_erase(&storage->node, root);

	list_del(&storage->list);
	spin_unlock_bh(&map->lock);
}

#endif