xref: /openbmc/linux/net/core/bpf_sk_storage.c (revision c900529f3d9161bfde5cca0754f83b4d3c3e0220)
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook  */
#include <linux/rculist.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/bpf_local_storage.h>
#include <net/bpf_sk_storage.h>
#include <net/sock.h>
#include <uapi/linux/sock_diag.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>

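/* Cache-slot accounting shared by all sk_storage maps: each map is
 * assigned a slot index from sk_cache so that a lookup can first try the
 * cached element in the socket's bpf_local_storage (see
 * bpf_local_storage.h).
 */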
DEFINE_BPF_STORAGE_CACHE(sk_cache);

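/* Find the map's element in sk's local storage.  The caller must be in
 * an RCU (or RCU-trace) read-side critical section.  With cacheit_lockit
 * set, the found element is also promoted into the map's cache slot
 * under the storage lock.
 */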
static struct bpf_local_storage_data *
bpf_sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit)
{
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_map *smap;

	sk_storage =
		rcu_dereference_check(sk->sk_bpf_storage, bpf_rcu_lock_held());
	if (!sk_storage)
		return NULL;

	smap = (struct bpf_local_storage_map *)map;
	return bpf_local_storage_lookup(sk_storage, smap, cacheit_lockit);
}

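/* Delete the map's element from sk's local storage.  Returns -ENOENT if
 * sk has no element for this map.
 */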
static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map)
{
	struct bpf_local_storage_data *sdata;

	sdata = bpf_sk_storage_lookup(sk, map, false);
	if (!sdata)
		return -ENOENT;

	bpf_selem_unlink(SELEM(sdata), false);

	return 0;
}

/* Called by __sk_destruct() & bpf_sk_storage_clone() */
void bpf_sk_storage_free(struct sock *sk)
{
	struct bpf_local_storage *sk_storage;

	rcu_read_lock();
	sk_storage = rcu_dereference(sk->sk_bpf_storage);
	if (!sk_storage) {
		rcu_read_unlock();
		return;
	}

	bpf_local_storage_destroy(sk_storage);
	rcu_read_unlock();
}

static void bpf_sk_storage_map_free(struct bpf_map *map)
{
	bpf_local_storage_map_free(map, &sk_cache, NULL);
}

static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
{
	return bpf_local_storage_map_alloc(attr, &sk_cache, false);
}

static int notsupp_get_next_key(struct bpf_map *map, void *key,
				void *next_key)
{
	return -ENOTSUPP;
}

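/* Syscall-side map operations.  For sk_storage maps the key passed from
 * user space is a socket fd of the calling process; it is resolved with
 * sockfd_lookup() and released again before returning.
 */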
static void *bpf_fd_sk_storage_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_local_storage_data *sdata;
	struct socket *sock;
	int fd, err;

	fd = *(int *)key;
	sock = sockfd_lookup(fd, &err);
	if (sock) {
		sdata = bpf_sk_storage_lookup(sock->sk, map, true);
		sockfd_put(sock);
		return sdata ? sdata->data : NULL;
	}

	return ERR_PTR(err);
}

static long bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
					  void *value, u64 map_flags)
{
	struct bpf_local_storage_data *sdata;
	struct socket *sock;
	int fd, err;

	fd = *(int *)key;
	sock = sockfd_lookup(fd, &err);
	if (sock) {
		sdata = bpf_local_storage_update(
			sock->sk, (struct bpf_local_storage_map *)map, value,
			map_flags, GFP_ATOMIC);
		sockfd_put(sock);
		return PTR_ERR_OR_ZERO(sdata);
	}

	return err;
}

static long bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key)
{
	struct socket *sock;
	int fd, err;

	fd = *(int *)key;
	sock = sockfd_lookup(fd, &err);
	if (sock) {
		err = bpf_sk_storage_del(sock->sk, map);
		sockfd_put(sock);
		return err;
	}

	return err;
}

static struct bpf_local_storage_elem *
bpf_sk_storage_clone_elem(struct sock *newsk,
			  struct bpf_local_storage_map *smap,
			  struct bpf_local_storage_elem *selem)
{
	struct bpf_local_storage_elem *copy_selem;

	copy_selem = bpf_selem_alloc(smap, newsk, NULL, true, GFP_ATOMIC);
	if (!copy_selem)
		return NULL;

	if (btf_record_has_field(smap->map.record, BPF_SPIN_LOCK))
		copy_map_value_locked(&smap->map, SDATA(copy_selem)->data,
				      SDATA(selem)->data, true);
	else
		copy_map_value(&smap->map, SDATA(copy_selem)->data,
			       SDATA(selem)->data);

	return copy_selem;
}

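/* Copy every element whose map was created with BPF_F_CLONE from sk into
 * newsk.  Called when a socket is cloned (e.g. a child socket created
 * from a listener).  On error the partially cloned storage is left for
 * the caller to release via bpf_sk_storage_free().
 */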
int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
{
	struct bpf_local_storage *new_sk_storage = NULL;
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_elem *selem;
	int ret = 0;

	RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);

	rcu_read_lock();
	sk_storage = rcu_dereference(sk->sk_bpf_storage);

	if (!sk_storage || hlist_empty(&sk_storage->list))
		goto out;

	hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
		struct bpf_local_storage_elem *copy_selem;
		struct bpf_local_storage_map *smap;
		struct bpf_map *map;

		smap = rcu_dereference(SDATA(selem)->smap);
		if (!(smap->map.map_flags & BPF_F_CLONE))
			continue;

		/* Note that for lockless listeners adding new element
		 * here can race with cleanup in bpf_local_storage_map_free.
		 * Try to grab map refcnt to make sure that it's still
		 * alive and prevent concurrent removal.
		 */
		map = bpf_map_inc_not_zero(&smap->map);
		if (IS_ERR(map))
			continue;

		copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem);
		if (!copy_selem) {
			ret = -ENOMEM;
			bpf_map_put(map);
			goto out;
		}

		if (new_sk_storage) {
			bpf_selem_link_map(smap, copy_selem);
			bpf_selem_link_storage_nolock(new_sk_storage, copy_selem);
		} else {
			ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC);
			if (ret) {
				bpf_selem_free(copy_selem, smap, true);
				atomic_sub(smap->elem_size,
					   &newsk->sk_omem_alloc);
				bpf_map_put(map);
				goto out;
			}

			new_sk_storage =
				rcu_dereference(copy_selem->local_storage);
		}
		bpf_map_put(map);
	}

out:
	rcu_read_unlock();

	/* In case of an error, don't free anything explicitly here, the
	 * caller is responsible to call bpf_sk_storage_free.
	 */

	return ret;
}

/* *gfp_flags* is a hidden argument provided by the verifier */
BPF_CALL_5(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
	   void *, value, u64, flags, gfp_t, gfp_flags)
{
	struct bpf_local_storage_data *sdata;

	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (!sk || !sk_fullsock(sk) || flags > BPF_SK_STORAGE_GET_F_CREATE)
		return (unsigned long)NULL;

	sdata = bpf_sk_storage_lookup(sk, map, true);
	if (sdata)
		return (unsigned long)sdata->data;

	if (flags == BPF_SK_STORAGE_GET_F_CREATE &&
	    /* Cannot add new elem to a going away sk.
	     * Otherwise, the new elem may become a leak
	     * (and also other memory issues during map
	     *  destruction).
	     */
	    refcount_inc_not_zero(&sk->sk_refcnt)) {
		sdata = bpf_local_storage_update(
			sk, (struct bpf_local_storage_map *)map, value,
			BPF_NOEXIST, gfp_flags);
		/* sk must be a fullsock (guaranteed by verifier),
		 * so sock_gen_put() is unnecessary.
		 */
		sock_put(sk);
		return IS_ERR(sdata) ?
			(unsigned long)NULL : (unsigned long)sdata->data;
	}

	return (unsigned long)NULL;
}

BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
{
	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (!sk || !sk_fullsock(sk))
		return -EINVAL;

	if (refcount_inc_not_zero(&sk->sk_refcnt)) {
		int err;

		err = bpf_sk_storage_del(sk, map);
		sock_put(sk);
		return err;
	}

	return -ENOENT;
}

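/* Illustrative BPF-program usage of the two helpers above (a minimal
 * sketch, not part of this file; "sk_stg" and struct pkt_cnt are made-up
 * names for a BPF_MAP_TYPE_SK_STORAGE map and its value type):
 *
 *	struct pkt_cnt { __u64 cnt; };
 *	struct pkt_cnt *cntp;
 *
 *	cntp = bpf_sk_storage_get(&sk_stg, sk, NULL,
 *				  BPF_SK_STORAGE_GET_F_CREATE);
 *	if (cntp)
 *		__sync_fetch_and_add(&cntp->cnt, 1);
 *	...
 *	bpf_sk_storage_delete(&sk_stg, sk);
 */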
static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap,
				 void *owner, u32 size)
{
	int optmem_max = READ_ONCE(sysctl_optmem_max);
	struct sock *sk = (struct sock *)owner;

	/* same check as in sock_kmalloc() */
	if (size <= optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
		atomic_add(size, &sk->sk_omem_alloc);
		return 0;
	}

	return -ENOMEM;
}

static void bpf_sk_storage_uncharge(struct bpf_local_storage_map *smap,
				    void *owner, u32 size)
{
	struct sock *sk = owner;

	atomic_sub(size, &sk->sk_omem_alloc);
}

static struct bpf_local_storage __rcu **
bpf_sk_storage_ptr(void *owner)
{
	struct sock *sk = owner;

	return &sk->sk_bpf_storage;
}

const struct bpf_map_ops sk_storage_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = bpf_local_storage_map_alloc_check,
	.map_alloc = bpf_sk_storage_map_alloc,
	.map_free = bpf_sk_storage_map_free,
	.map_get_next_key = notsupp_get_next_key,
	.map_lookup_elem = bpf_fd_sk_storage_lookup_elem,
	.map_update_elem = bpf_fd_sk_storage_update_elem,
	.map_delete_elem = bpf_fd_sk_storage_delete_elem,
	.map_check_btf = bpf_local_storage_map_check_btf,
	.map_btf_id = &bpf_local_storage_map_btf_id[0],
	.map_local_storage_charge = bpf_sk_storage_charge,
	.map_local_storage_uncharge = bpf_sk_storage_uncharge,
	.map_owner_storage_ptr = bpf_sk_storage_ptr,
	.map_mem_usage = bpf_local_storage_map_mem_usage,
};

const struct bpf_func_proto bpf_sk_storage_get_proto = {
	.func		= bpf_sk_storage_get,
	.gpl_only	= false,
	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
	.arg3_type	= ARG_PTR_TO_MAP_VALUE_OR_NULL,
	.arg4_type	= ARG_ANYTHING,
};

const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto = {
	.func		= bpf_sk_storage_get,
	.gpl_only	= false,
	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_CTX, /* context is 'struct sock' */
	.arg3_type	= ARG_PTR_TO_MAP_VALUE_OR_NULL,
	.arg4_type	= ARG_ANYTHING,
};

const struct bpf_func_proto bpf_sk_storage_delete_proto = {
	.func		= bpf_sk_storage_delete,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
};

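/* The bpf_sk_storage_(get|delete)_tracing helpers below are only made
 * available to tracing programs that pass this check, so that a program
 * using them cannot attach to a bpf_sk_storage*() function and re-enter
 * the storage code it is tracing.
 */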
static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
{
	const struct btf *btf_vmlinux;
	const struct btf_type *t;
	const char *tname;
	u32 btf_id;

	if (prog->aux->dst_prog)
		return false;

	/* Ensure a tracing program that uses the
	 * bpf_sk_storage_(get|delete) helpers is not
	 * tracing any bpf_sk_storage*() function itself.
	 */
	switch (prog->expected_attach_type) {
	case BPF_TRACE_ITER:
	case BPF_TRACE_RAW_TP:
		/* bpf_sk_storage has no trace point */
		return true;
	case BPF_TRACE_FENTRY:
	case BPF_TRACE_FEXIT:
		btf_vmlinux = bpf_get_btf_vmlinux();
		if (IS_ERR_OR_NULL(btf_vmlinux))
			return false;
		btf_id = prog->aux->attach_btf_id;
		t = btf_type_by_id(btf_vmlinux, btf_id);
		tname = btf_name_by_offset(btf_vmlinux, t->name_off);
		return !!strncmp(tname, "bpf_sk_storage",
				 strlen("bpf_sk_storage"));
	default:
		return false;
	}

	return false;
}

/* *gfp_flags* is a hidden argument provided by the verifier */
BPF_CALL_5(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
	   void *, value, u64, flags, gfp_t, gfp_flags)
{
	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (in_hardirq() || in_nmi())
		return (unsigned long)NULL;

	return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags,
						     gfp_flags);
}

BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map,
	   struct sock *, sk)
{
	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (in_hardirq() || in_nmi())
		return -EPERM;

	return ____bpf_sk_storage_delete(map, sk);
}

const struct bpf_func_proto bpf_sk_storage_get_tracing_proto = {
	.func		= bpf_sk_storage_get_tracing,
	.gpl_only	= false,
	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_BTF_ID_OR_NULL,
	.arg2_btf_id	= &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
	.arg3_type	= ARG_PTR_TO_MAP_VALUE_OR_NULL,
	.arg4_type	= ARG_ANYTHING,
	.allowed	= bpf_sk_storage_tracing_allowed,
};

const struct bpf_func_proto bpf_sk_storage_delete_tracing_proto = {
	.func		= bpf_sk_storage_delete_tracing,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_BTF_ID_OR_NULL,
	.arg2_btf_id	= &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
	.allowed	= bpf_sk_storage_tracing_allowed,
};

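/* Dumping sk storages through the sock_diag netlink interface.  The
 * request/reply attributes (SK_DIAG_BPF_STORAGE*) used below are defined
 * in uapi/linux/sock_diag.h.
 */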
struct bpf_sk_storage_diag {
	u32 nr_maps;
	struct bpf_map *maps[];
};

/* The reply will be like:
 * INET_DIAG_BPF_SK_STORAGES (nla_nest)
 *	SK_DIAG_BPF_STORAGE (nla_nest)
 *		SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
 *		SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
 *	SK_DIAG_BPF_STORAGE (nla_nest)
 *		SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
 *		SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
 *	....
 */
static int nla_value_size(u32 value_size)
{
	/* SK_DIAG_BPF_STORAGE (nla_nest)
	 *	SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
	 *	SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
	 */
	return nla_total_size(0) + nla_total_size(sizeof(u32)) +
		nla_total_size_64bit(value_size);
}

void bpf_sk_storage_diag_free(struct bpf_sk_storage_diag *diag)
{
	u32 i;

	if (!diag)
		return;

	for (i = 0; i < diag->nr_maps; i++)
		bpf_map_put(diag->maps[i]);

	kfree(diag);
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_free);

static bool diag_check_dup(const struct bpf_sk_storage_diag *diag,
			   const struct bpf_map *map)
{
	u32 i;

	for (i = 0; i < diag->nr_maps; i++) {
		if (diag->maps[i] == map)
			return true;
	}

	return false;
}

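/* Build a bpf_sk_storage_diag from the nested
 * SK_DIAG_BPF_STORAGE_REQ_MAP_FD attributes of a dump request, taking a
 * reference on each requested sk_storage map.  Duplicate maps are
 * rejected with -EEXIST.
 */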
struct bpf_sk_storage_diag *
bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
{
	struct bpf_sk_storage_diag *diag;
	struct nlattr *nla;
	u32 nr_maps = 0;
	int rem, err;

	/* bpf_local_storage_map is currently limited to CAP_SYS_ADMIN as
	 * the map_alloc_check() side also does.
	 */
	if (!bpf_capable())
		return ERR_PTR(-EPERM);

	nla_for_each_nested(nla, nla_stgs, rem) {
		if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD) {
			if (nla_len(nla) != sizeof(u32))
				return ERR_PTR(-EINVAL);
			nr_maps++;
		}
	}

	diag = kzalloc(struct_size(diag, maps, nr_maps), GFP_KERNEL);
	if (!diag)
		return ERR_PTR(-ENOMEM);

	nla_for_each_nested(nla, nla_stgs, rem) {
		struct bpf_map *map;
		int map_fd;

		if (nla_type(nla) != SK_DIAG_BPF_STORAGE_REQ_MAP_FD)
			continue;

		map_fd = nla_get_u32(nla);
		map = bpf_map_get(map_fd);
		if (IS_ERR(map)) {
			err = PTR_ERR(map);
			goto err_free;
		}
		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) {
			bpf_map_put(map);
			err = -EINVAL;
			goto err_free;
		}
		if (diag_check_dup(diag, map)) {
			bpf_map_put(map);
			err = -EEXIST;
			goto err_free;
		}
		diag->maps[diag->nr_maps++] = map;
	}

	return diag;

err_free:
	bpf_sk_storage_diag_free(diag);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_alloc);

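/* Emit one SK_DIAG_BPF_STORAGE nest (map id + map value) for @sdata into
 * @skb, or cancel the nest and return -EMSGSIZE if it does not fit.
 */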
static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb)
{
	struct nlattr *nla_stg, *nla_value;
	struct bpf_local_storage_map *smap;

	/* It cannot exceed max nlattr's payload */
	BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < BPF_LOCAL_STORAGE_MAX_VALUE_SIZE);

	nla_stg = nla_nest_start(skb, SK_DIAG_BPF_STORAGE);
	if (!nla_stg)
		return -EMSGSIZE;

	smap = rcu_dereference(sdata->smap);
	if (nla_put_u32(skb, SK_DIAG_BPF_STORAGE_MAP_ID, smap->map.id))
		goto errout;

	nla_value = nla_reserve_64bit(skb, SK_DIAG_BPF_STORAGE_MAP_VALUE,
				      smap->map.value_size,
				      SK_DIAG_BPF_STORAGE_PAD);
	if (!nla_value)
		goto errout;

	if (btf_record_has_field(smap->map.record, BPF_SPIN_LOCK))
		copy_map_value_locked(&smap->map, nla_data(nla_value),
				      sdata->data, true);
	else
		copy_map_value(&smap->map, nla_data(nla_value), sdata->data);

	nla_nest_end(skb, nla_stg);
	return 0;

errout:
	nla_nest_cancel(skb, nla_stg);
	return -EMSGSIZE;
}

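/* Dump every storage attached to @sk.  Even after the skb runs out of
 * room (-EMSGSIZE), keep walking the storages so *res_diag_size ends up
 * reflecting the total space a retry would need.
 */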
static int bpf_sk_storage_diag_put_all(struct sock *sk, struct sk_buff *skb,
				       int stg_array_type,
				       unsigned int *res_diag_size)
{
	/* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
	unsigned int diag_size = nla_total_size(0);
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_elem *selem;
	struct bpf_local_storage_map *smap;
	struct nlattr *nla_stgs;
	unsigned int saved_len;
	int err = 0;

	rcu_read_lock();

	sk_storage = rcu_dereference(sk->sk_bpf_storage);
	if (!sk_storage || hlist_empty(&sk_storage->list)) {
		rcu_read_unlock();
		return 0;
	}

	nla_stgs = nla_nest_start(skb, stg_array_type);
	if (!nla_stgs)
		/* Continue to learn diag_size */
		err = -EMSGSIZE;

	saved_len = skb->len;
	hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
		smap = rcu_dereference(SDATA(selem)->smap);
		diag_size += nla_value_size(smap->map.value_size);

		if (nla_stgs && diag_get(SDATA(selem), skb))
			/* Continue to learn diag_size */
			err = -EMSGSIZE;
	}

	rcu_read_unlock();

	if (nla_stgs) {
		if (saved_len == skb->len)
			nla_nest_cancel(skb, nla_stgs);
		else
			nla_nest_end(skb, nla_stgs);
	}

	if (diag_size == nla_total_size(0)) {
		*res_diag_size = 0;
		return 0;
	}

	*res_diag_size = diag_size;
	return err;
}

int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag,
			    struct sock *sk, struct sk_buff *skb,
			    int stg_array_type,
			    unsigned int *res_diag_size)
{
	/* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
	unsigned int diag_size = nla_total_size(0);
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_data *sdata;
	struct nlattr *nla_stgs;
	unsigned int saved_len;
	int err = 0;
	u32 i;

	*res_diag_size = 0;

	/* No map has been specified.  Dump all. */
	if (!diag->nr_maps)
		return bpf_sk_storage_diag_put_all(sk, skb, stg_array_type,
						   res_diag_size);

	rcu_read_lock();
	sk_storage = rcu_dereference(sk->sk_bpf_storage);
	if (!sk_storage || hlist_empty(&sk_storage->list)) {
		rcu_read_unlock();
		return 0;
	}

	nla_stgs = nla_nest_start(skb, stg_array_type);
	if (!nla_stgs)
		/* Continue to learn diag_size */
		err = -EMSGSIZE;

	saved_len = skb->len;
	for (i = 0; i < diag->nr_maps; i++) {
		sdata = bpf_local_storage_lookup(sk_storage,
				(struct bpf_local_storage_map *)diag->maps[i],
				false);

		if (!sdata)
			continue;

		diag_size += nla_value_size(diag->maps[i]->value_size);

		if (nla_stgs && diag_get(sdata, skb))
			/* Continue to learn diag_size */
			err = -EMSGSIZE;
	}
	rcu_read_unlock();

	if (nla_stgs) {
		if (saved_len == skb->len)
			nla_nest_cancel(skb, nla_stgs);
		else
			nla_nest_end(skb, nla_stgs);
	}

	if (diag_size == nla_total_size(0)) {
		*res_diag_size = 0;
		return 0;
	}

	*res_diag_size = diag_size;
	return err;
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_put);

struct bpf_iter_seq_sk_storage_map_info {
	struct bpf_map *map;
	unsigned int bucket_id;
	unsigned skip_elems;
};

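/* Iterator support: walk the map's hash buckets under RCU and return the
 * next element after @prev_selem.  bucket_id and skip_elems in @info
 * record how far the walk has progressed so it can resume on the next
 * seq read.
 */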
static struct bpf_local_storage_elem *
bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
				 struct bpf_local_storage_elem *prev_selem)
	__acquires(RCU) __releases(RCU)
{
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_elem *selem;
	u32 skip_elems = info->skip_elems;
	struct bpf_local_storage_map *smap;
	u32 bucket_id = info->bucket_id;
	u32 i, count, n_buckets;
	struct bpf_local_storage_map_bucket *b;

	smap = (struct bpf_local_storage_map *)info->map;
	n_buckets = 1U << smap->bucket_log;
	if (bucket_id >= n_buckets)
		return NULL;

	/* try to find next selem in the same bucket */
	selem = prev_selem;
	count = 0;
	while (selem) {
		selem = hlist_entry_safe(rcu_dereference(hlist_next_rcu(&selem->map_node)),
					 struct bpf_local_storage_elem, map_node);
		if (!selem) {
			/* not found, unlock and go to the next bucket */
			b = &smap->buckets[bucket_id++];
			rcu_read_unlock();
			skip_elems = 0;
			break;
		}
		sk_storage = rcu_dereference(selem->local_storage);
		if (sk_storage) {
			info->skip_elems = skip_elems + count;
			return selem;
		}
		count++;
	}

	for (i = bucket_id; i < (1U << smap->bucket_log); i++) {
		b = &smap->buckets[i];
		rcu_read_lock();
		count = 0;
		hlist_for_each_entry_rcu(selem, &b->list, map_node) {
			sk_storage = rcu_dereference(selem->local_storage);
			if (sk_storage && count >= skip_elems) {
				info->bucket_id = i;
				info->skip_elems = count;
				return selem;
			}
			count++;
		}
		rcu_read_unlock();
		skip_elems = 0;
	}

	info->bucket_id = i;
	info->skip_elems = 0;
	return NULL;
}

static void *bpf_sk_storage_map_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_local_storage_elem *selem;

	selem = bpf_sk_storage_map_seq_find_next(seq->private, NULL);
	if (!selem)
		return NULL;

	if (*pos == 0)
		++*pos;
	return selem;
}

static void *bpf_sk_storage_map_seq_next(struct seq_file *seq, void *v,
					 loff_t *pos)
{
	struct bpf_iter_seq_sk_storage_map_info *info = seq->private;

	++*pos;
	++info->skip_elems;
	return bpf_sk_storage_map_seq_find_next(seq->private, v);
}

struct bpf_iter__bpf_sk_storage_map {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct bpf_map *, map);
	__bpf_md_ptr(struct sock *, sk);
	__bpf_md_ptr(void *, value);
};

DEFINE_BPF_ITER_FUNC(bpf_sk_storage_map, struct bpf_iter_meta *meta,
		     struct bpf_map *map, struct sock *sk,
		     void *value)

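/* Illustrative iterator program using the context above (a minimal
 * sketch, not part of this file; the section name follows the
 * "bpf_sk_storage_map" target registered below, and the __u64 value type
 * is a made-up example):
 *
 *	SEC("iter/bpf_sk_storage_map")
 *	int dump_sk_storage(struct bpf_iter__bpf_sk_storage_map *ctx)
 *	{
 *		struct sock *sk = ctx->sk;
 *		__u64 *val = ctx->value;
 *
 *		if (!sk || !val)
 *			return 0;
 *		BPF_SEQ_PRINTF(ctx->meta->seq, "%llu\n", *val);
 *		return 0;
 *	}
 */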
static int __bpf_sk_storage_map_seq_show(struct seq_file *seq,
					 struct bpf_local_storage_elem *selem)
{
	struct bpf_iter_seq_sk_storage_map_info *info = seq->private;
	struct bpf_iter__bpf_sk_storage_map ctx = {};
	struct bpf_local_storage *sk_storage;
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;
	int ret = 0;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, selem == NULL);
	if (prog) {
		ctx.meta = &meta;
		ctx.map = info->map;
		if (selem) {
			sk_storage = rcu_dereference(selem->local_storage);
			ctx.sk = sk_storage->owner;
			ctx.value = SDATA(selem)->data;
		}
		ret = bpf_iter_run_prog(prog, &ctx);
	}

	return ret;
}

static int bpf_sk_storage_map_seq_show(struct seq_file *seq, void *v)
{
	return __bpf_sk_storage_map_seq_show(seq, v);
}

static void bpf_sk_storage_map_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	if (!v)
		(void)__bpf_sk_storage_map_seq_show(seq, v);
	else
		rcu_read_unlock();
}

static int bpf_iter_init_sk_storage_map(void *priv_data,
					struct bpf_iter_aux_info *aux)
{
	struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;

	bpf_map_inc_with_uref(aux->map);
	seq_info->map = aux->map;
	return 0;
}

static void bpf_iter_fini_sk_storage_map(void *priv_data)
{
	struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;

	bpf_map_put_with_uref(seq_info->map);
}

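/* Attach-target callback for the "bpf_sk_storage_map" iterator: pin the
 * requested map (with a uref) and reject programs whose declared
 * read/write access is larger than the map's value_size.
 */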
static int bpf_iter_attach_map(struct bpf_prog *prog,
			       union bpf_iter_link_info *linfo,
			       struct bpf_iter_aux_info *aux)
{
	struct bpf_map *map;
	int err = -EINVAL;

	if (!linfo->map.map_fd)
		return -EBADF;

	map = bpf_map_get_with_uref(linfo->map.map_fd);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
		goto put_map;

	if (prog->aux->max_rdwr_access > map->value_size) {
		err = -EACCES;
		goto put_map;
	}

	aux->map = map;
	return 0;

put_map:
	bpf_map_put_with_uref(map);
	return err;
}

static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux)
{
	bpf_map_put_with_uref(aux->map);
}

static const struct seq_operations bpf_sk_storage_map_seq_ops = {
	.start  = bpf_sk_storage_map_seq_start,
	.next   = bpf_sk_storage_map_seq_next,
	.stop   = bpf_sk_storage_map_seq_stop,
	.show   = bpf_sk_storage_map_seq_show,
};

static const struct bpf_iter_seq_info iter_seq_info = {
	.seq_ops		= &bpf_sk_storage_map_seq_ops,
	.init_seq_private	= bpf_iter_init_sk_storage_map,
	.fini_seq_private	= bpf_iter_fini_sk_storage_map,
	.seq_priv_size		= sizeof(struct bpf_iter_seq_sk_storage_map_info),
};

static struct bpf_iter_reg bpf_sk_storage_map_reg_info = {
	.target			= "bpf_sk_storage_map",
	.attach_target		= bpf_iter_attach_map,
	.detach_target		= bpf_iter_detach_map,
	.show_fdinfo		= bpf_iter_map_show_fdinfo,
	.fill_link_info		= bpf_iter_map_fill_link_info,
	.ctx_arg_info_size	= 2,
	.ctx_arg_info		= {
		{ offsetof(struct bpf_iter__bpf_sk_storage_map, sk),
		  PTR_TO_BTF_ID_OR_NULL },
		{ offsetof(struct bpf_iter__bpf_sk_storage_map, value),
		  PTR_TO_BUF | PTR_MAYBE_NULL },
	},
	.seq_info		= &iter_seq_info,
};

static int __init bpf_sk_storage_map_iter_init(void)
{
	bpf_sk_storage_map_reg_info.ctx_arg_info[0].btf_id =
		btf_sock_ids[BTF_SOCK_TYPE_SOCK];
	return bpf_iter_reg_target(&bpf_sk_storage_map_reg_info);
}
late_initcall(bpf_sk_storage_map_iter_init);