16ac99e8fSMartin KaFai Lau // SPDX-License-Identifier: GPL-2.0
26ac99e8fSMartin KaFai Lau /* Copyright (c) 2019 Facebook */
36ac99e8fSMartin KaFai Lau #include <linux/rculist.h>
46ac99e8fSMartin KaFai Lau #include <linux/list.h>
56ac99e8fSMartin KaFai Lau #include <linux/hash.h>
66ac99e8fSMartin KaFai Lau #include <linux/types.h>
76ac99e8fSMartin KaFai Lau #include <linux/spinlock.h>
86ac99e8fSMartin KaFai Lau #include <linux/bpf.h>
98e4597c6SMartin KaFai Lau #include <linux/btf.h>
105ce6e77cSYonghong Song #include <linux/btf_ids.h>
11450af8d0SKP Singh #include <linux/bpf_local_storage.h>
126ac99e8fSMartin KaFai Lau #include <net/bpf_sk_storage.h>
136ac99e8fSMartin KaFai Lau #include <net/sock.h>
141ed4d924SMartin KaFai Lau #include <uapi/linux/sock_diag.h>
156ac99e8fSMartin KaFai Lau #include <uapi/linux/btf.h>
160fe4b381SKP Singh #include <linux/rcupdate_trace.h>
176ac99e8fSMartin KaFai Lau
/* Storage cache handed to bpf_local_storage_map_alloc()/_free() for every
 * sk_storage map created through this file.
 */
DEFINE_BPF_STORAGE_CACHE(sk_cache);
194cc9ce4eSKP Singh
201f00d375SKP Singh static struct bpf_local_storage_data *
bpf_sk_storage_lookup(struct sock * sk,struct bpf_map * map,bool cacheit_lockit)21e794bfddSMartin KaFai Lau bpf_sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit)
226ac99e8fSMartin KaFai Lau {
231f00d375SKP Singh struct bpf_local_storage *sk_storage;
241f00d375SKP Singh struct bpf_local_storage_map *smap;
256ac99e8fSMartin KaFai Lau
260fe4b381SKP Singh sk_storage =
270fe4b381SKP Singh rcu_dereference_check(sk->sk_bpf_storage, bpf_rcu_lock_held());
286ac99e8fSMartin KaFai Lau if (!sk_storage)
296ac99e8fSMartin KaFai Lau return NULL;
306ac99e8fSMartin KaFai Lau
311f00d375SKP Singh smap = (struct bpf_local_storage_map *)map;
321f00d375SKP Singh return bpf_local_storage_lookup(sk_storage, smap, cacheit_lockit);
336ac99e8fSMartin KaFai Lau }
346ac99e8fSMartin KaFai Lau
bpf_sk_storage_del(struct sock * sk,struct bpf_map * map)35e794bfddSMartin KaFai Lau static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map)
366ac99e8fSMartin KaFai Lau {
371f00d375SKP Singh struct bpf_local_storage_data *sdata;
386ac99e8fSMartin KaFai Lau
39e794bfddSMartin KaFai Lau sdata = bpf_sk_storage_lookup(sk, map, false);
406ac99e8fSMartin KaFai Lau if (!sdata)
416ac99e8fSMartin KaFai Lau return -ENOENT;
426ac99e8fSMartin KaFai Lau
43a47eabf2SMartin KaFai Lau bpf_selem_unlink(SELEM(sdata), false);
446ac99e8fSMartin KaFai Lau
456ac99e8fSMartin KaFai Lau return 0;
466ac99e8fSMartin KaFai Lau }
476ac99e8fSMartin KaFai Lau
488f51dfc7SStanislav Fomichev /* Called by __sk_destruct() & bpf_sk_storage_clone() */
bpf_sk_storage_free(struct sock * sk)496ac99e8fSMartin KaFai Lau void bpf_sk_storage_free(struct sock *sk)
506ac99e8fSMartin KaFai Lau {
511f00d375SKP Singh struct bpf_local_storage *sk_storage;
526ac99e8fSMartin KaFai Lau
536ac99e8fSMartin KaFai Lau rcu_read_lock();
546ac99e8fSMartin KaFai Lau sk_storage = rcu_dereference(sk->sk_bpf_storage);
556ac99e8fSMartin KaFai Lau if (!sk_storage) {
566ac99e8fSMartin KaFai Lau rcu_read_unlock();
576ac99e8fSMartin KaFai Lau return;
586ac99e8fSMartin KaFai Lau }
596ac99e8fSMartin KaFai Lau
602ffcb6fcSMartin KaFai Lau bpf_local_storage_destroy(sk_storage);
616ac99e8fSMartin KaFai Lau rcu_read_unlock();
626ac99e8fSMartin KaFai Lau }
636ac99e8fSMartin KaFai Lau
bpf_sk_storage_map_free(struct bpf_map * map)64e794bfddSMartin KaFai Lau static void bpf_sk_storage_map_free(struct bpf_map *map)
65f836a56eSKP Singh {
66c83597faSYonghong Song bpf_local_storage_map_free(map, &sk_cache, NULL);
676ac99e8fSMartin KaFai Lau }
686ac99e8fSMartin KaFai Lau
bpf_sk_storage_map_alloc(union bpf_attr * attr)69e794bfddSMartin KaFai Lau static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
70f836a56eSKP Singh {
7108a7ce38SMartin KaFai Lau return bpf_local_storage_map_alloc(attr, &sk_cache, false);
726ac99e8fSMartin KaFai Lau }
736ac99e8fSMartin KaFai Lau
notsupp_get_next_key(struct bpf_map * map,void * key,void * next_key)746ac99e8fSMartin KaFai Lau static int notsupp_get_next_key(struct bpf_map *map, void *key,
756ac99e8fSMartin KaFai Lau void *next_key)
766ac99e8fSMartin KaFai Lau {
776ac99e8fSMartin KaFai Lau return -ENOTSUPP;
786ac99e8fSMartin KaFai Lau }
796ac99e8fSMartin KaFai Lau
bpf_fd_sk_storage_lookup_elem(struct bpf_map * map,void * key)806ac99e8fSMartin KaFai Lau static void *bpf_fd_sk_storage_lookup_elem(struct bpf_map *map, void *key)
816ac99e8fSMartin KaFai Lau {
821f00d375SKP Singh struct bpf_local_storage_data *sdata;
836ac99e8fSMartin KaFai Lau struct socket *sock;
846ac99e8fSMartin KaFai Lau int fd, err;
856ac99e8fSMartin KaFai Lau
866ac99e8fSMartin KaFai Lau fd = *(int *)key;
876ac99e8fSMartin KaFai Lau sock = sockfd_lookup(fd, &err);
886ac99e8fSMartin KaFai Lau if (sock) {
89e794bfddSMartin KaFai Lau sdata = bpf_sk_storage_lookup(sock->sk, map, true);
906ac99e8fSMartin KaFai Lau sockfd_put(sock);
916ac99e8fSMartin KaFai Lau return sdata ? sdata->data : NULL;
926ac99e8fSMartin KaFai Lau }
936ac99e8fSMartin KaFai Lau
946ac99e8fSMartin KaFai Lau return ERR_PTR(err);
956ac99e8fSMartin KaFai Lau }
966ac99e8fSMartin KaFai Lau
/* .map_update_elem callback.
 *
 * @key points to an int socket fd.  Stores @value for @map on that
 * socket's sk through bpf_local_storage_update(), honouring @map_flags
 * and allocating with GFP_ATOMIC.
 *
 * Returns 0 on success, the error encoded in the update result, or the
 * sockfd_lookup() error when the fd cannot be resolved to a socket.
 */
static long bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
					  void *value, u64 map_flags)
{
	struct bpf_local_storage_map *smap = (struct bpf_local_storage_map *)map;
	struct bpf_local_storage_data *sdata;
	struct socket *sock;
	int err;

	sock = sockfd_lookup(*(int *)key, &err);
	if (!sock)
		return err;

	sdata = bpf_local_storage_update(sock->sk, smap, value, map_flags,
					 GFP_ATOMIC);
	/* Drop the reference taken by sockfd_lookup() */
	sockfd_put(sock);

	return PTR_ERR_OR_ZERO(sdata);
}
1166ac99e8fSMartin KaFai Lau
bpf_fd_sk_storage_delete_elem(struct bpf_map * map,void * key)117d7ba4cc9SJP Kobryn static long bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key)
1186ac99e8fSMartin KaFai Lau {
1196ac99e8fSMartin KaFai Lau struct socket *sock;
1206ac99e8fSMartin KaFai Lau int fd, err;
1216ac99e8fSMartin KaFai Lau
1226ac99e8fSMartin KaFai Lau fd = *(int *)key;
1236ac99e8fSMartin KaFai Lau sock = sockfd_lookup(fd, &err);
1246ac99e8fSMartin KaFai Lau if (sock) {
125e794bfddSMartin KaFai Lau err = bpf_sk_storage_del(sock->sk, map);
1266ac99e8fSMartin KaFai Lau sockfd_put(sock);
1276ac99e8fSMartin KaFai Lau return err;
1286ac99e8fSMartin KaFai Lau }
1296ac99e8fSMartin KaFai Lau
1306ac99e8fSMartin KaFai Lau return err;
1316ac99e8fSMartin KaFai Lau }
1326ac99e8fSMartin KaFai Lau
1331f00d375SKP Singh static struct bpf_local_storage_elem *
bpf_sk_storage_clone_elem(struct sock * newsk,struct bpf_local_storage_map * smap,struct bpf_local_storage_elem * selem)1348f51dfc7SStanislav Fomichev bpf_sk_storage_clone_elem(struct sock *newsk,
1351f00d375SKP Singh struct bpf_local_storage_map *smap,
1361f00d375SKP Singh struct bpf_local_storage_elem *selem)
1378f51dfc7SStanislav Fomichev {
1381f00d375SKP Singh struct bpf_local_storage_elem *copy_selem;
1398f51dfc7SStanislav Fomichev
140b00fa38aSJoanne Koong copy_selem = bpf_selem_alloc(smap, newsk, NULL, true, GFP_ATOMIC);
1418f51dfc7SStanislav Fomichev if (!copy_selem)
1428f51dfc7SStanislav Fomichev return NULL;
1438f51dfc7SStanislav Fomichev
144db559117SKumar Kartikeya Dwivedi if (btf_record_has_field(smap->map.record, BPF_SPIN_LOCK))
1458f51dfc7SStanislav Fomichev copy_map_value_locked(&smap->map, SDATA(copy_selem)->data,
1468f51dfc7SStanislav Fomichev SDATA(selem)->data, true);
1478f51dfc7SStanislav Fomichev else
1488f51dfc7SStanislav Fomichev copy_map_value(&smap->map, SDATA(copy_selem)->data,
1498f51dfc7SStanislav Fomichev SDATA(selem)->data);
1508f51dfc7SStanislav Fomichev
1518f51dfc7SStanislav Fomichev return copy_selem;
1528f51dfc7SStanislav Fomichev }
1538f51dfc7SStanislav Fomichev
bpf_sk_storage_clone(const struct sock * sk,struct sock * newsk)1548f51dfc7SStanislav Fomichev int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
1558f51dfc7SStanislav Fomichev {
1561f00d375SKP Singh struct bpf_local_storage *new_sk_storage = NULL;
1571f00d375SKP Singh struct bpf_local_storage *sk_storage;
1581f00d375SKP Singh struct bpf_local_storage_elem *selem;
1598f51dfc7SStanislav Fomichev int ret = 0;
1608f51dfc7SStanislav Fomichev
1618f51dfc7SStanislav Fomichev RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);
1628f51dfc7SStanislav Fomichev
1638f51dfc7SStanislav Fomichev rcu_read_lock();
1648f51dfc7SStanislav Fomichev sk_storage = rcu_dereference(sk->sk_bpf_storage);
1658f51dfc7SStanislav Fomichev
1668f51dfc7SStanislav Fomichev if (!sk_storage || hlist_empty(&sk_storage->list))
1678f51dfc7SStanislav Fomichev goto out;
1688f51dfc7SStanislav Fomichev
1698f51dfc7SStanislav Fomichev hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
1701f00d375SKP Singh struct bpf_local_storage_elem *copy_selem;
1711f00d375SKP Singh struct bpf_local_storage_map *smap;
1728f51dfc7SStanislav Fomichev struct bpf_map *map;
1738f51dfc7SStanislav Fomichev
1748f51dfc7SStanislav Fomichev smap = rcu_dereference(SDATA(selem)->smap);
1758f51dfc7SStanislav Fomichev if (!(smap->map.map_flags & BPF_F_CLONE))
1768f51dfc7SStanislav Fomichev continue;
1778f51dfc7SStanislav Fomichev
1788f51dfc7SStanislav Fomichev /* Note that for lockless listeners adding new element
1791f00d375SKP Singh * here can race with cleanup in bpf_local_storage_map_free.
1808f51dfc7SStanislav Fomichev * Try to grab map refcnt to make sure that it's still
1818f51dfc7SStanislav Fomichev * alive and prevent concurrent removal.
1828f51dfc7SStanislav Fomichev */
1831e0bd5a0SAndrii Nakryiko map = bpf_map_inc_not_zero(&smap->map);
1848f51dfc7SStanislav Fomichev if (IS_ERR(map))
1858f51dfc7SStanislav Fomichev continue;
1868f51dfc7SStanislav Fomichev
1878f51dfc7SStanislav Fomichev copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem);
1888f51dfc7SStanislav Fomichev if (!copy_selem) {
1898f51dfc7SStanislav Fomichev ret = -ENOMEM;
1908f51dfc7SStanislav Fomichev bpf_map_put(map);
1918f51dfc7SStanislav Fomichev goto out;
1928f51dfc7SStanislav Fomichev }
1938f51dfc7SStanislav Fomichev
1948f51dfc7SStanislav Fomichev if (new_sk_storage) {
1951f00d375SKP Singh bpf_selem_link_map(smap, copy_selem);
1961f00d375SKP Singh bpf_selem_link_storage_nolock(new_sk_storage, copy_selem);
1978f51dfc7SStanislav Fomichev } else {
198b00fa38aSJoanne Koong ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC);
1998f51dfc7SStanislav Fomichev if (ret) {
200c0d63f30SMartin KaFai Lau bpf_selem_free(copy_selem, smap, true);
2018f51dfc7SStanislav Fomichev atomic_sub(smap->elem_size,
2028f51dfc7SStanislav Fomichev &newsk->sk_omem_alloc);
2038f51dfc7SStanislav Fomichev bpf_map_put(map);
2048f51dfc7SStanislav Fomichev goto out;
2058f51dfc7SStanislav Fomichev }
2068f51dfc7SStanislav Fomichev
2071f00d375SKP Singh new_sk_storage =
2081f00d375SKP Singh rcu_dereference(copy_selem->local_storage);
2098f51dfc7SStanislav Fomichev }
2108f51dfc7SStanislav Fomichev bpf_map_put(map);
2118f51dfc7SStanislav Fomichev }
2128f51dfc7SStanislav Fomichev
2138f51dfc7SStanislav Fomichev out:
2148f51dfc7SStanislav Fomichev rcu_read_unlock();
2158f51dfc7SStanislav Fomichev
2168f51dfc7SStanislav Fomichev /* In case of an error, don't free anything explicitly here, the
2178f51dfc7SStanislav Fomichev * caller is responsible to call bpf_sk_storage_free.
2188f51dfc7SStanislav Fomichev */
2198f51dfc7SStanislav Fomichev
2208f51dfc7SStanislav Fomichev return ret;
2218f51dfc7SStanislav Fomichev }
2228f51dfc7SStanislav Fomichev
223b00fa38aSJoanne Koong /* *gfp_flags* is a hidden argument provided by the verifier */
BPF_CALL_5(bpf_sk_storage_get,struct bpf_map *,map,struct sock *,sk,void *,value,u64,flags,gfp_t,gfp_flags)224b00fa38aSJoanne Koong BPF_CALL_5(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
225b00fa38aSJoanne Koong void *, value, u64, flags, gfp_t, gfp_flags)
2266ac99e8fSMartin KaFai Lau {
2271f00d375SKP Singh struct bpf_local_storage_data *sdata;
2286ac99e8fSMartin KaFai Lau
2290fe4b381SKP Singh WARN_ON_ONCE(!bpf_rcu_lock_held());
230592a3498SMartin KaFai Lau if (!sk || !sk_fullsock(sk) || flags > BPF_SK_STORAGE_GET_F_CREATE)
2316ac99e8fSMartin KaFai Lau return (unsigned long)NULL;
2326ac99e8fSMartin KaFai Lau
233e794bfddSMartin KaFai Lau sdata = bpf_sk_storage_lookup(sk, map, true);
2346ac99e8fSMartin KaFai Lau if (sdata)
2356ac99e8fSMartin KaFai Lau return (unsigned long)sdata->data;
2366ac99e8fSMartin KaFai Lau
2376ac99e8fSMartin KaFai Lau if (flags == BPF_SK_STORAGE_GET_F_CREATE &&
2386ac99e8fSMartin KaFai Lau /* Cannot add new elem to a going away sk.
2396ac99e8fSMartin KaFai Lau * Otherwise, the new elem may become a leak
2406ac99e8fSMartin KaFai Lau * (and also other memory issues during map
2416ac99e8fSMartin KaFai Lau * destruction).
2426ac99e8fSMartin KaFai Lau */
2436ac99e8fSMartin KaFai Lau refcount_inc_not_zero(&sk->sk_refcnt)) {
244f836a56eSKP Singh sdata = bpf_local_storage_update(
245f836a56eSKP Singh sk, (struct bpf_local_storage_map *)map, value,
246b00fa38aSJoanne Koong BPF_NOEXIST, gfp_flags);
2476ac99e8fSMartin KaFai Lau /* sk must be a fullsock (guaranteed by verifier),
2486ac99e8fSMartin KaFai Lau * so sock_gen_put() is unnecessary.
2496ac99e8fSMartin KaFai Lau */
2506ac99e8fSMartin KaFai Lau sock_put(sk);
2516ac99e8fSMartin KaFai Lau return IS_ERR(sdata) ?
2526ac99e8fSMartin KaFai Lau (unsigned long)NULL : (unsigned long)sdata->data;
2536ac99e8fSMartin KaFai Lau }
2546ac99e8fSMartin KaFai Lau
2556ac99e8fSMartin KaFai Lau return (unsigned long)NULL;
2566ac99e8fSMartin KaFai Lau }
2576ac99e8fSMartin KaFai Lau
/* bpf_sk_storage_delete() helper.
 *
 * Returns -EINVAL for a NULL or non-full @sk, -ENOENT when sk_refcnt is
 * already zero, otherwise the bpf_sk_storage_del() result.  A reference
 * on @sk is held across the deletion.
 */
BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
{
	int err;

	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (!sk || !sk_fullsock(sk))
		return -EINVAL;

	if (!refcount_inc_not_zero(&sk->sk_refcnt))
		return -ENOENT;

	err = bpf_sk_storage_del(sk, map);
	sock_put(sk);

	return err;
}
2746ac99e8fSMartin KaFai Lau
/* .map_local_storage_charge callback: account @size bytes against the
 * owning socket's sk_omem_alloc.
 *
 * Returns 0 after charging, or -ENOMEM (charging nothing) when @size
 * exceeds sysctl_optmem_max or would bring sk_omem_alloc up to it.
 */
static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap,
				 void *owner, u32 size)
{
	int optmem_max = READ_ONCE(sysctl_optmem_max);
	struct sock *sk = (struct sock *)owner;

	/* same check as in sock_kmalloc() */
	if (size > optmem_max ||
	    atomic_read(&sk->sk_omem_alloc) + size >= optmem_max)
		return -ENOMEM;

	atomic_add(size, &sk->sk_omem_alloc);

	return 0;
}
290f836a56eSKP Singh
/* .map_local_storage_uncharge callback: give @size bytes back to the
 * owning socket's sk_omem_alloc.  @owner is the struct sock.
 */
static void bpf_sk_storage_uncharge(struct bpf_local_storage_map *smap,
				    void *owner, u32 size)
{
	atomic_sub(size, &((struct sock *)owner)->sk_omem_alloc);
}
298f836a56eSKP Singh
299f836a56eSKP Singh static struct bpf_local_storage __rcu **
bpf_sk_storage_ptr(void * owner)300e794bfddSMartin KaFai Lau bpf_sk_storage_ptr(void *owner)
301f836a56eSKP Singh {
302f836a56eSKP Singh struct sock *sk = owner;
303f836a56eSKP Singh
304f836a56eSKP Singh return &sk->sk_bpf_storage;
305f836a56eSKP Singh }
306f836a56eSKP Singh
3076ac99e8fSMartin KaFai Lau const struct bpf_map_ops sk_storage_map_ops = {
308f4d05259SMartin KaFai Lau .map_meta_equal = bpf_map_meta_equal,
3091f00d375SKP Singh .map_alloc_check = bpf_local_storage_map_alloc_check,
310e794bfddSMartin KaFai Lau .map_alloc = bpf_sk_storage_map_alloc,
311e794bfddSMartin KaFai Lau .map_free = bpf_sk_storage_map_free,
3126ac99e8fSMartin KaFai Lau .map_get_next_key = notsupp_get_next_key,
3136ac99e8fSMartin KaFai Lau .map_lookup_elem = bpf_fd_sk_storage_lookup_elem,
3146ac99e8fSMartin KaFai Lau .map_update_elem = bpf_fd_sk_storage_update_elem,
3156ac99e8fSMartin KaFai Lau .map_delete_elem = bpf_fd_sk_storage_delete_elem,
3161f00d375SKP Singh .map_check_btf = bpf_local_storage_map_check_btf,
3173144bfa5SYonghong Song .map_btf_id = &bpf_local_storage_map_btf_id[0],
318e794bfddSMartin KaFai Lau .map_local_storage_charge = bpf_sk_storage_charge,
319e794bfddSMartin KaFai Lau .map_local_storage_uncharge = bpf_sk_storage_uncharge,
320e794bfddSMartin KaFai Lau .map_owner_storage_ptr = bpf_sk_storage_ptr,
3217490b7f1SYafang Shao .map_mem_usage = bpf_local_storage_map_mem_usage,
3226ac99e8fSMartin KaFai Lau };
3236ac99e8fSMartin KaFai Lau
3246ac99e8fSMartin KaFai Lau const struct bpf_func_proto bpf_sk_storage_get_proto = {
3256ac99e8fSMartin KaFai Lau .func = bpf_sk_storage_get,
3266ac99e8fSMartin KaFai Lau .gpl_only = false,
3276ac99e8fSMartin KaFai Lau .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
3286ac99e8fSMartin KaFai Lau .arg1_type = ARG_CONST_MAP_PTR,
329592a3498SMartin KaFai Lau .arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
3306ac99e8fSMartin KaFai Lau .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
3316ac99e8fSMartin KaFai Lau .arg4_type = ARG_ANYTHING,
3326ac99e8fSMartin KaFai Lau };
3336ac99e8fSMartin KaFai Lau
334f7c6cb1dSStanislav Fomichev const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto = {
335f7c6cb1dSStanislav Fomichev .func = bpf_sk_storage_get,
336f7c6cb1dSStanislav Fomichev .gpl_only = false,
337f7c6cb1dSStanislav Fomichev .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
338f7c6cb1dSStanislav Fomichev .arg1_type = ARG_CONST_MAP_PTR,
339f7c6cb1dSStanislav Fomichev .arg2_type = ARG_PTR_TO_CTX, /* context is 'struct sock' */
340f7c6cb1dSStanislav Fomichev .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
341f7c6cb1dSStanislav Fomichev .arg4_type = ARG_ANYTHING,
342f7c6cb1dSStanislav Fomichev };
343f7c6cb1dSStanislav Fomichev
3446ac99e8fSMartin KaFai Lau const struct bpf_func_proto bpf_sk_storage_delete_proto = {
3456ac99e8fSMartin KaFai Lau .func = bpf_sk_storage_delete,
3466ac99e8fSMartin KaFai Lau .gpl_only = false,
3476ac99e8fSMartin KaFai Lau .ret_type = RET_INTEGER,
3486ac99e8fSMartin KaFai Lau .arg1_type = ARG_CONST_MAP_PTR,
349592a3498SMartin KaFai Lau .arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
35030897832SKP Singh };
35130897832SKP Singh
bpf_sk_storage_tracing_allowed(const struct bpf_prog * prog)3528e4597c6SMartin KaFai Lau static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
3538e4597c6SMartin KaFai Lau {
3548e4597c6SMartin KaFai Lau const struct btf *btf_vmlinux;
3558e4597c6SMartin KaFai Lau const struct btf_type *t;
3568e4597c6SMartin KaFai Lau const char *tname;
3578e4597c6SMartin KaFai Lau u32 btf_id;
3588e4597c6SMartin KaFai Lau
3598e4597c6SMartin KaFai Lau if (prog->aux->dst_prog)
3608e4597c6SMartin KaFai Lau return false;
3618e4597c6SMartin KaFai Lau
3628e4597c6SMartin KaFai Lau /* Ensure the tracing program is not tracing
3638e4597c6SMartin KaFai Lau * any bpf_sk_storage*() function and also
3648e4597c6SMartin KaFai Lau * use the bpf_sk_storage_(get|delete) helper.
3658e4597c6SMartin KaFai Lau */
3668e4597c6SMartin KaFai Lau switch (prog->expected_attach_type) {
367a50a85e4SFlorent Revest case BPF_TRACE_ITER:
3688e4597c6SMartin KaFai Lau case BPF_TRACE_RAW_TP:
3698e4597c6SMartin KaFai Lau /* bpf_sk_storage has no trace point */
3708e4597c6SMartin KaFai Lau return true;
3718e4597c6SMartin KaFai Lau case BPF_TRACE_FENTRY:
3728e4597c6SMartin KaFai Lau case BPF_TRACE_FEXIT:
3738e4597c6SMartin KaFai Lau btf_vmlinux = bpf_get_btf_vmlinux();
3747ada3787SKumar Kartikeya Dwivedi if (IS_ERR_OR_NULL(btf_vmlinux))
3757ada3787SKumar Kartikeya Dwivedi return false;
3768e4597c6SMartin KaFai Lau btf_id = prog->aux->attach_btf_id;
3778e4597c6SMartin KaFai Lau t = btf_type_by_id(btf_vmlinux, btf_id);
3788e4597c6SMartin KaFai Lau tname = btf_name_by_offset(btf_vmlinux, t->name_off);
3798e4597c6SMartin KaFai Lau return !!strncmp(tname, "bpf_sk_storage",
3808e4597c6SMartin KaFai Lau strlen("bpf_sk_storage"));
3818e4597c6SMartin KaFai Lau default:
3828e4597c6SMartin KaFai Lau return false;
3838e4597c6SMartin KaFai Lau }
3848e4597c6SMartin KaFai Lau
3858e4597c6SMartin KaFai Lau return false;
3868e4597c6SMartin KaFai Lau }
3878e4597c6SMartin KaFai Lau
388b00fa38aSJoanne Koong /* *gfp_flags* is a hidden argument provided by the verifier */
BPF_CALL_5(bpf_sk_storage_get_tracing,struct bpf_map *,map,struct sock *,sk,void *,value,u64,flags,gfp_t,gfp_flags)389b00fa38aSJoanne Koong BPF_CALL_5(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
390b00fa38aSJoanne Koong void *, value, u64, flags, gfp_t, gfp_flags)
3918e4597c6SMartin KaFai Lau {
3920fe4b381SKP Singh WARN_ON_ONCE(!bpf_rcu_lock_held());
393afa79d08SChangbin Du if (in_hardirq() || in_nmi())
3948e4597c6SMartin KaFai Lau return (unsigned long)NULL;
3958e4597c6SMartin KaFai Lau
396b00fa38aSJoanne Koong return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags,
397b00fa38aSJoanne Koong gfp_flags);
3988e4597c6SMartin KaFai Lau }
3998e4597c6SMartin KaFai Lau
/* Tracing flavour of bpf_sk_storage_delete(): fails with -EPERM when
 * called from hard-irq or NMI context, otherwise behaves exactly like
 * bpf_sk_storage_delete().
 */
BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map,
	   struct sock *, sk)
{
	WARN_ON_ONCE(!bpf_rcu_lock_held());

	if (!in_hardirq() && !in_nmi())
		return ____bpf_sk_storage_delete(map, sk);

	return -EPERM;
}
4098e4597c6SMartin KaFai Lau
4108e4597c6SMartin KaFai Lau const struct bpf_func_proto bpf_sk_storage_get_tracing_proto = {
4118e4597c6SMartin KaFai Lau .func = bpf_sk_storage_get_tracing,
4128e4597c6SMartin KaFai Lau .gpl_only = false,
4138e4597c6SMartin KaFai Lau .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
4148e4597c6SMartin KaFai Lau .arg1_type = ARG_CONST_MAP_PTR,
41591571a51SAlexei Starovoitov .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL,
4168e4597c6SMartin KaFai Lau .arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
4178e4597c6SMartin KaFai Lau .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
4188e4597c6SMartin KaFai Lau .arg4_type = ARG_ANYTHING,
4198e4597c6SMartin KaFai Lau .allowed = bpf_sk_storage_tracing_allowed,
4208e4597c6SMartin KaFai Lau };
4218e4597c6SMartin KaFai Lau
4228e4597c6SMartin KaFai Lau const struct bpf_func_proto bpf_sk_storage_delete_tracing_proto = {
4238e4597c6SMartin KaFai Lau .func = bpf_sk_storage_delete_tracing,
4248e4597c6SMartin KaFai Lau .gpl_only = false,
4258e4597c6SMartin KaFai Lau .ret_type = RET_INTEGER,
4268e4597c6SMartin KaFai Lau .arg1_type = ARG_CONST_MAP_PTR,
42791571a51SAlexei Starovoitov .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL,
4288e4597c6SMartin KaFai Lau .arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
4298e4597c6SMartin KaFai Lau .allowed = bpf_sk_storage_tracing_allowed,
4308e4597c6SMartin KaFai Lau };
4318e4597c6SMartin KaFai Lau
4321ed4d924SMartin KaFai Lau struct bpf_sk_storage_diag {
4331ed4d924SMartin KaFai Lau u32 nr_maps;
4341ed4d924SMartin KaFai Lau struct bpf_map *maps[];
4351ed4d924SMartin KaFai Lau };
4361ed4d924SMartin KaFai Lau
4371ed4d924SMartin KaFai Lau /* The reply will be like:
4381ed4d924SMartin KaFai Lau * INET_DIAG_BPF_SK_STORAGES (nla_nest)
4391ed4d924SMartin KaFai Lau * SK_DIAG_BPF_STORAGE (nla_nest)
4401ed4d924SMartin KaFai Lau * SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
4411ed4d924SMartin KaFai Lau * SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
4421ed4d924SMartin KaFai Lau * SK_DIAG_BPF_STORAGE (nla_nest)
4431ed4d924SMartin KaFai Lau * SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
4441ed4d924SMartin KaFai Lau * SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
4451ed4d924SMartin KaFai Lau * ....
4461ed4d924SMartin KaFai Lau */
nla_value_size(u32 value_size)4471ed4d924SMartin KaFai Lau static int nla_value_size(u32 value_size)
4481ed4d924SMartin KaFai Lau {
4491ed4d924SMartin KaFai Lau /* SK_DIAG_BPF_STORAGE (nla_nest)
4501ed4d924SMartin KaFai Lau * SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
4511ed4d924SMartin KaFai Lau * SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
4521ed4d924SMartin KaFai Lau */
4531ed4d924SMartin KaFai Lau return nla_total_size(0) + nla_total_size(sizeof(u32)) +
4541ed4d924SMartin KaFai Lau nla_total_size_64bit(value_size);
4551ed4d924SMartin KaFai Lau }
4561ed4d924SMartin KaFai Lau
bpf_sk_storage_diag_free(struct bpf_sk_storage_diag * diag)4571ed4d924SMartin KaFai Lau void bpf_sk_storage_diag_free(struct bpf_sk_storage_diag *diag)
4581ed4d924SMartin KaFai Lau {
4591ed4d924SMartin KaFai Lau u32 i;
4601ed4d924SMartin KaFai Lau
4611ed4d924SMartin KaFai Lau if (!diag)
4621ed4d924SMartin KaFai Lau return;
4631ed4d924SMartin KaFai Lau
4641ed4d924SMartin KaFai Lau for (i = 0; i < diag->nr_maps; i++)
4651ed4d924SMartin KaFai Lau bpf_map_put(diag->maps[i]);
4661ed4d924SMartin KaFai Lau
4671ed4d924SMartin KaFai Lau kfree(diag);
4681ed4d924SMartin KaFai Lau }
4691ed4d924SMartin KaFai Lau EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_free);
4701ed4d924SMartin KaFai Lau
diag_check_dup(const struct bpf_sk_storage_diag * diag,const struct bpf_map * map)4711ed4d924SMartin KaFai Lau static bool diag_check_dup(const struct bpf_sk_storage_diag *diag,
4721ed4d924SMartin KaFai Lau const struct bpf_map *map)
4731ed4d924SMartin KaFai Lau {
4741ed4d924SMartin KaFai Lau u32 i;
4751ed4d924SMartin KaFai Lau
4761ed4d924SMartin KaFai Lau for (i = 0; i < diag->nr_maps; i++) {
4771ed4d924SMartin KaFai Lau if (diag->maps[i] == map)
4781ed4d924SMartin KaFai Lau return true;
4791ed4d924SMartin KaFai Lau }
4801ed4d924SMartin KaFai Lau
4811ed4d924SMartin KaFai Lau return false;
4821ed4d924SMartin KaFai Lau }
4831ed4d924SMartin KaFai Lau
4841ed4d924SMartin KaFai Lau struct bpf_sk_storage_diag *
bpf_sk_storage_diag_alloc(const struct nlattr * nla_stgs)4851ed4d924SMartin KaFai Lau bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
4861ed4d924SMartin KaFai Lau {
4871ed4d924SMartin KaFai Lau struct bpf_sk_storage_diag *diag;
4881ed4d924SMartin KaFai Lau struct nlattr *nla;
4891ed4d924SMartin KaFai Lau u32 nr_maps = 0;
4901ed4d924SMartin KaFai Lau int rem, err;
4911ed4d924SMartin KaFai Lau
4921f00d375SKP Singh /* bpf_local_storage_map is currently limited to CAP_SYS_ADMIN as
4931ed4d924SMartin KaFai Lau * the map_alloc_check() side also does.
4941ed4d924SMartin KaFai Lau */
4952c78ee89SAlexei Starovoitov if (!bpf_capable())
4961ed4d924SMartin KaFai Lau return ERR_PTR(-EPERM);
4971ed4d924SMartin KaFai Lau
4981ed4d924SMartin KaFai Lau nla_for_each_nested(nla, nla_stgs, rem) {
499*bcc29b7fSLin Ma if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD) {
500*bcc29b7fSLin Ma if (nla_len(nla) != sizeof(u32))
501*bcc29b7fSLin Ma return ERR_PTR(-EINVAL);
5021ed4d924SMartin KaFai Lau nr_maps++;
5031ed4d924SMartin KaFai Lau }
504*bcc29b7fSLin Ma }
5051ed4d924SMartin KaFai Lau
506fe0bdaecSGustavo A. R. Silva diag = kzalloc(struct_size(diag, maps, nr_maps), GFP_KERNEL);
5071ed4d924SMartin KaFai Lau if (!diag)
5081ed4d924SMartin KaFai Lau return ERR_PTR(-ENOMEM);
5091ed4d924SMartin KaFai Lau
5101ed4d924SMartin KaFai Lau nla_for_each_nested(nla, nla_stgs, rem) {
5111ed4d924SMartin KaFai Lau struct bpf_map *map;
5121ed4d924SMartin KaFai Lau int map_fd;
5131ed4d924SMartin KaFai Lau
5141ed4d924SMartin KaFai Lau if (nla_type(nla) != SK_DIAG_BPF_STORAGE_REQ_MAP_FD)
5151ed4d924SMartin KaFai Lau continue;
5161ed4d924SMartin KaFai Lau
5171ed4d924SMartin KaFai Lau map_fd = nla_get_u32(nla);
5181ed4d924SMartin KaFai Lau map = bpf_map_get(map_fd);
5191ed4d924SMartin KaFai Lau if (IS_ERR(map)) {
5201ed4d924SMartin KaFai Lau err = PTR_ERR(map);
5211ed4d924SMartin KaFai Lau goto err_free;
5221ed4d924SMartin KaFai Lau }
5231ed4d924SMartin KaFai Lau if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) {
5241ed4d924SMartin KaFai Lau bpf_map_put(map);
5251ed4d924SMartin KaFai Lau err = -EINVAL;
5261ed4d924SMartin KaFai Lau goto err_free;
5271ed4d924SMartin KaFai Lau }
5281ed4d924SMartin KaFai Lau if (diag_check_dup(diag, map)) {
5291ed4d924SMartin KaFai Lau bpf_map_put(map);
5301ed4d924SMartin KaFai Lau err = -EEXIST;
5311ed4d924SMartin KaFai Lau goto err_free;
5321ed4d924SMartin KaFai Lau }
5331ed4d924SMartin KaFai Lau diag->maps[diag->nr_maps++] = map;
5341ed4d924SMartin KaFai Lau }
5351ed4d924SMartin KaFai Lau
5361ed4d924SMartin KaFai Lau return diag;
5371ed4d924SMartin KaFai Lau
5381ed4d924SMartin KaFai Lau err_free:
5391ed4d924SMartin KaFai Lau bpf_sk_storage_diag_free(diag);
5401ed4d924SMartin KaFai Lau return ERR_PTR(err);
5411ed4d924SMartin KaFai Lau }
5421ed4d924SMartin KaFai Lau EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_alloc);
5431ed4d924SMartin KaFai Lau
diag_get(struct bpf_local_storage_data * sdata,struct sk_buff * skb)5441f00d375SKP Singh static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb)
5451ed4d924SMartin KaFai Lau {
5461ed4d924SMartin KaFai Lau struct nlattr *nla_stg, *nla_value;
5471f00d375SKP Singh struct bpf_local_storage_map *smap;
5481ed4d924SMartin KaFai Lau
5491ed4d924SMartin KaFai Lau /* It cannot exceed max nlattr's payload */
5501f00d375SKP Singh BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < BPF_LOCAL_STORAGE_MAX_VALUE_SIZE);
5511ed4d924SMartin KaFai Lau
5521ed4d924SMartin KaFai Lau nla_stg = nla_nest_start(skb, SK_DIAG_BPF_STORAGE);
5531ed4d924SMartin KaFai Lau if (!nla_stg)
5541ed4d924SMartin KaFai Lau return -EMSGSIZE;
5551ed4d924SMartin KaFai Lau
5561ed4d924SMartin KaFai Lau smap = rcu_dereference(sdata->smap);
5571ed4d924SMartin KaFai Lau if (nla_put_u32(skb, SK_DIAG_BPF_STORAGE_MAP_ID, smap->map.id))
5581ed4d924SMartin KaFai Lau goto errout;
5591ed4d924SMartin KaFai Lau
5601ed4d924SMartin KaFai Lau nla_value = nla_reserve_64bit(skb, SK_DIAG_BPF_STORAGE_MAP_VALUE,
5611ed4d924SMartin KaFai Lau smap->map.value_size,
5621ed4d924SMartin KaFai Lau SK_DIAG_BPF_STORAGE_PAD);
5631ed4d924SMartin KaFai Lau if (!nla_value)
5641ed4d924SMartin KaFai Lau goto errout;
5651ed4d924SMartin KaFai Lau
566db559117SKumar Kartikeya Dwivedi if (btf_record_has_field(smap->map.record, BPF_SPIN_LOCK))
5671ed4d924SMartin KaFai Lau copy_map_value_locked(&smap->map, nla_data(nla_value),
5681ed4d924SMartin KaFai Lau sdata->data, true);
5691ed4d924SMartin KaFai Lau else
5701ed4d924SMartin KaFai Lau copy_map_value(&smap->map, nla_data(nla_value), sdata->data);
5711ed4d924SMartin KaFai Lau
5721ed4d924SMartin KaFai Lau nla_nest_end(skb, nla_stg);
5731ed4d924SMartin KaFai Lau return 0;
5741ed4d924SMartin KaFai Lau
5751ed4d924SMartin KaFai Lau errout:
5761ed4d924SMartin KaFai Lau nla_nest_cancel(skb, nla_stg);
5771ed4d924SMartin KaFai Lau return -EMSGSIZE;
5781ed4d924SMartin KaFai Lau }
5791ed4d924SMartin KaFai Lau
bpf_sk_storage_diag_put_all(struct sock * sk,struct sk_buff * skb,int stg_array_type,unsigned int * res_diag_size)5801ed4d924SMartin KaFai Lau static int bpf_sk_storage_diag_put_all(struct sock *sk, struct sk_buff *skb,
5811ed4d924SMartin KaFai Lau int stg_array_type,
5821ed4d924SMartin KaFai Lau unsigned int *res_diag_size)
5831ed4d924SMartin KaFai Lau {
5841ed4d924SMartin KaFai Lau /* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
5851ed4d924SMartin KaFai Lau unsigned int diag_size = nla_total_size(0);
5861f00d375SKP Singh struct bpf_local_storage *sk_storage;
5871f00d375SKP Singh struct bpf_local_storage_elem *selem;
5881f00d375SKP Singh struct bpf_local_storage_map *smap;
5891ed4d924SMartin KaFai Lau struct nlattr *nla_stgs;
5901ed4d924SMartin KaFai Lau unsigned int saved_len;
5911ed4d924SMartin KaFai Lau int err = 0;
5921ed4d924SMartin KaFai Lau
5931ed4d924SMartin KaFai Lau rcu_read_lock();
5941ed4d924SMartin KaFai Lau
5951ed4d924SMartin KaFai Lau sk_storage = rcu_dereference(sk->sk_bpf_storage);
5961ed4d924SMartin KaFai Lau if (!sk_storage || hlist_empty(&sk_storage->list)) {
5971ed4d924SMartin KaFai Lau rcu_read_unlock();
5981ed4d924SMartin KaFai Lau return 0;
5991ed4d924SMartin KaFai Lau }
6001ed4d924SMartin KaFai Lau
6011ed4d924SMartin KaFai Lau nla_stgs = nla_nest_start(skb, stg_array_type);
6021ed4d924SMartin KaFai Lau if (!nla_stgs)
6031ed4d924SMartin KaFai Lau /* Continue to learn diag_size */
6041ed4d924SMartin KaFai Lau err = -EMSGSIZE;
6051ed4d924SMartin KaFai Lau
6061ed4d924SMartin KaFai Lau saved_len = skb->len;
6071ed4d924SMartin KaFai Lau hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
6081ed4d924SMartin KaFai Lau smap = rcu_dereference(SDATA(selem)->smap);
6091ed4d924SMartin KaFai Lau diag_size += nla_value_size(smap->map.value_size);
6101ed4d924SMartin KaFai Lau
6111ed4d924SMartin KaFai Lau if (nla_stgs && diag_get(SDATA(selem), skb))
6121ed4d924SMartin KaFai Lau /* Continue to learn diag_size */
6131ed4d924SMartin KaFai Lau err = -EMSGSIZE;
6141ed4d924SMartin KaFai Lau }
6151ed4d924SMartin KaFai Lau
6161ed4d924SMartin KaFai Lau rcu_read_unlock();
6171ed4d924SMartin KaFai Lau
6181ed4d924SMartin KaFai Lau if (nla_stgs) {
6191ed4d924SMartin KaFai Lau if (saved_len == skb->len)
6201ed4d924SMartin KaFai Lau nla_nest_cancel(skb, nla_stgs);
6211ed4d924SMartin KaFai Lau else
6221ed4d924SMartin KaFai Lau nla_nest_end(skb, nla_stgs);
6231ed4d924SMartin KaFai Lau }
6241ed4d924SMartin KaFai Lau
6251ed4d924SMartin KaFai Lau if (diag_size == nla_total_size(0)) {
6261ed4d924SMartin KaFai Lau *res_diag_size = 0;
6271ed4d924SMartin KaFai Lau return 0;
6281ed4d924SMartin KaFai Lau }
6291ed4d924SMartin KaFai Lau
6301ed4d924SMartin KaFai Lau *res_diag_size = diag_size;
6311ed4d924SMartin KaFai Lau return err;
6321ed4d924SMartin KaFai Lau }
6331ed4d924SMartin KaFai Lau
bpf_sk_storage_diag_put(struct bpf_sk_storage_diag * diag,struct sock * sk,struct sk_buff * skb,int stg_array_type,unsigned int * res_diag_size)6341ed4d924SMartin KaFai Lau int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag,
6351ed4d924SMartin KaFai Lau struct sock *sk, struct sk_buff *skb,
6361ed4d924SMartin KaFai Lau int stg_array_type,
6371ed4d924SMartin KaFai Lau unsigned int *res_diag_size)
6381ed4d924SMartin KaFai Lau {
6391ed4d924SMartin KaFai Lau /* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
6401ed4d924SMartin KaFai Lau unsigned int diag_size = nla_total_size(0);
6411f00d375SKP Singh struct bpf_local_storage *sk_storage;
6421f00d375SKP Singh struct bpf_local_storage_data *sdata;
6431ed4d924SMartin KaFai Lau struct nlattr *nla_stgs;
6441ed4d924SMartin KaFai Lau unsigned int saved_len;
6451ed4d924SMartin KaFai Lau int err = 0;
6461ed4d924SMartin KaFai Lau u32 i;
6471ed4d924SMartin KaFai Lau
6481ed4d924SMartin KaFai Lau *res_diag_size = 0;
6491ed4d924SMartin KaFai Lau
6501ed4d924SMartin KaFai Lau /* No map has been specified. Dump all. */
6511ed4d924SMartin KaFai Lau if (!diag->nr_maps)
6521ed4d924SMartin KaFai Lau return bpf_sk_storage_diag_put_all(sk, skb, stg_array_type,
6531ed4d924SMartin KaFai Lau res_diag_size);
6541ed4d924SMartin KaFai Lau
6551ed4d924SMartin KaFai Lau rcu_read_lock();
6561ed4d924SMartin KaFai Lau sk_storage = rcu_dereference(sk->sk_bpf_storage);
6571ed4d924SMartin KaFai Lau if (!sk_storage || hlist_empty(&sk_storage->list)) {
6581ed4d924SMartin KaFai Lau rcu_read_unlock();
6591ed4d924SMartin KaFai Lau return 0;
6601ed4d924SMartin KaFai Lau }
6611ed4d924SMartin KaFai Lau
6621ed4d924SMartin KaFai Lau nla_stgs = nla_nest_start(skb, stg_array_type);
6631ed4d924SMartin KaFai Lau if (!nla_stgs)
6641ed4d924SMartin KaFai Lau /* Continue to learn diag_size */
6651ed4d924SMartin KaFai Lau err = -EMSGSIZE;
6661ed4d924SMartin KaFai Lau
6671ed4d924SMartin KaFai Lau saved_len = skb->len;
6681ed4d924SMartin KaFai Lau for (i = 0; i < diag->nr_maps; i++) {
6691f00d375SKP Singh sdata = bpf_local_storage_lookup(sk_storage,
6701f00d375SKP Singh (struct bpf_local_storage_map *)diag->maps[i],
6711ed4d924SMartin KaFai Lau false);
6721ed4d924SMartin KaFai Lau
6731ed4d924SMartin KaFai Lau if (!sdata)
6741ed4d924SMartin KaFai Lau continue;
6751ed4d924SMartin KaFai Lau
6761ed4d924SMartin KaFai Lau diag_size += nla_value_size(diag->maps[i]->value_size);
6771ed4d924SMartin KaFai Lau
6781ed4d924SMartin KaFai Lau if (nla_stgs && diag_get(sdata, skb))
6791ed4d924SMartin KaFai Lau /* Continue to learn diag_size */
6801ed4d924SMartin KaFai Lau err = -EMSGSIZE;
6811ed4d924SMartin KaFai Lau }
6821ed4d924SMartin KaFai Lau rcu_read_unlock();
6831ed4d924SMartin KaFai Lau
6841ed4d924SMartin KaFai Lau if (nla_stgs) {
6851ed4d924SMartin KaFai Lau if (saved_len == skb->len)
6861ed4d924SMartin KaFai Lau nla_nest_cancel(skb, nla_stgs);
6871ed4d924SMartin KaFai Lau else
6881ed4d924SMartin KaFai Lau nla_nest_end(skb, nla_stgs);
6891ed4d924SMartin KaFai Lau }
6901ed4d924SMartin KaFai Lau
6911ed4d924SMartin KaFai Lau if (diag_size == nla_total_size(0)) {
6921ed4d924SMartin KaFai Lau *res_diag_size = 0;
6931ed4d924SMartin KaFai Lau return 0;
6941ed4d924SMartin KaFai Lau }
6951ed4d924SMartin KaFai Lau
6961ed4d924SMartin KaFai Lau *res_diag_size = diag_size;
6971ed4d924SMartin KaFai Lau return err;
6981ed4d924SMartin KaFai Lau }
6991ed4d924SMartin KaFai Lau EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_put);
7005ce6e77cSYonghong Song
/* Per-seq_file private state of the bpf_sk_storage_map iterator. */
struct bpf_iter_seq_sk_storage_map_info {
	/* Map being iterated; a reference is held between init and fini */
	struct bpf_map *map;
	/* Bucket at which the next lookup resumes */
	unsigned int bucket_id;
	/* Position inside that bucket at which the next lookup resumes */
	unsigned int skip_elems;
};
7065ce6e77cSYonghong Song
7071f00d375SKP Singh static struct bpf_local_storage_elem *
bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info * info,struct bpf_local_storage_elem * prev_selem)7085ce6e77cSYonghong Song bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
7091f00d375SKP Singh struct bpf_local_storage_elem *prev_selem)
710c69d2ddbSYonghong Song __acquires(RCU) __releases(RCU)
7115ce6e77cSYonghong Song {
7121f00d375SKP Singh struct bpf_local_storage *sk_storage;
7131f00d375SKP Singh struct bpf_local_storage_elem *selem;
7145ce6e77cSYonghong Song u32 skip_elems = info->skip_elems;
7151f00d375SKP Singh struct bpf_local_storage_map *smap;
7165ce6e77cSYonghong Song u32 bucket_id = info->bucket_id;
7175ce6e77cSYonghong Song u32 i, count, n_buckets;
7181f00d375SKP Singh struct bpf_local_storage_map_bucket *b;
7195ce6e77cSYonghong Song
7201f00d375SKP Singh smap = (struct bpf_local_storage_map *)info->map;
7215ce6e77cSYonghong Song n_buckets = 1U << smap->bucket_log;
7225ce6e77cSYonghong Song if (bucket_id >= n_buckets)
7235ce6e77cSYonghong Song return NULL;
7245ce6e77cSYonghong Song
7255ce6e77cSYonghong Song /* try to find next selem in the same bucket */
7265ce6e77cSYonghong Song selem = prev_selem;
7275ce6e77cSYonghong Song count = 0;
7285ce6e77cSYonghong Song while (selem) {
729c69d2ddbSYonghong Song selem = hlist_entry_safe(rcu_dereference(hlist_next_rcu(&selem->map_node)),
7301f00d375SKP Singh struct bpf_local_storage_elem, map_node);
7315ce6e77cSYonghong Song if (!selem) {
7325ce6e77cSYonghong Song /* not found, unlock and go to the next bucket */
7335ce6e77cSYonghong Song b = &smap->buckets[bucket_id++];
734c69d2ddbSYonghong Song rcu_read_unlock();
7355ce6e77cSYonghong Song skip_elems = 0;
7365ce6e77cSYonghong Song break;
7375ce6e77cSYonghong Song }
738c69d2ddbSYonghong Song sk_storage = rcu_dereference(selem->local_storage);
7395ce6e77cSYonghong Song if (sk_storage) {
7405ce6e77cSYonghong Song info->skip_elems = skip_elems + count;
7415ce6e77cSYonghong Song return selem;
7425ce6e77cSYonghong Song }
7435ce6e77cSYonghong Song count++;
7445ce6e77cSYonghong Song }
7455ce6e77cSYonghong Song
7465ce6e77cSYonghong Song for (i = bucket_id; i < (1U << smap->bucket_log); i++) {
7475ce6e77cSYonghong Song b = &smap->buckets[i];
748c69d2ddbSYonghong Song rcu_read_lock();
7495ce6e77cSYonghong Song count = 0;
750c69d2ddbSYonghong Song hlist_for_each_entry_rcu(selem, &b->list, map_node) {
751c69d2ddbSYonghong Song sk_storage = rcu_dereference(selem->local_storage);
7525ce6e77cSYonghong Song if (sk_storage && count >= skip_elems) {
7535ce6e77cSYonghong Song info->bucket_id = i;
7545ce6e77cSYonghong Song info->skip_elems = count;
7555ce6e77cSYonghong Song return selem;
7565ce6e77cSYonghong Song }
7575ce6e77cSYonghong Song count++;
7585ce6e77cSYonghong Song }
759c69d2ddbSYonghong Song rcu_read_unlock();
7605ce6e77cSYonghong Song skip_elems = 0;
7615ce6e77cSYonghong Song }
7625ce6e77cSYonghong Song
7635ce6e77cSYonghong Song info->bucket_id = i;
7645ce6e77cSYonghong Song info->skip_elems = 0;
7655ce6e77cSYonghong Song return NULL;
7665ce6e77cSYonghong Song }
7675ce6e77cSYonghong Song
/*
 * seq_file ->start: look up the element at the position recorded in the
 * iterator's private state. Returns NULL when there is none. *pos is only
 * advanced (from 0 to 1) when an element was found at the very first
 * position.
 */
static void *bpf_sk_storage_map_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_local_storage_elem *first;

	first = bpf_sk_storage_map_seq_find_next(seq->private, NULL);
	if (first && *pos == 0)
		++*pos;

	return first;
}
7805ce6e77cSYonghong Song
/*
 * seq_file ->next: step past the element @v that was just shown and return
 * the following one, or NULL at the end of the map. Both *pos and the
 * recorded in-bucket position are advanced by one before the lookup.
 */
static void *bpf_sk_storage_map_seq_next(struct seq_file *seq, void *v,
					 loff_t *pos)
{
	struct bpf_iter_seq_sk_storage_map_info *iter_info = seq->private;

	*pos += 1;
	iter_info->skip_elems += 1;

	return bpf_sk_storage_map_seq_find_next(iter_info, v);
}
7905ce6e77cSYonghong Song
7915ce6e77cSYonghong Song struct bpf_iter__bpf_sk_storage_map {
7925ce6e77cSYonghong Song __bpf_md_ptr(struct bpf_iter_meta *, meta);
7935ce6e77cSYonghong Song __bpf_md_ptr(struct bpf_map *, map);
7945ce6e77cSYonghong Song __bpf_md_ptr(struct sock *, sk);
7955ce6e77cSYonghong Song __bpf_md_ptr(void *, value);
7965ce6e77cSYonghong Song };
7975ce6e77cSYonghong Song
/* Arguments mirror the fields of struct bpf_iter__bpf_sk_storage_map. */
DEFINE_BPF_ITER_FUNC(bpf_sk_storage_map,
		     struct bpf_iter_meta *meta,
		     struct bpf_map *map,
		     struct sock *sk,
		     void *value)
8015ce6e77cSYonghong Song
8025ce6e77cSYonghong Song static int __bpf_sk_storage_map_seq_show(struct seq_file *seq,
8031f00d375SKP Singh struct bpf_local_storage_elem *selem)
8045ce6e77cSYonghong Song {
8055ce6e77cSYonghong Song struct bpf_iter_seq_sk_storage_map_info *info = seq->private;
8065ce6e77cSYonghong Song struct bpf_iter__bpf_sk_storage_map ctx = {};
8071f00d375SKP Singh struct bpf_local_storage *sk_storage;
8085ce6e77cSYonghong Song struct bpf_iter_meta meta;
8095ce6e77cSYonghong Song struct bpf_prog *prog;
8105ce6e77cSYonghong Song int ret = 0;
8115ce6e77cSYonghong Song
8125ce6e77cSYonghong Song meta.seq = seq;
8135ce6e77cSYonghong Song prog = bpf_iter_get_info(&meta, selem == NULL);
8145ce6e77cSYonghong Song if (prog) {
8155ce6e77cSYonghong Song ctx.meta = &meta;
8165ce6e77cSYonghong Song ctx.map = info->map;
8175ce6e77cSYonghong Song if (selem) {
818c69d2ddbSYonghong Song sk_storage = rcu_dereference(selem->local_storage);
8191f00d375SKP Singh ctx.sk = sk_storage->owner;
8205ce6e77cSYonghong Song ctx.value = SDATA(selem)->data;
8215ce6e77cSYonghong Song }
8225ce6e77cSYonghong Song ret = bpf_iter_run_prog(prog, &ctx);
8235ce6e77cSYonghong Song }
8245ce6e77cSYonghong Song
8255ce6e77cSYonghong Song return ret;
8265ce6e77cSYonghong Song }
8275ce6e77cSYonghong Song
/* seq_file ->show: run the bpf program for the element @v. */
static int bpf_sk_storage_map_seq_show(struct seq_file *seq, void *v)
{
	struct bpf_local_storage_elem *selem = v;

	return __bpf_sk_storage_map_seq_show(seq, selem);
}
8325ce6e77cSYonghong Song
bpf_sk_storage_map_seq_stop(struct seq_file * seq,void * v)8335ce6e77cSYonghong Song static void bpf_sk_storage_map_seq_stop(struct seq_file *seq, void *v)
834c69d2ddbSYonghong Song __releases(RCU)
8355ce6e77cSYonghong Song {
836c69d2ddbSYonghong Song if (!v)
8375ce6e77cSYonghong Song (void)__bpf_sk_storage_map_seq_show(seq, v);
838c69d2ddbSYonghong Song else
839c69d2ddbSYonghong Song rcu_read_unlock();
8405ce6e77cSYonghong Song }
8415ce6e77cSYonghong Song
bpf_iter_init_sk_storage_map(void * priv_data,struct bpf_iter_aux_info * aux)8425ce6e77cSYonghong Song static int bpf_iter_init_sk_storage_map(void *priv_data,
8435ce6e77cSYonghong Song struct bpf_iter_aux_info *aux)
8445ce6e77cSYonghong Song {
8455ce6e77cSYonghong Song struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;
8465ce6e77cSYonghong Song
8473c5f6e69SHou Tao bpf_map_inc_with_uref(aux->map);
8485ce6e77cSYonghong Song seq_info->map = aux->map;
8495ce6e77cSYonghong Song return 0;
8505ce6e77cSYonghong Song }
8515ce6e77cSYonghong Song
bpf_iter_fini_sk_storage_map(void * priv_data)8523c5f6e69SHou Tao static void bpf_iter_fini_sk_storage_map(void *priv_data)
8533c5f6e69SHou Tao {
8543c5f6e69SHou Tao struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;
8553c5f6e69SHou Tao
8563c5f6e69SHou Tao bpf_map_put_with_uref(seq_info->map);
8573c5f6e69SHou Tao }
8583c5f6e69SHou Tao
bpf_iter_attach_map(struct bpf_prog * prog,union bpf_iter_link_info * linfo,struct bpf_iter_aux_info * aux)8595e7b3020SYonghong Song static int bpf_iter_attach_map(struct bpf_prog *prog,
8605e7b3020SYonghong Song union bpf_iter_link_info *linfo,
8615ce6e77cSYonghong Song struct bpf_iter_aux_info *aux)
8625ce6e77cSYonghong Song {
8635e7b3020SYonghong Song struct bpf_map *map;
8645e7b3020SYonghong Song int err = -EINVAL;
8655e7b3020SYonghong Song
8665e7b3020SYonghong Song if (!linfo->map.map_fd)
8675e7b3020SYonghong Song return -EBADF;
8685e7b3020SYonghong Song
8695e7b3020SYonghong Song map = bpf_map_get_with_uref(linfo->map.map_fd);
8705e7b3020SYonghong Song if (IS_ERR(map))
8715e7b3020SYonghong Song return PTR_ERR(map);
8725ce6e77cSYonghong Song
8735ce6e77cSYonghong Song if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
8745e7b3020SYonghong Song goto put_map;
8755ce6e77cSYonghong Song
87652bd05ebSHou Tao if (prog->aux->max_rdwr_access > map->value_size) {
8775e7b3020SYonghong Song err = -EACCES;
8785e7b3020SYonghong Song goto put_map;
8795e7b3020SYonghong Song }
8805ce6e77cSYonghong Song
8815e7b3020SYonghong Song aux->map = map;
8825ce6e77cSYonghong Song return 0;
8835e7b3020SYonghong Song
8845e7b3020SYonghong Song put_map:
8855e7b3020SYonghong Song bpf_map_put_with_uref(map);
8865e7b3020SYonghong Song return err;
8875e7b3020SYonghong Song }
8885e7b3020SYonghong Song
bpf_iter_detach_map(struct bpf_iter_aux_info * aux)8895e7b3020SYonghong Song static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux)
8905e7b3020SYonghong Song {
8915e7b3020SYonghong Song bpf_map_put_with_uref(aux->map);
8925ce6e77cSYonghong Song }
8935ce6e77cSYonghong Song
8945ce6e77cSYonghong Song static const struct seq_operations bpf_sk_storage_map_seq_ops = {
8955ce6e77cSYonghong Song .start = bpf_sk_storage_map_seq_start,
8965ce6e77cSYonghong Song .next = bpf_sk_storage_map_seq_next,
8975ce6e77cSYonghong Song .stop = bpf_sk_storage_map_seq_stop,
8985ce6e77cSYonghong Song .show = bpf_sk_storage_map_seq_show,
8995ce6e77cSYonghong Song };
9005ce6e77cSYonghong Song
9015ce6e77cSYonghong Song static const struct bpf_iter_seq_info iter_seq_info = {
9025ce6e77cSYonghong Song .seq_ops = &bpf_sk_storage_map_seq_ops,
9035ce6e77cSYonghong Song .init_seq_private = bpf_iter_init_sk_storage_map,
9043c5f6e69SHou Tao .fini_seq_private = bpf_iter_fini_sk_storage_map,
9055ce6e77cSYonghong Song .seq_priv_size = sizeof(struct bpf_iter_seq_sk_storage_map_info),
9065ce6e77cSYonghong Song };
9075ce6e77cSYonghong Song
9085ce6e77cSYonghong Song static struct bpf_iter_reg bpf_sk_storage_map_reg_info = {
9095ce6e77cSYonghong Song .target = "bpf_sk_storage_map",
9105e7b3020SYonghong Song .attach_target = bpf_iter_attach_map,
9115e7b3020SYonghong Song .detach_target = bpf_iter_detach_map,
912b76f2226SYonghong Song .show_fdinfo = bpf_iter_map_show_fdinfo,
913b76f2226SYonghong Song .fill_link_info = bpf_iter_map_fill_link_info,
9145ce6e77cSYonghong Song .ctx_arg_info_size = 2,
9155ce6e77cSYonghong Song .ctx_arg_info = {
9165ce6e77cSYonghong Song { offsetof(struct bpf_iter__bpf_sk_storage_map, sk),
9175ce6e77cSYonghong Song PTR_TO_BTF_ID_OR_NULL },
9185ce6e77cSYonghong Song { offsetof(struct bpf_iter__bpf_sk_storage_map, value),
91920b2aff4SHao Luo PTR_TO_BUF | PTR_MAYBE_NULL },
9205ce6e77cSYonghong Song },
9215ce6e77cSYonghong Song .seq_info = &iter_seq_info,
9225ce6e77cSYonghong Song };
9235ce6e77cSYonghong Song
bpf_sk_storage_map_iter_init(void)9245ce6e77cSYonghong Song static int __init bpf_sk_storage_map_iter_init(void)
9255ce6e77cSYonghong Song {
9265ce6e77cSYonghong Song bpf_sk_storage_map_reg_info.ctx_arg_info[0].btf_id =
9275ce6e77cSYonghong Song btf_sock_ids[BTF_SOCK_TYPE_SOCK];
9285ce6e77cSYonghong Song return bpf_iter_reg_target(&bpf_sk_storage_map_reg_info);
9295ce6e77cSYonghong Song }
9305ce6e77cSYonghong Song late_initcall(bpf_sk_storage_map_iter_init);
931